@byted-las/contextlake-openclaw 1.0.4 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  import { ContextLakeConfig } from '../utils/config';
2
2
  export declare function getCliCommands(pluginConfig: ContextLakeConfig, logger: any): {
3
- connectAction: (datasource_name: string, options: any) => Promise<void>;
3
+ connectAction: (datasource_name: string, url: string, options: any) => Promise<void>;
4
4
  ingestAction: (datasource_name: string) => Promise<void>;
5
5
  searchAction: (query: any, options: any) => Promise<void>;
6
6
  listAction: (options: any) => Promise<void>;
@@ -34,28 +34,22 @@ function parseMetadata(metadata) {
34
34
  }
35
35
  function getCliCommands(pluginConfig, logger) {
36
36
  return {
37
- connectAction: async (datasource_name, options) => {
38
- logger.info(`[${new Date().toISOString()}] [ContextLake] CLI connect started`, { datasource_name, options });
37
+ connectAction: async (datasource_name, url, options) => {
38
+ logger.info(`[${new Date().toISOString()}] [ContextLake] CLI connect started`, { datasource_name, url, options });
39
39
  try {
40
40
  const params = {
41
41
  datasource_name,
42
- vendor: options.vendor,
42
+ url,
43
43
  endpoint: options.endpoint,
44
44
  access_key: options.ak,
45
45
  secret_key: options.sk,
46
46
  region: options.region,
47
- bucket: options.bucket,
48
- prefix: options.prefix,
49
47
  sample_rows: parseInt(options.sampleRows),
50
48
  };
51
49
  // eslint-disable-next-line no-console
52
50
  console.log(`[contextlake connect] Connecting to datasource "${datasource_name}"...`);
53
51
  // eslint-disable-next-line no-console
54
- console.log(` vendor: ${params.vendor}`);
55
- // eslint-disable-next-line no-console
56
- console.log(` bucket: ${params.bucket}`);
57
- // eslint-disable-next-line no-console
58
- console.log(` prefix: ${params.prefix}`);
52
+ console.log(` url: ${params.url}`);
59
53
  const result = await (0, profiler_1.connectDataSource)(params);
60
54
  // eslint-disable-next-line no-console
61
55
  console.log(JSON.stringify(result, null, 2));
@@ -26,6 +26,10 @@ function registerAll(ctx, logger) {
26
26
  logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.readS3ObjectTool.name}`);
27
27
  ctx.registerTool(tools.writeLanceCatalogTool);
28
28
  logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.writeLanceCatalogTool.name}`);
29
+ ctx.registerTool(tools.readLanceCatalogTool);
30
+ logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.readLanceCatalogTool.name}`);
31
+ ctx.registerTool(tools.generatePresignedUrlTool);
32
+ logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.generatePresignedUrlTool.name}`);
29
33
  ctx.registerTool(tools.listDatasourceTool);
30
34
  logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.listDatasourceTool.name}`);
31
35
  for (const lasTool of tools.lasTools) {
@@ -46,15 +50,12 @@ function registerAll(ctx, logger) {
46
50
  .description('Manage ContextLake knowledge base');
47
51
  const commands = (0, cli_1.getCliCommands)(pluginConfig, logger);
48
52
  // connect -- data source profiling (las-data-profiler)
49
- contextlake.command('connect <datasource_name>')
53
+ contextlake.command('connect <datasource_name> <url>')
50
54
  .description('Connect to a data source and profile its structure, schemas, and media metadata into LanceDB')
51
- .requiredOption('--vendor <vendor>', 'Data source type: volcengine | alibaba | tencent | aws | local')
52
55
  .option('--endpoint <url>', 'S3 Endpoint URL (not needed for local)')
53
56
  .option('--ak <credential_id>', 'Credential ID for the data source')
54
57
  .option('--sk <credential_value>', 'Credential value for the data source')
55
58
  .option('--region <region>', 'Region identifier (e.g. cn-beijing)')
56
- .requiredOption('--bucket <bucket>', 'Bucket name (or local root directory for local vendor)')
57
- .requiredOption('--prefix <prefix>', 'Path prefix to limit scan scope')
58
59
  .option('--sample-rows <number>', 'Number of rows to sample per structured file', '100')
59
60
  .action(commands.connectAction);
60
61
  // Ingest
@@ -133,18 +133,13 @@ function getSlashCommands(pluginConfig, logger) {
133
133
  const args = rawArgs.split(' ').filter((arg) => arg.trim() !== '');
134
134
  logger.info(`[${new Date().toISOString()}] [ContextLake] Slash command profiler started`, { args });
135
135
  try {
136
- if (args.length < 4) {
137
- return { text: `**Error:** Missing arguments. Usage: /contextlake-profiler <datasource_name> <vendor> <bucket> <prefix> [endpoint] [ak] [sk] [region]` };
138
- }
139
- const [datasource_name, vendor, bucket, prefix, endpoint, access_key, secret_key, region] = args;
140
- if (!['volcengine', 'alibaba', 'tencent', 'aws', 'local'].includes(vendor)) {
141
- return { text: `**Error:** Invalid vendor. Must be one of: volcengine, alibaba, tencent, aws, local` };
136
+ if (args.length < 2) {
137
+ return { text: `**Error:** Missing arguments. Usage: /contextlake-profiler <datasource_name> <url> [endpoint] [ak] [sk] [region]` };
142
138
  }
139
+ const [datasource_name, url, endpoint, access_key, secret_key, region] = args;
143
140
  const params = {
144
141
  datasource_name,
145
- vendor: vendor,
146
- bucket,
147
- prefix,
142
+ url,
148
143
  endpoint,
149
144
  access_key,
150
145
  secret_key,
@@ -9,6 +9,8 @@ export declare function getAgentTools(pluginConfig: ContextLakeConfig, logger: a
9
9
  listDatasourceTool: AnyAgentTool;
10
10
  listS3ObjectsTool: AnyAgentTool;
11
11
  readS3ObjectTool: AnyAgentTool;
12
+ generatePresignedUrlTool: AnyAgentTool;
12
13
  writeLanceCatalogTool: AnyAgentTool;
14
+ readLanceCatalogTool: AnyAgentTool;
13
15
  lasTools: AnyAgentTool[];
14
16
  };
@@ -253,16 +253,10 @@ Example User Queries:
253
253
  type: 'object',
254
254
  properties: {
255
255
  datasource_name: { type: 'string', description: 'Name of the data source' },
256
- vendor: { type: 'string', enum: ['volcengine', 'alibaba', 'tencent', 'aws', 'local'], description: 'Data source type' },
257
- endpoint: { type: 'string', description: 'S3 Endpoint URL (not needed for local)' },
258
- access_key: { type: 'string', description: 'Credential ID for the data source' },
259
- secret_key: { type: 'string', description: 'Credential value for the data source' },
260
- region: { type: 'string', description: 'Region identifier (e.g. cn-beijing)' },
261
- bucket: { type: 'string', description: 'Bucket name (or local root directory for local vendor)' },
262
- prefix: { type: 'string', description: 'Path prefix to limit scan scope' },
256
+ url: { type: 'string', description: 'Data source URL (e.g. tos://bucket/prefix, oss://..., s3://..., file:///path)' },
263
257
  sample_rows: { type: 'integer', description: 'Number of rows to sample per structured file' }
264
258
  },
265
- required: ['datasource_name', 'vendor', 'bucket', 'prefix'],
259
+ required: ['datasource_name', 'url'],
266
260
  additionalProperties: false
267
261
  },
268
262
  async execute(toolCallId, params) {
@@ -295,17 +289,15 @@ Example User Queries:
295
289
  parameters: {
296
290
  type: 'object',
297
291
  properties: {
298
- vendor: { type: 'string', enum: ['volcengine', 'alibaba', 'tencent', 'aws', 'local'] },
299
- bucket: { type: 'string' },
292
+ url: { type: 'string', description: 'Data source URL (e.g. tos://bucket/prefix, oss://..., file:///path)' },
293
+ vendor: { type: 'string', enum: ['volcengine', 'alibaba', 'tencent', 'aws', 'local'], description: 'Required if url is not provided' },
294
+ bucket: { type: 'string', description: 'Required if url is not provided' },
300
295
  prefix: { type: 'string' },
301
296
  endpoint: { type: 'string' },
302
- access_key: { type: 'string' },
303
- secret_key: { type: 'string' },
304
- region: { type: 'string' },
305
297
  maxKeys: { type: 'integer' },
306
298
  continuationToken: { type: 'string' }
307
299
  },
308
- required: ['vendor', 'bucket'],
300
+ required: [],
309
301
  additionalProperties: false
310
302
  },
311
303
  async execute(toolCallId, params) {
@@ -326,22 +318,40 @@ Example User Queries:
326
318
  parameters: {
327
319
  type: 'object',
328
320
  properties: {
329
- vendor: { type: 'string', enum: ['volcengine', 'alibaba', 'tencent', 'aws', 'local'] },
330
- bucket: { type: 'string' },
331
- key: { type: 'string' },
321
+ url: { type: 'string', description: 'Full URL to the object (e.g. tos://bucket/path/to/key.txt)' },
322
+ vendor: { type: 'string', enum: ['volcengine', 'alibaba', 'tencent', 'aws', 'local'], description: 'Required if url is not provided' },
323
+ bucket: { type: 'string', description: 'Required if url is not provided' },
324
+ key: { type: 'string', description: 'Required if url is not provided' },
332
325
  endpoint: { type: 'string' },
333
- access_key: { type: 'string' },
334
- secret_key: { type: 'string' },
335
- region: { type: 'string' },
336
326
  maxBytes: { type: 'integer' }
337
327
  },
338
- required: ['vendor', 'bucket', 'key'],
328
+ required: [],
339
329
  additionalProperties: false
340
330
  },
341
331
  async execute(toolCallId, params) {
342
332
  let actualParams = params.params || params;
343
333
  try {
344
- const buf = await (0, s3_tools_1.readS3Object)(actualParams, actualParams.key, actualParams.maxBytes);
334
+ // Extract key from url if provided
335
+ let key = actualParams.key;
336
+ if (actualParams.url && !key) {
337
+ try {
338
+ if (actualParams.url.startsWith('file://')) {
339
+ // Key is not strictly needed for file://, bucket contains the path in parseS3Url
340
+ key = '';
341
+ }
342
+ else {
343
+ const parsedUrl = new URL(actualParams.url);
344
+ key = parsedUrl.pathname.replace(/^\//, '');
345
+ }
346
+ }
347
+ catch (e) {
348
+ // let it fail in readS3Object
349
+ }
350
+ }
351
+ if (!key && !actualParams.url?.startsWith('file://')) {
352
+ throw new Error('key is required or must be part of the url');
353
+ }
354
+ const buf = await (0, s3_tools_1.readS3Object)(actualParams, key, actualParams.maxBytes);
345
355
  // Return as base64 string
346
356
  return { content: [{ type: "text", text: buf.toString('base64') }], details: { length: buf.length } };
347
357
  }
@@ -350,6 +360,52 @@ Example User Queries:
350
360
  }
351
361
  }
352
362
  },
363
+ generatePresignedUrlTool: {
364
+ name: 'generate-presigned-url',
365
+ label: 'Generate Presigned URL',
366
+ description: 'Generate a presigned HTTP URL for an S3/TOS object, allowing temporary public access',
367
+ parameters: {
368
+ type: 'object',
369
+ properties: {
370
+ url: { type: 'string', description: 'Full URL to the object (e.g. tos://bucket/path/to/key.txt)' },
371
+ vendor: { type: 'string', enum: ['volcengine', 'alibaba', 'tencent', 'aws', 'local'], description: 'Required if url is not provided' },
372
+ bucket: { type: 'string', description: 'Required if url is not provided' },
373
+ key: { type: 'string', description: 'Required if url is not provided' },
374
+ endpoint: { type: 'string' },
375
+ expiresIn: { type: 'integer', description: 'Expiration time in seconds (default 3600)' }
376
+ },
377
+ required: [],
378
+ additionalProperties: false
379
+ },
380
+ async execute(toolCallId, params) {
381
+ let actualParams = params.params || params;
382
+ try {
383
+ let key = actualParams.key;
384
+ if (actualParams.url && !key) {
385
+ try {
386
+ if (actualParams.url.startsWith('file://')) {
387
+ key = '';
388
+ }
389
+ else {
390
+ const parsedUrl = new URL(actualParams.url);
391
+ key = parsedUrl.pathname.replace(/^\//, '');
392
+ }
393
+ }
394
+ catch (e) {
395
+ // let it fail in getPresignedUrl
396
+ }
397
+ }
398
+ if (!key && !actualParams.url?.startsWith('file://')) {
399
+ throw new Error('key is required or must be part of the url');
400
+ }
401
+ const url = await (0, s3_tools_1.getPresignedUrl)(actualParams, key, actualParams.expiresIn);
402
+ return { content: [{ type: "text", text: url }], details: { url } };
403
+ }
404
+ catch (e) {
405
+ return { content: [{ type: "text", text: String(e.message) }], details: { error: e.message } };
406
+ }
407
+ }
408
+ },
353
409
  writeLanceCatalogTool: {
354
410
  name: 'write-lance-catalog',
355
411
  label: 'Write LanceDB Catalog',
@@ -374,6 +430,32 @@ Example User Queries:
374
430
  return { content: [{ type: "text", text: String(e.message) }], details: { error: e.message } };
375
431
  }
376
432
  }
433
+ },
434
+ readLanceCatalogTool: {
435
+ name: 'read-lance-catalog',
436
+ label: 'Read LanceDB Catalog',
437
+ description: 'Read records from a local LanceDB table for validation or ingestion processes',
438
+ parameters: {
439
+ type: 'object',
440
+ properties: {
441
+ db_path: { type: 'string', description: 'Path to the local LanceDB database' },
442
+ table_name: { type: 'string', description: 'Name of the table to read' },
443
+ limit: { type: 'integer', description: 'Maximum number of records to return' },
444
+ filter: { type: 'string', description: 'SQL-like filter string (e.g., "category = \'structured\'")' }
445
+ },
446
+ required: ['db_path', 'table_name'],
447
+ additionalProperties: false
448
+ },
449
+ async execute(toolCallId, params) {
450
+ let actualParams = params.params || params;
451
+ try {
452
+ const results = await (0, lance_tools_1.readLanceCatalog)(actualParams);
453
+ return { content: [{ type: "text", text: JSON.stringify(results) }], details: { count: results.length, data: results } };
454
+ }
455
+ catch (e) {
456
+ return { content: [{ type: "text", text: String(e.message) }], details: { error: e.message } };
457
+ }
458
+ }
377
459
  }
378
460
  };
379
461
  }
@@ -3,4 +3,11 @@ export interface LanceWriteParams {
3
3
  table_name: string;
4
4
  records: any[];
5
5
  }
6
+ export interface LanceReadParams {
7
+ db_path: string;
8
+ table_name: string;
9
+ limit?: number;
10
+ filter?: string;
11
+ }
6
12
  export declare function writeLanceCatalog(params: LanceWriteParams): Promise<void>;
13
+ export declare function readLanceCatalog(params: LanceReadParams): Promise<any[]>;
@@ -34,7 +34,9 @@ var __importStar = (this && this.__importStar) || (function () {
34
34
  })();
35
35
  Object.defineProperty(exports, "__esModule", { value: true });
36
36
  exports.writeLanceCatalog = writeLanceCatalog;
37
+ exports.readLanceCatalog = readLanceCatalog;
37
38
  const lancedb = __importStar(require("@lancedb/lancedb"));
39
+ const fs = __importStar(require("fs"));
38
40
  async function writeLanceCatalog(params) {
39
41
  if (!params.records || params.records.length === 0) {
40
42
  return;
@@ -49,3 +51,23 @@ async function writeLanceCatalog(params) {
49
51
  await db.createTable(params.table_name, params.records);
50
52
  }
51
53
  }
54
+ async function readLanceCatalog(params) {
55
+ if (!fs.existsSync(params.db_path)) {
56
+ throw new Error(`Database not found at ${params.db_path}`);
57
+ }
58
+ const db = await lancedb.connect(params.db_path);
59
+ const tableNames = await db.tableNames();
60
+ if (!tableNames.includes(params.table_name)) {
61
+ throw new Error(`Table '${params.table_name}' not found in database`);
62
+ }
63
+ const table = await db.openTable(params.table_name);
64
+ let query = table.query();
65
+ if (params.filter) {
66
+ query = query.where(params.filter);
67
+ }
68
+ if (params.limit && params.limit > 0) {
69
+ query = query.limit(params.limit);
70
+ }
71
+ const results = await query.toArray();
72
+ return results;
73
+ }
@@ -2,8 +2,35 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.getLasTools = getLasTools;
4
4
  const las_api_1 = require("./las-api");
5
+ const s3_tools_1 = require("./s3-tools");
5
6
  function getLasTools(pluginConfig, logger) {
6
7
  const apiClient = new las_api_1.LasApiClient(pluginConfig, logger);
8
+ const processUrl = async (url) => {
9
+ if (!url)
10
+ return url;
11
+ if (url.startsWith('tos://')) {
12
+ // LAS operators prefer native tos:// paths when supported, leave as is
13
+ return url;
14
+ }
15
+ else if (url.startsWith('oss://') || url.startsWith('s3://') || url.startsWith('cos://') || url.startsWith('file://')) {
16
+ // Need presigned HTTP url for other vendors
17
+ logger.info(`[LasTools] Presigning URL for vendor: ${url}`);
18
+ try {
19
+ // If it's a file:// we also presign it to file:// which might not be supported by remote LAS,
20
+ // but local files typically need to be uploaded to TOS first. We'll leave file:// to fail or be handled elsewhere.
21
+ if (url.startsWith('file://'))
22
+ return url;
23
+ const urlParts = new URL(url);
24
+ const key = urlParts.pathname.replace(/^\//, '');
25
+ return await (0, s3_tools_1.getPresignedUrl)({ url }, key, 3600);
26
+ }
27
+ catch (e) {
28
+ logger.warn(`[LasTools] Failed to presign URL: ${url}`, { error: e.message });
29
+ return url; // fallback to original
30
+ }
31
+ }
32
+ return url;
33
+ };
7
34
  const callApi = async (method, args) => {
8
35
  try {
9
36
  // @ts-ignore
@@ -33,6 +60,9 @@ Parameters in data:
33
60
  required: ['data']
34
61
  },
35
62
  async execute(toolCallId, params) {
63
+ if (params.data?.image) {
64
+ params.data.image = await processUrl(params.data.image);
65
+ }
36
66
  return await callApi('process', ['las_image_resample', params.data]);
37
67
  }
38
68
  },
@@ -53,6 +83,9 @@ Parameters in data:
53
83
  required: ['data']
54
84
  },
55
85
  async execute(toolCallId, params) {
86
+ if (params.data?.input_path) {
87
+ params.data.input_path = await processUrl(params.data.input_path);
88
+ }
56
89
  return await callApi('process', ['las_audio_extract_and_split', params.data]);
57
90
  }
58
91
  },
@@ -71,6 +104,9 @@ Parameters in data:
71
104
  required: ['data']
72
105
  },
73
106
  async execute(toolCallId, params) {
107
+ if (params.data?.input_path) {
108
+ params.data.input_path = await processUrl(params.data.input_path);
109
+ }
74
110
  return await callApi('process', ['las_audio_convert', params.data]);
75
111
  }
76
112
  },
@@ -89,6 +125,9 @@ Parameters in data:
89
125
  required: ['data']
90
126
  },
91
127
  async execute(toolCallId, params) {
128
+ if (params.data?.audio?.url) {
129
+ params.data.audio.url = await processUrl(params.data.audio.url);
130
+ }
92
131
  return await callApi('submitAndPoll', ['las_asr_pro', params.data]);
93
132
  }
94
133
  },
@@ -105,6 +144,9 @@ Parameters in data:
105
144
  required: ['data']
106
145
  },
107
146
  async execute(toolCallId, params) {
147
+ if (params.data?.audio?.url) {
148
+ params.data.audio.url = await processUrl(params.data.audio.url);
149
+ }
108
150
  return await callApi('submitAndPoll', ['las_seed_2_0', params.data]);
109
151
  }
110
152
  },
@@ -128,6 +170,13 @@ Parameters:
128
170
  required: ['model', 'input']
129
171
  },
130
172
  async execute(toolCallId, params) {
173
+ if (params.input && Array.isArray(params.input)) {
174
+ for (const item of params.input) {
175
+ if (item.type === 'image_url' && item.image_url?.url) {
176
+ item.image_url.url = await processUrl(item.image_url.url);
177
+ }
178
+ }
179
+ }
131
180
  return await callApi('multimodalEmbedding', [
132
181
  params.model,
133
182
  params.input,
@@ -151,6 +200,9 @@ Parameters in data:
151
200
  required: ['data']
152
201
  },
153
202
  async execute(toolCallId, params) {
203
+ if (params.data?.video_url) {
204
+ params.data.video_url = await processUrl(params.data.video_url);
205
+ }
154
206
  return await callApi('submitAndPoll', ['las_long_video_understand', params.data]);
155
207
  }
156
208
  },
@@ -169,6 +221,9 @@ Parameters in data:
169
221
  required: ['data']
170
222
  },
171
223
  async execute(toolCallId, params) {
224
+ if (params.data?.url) {
225
+ params.data.url = await processUrl(params.data.url);
226
+ }
172
227
  return await callApi('submitAndPoll', ['las_pdf_parse_doubao', params.data]);
173
228
  }
174
229
  },
@@ -187,6 +242,9 @@ Parameters in data:
187
242
  required: ['data']
188
243
  },
189
244
  async execute(toolCallId, params) {
245
+ if (params.data?.video_url) {
246
+ params.data.video_url = await processUrl(params.data.video_url);
247
+ }
190
248
  return await callApi('submitAndPoll', ['las_video_resize', params.data]);
191
249
  }
192
250
  }
@@ -1,12 +1,13 @@
1
1
  export interface ConnectParams {
2
2
  datasource_name: string;
3
- vendor: 'volcengine' | 'alibaba' | 'tencent' | 'aws' | 'local';
3
+ url: string;
4
+ vendor?: 'volcengine' | 'alibaba' | 'tencent' | 'aws' | 'local';
4
5
  endpoint?: string;
5
6
  access_key?: string;
6
7
  secret_key?: string;
7
8
  region?: string;
8
- bucket: string;
9
- prefix: string;
9
+ bucket?: string;
10
+ prefix?: string;
10
11
  sample_rows?: number;
11
12
  }
12
13
  export interface ConnectResult {
@@ -98,12 +98,41 @@ function classifyFile(ext) {
98
98
  async function connectDataSource(params, _ctx) {
99
99
  if (!params.datasource_name)
100
100
  throw new Error('datasource_name is required');
101
- if (!params.vendor)
102
- throw new Error('vendor is required');
103
- if (!params.bucket)
104
- throw new Error('bucket is required');
105
- if (params.prefix === undefined || params.prefix === null)
106
- throw new Error('prefix is required');
101
+ if (!params.url)
102
+ throw new Error('url is required (e.g. tos://bucket/prefix)');
103
+ // Parse URL: tos://bucket/prefix
104
+ try {
105
+ if (params.url.startsWith('file://') || params.url.startsWith('/')) {
106
+ params.vendor = 'local';
107
+ const localPath = params.url.startsWith('file://') ? params.url.slice(7) : params.url;
108
+ params.bucket = localPath;
109
+ params.prefix = '.';
110
+ }
111
+ else {
112
+ const parsedUrl = new URL(params.url);
113
+ const protocol = parsedUrl.protocol.replace(':', '');
114
+ if (['tos', 'oss', 'cos', 's3'].includes(protocol)) {
115
+ if (protocol === 'tos')
116
+ params.vendor = 'volcengine';
117
+ else if (protocol === 'oss')
118
+ params.vendor = 'alibaba';
119
+ else if (protocol === 'cos')
120
+ params.vendor = 'tencent';
121
+ else if (protocol === 's3')
122
+ params.vendor = 'aws';
123
+ params.bucket = parsedUrl.hostname;
124
+ params.prefix = parsedUrl.pathname.replace(/^\//, ''); // Remove leading slash
125
+ }
126
+ else {
127
+ throw new Error(`Unsupported protocol: ${protocol}`);
128
+ }
129
+ }
130
+ }
131
+ catch (e) {
132
+ if (!params.vendor || !params.bucket || params.prefix === undefined) {
133
+ throw new Error(`Invalid url format: ${e.message}`);
134
+ }
135
+ }
107
136
  if (params.vendor !== 'local') {
108
137
  if (!params.endpoint && params.vendor !== 'aws')
109
138
  throw new Error(`endpoint is required for vendor "${params.vendor}"`);
@@ -139,7 +168,14 @@ async function connectDataSource(params, _ctx) {
139
168
  const allRecords = [];
140
169
  const scan_ts = new Date().toISOString() + 'Z';
141
170
  while (isTruncated) {
142
- const response = await (0, s3_tools_1.listS3Objects)(params, params.prefix, 1000, continuationToken);
171
+ const response = await (0, s3_tools_1.listS3Objects)({
172
+ vendor: params.vendor,
173
+ bucket: params.bucket,
174
+ endpoint: params.endpoint,
175
+ access_key: params.access_key,
176
+ secret_key: params.secret_key,
177
+ region: params.region
178
+ }, params.prefix || '', 1000, continuationToken);
143
179
  for (const obj of response.Contents) {
144
180
  const key = obj.Key || '';
145
181
  if (key.endsWith('/'))
@@ -1,11 +1,13 @@
1
1
  export interface S3Params {
2
- vendor: 'volcengine' | 'alibaba' | 'tencent' | 'aws' | 'local';
2
+ url?: string;
3
+ vendor?: 'volcengine' | 'alibaba' | 'tencent' | 'aws' | 'local';
3
4
  endpoint?: string;
4
5
  access_key?: string;
5
6
  secret_key?: string;
6
7
  region?: string;
7
- bucket: string;
8
+ bucket?: string;
8
9
  }
10
+ export declare function parseS3Url(params: S3Params): S3Params;
9
11
  export declare function listS3Objects(params: S3Params, prefix: string, maxKeys?: number, continuationToken?: string): Promise<{
10
12
  Contents: any[];
11
13
  IsTruncated: boolean;
@@ -16,3 +18,4 @@ export declare function listS3Objects(params: S3Params, prefix: string, maxKeys?
16
18
  NextContinuationToken: string | undefined;
17
19
  }>;
18
20
  export declare function readS3Object(params: S3Params, key: string, maxBytes?: number): Promise<Buffer>;
21
+ export declare function getPresignedUrl(params: S3Params, key: string, expiresIn?: number): Promise<string>;
@@ -33,12 +33,48 @@ var __importStar = (this && this.__importStar) || (function () {
33
33
  };
34
34
  })();
35
35
  Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.parseS3Url = parseS3Url;
36
37
  exports.listS3Objects = listS3Objects;
37
38
  exports.readS3Object = readS3Object;
39
+ exports.getPresignedUrl = getPresignedUrl;
38
40
  const client_s3_1 = require("@aws-sdk/client-s3");
41
+ const s3_request_presigner_1 = require("@aws-sdk/s3-request-presigner");
39
42
  const fs = __importStar(require("fs"));
40
43
  const path = __importStar(require("path"));
44
+ function parseS3Url(params) {
45
+ if (params.url) {
46
+ if (params.url.startsWith('file://') || params.url.startsWith('/')) {
47
+ params.vendor = 'local';
48
+ const localPath = params.url.startsWith('file://') ? params.url.slice(7) : params.url;
49
+ params.bucket = localPath;
50
+ }
51
+ else {
52
+ const parsedUrl = new URL(params.url);
53
+ const protocol = parsedUrl.protocol.replace(':', '');
54
+ if (['tos', 'oss', 'cos', 's3'].includes(protocol)) {
55
+ if (protocol === 'tos')
56
+ params.vendor = 'volcengine';
57
+ else if (protocol === 'oss')
58
+ params.vendor = 'alibaba';
59
+ else if (protocol === 'cos')
60
+ params.vendor = 'tencent';
61
+ else if (protocol === 's3')
62
+ params.vendor = 'aws';
63
+ params.bucket = parsedUrl.hostname;
64
+ // Prefix is usually parsed separately or passed explicitly for listing
65
+ }
66
+ else {
67
+ throw new Error(`Unsupported protocol: ${protocol}`);
68
+ }
69
+ }
70
+ }
71
+ if (!params.vendor || !params.bucket) {
72
+ throw new Error('Could not determine vendor or bucket. Please provide a valid url or vendor/bucket directly.');
73
+ }
74
+ return params;
75
+ }
41
76
  function createS3Client(params) {
77
+ params = parseS3Url(params);
42
78
  if (params.vendor === 'local')
43
79
  return null;
44
80
  let endpoint = params.endpoint;
@@ -80,6 +116,7 @@ function createS3Client(params) {
80
116
  });
81
117
  }
82
118
  async function listS3Objects(params, prefix, maxKeys = 1000, continuationToken) {
119
+ params = parseS3Url(params);
83
120
  if (params.vendor === 'local') {
84
121
  const root = params.bucket;
85
122
  const prefixPath = prefix && prefix !== '.' ? path.join(root, prefix) : root;
@@ -130,6 +167,7 @@ async function listS3Objects(params, prefix, maxKeys = 1000, continuationToken)
130
167
  };
131
168
  }
132
169
  async function readS3Object(params, key, maxBytes) {
170
+ params = parseS3Url(params);
133
171
  if (params.vendor === 'local') {
134
172
  const fullPath = path.join(params.bucket, key);
135
173
  if (maxBytes) {
@@ -165,3 +203,19 @@ async function readS3Object(params, key, maxBytes) {
165
203
  }
166
204
  return Buffer.alloc(0);
167
205
  }
206
+ async function getPresignedUrl(params, key, expiresIn = 3600) {
207
+ params = parseS3Url(params);
208
+ if (params.vendor === 'local') {
209
+ const fullPath = path.join(params.bucket, key);
210
+ return `file://${fullPath}`;
211
+ }
212
+ const client = createS3Client(params);
213
+ if (!client)
214
+ throw new Error('Failed to create S3 client');
215
+ const command = new client_s3_1.GetObjectCommand({
216
+ Bucket: params.bucket,
217
+ Key: key
218
+ });
219
+ const signedUrl = await (0, s3_request_presigner_1.getSignedUrl)(client, command, { expiresIn });
220
+ return signedUrl;
221
+ }
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "id": "contextlake-openclaw",
3
3
  "name": "ContextLake",
4
- "version": "1.0.4",
4
+ "version": "1.0.5",
5
5
  "description": "A lightweight knowledge base plugin for OpenClaw using LanceDB and TOS, with data profiling support",
6
6
  "skills": ["./src/skills"],
7
7
  "configSchema": {