@byted-las/contextlake-openclaw 1.0.3 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -23
- package/dist/src/commands/cli.d.ts +1 -1
- package/dist/src/commands/cli.js +10 -14
- package/dist/src/commands/index.js +11 -4
- package/dist/src/commands/slashcmd.js +4 -9
- package/dist/src/commands/tools.d.ts +5 -0
- package/dist/src/commands/tools.js +180 -10
- package/dist/src/lib/actions/lance-tools.d.ts +13 -0
- package/dist/src/lib/actions/lance-tools.js +73 -0
- package/dist/src/lib/actions/las-tools.js +58 -0
- package/dist/src/lib/actions/profiler.d.ts +4 -3
- package/dist/src/lib/actions/profiler.js +156 -141
- package/dist/src/lib/actions/s3-tools.d.ts +21 -0
- package/dist/src/lib/actions/s3-tools.js +221 -0
- package/dist/src/skills/SKILL.md +14 -151
- package/dist/src/skills/las-data-profiler/SKILL.md +14 -151
- package/dist/src/utils/config.js +5 -4
- package/dist/src/utils/credentials.d.ts +4 -0
- package/openclaw.plugin.json +1 -1
- package/package.json +3 -1
- package/src/commands/cli.ts +10 -14
- package/src/commands/index.ts +16 -4
- package/src/commands/slashcmd.ts +4 -10
- package/src/commands/tools.ts +177 -12
- package/src/lib/actions/lance-tools.ts +58 -0
- package/src/lib/actions/las-tools.ts +56 -0
- package/src/lib/actions/profiler.ts +148 -157
- package/src/lib/actions/s3-tools.ts +203 -0
- package/src/skills/las-data-profiler/SKILL.md +14 -151
- package/src/utils/config.ts +5 -4
- package/src/utils/credentials.ts +6 -0
- package/src/lib/scripts/s3_catalog.py +0 -617
package/src/commands/cli.ts
CHANGED
|
@@ -37,29 +37,23 @@ function parseMetadata(metadata: any): Record<string, any> {
|
|
|
37
37
|
|
|
38
38
|
export function getCliCommands(pluginConfig: ContextLakeConfig, logger: any) {
|
|
39
39
|
return {
|
|
40
|
-
connectAction: async (datasource_name: string, options: any) => {
|
|
41
|
-
logger.info(`[${new Date().toISOString()}] [ContextLake] CLI connect started`, { datasource_name, options });
|
|
40
|
+
connectAction: async (datasource_name: string, url: string, options: any) => {
|
|
41
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] CLI connect started`, { datasource_name, url, options });
|
|
42
42
|
try {
|
|
43
43
|
const params: ConnectParams = {
|
|
44
44
|
datasource_name,
|
|
45
|
-
|
|
45
|
+
url,
|
|
46
46
|
endpoint: options.endpoint,
|
|
47
47
|
access_key: options.ak,
|
|
48
48
|
secret_key: options.sk,
|
|
49
49
|
region: options.region,
|
|
50
|
-
bucket: options.bucket,
|
|
51
|
-
prefix: options.prefix,
|
|
52
50
|
sample_rows: parseInt(options.sampleRows),
|
|
53
51
|
};
|
|
54
52
|
|
|
55
53
|
// eslint-disable-next-line no-console
|
|
56
54
|
console.log(`[contextlake connect] Connecting to datasource "${datasource_name}"...`);
|
|
57
55
|
// eslint-disable-next-line no-console
|
|
58
|
-
console.log(`
|
|
59
|
-
// eslint-disable-next-line no-console
|
|
60
|
-
console.log(` bucket: ${params.bucket}`);
|
|
61
|
-
// eslint-disable-next-line no-console
|
|
62
|
-
console.log(` prefix: ${params.prefix}`);
|
|
56
|
+
console.log(` url: ${params.url}`);
|
|
63
57
|
|
|
64
58
|
const result = await connectDataSource(params);
|
|
65
59
|
// eslint-disable-next-line no-console
|
|
@@ -147,13 +141,15 @@ export function getCliCommands(pluginConfig: ContextLakeConfig, logger: any) {
|
|
|
147
141
|
console.log('Please provide your credentials below. Press enter to keep the current value.');
|
|
148
142
|
|
|
149
143
|
const lasApiKey = await promptForInput('LAS_API_KEY', currentCreds.LAS_API_KEY);
|
|
150
|
-
const
|
|
151
|
-
const
|
|
144
|
+
const accessKey = await promptForInput('ACCESS_KEY', currentCreds.ACCESS_KEY || currentCreds.VOLCENGINE_ACCESS_KEY);
|
|
145
|
+
const secretKey = await promptForInput('SECRET_KEY', currentCreds.SECRET_KEY || currentCreds.VOLCENGINE_SECRET_KEY);
|
|
146
|
+
const region = await promptForInput('REGION', currentCreds.REGION || currentCreds.VOLCENGINE_REGION || 'cn-beijing');
|
|
152
147
|
|
|
153
148
|
const newCreds = {
|
|
154
149
|
LAS_API_KEY: lasApiKey,
|
|
155
|
-
|
|
156
|
-
|
|
150
|
+
ACCESS_KEY: accessKey,
|
|
151
|
+
SECRET_KEY: secretKey,
|
|
152
|
+
REGION: region
|
|
157
153
|
};
|
|
158
154
|
|
|
159
155
|
saveCredentials(newCreds);
|
package/src/commands/index.ts
CHANGED
|
@@ -26,6 +26,21 @@ export function registerAll(ctx: OpenClawPluginApi, logger: PluginLogger) {
|
|
|
26
26
|
|
|
27
27
|
ctx.registerTool(tools.lasDataProfilerTool );
|
|
28
28
|
logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.lasDataProfilerTool.name}`);
|
|
29
|
+
|
|
30
|
+
ctx.registerTool(tools.listS3ObjectsTool );
|
|
31
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.listS3ObjectsTool.name}`);
|
|
32
|
+
|
|
33
|
+
ctx.registerTool(tools.readS3ObjectTool );
|
|
34
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.readS3ObjectTool.name}`);
|
|
35
|
+
|
|
36
|
+
ctx.registerTool(tools.writeLanceCatalogTool );
|
|
37
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.writeLanceCatalogTool.name}`);
|
|
38
|
+
|
|
39
|
+
ctx.registerTool(tools.readLanceCatalogTool );
|
|
40
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.readLanceCatalogTool.name}`);
|
|
41
|
+
|
|
42
|
+
ctx.registerTool(tools.generatePresignedUrlTool );
|
|
43
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.generatePresignedUrlTool.name}`);
|
|
29
44
|
|
|
30
45
|
ctx.registerTool(tools.listDatasourceTool );
|
|
31
46
|
logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.listDatasourceTool.name}`);
|
|
@@ -51,15 +66,12 @@ export function registerAll(ctx: OpenClawPluginApi, logger: PluginLogger) {
|
|
|
51
66
|
const commands = getCliCommands(pluginConfig, logger);
|
|
52
67
|
|
|
53
68
|
// connect -- data source profiling (las-data-profiler)
|
|
54
|
-
contextlake.command('connect <datasource_name>')
|
|
69
|
+
contextlake.command('connect <datasource_name> <url>')
|
|
55
70
|
.description('Connect to a data source and profile its structure, schemas, and media metadata into LanceDB')
|
|
56
|
-
.requiredOption('--vendor <vendor>', 'Data source type: volcengine | alibaba | tencent | aws | local')
|
|
57
71
|
.option('--endpoint <url>', 'S3 Endpoint URL (not needed for local)')
|
|
58
72
|
.option('--ak <credential_id>', 'Credential ID for the data source')
|
|
59
73
|
.option('--sk <credential_value>', 'Credential value for the data source')
|
|
60
74
|
.option('--region <region>', 'Region identifier (e.g. cn-beijing)')
|
|
61
|
-
.requiredOption('--bucket <bucket>', 'Bucket name (or local root directory for local vendor)')
|
|
62
|
-
.requiredOption('--prefix <prefix>', 'Path prefix to limit scan scope')
|
|
63
75
|
.option('--sample-rows <number>', 'Number of rows to sample per structured file', '100')
|
|
64
76
|
.action(commands.connectAction);
|
|
65
77
|
|
package/src/commands/slashcmd.ts
CHANGED
|
@@ -108,21 +108,15 @@ export function getSlashCommands(pluginConfig: ContextLakeConfig, logger: any) {
|
|
|
108
108
|
|
|
109
109
|
logger.info(`[${new Date().toISOString()}] [ContextLake] Slash command profiler started`, { args });
|
|
110
110
|
try {
|
|
111
|
-
if (args.length <
|
|
112
|
-
return { text: `**Error:** Missing arguments. Usage: /contextlake-profiler <datasource_name> <
|
|
111
|
+
if (args.length < 2) {
|
|
112
|
+
return { text: `**Error:** Missing arguments. Usage: /contextlake-profiler <datasource_name> <url> [endpoint] [ak] [sk] [region]` };
|
|
113
113
|
}
|
|
114
114
|
|
|
115
|
-
const [datasource_name,
|
|
116
|
-
|
|
117
|
-
if (!['volcengine', 'alibaba', 'tencent', 'aws', 'local'].includes(vendor)) {
|
|
118
|
-
return { text: `**Error:** Invalid vendor. Must be one of: volcengine, alibaba, tencent, aws, local` };
|
|
119
|
-
}
|
|
115
|
+
const [datasource_name, url, endpoint, access_key, secret_key, region] = args;
|
|
120
116
|
|
|
121
117
|
const params: ConnectParams = {
|
|
122
118
|
datasource_name,
|
|
123
|
-
|
|
124
|
-
bucket,
|
|
125
|
-
prefix,
|
|
119
|
+
url,
|
|
126
120
|
endpoint,
|
|
127
121
|
access_key,
|
|
128
122
|
secret_key,
|
package/src/commands/tools.ts
CHANGED
|
@@ -3,6 +3,8 @@ import { retrieveAssets } from '../lib/actions/retrieve';
|
|
|
3
3
|
import { listAssets, deleteAssets } from '../lib/actions/manage';
|
|
4
4
|
import { connectDataSource, listDataSources } from '../lib/actions/profiler';
|
|
5
5
|
import { getLasTools } from '../lib/actions/las-tools';
|
|
6
|
+
import { listS3Objects, readS3Object, getPresignedUrl } from '../lib/actions/s3-tools';
|
|
7
|
+
import { writeLanceCatalog, readLanceCatalog } from '../lib/actions/lance-tools';
|
|
6
8
|
import { ContextLakeConfig } from '../utils/config';
|
|
7
9
|
// @ts-ignore
|
|
8
10
|
import type { AnyAgentTool } from 'openclaw/plugin-sdk';
|
|
@@ -14,6 +16,11 @@ export function getAgentTools(pluginConfig: ContextLakeConfig, logger: any): {
|
|
|
14
16
|
deleteTool: AnyAgentTool;
|
|
15
17
|
lasDataProfilerTool: AnyAgentTool;
|
|
16
18
|
listDatasourceTool: AnyAgentTool;
|
|
19
|
+
listS3ObjectsTool: AnyAgentTool;
|
|
20
|
+
readS3ObjectTool: AnyAgentTool;
|
|
21
|
+
generatePresignedUrlTool: AnyAgentTool;
|
|
22
|
+
writeLanceCatalogTool: AnyAgentTool;
|
|
23
|
+
readLanceCatalogTool: AnyAgentTool;
|
|
17
24
|
lasTools: AnyAgentTool[];
|
|
18
25
|
} {
|
|
19
26
|
const lasTools = getLasTools(pluginConfig, logger);
|
|
@@ -269,16 +276,10 @@ Example User Queries:
|
|
|
269
276
|
type: 'object',
|
|
270
277
|
properties: {
|
|
271
278
|
datasource_name: { type: 'string', description: 'Name of the data source' },
|
|
272
|
-
|
|
273
|
-
endpoint: { type: 'string', description: 'S3 Endpoint URL (not needed for local)' },
|
|
274
|
-
access_key: { type: 'string', description: 'Credential ID for the data source' },
|
|
275
|
-
secret_key: { type: 'string', description: 'Credential value for the data source' },
|
|
276
|
-
region: { type: 'string', description: 'Region identifier (e.g. cn-beijing)' },
|
|
277
|
-
bucket: { type: 'string', description: 'Bucket name (or local root directory for local vendor)' },
|
|
278
|
-
prefix: { type: 'string', description: 'Path prefix to limit scan scope' },
|
|
279
|
+
url: { type: 'string', description: 'Data source URL (e.g. tos://bucket/prefix, oss://..., s3://..., file:///path)' },
|
|
279
280
|
sample_rows: { type: 'integer', description: 'Number of rows to sample per structured file' }
|
|
280
281
|
},
|
|
281
|
-
required: ['datasource_name', '
|
|
282
|
+
required: ['datasource_name', 'url'],
|
|
282
283
|
additionalProperties: false
|
|
283
284
|
},
|
|
284
285
|
|
|
@@ -299,13 +300,177 @@ Example User Queries:
|
|
|
299
300
|
} catch (error: any) {
|
|
300
301
|
logger.error(`[${new Date().toISOString()}] [ContextLake] las-data-profiler skill failed`, { error: error.message, stack: error.stack });
|
|
301
302
|
return {
|
|
302
|
-
content: [{ type: "text", text: String(error.message
|
|
303
|
-
|
|
304
|
-
details: { error: error.message
|
|
305
|
-
}
|
|
303
|
+
content: [{ type: "text", text: String(error.message) }],
|
|
304
|
+
details: { error: error.message }
|
|
306
305
|
} as any;
|
|
307
306
|
}
|
|
308
307
|
}
|
|
308
|
+
},
|
|
309
|
+
listS3ObjectsTool: {
|
|
310
|
+
name: 'list-s3-objects',
|
|
311
|
+
label: 'List S3 Objects',
|
|
312
|
+
description: 'List objects in an S3-compatible bucket or local directory',
|
|
313
|
+
parameters: {
|
|
314
|
+
type: 'object',
|
|
315
|
+
properties: {
|
|
316
|
+
url: { type: 'string', description: 'Data source URL (e.g. tos://bucket/prefix, oss://..., file:///path)' },
|
|
317
|
+
vendor: { type: 'string', enum: ['volcengine', 'alibaba', 'tencent', 'aws', 'local'], description: 'Required if url is not provided' },
|
|
318
|
+
bucket: { type: 'string', description: 'Required if url is not provided' },
|
|
319
|
+
prefix: { type: 'string' },
|
|
320
|
+
endpoint: { type: 'string' },
|
|
321
|
+
maxKeys: { type: 'integer' },
|
|
322
|
+
continuationToken: { type: 'string' }
|
|
323
|
+
},
|
|
324
|
+
required: [],
|
|
325
|
+
additionalProperties: false
|
|
326
|
+
},
|
|
327
|
+
async execute(toolCallId: string, params: any) {
|
|
328
|
+
let actualParams = params.params || params;
|
|
329
|
+
try {
|
|
330
|
+
const result = await listS3Objects(actualParams, actualParams.prefix || '', actualParams.maxKeys, actualParams.continuationToken);
|
|
331
|
+
return { content: [{ type: "text", text: JSON.stringify(result) }], details: result } as any;
|
|
332
|
+
} catch (e: any) {
|
|
333
|
+
return { content: [{ type: "text", text: String(e.message) }], details: { error: e.message } } as any;
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
},
|
|
337
|
+
readS3ObjectTool: {
|
|
338
|
+
name: 'read-s3-object',
|
|
339
|
+
label: 'Read S3 Object',
|
|
340
|
+
description: 'Read the contents or headers of an S3 object',
|
|
341
|
+
parameters: {
|
|
342
|
+
type: 'object',
|
|
343
|
+
properties: {
|
|
344
|
+
url: { type: 'string', description: 'Full URL to the object (e.g. tos://bucket/path/to/key.txt)' },
|
|
345
|
+
vendor: { type: 'string', enum: ['volcengine', 'alibaba', 'tencent', 'aws', 'local'], description: 'Required if url is not provided' },
|
|
346
|
+
bucket: { type: 'string', description: 'Required if url is not provided' },
|
|
347
|
+
key: { type: 'string', description: 'Required if url is not provided' },
|
|
348
|
+
endpoint: { type: 'string' },
|
|
349
|
+
maxBytes: { type: 'integer' }
|
|
350
|
+
},
|
|
351
|
+
required: [],
|
|
352
|
+
additionalProperties: false
|
|
353
|
+
},
|
|
354
|
+
async execute(toolCallId: string, params: any) {
|
|
355
|
+
let actualParams = params.params || params;
|
|
356
|
+
try {
|
|
357
|
+
// Extract key from url if provided
|
|
358
|
+
let key = actualParams.key;
|
|
359
|
+
if (actualParams.url && !key) {
|
|
360
|
+
try {
|
|
361
|
+
if (actualParams.url.startsWith('file://')) {
|
|
362
|
+
// Key is not strictly needed for file://, bucket contains the path in parseS3Url
|
|
363
|
+
key = '';
|
|
364
|
+
} else {
|
|
365
|
+
const parsedUrl = new URL(actualParams.url);
|
|
366
|
+
key = parsedUrl.pathname.replace(/^\//, '');
|
|
367
|
+
}
|
|
368
|
+
} catch (e) {
|
|
369
|
+
// let it fail in readS3Object
|
|
370
|
+
}
|
|
371
|
+
}
|
|
372
|
+
if (!key && !actualParams.url?.startsWith('file://')) {
|
|
373
|
+
throw new Error('key is required or must be part of the url');
|
|
374
|
+
}
|
|
375
|
+
const buf = await readS3Object(actualParams, key, actualParams.maxBytes);
|
|
376
|
+
// Return as base64 string
|
|
377
|
+
return { content: [{ type: "text", text: buf.toString('base64') }], details: { length: buf.length } } as any;
|
|
378
|
+
} catch (e: any) {
|
|
379
|
+
return { content: [{ type: "text", text: String(e.message) }], details: { error: e.message } } as any;
|
|
380
|
+
}
|
|
381
|
+
}
|
|
382
|
+
},
|
|
383
|
+
generatePresignedUrlTool: {
|
|
384
|
+
name: 'generate-presigned-url',
|
|
385
|
+
label: 'Generate Presigned URL',
|
|
386
|
+
description: 'Generate a presigned HTTP URL for an S3/TOS object, allowing temporary public access',
|
|
387
|
+
parameters: {
|
|
388
|
+
type: 'object',
|
|
389
|
+
properties: {
|
|
390
|
+
url: { type: 'string', description: 'Full URL to the object (e.g. tos://bucket/path/to/key.txt)' },
|
|
391
|
+
vendor: { type: 'string', enum: ['volcengine', 'alibaba', 'tencent', 'aws', 'local'], description: 'Required if url is not provided' },
|
|
392
|
+
bucket: { type: 'string', description: 'Required if url is not provided' },
|
|
393
|
+
key: { type: 'string', description: 'Required if url is not provided' },
|
|
394
|
+
endpoint: { type: 'string' },
|
|
395
|
+
expiresIn: { type: 'integer', description: 'Expiration time in seconds (default 3600)' }
|
|
396
|
+
},
|
|
397
|
+
required: [],
|
|
398
|
+
additionalProperties: false
|
|
399
|
+
},
|
|
400
|
+
async execute(toolCallId: string, params: any) {
|
|
401
|
+
let actualParams = params.params || params;
|
|
402
|
+
try {
|
|
403
|
+
let key = actualParams.key;
|
|
404
|
+
if (actualParams.url && !key) {
|
|
405
|
+
try {
|
|
406
|
+
if (actualParams.url.startsWith('file://')) {
|
|
407
|
+
key = '';
|
|
408
|
+
} else {
|
|
409
|
+
const parsedUrl = new URL(actualParams.url);
|
|
410
|
+
key = parsedUrl.pathname.replace(/^\//, '');
|
|
411
|
+
}
|
|
412
|
+
} catch (e) {
|
|
413
|
+
// let it fail in getPresignedUrl
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
if (!key && !actualParams.url?.startsWith('file://')) {
|
|
417
|
+
throw new Error('key is required or must be part of the url');
|
|
418
|
+
}
|
|
419
|
+
const url = await getPresignedUrl(actualParams, key, actualParams.expiresIn);
|
|
420
|
+
return { content: [{ type: "text", text: url }], details: { url } } as any;
|
|
421
|
+
} catch (e: any) {
|
|
422
|
+
return { content: [{ type: "text", text: String(e.message) }], details: { error: e.message } } as any;
|
|
423
|
+
}
|
|
424
|
+
}
|
|
425
|
+
},
|
|
426
|
+
writeLanceCatalogTool: {
|
|
427
|
+
name: 'write-lance-catalog',
|
|
428
|
+
label: 'Write LanceDB Catalog',
|
|
429
|
+
description: 'Write an array of file records into a local LanceDB table',
|
|
430
|
+
parameters: {
|
|
431
|
+
type: 'object',
|
|
432
|
+
properties: {
|
|
433
|
+
db_path: { type: 'string' },
|
|
434
|
+
table_name: { type: 'string' },
|
|
435
|
+
records: { type: 'array', items: { type: 'object' } }
|
|
436
|
+
},
|
|
437
|
+
required: ['db_path', 'table_name', 'records'],
|
|
438
|
+
additionalProperties: false
|
|
439
|
+
},
|
|
440
|
+
async execute(toolCallId: string, params: any) {
|
|
441
|
+
let actualParams = params.params || params;
|
|
442
|
+
try {
|
|
443
|
+
await writeLanceCatalog(actualParams);
|
|
444
|
+
return { content: [{ type: "text", text: "Successfully wrote records to LanceDB" }], details: { count: actualParams.records.length } } as any;
|
|
445
|
+
} catch (e: any) {
|
|
446
|
+
return { content: [{ type: "text", text: String(e.message) }], details: { error: e.message } } as any;
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
},
|
|
450
|
+
readLanceCatalogTool: {
|
|
451
|
+
name: 'read-lance-catalog',
|
|
452
|
+
label: 'Read LanceDB Catalog',
|
|
453
|
+
description: 'Read records from a local LanceDB table for validation or ingestion processes',
|
|
454
|
+
parameters: {
|
|
455
|
+
type: 'object',
|
|
456
|
+
properties: {
|
|
457
|
+
db_path: { type: 'string', description: 'Path to the local LanceDB database' },
|
|
458
|
+
table_name: { type: 'string', description: 'Name of the table to read' },
|
|
459
|
+
limit: { type: 'integer', description: 'Maximum number of records to return' },
|
|
460
|
+
filter: { type: 'string', description: 'SQL-like filter string (e.g., "category = \'structured\'")' }
|
|
461
|
+
},
|
|
462
|
+
required: ['db_path', 'table_name'],
|
|
463
|
+
additionalProperties: false
|
|
464
|
+
},
|
|
465
|
+
async execute(toolCallId: string, params: any) {
|
|
466
|
+
let actualParams = params.params || params;
|
|
467
|
+
try {
|
|
468
|
+
const results = await readLanceCatalog(actualParams);
|
|
469
|
+
return { content: [{ type: "text", text: JSON.stringify(results) }], details: { count: results.length, data: results } } as any;
|
|
470
|
+
} catch (e: any) {
|
|
471
|
+
return { content: [{ type: "text", text: String(e.message) }], details: { error: e.message } } as any;
|
|
472
|
+
}
|
|
473
|
+
}
|
|
309
474
|
}
|
|
310
475
|
};
|
|
311
476
|
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import * as lancedb from '@lancedb/lancedb';
|
|
2
|
+
import * as fs from 'fs';
|
|
3
|
+
|
|
4
|
+
export interface LanceWriteParams {
|
|
5
|
+
db_path: string;
|
|
6
|
+
table_name: string;
|
|
7
|
+
records: any[];
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
export interface LanceReadParams {
|
|
11
|
+
db_path: string;
|
|
12
|
+
table_name: string;
|
|
13
|
+
limit?: number;
|
|
14
|
+
filter?: string;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
export async function writeLanceCatalog(params: LanceWriteParams) {
|
|
18
|
+
if (!params.records || params.records.length === 0) {
|
|
19
|
+
return;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
const db = await lancedb.connect(params.db_path);
|
|
23
|
+
const tableNames = await db.tableNames();
|
|
24
|
+
|
|
25
|
+
if (tableNames.includes(params.table_name)) {
|
|
26
|
+
const table = await db.openTable(params.table_name);
|
|
27
|
+
await table.add(params.records);
|
|
28
|
+
} else {
|
|
29
|
+
await db.createTable(params.table_name, params.records);
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
export async function readLanceCatalog(params: LanceReadParams) {
|
|
34
|
+
if (!fs.existsSync(params.db_path)) {
|
|
35
|
+
throw new Error(`Database not found at ${params.db_path}`);
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
const db = await lancedb.connect(params.db_path);
|
|
39
|
+
const tableNames = await db.tableNames();
|
|
40
|
+
|
|
41
|
+
if (!tableNames.includes(params.table_name)) {
|
|
42
|
+
throw new Error(`Table '${params.table_name}' not found in database`);
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
const table = await db.openTable(params.table_name);
|
|
46
|
+
let query = table.query();
|
|
47
|
+
|
|
48
|
+
if (params.filter) {
|
|
49
|
+
query = query.where(params.filter);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
if (params.limit && params.limit > 0) {
|
|
53
|
+
query = query.limit(params.limit);
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
const results = await query.toArray();
|
|
57
|
+
return results;
|
|
58
|
+
}
|
|
@@ -1,11 +1,36 @@
|
|
|
1
1
|
import { LasApiClient } from './las-api';
|
|
2
2
|
import { ContextLakeConfig } from '../../utils/config';
|
|
3
|
+
import { getPresignedUrl } from './s3-tools';
|
|
3
4
|
// @ts-ignore
|
|
4
5
|
import type { AnyAgentTool } from 'openclaw/plugin-sdk';
|
|
5
6
|
|
|
6
7
|
export function getLasTools(pluginConfig: ContextLakeConfig, logger: any): AnyAgentTool[] {
|
|
7
8
|
const apiClient = new LasApiClient(pluginConfig, logger);
|
|
8
9
|
|
|
10
|
+
const processUrl = async (url: string): Promise<string> => {
|
|
11
|
+
if (!url) return url;
|
|
12
|
+
if (url.startsWith('tos://')) {
|
|
13
|
+
// LAS operators prefer native tos:// paths when supported, leave as is
|
|
14
|
+
return url;
|
|
15
|
+
} else if (url.startsWith('oss://') || url.startsWith('s3://') || url.startsWith('cos://') || url.startsWith('file://')) {
|
|
16
|
+
// Need presigned HTTP url for other vendors
|
|
17
|
+
logger.info(`[LasTools] Presigning URL for vendor: ${url}`);
|
|
18
|
+
try {
|
|
19
|
+
// If it's a file:// we also presign it to file:// which might not be supported by remote LAS,
|
|
20
|
+
// but local files typically need to be uploaded to TOS first. We'll leave file:// to fail or be handled elsewhere.
|
|
21
|
+
if (url.startsWith('file://')) return url;
|
|
22
|
+
|
|
23
|
+
const urlParts = new URL(url);
|
|
24
|
+
const key = urlParts.pathname.replace(/^\//, '');
|
|
25
|
+
return await getPresignedUrl({ url }, key, 3600);
|
|
26
|
+
} catch (e: any) {
|
|
27
|
+
logger.warn(`[LasTools] Failed to presign URL: ${url}`, { error: e.message });
|
|
28
|
+
return url; // fallback to original
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
return url;
|
|
32
|
+
};
|
|
33
|
+
|
|
9
34
|
const callApi = async (method: string, args: any[]) => {
|
|
10
35
|
try {
|
|
11
36
|
// @ts-ignore
|
|
@@ -35,6 +60,9 @@ Parameters in data:
|
|
|
35
60
|
required: ['data']
|
|
36
61
|
},
|
|
37
62
|
async execute(toolCallId: string, params: any) {
|
|
63
|
+
if (params.data?.image) {
|
|
64
|
+
params.data.image = await processUrl(params.data.image);
|
|
65
|
+
}
|
|
38
66
|
return await callApi('process', ['las_image_resample', params.data]);
|
|
39
67
|
}
|
|
40
68
|
},
|
|
@@ -55,6 +83,9 @@ Parameters in data:
|
|
|
55
83
|
required: ['data']
|
|
56
84
|
},
|
|
57
85
|
async execute(toolCallId: string, params: any) {
|
|
86
|
+
if (params.data?.input_path) {
|
|
87
|
+
params.data.input_path = await processUrl(params.data.input_path);
|
|
88
|
+
}
|
|
58
89
|
return await callApi('process', ['las_audio_extract_and_split', params.data]);
|
|
59
90
|
}
|
|
60
91
|
},
|
|
@@ -73,6 +104,9 @@ Parameters in data:
|
|
|
73
104
|
required: ['data']
|
|
74
105
|
},
|
|
75
106
|
async execute(toolCallId: string, params: any) {
|
|
107
|
+
if (params.data?.input_path) {
|
|
108
|
+
params.data.input_path = await processUrl(params.data.input_path);
|
|
109
|
+
}
|
|
76
110
|
return await callApi('process', ['las_audio_convert', params.data]);
|
|
77
111
|
}
|
|
78
112
|
},
|
|
@@ -91,6 +125,9 @@ Parameters in data:
|
|
|
91
125
|
required: ['data']
|
|
92
126
|
},
|
|
93
127
|
async execute(toolCallId: string, params: any) {
|
|
128
|
+
if (params.data?.audio?.url) {
|
|
129
|
+
params.data.audio.url = await processUrl(params.data.audio.url);
|
|
130
|
+
}
|
|
94
131
|
return await callApi('submitAndPoll', ['las_asr_pro', params.data]);
|
|
95
132
|
}
|
|
96
133
|
},
|
|
@@ -107,6 +144,9 @@ Parameters in data:
|
|
|
107
144
|
required: ['data']
|
|
108
145
|
},
|
|
109
146
|
async execute(toolCallId: string, params: any) {
|
|
147
|
+
if (params.data?.audio?.url) {
|
|
148
|
+
params.data.audio.url = await processUrl(params.data.audio.url);
|
|
149
|
+
}
|
|
110
150
|
return await callApi('submitAndPoll', ['las_seed_2_0', params.data]);
|
|
111
151
|
}
|
|
112
152
|
},
|
|
@@ -130,6 +170,13 @@ Parameters:
|
|
|
130
170
|
required: ['model', 'input']
|
|
131
171
|
},
|
|
132
172
|
async execute(toolCallId: string, params: any) {
|
|
173
|
+
if (params.input && Array.isArray(params.input)) {
|
|
174
|
+
for (const item of params.input) {
|
|
175
|
+
if (item.type === 'image_url' && item.image_url?.url) {
|
|
176
|
+
item.image_url.url = await processUrl(item.image_url.url);
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
}
|
|
133
180
|
return await callApi('multimodalEmbedding', [
|
|
134
181
|
params.model,
|
|
135
182
|
params.input,
|
|
@@ -153,6 +200,9 @@ Parameters in data:
|
|
|
153
200
|
required: ['data']
|
|
154
201
|
},
|
|
155
202
|
async execute(toolCallId: string, params: any) {
|
|
203
|
+
if (params.data?.video_url) {
|
|
204
|
+
params.data.video_url = await processUrl(params.data.video_url);
|
|
205
|
+
}
|
|
156
206
|
return await callApi('submitAndPoll', ['las_long_video_understand', params.data]);
|
|
157
207
|
}
|
|
158
208
|
},
|
|
@@ -171,6 +221,9 @@ Parameters in data:
|
|
|
171
221
|
required: ['data']
|
|
172
222
|
},
|
|
173
223
|
async execute(toolCallId: string, params: any) {
|
|
224
|
+
if (params.data?.url) {
|
|
225
|
+
params.data.url = await processUrl(params.data.url);
|
|
226
|
+
}
|
|
174
227
|
return await callApi('submitAndPoll', ['las_pdf_parse_doubao', params.data]);
|
|
175
228
|
}
|
|
176
229
|
},
|
|
@@ -189,6 +242,9 @@ Parameters in data:
|
|
|
189
242
|
required: ['data']
|
|
190
243
|
},
|
|
191
244
|
async execute(toolCallId: string, params: any) {
|
|
245
|
+
if (params.data?.video_url) {
|
|
246
|
+
params.data.video_url = await processUrl(params.data.video_url);
|
|
247
|
+
}
|
|
192
248
|
return await callApi('submitAndPoll', ['las_video_resize', params.data]);
|
|
193
249
|
}
|
|
194
250
|
}
|