@byted-las/contextlake-openclaw 1.0.4 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/commands/cli.d.ts +1 -1
- package/dist/src/commands/cli.js +4 -10
- package/dist/src/commands/index.js +5 -4
- package/dist/src/commands/slashcmd.js +4 -9
- package/dist/src/commands/tools.d.ts +2 -0
- package/dist/src/commands/tools.js +104 -22
- package/dist/src/lib/actions/lance-tools.d.ts +7 -0
- package/dist/src/lib/actions/lance-tools.js +22 -0
- package/dist/src/lib/actions/las-tools.js +58 -0
- package/dist/src/lib/actions/profiler.d.ts +4 -3
- package/dist/src/lib/actions/profiler.js +43 -7
- package/dist/src/lib/actions/s3-tools.d.ts +5 -2
- package/dist/src/lib/actions/s3-tools.js +54 -0
- package/openclaw.plugin.json +1 -1
- package/package.json +2 -1
- package/src/commands/cli.ts +4 -10
- package/src/commands/index.ts +7 -4
- package/src/commands/slashcmd.ts +4 -10
- package/src/commands/tools.ts +102 -24
- package/src/lib/actions/lance-tools.ts +35 -0
- package/src/lib/actions/las-tools.ts +56 -0
- package/src/lib/actions/profiler.ts +42 -7
- package/src/lib/actions/s3-tools.ts +59 -4
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@byted-las/contextlake-openclaw",
|
|
3
|
-
"version": "1.0.4",
|
|
3
|
+
"version": "1.0.5",
|
|
4
4
|
"description": "ContextLake OpenClaw Plugin for managing knowledge base",
|
|
5
5
|
"main": "index.ts",
|
|
6
6
|
"files": [
|
|
@@ -34,6 +34,7 @@
|
|
|
34
34
|
},
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@aws-sdk/client-s3": "^3.1014.0",
|
|
37
|
+
"@aws-sdk/s3-request-presigner": "^3.1014.0",
|
|
37
38
|
"@lancedb/lancedb": "^0.26.2",
|
|
38
39
|
"@volcengine/tos-sdk": "^2.9.0",
|
|
39
40
|
"commander": "^14.0.3",
|
package/src/commands/cli.ts
CHANGED
|
@@ -37,29 +37,23 @@ function parseMetadata(metadata: any): Record<string, any> {
|
|
|
37
37
|
|
|
38
38
|
export function getCliCommands(pluginConfig: ContextLakeConfig, logger: any) {
|
|
39
39
|
return {
|
|
40
|
-
connectAction: async (datasource_name: string, options: any) => {
|
|
41
|
-
logger.info(`[${new Date().toISOString()}] [ContextLake] CLI connect started`, { datasource_name, options });
|
|
40
|
+
connectAction: async (datasource_name: string, url: string, options: any) => {
|
|
41
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] CLI connect started`, { datasource_name, url, options });
|
|
42
42
|
try {
|
|
43
43
|
const params: ConnectParams = {
|
|
44
44
|
datasource_name,
|
|
45
|
-
|
|
45
|
+
url,
|
|
46
46
|
endpoint: options.endpoint,
|
|
47
47
|
access_key: options.ak,
|
|
48
48
|
secret_key: options.sk,
|
|
49
49
|
region: options.region,
|
|
50
|
-
bucket: options.bucket,
|
|
51
|
-
prefix: options.prefix,
|
|
52
50
|
sample_rows: parseInt(options.sampleRows),
|
|
53
51
|
};
|
|
54
52
|
|
|
55
53
|
// eslint-disable-next-line no-console
|
|
56
54
|
console.log(`[contextlake connect] Connecting to datasource "${datasource_name}"...`);
|
|
57
55
|
// eslint-disable-next-line no-console
|
|
58
|
-
console.log(`
|
|
59
|
-
// eslint-disable-next-line no-console
|
|
60
|
-
console.log(` bucket: ${params.bucket}`);
|
|
61
|
-
// eslint-disable-next-line no-console
|
|
62
|
-
console.log(` prefix: ${params.prefix}`);
|
|
56
|
+
console.log(` url: ${params.url}`);
|
|
63
57
|
|
|
64
58
|
const result = await connectDataSource(params);
|
|
65
59
|
// eslint-disable-next-line no-console
|
package/src/commands/index.ts
CHANGED
|
@@ -35,6 +35,12 @@ export function registerAll(ctx: OpenClawPluginApi, logger: PluginLogger) {
|
|
|
35
35
|
|
|
36
36
|
ctx.registerTool(tools.writeLanceCatalogTool );
|
|
37
37
|
logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.writeLanceCatalogTool.name}`);
|
|
38
|
+
|
|
39
|
+
ctx.registerTool(tools.readLanceCatalogTool );
|
|
40
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.readLanceCatalogTool.name}`);
|
|
41
|
+
|
|
42
|
+
ctx.registerTool(tools.generatePresignedUrlTool );
|
|
43
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.generatePresignedUrlTool.name}`);
|
|
38
44
|
|
|
39
45
|
ctx.registerTool(tools.listDatasourceTool );
|
|
40
46
|
logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.listDatasourceTool.name}`);
|
|
@@ -60,15 +66,12 @@ export function registerAll(ctx: OpenClawPluginApi, logger: PluginLogger) {
|
|
|
60
66
|
const commands = getCliCommands(pluginConfig, logger);
|
|
61
67
|
|
|
62
68
|
// connect -- data source profiling (las-data-profiler)
|
|
63
|
-
contextlake.command('connect <datasource_name>')
|
|
69
|
+
contextlake.command('connect <datasource_name> <url>')
|
|
64
70
|
.description('Connect to a data source and profile its structure, schemas, and media metadata into LanceDB')
|
|
65
|
-
.requiredOption('--vendor <vendor>', 'Data source type: volcengine | alibaba | tencent | aws | local')
|
|
66
71
|
.option('--endpoint <url>', 'S3 Endpoint URL (not needed for local)')
|
|
67
72
|
.option('--ak <credential_id>', 'Credential ID for the data source')
|
|
68
73
|
.option('--sk <credential_value>', 'Credential value for the data source')
|
|
69
74
|
.option('--region <region>', 'Region identifier (e.g. cn-beijing)')
|
|
70
|
-
.requiredOption('--bucket <bucket>', 'Bucket name (or local root directory for local vendor)')
|
|
71
|
-
.requiredOption('--prefix <prefix>', 'Path prefix to limit scan scope')
|
|
72
75
|
.option('--sample-rows <number>', 'Number of rows to sample per structured file', '100')
|
|
73
76
|
.action(commands.connectAction);
|
|
74
77
|
|
package/src/commands/slashcmd.ts
CHANGED
|
@@ -108,21 +108,15 @@ export function getSlashCommands(pluginConfig: ContextLakeConfig, logger: any) {
|
|
|
108
108
|
|
|
109
109
|
logger.info(`[${new Date().toISOString()}] [ContextLake] Slash command profiler started`, { args });
|
|
110
110
|
try {
|
|
111
|
-
if (args.length <
|
|
112
|
-
return { text: `**Error:** Missing arguments. Usage: /contextlake-profiler <datasource_name> <
|
|
111
|
+
if (args.length < 2) {
|
|
112
|
+
return { text: `**Error:** Missing arguments. Usage: /contextlake-profiler <datasource_name> <url> [endpoint] [ak] [sk] [region]` };
|
|
113
113
|
}
|
|
114
114
|
|
|
115
|
-
const [datasource_name,
|
|
116
|
-
|
|
117
|
-
if (!['volcengine', 'alibaba', 'tencent', 'aws', 'local'].includes(vendor)) {
|
|
118
|
-
return { text: `**Error:** Invalid vendor. Must be one of: volcengine, alibaba, tencent, aws, local` };
|
|
119
|
-
}
|
|
115
|
+
const [datasource_name, url, endpoint, access_key, secret_key, region] = args;
|
|
120
116
|
|
|
121
117
|
const params: ConnectParams = {
|
|
122
118
|
datasource_name,
|
|
123
|
-
|
|
124
|
-
bucket,
|
|
125
|
-
prefix,
|
|
119
|
+
url,
|
|
126
120
|
endpoint,
|
|
127
121
|
access_key,
|
|
128
122
|
secret_key,
|
package/src/commands/tools.ts
CHANGED
|
@@ -3,8 +3,8 @@ import { retrieveAssets } from '../lib/actions/retrieve';
|
|
|
3
3
|
import { listAssets, deleteAssets } from '../lib/actions/manage';
|
|
4
4
|
import { connectDataSource, listDataSources } from '../lib/actions/profiler';
|
|
5
5
|
import { getLasTools } from '../lib/actions/las-tools';
|
|
6
|
-
import { listS3Objects, readS3Object } from '../lib/actions/s3-tools';
|
|
7
|
-
import { writeLanceCatalog } from '../lib/actions/lance-tools';
|
|
6
|
+
import { listS3Objects, readS3Object, getPresignedUrl } from '../lib/actions/s3-tools';
|
|
7
|
+
import { writeLanceCatalog, readLanceCatalog } from '../lib/actions/lance-tools';
|
|
8
8
|
import { ContextLakeConfig } from '../utils/config';
|
|
9
9
|
// @ts-ignore
|
|
10
10
|
import type { AnyAgentTool } from 'openclaw/plugin-sdk';
|
|
@@ -18,7 +18,9 @@ export function getAgentTools(pluginConfig: ContextLakeConfig, logger: any): {
|
|
|
18
18
|
listDatasourceTool: AnyAgentTool;
|
|
19
19
|
listS3ObjectsTool: AnyAgentTool;
|
|
20
20
|
readS3ObjectTool: AnyAgentTool;
|
|
21
|
+
generatePresignedUrlTool: AnyAgentTool;
|
|
21
22
|
writeLanceCatalogTool: AnyAgentTool;
|
|
23
|
+
readLanceCatalogTool: AnyAgentTool;
|
|
22
24
|
lasTools: AnyAgentTool[];
|
|
23
25
|
} {
|
|
24
26
|
const lasTools = getLasTools(pluginConfig, logger);
|
|
@@ -274,16 +276,10 @@ Example User Queries:
|
|
|
274
276
|
type: 'object',
|
|
275
277
|
properties: {
|
|
276
278
|
datasource_name: { type: 'string', description: 'Name of the data source' },
|
|
277
|
-
|
|
278
|
-
endpoint: { type: 'string', description: 'S3 Endpoint URL (not needed for local)' },
|
|
279
|
-
access_key: { type: 'string', description: 'Credential ID for the data source' },
|
|
280
|
-
secret_key: { type: 'string', description: 'Credential value for the data source' },
|
|
281
|
-
region: { type: 'string', description: 'Region identifier (e.g. cn-beijing)' },
|
|
282
|
-
bucket: { type: 'string', description: 'Bucket name (or local root directory for local vendor)' },
|
|
283
|
-
prefix: { type: 'string', description: 'Path prefix to limit scan scope' },
|
|
279
|
+
url: { type: 'string', description: 'Data source URL (e.g. tos://bucket/prefix, oss://..., s3://..., file:///path)' },
|
|
284
280
|
sample_rows: { type: 'integer', description: 'Number of rows to sample per structured file' }
|
|
285
281
|
},
|
|
286
|
-
required: ['datasource_name', '
|
|
282
|
+
required: ['datasource_name', 'url'],
|
|
287
283
|
additionalProperties: false
|
|
288
284
|
},
|
|
289
285
|
|
|
@@ -317,17 +313,15 @@ Example User Queries:
|
|
|
317
313
|
parameters: {
|
|
318
314
|
type: 'object',
|
|
319
315
|
properties: {
|
|
320
|
-
|
|
321
|
-
|
|
316
|
+
url: { type: 'string', description: 'Data source URL (e.g. tos://bucket/prefix, oss://..., file:///path)' },
|
|
317
|
+
vendor: { type: 'string', enum: ['volcengine', 'alibaba', 'tencent', 'aws', 'local'], description: 'Required if url is not provided' },
|
|
318
|
+
bucket: { type: 'string', description: 'Required if url is not provided' },
|
|
322
319
|
prefix: { type: 'string' },
|
|
323
320
|
endpoint: { type: 'string' },
|
|
324
|
-
access_key: { type: 'string' },
|
|
325
|
-
secret_key: { type: 'string' },
|
|
326
|
-
region: { type: 'string' },
|
|
327
321
|
maxKeys: { type: 'integer' },
|
|
328
322
|
continuationToken: { type: 'string' }
|
|
329
323
|
},
|
|
330
|
-
required: [
|
|
324
|
+
required: [],
|
|
331
325
|
additionalProperties: false
|
|
332
326
|
},
|
|
333
327
|
async execute(toolCallId: string, params: any) {
|
|
@@ -347,22 +341,38 @@ Example User Queries:
|
|
|
347
341
|
parameters: {
|
|
348
342
|
type: 'object',
|
|
349
343
|
properties: {
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
344
|
+
url: { type: 'string', description: 'Full URL to the object (e.g. tos://bucket/path/to/key.txt)' },
|
|
345
|
+
vendor: { type: 'string', enum: ['volcengine', 'alibaba', 'tencent', 'aws', 'local'], description: 'Required if url is not provided' },
|
|
346
|
+
bucket: { type: 'string', description: 'Required if url is not provided' },
|
|
347
|
+
key: { type: 'string', description: 'Required if url is not provided' },
|
|
353
348
|
endpoint: { type: 'string' },
|
|
354
|
-
access_key: { type: 'string' },
|
|
355
|
-
secret_key: { type: 'string' },
|
|
356
|
-
region: { type: 'string' },
|
|
357
349
|
maxBytes: { type: 'integer' }
|
|
358
350
|
},
|
|
359
|
-
required: [
|
|
351
|
+
required: [],
|
|
360
352
|
additionalProperties: false
|
|
361
353
|
},
|
|
362
354
|
async execute(toolCallId: string, params: any) {
|
|
363
355
|
let actualParams = params.params || params;
|
|
364
356
|
try {
|
|
365
|
-
|
|
357
|
+
// Extract key from url if provided
|
|
358
|
+
let key = actualParams.key;
|
|
359
|
+
if (actualParams.url && !key) {
|
|
360
|
+
try {
|
|
361
|
+
if (actualParams.url.startsWith('file://')) {
|
|
362
|
+
// Key is not strictly needed for file://, bucket contains the path in parseS3Url
|
|
363
|
+
key = '';
|
|
364
|
+
} else {
|
|
365
|
+
const parsedUrl = new URL(actualParams.url);
|
|
366
|
+
key = parsedUrl.pathname.replace(/^\//, '');
|
|
367
|
+
}
|
|
368
|
+
} catch (e) {
|
|
369
|
+
// let it fail in readS3Object
|
|
370
|
+
}
|
|
371
|
+
}
|
|
372
|
+
if (!key && !actualParams.url?.startsWith('file://')) {
|
|
373
|
+
throw new Error('key is required or must be part of the url');
|
|
374
|
+
}
|
|
375
|
+
const buf = await readS3Object(actualParams, key, actualParams.maxBytes);
|
|
366
376
|
// Return as base64 string
|
|
367
377
|
return { content: [{ type: "text", text: buf.toString('base64') }], details: { length: buf.length } } as any;
|
|
368
378
|
} catch (e: any) {
|
|
@@ -370,6 +380,49 @@ Example User Queries:
|
|
|
370
380
|
}
|
|
371
381
|
}
|
|
372
382
|
},
|
|
383
|
+
generatePresignedUrlTool: {
|
|
384
|
+
name: 'generate-presigned-url',
|
|
385
|
+
label: 'Generate Presigned URL',
|
|
386
|
+
description: 'Generate a presigned HTTP URL for an S3/TOS object, allowing temporary public access',
|
|
387
|
+
parameters: {
|
|
388
|
+
type: 'object',
|
|
389
|
+
properties: {
|
|
390
|
+
url: { type: 'string', description: 'Full URL to the object (e.g. tos://bucket/path/to/key.txt)' },
|
|
391
|
+
vendor: { type: 'string', enum: ['volcengine', 'alibaba', 'tencent', 'aws', 'local'], description: 'Required if url is not provided' },
|
|
392
|
+
bucket: { type: 'string', description: 'Required if url is not provided' },
|
|
393
|
+
key: { type: 'string', description: 'Required if url is not provided' },
|
|
394
|
+
endpoint: { type: 'string' },
|
|
395
|
+
expiresIn: { type: 'integer', description: 'Expiration time in seconds (default 3600)' }
|
|
396
|
+
},
|
|
397
|
+
required: [],
|
|
398
|
+
additionalProperties: false
|
|
399
|
+
},
|
|
400
|
+
async execute(toolCallId: string, params: any) {
|
|
401
|
+
let actualParams = params.params || params;
|
|
402
|
+
try {
|
|
403
|
+
let key = actualParams.key;
|
|
404
|
+
if (actualParams.url && !key) {
|
|
405
|
+
try {
|
|
406
|
+
if (actualParams.url.startsWith('file://')) {
|
|
407
|
+
key = '';
|
|
408
|
+
} else {
|
|
409
|
+
const parsedUrl = new URL(actualParams.url);
|
|
410
|
+
key = parsedUrl.pathname.replace(/^\//, '');
|
|
411
|
+
}
|
|
412
|
+
} catch (e) {
|
|
413
|
+
// let it fail in getPresignedUrl
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
if (!key && !actualParams.url?.startsWith('file://')) {
|
|
417
|
+
throw new Error('key is required or must be part of the url');
|
|
418
|
+
}
|
|
419
|
+
const url = await getPresignedUrl(actualParams, key, actualParams.expiresIn);
|
|
420
|
+
return { content: [{ type: "text", text: url }], details: { url } } as any;
|
|
421
|
+
} catch (e: any) {
|
|
422
|
+
return { content: [{ type: "text", text: String(e.message) }], details: { error: e.message } } as any;
|
|
423
|
+
}
|
|
424
|
+
}
|
|
425
|
+
},
|
|
373
426
|
writeLanceCatalogTool: {
|
|
374
427
|
name: 'write-lance-catalog',
|
|
375
428
|
label: 'Write LanceDB Catalog',
|
|
@@ -393,6 +446,31 @@ Example User Queries:
|
|
|
393
446
|
return { content: [{ type: "text", text: String(e.message) }], details: { error: e.message } } as any;
|
|
394
447
|
}
|
|
395
448
|
}
|
|
449
|
+
},
|
|
450
|
+
readLanceCatalogTool: {
|
|
451
|
+
name: 'read-lance-catalog',
|
|
452
|
+
label: 'Read LanceDB Catalog',
|
|
453
|
+
description: 'Read records from a local LanceDB table for validation or ingestion processes',
|
|
454
|
+
parameters: {
|
|
455
|
+
type: 'object',
|
|
456
|
+
properties: {
|
|
457
|
+
db_path: { type: 'string', description: 'Path to the local LanceDB database' },
|
|
458
|
+
table_name: { type: 'string', description: 'Name of the table to read' },
|
|
459
|
+
limit: { type: 'integer', description: 'Maximum number of records to return' },
|
|
460
|
+
filter: { type: 'string', description: 'SQL-like filter string (e.g., "category = \'structured\'")' }
|
|
461
|
+
},
|
|
462
|
+
required: ['db_path', 'table_name'],
|
|
463
|
+
additionalProperties: false
|
|
464
|
+
},
|
|
465
|
+
async execute(toolCallId: string, params: any) {
|
|
466
|
+
let actualParams = params.params || params;
|
|
467
|
+
try {
|
|
468
|
+
const results = await readLanceCatalog(actualParams);
|
|
469
|
+
return { content: [{ type: "text", text: JSON.stringify(results) }], details: { count: results.length, data: results } } as any;
|
|
470
|
+
} catch (e: any) {
|
|
471
|
+
return { content: [{ type: "text", text: String(e.message) }], details: { error: e.message } } as any;
|
|
472
|
+
}
|
|
473
|
+
}
|
|
396
474
|
}
|
|
397
475
|
};
|
|
398
476
|
}
|
package/src/lib/actions/lance-tools.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import * as lancedb from '@lancedb/lancedb';
|
|
2
|
+
import * as fs from 'fs';
|
|
2
3
|
|
|
3
4
|
export interface LanceWriteParams {
|
|
4
5
|
db_path: string;
|
|
@@ -6,6 +7,13 @@ export interface LanceWriteParams {
|
|
|
6
7
|
records: any[];
|
|
7
8
|
}
|
|
8
9
|
|
|
10
|
+
export interface LanceReadParams {
|
|
11
|
+
db_path: string;
|
|
12
|
+
table_name: string;
|
|
13
|
+
limit?: number;
|
|
14
|
+
filter?: string;
|
|
15
|
+
}
|
|
16
|
+
|
|
9
17
|
export async function writeLanceCatalog(params: LanceWriteParams) {
|
|
10
18
|
if (!params.records || params.records.length === 0) {
|
|
11
19
|
return;
|
|
@@ -21,3 +29,30 @@ export async function writeLanceCatalog(params: LanceWriteParams) {
|
|
|
21
29
|
await db.createTable(params.table_name, params.records);
|
|
22
30
|
}
|
|
23
31
|
}
|
|
32
|
+
|
|
33
|
+
export async function readLanceCatalog(params: LanceReadParams) {
|
|
34
|
+
if (!fs.existsSync(params.db_path)) {
|
|
35
|
+
throw new Error(`Database not found at ${params.db_path}`);
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
const db = await lancedb.connect(params.db_path);
|
|
39
|
+
const tableNames = await db.tableNames();
|
|
40
|
+
|
|
41
|
+
if (!tableNames.includes(params.table_name)) {
|
|
42
|
+
throw new Error(`Table '${params.table_name}' not found in database`);
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
const table = await db.openTable(params.table_name);
|
|
46
|
+
let query = table.query();
|
|
47
|
+
|
|
48
|
+
if (params.filter) {
|
|
49
|
+
query = query.where(params.filter);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
if (params.limit && params.limit > 0) {
|
|
53
|
+
query = query.limit(params.limit);
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
const results = await query.toArray();
|
|
57
|
+
return results;
|
|
58
|
+
}
|
package/src/lib/actions/las-tools.ts
CHANGED
|
@@ -1,11 +1,36 @@
|
|
|
1
1
|
import { LasApiClient } from './las-api';
|
|
2
2
|
import { ContextLakeConfig } from '../../utils/config';
|
|
3
|
+
import { getPresignedUrl } from './s3-tools';
|
|
3
4
|
// @ts-ignore
|
|
4
5
|
import type { AnyAgentTool } from 'openclaw/plugin-sdk';
|
|
5
6
|
|
|
6
7
|
export function getLasTools(pluginConfig: ContextLakeConfig, logger: any): AnyAgentTool[] {
|
|
7
8
|
const apiClient = new LasApiClient(pluginConfig, logger);
|
|
8
9
|
|
|
10
|
+
const processUrl = async (url: string): Promise<string> => {
|
|
11
|
+
if (!url) return url;
|
|
12
|
+
if (url.startsWith('tos://')) {
|
|
13
|
+
// LAS operators prefer native tos:// paths when supported, leave as is
|
|
14
|
+
return url;
|
|
15
|
+
} else if (url.startsWith('oss://') || url.startsWith('s3://') || url.startsWith('cos://') || url.startsWith('file://')) {
|
|
16
|
+
// Need presigned HTTP url for other vendors
|
|
17
|
+
logger.info(`[LasTools] Presigning URL for vendor: ${url}`);
|
|
18
|
+
try {
|
|
19
|
+
// If it's a file:// we also presign it to file:// which might not be supported by remote LAS,
|
|
20
|
+
// but local files typically need to be uploaded to TOS first. We'll leave file:// to fail or be handled elsewhere.
|
|
21
|
+
if (url.startsWith('file://')) return url;
|
|
22
|
+
|
|
23
|
+
const urlParts = new URL(url);
|
|
24
|
+
const key = urlParts.pathname.replace(/^\//, '');
|
|
25
|
+
return await getPresignedUrl({ url }, key, 3600);
|
|
26
|
+
} catch (e: any) {
|
|
27
|
+
logger.warn(`[LasTools] Failed to presign URL: ${url}`, { error: e.message });
|
|
28
|
+
return url; // fallback to original
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
return url;
|
|
32
|
+
};
|
|
33
|
+
|
|
9
34
|
const callApi = async (method: string, args: any[]) => {
|
|
10
35
|
try {
|
|
11
36
|
// @ts-ignore
|
|
@@ -35,6 +60,9 @@ Parameters in data:
|
|
|
35
60
|
required: ['data']
|
|
36
61
|
},
|
|
37
62
|
async execute(toolCallId: string, params: any) {
|
|
63
|
+
if (params.data?.image) {
|
|
64
|
+
params.data.image = await processUrl(params.data.image);
|
|
65
|
+
}
|
|
38
66
|
return await callApi('process', ['las_image_resample', params.data]);
|
|
39
67
|
}
|
|
40
68
|
},
|
|
@@ -55,6 +83,9 @@ Parameters in data:
|
|
|
55
83
|
required: ['data']
|
|
56
84
|
},
|
|
57
85
|
async execute(toolCallId: string, params: any) {
|
|
86
|
+
if (params.data?.input_path) {
|
|
87
|
+
params.data.input_path = await processUrl(params.data.input_path);
|
|
88
|
+
}
|
|
58
89
|
return await callApi('process', ['las_audio_extract_and_split', params.data]);
|
|
59
90
|
}
|
|
60
91
|
},
|
|
@@ -73,6 +104,9 @@ Parameters in data:
|
|
|
73
104
|
required: ['data']
|
|
74
105
|
},
|
|
75
106
|
async execute(toolCallId: string, params: any) {
|
|
107
|
+
if (params.data?.input_path) {
|
|
108
|
+
params.data.input_path = await processUrl(params.data.input_path);
|
|
109
|
+
}
|
|
76
110
|
return await callApi('process', ['las_audio_convert', params.data]);
|
|
77
111
|
}
|
|
78
112
|
},
|
|
@@ -91,6 +125,9 @@ Parameters in data:
|
|
|
91
125
|
required: ['data']
|
|
92
126
|
},
|
|
93
127
|
async execute(toolCallId: string, params: any) {
|
|
128
|
+
if (params.data?.audio?.url) {
|
|
129
|
+
params.data.audio.url = await processUrl(params.data.audio.url);
|
|
130
|
+
}
|
|
94
131
|
return await callApi('submitAndPoll', ['las_asr_pro', params.data]);
|
|
95
132
|
}
|
|
96
133
|
},
|
|
@@ -107,6 +144,9 @@ Parameters in data:
|
|
|
107
144
|
required: ['data']
|
|
108
145
|
},
|
|
109
146
|
async execute(toolCallId: string, params: any) {
|
|
147
|
+
if (params.data?.audio?.url) {
|
|
148
|
+
params.data.audio.url = await processUrl(params.data.audio.url);
|
|
149
|
+
}
|
|
110
150
|
return await callApi('submitAndPoll', ['las_seed_2_0', params.data]);
|
|
111
151
|
}
|
|
112
152
|
},
|
|
@@ -130,6 +170,13 @@ Parameters:
|
|
|
130
170
|
required: ['model', 'input']
|
|
131
171
|
},
|
|
132
172
|
async execute(toolCallId: string, params: any) {
|
|
173
|
+
if (params.input && Array.isArray(params.input)) {
|
|
174
|
+
for (const item of params.input) {
|
|
175
|
+
if (item.type === 'image_url' && item.image_url?.url) {
|
|
176
|
+
item.image_url.url = await processUrl(item.image_url.url);
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
}
|
|
133
180
|
return await callApi('multimodalEmbedding', [
|
|
134
181
|
params.model,
|
|
135
182
|
params.input,
|
|
@@ -153,6 +200,9 @@ Parameters in data:
|
|
|
153
200
|
required: ['data']
|
|
154
201
|
},
|
|
155
202
|
async execute(toolCallId: string, params: any) {
|
|
203
|
+
if (params.data?.video_url) {
|
|
204
|
+
params.data.video_url = await processUrl(params.data.video_url);
|
|
205
|
+
}
|
|
156
206
|
return await callApi('submitAndPoll', ['las_long_video_understand', params.data]);
|
|
157
207
|
}
|
|
158
208
|
},
|
|
@@ -171,6 +221,9 @@ Parameters in data:
|
|
|
171
221
|
required: ['data']
|
|
172
222
|
},
|
|
173
223
|
async execute(toolCallId: string, params: any) {
|
|
224
|
+
if (params.data?.url) {
|
|
225
|
+
params.data.url = await processUrl(params.data.url);
|
|
226
|
+
}
|
|
174
227
|
return await callApi('submitAndPoll', ['las_pdf_parse_doubao', params.data]);
|
|
175
228
|
}
|
|
176
229
|
},
|
|
@@ -189,6 +242,9 @@ Parameters in data:
|
|
|
189
242
|
required: ['data']
|
|
190
243
|
},
|
|
191
244
|
async execute(toolCallId: string, params: any) {
|
|
245
|
+
if (params.data?.video_url) {
|
|
246
|
+
params.data.video_url = await processUrl(params.data.video_url);
|
|
247
|
+
}
|
|
192
248
|
return await callApi('submitAndPoll', ['las_video_resize', params.data]);
|
|
193
249
|
}
|
|
194
250
|
}
|
package/src/lib/actions/profiler.ts
CHANGED
|
@@ -7,13 +7,14 @@ import * as mime from 'mime-types';
|
|
|
7
7
|
|
|
8
8
|
export interface ConnectParams {
|
|
9
9
|
datasource_name: string;
|
|
10
|
-
|
|
10
|
+
url: string;
|
|
11
|
+
vendor?: 'volcengine' | 'alibaba' | 'tencent' | 'aws' | 'local';
|
|
11
12
|
endpoint?: string;
|
|
12
13
|
access_key?: string;
|
|
13
14
|
secret_key?: string;
|
|
14
15
|
region?: string;
|
|
15
|
-
bucket: string;
|
|
16
|
-
prefix: string;
|
|
16
|
+
bucket?: string;
|
|
17
|
+
prefix?: string;
|
|
17
18
|
sample_rows?: number;
|
|
18
19
|
}
|
|
19
20
|
|
|
@@ -90,9 +91,36 @@ export async function connectDataSource(
|
|
|
90
91
|
_ctx?: any
|
|
91
92
|
): Promise<ConnectResult> {
|
|
92
93
|
if (!params.datasource_name) throw new Error('datasource_name is required');
|
|
93
|
-
if (!params.
|
|
94
|
-
|
|
95
|
-
|
|
94
|
+
if (!params.url) throw new Error('url is required (e.g. tos://bucket/prefix)');
|
|
95
|
+
|
|
96
|
+
// Parse URL: tos://bucket/prefix
|
|
97
|
+
try {
|
|
98
|
+
if (params.url.startsWith('file://') || params.url.startsWith('/')) {
|
|
99
|
+
params.vendor = 'local';
|
|
100
|
+
const localPath = params.url.startsWith('file://') ? params.url.slice(7) : params.url;
|
|
101
|
+
params.bucket = localPath;
|
|
102
|
+
params.prefix = '.';
|
|
103
|
+
} else {
|
|
104
|
+
const parsedUrl = new URL(params.url);
|
|
105
|
+
const protocol = parsedUrl.protocol.replace(':', '');
|
|
106
|
+
|
|
107
|
+
if (['tos', 'oss', 'cos', 's3'].includes(protocol)) {
|
|
108
|
+
if (protocol === 'tos') params.vendor = 'volcengine';
|
|
109
|
+
else if (protocol === 'oss') params.vendor = 'alibaba';
|
|
110
|
+
else if (protocol === 'cos') params.vendor = 'tencent';
|
|
111
|
+
else if (protocol === 's3') params.vendor = 'aws';
|
|
112
|
+
|
|
113
|
+
params.bucket = parsedUrl.hostname;
|
|
114
|
+
params.prefix = parsedUrl.pathname.replace(/^\//, ''); // Remove leading slash
|
|
115
|
+
} else {
|
|
116
|
+
throw new Error(`Unsupported protocol: ${protocol}`);
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
} catch (e: any) {
|
|
120
|
+
if (!params.vendor || !params.bucket || params.prefix === undefined) {
|
|
121
|
+
throw new Error(`Invalid url format: ${e.message}`);
|
|
122
|
+
}
|
|
123
|
+
}
|
|
96
124
|
|
|
97
125
|
if (params.vendor !== 'local') {
|
|
98
126
|
if (!params.endpoint && params.vendor !== 'aws') throw new Error(`endpoint is required for vendor "${params.vendor}"`);
|
|
@@ -134,7 +162,14 @@ export async function connectDataSource(
|
|
|
134
162
|
const scan_ts = new Date().toISOString() + 'Z';
|
|
135
163
|
|
|
136
164
|
while (isTruncated) {
|
|
137
|
-
const response = await listS3Objects(
|
|
165
|
+
const response = await listS3Objects({
|
|
166
|
+
vendor: params.vendor as any,
|
|
167
|
+
bucket: params.bucket as string,
|
|
168
|
+
endpoint: params.endpoint,
|
|
169
|
+
access_key: params.access_key,
|
|
170
|
+
secret_key: params.secret_key,
|
|
171
|
+
region: params.region
|
|
172
|
+
}, params.prefix || '', 1000, continuationToken);
|
|
138
173
|
|
|
139
174
|
for (const obj of response.Contents) {
|
|
140
175
|
const key = obj.Key || '';
|
package/src/lib/actions/s3-tools.ts
CHANGED
|
@@ -1,17 +1,51 @@
|
|
|
1
1
|
import { S3Client, ListObjectsV2Command, GetObjectCommand } from '@aws-sdk/client-s3';
|
|
2
|
+
import { getSignedUrl } from '@aws-sdk/s3-request-presigner';
|
|
2
3
|
import * as fs from 'fs';
|
|
3
4
|
import * as path from 'path';
|
|
4
5
|
|
|
5
6
|
export interface S3Params {
|
|
6
|
-
|
|
7
|
+
url?: string;
|
|
8
|
+
vendor?: 'volcengine' | 'alibaba' | 'tencent' | 'aws' | 'local';
|
|
7
9
|
endpoint?: string;
|
|
8
10
|
access_key?: string;
|
|
9
11
|
secret_key?: string;
|
|
10
12
|
region?: string;
|
|
11
|
-
bucket: string;
|
|
13
|
+
bucket?: string;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
export function parseS3Url(params: S3Params): S3Params {
|
|
17
|
+
if (params.url) {
|
|
18
|
+
if (params.url.startsWith('file://') || params.url.startsWith('/')) {
|
|
19
|
+
params.vendor = 'local';
|
|
20
|
+
const localPath = params.url.startsWith('file://') ? params.url.slice(7) : params.url;
|
|
21
|
+
params.bucket = localPath;
|
|
22
|
+
} else {
|
|
23
|
+
const parsedUrl = new URL(params.url);
|
|
24
|
+
const protocol = parsedUrl.protocol.replace(':', '');
|
|
25
|
+
|
|
26
|
+
if (['tos', 'oss', 'cos', 's3'].includes(protocol)) {
|
|
27
|
+
if (protocol === 'tos') params.vendor = 'volcengine';
|
|
28
|
+
else if (protocol === 'oss') params.vendor = 'alibaba';
|
|
29
|
+
else if (protocol === 'cos') params.vendor = 'tencent';
|
|
30
|
+
else if (protocol === 's3') params.vendor = 'aws';
|
|
31
|
+
|
|
32
|
+
params.bucket = parsedUrl.hostname;
|
|
33
|
+
// Prefix is usually parsed separately or passed explicitly for listing
|
|
34
|
+
} else {
|
|
35
|
+
throw new Error(`Unsupported protocol: ${protocol}`);
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
if (!params.vendor || !params.bucket) {
|
|
41
|
+
throw new Error('Could not determine vendor or bucket. Please provide a valid url or vendor/bucket directly.');
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
return params;
|
|
12
45
|
}
|
|
13
46
|
|
|
14
47
|
function createS3Client(params: S3Params): S3Client | null {
|
|
48
|
+
params = parseS3Url(params);
|
|
15
49
|
if (params.vendor === 'local') return null;
|
|
16
50
|
|
|
17
51
|
let endpoint = params.endpoint;
|
|
@@ -56,8 +90,9 @@ function createS3Client(params: S3Params): S3Client | null {
|
|
|
56
90
|
}
|
|
57
91
|
|
|
58
92
|
export async function listS3Objects(params: S3Params, prefix: string, maxKeys: number = 1000, continuationToken?: string) {
|
|
93
|
+
params = parseS3Url(params);
|
|
59
94
|
if (params.vendor === 'local') {
|
|
60
|
-
const root = params.bucket;
|
|
95
|
+
const root = params.bucket as string;
|
|
61
96
|
const prefixPath = prefix && prefix !== '.' ? path.join(root, prefix) : root;
|
|
62
97
|
const files: any[] = [];
|
|
63
98
|
|
|
@@ -108,8 +143,9 @@ export async function listS3Objects(params: S3Params, prefix: string, maxKeys: n
|
|
|
108
143
|
}
|
|
109
144
|
|
|
110
145
|
export async function readS3Object(params: S3Params, key: string, maxBytes?: number): Promise<Buffer> {
|
|
146
|
+
params = parseS3Url(params);
|
|
111
147
|
if (params.vendor === 'local') {
|
|
112
|
-
const fullPath = path.join(params.bucket, key);
|
|
148
|
+
const fullPath = path.join(params.bucket as string, key);
|
|
113
149
|
if (maxBytes) {
|
|
114
150
|
const fd = fs.openSync(fullPath, 'r');
|
|
115
151
|
const buffer = Buffer.alloc(maxBytes);
|
|
@@ -146,3 +182,22 @@ export async function readS3Object(params: S3Params, key: string, maxBytes?: num
|
|
|
146
182
|
}
|
|
147
183
|
return Buffer.alloc(0);
|
|
148
184
|
}
|
|
185
|
+
|
|
186
|
+
export async function getPresignedUrl(params: S3Params, key: string, expiresIn: number = 3600): Promise<string> {
|
|
187
|
+
params = parseS3Url(params);
|
|
188
|
+
if (params.vendor === 'local') {
|
|
189
|
+
const fullPath = path.join(params.bucket as string, key);
|
|
190
|
+
return `file://${fullPath}`;
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
const client = createS3Client(params);
|
|
194
|
+
if (!client) throw new Error('Failed to create S3 client');
|
|
195
|
+
|
|
196
|
+
const command = new GetObjectCommand({
|
|
197
|
+
Bucket: params.bucket,
|
|
198
|
+
Key: key
|
|
199
|
+
});
|
|
200
|
+
|
|
201
|
+
const signedUrl = await getSignedUrl(client, command, { expiresIn });
|
|
202
|
+
return signedUrl;
|
|
203
|
+
}
|