@byted-las/contextlake-openclaw 1.0.4 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/commands/cli.d.ts +1 -1
- package/dist/src/commands/cli.js +4 -10
- package/dist/src/commands/index.js +5 -4
- package/dist/src/commands/slashcmd.js +4 -9
- package/dist/src/commands/tools.d.ts +2 -0
- package/dist/src/commands/tools.js +104 -22
- package/dist/src/lib/actions/lance-tools.d.ts +7 -0
- package/dist/src/lib/actions/lance-tools.js +22 -0
- package/dist/src/lib/actions/las-tools.js +58 -0
- package/dist/src/lib/actions/profiler.d.ts +4 -3
- package/dist/src/lib/actions/profiler.js +43 -7
- package/dist/src/lib/actions/s3-tools.d.ts +5 -2
- package/dist/src/lib/actions/s3-tools.js +54 -0
- package/dist/src/skills/SKILL.md +3 -1
- package/dist/src/skills/contextlake-delete/SKILL.md +2 -0
- package/dist/src/skills/contextlake-ingest/SKILL.md +2 -0
- package/dist/src/skills/contextlake-list/SKILL.md +2 -0
- package/dist/src/skills/contextlake-retrieve/SKILL.md +2 -0
- package/dist/src/skills/las-data-profiler/SKILL.md +3 -1
- package/openclaw.plugin.json +1 -1
- package/package.json +2 -1
- package/src/commands/cli.ts +4 -10
- package/src/commands/index.ts +7 -4
- package/src/commands/slashcmd.ts +4 -10
- package/src/commands/tools.ts +102 -24
- package/src/lib/actions/lance-tools.ts +35 -0
- package/src/lib/actions/las-tools.ts +56 -0
- package/src/lib/actions/profiler.ts +42 -7
- package/src/lib/actions/s3-tools.ts +59 -4
- package/src/skills/contextlake-delete/SKILL.md +2 -0
- package/src/skills/contextlake-ingest/SKILL.md +2 -0
- package/src/skills/contextlake-list/SKILL.md +2 -0
- package/src/skills/contextlake-retrieve/SKILL.md +2 -0
- package/src/skills/las-data-profiler/SKILL.md +3 -1
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { ContextLakeConfig } from '../utils/config';
|
|
2
2
|
export declare function getCliCommands(pluginConfig: ContextLakeConfig, logger: any): {
|
|
3
|
-
connectAction: (datasource_name: string, options: any) => Promise<void>;
|
|
3
|
+
connectAction: (datasource_name: string, url: string, options: any) => Promise<void>;
|
|
4
4
|
ingestAction: (datasource_name: string) => Promise<void>;
|
|
5
5
|
searchAction: (query: any, options: any) => Promise<void>;
|
|
6
6
|
listAction: (options: any) => Promise<void>;
|
package/dist/src/commands/cli.js
CHANGED
|
@@ -34,28 +34,22 @@ function parseMetadata(metadata) {
|
|
|
34
34
|
}
|
|
35
35
|
function getCliCommands(pluginConfig, logger) {
|
|
36
36
|
return {
|
|
37
|
-
connectAction: async (datasource_name, options) => {
|
|
38
|
-
logger.info(`[${new Date().toISOString()}] [ContextLake] CLI connect started`, { datasource_name, options });
|
|
37
|
+
connectAction: async (datasource_name, url, options) => {
|
|
38
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] CLI connect started`, { datasource_name, url, options });
|
|
39
39
|
try {
|
|
40
40
|
const params = {
|
|
41
41
|
datasource_name,
|
|
42
|
-
|
|
42
|
+
url,
|
|
43
43
|
endpoint: options.endpoint,
|
|
44
44
|
access_key: options.ak,
|
|
45
45
|
secret_key: options.sk,
|
|
46
46
|
region: options.region,
|
|
47
|
-
bucket: options.bucket,
|
|
48
|
-
prefix: options.prefix,
|
|
49
47
|
sample_rows: parseInt(options.sampleRows),
|
|
50
48
|
};
|
|
51
49
|
// eslint-disable-next-line no-console
|
|
52
50
|
console.log(`[contextlake connect] Connecting to datasource "${datasource_name}"...`);
|
|
53
51
|
// eslint-disable-next-line no-console
|
|
54
|
-
console.log(`
|
|
55
|
-
// eslint-disable-next-line no-console
|
|
56
|
-
console.log(` bucket: ${params.bucket}`);
|
|
57
|
-
// eslint-disable-next-line no-console
|
|
58
|
-
console.log(` prefix: ${params.prefix}`);
|
|
52
|
+
console.log(` url: ${params.url}`);
|
|
59
53
|
const result = await (0, profiler_1.connectDataSource)(params);
|
|
60
54
|
// eslint-disable-next-line no-console
|
|
61
55
|
console.log(JSON.stringify(result, null, 2));
|
|
@@ -26,6 +26,10 @@ function registerAll(ctx, logger) {
|
|
|
26
26
|
logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.readS3ObjectTool.name}`);
|
|
27
27
|
ctx.registerTool(tools.writeLanceCatalogTool);
|
|
28
28
|
logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.writeLanceCatalogTool.name}`);
|
|
29
|
+
ctx.registerTool(tools.readLanceCatalogTool);
|
|
30
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.readLanceCatalogTool.name}`);
|
|
31
|
+
ctx.registerTool(tools.generatePresignedUrlTool);
|
|
32
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.generatePresignedUrlTool.name}`);
|
|
29
33
|
ctx.registerTool(tools.listDatasourceTool);
|
|
30
34
|
logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.listDatasourceTool.name}`);
|
|
31
35
|
for (const lasTool of tools.lasTools) {
|
|
@@ -46,15 +50,12 @@ function registerAll(ctx, logger) {
|
|
|
46
50
|
.description('Manage ContextLake knowledge base');
|
|
47
51
|
const commands = (0, cli_1.getCliCommands)(pluginConfig, logger);
|
|
48
52
|
// connect -- data source profiling (las-data-profiler)
|
|
49
|
-
contextlake.command('connect <datasource_name>')
|
|
53
|
+
contextlake.command('connect <datasource_name> <url>')
|
|
50
54
|
.description('Connect to a data source and profile its structure, schemas, and media metadata into LanceDB')
|
|
51
|
-
.requiredOption('--vendor <vendor>', 'Data source type: volcengine | alibaba | tencent | aws | local')
|
|
52
55
|
.option('--endpoint <url>', 'S3 Endpoint URL (not needed for local)')
|
|
53
56
|
.option('--ak <credential_id>', 'Credential ID for the data source')
|
|
54
57
|
.option('--sk <credential_value>', 'Credential value for the data source')
|
|
55
58
|
.option('--region <region>', 'Region identifier (e.g. cn-beijing)')
|
|
56
|
-
.requiredOption('--bucket <bucket>', 'Bucket name (or local root directory for local vendor)')
|
|
57
|
-
.requiredOption('--prefix <prefix>', 'Path prefix to limit scan scope')
|
|
58
59
|
.option('--sample-rows <number>', 'Number of rows to sample per structured file', '100')
|
|
59
60
|
.action(commands.connectAction);
|
|
60
61
|
// Ingest
|
|
@@ -133,18 +133,13 @@ function getSlashCommands(pluginConfig, logger) {
|
|
|
133
133
|
const args = rawArgs.split(' ').filter((arg) => arg.trim() !== '');
|
|
134
134
|
logger.info(`[${new Date().toISOString()}] [ContextLake] Slash command profiler started`, { args });
|
|
135
135
|
try {
|
|
136
|
-
if (args.length <
|
|
137
|
-
return { text: `**Error:** Missing arguments. Usage: /contextlake-profiler <datasource_name> <
|
|
138
|
-
}
|
|
139
|
-
const [datasource_name, vendor, bucket, prefix, endpoint, access_key, secret_key, region] = args;
|
|
140
|
-
if (!['volcengine', 'alibaba', 'tencent', 'aws', 'local'].includes(vendor)) {
|
|
141
|
-
return { text: `**Error:** Invalid vendor. Must be one of: volcengine, alibaba, tencent, aws, local` };
|
|
136
|
+
if (args.length < 2) {
|
|
137
|
+
return { text: `**Error:** Missing arguments. Usage: /contextlake-profiler <datasource_name> <url> [endpoint] [ak] [sk] [region]` };
|
|
142
138
|
}
|
|
139
|
+
const [datasource_name, url, endpoint, access_key, secret_key, region] = args;
|
|
143
140
|
const params = {
|
|
144
141
|
datasource_name,
|
|
145
|
-
|
|
146
|
-
bucket,
|
|
147
|
-
prefix,
|
|
142
|
+
url,
|
|
148
143
|
endpoint,
|
|
149
144
|
access_key,
|
|
150
145
|
secret_key,
|
|
@@ -9,6 +9,8 @@ export declare function getAgentTools(pluginConfig: ContextLakeConfig, logger: a
|
|
|
9
9
|
listDatasourceTool: AnyAgentTool;
|
|
10
10
|
listS3ObjectsTool: AnyAgentTool;
|
|
11
11
|
readS3ObjectTool: AnyAgentTool;
|
|
12
|
+
generatePresignedUrlTool: AnyAgentTool;
|
|
12
13
|
writeLanceCatalogTool: AnyAgentTool;
|
|
14
|
+
readLanceCatalogTool: AnyAgentTool;
|
|
13
15
|
lasTools: AnyAgentTool[];
|
|
14
16
|
};
|
|
@@ -253,16 +253,10 @@ Example User Queries:
|
|
|
253
253
|
type: 'object',
|
|
254
254
|
properties: {
|
|
255
255
|
datasource_name: { type: 'string', description: 'Name of the data source' },
|
|
256
|
-
|
|
257
|
-
endpoint: { type: 'string', description: 'S3 Endpoint URL (not needed for local)' },
|
|
258
|
-
access_key: { type: 'string', description: 'Credential ID for the data source' },
|
|
259
|
-
secret_key: { type: 'string', description: 'Credential value for the data source' },
|
|
260
|
-
region: { type: 'string', description: 'Region identifier (e.g. cn-beijing)' },
|
|
261
|
-
bucket: { type: 'string', description: 'Bucket name (or local root directory for local vendor)' },
|
|
262
|
-
prefix: { type: 'string', description: 'Path prefix to limit scan scope' },
|
|
256
|
+
url: { type: 'string', description: 'Data source URL (e.g. tos://bucket/prefix, oss://..., s3://..., file:///path)' },
|
|
263
257
|
sample_rows: { type: 'integer', description: 'Number of rows to sample per structured file' }
|
|
264
258
|
},
|
|
265
|
-
required: ['datasource_name', '
|
|
259
|
+
required: ['datasource_name', 'url'],
|
|
266
260
|
additionalProperties: false
|
|
267
261
|
},
|
|
268
262
|
async execute(toolCallId, params) {
|
|
@@ -295,17 +289,15 @@ Example User Queries:
|
|
|
295
289
|
parameters: {
|
|
296
290
|
type: 'object',
|
|
297
291
|
properties: {
|
|
298
|
-
|
|
299
|
-
|
|
292
|
+
url: { type: 'string', description: 'Data source URL (e.g. tos://bucket/prefix, oss://..., file:///path)' },
|
|
293
|
+
vendor: { type: 'string', enum: ['volcengine', 'alibaba', 'tencent', 'aws', 'local'], description: 'Required if url is not provided' },
|
|
294
|
+
bucket: { type: 'string', description: 'Required if url is not provided' },
|
|
300
295
|
prefix: { type: 'string' },
|
|
301
296
|
endpoint: { type: 'string' },
|
|
302
|
-
access_key: { type: 'string' },
|
|
303
|
-
secret_key: { type: 'string' },
|
|
304
|
-
region: { type: 'string' },
|
|
305
297
|
maxKeys: { type: 'integer' },
|
|
306
298
|
continuationToken: { type: 'string' }
|
|
307
299
|
},
|
|
308
|
-
required: [
|
|
300
|
+
required: [],
|
|
309
301
|
additionalProperties: false
|
|
310
302
|
},
|
|
311
303
|
async execute(toolCallId, params) {
|
|
@@ -326,22 +318,40 @@ Example User Queries:
|
|
|
326
318
|
parameters: {
|
|
327
319
|
type: 'object',
|
|
328
320
|
properties: {
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
321
|
+
url: { type: 'string', description: 'Full URL to the object (e.g. tos://bucket/path/to/key.txt)' },
|
|
322
|
+
vendor: { type: 'string', enum: ['volcengine', 'alibaba', 'tencent', 'aws', 'local'], description: 'Required if url is not provided' },
|
|
323
|
+
bucket: { type: 'string', description: 'Required if url is not provided' },
|
|
324
|
+
key: { type: 'string', description: 'Required if url is not provided' },
|
|
332
325
|
endpoint: { type: 'string' },
|
|
333
|
-
access_key: { type: 'string' },
|
|
334
|
-
secret_key: { type: 'string' },
|
|
335
|
-
region: { type: 'string' },
|
|
336
326
|
maxBytes: { type: 'integer' }
|
|
337
327
|
},
|
|
338
|
-
required: [
|
|
328
|
+
required: [],
|
|
339
329
|
additionalProperties: false
|
|
340
330
|
},
|
|
341
331
|
async execute(toolCallId, params) {
|
|
342
332
|
let actualParams = params.params || params;
|
|
343
333
|
try {
|
|
344
|
-
|
|
334
|
+
// Extract key from url if provided
|
|
335
|
+
let key = actualParams.key;
|
|
336
|
+
if (actualParams.url && !key) {
|
|
337
|
+
try {
|
|
338
|
+
if (actualParams.url.startsWith('file://')) {
|
|
339
|
+
// Key is not strictly needed for file://, bucket contains the path in parseS3Url
|
|
340
|
+
key = '';
|
|
341
|
+
}
|
|
342
|
+
else {
|
|
343
|
+
const parsedUrl = new URL(actualParams.url);
|
|
344
|
+
key = parsedUrl.pathname.replace(/^\//, '');
|
|
345
|
+
}
|
|
346
|
+
}
|
|
347
|
+
catch (e) {
|
|
348
|
+
// let it fail in readS3Object
|
|
349
|
+
}
|
|
350
|
+
}
|
|
351
|
+
if (!key && !actualParams.url?.startsWith('file://')) {
|
|
352
|
+
throw new Error('key is required or must be part of the url');
|
|
353
|
+
}
|
|
354
|
+
const buf = await (0, s3_tools_1.readS3Object)(actualParams, key, actualParams.maxBytes);
|
|
345
355
|
// Return as base64 string
|
|
346
356
|
return { content: [{ type: "text", text: buf.toString('base64') }], details: { length: buf.length } };
|
|
347
357
|
}
|
|
@@ -350,6 +360,52 @@ Example User Queries:
|
|
|
350
360
|
}
|
|
351
361
|
}
|
|
352
362
|
},
|
|
363
|
+
generatePresignedUrlTool: {
|
|
364
|
+
name: 'generate-presigned-url',
|
|
365
|
+
label: 'Generate Presigned URL',
|
|
366
|
+
description: 'Generate a presigned HTTP URL for an S3/TOS object, allowing temporary public access',
|
|
367
|
+
parameters: {
|
|
368
|
+
type: 'object',
|
|
369
|
+
properties: {
|
|
370
|
+
url: { type: 'string', description: 'Full URL to the object (e.g. tos://bucket/path/to/key.txt)' },
|
|
371
|
+
vendor: { type: 'string', enum: ['volcengine', 'alibaba', 'tencent', 'aws', 'local'], description: 'Required if url is not provided' },
|
|
372
|
+
bucket: { type: 'string', description: 'Required if url is not provided' },
|
|
373
|
+
key: { type: 'string', description: 'Required if url is not provided' },
|
|
374
|
+
endpoint: { type: 'string' },
|
|
375
|
+
expiresIn: { type: 'integer', description: 'Expiration time in seconds (default 3600)' }
|
|
376
|
+
},
|
|
377
|
+
required: [],
|
|
378
|
+
additionalProperties: false
|
|
379
|
+
},
|
|
380
|
+
async execute(toolCallId, params) {
|
|
381
|
+
let actualParams = params.params || params;
|
|
382
|
+
try {
|
|
383
|
+
let key = actualParams.key;
|
|
384
|
+
if (actualParams.url && !key) {
|
|
385
|
+
try {
|
|
386
|
+
if (actualParams.url.startsWith('file://')) {
|
|
387
|
+
key = '';
|
|
388
|
+
}
|
|
389
|
+
else {
|
|
390
|
+
const parsedUrl = new URL(actualParams.url);
|
|
391
|
+
key = parsedUrl.pathname.replace(/^\//, '');
|
|
392
|
+
}
|
|
393
|
+
}
|
|
394
|
+
catch (e) {
|
|
395
|
+
// let it fail in getPresignedUrl
|
|
396
|
+
}
|
|
397
|
+
}
|
|
398
|
+
if (!key && !actualParams.url?.startsWith('file://')) {
|
|
399
|
+
throw new Error('key is required or must be part of the url');
|
|
400
|
+
}
|
|
401
|
+
const url = await (0, s3_tools_1.getPresignedUrl)(actualParams, key, actualParams.expiresIn);
|
|
402
|
+
return { content: [{ type: "text", text: url }], details: { url } };
|
|
403
|
+
}
|
|
404
|
+
catch (e) {
|
|
405
|
+
return { content: [{ type: "text", text: String(e.message) }], details: { error: e.message } };
|
|
406
|
+
}
|
|
407
|
+
}
|
|
408
|
+
},
|
|
353
409
|
writeLanceCatalogTool: {
|
|
354
410
|
name: 'write-lance-catalog',
|
|
355
411
|
label: 'Write LanceDB Catalog',
|
|
@@ -374,6 +430,32 @@ Example User Queries:
|
|
|
374
430
|
return { content: [{ type: "text", text: String(e.message) }], details: { error: e.message } };
|
|
375
431
|
}
|
|
376
432
|
}
|
|
433
|
+
},
|
|
434
|
+
readLanceCatalogTool: {
|
|
435
|
+
name: 'read-lance-catalog',
|
|
436
|
+
label: 'Read LanceDB Catalog',
|
|
437
|
+
description: 'Read records from a local LanceDB table for validation or ingestion processes',
|
|
438
|
+
parameters: {
|
|
439
|
+
type: 'object',
|
|
440
|
+
properties: {
|
|
441
|
+
db_path: { type: 'string', description: 'Path to the local LanceDB database' },
|
|
442
|
+
table_name: { type: 'string', description: 'Name of the table to read' },
|
|
443
|
+
limit: { type: 'integer', description: 'Maximum number of records to return' },
|
|
444
|
+
filter: { type: 'string', description: 'SQL-like filter string (e.g., "category = \'structured\'")' }
|
|
445
|
+
},
|
|
446
|
+
required: ['db_path', 'table_name'],
|
|
447
|
+
additionalProperties: false
|
|
448
|
+
},
|
|
449
|
+
async execute(toolCallId, params) {
|
|
450
|
+
let actualParams = params.params || params;
|
|
451
|
+
try {
|
|
452
|
+
const results = await (0, lance_tools_1.readLanceCatalog)(actualParams);
|
|
453
|
+
return { content: [{ type: "text", text: JSON.stringify(results) }], details: { count: results.length, data: results } };
|
|
454
|
+
}
|
|
455
|
+
catch (e) {
|
|
456
|
+
return { content: [{ type: "text", text: String(e.message) }], details: { error: e.message } };
|
|
457
|
+
}
|
|
458
|
+
}
|
|
377
459
|
}
|
|
378
460
|
};
|
|
379
461
|
}
|
|
@@ -3,4 +3,11 @@ export interface LanceWriteParams {
|
|
|
3
3
|
table_name: string;
|
|
4
4
|
records: any[];
|
|
5
5
|
}
|
|
6
|
+
export interface LanceReadParams {
|
|
7
|
+
db_path: string;
|
|
8
|
+
table_name: string;
|
|
9
|
+
limit?: number;
|
|
10
|
+
filter?: string;
|
|
11
|
+
}
|
|
6
12
|
export declare function writeLanceCatalog(params: LanceWriteParams): Promise<void>;
|
|
13
|
+
export declare function readLanceCatalog(params: LanceReadParams): Promise<any[]>;
|
|
@@ -34,7 +34,9 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
34
34
|
})();
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
36
|
exports.writeLanceCatalog = writeLanceCatalog;
|
|
37
|
+
exports.readLanceCatalog = readLanceCatalog;
|
|
37
38
|
const lancedb = __importStar(require("@lancedb/lancedb"));
|
|
39
|
+
const fs = __importStar(require("fs"));
|
|
38
40
|
async function writeLanceCatalog(params) {
|
|
39
41
|
if (!params.records || params.records.length === 0) {
|
|
40
42
|
return;
|
|
@@ -49,3 +51,23 @@ async function writeLanceCatalog(params) {
|
|
|
49
51
|
await db.createTable(params.table_name, params.records);
|
|
50
52
|
}
|
|
51
53
|
}
|
|
54
|
+
async function readLanceCatalog(params) {
|
|
55
|
+
if (!fs.existsSync(params.db_path)) {
|
|
56
|
+
throw new Error(`Database not found at ${params.db_path}`);
|
|
57
|
+
}
|
|
58
|
+
const db = await lancedb.connect(params.db_path);
|
|
59
|
+
const tableNames = await db.tableNames();
|
|
60
|
+
if (!tableNames.includes(params.table_name)) {
|
|
61
|
+
throw new Error(`Table '${params.table_name}' not found in database`);
|
|
62
|
+
}
|
|
63
|
+
const table = await db.openTable(params.table_name);
|
|
64
|
+
let query = table.query();
|
|
65
|
+
if (params.filter) {
|
|
66
|
+
query = query.where(params.filter);
|
|
67
|
+
}
|
|
68
|
+
if (params.limit && params.limit > 0) {
|
|
69
|
+
query = query.limit(params.limit);
|
|
70
|
+
}
|
|
71
|
+
const results = await query.toArray();
|
|
72
|
+
return results;
|
|
73
|
+
}
|
|
@@ -2,8 +2,35 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.getLasTools = getLasTools;
|
|
4
4
|
const las_api_1 = require("./las-api");
|
|
5
|
+
const s3_tools_1 = require("./s3-tools");
|
|
5
6
|
function getLasTools(pluginConfig, logger) {
|
|
6
7
|
const apiClient = new las_api_1.LasApiClient(pluginConfig, logger);
|
|
8
|
+
const processUrl = async (url) => {
|
|
9
|
+
if (!url)
|
|
10
|
+
return url;
|
|
11
|
+
if (url.startsWith('tos://')) {
|
|
12
|
+
// LAS operators prefer native tos:// paths when supported, leave as is
|
|
13
|
+
return url;
|
|
14
|
+
}
|
|
15
|
+
else if (url.startsWith('oss://') || url.startsWith('s3://') || url.startsWith('cos://') || url.startsWith('file://')) {
|
|
16
|
+
// Need presigned HTTP url for other vendors
|
|
17
|
+
logger.info(`[LasTools] Presigning URL for vendor: ${url}`);
|
|
18
|
+
try {
|
|
19
|
+
// If it's a file:// we also presign it to file:// which might not be supported by remote LAS,
|
|
20
|
+
// but local files typically need to be uploaded to TOS first. We'll leave file:// to fail or be handled elsewhere.
|
|
21
|
+
if (url.startsWith('file://'))
|
|
22
|
+
return url;
|
|
23
|
+
const urlParts = new URL(url);
|
|
24
|
+
const key = urlParts.pathname.replace(/^\//, '');
|
|
25
|
+
return await (0, s3_tools_1.getPresignedUrl)({ url }, key, 3600);
|
|
26
|
+
}
|
|
27
|
+
catch (e) {
|
|
28
|
+
logger.warn(`[LasTools] Failed to presign URL: ${url}`, { error: e.message });
|
|
29
|
+
return url; // fallback to original
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
return url;
|
|
33
|
+
};
|
|
7
34
|
const callApi = async (method, args) => {
|
|
8
35
|
try {
|
|
9
36
|
// @ts-ignore
|
|
@@ -33,6 +60,9 @@ Parameters in data:
|
|
|
33
60
|
required: ['data']
|
|
34
61
|
},
|
|
35
62
|
async execute(toolCallId, params) {
|
|
63
|
+
if (params.data?.image) {
|
|
64
|
+
params.data.image = await processUrl(params.data.image);
|
|
65
|
+
}
|
|
36
66
|
return await callApi('process', ['las_image_resample', params.data]);
|
|
37
67
|
}
|
|
38
68
|
},
|
|
@@ -53,6 +83,9 @@ Parameters in data:
|
|
|
53
83
|
required: ['data']
|
|
54
84
|
},
|
|
55
85
|
async execute(toolCallId, params) {
|
|
86
|
+
if (params.data?.input_path) {
|
|
87
|
+
params.data.input_path = await processUrl(params.data.input_path);
|
|
88
|
+
}
|
|
56
89
|
return await callApi('process', ['las_audio_extract_and_split', params.data]);
|
|
57
90
|
}
|
|
58
91
|
},
|
|
@@ -71,6 +104,9 @@ Parameters in data:
|
|
|
71
104
|
required: ['data']
|
|
72
105
|
},
|
|
73
106
|
async execute(toolCallId, params) {
|
|
107
|
+
if (params.data?.input_path) {
|
|
108
|
+
params.data.input_path = await processUrl(params.data.input_path);
|
|
109
|
+
}
|
|
74
110
|
return await callApi('process', ['las_audio_convert', params.data]);
|
|
75
111
|
}
|
|
76
112
|
},
|
|
@@ -89,6 +125,9 @@ Parameters in data:
|
|
|
89
125
|
required: ['data']
|
|
90
126
|
},
|
|
91
127
|
async execute(toolCallId, params) {
|
|
128
|
+
if (params.data?.audio?.url) {
|
|
129
|
+
params.data.audio.url = await processUrl(params.data.audio.url);
|
|
130
|
+
}
|
|
92
131
|
return await callApi('submitAndPoll', ['las_asr_pro', params.data]);
|
|
93
132
|
}
|
|
94
133
|
},
|
|
@@ -105,6 +144,9 @@ Parameters in data:
|
|
|
105
144
|
required: ['data']
|
|
106
145
|
},
|
|
107
146
|
async execute(toolCallId, params) {
|
|
147
|
+
if (params.data?.audio?.url) {
|
|
148
|
+
params.data.audio.url = await processUrl(params.data.audio.url);
|
|
149
|
+
}
|
|
108
150
|
return await callApi('submitAndPoll', ['las_seed_2_0', params.data]);
|
|
109
151
|
}
|
|
110
152
|
},
|
|
@@ -128,6 +170,13 @@ Parameters:
|
|
|
128
170
|
required: ['model', 'input']
|
|
129
171
|
},
|
|
130
172
|
async execute(toolCallId, params) {
|
|
173
|
+
if (params.input && Array.isArray(params.input)) {
|
|
174
|
+
for (const item of params.input) {
|
|
175
|
+
if (item.type === 'image_url' && item.image_url?.url) {
|
|
176
|
+
item.image_url.url = await processUrl(item.image_url.url);
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
}
|
|
131
180
|
return await callApi('multimodalEmbedding', [
|
|
132
181
|
params.model,
|
|
133
182
|
params.input,
|
|
@@ -151,6 +200,9 @@ Parameters in data:
|
|
|
151
200
|
required: ['data']
|
|
152
201
|
},
|
|
153
202
|
async execute(toolCallId, params) {
|
|
203
|
+
if (params.data?.video_url) {
|
|
204
|
+
params.data.video_url = await processUrl(params.data.video_url);
|
|
205
|
+
}
|
|
154
206
|
return await callApi('submitAndPoll', ['las_long_video_understand', params.data]);
|
|
155
207
|
}
|
|
156
208
|
},
|
|
@@ -169,6 +221,9 @@ Parameters in data:
|
|
|
169
221
|
required: ['data']
|
|
170
222
|
},
|
|
171
223
|
async execute(toolCallId, params) {
|
|
224
|
+
if (params.data?.url) {
|
|
225
|
+
params.data.url = await processUrl(params.data.url);
|
|
226
|
+
}
|
|
172
227
|
return await callApi('submitAndPoll', ['las_pdf_parse_doubao', params.data]);
|
|
173
228
|
}
|
|
174
229
|
},
|
|
@@ -187,6 +242,9 @@ Parameters in data:
|
|
|
187
242
|
required: ['data']
|
|
188
243
|
},
|
|
189
244
|
async execute(toolCallId, params) {
|
|
245
|
+
if (params.data?.video_url) {
|
|
246
|
+
params.data.video_url = await processUrl(params.data.video_url);
|
|
247
|
+
}
|
|
190
248
|
return await callApi('submitAndPoll', ['las_video_resize', params.data]);
|
|
191
249
|
}
|
|
192
250
|
}
|
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
export interface ConnectParams {
|
|
2
2
|
datasource_name: string;
|
|
3
|
-
|
|
3
|
+
url: string;
|
|
4
|
+
vendor?: 'volcengine' | 'alibaba' | 'tencent' | 'aws' | 'local';
|
|
4
5
|
endpoint?: string;
|
|
5
6
|
access_key?: string;
|
|
6
7
|
secret_key?: string;
|
|
7
8
|
region?: string;
|
|
8
|
-
bucket
|
|
9
|
-
prefix
|
|
9
|
+
bucket?: string;
|
|
10
|
+
prefix?: string;
|
|
10
11
|
sample_rows?: number;
|
|
11
12
|
}
|
|
12
13
|
export interface ConnectResult {
|
|
@@ -98,12 +98,41 @@ function classifyFile(ext) {
|
|
|
98
98
|
async function connectDataSource(params, _ctx) {
|
|
99
99
|
if (!params.datasource_name)
|
|
100
100
|
throw new Error('datasource_name is required');
|
|
101
|
-
if (!params.
|
|
102
|
-
throw new Error('
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
101
|
+
if (!params.url)
|
|
102
|
+
throw new Error('url is required (e.g. tos://bucket/prefix)');
|
|
103
|
+
// Parse URL: tos://bucket/prefix
|
|
104
|
+
try {
|
|
105
|
+
if (params.url.startsWith('file://') || params.url.startsWith('/')) {
|
|
106
|
+
params.vendor = 'local';
|
|
107
|
+
const localPath = params.url.startsWith('file://') ? params.url.slice(7) : params.url;
|
|
108
|
+
params.bucket = localPath;
|
|
109
|
+
params.prefix = '.';
|
|
110
|
+
}
|
|
111
|
+
else {
|
|
112
|
+
const parsedUrl = new URL(params.url);
|
|
113
|
+
const protocol = parsedUrl.protocol.replace(':', '');
|
|
114
|
+
if (['tos', 'oss', 'cos', 's3'].includes(protocol)) {
|
|
115
|
+
if (protocol === 'tos')
|
|
116
|
+
params.vendor = 'volcengine';
|
|
117
|
+
else if (protocol === 'oss')
|
|
118
|
+
params.vendor = 'alibaba';
|
|
119
|
+
else if (protocol === 'cos')
|
|
120
|
+
params.vendor = 'tencent';
|
|
121
|
+
else if (protocol === 's3')
|
|
122
|
+
params.vendor = 'aws';
|
|
123
|
+
params.bucket = parsedUrl.hostname;
|
|
124
|
+
params.prefix = parsedUrl.pathname.replace(/^\//, ''); // Remove leading slash
|
|
125
|
+
}
|
|
126
|
+
else {
|
|
127
|
+
throw new Error(`Unsupported protocol: ${protocol}`);
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
catch (e) {
|
|
132
|
+
if (!params.vendor || !params.bucket || params.prefix === undefined) {
|
|
133
|
+
throw new Error(`Invalid url format: ${e.message}`);
|
|
134
|
+
}
|
|
135
|
+
}
|
|
107
136
|
if (params.vendor !== 'local') {
|
|
108
137
|
if (!params.endpoint && params.vendor !== 'aws')
|
|
109
138
|
throw new Error(`endpoint is required for vendor "${params.vendor}"`);
|
|
@@ -139,7 +168,14 @@ async function connectDataSource(params, _ctx) {
|
|
|
139
168
|
const allRecords = [];
|
|
140
169
|
const scan_ts = new Date().toISOString() + 'Z';
|
|
141
170
|
while (isTruncated) {
|
|
142
|
-
const response = await (0, s3_tools_1.listS3Objects)(
|
|
171
|
+
const response = await (0, s3_tools_1.listS3Objects)({
|
|
172
|
+
vendor: params.vendor,
|
|
173
|
+
bucket: params.bucket,
|
|
174
|
+
endpoint: params.endpoint,
|
|
175
|
+
access_key: params.access_key,
|
|
176
|
+
secret_key: params.secret_key,
|
|
177
|
+
region: params.region
|
|
178
|
+
}, params.prefix || '', 1000, continuationToken);
|
|
143
179
|
for (const obj of response.Contents) {
|
|
144
180
|
const key = obj.Key || '';
|
|
145
181
|
if (key.endsWith('/'))
|
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
export interface S3Params {
|
|
2
|
-
|
|
2
|
+
url?: string;
|
|
3
|
+
vendor?: 'volcengine' | 'alibaba' | 'tencent' | 'aws' | 'local';
|
|
3
4
|
endpoint?: string;
|
|
4
5
|
access_key?: string;
|
|
5
6
|
secret_key?: string;
|
|
6
7
|
region?: string;
|
|
7
|
-
bucket
|
|
8
|
+
bucket?: string;
|
|
8
9
|
}
|
|
10
|
+
export declare function parseS3Url(params: S3Params): S3Params;
|
|
9
11
|
export declare function listS3Objects(params: S3Params, prefix: string, maxKeys?: number, continuationToken?: string): Promise<{
|
|
10
12
|
Contents: any[];
|
|
11
13
|
IsTruncated: boolean;
|
|
@@ -16,3 +18,4 @@ export declare function listS3Objects(params: S3Params, prefix: string, maxKeys?
|
|
|
16
18
|
NextContinuationToken: string | undefined;
|
|
17
19
|
}>;
|
|
18
20
|
export declare function readS3Object(params: S3Params, key: string, maxBytes?: number): Promise<Buffer>;
|
|
21
|
+
export declare function getPresignedUrl(params: S3Params, key: string, expiresIn?: number): Promise<string>;
|
|
@@ -33,12 +33,48 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
33
33
|
};
|
|
34
34
|
})();
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.parseS3Url = parseS3Url;
|
|
36
37
|
exports.listS3Objects = listS3Objects;
|
|
37
38
|
exports.readS3Object = readS3Object;
|
|
39
|
+
exports.getPresignedUrl = getPresignedUrl;
|
|
38
40
|
const client_s3_1 = require("@aws-sdk/client-s3");
|
|
41
|
+
const s3_request_presigner_1 = require("@aws-sdk/s3-request-presigner");
|
|
39
42
|
const fs = __importStar(require("fs"));
|
|
40
43
|
const path = __importStar(require("path"));
|
|
44
|
+
function parseS3Url(params) {
|
|
45
|
+
if (params.url) {
|
|
46
|
+
if (params.url.startsWith('file://') || params.url.startsWith('/')) {
|
|
47
|
+
params.vendor = 'local';
|
|
48
|
+
const localPath = params.url.startsWith('file://') ? params.url.slice(7) : params.url;
|
|
49
|
+
params.bucket = localPath;
|
|
50
|
+
}
|
|
51
|
+
else {
|
|
52
|
+
const parsedUrl = new URL(params.url);
|
|
53
|
+
const protocol = parsedUrl.protocol.replace(':', '');
|
|
54
|
+
if (['tos', 'oss', 'cos', 's3'].includes(protocol)) {
|
|
55
|
+
if (protocol === 'tos')
|
|
56
|
+
params.vendor = 'volcengine';
|
|
57
|
+
else if (protocol === 'oss')
|
|
58
|
+
params.vendor = 'alibaba';
|
|
59
|
+
else if (protocol === 'cos')
|
|
60
|
+
params.vendor = 'tencent';
|
|
61
|
+
else if (protocol === 's3')
|
|
62
|
+
params.vendor = 'aws';
|
|
63
|
+
params.bucket = parsedUrl.hostname;
|
|
64
|
+
// Prefix is usually parsed separately or passed explicitly for listing
|
|
65
|
+
}
|
|
66
|
+
else {
|
|
67
|
+
throw new Error(`Unsupported protocol: ${protocol}`);
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
if (!params.vendor || !params.bucket) {
|
|
72
|
+
throw new Error('Could not determine vendor or bucket. Please provide a valid url or vendor/bucket directly.');
|
|
73
|
+
}
|
|
74
|
+
return params;
|
|
75
|
+
}
|
|
41
76
|
function createS3Client(params) {
|
|
77
|
+
params = parseS3Url(params);
|
|
42
78
|
if (params.vendor === 'local')
|
|
43
79
|
return null;
|
|
44
80
|
let endpoint = params.endpoint;
|
|
@@ -80,6 +116,7 @@ function createS3Client(params) {
|
|
|
80
116
|
});
|
|
81
117
|
}
|
|
82
118
|
async function listS3Objects(params, prefix, maxKeys = 1000, continuationToken) {
|
|
119
|
+
params = parseS3Url(params);
|
|
83
120
|
if (params.vendor === 'local') {
|
|
84
121
|
const root = params.bucket;
|
|
85
122
|
const prefixPath = prefix && prefix !== '.' ? path.join(root, prefix) : root;
|
|
@@ -130,6 +167,7 @@ async function listS3Objects(params, prefix, maxKeys = 1000, continuationToken)
|
|
|
130
167
|
};
|
|
131
168
|
}
|
|
132
169
|
async function readS3Object(params, key, maxBytes) {
|
|
170
|
+
params = parseS3Url(params);
|
|
133
171
|
if (params.vendor === 'local') {
|
|
134
172
|
const fullPath = path.join(params.bucket, key);
|
|
135
173
|
if (maxBytes) {
|
|
@@ -165,3 +203,19 @@ async function readS3Object(params, key, maxBytes) {
|
|
|
165
203
|
}
|
|
166
204
|
return Buffer.alloc(0);
|
|
167
205
|
}
|
|
206
|
+
async function getPresignedUrl(params, key, expiresIn = 3600) {
|
|
207
|
+
params = parseS3Url(params);
|
|
208
|
+
if (params.vendor === 'local') {
|
|
209
|
+
const fullPath = path.join(params.bucket, key);
|
|
210
|
+
return `file://${fullPath}`;
|
|
211
|
+
}
|
|
212
|
+
const client = createS3Client(params);
|
|
213
|
+
if (!client)
|
|
214
|
+
throw new Error('Failed to create S3 client');
|
|
215
|
+
const command = new client_s3_1.GetObjectCommand({
|
|
216
|
+
Bucket: params.bucket,
|
|
217
|
+
Key: key
|
|
218
|
+
});
|
|
219
|
+
const signedUrl = await (0, s3_request_presigner_1.getSignedUrl)(client, command, { expiresIn });
|
|
220
|
+
return signedUrl;
|
|
221
|
+
}
|