@byted-las/contextlake-openclaw 1.0.0 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/dist/index.d.ts +2 -1
  2. package/dist/index.js +5 -5
  3. package/dist/src/client/lancedb.js +13 -4
  4. package/dist/src/commands/cli.d.ts +5 -2
  5. package/dist/src/commands/cli.js +94 -10
  6. package/dist/src/commands/index.d.ts +2 -1
  7. package/dist/src/commands/index.js +31 -35
  8. package/dist/src/commands/slashcmd.d.ts +8 -1
  9. package/dist/src/commands/slashcmd.js +90 -6
  10. package/dist/src/commands/tools.d.ts +10 -218
  11. package/dist/src/commands/tools.js +109 -104
  12. package/dist/src/lib/actions/ingest-source.d.ts +15 -0
  13. package/dist/src/lib/actions/ingest-source.js +193 -0
  14. package/dist/src/lib/actions/ingest.d.ts +14 -7
  15. package/dist/src/lib/actions/ingest.js +133 -63
  16. package/dist/src/lib/actions/las-api.d.ts +13 -0
  17. package/dist/src/lib/actions/las-api.js +105 -0
  18. package/dist/src/lib/actions/las-tools.d.ts +3 -0
  19. package/dist/src/lib/actions/las-tools.js +194 -0
  20. package/dist/src/lib/actions/las.d.ts +64 -0
  21. package/dist/src/lib/actions/las.js +72 -0
  22. package/dist/src/lib/actions/manage.d.ts +3 -2
  23. package/dist/src/{skills/las-data-profiler/index.d.ts → lib/actions/profiler.d.ts} +4 -2
  24. package/dist/src/{skills/las-data-profiler/index.js → lib/actions/profiler.js} +19 -3
  25. package/dist/src/lib/actions/retrieve.d.ts +2 -1
  26. package/dist/src/lib/actions/retrieve.js +2 -18
  27. package/{src/skills/las-data-profiler → dist/src/lib/scripts}/s3_catalog.py +10 -1
  28. package/dist/src/processor/loader.js +9 -2
  29. package/dist/src/service/embedding/factory.js +1 -10
  30. package/dist/src/service/embedding/interface.d.ts +8 -1
  31. package/dist/src/service/embedding/local.js +16 -13
  32. package/dist/src/service/embedding/remote.d.ts +7 -0
  33. package/dist/src/service/embedding/remote.js +108 -7
  34. package/dist/src/service/metadata/interface.d.ts +1 -0
  35. package/dist/src/service/metadata/local.d.ts +1 -0
  36. package/dist/src/service/metadata/local.js +6 -0
  37. package/dist/src/skills/SKILL.md +174 -0
  38. package/dist/src/skills/contextlake-delete/SKILL.md +36 -0
  39. package/dist/src/skills/contextlake-ingest/SKILL.md +40 -0
  40. package/dist/src/skills/contextlake-list/SKILL.md +22 -0
  41. package/dist/src/skills/contextlake-retrieve/SKILL.md +37 -0
  42. package/dist/src/skills/las-data-profiler/SKILL.md +174 -0
  43. package/dist/src/utils/config.d.ts +34 -1
  44. package/dist/src/utils/config.js +16 -3
  45. package/dist/src/utils/credentials.d.ts +8 -0
  46. package/dist/src/utils/credentials.js +77 -0
  47. package/index.ts +8 -8
  48. package/openclaw.plugin.json +1 -1
  49. package/package.json +8 -7
  50. package/src/client/lancedb.ts +32 -21
  51. package/src/commands/cli.ts +105 -13
  52. package/src/commands/index.ts +45 -42
  53. package/src/commands/slashcmd.ts +69 -10
  54. package/src/commands/tools.ts +142 -117
  55. package/src/lib/actions/ingest.ts +151 -75
  56. package/src/lib/actions/las-api.ts +119 -0
  57. package/src/lib/actions/las-tools.ts +196 -0
  58. package/src/lib/actions/manage.ts +6 -5
  59. package/src/{skills/las-data-profiler/index.ts → lib/actions/profiler.ts} +21 -4
  60. package/src/lib/actions/retrieve.ts +16 -34
  61. package/src/lib/scripts/s3_catalog.py +617 -0
  62. package/src/processor/loader.ts +12 -4
  63. package/src/service/embedding/factory.ts +1 -8
  64. package/src/service/embedding/interface.ts +9 -1
  65. package/src/service/embedding/remote.ts +133 -13
  66. package/src/service/metadata/interface.ts +1 -0
  67. package/src/service/metadata/local.ts +7 -0
  68. package/src/service/storage/factory.ts +2 -2
  69. package/src/utils/config.ts +61 -8
  70. package/src/utils/credentials.ts +50 -0
  71. package/bin/contextlake-openclaw.js +0 -5
  72. package/dist/src/skills/las-data-profiler/register.d.ts +0 -1
  73. package/dist/src/skills/las-data-profiler/register.js +0 -19
  74. package/src/service/embedding/local.ts +0 -118
  75. package/src/skills/las-data-profiler/register.ts +0 -19
@@ -1,103 +1,179 @@
1
1
  import { createStorageProvider } from '../../service/storage/factory';
2
2
  import { createMetadataProvider } from '../../service/metadata/factory';
3
- import { processFile, splitText } from '../../processor/loader';
3
+ import { LasApiClient } from './las-api';
4
+ import { ContextLakeConfig } from '../../utils/config';
5
+ import { getLasTools } from './las-tools';
6
+ import * as lancedb from '@lancedb/lancedb';
4
7
  import * as path from 'path';
8
+ import * as fs from 'fs';
9
+ import * as os from 'os';
5
10
  // @ts-ignore
6
11
  import { v4 as uuidv4 } from 'uuid';
7
12
 
8
- interface IngestParams {
9
- files: string[];
10
- metadata?: Record<string, any>;
11
- chunkSize?: number;
12
- overlap?: number;
13
+ export interface IngestSourceParams {
14
+ datasource_name: string;
13
15
  }
14
16
 
15
- export async function ingestAssets(params: IngestParams, config: any, logger?: any) {
17
+ const BASE_DIR = path.join(os.homedir(), '.openclaw', 'contextlake', 'profiler');
18
+
19
/**
 * Ingests every file recorded in a profiled data source's `file_catalog`
 * LanceDB table into the ContextLake metadata store.
 *
 * Per catalog row, dispatching on `media_type` / `category`:
 *  - `pdf`   -> parsed to markdown via `las_pdf_parse_doubao`, chunked, embedded
 *  - `image` -> embedded directly from its URL (one document)
 *  - `audio` -> transcribed via `las_asr_pro`, chunked, embedded
 *  - `video` -> described via `las_long_video_understand`, audio extracted and
 *               transcribed, both texts combined, chunked, embedded
 *  - `structured` / `non-structured` categories -> skipped with a warning
 *
 * A failure on one file is recorded in the result list and does not abort the
 * remaining files.
 *
 * @param params - `datasource_name` selects the directory under BASE_DIR.
 * @param config - plugin configuration; `metadata_storage` selects the provider.
 * @param logger - optional logger; falls back to `console.log` for the entry
 *                 message and to silence elsewhere.
 * @returns one entry per file that produced documents (`status: 'success'`)
 *          or failed (`status: 'error'`).
 * @throws Error if the data source name escapes BASE_DIR, the catalog
 *         database is missing, or it has no `file_catalog` table.
 */
export async function ingestSource(params: IngestSourceParams, config: ContextLakeConfig, logger?: any) {
  const callMessage = `[ContextLake-Action] Calling ingestSource with params: ${JSON.stringify(params)}`;
  if (logger) {
    logger.info(callMessage);
  } else {
    // eslint-disable-next-line no-console
    console.log(callMessage);
  }

  const dsDir = path.join(BASE_DIR, params.datasource_name);

  // datasource_name comes from a tool/CLI argument: refuse anything that
  // resolves to BASE_DIR itself or outside of it (e.g. "../..").
  const relativeDir = path.relative(BASE_DIR, dsDir);
  if (
    !relativeDir ||
    relativeDir === '..' ||
    relativeDir.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relativeDir)
  ) {
    throw new Error(`Invalid datasource_name: ${JSON.stringify(params.datasource_name)}`);
  }

  const dbPath = path.join(dsDir, 'catalog_db');

  if (!fs.existsSync(dbPath)) {
    throw new Error(`Data source database not found at ${dbPath}. Please run profiler connect first.`);
  }

  const metaConfig = config.metadata_storage || { type: 'local', lancedb_uri: './data/contextlake' };
  const metadataProvider = createMetadataProvider(metaConfig as any);
  await metadataProvider.connect();

  // LasApiClient logs through its logger unconditionally, while `logger` is
  // optional here; hand it a silent stand-in instead of `undefined`.
  const silentLogger = { debug() {}, info() {}, warn() {}, error() {} };
  const lasClient = new LasApiClient(config, logger ?? silentLogger);
  const results = [];

  // Connect to the profiler LanceDB to read the file catalog
  const profilerDb = await lancedb.connect(dbPath);
  const tableNames = await profilerDb.tableNames();

  if (!tableNames.includes('file_catalog')) {
    throw new Error(`table 'file_catalog' not found in ${dbPath}`);
  }

  const catalogTable = await profilerDb.openTable('file_catalog');
  const files = await catalogTable.query().toArray();

  logger?.info(`[ContextLake-Action] Found ${files.length} files in catalog`);

  // Fixed-size character chunking with overlap.
  const splitText = (text: string, chunkSize: number = 500, overlap: number = 50) => {
    const chunks: string[] = [];
    if (!text) return chunks;
    // Always advance by at least one character so overlap >= chunkSize
    // cannot spin forever.
    const step = Math.max(1, chunkSize - overlap);
    for (let i = 0; i < text.length; i += step) {
      chunks.push(text.slice(i, i + chunkSize));
    }
    return chunks;
  };

  // Embedding is an optional capability of the provider; fail with a readable
  // message instead of "undefined is not a function".
  const embed = async (input: any[]) => {
    if (typeof metadataProvider.generateMultimodalEmbedding !== 'function') {
      throw new Error('The configured metadata provider does not support multimodal embeddings.');
    }
    return metadataProvider.generateMultimodalEmbedding(input);
  };

  // Shape of one stored document; source files stay where they are, so
  // binary_data is always empty and storage_type is 'source'.
  const buildDoc = (vector: any, text: string, fileInfo: any, fileType: any, url: string) => ({
    id: uuidv4(),
    vector,
    text,
    source: fileInfo.key,
    file_type: fileType,
    storage_type: 'source',
    url,
    metadata: JSON.stringify({ datasource: params.datasource_name }),
    created_at: Date.now(),
    binary_data: Buffer.from('')
  });

  // Chunk a text and embed each chunk sequentially.
  const processText = async (text: string, fileInfo: any) => {
    const url = fileInfo.url || `tos://${fileInfo.bucket}/${fileInfo.key}`;
    const docs = [];
    for (const chunk of splitText(text)) {
      const vector = await embed([{ type: 'text', text: chunk }]);
      docs.push(buildDoc(vector, chunk, fileInfo, fileInfo.category, url));
    }
    return docs;
  };

  for (const file of files) {
    try {
      logger?.info(`[ContextLake-Action] Processing file: ${file.key}, type: ${file.media_type}`);
      let docs: any[] = [];
      const fileUrl = file.url || `tos://${file.bucket}/${file.key}`;

      if (file.media_type === 'pdf') {
        // PDF Parse
        const result = await lasClient.submitAndPoll('las_pdf_parse_doubao', {
          url: fileUrl
        });
        const markdown = result.data?.markdown || '';
        docs = await processText(markdown, file);
      } else if (file.media_type === 'image') {
        // Multimodal Embedding directly
        const vector = await embed([
          { type: 'image_url', image_url: { url: fileUrl } },
          { type: 'text', text: 'This is an image from the dataset.' }
        ]);
        docs.push(buildDoc(vector, 'Image from dataset', file, 'image', fileUrl));
      } else if (file.media_type === 'audio') {
        // ASR. Use the real extension; a key without one falls back to 'wav'
        // rather than sending the whole key as the format.
        const audioFormat = path.posix.extname(String(file.key)).slice(1) || 'wav';
        const result = await lasClient.submitAndPoll('las_asr_pro', {
          audio: { url: fileUrl, format: audioFormat },
          request: { model_name: 'bigmodel' }
        });
        const text = result.data?.result?.text || '';
        docs = await processText(text, file);
      } else if (file.media_type === 'video') {
        // Video understanding -> text -> embedding
        const result = await lasClient.submitAndPoll('las_long_video_understand', {
          video_url: fileUrl,
          query: "详细描述这个视频的内容",
          model_name: "doubao-seed-2-0-lite-260215"
        });
        // NOTE(review): the exact response structure is not known here, so the
        // whole `data` payload is serialised as the description text.
        const text = JSON.stringify(result.data || '');

        // 1. Extract audio. The operator takes a *template*; give every video
        //    its own prefix and keep the placeholders so the ASR URL below can
        //    be derived from it.
        const audioTemplate = `tos://${file.bucket}/.tmp/audio/${uuidv4()}/{index}.{output_file_ext}`;
        await lasClient.process('las_audio_extract_and_split', {
          input_path: fileUrl,
          output_path_template: audioTemplate,
          output_format: 'wav'
        });

        // 2. ASR on the extracted audio.
        // NOTE(review): assumes segment indices start at 0, as the original
        // code did; only the first segment is transcribed — confirm whether
        // later segments should be transcribed too.
        const firstSegmentUrl = audioTemplate.replace('{index}.{output_file_ext}', '0.wav');
        const asrResult = await lasClient.submitAndPoll('las_asr_pro', {
          audio: { url: firstSegmentUrl, format: 'wav' },
          request: { model_name: 'bigmodel' }
        });

        const audioText = asrResult.data?.result?.text || '';

        // Combine video text and audio text
        const combinedText = `Video Description: ${text}\n\nAudio Transcription: ${audioText}`;
        docs = await processText(combinedText, file);
      } else if (file.category === 'structured' || file.category === 'non-structured') {
        // Raw text / structured content would need a TOS download, which is
        // not implemented; only log that the file was skipped.
        logger?.warn(`[ContextLake-Action] Skipping raw text/structured download for ${file.key} - implement TOS download if needed`);
      }

      if (docs.length > 0) {
        await metadataProvider.addAssets(docs);
        results.push({ file: file.key, status: 'success', chunks: docs.length });
      }
    } catch (error: any) {
      logger?.error(`[ContextLake-Action] Error processing ${file.key}: ${error.message}`);
      results.push({ file: file.key, status: 'error', message: error.message });
    }
  }

  return results;
}
@@ -0,0 +1,119 @@
1
+ import { ContextLakeConfig } from '../../utils/config';
2
+
3
/**
 * Minimal HTTP client for the LAS operator API.
 *
 * Every call is a JSON `POST` authenticated with a bearer token. The key and
 * base URL come from `config.metadata_storage.embedding`, then from the
 * `LAS_API_KEY` / `LAS_BASE_URL` environment variables, then (URL only) from
 * a built-in default.
 */
export class LasApiClient {
  private endpoint: string;
  private apiKey: string;
  private logger: any;

  /**
   * @param config - plugin configuration to read the key / base URL from.
   * @param logger - logger with `debug` / `info`; may be omitted, in which
   *                 case the client stays silent instead of crashing.
   */
  constructor(config: ContextLakeConfig, logger: any) {
    const noop = () => {};
    // Callers such as ingestSource treat the logger as optional.
    this.logger = logger ?? { debug: noop, info: noop, warn: noop, error: noop };
    this.apiKey = config.metadata_storage?.embedding?.api_key || process.env.LAS_API_KEY || '';

    const base =
      config.metadata_storage?.embedding?.api_base ||
      process.env.LAS_BASE_URL ||
      'https://operator.las.cn-beijing.volces.com';
    // Paths are appended with a leading '/', so drop every trailing slash.
    this.endpoint = base.replace(/\/+$/, '');
  }

  /**
   * POSTs `body` as JSON to `path` and returns the parsed JSON response.
   *
   * @throws Error when no API key is configured or the response is not 2xx.
   */
  private async request(path: string, body: any) {
    if (!this.apiKey) {
      throw new Error('LAS_API_KEY is not configured. Please set it in config or environment variables.');
    }

    const url = `${this.endpoint}${path}`;
    this.logger.debug(`[LasApiClient] Requesting ${url}`, { body: JSON.stringify(body) });

    const response = await fetch(url, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${this.apiKey}`
      },
      body: JSON.stringify(body)
    });

    if (!response.ok) {
      let errorText = '';
      try {
        errorText = await response.text();
      } catch {
        // Best effort only: the status line below is still reported when the
        // error body cannot be read.
      }
      throw new Error(`LAS API Error: ${response.status} ${response.statusText} - ${errorText}`);
    }

    return await response.json();
  }

  /** Runs an operator synchronously via `/api/v1/process`. */
  async process(operatorId: string, data: any, version: string = 'v1') {
    return await this.request('/api/v1/process', {
      operator_id: operatorId,
      operator_version: version,
      data
    });
  }

  /** Submits an asynchronous operator task via `/api/v1/submit`. */
  async submit(operatorId: string, data: any, version: string = 'v1') {
    return await this.request('/api/v1/submit', {
      operator_id: operatorId,
      operator_version: version,
      data
    });
  }

  /** Fetches the current state of a submitted task via `/api/v1/poll`. */
  async poll(operatorId: string, taskId: string, version: string = 'v1') {
    return await this.request('/api/v1/poll', {
      operator_id: operatorId,
      operator_version: version,
      task_id: taskId
    });
  }

  /**
   * Submits a task and polls until it reaches a terminal status.
   *
   * @param pollIntervalMs - delay before each poll (default 3000 ms).
   * @param maxRetries - maximum number of polls (default 200).
   * @returns the poll response whose `metadata.task_status` is `COMPLETED`.
   * @throws Error if the submit response carries no `metadata.task_id`, the
   *         task ends as `FAILED` / `TIMEOUT`, or `maxRetries` polls elapse.
   */
  async submitAndPoll(operatorId: string, data: any, version: string = 'v1', pollIntervalMs: number = 3000, maxRetries: number = 200) {
    const submitResult = await this.submit(operatorId, data, version);

    if (!submitResult?.metadata?.task_id) {
      throw new Error(`Failed to submit task for ${operatorId}. Response: ${JSON.stringify(submitResult)}`);
    }

    const taskId = submitResult.metadata.task_id;
    this.logger.info(`[LasApiClient] Task submitted: ${taskId} for ${operatorId}`);

    for (let attempt = 0; attempt < maxRetries; attempt++) {
      await new Promise(resolve => setTimeout(resolve, pollIntervalMs));

      const pollResult = await this.poll(operatorId, taskId, version);
      const status = pollResult?.metadata?.task_status;

      this.logger.debug(`[LasApiClient] Task ${taskId} status: ${status}`);

      if (status === 'COMPLETED') {
        return pollResult;
      }
      if (status === 'FAILED' || status === 'TIMEOUT') {
        const errorMsg = pollResult?.metadata?.error_msg || 'Unknown error';
        throw new Error(`Task ${taskId} failed with status: ${status}. Message: ${errorMsg}`);
      }
      // Any other status is treated as "still running".
    }

    throw new Error(`Task ${taskId} timed out after ${maxRetries} polling attempts.`);
  }

  /**
   * Calls `/api/v1/embeddings/multimodal`. Optional arguments are only sent
   * when truthy.
   */
  async multimodalEmbedding(model: string, input: any[], encodingFormat: string = 'float', dimensions?: number, instructions?: string, sparseEmbedding?: any) {
    const body: any = {
      model,
      input,
      encoding_format: encodingFormat
    };
    if (dimensions) body.dimensions = dimensions;
    if (instructions) body.instructions = instructions;
    if (sparseEmbedding) body.sparse_embedding = sparseEmbedding;

    return await this.request('/api/v1/embeddings/multimodal', body);
  }
}
@@ -0,0 +1,196 @@
1
+ import { LasApiClient } from './las-api';
2
+ import { ContextLakeConfig } from '../../utils/config';
3
+ // @ts-ignore
4
+ import type { AnyAgentTool } from 'openclaw/plugin-sdk';
5
+
6
/**
 * Builds the agent tools that expose LAS operators.
 *
 * All tools share one LasApiClient. A failing API call is logged and reported
 * to the agent as `{ error: message }` rather than thrown.
 *
 * @param pluginConfig - plugin configuration handed to the LasApiClient.
 * @param logger - logger used for failure reporting.
 * @returns the tool definitions, in a fixed order.
 */
export function getLasTools(pluginConfig: ContextLakeConfig, logger: any): AnyAgentTool[] {
  const client = new LasApiClient(pluginConfig, logger);

  // Invoke a client method by name, turning a rejection into an error object.
  const invoke = async (method: string, args: any[]) => {
    try {
      // @ts-ignore
      return await client[method](...args);
    } catch (error: any) {
      logger.error(`[LasTools] API ${method} failed`, { error: error.message });
      return { error: error.message };
    }
  };

  // Most tools take a free-form `data` object and forward it to the operator
  // that carries the same id as the tool name.
  const operatorTool = (name: string, label: string, method: string, description: string) => ({
    name,
    label,
    description,
    parameters: {
      type: 'object',
      properties: { data: { type: 'object', additionalProperties: true } },
      required: ['data']
    },
    async execute(toolCallId: string, params: any) {
      return await invoke(method, [name, params.data]);
    }
  });

  const imageResample = operatorTool(
    'las_image_resample',
    'LAS Image Resample',
    'process',
    `Resample/Resize an image and save it to TOS.
Parameters in data:
- image_src_type (string, default: "image_url"): "image_url" or "image_tos"
- image (string, required): URL or tos:// path
- tos_dir (string, required): tos:// output directory
- image_suffix (string): ".jpg" or ".png"
- target_size (array of integers): e.g. [1024, 1024]
- target_dpi (array of integers): e.g. [72, 72]
- method (string): "nearest", "bilinear", "bicubic", "lanczos"`
  );

  const audioExtractAndSplit = operatorTool(
    'las_audio_extract_and_split',
    'LAS Audio Extract and Split',
    'process',
    `Extract audio from video and split it into chunks.
Parameters in data:
- input_path (string, required): tos:// video path
- output_path_template (string, required): e.g. tos://bucket/{index}.{output_file_ext}
- split_duration (number): duration in seconds, default 30.0
- output_format (string): "wav", "mp3", "flac"
- timeout (integer)
- extra_params (array of string): ffmpeg params`
  );

  const audioConvert = operatorTool(
    'las_audio_convert',
    'LAS Audio Convert',
    'process',
    `Convert audio format.
Parameters in data:
- input_path (string, required): tos:// audio path
- output_path (string, required): tos:// output path
- output_format (string): "wav", "mp3", "flac"
- extra_params (array of string): ffmpeg params`
  );

  const asrPro = operatorTool(
    'las_asr_pro',
    'LAS ASR Pro (Speech Recognition)',
    'submitAndPoll',
    `Perform automatic speech recognition (ASR).
Parameters in data:
- resource: "bigasr" or "seedasr"
- audio (object, required): { url: string, language: string, format: string }
- request (object, required): { model_name: "bigmodel", ... }
- user (object): { uid: string }`
  );

  const seedAsr = operatorTool(
    'las_seed_2_0',
    'LAS Seed 2.0 (Audio)',
    'submitAndPoll',
    `ASR with Seed 2.0.
Parameters in data:
- audio (object, required): { url, format, language }
- request (object, required): { model_name: "seedasr", ... }`
  );

  // The embedding tool is the only one with flat, typed parameters.
  const multimodalEmbedding = {
    name: 'las_bare_image_text_embedding',
    label: 'LAS Multimodal Embedding',
    description: `Multimodal Embedding (image and text).
Parameters:
- model (string, required): "doubao-embedding-vision-250615"
- input (array of objects, required): [ { type: "image_url", image_url: { url: "..." } }, { type: "text", text: "..." } ]
- encoding_format (string): "float", "base64"
- dimensions (integer): 1024 or 2048`,
    parameters: {
      type: 'object',
      properties: {
        model: { type: 'string', default: 'doubao-embedding-vision-250615' },
        input: { type: 'array', items: { type: 'object' } },
        encoding_format: { type: 'string', default: 'float' },
        dimensions: { type: 'integer' }
      },
      required: ['model', 'input']
    },
    async execute(toolCallId: string, params: any) {
      const { model, input, encoding_format, dimensions } = params;
      return await invoke('multimodalEmbedding', [model, input, encoding_format, dimensions]);
    }
  };

  const longVideoUnderstand = operatorTool(
    'las_long_video_understand',
    'LAS Long Video Understand',
    'submitAndPoll',
    `Long Video Understanding.
Parameters in data:
- video_url (string, required)
- query (string, required)
- model_name (string): default "doubao-seed-2-0-lite-260215"
- ...other params like fps, target_tokens_per_clip`
  );

  const pdfParse = operatorTool(
    'las_pdf_parse_doubao',
    'LAS PDF Parse Doubao',
    'submitAndPoll',
    `Parse PDF documents to Markdown.
Parameters in data:
- url (string, required): PDF URL or tos://
- start_page (integer): default 1
- num_pages (integer): default to end
- parse_mode (string): "normal" or "detail"`
  );

  const videoResize = operatorTool(
    'las_video_resize',
    'LAS Video Resize',
    'submitAndPoll',
    `Resize video.
Parameters in data:
- video_url (string, required): URL or tos://
- target_width (integer)
- target_height (integer)
- output_dir (string, required): tos://`
  );

  return [
    imageResample,
    audioExtractAndSplit,
    audioConvert,
    asrPro,
    seedAsr,
    multimodalEmbedding,
    longVideoUnderstand,
    pdfParse,
    videoResize
  ];
}
@@ -1,5 +1,6 @@
1
1
  import { createMetadataProvider } from '../../service/metadata/factory';
2
2
  import { createStorageProvider } from '../../service/storage/factory';
3
+ import { ContextLakeConfig } from '../../utils/config';
3
4
 
4
5
  interface ListParams {
5
6
  limit?: number;
@@ -10,7 +11,7 @@ interface DeleteParams {
10
11
  filter?: string;
11
12
  }
12
13
 
13
- export async function listAssets(params: ListParams, config: any, logger?: any) {
14
+ export async function listAssets(params: ListParams, config: ContextLakeConfig, logger?: any) {
14
15
  if (logger) {
15
16
  logger.info(`[ContextLake-Action] Calling listAssets with params: ${JSON.stringify(params)}`);
16
17
  } else {
@@ -20,7 +21,7 @@ export async function listAssets(params: ListParams, config: any, logger?: any)
20
21
 
21
22
  // Ensure config has default if not provided
22
23
  const metaConfig = config.metadata_storage || { type: 'local', lancedb_uri: './data/contextlake' };
23
- const metadataProvider = createMetadataProvider(metaConfig);
24
+ const metadataProvider = createMetadataProvider(metaConfig as any);
24
25
 
25
26
  await metadataProvider.connect();
26
27
  const docs = await metadataProvider.list(params.limit || 100);
@@ -47,7 +48,7 @@ export async function listAssets(params: ListParams, config: any, logger?: any)
47
48
  return JSON.parse(JSON.stringify(Array.from(fileMap.values())));
48
49
  }
49
50
 
50
- export async function deleteAssets(params: DeleteParams, config: any, logger?: any) {
51
+ export async function deleteAssets(params: DeleteParams, config: ContextLakeConfig, logger?: any) {
51
52
  if (logger) {
52
53
  logger.info(`[ContextLake-Action] Calling deleteAssets with params: ${JSON.stringify(params)}`);
53
54
  } else {
@@ -56,13 +57,13 @@ export async function deleteAssets(params: DeleteParams, config: any, logger?: a
56
57
  }
57
58
 
58
59
  const metaConfig = config.metadata_storage || { type: 'local', lancedb_uri: './data/contextlake' };
59
- const metadataProvider = createMetadataProvider(metaConfig);
60
+ const metadataProvider = createMetadataProvider(metaConfig as any);
60
61
 
61
62
  // file_storage config is optional for deletion (we might not need to delete from storage if inline)
62
63
  // Check if file_storage config exists before creating provider
63
64
  let storageProvider;
64
65
  if (config.file_storage && config.file_storage.type) {
65
- storageProvider = createStorageProvider(config.file_storage);
66
+ storageProvider = createStorageProvider(config.file_storage as any);
66
67
  }
67
68
 
68
69
  await metadataProvider.connect();
@@ -19,7 +19,7 @@ export interface ConnectParams {
19
19
  sample_rows?: number;
20
20
  }
21
21
 
22
- interface ConnectResult {
22
+ export interface ConnectResult {
23
23
  status: 'success' | 'error';
24
24
  datasource_name: string;
25
25
  db_path: string;
@@ -37,7 +37,7 @@ interface ConnectResult {
37
37
  // Constants
38
38
  // ---------------------------------------------------------------------------
39
39
 
40
- const BASE_DIR = path.join(os.homedir(), '.openclaw', 'las-data-profiler');
40
+ const BASE_DIR = path.join(os.homedir(), '.openclaw', 'contextlake', 'profiler');
41
41
  const PYTHON_DEPS = ['boto3', 'lancedb', 'pyarrow', 'pandas', 'Pillow', 'mutagen', 'pymupdf'];
42
42
 
43
43
  // ---------------------------------------------------------------------------
@@ -113,8 +113,8 @@ function ensurePythonDeps(): void {
113
113
  * Get the path to the bundled Python script.
114
114
  */
115
115
  function getScriptPath(): string {
116
- // The Python script is co-located with this module
117
- return path.join(__dirname, 's3_catalog.py');
116
+ // The Python script is located in the scripts directory
117
+ return path.join(__dirname, '../scripts', 's3_catalog.py');
118
118
  }
119
119
 
120
120
  // ---------------------------------------------------------------------------
@@ -252,3 +252,20 @@ export async function connectDataSource(
252
252
  });
253
253
  });
254
254
  }
255
+
256
/**
 * Lists the data sources that have a directory under BASE_DIR.
 *
 * @param _ctx - unused.
 * @returns the names of the immediate sub-directories of BASE_DIR; an empty
 *          list when BASE_DIR does not exist.
 * @throws Error wrapping any file-system failure.
 */
export async function listDataSources(
  _ctx?: any
): Promise<{ datasources: string[] }> {
  try {
    const datasources: string[] = [];
    if (fs.existsSync(BASE_DIR)) {
      // Plain files in BASE_DIR are ignored; only directories are data sources.
      for (const entry of fs.readdirSync(BASE_DIR, { withFileTypes: true })) {
        if (entry.isDirectory()) {
          datasources.push(entry.name);
        }
      }
    }
    return { datasources };
  } catch (error: any) {
    throw new Error(`Failed to list data sources: ${error.message}`);
  }
}
+ }