@byted-las/contextlake-openclaw 1.0.0 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/dist/index.d.ts +2 -1
  2. package/dist/index.js +5 -5
  3. package/dist/src/client/lancedb.js +13 -4
  4. package/dist/src/commands/cli.d.ts +5 -2
  5. package/dist/src/commands/cli.js +94 -10
  6. package/dist/src/commands/index.d.ts +2 -1
  7. package/dist/src/commands/index.js +31 -35
  8. package/dist/src/commands/slashcmd.d.ts +8 -1
  9. package/dist/src/commands/slashcmd.js +90 -6
  10. package/dist/src/commands/tools.d.ts +10 -218
  11. package/dist/src/commands/tools.js +109 -104
  12. package/dist/src/lib/actions/ingest-source.d.ts +15 -0
  13. package/dist/src/lib/actions/ingest-source.js +193 -0
  14. package/dist/src/lib/actions/ingest.d.ts +14 -7
  15. package/dist/src/lib/actions/ingest.js +133 -63
  16. package/dist/src/lib/actions/las-api.d.ts +13 -0
  17. package/dist/src/lib/actions/las-api.js +105 -0
  18. package/dist/src/lib/actions/las-tools.d.ts +3 -0
  19. package/dist/src/lib/actions/las-tools.js +194 -0
  20. package/dist/src/lib/actions/las.d.ts +64 -0
  21. package/dist/src/lib/actions/las.js +72 -0
  22. package/dist/src/lib/actions/manage.d.ts +3 -2
  23. package/dist/src/{skills/las-data-profiler/index.d.ts → lib/actions/profiler.d.ts} +4 -2
  24. package/dist/src/{skills/las-data-profiler/index.js → lib/actions/profiler.js} +19 -3
  25. package/dist/src/lib/actions/retrieve.d.ts +2 -1
  26. package/dist/src/lib/actions/retrieve.js +2 -18
  27. package/{src/skills/las-data-profiler → dist/src/lib/scripts}/s3_catalog.py +10 -1
  28. package/dist/src/processor/loader.js +9 -2
  29. package/dist/src/service/embedding/factory.js +1 -10
  30. package/dist/src/service/embedding/interface.d.ts +8 -1
  31. package/dist/src/service/embedding/local.js +16 -13
  32. package/dist/src/service/embedding/remote.d.ts +7 -0
  33. package/dist/src/service/embedding/remote.js +108 -7
  34. package/dist/src/service/metadata/interface.d.ts +1 -0
  35. package/dist/src/service/metadata/local.d.ts +1 -0
  36. package/dist/src/service/metadata/local.js +6 -0
  37. package/dist/src/skills/SKILL.md +174 -0
  38. package/dist/src/skills/contextlake-delete/SKILL.md +36 -0
  39. package/dist/src/skills/contextlake-ingest/SKILL.md +40 -0
  40. package/dist/src/skills/contextlake-list/SKILL.md +22 -0
  41. package/dist/src/skills/contextlake-retrieve/SKILL.md +37 -0
  42. package/dist/src/skills/las-data-profiler/SKILL.md +174 -0
  43. package/dist/src/utils/config.d.ts +34 -1
  44. package/dist/src/utils/config.js +16 -3
  45. package/dist/src/utils/credentials.d.ts +8 -0
  46. package/dist/src/utils/credentials.js +77 -0
  47. package/index.ts +8 -8
  48. package/openclaw.plugin.json +1 -1
  49. package/package.json +8 -7
  50. package/src/client/lancedb.ts +32 -21
  51. package/src/commands/cli.ts +105 -13
  52. package/src/commands/index.ts +45 -42
  53. package/src/commands/slashcmd.ts +69 -10
  54. package/src/commands/tools.ts +142 -117
  55. package/src/lib/actions/ingest.ts +151 -75
  56. package/src/lib/actions/las-api.ts +119 -0
  57. package/src/lib/actions/las-tools.ts +196 -0
  58. package/src/lib/actions/manage.ts +6 -5
  59. package/src/{skills/las-data-profiler/index.ts → lib/actions/profiler.ts} +21 -4
  60. package/src/lib/actions/retrieve.ts +16 -34
  61. package/src/lib/scripts/s3_catalog.py +617 -0
  62. package/src/processor/loader.ts +12 -4
  63. package/src/service/embedding/factory.ts +1 -8
  64. package/src/service/embedding/interface.ts +9 -1
  65. package/src/service/embedding/remote.ts +133 -13
  66. package/src/service/metadata/interface.ts +1 -0
  67. package/src/service/metadata/local.ts +7 -0
  68. package/src/service/storage/factory.ts +2 -2
  69. package/src/utils/config.ts +61 -8
  70. package/src/utils/credentials.ts +50 -0
  71. package/bin/contextlake-openclaw.js +0 -5
  72. package/dist/src/skills/las-data-profiler/register.d.ts +0 -1
  73. package/dist/src/skills/las-data-profiler/register.js +0 -19
  74. package/src/service/embedding/local.ts +0 -118
  75. package/src/skills/las-data-profiler/register.ts +0 -19
@@ -0,0 +1,193 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.ingestSource = ingestSource;
37
+ const factory_1 = require("../../service/metadata/factory");
38
+ const las_api_1 = require("./las-api");
39
+ const lancedb = __importStar(require("@lancedb/lancedb"));
40
+ const path = __importStar(require("path"));
41
+ const fs = __importStar(require("fs"));
42
+ const os = __importStar(require("os"));
43
+ // @ts-ignore
44
+ const uuid_1 = require("uuid");
45
+ const BASE_DIR = path.join(os.homedir(), '.openclaw', 'contextlake', 'profiler');
46
/**
 * Ingests every file listed in a profiled data source's `file_catalog` table.
 *
 * For each catalog row the file is converted to text (or embedded directly,
 * for images) through the LAS operators, embedded with the metadata
 * provider, and stored via `metadataProvider.addAssets`.
 *
 * @param params  Object carrying `datasource_name`, the profiler data source
 *                whose catalog lives under `BASE_DIR/<datasource_name>/catalog_db`.
 * @param config  Plugin configuration; `metadata_storage` selects the metadata
 *                provider (a local LanceDB default is used when it is absent).
 * @param logger  Optional logger. When omitted, only the initial call is
 *                reported, through `console.log`.
 * @returns One entry per file that produced documents
 *          (`{ file, status: 'success', chunks }`) or failed
 *          (`{ file, status: 'error', message }`). Files that yield no
 *          documents are not reported.
 * @throws Error when `datasource_name` is missing, empty, or resolves outside
 *         `BASE_DIR`; when the catalog database directory does not exist; or
 *         when it has no `file_catalog` table.
 */
async function ingestSource(params, config, logger) {
    const callMessage = `[ContextLake-Action] Calling ingestSource with params: ${JSON.stringify(params)}`;
    if (logger) {
        logger.info(callMessage);
    }
    else {
        // eslint-disable-next-line no-console
        console.log(callMessage);
    }
    // Validate the name before it is used to build a filesystem path, so a
    // missing value fails with a clear message instead of a path.join TypeError.
    const datasourceName = params?.datasource_name;
    if (typeof datasourceName !== 'string' || datasourceName.trim() === '') {
        throw new Error('Invalid datasource_name: expected a non-empty string.');
    }
    const dsDir = path.join(BASE_DIR, datasourceName);
    // Reject names such as "../../x" that would escape the profiler directory.
    const relativeToBase = path.relative(BASE_DIR, dsDir);
    const escapesBase = relativeToBase === '..'
        || relativeToBase.startsWith(`..${path.sep}`)
        || path.isAbsolute(relativeToBase);
    if (escapesBase) {
        throw new Error(`Invalid datasource_name "${datasourceName}": it must resolve to a directory inside ${BASE_DIR}.`);
    }
    const dbPath = path.join(dsDir, 'catalog_db');
    if (!fs.existsSync(dbPath)) {
        throw new Error(`Data source database not found at ${dbPath}. Please run profiler connect first.`);
    }
    const metaConfig = config.metadata_storage || { type: 'local', lancedb_uri: './data/contextlake' };
    const metadataProvider = (0, factory_1.createMetadataProvider)(metaConfig);
    await metadataProvider.connect();
    const lasClient = new las_api_1.LasApiClient(config, logger);
    const results = [];
    // Connect to the profiler LanceDB to read the file catalog
    const profilerDb = await lancedb.connect(dbPath);
    const tableNames = await profilerDb.tableNames();
    if (!tableNames.includes('file_catalog')) {
        throw new Error(`table 'file_catalog' not found in ${dbPath}`);
    }
    const catalogTable = await profilerDb.openTable('file_catalog');
    const files = await catalogTable.query().toArray();
    logger?.info(`[ContextLake-Action] Found ${files.length} files in catalog`);
    // Resolves the URL stored with a catalog row, falling back to a tos:// URL.
    const resolveUrl = (fileInfo) => fileInfo.url || `tos://${fileInfo.bucket}/${fileInfo.key}`;
    // Builds one stored record; field order matches what addAssets received before.
    const buildDoc = (fileInfo, vector, text, fileType) => ({
        id: (0, uuid_1.v4)(),
        vector,
        text,
        source: fileInfo.key,
        file_type: fileType,
        storage_type: 'source',
        url: resolveUrl(fileInfo),
        metadata: JSON.stringify({ datasource: datasourceName }),
        created_at: Date.now(),
        binary_data: Buffer.from('')
    });
    // Simple fixed-size chunking with overlap between consecutive chunks.
    const splitText = (text, chunkSize = 500, overlap = 50) => {
        const chunks = [];
        if (!text)
            return chunks;
        // Always advance by at least one character so overlap >= chunkSize
        // cannot loop forever.
        const step = Math.max(1, chunkSize - overlap);
        for (let start = 0; start < text.length; start += step) {
            chunks.push(text.slice(start, start + chunkSize));
        }
        return chunks;
    };
    // Chunks the text and embeds each chunk sequentially.
    const processText = async (text, fileInfo) => {
        const docs = [];
        for (const chunk of splitText(text)) {
            const vector = await metadataProvider.generateMultimodalEmbedding([{ type: 'text', text: chunk }]);
            docs.push(buildDoc(fileInfo, vector, chunk, fileInfo.category));
        }
        return docs;
    };
    for (const file of files) {
        try {
            logger?.info(`[ContextLake-Action] Processing file: ${file.key}, type: ${file.media_type}`);
            let docs = [];
            const fileUrl = resolveUrl(file);
            if (file.media_type === 'pdf') {
                // PDF Parse
                const result = await lasClient.submitAndPoll('las_pdf_parse_doubao', {
                    url: fileUrl
                });
                const markdown = result.data?.markdown || '';
                docs = await processText(markdown, file);
            }
            else if (file.media_type === 'image') {
                // Multimodal embedding directly on the image URL
                const vector = await metadataProvider.generateMultimodalEmbedding([
                    { type: 'image_url', image_url: { url: fileUrl } },
                    { type: 'text', text: 'This is an image from the dataset.' }
                ]);
                docs.push(buildDoc(file, vector, 'Image from dataset', 'image'));
            }
            else if (file.media_type === 'audio') {
                // ASR; the audio format is taken from the key's extension
                const result = await lasClient.submitAndPoll('las_asr_pro', {
                    audio: { url: fileUrl, format: file.key.split('.').pop() || 'wav' },
                    request: { model_name: 'bigmodel' }
                });
                const text = result.data?.result?.text || '';
                docs = await processText(text, file);
            }
            else if (file.media_type === 'video') {
                // Video understanding -> text -> embedding
                const result = await lasClient.submitAndPoll('las_long_video_understand', {
                    video_url: fileUrl,
                    query: "详细描述这个视频的内容",
                    model_name: "doubao-seed-2-0-lite-260215"
                });
                // The response structure is not pinned down here, so the whole
                // `data` payload is serialized and used as the description text.
                const text = JSON.stringify(result.data || '');
                // Also extract the audio track and transcribe it.
                // 1. Extract audio to a unique path per video
                const audioOutputPath = `tos://${file.bucket}/.tmp/audio/${(0, uuid_1.v4)()}.wav`;
                await lasClient.process('las_audio_extract_and_split', {
                    input_path: fileUrl,
                    output_path_template: audioOutputPath,
                    output_format: 'wav'
                });
                // 2. ASR on the extracted audio
                // NOTE(review): audioOutputPath contains no '{index}.{output_file_ext}'
                // placeholder, so this replace() returns the path unchanged. Confirm
                // the template format the operator expects before relying on it.
                const asrResult = await lasClient.submitAndPoll('las_asr_pro', {
                    audio: { url: audioOutputPath.replace('{index}.{output_file_ext}', '0.wav'), format: 'wav' },
                    request: { model_name: 'bigmodel' }
                });
                const audioText = asrResult.data?.result?.text || '';
                // Combine video text and audio text
                const combinedText = `Video Description: ${text}\n\nAudio Transcription: ${audioText}`;
                docs = await processText(combinedText, file);
            }
            else if (file.category === 'structured' || file.category === 'non-structured') {
                // Raw text / structured files are not downloaded yet; only log.
                logger?.warn(`[ContextLake-Action] Skipping raw text/structured download for ${file.key} - implement TOS download if needed`);
            }
            if (docs.length > 0) {
                await metadataProvider.addAssets(docs);
                results.push({ file: file.key, status: 'success', chunks: docs.length });
            }
        }
        catch (error) {
            // A thrown value is not guaranteed to be an Error instance.
            const message = error instanceof Error ? error.message : String(error);
            logger?.error(`[ContextLake-Action] Error processing ${file.key}: ${message}`);
            results.push({ file: file.key, status: 'error', message });
        }
    }
    return results;
}
@@ -1,8 +1,15 @@
1
- interface IngestParams {
2
- files: string[];
3
- metadata?: Record<string, any>;
4
- chunkSize?: number;
5
- overlap?: number;
1
+ import { ContextLakeConfig } from '../../utils/config';
2
+ export interface IngestSourceParams {
3
+ datasource_name: string;
6
4
  }
7
- export declare function ingestAssets(params: IngestParams, config: any, logger?: any): Promise<any>;
8
- export {};
5
+ export declare function ingestSource(params: IngestSourceParams, config: ContextLakeConfig, logger?: any): Promise<({
6
+ file: any;
7
+ status: string;
8
+ chunks: number;
9
+ message?: undefined;
10
+ } | {
11
+ file: any;
12
+ status: string;
13
+ message: any;
14
+ chunks?: undefined;
15
+ })[]>;
@@ -33,91 +33,161 @@ var __importStar = (this && this.__importStar) || (function () {
33
33
  };
34
34
  })();
35
35
  Object.defineProperty(exports, "__esModule", { value: true });
36
- exports.ingestAssets = ingestAssets;
37
- const factory_1 = require("../../service/storage/factory");
38
- const factory_2 = require("../../service/metadata/factory");
39
- const loader_1 = require("../../processor/loader");
36
+ exports.ingestSource = ingestSource;
37
+ const factory_1 = require("../../service/metadata/factory");
38
+ const las_api_1 = require("./las-api");
39
+ const lancedb = __importStar(require("@lancedb/lancedb"));
40
40
  const path = __importStar(require("path"));
41
+ const fs = __importStar(require("fs"));
42
+ const os = __importStar(require("os"));
41
43
  // @ts-ignore
42
44
  const uuid_1 = require("uuid");
43
- async function ingestAssets(params, config, logger) {
45
+ const BASE_DIR = path.join(os.homedir(), '.openclaw', 'contextlake', 'profiler');
46
+ async function ingestSource(params, config, logger) {
44
47
  if (logger) {
45
- logger.info(`[ContextLake-Action] Calling ingestAssets with params: ${JSON.stringify(params)}`);
48
+ logger.info(`[ContextLake-Action] Calling ingestSource with params: ${JSON.stringify(params)}`);
46
49
  }
47
50
  else {
48
51
  // eslint-disable-next-line no-console
49
- console.log(`[ContextLake-Action] Calling ingestAssets with params: ${JSON.stringify(params)}`);
52
+ console.log(`[ContextLake-Action] Calling ingestSource with params: ${JSON.stringify(params)}`);
53
+ }
54
+ const dsDir = path.join(BASE_DIR, params.datasource_name);
55
+ const dbPath = path.join(dsDir, 'catalog_db');
56
+ if (!fs.existsSync(dbPath)) {
57
+ throw new Error(`Data source database not found at ${dbPath}. Please run profiler connect first.`);
50
58
  }
51
- const storageConfig = config.file_storage || { type: 'local', local_base_dir: './data/files' };
52
59
  const metaConfig = config.metadata_storage || { type: 'local', lancedb_uri: './data/contextlake' };
53
- const storageProvider = (0, factory_1.createStorageProvider)(storageConfig);
54
- const metadataProvider = (0, factory_2.createMetadataProvider)(metaConfig);
60
+ const metadataProvider = (0, factory_1.createMetadataProvider)(metaConfig);
55
61
  await metadataProvider.connect();
56
- const maxInlineSize = (config.storage_policy?.max_inline_size_kb || 1024) * 1024;
62
+ const lasClient = new las_api_1.LasApiClient(config, logger);
57
63
  const results = [];
58
- for (const filePath of params.files) {
64
+ // Connect to the profiler LanceDB to read the file catalog
65
+ const profilerDb = await lancedb.connect(dbPath);
66
+ const tableNames = await profilerDb.tableNames();
67
+ if (!tableNames.includes('file_catalog')) {
68
+ throw new Error(`table 'file_catalog' not found in ${dbPath}`);
69
+ }
70
+ const catalogTable = await profilerDb.openTable('file_catalog');
71
+ const files = await catalogTable.query().toArray();
72
+ logger?.info(`[ContextLake-Action] Found ${files.length} files in catalog`);
73
+ // Simple chunking for text
74
+ const splitText = (text, chunkSize = 500, overlap = 50) => {
75
+ const chunks = [];
76
+ if (!text)
77
+ return chunks;
78
+ let i = 0;
79
+ while (i < text.length) {
80
+ chunks.push(text.slice(i, i + chunkSize));
81
+ i += chunkSize - overlap;
82
+ }
83
+ return chunks;
84
+ };
85
+ const processText = async (text, fileInfo) => {
86
+ const chunks = splitText(text);
87
+ const docs = [];
88
+ for (const chunk of chunks) {
89
+ const vector = await metadataProvider.generateMultimodalEmbedding([{ type: 'text', text: chunk }]);
90
+ docs.push({
91
+ id: (0, uuid_1.v4)(),
92
+ vector,
93
+ text: chunk,
94
+ source: fileInfo.key,
95
+ file_type: fileInfo.category,
96
+ storage_type: 'source',
97
+ url: fileInfo.url || `tos://${fileInfo.bucket}/${fileInfo.key}`,
98
+ metadata: JSON.stringify({ datasource: params.datasource_name }),
99
+ created_at: Date.now(),
100
+ binary_data: Buffer.from('')
101
+ });
102
+ }
103
+ return docs;
104
+ };
105
+ for (const file of files) {
59
106
  try {
60
- const fileName = path.basename(filePath);
61
- const { buffer, text, type } = await (0, loader_1.processFile)(filePath);
62
- const createdAt = Date.now(); // Current timestamp
63
- let fileUrl = '';
64
- let storageType = '';
65
- let binaryData = Buffer.alloc(0);
66
- // Decide storage strategy
67
- if (buffer.length <= maxInlineSize) {
68
- binaryData = buffer;
69
- fileUrl = `inline://${fileName}`; // Virtual URL for inline
70
- storageType = 'inline';
71
- }
72
- else {
73
- fileUrl = await storageProvider.uploadFile(fileName, buffer);
74
- storageType = storageConfig.type;
107
+ logger?.info(`[ContextLake-Action] Processing file: ${file.key}, type: ${file.media_type}`);
108
+ let docs = [];
109
+ const fileUrl = file.url || `tos://${file.bucket}/${file.key}`;
110
+ if (file.media_type === 'pdf') {
111
+ // PDF Parse
112
+ const result = await lasClient.submitAndPoll('las_pdf_parse_doubao', {
113
+ url: fileUrl
114
+ });
115
+ const markdown = result.data?.markdown || '';
116
+ docs = await processText(markdown, file);
75
117
  }
76
- const chunks = (0, loader_1.splitText)(text, params.chunkSize || 500, params.overlap || 50);
77
- const docs = [];
78
- // If no text extracted (e.g. image), store one entry with empty text but with metadata/binary
79
- if (chunks.length === 0) {
80
- const vector = await metadataProvider.generateEmbedding(fileName); // Embed filename as fallback
118
+ else if (file.media_type === 'image') {
119
+ // Multimodal Embedding directly
120
+ const vector = await metadataProvider.generateMultimodalEmbedding([
121
+ { type: 'image_url', image_url: { url: fileUrl } },
122
+ { type: 'text', text: 'This is an image from the dataset.' }
123
+ ]);
81
124
  docs.push({
82
125
  id: (0, uuid_1.v4)(),
83
126
  vector,
84
- text: '',
85
- source: fileName,
86
- file_type: type,
87
- storage_type: storageType,
127
+ text: 'Image from dataset',
128
+ source: file.key,
129
+ file_type: 'image',
130
+ storage_type: 'source',
88
131
  url: fileUrl,
89
- metadata: JSON.stringify(params.metadata || {}),
90
- created_at: createdAt,
91
- binary_data: binaryData
132
+ metadata: JSON.stringify({ datasource: params.datasource_name }),
133
+ created_at: Date.now(),
134
+ binary_data: Buffer.from('')
92
135
  });
93
136
  }
94
- else {
95
- for (const chunk of chunks) {
96
- const vector = await metadataProvider.generateEmbedding(chunk);
97
- docs.push({
98
- id: (0, uuid_1.v4)(),
99
- vector,
100
- text: chunk,
101
- source: fileName,
102
- file_type: type,
103
- storage_type: storageType,
104
- url: fileUrl,
105
- metadata: JSON.stringify(params.metadata || {}),
106
- created_at: createdAt,
107
- binary_data: binaryData // Only attach to first chunk
108
- });
109
- // Clear binary data for subsequent chunks of the same file to avoid duplication
110
- binaryData = Buffer.alloc(0);
111
- }
137
+ else if (file.media_type === 'audio') {
138
+ // ASR
139
+ const result = await lasClient.submitAndPoll('las_asr_pro', {
140
+ audio: { url: fileUrl, format: file.key.split('.').pop() || 'wav' },
141
+ request: { model_name: 'bigmodel' }
142
+ });
143
+ const text = result.data?.result?.text || '';
144
+ docs = await processText(text, file);
145
+ }
146
+ else if (file.media_type === 'video') {
147
+ // Video understanding -> text -> embedding
148
+ const result = await lasClient.submitAndPoll('las_long_video_understand', {
149
+ video_url: fileUrl,
150
+ query: "详细描述这个视频的内容",
151
+ model_name: "doubao-seed-2-0-lite-260215"
152
+ });
153
+ // Assuming video output is a text description somewhere in the response.
154
+ // Note: the exact structure depends on the API return, adjusting to generic text.
155
+ const text = JSON.stringify(result.data || '');
156
+ // Also need audio extract and ASR for video
157
+ // 1. Extract audio
158
+ // The output_path_template needs a unique path per video
159
+ const audioOutputPath = `tos://${file.bucket}/.tmp/audio/${(0, uuid_1.v4)()}.wav`;
160
+ await lasClient.process('las_audio_extract_and_split', {
161
+ input_path: fileUrl,
162
+ output_path_template: audioOutputPath,
163
+ output_format: 'wav'
164
+ });
165
+ // 2. ASR on the extracted audio
166
+ // Wait briefly for object to be available if needed (often synchronous but tos takes a ms)
167
+ const asrResult = await lasClient.submitAndPoll('las_asr_pro', {
168
+ audio: { url: audioOutputPath.replace('{index}.{output_file_ext}', '0.wav'), format: 'wav' },
169
+ request: { model_name: 'bigmodel' }
170
+ });
171
+ const audioText = asrResult.data?.result?.text || '';
172
+ // Combine video text and audio text
173
+ const combinedText = `Video Description: ${text}\n\nAudio Transcription: ${audioText}`;
174
+ docs = await processText(combinedText, file);
175
+ }
176
+ else if (file.category === 'structured' || file.category === 'non-structured') {
177
+ // If we had a direct text content, we could process it here.
178
+ // Assuming basic local download or similar is available, but for now we skip raw file reading from TOS in this demo script unless implemented.
179
+ // Fallback just logs
180
+ logger?.warn(`[ContextLake-Action] Skipping raw text/structured download for ${file.key} - implement TOS download if needed`);
181
+ }
182
+ if (docs.length > 0) {
183
+ await metadataProvider.addAssets(docs);
184
+ results.push({ file: file.key, status: 'success', chunks: docs.length });
112
185
  }
113
- await metadataProvider.addAssets(docs);
114
- results.push({ file: fileName, status: 'success', chunks: docs.length });
115
186
  }
116
187
  catch (error) {
117
- // @ts-ignore
118
- results.push({ file: filePath, status: 'error', message: error.message });
188
+ logger?.error(`[ContextLake-Action] Error processing ${file.key}: ${error.message}`);
189
+ results.push({ file: file.key, status: 'error', message: error.message });
119
190
  }
120
191
  }
121
- // Ensure plain JSON serialization
122
- return JSON.parse(JSON.stringify(results));
192
+ return results;
123
193
  }
@@ -0,0 +1,13 @@
1
+ import { ContextLakeConfig } from '../../utils/config';
2
+ export declare class LasApiClient {
3
+ private endpoint;
4
+ private apiKey;
5
+ private logger;
6
+ constructor(config: ContextLakeConfig, logger: any);
7
+ private request;
8
+ process(operatorId: string, data: any, version?: string): Promise<any>;
9
+ submit(operatorId: string, data: any, version?: string): Promise<any>;
10
+ poll(operatorId: string, taskId: string, version?: string): Promise<any>;
11
+ submitAndPoll(operatorId: string, data: any, version?: string, pollIntervalMs?: number, maxRetries?: number): Promise<any>;
12
+ multimodalEmbedding(model: string, input: any[], encodingFormat?: string, dimensions?: number, instructions?: string, sparseEmbedding?: any): Promise<any>;
13
+ }
@@ -0,0 +1,105 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.LasApiClient = void 0;
4
/**
 * Thin HTTP client for the LAS operator API.
 *
 * Every call is a JSON POST authenticated with a bearer token. The API key
 * and endpoint come from `config.metadata_storage.embedding` first, then the
 * `LAS_API_KEY` / `LAS_BASE_URL` environment variables, then a built-in
 * default endpoint.
 *
 * The logger is optional: callers may pass `undefined`, in which case the
 * client simply does not log.
 */
class LasApiClient {
    endpoint;
    apiKey;
    logger;
    /**
     * @param config Plugin configuration (only `metadata_storage.embedding` is read).
     * @param logger Optional logger exposing `debug` and `info`.
     */
    constructor(config, logger) {
        this.logger = logger;
        const embedding = config?.metadata_storage?.embedding;
        // `||` is intentional: an empty string in config falls through to the env var.
        this.apiKey = embedding?.api_key || process.env.LAS_API_KEY || '';
        const base = embedding?.api_base || process.env.LAS_BASE_URL || 'https://operator.las.cn-beijing.volces.com';
        // Strip trailing slashes so `${endpoint}${path}` never contains "//".
        this.endpoint = base.replace(/\/+$/, '');
    }
    /**
     * POSTs `body` as JSON to `path` on the configured endpoint.
     *
     * @returns The parsed JSON response.
     * @throws Error when no API key is configured or the response status is not OK.
     */
    async request(path, body) {
        if (!this.apiKey) {
            throw new Error('LAS_API_KEY is not configured. Please set it in config or environment variables.');
        }
        const url = `${this.endpoint}${path}`;
        const payload = JSON.stringify(body);
        this.logger?.debug?.(`[LasApiClient] Requesting ${url}`, { body: payload });
        const response = await fetch(url, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'Authorization': `Bearer ${this.apiKey}`
            },
            body: payload
        });
        if (!response.ok) {
            let errorText = '';
            try {
                errorText = await response.text();
            }
            catch (e) {
                // Best effort: the status line alone is still reported below.
            }
            throw new Error(`LAS API Error: ${response.status} ${response.statusText} - ${errorText}`);
        }
        return await response.json();
    }
    /** Runs an operator synchronously through `/api/v1/process`. */
    async process(operatorId, data, version = 'v1') {
        return await this.request('/api/v1/process', {
            operator_id: operatorId,
            operator_version: version,
            data
        });
    }
    /** Submits an asynchronous operator task through `/api/v1/submit`. */
    async submit(operatorId, data, version = 'v1') {
        return await this.request('/api/v1/submit', {
            operator_id: operatorId,
            operator_version: version,
            data
        });
    }
    /** Fetches the current state of a submitted task through `/api/v1/poll`. */
    async poll(operatorId, taskId, version = 'v1') {
        return await this.request('/api/v1/poll', {
            operator_id: operatorId,
            operator_version: version,
            task_id: taskId
        });
    }
    /**
     * Submits a task and polls until it reports `COMPLETED`.
     *
     * @param pollIntervalMs Delay before each poll, in milliseconds.
     * @param maxRetries     Maximum number of poll attempts.
     * @returns The poll response of the completed task.
     * @throws Error when the submit response carries no `metadata.task_id`, the
     *         task reports `FAILED` or `TIMEOUT`, or `maxRetries` polls elapse
     *         without completion.
     */
    async submitAndPoll(operatorId, data, version = 'v1', pollIntervalMs = 3000, maxRetries = 200) {
        const submitResult = await this.submit(operatorId, data, version);
        const taskId = submitResult?.metadata?.task_id;
        if (!taskId) {
            throw new Error(`Failed to submit task for ${operatorId}. Response: ${JSON.stringify(submitResult)}`);
        }
        this.logger?.info?.(`[LasApiClient] Task submitted: ${taskId} for ${operatorId}`);
        for (let attempt = 0; attempt < maxRetries; attempt++) {
            // Wait first: a freshly submitted task is polled only after one interval.
            await new Promise(resolve => setTimeout(resolve, pollIntervalMs));
            const pollResult = await this.poll(operatorId, taskId, version);
            const status = pollResult?.metadata?.task_status;
            this.logger?.debug?.(`[LasApiClient] Task ${taskId} status: ${status}`);
            if (status === 'COMPLETED') {
                return pollResult;
            }
            if (status === 'FAILED' || status === 'TIMEOUT') {
                const errorMsg = pollResult?.metadata?.error_msg || 'Unknown error';
                throw new Error(`Task ${taskId} failed with status: ${status}. Message: ${errorMsg}`);
            }
        }
        throw new Error(`Task ${taskId} timed out after ${maxRetries} polling attempts.`);
    }
    /**
     * Requests a multimodal embedding through `/api/v1/embeddings/multimodal`.
     * `dimensions`, `instructions` and `sparseEmbedding` are sent only when truthy.
     */
    async multimodalEmbedding(model, input, encodingFormat = 'float', dimensions, instructions, sparseEmbedding) {
        const body = {
            model,
            input,
            encoding_format: encodingFormat
        };
        if (dimensions)
            body.dimensions = dimensions;
        if (instructions)
            body.instructions = instructions;
        if (sparseEmbedding)
            body.sparse_embedding = sparseEmbedding;
        return await this.request('/api/v1/embeddings/multimodal', body);
    }
}
105
+ exports.LasApiClient = LasApiClient;
@@ -0,0 +1,3 @@
1
+ import { ContextLakeConfig } from '../../utils/config';
2
+ import type { AnyAgentTool } from 'openclaw/plugin-sdk';
3
+ export declare function getLasTools(pluginConfig: ContextLakeConfig, logger: any): AnyAgentTool[];