@byted-las/contextlake-openclaw 1.0.0 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/dist/index.d.ts +2 -1
  2. package/dist/index.js +5 -5
  3. package/dist/src/client/lancedb.js +13 -4
  4. package/dist/src/commands/cli.d.ts +5 -2
  5. package/dist/src/commands/cli.js +94 -10
  6. package/dist/src/commands/index.d.ts +2 -1
  7. package/dist/src/commands/index.js +31 -35
  8. package/dist/src/commands/slashcmd.d.ts +8 -1
  9. package/dist/src/commands/slashcmd.js +90 -6
  10. package/dist/src/commands/tools.d.ts +10 -218
  11. package/dist/src/commands/tools.js +109 -104
  12. package/dist/src/lib/actions/ingest-source.d.ts +15 -0
  13. package/dist/src/lib/actions/ingest-source.js +193 -0
  14. package/dist/src/lib/actions/ingest.d.ts +14 -7
  15. package/dist/src/lib/actions/ingest.js +133 -63
  16. package/dist/src/lib/actions/las-api.d.ts +13 -0
  17. package/dist/src/lib/actions/las-api.js +105 -0
  18. package/dist/src/lib/actions/las-tools.d.ts +3 -0
  19. package/dist/src/lib/actions/las-tools.js +194 -0
  20. package/dist/src/lib/actions/las.d.ts +64 -0
  21. package/dist/src/lib/actions/las.js +72 -0
  22. package/dist/src/lib/actions/manage.d.ts +3 -2
  23. package/dist/src/{skills/las-data-profiler/index.d.ts → lib/actions/profiler.d.ts} +4 -2
  24. package/dist/src/{skills/las-data-profiler/index.js → lib/actions/profiler.js} +19 -3
  25. package/dist/src/lib/actions/retrieve.d.ts +2 -1
  26. package/dist/src/lib/actions/retrieve.js +2 -18
  27. package/{src/skills/las-data-profiler → dist/src/lib/scripts}/s3_catalog.py +10 -1
  28. package/dist/src/processor/loader.js +9 -2
  29. package/dist/src/service/embedding/factory.js +1 -10
  30. package/dist/src/service/embedding/interface.d.ts +8 -1
  31. package/dist/src/service/embedding/local.js +16 -13
  32. package/dist/src/service/embedding/remote.d.ts +7 -0
  33. package/dist/src/service/embedding/remote.js +108 -7
  34. package/dist/src/service/metadata/interface.d.ts +1 -0
  35. package/dist/src/service/metadata/local.d.ts +1 -0
  36. package/dist/src/service/metadata/local.js +6 -0
  37. package/dist/src/skills/SKILL.md +174 -0
  38. package/dist/src/skills/contextlake-delete/SKILL.md +36 -0
  39. package/dist/src/skills/contextlake-ingest/SKILL.md +40 -0
  40. package/dist/src/skills/contextlake-list/SKILL.md +22 -0
  41. package/dist/src/skills/contextlake-retrieve/SKILL.md +37 -0
  42. package/dist/src/skills/las-data-profiler/SKILL.md +174 -0
  43. package/dist/src/utils/config.d.ts +34 -1
  44. package/dist/src/utils/config.js +16 -3
  45. package/dist/src/utils/credentials.d.ts +8 -0
  46. package/dist/src/utils/credentials.js +77 -0
  47. package/index.ts +8 -8
  48. package/openclaw.plugin.json +1 -1
  49. package/package.json +8 -7
  50. package/src/client/lancedb.ts +32 -21
  51. package/src/commands/cli.ts +105 -13
  52. package/src/commands/index.ts +45 -42
  53. package/src/commands/slashcmd.ts +69 -10
  54. package/src/commands/tools.ts +142 -117
  55. package/src/lib/actions/ingest.ts +151 -75
  56. package/src/lib/actions/las-api.ts +119 -0
  57. package/src/lib/actions/las-tools.ts +196 -0
  58. package/src/lib/actions/manage.ts +6 -5
  59. package/src/{skills/las-data-profiler/index.ts → lib/actions/profiler.ts} +21 -4
  60. package/src/lib/actions/retrieve.ts +16 -34
  61. package/src/lib/scripts/s3_catalog.py +617 -0
  62. package/src/processor/loader.ts +12 -4
  63. package/src/service/embedding/factory.ts +1 -8
  64. package/src/service/embedding/interface.ts +9 -1
  65. package/src/service/embedding/remote.ts +133 -13
  66. package/src/service/metadata/interface.ts +1 -0
  67. package/src/service/metadata/local.ts +7 -0
  68. package/src/service/storage/factory.ts +2 -2
  69. package/src/utils/config.ts +61 -8
  70. package/src/utils/credentials.ts +50 -0
  71. package/bin/contextlake-openclaw.js +0 -5
  72. package/dist/src/skills/las-data-profiler/register.d.ts +0 -1
  73. package/dist/src/skills/las-data-profiler/register.js +0 -19
  74. package/src/service/embedding/local.ts +0 -118
  75. package/src/skills/las-data-profiler/register.ts +0 -19
@@ -1,219 +1,11 @@
1
- export declare function getAgentTools(pluginConfig: any, logger: any): {
2
- ingestTool: {
3
- name: string;
4
- label: string;
5
- description: string;
6
- parameters: {
7
- type: string;
8
- properties: {
9
- files: {
10
- type: string;
11
- items: {
12
- type: string;
13
- };
14
- description: string;
15
- };
16
- metadata: {
17
- type: string;
18
- description: string;
19
- additionalProperties: boolean;
20
- };
21
- chunkSize: {
22
- type: string;
23
- description: string;
24
- };
25
- overlap: {
26
- type: string;
27
- description: string;
28
- };
29
- };
30
- required: string[];
31
- additionalProperties: boolean;
32
- };
33
- schema: {
34
- type: string;
35
- properties: {
36
- files: {
37
- type: string;
38
- items: {
39
- type: string;
40
- };
41
- description: string;
42
- };
43
- metadata: {
44
- type: string;
45
- description: string;
46
- additionalProperties: boolean;
47
- };
48
- chunkSize: {
49
- type: string;
50
- description: string;
51
- };
52
- overlap: {
53
- type: string;
54
- description: string;
55
- };
56
- };
57
- required: string[];
58
- additionalProperties: boolean;
59
- };
60
- execute(toolCallId: string, params: any): Promise<{
61
- success: boolean;
62
- result: any;
63
- error?: undefined;
64
- } | {
65
- success: boolean;
66
- error: any;
67
- result?: undefined;
68
- }>;
69
- };
70
- retrieveTool: {
71
- name: string;
72
- label: string;
73
- description: string;
74
- parameters: {
75
- type: string;
76
- properties: {
77
- query: {
78
- type: string;
79
- description: string;
80
- };
81
- top_k: {
82
- type: string;
83
- description: string;
84
- };
85
- filter: {
86
- type: string;
87
- description: string;
88
- };
89
- include_binary: {
90
- type: string;
91
- description: string;
92
- };
93
- };
94
- required: string[];
95
- additionalProperties: boolean;
96
- };
97
- schema: {
98
- type: string;
99
- properties: {
100
- query: {
101
- type: string;
102
- description: string;
103
- };
104
- top_k: {
105
- type: string;
106
- description: string;
107
- };
108
- filter: {
109
- type: string;
110
- description: string;
111
- };
112
- include_binary: {
113
- type: string;
114
- description: string;
115
- };
116
- };
117
- required: string[];
118
- additionalProperties: boolean;
119
- };
120
- execute(toolCallId: string, params: any): Promise<{
121
- success: boolean;
122
- result: any[];
123
- error?: undefined;
124
- } | {
125
- success: boolean;
126
- error: any;
127
- result?: undefined;
128
- }>;
129
- };
130
- listTool: {
131
- name: string;
132
- label: string;
133
- description: string;
134
- parameters: {
135
- type: string;
136
- properties: {
137
- limit: {
138
- type: string;
139
- description: string;
140
- };
141
- };
142
- required: never[];
143
- additionalProperties: boolean;
144
- };
145
- schema: {
146
- type: string;
147
- properties: {
148
- limit: {
149
- type: string;
150
- description: string;
151
- };
152
- };
153
- required: never[];
154
- };
155
- execute(toolCallId: string, params: any): Promise<{
156
- success: boolean;
157
- result: any;
158
- error?: undefined;
159
- } | {
160
- success: boolean;
161
- error: any;
162
- result?: undefined;
163
- }>;
164
- };
165
- deleteTool: {
166
- name: string;
167
- label: string;
168
- description: string;
169
- parameters: {
170
- type: string;
171
- properties: {
172
- file_ids: {
173
- type: string;
174
- items: {
175
- type: string;
176
- };
177
- description: string;
178
- };
179
- filter: {
180
- type: string;
181
- description: string;
182
- };
183
- };
184
- required: never[];
185
- additionalProperties: boolean;
186
- };
187
- schema: {
188
- type: string;
189
- properties: {
190
- file_ids: {
191
- type: string;
192
- items: {
193
- type: string;
194
- };
195
- description: string;
196
- };
197
- filter: {
198
- type: string;
199
- description: string;
200
- };
201
- };
202
- required: never[];
203
- };
204
- execute(toolCallId: string, params: any): Promise<{
205
- success: boolean;
206
- result: {
207
- status: string;
208
- message: string;
209
- deleted_count: number;
210
- storage_cleaned: number;
211
- };
212
- error?: undefined;
213
- } | {
214
- success: boolean;
215
- error: any;
216
- result?: undefined;
217
- }>;
218
- };
1
+ import { ContextLakeConfig } from '../utils/config';
2
+ import type { AnyAgentTool } from 'openclaw/plugin-sdk';
3
+ export declare function getAgentTools(pluginConfig: ContextLakeConfig, logger: any): {
4
+ ingestTool: AnyAgentTool;
5
+ retrieveTool: AnyAgentTool;
6
+ listTool: AnyAgentTool;
7
+ deleteTool: AnyAgentTool;
8
+ lasDataProfilerTool: AnyAgentTool;
9
+ listDatasourceTool: AnyAgentTool;
10
+ lasTools: AnyAgentTool[];
219
11
  };
@@ -4,69 +4,53 @@ exports.getAgentTools = getAgentTools;
4
4
  const ingest_1 = require("../lib/actions/ingest");
5
5
  const retrieve_1 = require("../lib/actions/retrieve");
6
6
  const manage_1 = require("../lib/actions/manage");
7
+ const profiler_1 = require("../lib/actions/profiler");
8
+ const las_tools_1 = require("../lib/actions/las-tools");
7
9
  function getAgentTools(pluginConfig, logger) {
10
+ const lasTools = (0, las_tools_1.getLasTools)(pluginConfig, logger);
8
11
  return {
12
+ lasTools,
13
+ listDatasourceTool: {
14
+ name: 'contextlake-list-datasource',
15
+ label: 'ContextLake List Datasources',
16
+ description: `List all connected and profiled data sources.`,
17
+ parameters: {
18
+ type: 'object',
19
+ properties: {},
20
+ required: [],
21
+ additionalProperties: false
22
+ },
23
+ async execute(toolCallId, params) {
24
+ logger.info(`[${new Date().toISOString()}] [ContextLake] Executing list-datasource skill, toolCallId: ${toolCallId}`);
25
+ try {
26
+ const result = await (0, profiler_1.listDataSources)();
27
+ return {
28
+ content: [{ type: "text", text: JSON.stringify(result) }],
29
+ details: result
30
+ };
31
+ }
32
+ catch (error) {
33
+ logger.error(`[${new Date().toISOString()}] [ContextLake] list-datasource skill failed`, { error: error.message });
34
+ return {
35
+ content: [{ type: "text", text: String(error.message) }],
36
+ details: { error: error.message }
37
+ };
38
+ }
39
+ }
40
+ },
9
41
  ingestTool: {
10
42
  name: 'contextlake-ingest',
11
43
  label: 'ContextLake Ingest',
12
- description: `Upload, ingest, and index documents into the ContextLake Knowledge Base (知识库) / Knowledge Lake (知识湖).
44
+ description: `Process and ingest all files from a connected data source into the knowledge base.
13
45
  Use this tool when the user wants to "将知识注入", "上传文件", "入库", "添加文档", "ingest files", or "add knowledge".
14
- Supports processing of various file types including PDF, Word, Markdown, and Text.
15
- Automatically handles text extraction, cleaning, chunking, embedding generation, and storage.
16
-
17
- Example User Queries:
18
- - "帮我把这个文档注入到知识湖中"
19
- - "上传这份 PDF 到知识库"
20
- - "Please ingest these documents into ContextLake"
21
- - "将 /path/to/doc.txt 添加到知识库"`,
46
+ Supports multimodal files (text, images, audio, video, pdf) by using LAS models to understand and embed them.
47
+ Must be called after a data source has been successfully profiled via \`las-data-profiler\`.`,
22
48
  parameters: {
23
49
  type: 'object',
24
50
  properties: {
25
- files: {
26
- type: 'array',
27
- items: { type: 'string' },
28
- description: 'List of file paths to ingest'
29
- },
30
- metadata: {
31
- type: 'object',
32
- description: 'Optional JSON metadata to attach to documents',
33
- additionalProperties: true
34
- },
35
- chunkSize: {
36
- type: 'integer',
37
- description: 'Chunk size for text splitting'
38
- },
39
- overlap: {
40
- type: 'integer',
41
- description: 'Overlap size for text splitting'
42
- }
51
+ datasource_name: { type: 'string', description: 'Name of the data source previously profiled' }
43
52
  },
44
- required: ['files'],
45
- additionalProperties: false
46
- },
47
- schema: {
48
- type: 'object',
49
- properties: {
50
- files: {
51
- type: 'array',
52
- items: { type: 'string' },
53
- description: 'List of file paths to ingest'
54
- },
55
- metadata: {
56
- type: 'object',
57
- description: 'Optional JSON metadata to attach to documents',
58
- additionalProperties: true
59
- },
60
- chunkSize: {
61
- type: 'integer',
62
- description: 'Chunk size for text splitting'
63
- },
64
- overlap: {
65
- type: 'integer',
66
- description: 'Overlap size for text splitting'
67
- }
68
- },
69
- required: ['files'],
53
+ required: ['datasource_name'],
70
54
  additionalProperties: false
71
55
  },
72
56
  async execute(toolCallId, params) {
@@ -80,32 +64,32 @@ Example User Queries:
80
64
  catch (e) {
81
65
  logger.warn(`[ContextLake] Received string params, possibly toolCallId?`, { params });
82
66
  return {
83
- success: false,
84
- error: `Invalid params format: received string "${params}", expected object with 'files' array.`
67
+ content: [{ type: "text", text: `Invalid params format: received string "${params}", expected object with 'datasource_name'.` }],
68
+ details: { error: `Invalid params format: received string "${params}", expected object with 'datasource_name'.` }
85
69
  };
86
70
  }
87
71
  }
88
- if (!actualParams.files && actualParams.params && actualParams.params.files) {
72
+ if (!actualParams.datasource_name && actualParams.params && actualParams.params.datasource_name) {
89
73
  actualParams = actualParams.params;
90
74
  }
91
- if (!actualParams.files || !Array.isArray(actualParams.files)) {
75
+ if (!actualParams.datasource_name) {
92
76
  return {
93
- success: false,
94
- error: `Invalid params: 'files' must be an array. Received keys: ${Object.keys(actualParams)}`
77
+ content: [{ type: "text", text: `Invalid params: 'datasource_name' is required. Received keys: ${Object.keys(actualParams)}` }],
78
+ details: { error: `Invalid params: 'datasource_name' is required. Received keys: ${Object.keys(actualParams)}` }
95
79
  };
96
80
  }
97
- const result = await (0, ingest_1.ingestAssets)(actualParams, pluginConfig, logger);
81
+ const result = await (0, ingest_1.ingestSource)(actualParams, pluginConfig, logger);
98
82
  logger.info(`[${new Date().toISOString()}] [ContextLake] Ingest skill completed successfully`, { resultSummary: Array.isArray(result) ? `Processed ${result.length} items` : 'Success' });
99
83
  return {
100
- success: true,
101
- result
84
+ content: [{ type: "text", text: JSON.stringify(result) }],
85
+ details: result
102
86
  };
103
87
  }
104
88
  catch (error) {
105
89
  logger.error(`[${new Date().toISOString()}] [ContextLake] Ingest skill failed`, { error: error.message, stack: error.stack });
106
90
  return {
107
- success: false,
108
- error: error.message
91
+ content: [{ type: "text", text: String(error.message) }],
92
+ details: { error: error.message }
109
93
  };
110
94
  }
111
95
  }
@@ -133,17 +117,6 @@ Example User Queries:
133
117
  required: ['query'],
134
118
  additionalProperties: false
135
119
  },
136
- schema: {
137
- type: 'object',
138
- properties: {
139
- query: { type: 'string', description: 'Search query' },
140
- top_k: { type: 'integer', description: 'Number of results to return' },
141
- filter: { type: 'string', description: 'Filter string' },
142
- include_binary: { type: 'boolean', description: 'Whether to include binary content' }
143
- },
144
- required: ['query'],
145
- additionalProperties: false
146
- },
147
120
  async execute(toolCallId, params) {
148
121
  logger.info(`[${new Date().toISOString()}] [ContextLake] Executing retrieve skill, toolCallId: ${toolCallId}`, { params: JSON.stringify(params) });
149
122
  try {
@@ -163,22 +136,23 @@ Example User Queries:
163
136
  }
164
137
  if (!actualParams || typeof actualParams.query !== 'string') {
165
138
  return {
166
- success: false,
167
- error: `Invalid params: 'query' is required and must be a string. Received: ${JSON.stringify(actualParams)}`
139
+ content: [{ type: "text", text: `Invalid params: 'query' is required and must be a string. Received: ${JSON.stringify(actualParams)}` }],
140
+ details: { error: `Invalid params: 'query' is required and must be a string. Received: ${JSON.stringify(actualParams)}` }
168
141
  };
169
142
  }
170
143
  const result = await (0, retrieve_1.retrieveAssets)(actualParams, pluginConfig, logger);
171
144
  logger.info(`[${new Date().toISOString()}] [ContextLake] Retrieve skill completed`, { resultCount: Array.isArray(result) ? result.length : 0 });
172
145
  return {
173
- success: true,
174
- result
146
+ content: [{ type: "text", text: JSON.stringify(result) }],
147
+ details: result
175
148
  };
176
149
  }
177
150
  catch (error) {
178
151
  logger.error(`[${new Date().toISOString()}] [ContextLake] Retrieve skill failed`, { error: error.message, stack: error.stack });
179
152
  return {
180
- success: false,
181
- error: error.message
153
+ content: [{ type: "text", text: String(error.message) }],
154
+ details: { error: error.message
155
+ }
182
156
  };
183
157
  }
184
158
  }
@@ -201,13 +175,6 @@ Example User Queries:
201
175
  required: [],
202
176
  additionalProperties: false
203
177
  },
204
- schema: {
205
- type: 'object',
206
- properties: {
207
- limit: { type: 'integer', description: 'Limit for list action' }
208
- },
209
- required: []
210
- },
211
178
  async execute(toolCallId, params) {
212
179
  logger.info(`[${new Date().toISOString()}] [ContextLake] Executing list skill, toolCallId: ${toolCallId}`, { params: JSON.stringify(params) });
213
180
  try {
@@ -218,15 +185,16 @@ Example User Queries:
218
185
  const result = await (0, manage_1.listAssets)(actualParams, pluginConfig, logger);
219
186
  logger.info(`[${new Date().toISOString()}] [ContextLake] List skill completed`, { count: Array.isArray(result) ? result.length : 0 });
220
187
  return {
221
- success: true,
222
- result
188
+ content: [{ type: "text", text: JSON.stringify(result) }],
189
+ details: result
223
190
  };
224
191
  }
225
192
  catch (error) {
226
193
  logger.error(`[${new Date().toISOString()}] [ContextLake] List skill failed`, { error: error.message, stack: error.stack });
227
194
  return {
228
- success: false,
229
- error: error.message
195
+ content: [{ type: "text", text: String(error.message) }],
196
+ details: { error: error.message
197
+ }
230
198
  };
231
199
  }
232
200
  }
@@ -251,14 +219,6 @@ Example User Queries:
251
219
  required: [],
252
220
  additionalProperties: false
253
221
  },
254
- schema: {
255
- type: 'object',
256
- properties: {
257
- file_ids: { type: 'array', items: { type: 'string' }, description: 'File IDs to delete' },
258
- filter: { type: 'string', description: 'Filter string for deletion' }
259
- },
260
- required: []
261
- },
262
222
  async execute(toolCallId, params) {
263
223
  logger.info(`[${new Date().toISOString()}] [ContextLake] Executing delete skill, toolCallId: ${toolCallId}`, { params: JSON.stringify(params) });
264
224
  try {
@@ -269,15 +229,60 @@ Example User Queries:
269
229
  const result = await (0, manage_1.deleteAssets)(actualParams, pluginConfig, logger);
270
230
  logger.info(`[${new Date().toISOString()}] [ContextLake] Delete skill completed`, { result });
271
231
  return {
272
- success: true,
273
- result
232
+ content: [{ type: "text", text: JSON.stringify(result) }],
233
+ details: result
274
234
  };
275
235
  }
276
236
  catch (error) {
277
237
  logger.error(`[${new Date().toISOString()}] [ContextLake] Delete skill failed`, { error: error.message, stack: error.stack });
278
238
  return {
279
- success: false,
280
- error: error.message
239
+ content: [{ type: "text", text: String(error.message) }],
240
+ details: { error: error.message
241
+ }
242
+ };
243
+ }
244
+ }
245
+ },
246
+ lasDataProfilerTool: {
247
+ name: 'las-data-profiler',
248
+ label: 'LAS Data Profiler',
249
+ description: 'Connect to a data source (TOS/OSS/COS/S3/Local) and profile its structure, schemas, and media metadata into LanceDB',
250
+ parameters: {
251
+ type: 'object',
252
+ properties: {
253
+ datasource_name: { type: 'string', description: 'Name of the data source' },
254
+ vendor: { type: 'string', enum: ['volcengine', 'alibaba', 'tencent', 'aws', 'local'], description: 'Data source type' },
255
+ endpoint: { type: 'string', description: 'S3 Endpoint URL (not needed for local)' },
256
+ access_key: { type: 'string', description: 'Credential ID for the data source' },
257
+ secret_key: { type: 'string', description: 'Credential value for the data source' },
258
+ region: { type: 'string', description: 'Region identifier (e.g. cn-beijing)' },
259
+ bucket: { type: 'string', description: 'Bucket name (or local root directory for local vendor)' },
260
+ prefix: { type: 'string', description: 'Path prefix to limit scan scope' },
261
+ sample_rows: { type: 'integer', description: 'Number of rows to sample per structured file' }
262
+ },
263
+ required: ['datasource_name', 'vendor', 'bucket', 'prefix'],
264
+ additionalProperties: false
265
+ },
266
+ async execute(toolCallId, params) {
267
+ logger.info(`[${new Date().toISOString()}] [ContextLake] Executing las-data-profiler skill, toolCallId: ${toolCallId}`, { params: JSON.stringify(params) });
268
+ try {
269
+ let actualParams = params;
270
+ if (params && params.params) {
271
+ actualParams = params.params;
272
+ }
273
+ const result = await (0, profiler_1.connectDataSource)(actualParams);
274
+ logger.info(`[${new Date().toISOString()}] [ContextLake] las-data-profiler skill completed`, { result });
275
+ return {
276
+ content: [{ type: "text", text: JSON.stringify(result) }],
277
+ details: result
278
+ };
279
+ }
280
+ catch (error) {
281
+ logger.error(`[${new Date().toISOString()}] [ContextLake] las-data-profiler skill failed`, { error: error.message, stack: error.stack });
282
+ return {
283
+ content: [{ type: "text", text: String(error.message) }],
284
+ details: { error: error.message
285
+ }
281
286
  };
282
287
  }
283
288
  }
@@ -0,0 +1,15 @@
1
+ import { ContextLakeConfig } from '../../utils/config';
2
+ export interface IngestSourceParams {
3
+ datasource_name: string;
4
+ }
5
+ export declare function ingestSource(params: IngestSourceParams, config: ContextLakeConfig, logger?: any): Promise<({
6
+ file: any;
7
+ status: string;
8
+ chunks: number;
9
+ message?: undefined;
10
+ } | {
11
+ file: any;
12
+ status: string;
13
+ message: any;
14
+ chunks?: undefined;
15
+ })[]>;