@byted-las/contextlake-openclaw 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/README.md +64 -0
  2. package/bin/contextlake-openclaw.js +5 -0
  3. package/dist/index.d.ts +113 -0
  4. package/dist/index.js +73 -0
  5. package/dist/src/client/lancedb.d.ts +30 -0
  6. package/dist/src/client/lancedb.js +113 -0
  7. package/dist/src/client/tos.d.ts +19 -0
  8. package/dist/src/client/tos.js +81 -0
  9. package/dist/src/commands/cli.d.ts +6 -0
  10. package/dist/src/commands/cli.js +78 -0
  11. package/dist/src/commands/index.d.ts +1 -0
  12. package/dist/src/commands/index.js +139 -0
  13. package/dist/src/commands/slashcmd.d.ts +14 -0
  14. package/dist/src/commands/slashcmd.js +91 -0
  15. package/dist/src/commands/tools.d.ts +219 -0
  16. package/dist/src/commands/tools.js +286 -0
  17. package/dist/src/lib/actions/ingest.d.ts +8 -0
  18. package/dist/src/lib/actions/ingest.js +123 -0
  19. package/dist/src/lib/actions/manage.d.ts +15 -0
  20. package/dist/src/lib/actions/manage.js +91 -0
  21. package/dist/src/lib/actions/retrieve.d.ts +8 -0
  22. package/dist/src/lib/actions/retrieve.js +73 -0
  23. package/dist/src/processor/loader.d.ts +7 -0
  24. package/dist/src/processor/loader.js +83 -0
  25. package/dist/src/service/embedding/factory.d.ts +2 -0
  26. package/dist/src/service/embedding/factory.js +16 -0
  27. package/dist/src/service/embedding/interface.d.ts +18 -0
  28. package/dist/src/service/embedding/interface.js +2 -0
  29. package/dist/src/service/embedding/local.d.ts +14 -0
  30. package/dist/src/service/embedding/local.js +104 -0
  31. package/dist/src/service/embedding/remote.d.ts +9 -0
  32. package/dist/src/service/embedding/remote.js +42 -0
  33. package/dist/src/service/metadata/factory.d.ts +13 -0
  34. package/dist/src/service/metadata/factory.js +48 -0
  35. package/dist/src/service/metadata/interface.d.ts +17 -0
  36. package/dist/src/service/metadata/interface.js +2 -0
  37. package/dist/src/service/metadata/local.d.ts +13 -0
  38. package/dist/src/service/metadata/local.js +49 -0
  39. package/dist/src/service/storage/factory.d.ts +2 -0
  40. package/dist/src/service/storage/factory.js +19 -0
  41. package/dist/src/service/storage/interface.d.ts +32 -0
  42. package/dist/src/service/storage/interface.js +2 -0
  43. package/dist/src/service/storage/local.d.ts +9 -0
  44. package/dist/src/service/storage/local.js +72 -0
  45. package/dist/src/skills/las-data-profiler/index.d.ts +26 -0
  46. package/dist/src/skills/las-data-profiler/index.js +231 -0
  47. package/dist/src/skills/las-data-profiler/register.d.ts +1 -0
  48. package/dist/src/skills/las-data-profiler/register.js +19 -0
  49. package/dist/src/utils/config.d.ts +1 -0
  50. package/dist/src/utils/config.js +16 -0
  51. package/index.ts +78 -0
  52. package/openclaw.plugin.json +57 -0
  53. package/package.json +52 -0
  54. package/src/client/lancedb.ts +102 -0
  55. package/src/client/tos.ts +100 -0
  56. package/src/commands/cli.ts +77 -0
  57. package/src/commands/index.ts +156 -0
  58. package/src/commands/slashcmd.ts +95 -0
  59. package/src/commands/tools.ts +286 -0
  60. package/src/lib/actions/ingest.ts +103 -0
  61. package/src/lib/actions/manage.ts +107 -0
  62. package/src/lib/actions/retrieve.ts +90 -0
  63. package/src/processor/loader.ts +58 -0
  64. package/src/service/embedding/factory.ts +13 -0
  65. package/src/service/embedding/interface.ts +21 -0
  66. package/src/service/embedding/local.ts +118 -0
  67. package/src/service/embedding/remote.ts +45 -0
  68. package/src/service/metadata/factory.ts +52 -0
  69. package/src/service/metadata/interface.ts +19 -0
  70. package/src/service/metadata/local.ts +60 -0
  71. package/src/service/storage/factory.ts +16 -0
  72. package/src/service/storage/interface.ts +36 -0
  73. package/src/service/storage/local.ts +42 -0
  74. package/src/skills/contextlake-delete/SKILL.md +36 -0
  75. package/src/skills/contextlake-ingest/SKILL.md +40 -0
  76. package/src/skills/contextlake-list/SKILL.md +22 -0
  77. package/src/skills/contextlake-retrieve/SKILL.md +37 -0
  78. package/src/skills/las-data-profiler/SKILL.md +174 -0
  79. package/src/skills/las-data-profiler/index.ts +254 -0
  80. package/src/skills/las-data-profiler/register.ts +19 -0
  81. package/src/skills/las-data-profiler/s3_catalog.py +608 -0
  82. package/src/utils/config.ts +13 -0
@@ -0,0 +1,139 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.registerAll = registerAll;
4
+ const tools_1 = require("./tools");
5
+ const cli_1 = require("./cli");
6
+ const slashcmd_1 = require("./slashcmd");
7
+ const config_1 = require("../utils/config");
8
+ const las_data_profiler_1 = require("../skills/las-data-profiler");
9
+ function registerAll(ctx, logger) {
10
+ const pluginConfig = (0, config_1.getPluginConfig)(ctx);
11
+ // Register Agent Tools
12
+ try {
13
+ const tools = (0, tools_1.getAgentTools)(pluginConfig, logger);
14
+ ctx.registerTool(tools.ingestTool);
15
+ logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.ingestTool.name}`);
16
+ ctx.registerTool(tools.retrieveTool);
17
+ logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.retrieveTool.name}`);
18
+ ctx.registerTool(tools.listTool);
19
+ logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.listTool.name}`);
20
+ ctx.registerTool(tools.deleteTool);
21
+ logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.deleteTool.name}`);
22
+ }
23
+ catch (error) {
24
+ logger.error(`[${new Date().toISOString()}] [ContextLake] Error registering agent tools: ${error.message}`, { stack: error.stack });
25
+ throw error;
26
+ }
27
+ // Register CLI
28
+ try {
29
+ logger.info(`[${new Date().toISOString()}] [ContextLake] Registering CLI commands`);
30
+ ctx.registerCli((cliContext) => {
31
+ const program = cliContext.program;
32
+ const contextlake = program.command('contextlake')
33
+ .description('Manage ContextLake knowledge base');
34
+ const commands = (0, cli_1.getCliCommands)(pluginConfig, logger);
35
+ // connect -- data source profiling (las-data-profiler)
36
+ contextlake.command('connect <datasource_name>')
37
+ .description('Connect to a data source and profile its structure, schemas, and media metadata into LanceDB')
38
+ .requiredOption('--vendor <vendor>', 'Data source type: volcengine | alibaba | tencent | aws | local')
39
+ .option('--endpoint <url>', 'S3 Endpoint URL (not needed for local)')
40
+ .option('--ak <credential_id>', 'Credential ID for the data source')
41
+ .option('--sk <credential_value>', 'Credential value for the data source')
42
+ .option('--region <region>', 'Region identifier (e.g. cn-beijing)')
43
+ .requiredOption('--bucket <bucket>', 'Bucket name (or local root directory for local vendor)')
44
+ .requiredOption('--prefix <prefix>', 'Path prefix to limit scan scope')
45
+ .option('--sample-rows <number>', 'Number of rows to sample per structured file', '100')
46
+ .action(async (datasource_name, options) => {
47
+ try {
48
+ const params = {
49
+ datasource_name,
50
+ vendor: options.vendor,
51
+ endpoint: options.endpoint,
52
+ access_key: options.ak,
53
+ secret_key: options.sk,
54
+ region: options.region,
55
+ bucket: options.bucket,
56
+ prefix: options.prefix,
57
+ sample_rows: parseInt(options.sampleRows),
58
+ };
59
+ console.log(`[contextlake connect] Connecting to datasource "${datasource_name}"...`);
60
+ console.log(` vendor: ${params.vendor}`);
61
+ console.log(` bucket: ${params.bucket}`);
62
+ console.log(` prefix: ${params.prefix}`);
63
+ const result = await (0, las_data_profiler_1.connectDataSource)(params);
64
+ console.log(JSON.stringify(result, null, 2));
65
+ }
66
+ catch (e) {
67
+ console.error('Error:', e.message);
68
+ process.exitCode = 1;
69
+ }
70
+ });
71
+ // Ingest
72
+ contextlake.command('ingest <files...>')
73
+ .description('Ingest one or more files into the knowledge base')
74
+ .option('-c, --chunk-size <number>', 'Chunk size for text splitting', '500')
75
+ .option('-o, --overlap <number>', 'Chunk overlap size', '50')
76
+ .option('-m, --metadata <json>', 'JSON metadata to attach to the documents')
77
+ .action(commands.ingestAction);
78
+ // Search
79
+ contextlake.command('search <query>')
80
+ .description('Search the knowledge base for relevant documents')
81
+ .option('-k, --top-k <number>', 'Number of top results to return', '5')
82
+ .option('-f, --filter <string>', 'Filter string for the search')
83
+ .option('-b, --binary', 'Include binary content in the result', false)
84
+ .action(commands.searchAction);
85
+ // List
86
+ contextlake.command('list')
87
+ .description('List documents currently in the knowledge base')
88
+ .option('-l, --limit <number>', 'Maximum number of results to return', '100')
89
+ .action(commands.listAction);
90
+ // Delete
91
+ contextlake.command('delete')
92
+ .description('Delete documents from the knowledge base')
93
+ .option('--ids <ids...>', 'List of specific file IDs to delete')
94
+ .option('-f, --filter <string>', 'Filter string to match documents for deletion')
95
+ .action(commands.deleteAction);
96
+ }, { commands: ['contextlake'] });
97
+ logger.info(`[${new Date().toISOString()}] [ContextLake] CLI commands registered`);
98
+ }
99
+ catch (error) {
100
+ logger.error(`[${new Date().toISOString()}] [ContextLake] Error registering CLI commands: ${error.message}`, { stack: error.stack });
101
+ throw error;
102
+ }
103
+ // Register Slash Commands
104
+ try {
105
+ if (typeof ctx.registerCommand !== 'function') {
106
+ logger.warn(`[ContextLake] registerCommand is not available in current OpenClaw version, skipping slash commands registration.`);
107
+ return;
108
+ }
109
+ const slashCommands = (0, slashcmd_1.getSlashCommands)(pluginConfig, logger);
110
+ ctx.registerCommand({
111
+ name: 'contextlake-ingest',
112
+ description: 'Ingest files into the knowledge base (usage: /contextlake-ingest file1 file2)',
113
+ acceptsArgs: true,
114
+ handler: slashCommands.ingestHandler
115
+ });
116
+ ctx.registerCommand({
117
+ name: 'contextlake-list',
118
+ description: 'List documents currently in the knowledge base',
119
+ acceptsArgs: true,
120
+ handler: slashCommands.listHandler
121
+ });
122
+ ctx.registerCommand({
123
+ name: 'contextlake-search',
124
+ description: 'Search the knowledge base for relevant documents',
125
+ acceptsArgs: true,
126
+ handler: slashCommands.searchHandler
127
+ });
128
+ ctx.registerCommand({
129
+ name: 'contextlake-delete',
130
+ description: 'Delete documents from the knowledge base by ID',
131
+ acceptsArgs: true,
132
+ handler: slashCommands.deleteHandler
133
+ });
134
+ logger.info(`[${new Date().toISOString()}] [ContextLake] Slash commands registered`);
135
+ }
136
+ catch (error) {
137
+ logger.error(`[${new Date().toISOString()}] [ContextLake] Error registering Slash commands: ${error.message}`, { stack: error.stack });
138
+ }
139
+ }
@@ -0,0 +1,14 @@
1
/**
 * Returns the four slash-command handlers keyed by name.
 *
 * Every handler accepts the command context and resolves to an object
 * carrying the reply `text`.
 */
export declare function getSlashCommands(pluginConfig: any, logger: any): Record<"ingestHandler" | "listHandler" | "searchHandler" | "deleteHandler", (commandCtx: any) => Promise<{
    text: string;
}>>;
@@ -0,0 +1,91 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.getSlashCommands = getSlashCommands;
4
+ const ingest_1 = require("../lib/actions/ingest");
5
+ const retrieve_1 = require("../lib/actions/retrieve");
6
+ const manage_1 = require("../lib/actions/manage");
7
+ function getSlashCommands(pluginConfig, logger) {
8
+ return {
9
+ ingestHandler: async (commandCtx) => {
10
+ const rawArgs = commandCtx.args || "";
11
+ const args = rawArgs.split(' ').filter((arg) => arg.trim() !== '');
12
+ logger.info(`[${new Date().toISOString()}] [ContextLake] Slash command ingest started`, { args });
13
+ try {
14
+ if (args.length === 0) {
15
+ return { text: `**Error:** Missing files. Usage: /contextlake-ingest /path/to/file1 /path/to/file2` };
16
+ }
17
+ const result = await (0, ingest_1.ingestAssets)({
18
+ files: args,
19
+ metadata: {}
20
+ }, pluginConfig, logger);
21
+ logger.info(`[${new Date().toISOString()}] [ContextLake] Slash command ingest completed`, { resultCount: result.length });
22
+ return { text: `**Ingest Results (${result.length} files processed):**\n\`\`\`json\n${JSON.stringify(result, null, 2)}\n\`\`\`` };
23
+ }
24
+ catch (e) {
25
+ logger.error(`[ContextLake] Slash ingest failed`, { error: e.message });
26
+ return { text: `**Error executing ingest:** ${e.message}` };
27
+ }
28
+ },
29
+ listHandler: async (commandCtx) => {
30
+ const rawArgs = commandCtx.args || "";
31
+ const args = rawArgs.split(' ').filter((arg) => arg.trim() !== '');
32
+ logger.info(`[${new Date().toISOString()}] [ContextLake] Slash command list started`, { args });
33
+ try {
34
+ let limit = 100;
35
+ if (args.length > 0 && !isNaN(parseInt(args[0]))) {
36
+ limit = parseInt(args[0]);
37
+ }
38
+ const result = await (0, manage_1.listAssets)({ limit }, pluginConfig, logger);
39
+ logger.info(`[${new Date().toISOString()}] [ContextLake] Slash command list completed`, { resultCount: result.length });
40
+ return { text: `**ContextLake Documents (${result.length}):**\n\`\`\`json\n${JSON.stringify(result, null, 2)}\n\`\`\`` };
41
+ }
42
+ catch (e) {
43
+ logger.error(`[ContextLake] Slash list failed`, { error: e.message });
44
+ return { text: `**Error executing list:** ${e.message}` };
45
+ }
46
+ },
47
+ searchHandler: async (commandCtx) => {
48
+ const rawArgs = commandCtx.args || "";
49
+ const args = rawArgs.split(' ').filter((arg) => arg.trim() !== '');
50
+ logger.info(`[${new Date().toISOString()}] [ContextLake] Slash command search started`, { args });
51
+ try {
52
+ if (args.length === 0) {
53
+ return { text: `**Error:** Missing search query. Usage: /contextlake-search "your query here"` };
54
+ }
55
+ const query = args.join(' ');
56
+ const result = await (0, retrieve_1.retrieveAssets)({
57
+ query,
58
+ top_k: 5,
59
+ filter: undefined,
60
+ include_binary: false
61
+ }, pluginConfig, logger);
62
+ logger.info(`[${new Date().toISOString()}] [ContextLake] Slash command search completed`, { resultCount: result.length });
63
+ return { text: `**Search Results for "${query}" (${result.length}):**\n\`\`\`json\n${JSON.stringify(result, null, 2)}\n\`\`\`` };
64
+ }
65
+ catch (e) {
66
+ logger.error(`[ContextLake] Slash search failed`, { error: e.message });
67
+ return { text: `**Error executing search:** ${e.message}` };
68
+ }
69
+ },
70
+ deleteHandler: async (commandCtx) => {
71
+ const rawArgs = commandCtx.args || "";
72
+ const args = rawArgs.split(' ').filter((arg) => arg.trim() !== '');
73
+ logger.info(`[${new Date().toISOString()}] [ContextLake] Slash command delete started`, { args });
74
+ try {
75
+ if (args.length === 0) {
76
+ return { text: `**Error:** Missing file IDs. Usage: /contextlake-delete fileId1 fileId2` };
77
+ }
78
+ const result = await (0, manage_1.deleteAssets)({
79
+ file_ids: args,
80
+ filter: undefined
81
+ }, pluginConfig, logger);
82
+ logger.info(`[${new Date().toISOString()}] [ContextLake] Slash command delete completed`, { result });
83
+ return { text: `**Delete Results:**\n\`\`\`json\n${JSON.stringify(result, null, 2)}\n\`\`\`` };
84
+ }
85
+ catch (e) {
86
+ logger.error(`[ContextLake] Slash delete failed`, { error: e.message });
87
+ return { text: `**Error executing delete:** ${e.message}` };
88
+ }
89
+ }
90
+ };
91
+ }
@@ -0,0 +1,219 @@
1
/** Descriptor of a parameter that carries only a type and a description. */
type ScalarParam = {
    type: string;
    description: string;
};
/** Descriptor of a parameter that additionally declares an `items` type. */
type ListParam = {
    type: string;
    items: {
        type: string;
    };
    description: string;
};
/** Descriptor of a parameter that additionally declares `additionalProperties`. */
type RecordParam = {
    type: string;
    description: string;
    additionalProperties: boolean;
};
/** Parameter schema without an `additionalProperties` flag. */
type OpenSchema<P, R> = {
    type: string;
    properties: P;
    required: R;
};
/** Parameter schema that also carries an `additionalProperties` flag. */
type ClosedSchema<P, R> = {
    type: string;
    properties: P;
    required: R;
    additionalProperties: boolean;
};
/** Resolution value of a tool's `execute`: either a result or an error. */
type ToolOutcome<T> = {
    success: boolean;
    result: T;
    error?: undefined;
} | {
    success: boolean;
    error: any;
    result?: undefined;
};
/** Common shape shared by all four agent tools. */
type AgentTool<TParameters, TSchema, TResult> = {
    name: string;
    label: string;
    description: string;
    parameters: TParameters;
    schema: TSchema;
    execute(toolCallId: string, params: any): Promise<ToolOutcome<TResult>>;
};
type IngestProps = {
    files: ListParam;
    metadata: RecordParam;
    chunkSize: ScalarParam;
    overlap: ScalarParam;
};
type RetrieveProps = {
    query: ScalarParam;
    top_k: ScalarParam;
    filter: ScalarParam;
    include_binary: ScalarParam;
};
type ListProps = {
    limit: ScalarParam;
};
type DeleteProps = {
    file_ids: ListParam;
    filter: ScalarParam;
};
/** Result payload reported by the delete tool on success. */
type DeleteSummary = {
    status: string;
    message: string;
    deleted_count: number;
    storage_cleaned: number;
};
/**
 * Returns the four agent tool definitions (ingest, retrieve, list, delete).
 *
 * For the list and delete tools, `schema` omits the `additionalProperties`
 * flag that `parameters` carries.
 */
export declare function getAgentTools(pluginConfig: any, logger: any): {
    ingestTool: AgentTool<ClosedSchema<IngestProps, string[]>, ClosedSchema<IngestProps, string[]>, any>;
    retrieveTool: AgentTool<ClosedSchema<RetrieveProps, string[]>, ClosedSchema<RetrieveProps, string[]>, any[]>;
    listTool: AgentTool<ClosedSchema<ListProps, never[]>, OpenSchema<ListProps, never[]>, any>;
    deleteTool: AgentTool<ClosedSchema<DeleteProps, never[]>, OpenSchema<DeleteProps, never[]>, DeleteSummary>;
};