@byted-las/contextlake-openclaw 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +64 -0
- package/bin/contextlake-openclaw.js +5 -0
- package/dist/index.d.ts +113 -0
- package/dist/index.js +73 -0
- package/dist/src/client/lancedb.d.ts +30 -0
- package/dist/src/client/lancedb.js +113 -0
- package/dist/src/client/tos.d.ts +19 -0
- package/dist/src/client/tos.js +81 -0
- package/dist/src/commands/cli.d.ts +6 -0
- package/dist/src/commands/cli.js +78 -0
- package/dist/src/commands/index.d.ts +1 -0
- package/dist/src/commands/index.js +139 -0
- package/dist/src/commands/slashcmd.d.ts +14 -0
- package/dist/src/commands/slashcmd.js +91 -0
- package/dist/src/commands/tools.d.ts +219 -0
- package/dist/src/commands/tools.js +286 -0
- package/dist/src/lib/actions/ingest.d.ts +8 -0
- package/dist/src/lib/actions/ingest.js +123 -0
- package/dist/src/lib/actions/manage.d.ts +15 -0
- package/dist/src/lib/actions/manage.js +91 -0
- package/dist/src/lib/actions/retrieve.d.ts +8 -0
- package/dist/src/lib/actions/retrieve.js +73 -0
- package/dist/src/processor/loader.d.ts +7 -0
- package/dist/src/processor/loader.js +83 -0
- package/dist/src/service/embedding/factory.d.ts +2 -0
- package/dist/src/service/embedding/factory.js +16 -0
- package/dist/src/service/embedding/interface.d.ts +18 -0
- package/dist/src/service/embedding/interface.js +2 -0
- package/dist/src/service/embedding/local.d.ts +14 -0
- package/dist/src/service/embedding/local.js +104 -0
- package/dist/src/service/embedding/remote.d.ts +9 -0
- package/dist/src/service/embedding/remote.js +42 -0
- package/dist/src/service/metadata/factory.d.ts +13 -0
- package/dist/src/service/metadata/factory.js +48 -0
- package/dist/src/service/metadata/interface.d.ts +17 -0
- package/dist/src/service/metadata/interface.js +2 -0
- package/dist/src/service/metadata/local.d.ts +13 -0
- package/dist/src/service/metadata/local.js +49 -0
- package/dist/src/service/storage/factory.d.ts +2 -0
- package/dist/src/service/storage/factory.js +19 -0
- package/dist/src/service/storage/interface.d.ts +32 -0
- package/dist/src/service/storage/interface.js +2 -0
- package/dist/src/service/storage/local.d.ts +9 -0
- package/dist/src/service/storage/local.js +72 -0
- package/dist/src/skills/las-data-profiler/index.d.ts +26 -0
- package/dist/src/skills/las-data-profiler/index.js +231 -0
- package/dist/src/skills/las-data-profiler/register.d.ts +1 -0
- package/dist/src/skills/las-data-profiler/register.js +19 -0
- package/dist/src/utils/config.d.ts +1 -0
- package/dist/src/utils/config.js +16 -0
- package/index.ts +78 -0
- package/openclaw.plugin.json +57 -0
- package/package.json +52 -0
- package/src/client/lancedb.ts +102 -0
- package/src/client/tos.ts +100 -0
- package/src/commands/cli.ts +77 -0
- package/src/commands/index.ts +156 -0
- package/src/commands/slashcmd.ts +95 -0
- package/src/commands/tools.ts +286 -0
- package/src/lib/actions/ingest.ts +103 -0
- package/src/lib/actions/manage.ts +107 -0
- package/src/lib/actions/retrieve.ts +90 -0
- package/src/processor/loader.ts +58 -0
- package/src/service/embedding/factory.ts +13 -0
- package/src/service/embedding/interface.ts +21 -0
- package/src/service/embedding/local.ts +118 -0
- package/src/service/embedding/remote.ts +45 -0
- package/src/service/metadata/factory.ts +52 -0
- package/src/service/metadata/interface.ts +19 -0
- package/src/service/metadata/local.ts +60 -0
- package/src/service/storage/factory.ts +16 -0
- package/src/service/storage/interface.ts +36 -0
- package/src/service/storage/local.ts +42 -0
- package/src/skills/contextlake-delete/SKILL.md +36 -0
- package/src/skills/contextlake-ingest/SKILL.md +40 -0
- package/src/skills/contextlake-list/SKILL.md +22 -0
- package/src/skills/contextlake-retrieve/SKILL.md +37 -0
- package/src/skills/las-data-profiler/SKILL.md +174 -0
- package/src/skills/las-data-profiler/index.ts +254 -0
- package/src/skills/las-data-profiler/register.ts +19 -0
- package/src/skills/las-data-profiler/s3_catalog.py +608 -0
- package/src/utils/config.ts +13 -0
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
// @ts-ignore
|
|
2
|
+
import { PluginContext } from 'openclaw/plugin-sdk';
|
|
3
|
+
import { ingestAssets } from '../lib/actions/ingest';
|
|
4
|
+
import { retrieveAssets } from '../lib/actions/retrieve';
|
|
5
|
+
import { listAssets, deleteAssets } from '../lib/actions/manage';
|
|
6
|
+
|
|
7
|
+
/** Splits anything a `catch` clause can receive into a printable message and an optional stack. */
function describeCliError(err: unknown): { message: string; stack?: string } {
  if (err instanceof Error) {
    return { message: err.message, stack: err.stack };
  }
  return { message: String(err) };
}

/**
 * Parses a numeric CLI option as a base-10 integer.
 *
 * @param flag  Option name, used only in the error message.
 * @param value Raw option value as handed over by the CLI parser.
 * @returns The parsed integer.
 * @throws Error when the value cannot be parsed as an integer, so NaN is never
 *         forwarded to the action layer.
 */
function parseCliInt(flag: string, value: unknown): number {
  const parsed = parseInt(String(value), 10);
  if (Number.isNaN(parsed)) {
    throw new Error(`Invalid integer for ${flag}: "${String(value)}"`);
  }
  return parsed;
}

/**
 * Parses the `--metadata` option.
 *
 * @param raw Raw option value; a falsy value means "no metadata".
 * @returns The parsed JSON object, or `{}` when the option is absent.
 * @throws Error when the text is not valid JSON or is not a JSON object.
 */
function parseCliMetadata(raw: unknown): Record<string, any> {
  if (!raw) {
    return {};
  }
  let parsed: unknown;
  try {
    parsed = JSON.parse(String(raw));
  } catch (err: unknown) {
    throw new Error(`Invalid --metadata JSON: ${describeCliError(err).message}`);
  }
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    throw new Error('Invalid --metadata JSON: expected a JSON object');
  }
  return parsed as Record<string, any>;
}

/**
 * Shared wrapper for every CLI action: logs start/success/failure, prints the
 * task result as pretty JSON on stdout, and on failure prints the message on
 * stderr and marks the process as failed.
 *
 * @param label   Action name interpolated into the log messages.
 * @param logger  Plugin logger (`info` / `error` are used).
 * @param context Structured data attached to the "started" log entry.
 * @param task    The actual work; its resolved value is printed.
 */
async function runCliAction(
  label: string,
  logger: any,
  context: Record<string, unknown>,
  task: () => Promise<unknown>,
): Promise<void> {
  logger.info(`[${new Date().toISOString()}] [ContextLake] CLI ${label} started`, context);
  try {
    const result = await task();
    // eslint-disable-next-line no-console
    console.log(JSON.stringify(result, null, 2));
    logger.info(`[${new Date().toISOString()}] [ContextLake] CLI ${label} success`);
  } catch (err: unknown) {
    const { message, stack } = describeCliError(err);
    console.error('Error:', message);
    logger.error(`[${new Date().toISOString()}] [ContextLake] CLI ${label} failed`, { error: message, stack });
    // A failed command must not report success to the calling shell.
    process.exitCode = 1;
  }
}

/**
 * Builds the action callbacks for the `contextlake` CLI sub-commands.
 *
 * Each action never rejects: failures are printed, logged and reflected in
 * `process.exitCode`.
 *
 * @param pluginConfig Plugin configuration forwarded unchanged to the action layer.
 * @param logger       Plugin logger.
 * @returns An object with `ingestAction`, `searchAction`, `listAction` and `deleteAction`.
 */
export function getCliCommands(pluginConfig: any, logger: any) {
  return {
    ingestAction: (files: any, options: any): Promise<void> =>
      runCliAction('ingest', logger, { files, options }, async () =>
        ingestAssets({
          files,
          metadata: parseCliMetadata(options.metadata),
          chunkSize: parseCliInt('--chunk-size', options.chunkSize),
          overlap: parseCliInt('--overlap', options.overlap),
        }, pluginConfig, logger)),

    searchAction: (query: any, options: any): Promise<void> =>
      runCliAction('search', logger, { query, options }, async () =>
        retrieveAssets({
          query,
          top_k: parseCliInt('--top-k', options.topK),
          filter: options.filter,
          include_binary: options.binary,
        }, pluginConfig, logger)),

    listAction: (options: any): Promise<void> =>
      runCliAction('list', logger, { options }, async () =>
        listAssets({
          limit: parseCliInt('--limit', options.limit),
        }, pluginConfig, logger)),

    deleteAction: (options: any): Promise<void> =>
      runCliAction('delete', logger, { options }, async () =>
        deleteAssets({
          file_ids: options.ids,
          filter: options.filter,
        }, pluginConfig, logger)),
  };
}
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
import { getAgentTools } from './tools';
|
|
2
|
+
import { getCliCommands } from './cli';
|
|
3
|
+
import { getSlashCommands } from './slashcmd';
|
|
4
|
+
import { getPluginConfig } from '../utils/config';
|
|
5
|
+
import { connectDataSource, ConnectParams } from '../skills/las-data-profiler';
|
|
6
|
+
|
|
7
|
+
/**
 * Registers every ContextLake integration point on the plugin context:
 * agent tools, the `contextlake` CLI command tree, and slash commands.
 *
 * Failures while registering tools or CLI commands are logged and re-thrown;
 * failures while registering slash commands are logged only.
 *
 * @param ctx    Plugin context providing `registerTool`, `registerCli` and,
 *               when available, `registerCommand`.
 * @param logger Plugin logger.
 */
export function registerAll(ctx: any, logger: any) {
  const pluginConfig = getPluginConfig(ctx);
  registerAgentTools(ctx, pluginConfig, logger);
  registerCliCommands(ctx, pluginConfig, logger);
  registerSlashCommands(ctx, pluginConfig, logger);
}

/** Splits a caught value into a message and an optional stack for the registration logs. */
function describeRegistrationError(err: unknown): { message: string; stack?: string } {
  if (err instanceof Error) {
    return { message: err.message, stack: err.stack };
  }
  return { message: String(err) };
}

/** Registers the four agent tools in a fixed order, logging each one; re-throws on failure. */
function registerAgentTools(ctx: any, pluginConfig: any, logger: any): void {
  try {
    const tools = getAgentTools(pluginConfig, logger);
    for (const tool of [tools.ingestTool, tools.retrieveTool, tools.listTool, tools.deleteTool]) {
      ctx.registerTool(tool);
      logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tool.name}`);
    }
  } catch (err: unknown) {
    const { message, stack } = describeRegistrationError(err);
    logger.error(`[${new Date().toISOString()}] [ContextLake] Error registering agent tools: ${message}`, { stack });
    throw err;
  }
}

/**
 * Action body of `contextlake connect`: validates the options, echoes the
 * target, calls `connectDataSource` and prints its result as JSON. Errors are
 * printed on stderr and reflected in `process.exitCode`.
 */
async function runConnectCommand(datasource_name: string, options: any): Promise<void> {
  try {
    // Explicit radix, and reject garbage up front instead of forwarding NaN.
    const sampleRows = parseInt(String(options.sampleRows), 10);
    if (Number.isNaN(sampleRows) || sampleRows < 0) {
      throw new Error(`--sample-rows must be a non-negative integer, got "${String(options.sampleRows)}"`);
    }

    const params: ConnectParams = {
      datasource_name,
      vendor: options.vendor,
      endpoint: options.endpoint,
      access_key: options.ak,
      secret_key: options.sk,
      region: options.region,
      bucket: options.bucket,
      prefix: options.prefix,
      sample_rows: sampleRows,
    };

    console.log(`[contextlake connect] Connecting to datasource "${datasource_name}"...`);
    console.log(`  vendor: ${params.vendor}`);
    console.log(`  bucket: ${params.bucket}`);
    console.log(`  prefix: ${params.prefix}`);

    const result = await connectDataSource(params);
    console.log(JSON.stringify(result, null, 2));
  } catch (err: unknown) {
    console.error('Error:', describeRegistrationError(err).message);
    process.exitCode = 1;
  }
}

/** Declares the `contextlake` CLI command tree and wires each sub-command to its action; re-throws on failure. */
function registerCliCommands(ctx: any, pluginConfig: any, logger: any): void {
  try {
    logger.info(`[${new Date().toISOString()}] [ContextLake] Registering CLI commands`);
    ctx.registerCli((cliContext: any) => {
      const contextlake = cliContext.program
        .command('contextlake')
        .description('Manage ContextLake knowledge base');

      const commands = getCliCommands(pluginConfig, logger);

      // connect -- data source profiling (las-data-profiler)
      // NOTE(review): --ak / --sk arrive via argv, which is typically visible in
      // process listings and shell history; consider an env/file based alternative.
      contextlake.command('connect <datasource_name>')
        .description('Connect to a data source and profile its structure, schemas, and media metadata into LanceDB')
        .requiredOption('--vendor <vendor>', 'Data source type: volcengine | alibaba | tencent | aws | local')
        .option('--endpoint <url>', 'S3 Endpoint URL (not needed for local)')
        .option('--ak <credential_id>', 'Credential ID for the data source')
        .option('--sk <credential_value>', 'Credential value for the data source')
        .option('--region <region>', 'Region identifier (e.g. cn-beijing)')
        .requiredOption('--bucket <bucket>', 'Bucket name (or local root directory for local vendor)')
        .requiredOption('--prefix <prefix>', 'Path prefix to limit scan scope')
        .option('--sample-rows <number>', 'Number of rows to sample per structured file', '100')
        .action(runConnectCommand);

      // Ingest
      contextlake.command('ingest <files...>')
        .description('Ingest one or more files into the knowledge base')
        .option('-c, --chunk-size <number>', 'Chunk size for text splitting', '500')
        .option('-o, --overlap <number>', 'Chunk overlap size', '50')
        .option('-m, --metadata <json>', 'JSON metadata to attach to the documents')
        .action(commands.ingestAction);

      // Search
      contextlake.command('search <query>')
        .description('Search the knowledge base for relevant documents')
        .option('-k, --top-k <number>', 'Number of top results to return', '5')
        .option('-f, --filter <string>', 'Filter string for the search')
        .option('-b, --binary', 'Include binary content in the result', false)
        .action(commands.searchAction);

      // List
      contextlake.command('list')
        .description('List documents currently in the knowledge base')
        .option('-l, --limit <number>', 'Maximum number of results to return', '100')
        .action(commands.listAction);

      // Delete
      contextlake.command('delete')
        .description('Delete documents from the knowledge base')
        .option('--ids <ids...>', 'List of specific file IDs to delete')
        .option('-f, --filter <string>', 'Filter string to match documents for deletion')
        .action(commands.deleteAction);
    }, { commands: ['contextlake'] });
    logger.info(`[${new Date().toISOString()}] [ContextLake] CLI commands registered`);
  } catch (err: unknown) {
    const { message, stack } = describeRegistrationError(err);
    logger.error(`[${new Date().toISOString()}] [ContextLake] Error registering CLI commands: ${message}`, { stack });
    throw err;
  }
}

/**
 * Registers the slash commands when the context supports `registerCommand`.
 * Skips with a warning when it does not; errors are logged, not re-thrown.
 */
function registerSlashCommands(ctx: any, pluginConfig: any, logger: any): void {
  try {
    if (typeof ctx.registerCommand !== 'function') {
      logger.warn(`[ContextLake] registerCommand is not available in current OpenClaw version, skipping slash commands registration.`);
      return;
    }

    const handlers = getSlashCommands(pluginConfig, logger);
    const definitions = [
      {
        name: 'contextlake-ingest',
        description: 'Ingest files into the knowledge base (usage: /contextlake-ingest file1 file2)',
        handler: handlers.ingestHandler,
      },
      {
        name: 'contextlake-list',
        description: 'List documents currently in the knowledge base',
        handler: handlers.listHandler,
      },
      {
        name: 'contextlake-search',
        description: 'Search the knowledge base for relevant documents',
        handler: handlers.searchHandler,
      },
      {
        name: 'contextlake-delete',
        description: 'Delete documents from the knowledge base by ID',
        handler: handlers.deleteHandler,
      },
    ];

    for (const { name, description, handler } of definitions) {
      ctx.registerCommand({ name, description, acceptsArgs: true, handler });
    }

    logger.info(`[${new Date().toISOString()}] [ContextLake] Slash commands registered`);
  } catch (err: unknown) {
    const { message, stack } = describeRegistrationError(err);
    logger.error(`[${new Date().toISOString()}] [ContextLake] Error registering Slash commands: ${message}`, { stack });
  }
}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
// @ts-ignore
|
|
2
|
+
import { PluginContext } from 'openclaw/plugin-sdk';
|
|
3
|
+
import { ingestAssets } from '../lib/actions/ingest';
|
|
4
|
+
import { retrieveAssets } from '../lib/actions/retrieve';
|
|
5
|
+
import { listAssets, deleteAssets } from '../lib/actions/manage';
|
|
6
|
+
|
|
7
|
+
/**
 * Splits a raw slash-command argument string into tokens.
 *
 * Tokens are separated by any whitespace. A token that starts with a single or
 * double quote and has a matching closing quote is taken verbatim without the
 * quotes, so `"/my docs/a.txt"` stays one argument. Unbalanced quotes are
 * treated as ordinary characters. Blank tokens are dropped.
 *
 * @param raw Value of `commandCtx.args`; anything that is not a string yields `[]`.
 */
function tokenizeSlashArgs(raw: unknown): string[] {
  if (typeof raw !== 'string') {
    return [];
  }
  const tokenPattern = /"([^"]*)"|'([^']*)'|(\S+)/g;
  const tokens: string[] = [];
  let match = tokenPattern.exec(raw);
  while (match !== null) {
    // Exactly one of the three capture groups is set per match.
    const token = match[1] !== undefined ? match[1] : match[2] !== undefined ? match[2] : match[3];
    if (token.trim() !== '') {
      tokens.push(token);
    }
    match = tokenPattern.exec(raw);
  }
  return tokens;
}

/**
 * Shared wrapper for every slash handler: tokenizes the arguments, logs the
 * start, runs `body`, and converts any thrown error into an error reply.
 *
 * @param label      Command name interpolated into log and error messages.
 * @param logger     Plugin logger (`info` / `error` are used).
 * @param commandCtx Context passed by the host; only `args` is read.
 * @param body       Command implementation receiving the parsed arguments.
 */
async function runSlashCommand(
  label: string,
  logger: any,
  commandCtx: any,
  body: (args: string[]) => Promise<{ text: string }>,
): Promise<{ text: string }> {
  const args = tokenizeSlashArgs(commandCtx ? commandCtx.args : undefined);
  logger.info(`[${new Date().toISOString()}] [ContextLake] Slash command ${label} started`, { args });
  try {
    return await body(args);
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    logger.error(`[ContextLake] Slash ${label} failed`, { error: message });
    return { text: `**Error executing ${label}:** ${message}` };
  }
}

/** Formats a value as a fenced JSON block for a chat reply. */
function asJsonBlock(value: unknown): string {
  return `\`\`\`json\n${JSON.stringify(value, null, 2)}\n\`\`\``;
}

/**
 * Builds the handlers for the `/contextlake-*` slash commands.
 *
 * Every handler resolves to `{ text }`; errors are reported in the text
 * rather than thrown.
 *
 * @param pluginConfig Plugin configuration forwarded unchanged to the action layer.
 * @param logger       Plugin logger.
 * @returns An object with `ingestHandler`, `listHandler`, `searchHandler` and `deleteHandler`.
 */
export function getSlashCommands(pluginConfig: any, logger: any) {
  return {
    ingestHandler: (commandCtx: any): Promise<{ text: string }> =>
      runSlashCommand('ingest', logger, commandCtx, async (args) => {
        if (args.length === 0) {
          return { text: `**Error:** Missing files. Usage: /contextlake-ingest /path/to/file1 /path/to/file2` };
        }
        const result = await ingestAssets({
          files: args,
          metadata: {}
        }, pluginConfig, logger);

        logger.info(`[${new Date().toISOString()}] [ContextLake] Slash command ingest completed`, { resultCount: result.length });
        return { text: `**Ingest Results (${result.length} files processed):**\n${asJsonBlock(result)}` };
      }),

    listHandler: (commandCtx: any): Promise<{ text: string }> =>
      runSlashCommand('list', logger, commandCtx, async (args) => {
        // Default page size; only a positive integer argument overrides it.
        let limit = 100;
        if (args.length > 0) {
          const requested = parseInt(args[0], 10);
          if (!Number.isNaN(requested) && requested > 0) {
            limit = requested;
          }
        }
        const result = await listAssets({ limit }, pluginConfig, logger);
        logger.info(`[${new Date().toISOString()}] [ContextLake] Slash command list completed`, { resultCount: result.length });
        return { text: `**ContextLake Documents (${result.length}):**\n${asJsonBlock(result)}` };
      }),

    searchHandler: (commandCtx: any): Promise<{ text: string }> =>
      runSlashCommand('search', logger, commandCtx, async (args) => {
        if (args.length === 0) {
          return { text: `**Error:** Missing search query. Usage: /contextlake-search "your query here"` };
        }
        // Quotes were already stripped by the tokenizer, so a quoted phrase is searched as typed.
        const query = args.join(' ');
        const result = await retrieveAssets({
          query,
          top_k: 5,
          filter: undefined,
          include_binary: false
        }, pluginConfig, logger);
        logger.info(`[${new Date().toISOString()}] [ContextLake] Slash command search completed`, { resultCount: result.length });
        return { text: `**Search Results for "${query}" (${result.length}):**\n${asJsonBlock(result)}` };
      }),

    deleteHandler: (commandCtx: any): Promise<{ text: string }> =>
      runSlashCommand('delete', logger, commandCtx, async (args) => {
        if (args.length === 0) {
          return { text: `**Error:** Missing file IDs. Usage: /contextlake-delete fileId1 fileId2` };
        }
        const result = await deleteAssets({
          file_ids: args,
          filter: undefined
        }, pluginConfig, logger);
        logger.info(`[${new Date().toISOString()}] [ContextLake] Slash command delete completed`, { result });
        return { text: `**Delete Results:**\n${asJsonBlock(result)}` };
      }),
  };
}
|
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
import { ingestAssets } from '../lib/actions/ingest';
|
|
2
|
+
import { retrieveAssets } from '../lib/actions/retrieve';
|
|
3
|
+
import { listAssets, deleteAssets } from '../lib/actions/manage';
|
|
4
|
+
|
|
5
|
+
/** Result envelope returned by every tool's `execute`. */
type ToolOutcome = { success: true; result: unknown } | { success: false; error: string };

/** True for non-null, non-array objects. */
function isParamRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Normalizes the raw `params` argument of a tool call into a plain object.
 * A string is parsed as JSON first; anything that does not end up as a plain
 * object yields `undefined`.
 */
function coerceToolParams(raw: unknown): Record<string, unknown> | undefined {
  let candidate = raw;
  if (typeof raw === 'string') {
    try {
      candidate = JSON.parse(raw);
    } catch (_err) {
      return undefined;
    }
  }
  return isParamRecord(candidate) ? candidate : undefined;
}

/**
 * Unwraps a `{ params: {...} }` envelope: when `value` carries none of the
 * tool's own keys but has an object under `params`, that inner object is used.
 */
function unwrapToolParams(value: Record<string, unknown>, ownKeys: readonly string[]): Record<string, unknown> {
  const hasOwnKey = ownKeys.some((key) => value[key] !== undefined);
  return !hasOwnKey && isParamRecord(value.params) ? value.params : value;
}

/**
 * Produces the `parameters` / `schema` pair of a tool from one definition so
 * the two can never drift apart. `schema` is a deep copy, not a shared reference.
 */
function schemaPair<T>(schema: T): { parameters: T; schema: T } {
  return { parameters: schema, schema: JSON.parse(JSON.stringify(schema)) as T };
}

/** Logs a failed tool execution and converts the thrown value into a failure outcome. */
function reportToolFailure(logger: any, title: string, err: unknown): ToolOutcome {
  const message = err instanceof Error ? err.message : String(err);
  const stack = err instanceof Error ? err.stack : undefined;
  logger.error(`[${new Date().toISOString()}] [ContextLake] ${title} skill failed`, { error: message, stack });
  return { success: false, error: message };
}

/**
 * Builds the four ContextLake agent tools (ingest, retrieve, list, delete).
 *
 * Each tool exposes `name`, `label`, `description`, identical `parameters` and
 * `schema` JSON Schemas, and an `execute(toolCallId, params)` method that never
 * throws for action failures: it resolves to `{ success: true, result }` or
 * `{ success: false, error }`.
 *
 * `params` may arrive as an object, as a JSON string, or wrapped in a
 * `{ params: {...} }` envelope; all three are accepted by every tool.
 *
 * @param pluginConfig Plugin configuration forwarded unchanged to the action layer.
 * @param logger       Plugin logger (`info` / `warn` / `error` are used).
 */
export function getAgentTools(pluginConfig: any, logger: any) {
  return {
    ingestTool: {
      name: 'contextlake-ingest',
      label: 'ContextLake Ingest',
      description: `Upload, ingest, and index documents into the ContextLake Knowledge Base (知识库) / Knowledge Lake (知识湖).
Use this tool when the user wants to "将知识注入", "上传文件", "入库", "添加文档", "ingest files", or "add knowledge".
Supports processing of various file types including PDF, Word, Markdown, and Text.
Automatically handles text extraction, cleaning, chunking, embedding generation, and storage.

Example User Queries:
- "帮我把这个文档注入到知识湖中"
- "上传这份 PDF 到知识库"
- "Please ingest these documents into ContextLake"
- "将 /path/to/doc.txt 添加到知识库"`,
      ...schemaPair({
        type: 'object',
        properties: {
          files: {
            type: 'array',
            items: { type: 'string' },
            description: 'List of file paths to ingest'
          },
          metadata: {
            type: 'object',
            description: 'Optional JSON metadata to attach to documents',
            additionalProperties: true
          },
          chunkSize: {
            type: 'integer',
            description: 'Chunk size for text splitting'
          },
          overlap: {
            type: 'integer',
            description: 'Overlap size for text splitting'
          }
        },
        required: ['files'],
        additionalProperties: false
      }),
      async execute(toolCallId: string, params: any): Promise<ToolOutcome> {
        logger.info(`[${new Date().toISOString()}] [ContextLake] Executing ingest skill, toolCallId: ${toolCallId}`, { params: JSON.stringify(params) });

        try {
          const resolved = coerceToolParams(params);
          if (resolved === undefined && typeof params === 'string') {
            logger.warn(`[ContextLake] Received string params, possibly toolCallId?`, { params });
            return {
              success: false,
              error: `Invalid params format: received string "${params}", expected object with 'files' array.`
            };
          }

          // Typed `any` because the action's parameter type is declared outside this block.
          const actualParams: any = resolved ? unwrapToolParams(resolved, ['files']) : undefined;
          if (!actualParams || !Array.isArray(actualParams.files)) {
            return {
              success: false,
              error: `Invalid params: 'files' must be an array. Received keys: ${Object.keys(actualParams || {})}`
            };
          }

          const result = await ingestAssets(actualParams, pluginConfig, logger);
          logger.info(`[${new Date().toISOString()}] [ContextLake] Ingest skill completed successfully`, { resultSummary: Array.isArray(result) ? `Processed ${result.length} items` : 'Success' });
          return { success: true, result };
        } catch (error: unknown) {
          return reportToolFailure(logger, 'Ingest', error);
        }
      }
    },
    retrieveTool: {
      name: 'contextlake-retrieve',
      label: 'ContextLake Retrieve',
      description: `Search, query, and retrieve relevant information from the ContextLake Knowledge Base (知识库) / Knowledge Lake (知识湖).
Use this tool when the user wants to "搜索知识", "获取信息", "召回文档", "查询知识库", "search knowledge base", or "retrieve documents".
Uses vector similarity search to find semantically related document chunks.

Example User Queries:
- "知识库里有关于产品安装的文档吗?"
- "帮我从知识湖中召回关于财务报表的资料"
- "Search the knowledge base for deployment guides"
- "根据知识库内容,回答如何配置网关"`,
      ...schemaPair({
        type: 'object',
        properties: {
          query: { type: 'string', description: 'Search query' },
          top_k: { type: 'integer', description: 'Number of results to return' },
          filter: { type: 'string', description: 'Filter string' },
          include_binary: { type: 'boolean', description: 'Whether to include binary content' }
        },
        required: ['query'],
        additionalProperties: false
      }),
      async execute(toolCallId: string, params: any): Promise<ToolOutcome> {
        logger.info(`[${new Date().toISOString()}] [ContextLake] Executing retrieve skill, toolCallId: ${toolCallId}`, { params: JSON.stringify(params) });

        try {
          let resolved = coerceToolParams(params);
          if (resolved === undefined && typeof params === 'string') {
            // A bare string (including one that happens to be valid JSON, e.g. "42")
            // is treated as the query itself.
            logger.warn(`[ContextLake] Received string params for retrieve, attempting fallback parsing`, { params });
            resolved = { query: params };
          }

          // Typed `any` because the action's parameter type is declared outside this block.
          const actualParams: any = resolved ? unwrapToolParams(resolved, ['query']) : undefined;
          if (!actualParams || typeof actualParams.query !== 'string') {
            return {
              success: false,
              error: `Invalid params: 'query' is required and must be a string. Received: ${JSON.stringify(actualParams === undefined ? params : actualParams)}`
            };
          }

          const result = await retrieveAssets(actualParams, pluginConfig, logger);
          logger.info(`[${new Date().toISOString()}] [ContextLake] Retrieve skill completed`, { resultCount: Array.isArray(result) ? result.length : 0 });
          return { success: true, result };
        } catch (error: unknown) {
          return reportToolFailure(logger, 'Retrieve', error);
        }
      }
    },
    listTool: {
      name: 'contextlake-list',
      label: 'ContextLake List',
      description: `List documents and assets currently in the ContextLake Knowledge Base (知识库) / Knowledge Lake (知识湖).
Use this tool when the user wants to "列出所有知识", "查看知识库文件", "显示文档列表", "list knowledge", or "show documents".

Example User Queries:
- "知识湖里目前有哪些文件?"
- "列出前10个知识库文档"
- "Show me all documents in the knowledge base"`,
      ...schemaPair({
        type: 'object',
        properties: {
          limit: { type: 'integer', description: 'Limit for list action' }
        },
        required: [],
        additionalProperties: false
      }),
      async execute(toolCallId: string, params: any): Promise<ToolOutcome> {
        logger.info(`[${new Date().toISOString()}] [ContextLake] Executing list skill, toolCallId: ${toolCallId}`, { params: JSON.stringify(params) });

        try {
          const resolved = coerceToolParams(params);
          // All list parameters are optional, so missing or unusable params mean "no options".
          const actualParams: any = resolved ? unwrapToolParams(resolved, ['limit']) : {};
          const result = await listAssets(actualParams, pluginConfig, logger);
          logger.info(`[${new Date().toISOString()}] [ContextLake] List skill completed`, { count: Array.isArray(result) ? result.length : 0 });
          return { success: true, result };
        } catch (error: unknown) {
          return reportToolFailure(logger, 'List', error);
        }
      }
    },
    deleteTool: {
      name: 'contextlake-delete',
      label: 'ContextLake Delete',
      description: `Delete documents and assets from the ContextLake Knowledge Base (知识库) / Knowledge Lake (知识湖).
Use this tool when the user wants to "删除某个文档", "清理知识库", "移除文件", "delete knowledge", or "remove documents".
Supports deleting documents by their specific IDs or by applying a SQL-like filter.

Example User Queries:
- "删除 ID 为 12345 的文档"
- "Please delete the old architecture document from the knowledge base"
- "从知识湖中移除 category 是 finance 的所有记录"`,
      ...schemaPair({
        type: 'object',
        properties: {
          file_ids: { type: 'array', items: { type: 'string' }, description: 'File IDs to delete' },
          filter: { type: 'string', description: 'Filter string for deletion' }
        },
        required: [],
        additionalProperties: false
      }),
      async execute(toolCallId: string, params: any): Promise<ToolOutcome> {
        logger.info(`[${new Date().toISOString()}] [ContextLake] Executing delete skill, toolCallId: ${toolCallId}`, { params: JSON.stringify(params) });

        try {
          const resolved = coerceToolParams(params);
          const actualParams: any = resolved ? unwrapToolParams(resolved, ['file_ids', 'filter']) : {};

          // Refuse a delete without any selector rather than forwarding it.
          // NOTE(review): how the action layer treats an empty selector is not
          // visible from here; confirm this guard matches the intended contract.
          const hasIds = Array.isArray(actualParams.file_ids) && actualParams.file_ids.length > 0;
          const hasFilter = typeof actualParams.filter === 'string' && actualParams.filter.trim() !== '';
          if (!hasIds && !hasFilter) {
            return {
              success: false,
              error: `Invalid params: provide a non-empty 'file_ids' array or a 'filter' string.`
            };
          }

          const result = await deleteAssets(actualParams, pluginConfig, logger);
          logger.info(`[${new Date().toISOString()}] [ContextLake] Delete skill completed`, { result });
          return { success: true, result };
        } catch (error: unknown) {
          return reportToolFailure(logger, 'Delete', error);
        }
      }
    }
  };
}
|