@byted-las/contextlake-openclaw 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -23
- package/dist/src/client/lancedb.js +1 -1
- package/dist/src/commands/cli.d.ts +2 -1
- package/dist/src/commands/cli.js +33 -8
- package/dist/src/commands/index.js +31 -6
- package/dist/src/commands/slashcmd.d.ts +6 -0
- package/dist/src/commands/slashcmd.js +90 -6
- package/dist/src/commands/tools.d.ts +5 -0
- package/dist/src/commands/tools.js +134 -39
- package/dist/src/lib/actions/ingest-source.d.ts +15 -0
- package/dist/src/lib/actions/ingest-source.js +193 -0
- package/dist/src/lib/actions/ingest.d.ts +13 -7
- package/dist/src/lib/actions/ingest.js +133 -58
- package/dist/src/lib/actions/lance-tools.d.ts +6 -0
- package/dist/src/lib/actions/lance-tools.js +51 -0
- package/dist/src/lib/actions/las-api.d.ts +13 -0
- package/dist/src/lib/actions/las-api.js +105 -0
- package/dist/src/lib/actions/las-tools.d.ts +3 -0
- package/dist/src/lib/actions/las-tools.js +194 -0
- package/dist/src/lib/actions/las.d.ts +64 -0
- package/dist/src/lib/actions/las.js +72 -0
- package/dist/src/lib/actions/profiler.d.ts +3 -0
- package/dist/src/lib/actions/profiler.js +135 -140
- package/dist/src/lib/actions/retrieve.js +2 -8
- package/dist/src/lib/actions/s3-tools.d.ts +18 -0
- package/dist/src/lib/actions/s3-tools.js +167 -0
- package/dist/src/lib/scripts/s3_catalog.py +10 -1
- package/dist/src/service/embedding/factory.js +1 -10
- package/dist/src/service/embedding/interface.d.ts +5 -0
- package/dist/src/service/embedding/remote.d.ts +1 -0
- package/dist/src/service/embedding/remote.js +31 -0
- package/dist/src/service/metadata/interface.d.ts +1 -0
- package/dist/src/service/metadata/local.d.ts +1 -0
- package/dist/src/service/metadata/local.js +6 -0
- package/dist/src/skills/SKILL.md +14 -151
- package/dist/src/skills/las-data-profiler/SKILL.md +14 -151
- package/dist/src/utils/config.js +13 -3
- package/dist/src/utils/credentials.d.ts +12 -0
- package/dist/src/utils/credentials.js +77 -0
- package/openclaw.plugin.json +1 -1
- package/package.json +2 -5
- package/src/client/lancedb.ts +1 -1
- package/src/commands/cli.ts +37 -9
- package/src/commands/index.ts +39 -6
- package/src/commands/slashcmd.ts +67 -7
- package/src/commands/tools.ts +140 -45
- package/src/lib/actions/ingest.ts +151 -71
- package/src/lib/actions/lance-tools.ts +23 -0
- package/src/lib/actions/las-api.ts +119 -0
- package/src/lib/actions/las-tools.ts +196 -0
- package/src/lib/actions/profiler.ts +134 -161
- package/src/lib/actions/retrieve.ts +2 -10
- package/src/lib/actions/s3-tools.ts +148 -0
- package/src/service/embedding/factory.ts +1 -8
- package/src/service/embedding/interface.ts +6 -0
- package/src/service/embedding/remote.ts +36 -0
- package/src/service/metadata/interface.ts +1 -0
- package/src/service/metadata/local.ts +7 -0
- package/src/skills/las-data-profiler/SKILL.md +14 -151
- package/src/utils/config.ts +15 -3
- package/src/utils/credentials.ts +56 -0
- package/bin/contextlake-openclaw.js +0 -5
- package/src/lib/scripts/s3_catalog.py +0 -608
- package/src/service/embedding/local.ts +0 -121
package/README.md
CHANGED
|
@@ -1,27 +1,32 @@
|
|
|
1
1
|
# ContextLake OpenClaw Plugin
|
|
2
2
|
|
|
3
|
-
A powerful,
|
|
3
|
+
A powerful, multi-modal Knowledge Base / Knowledge Lake (知识库/知识湖) plugin for the OpenClaw Agent framework.
|
|
4
4
|
|
|
5
|
-
This plugin allows OpenClaw agents to natively understand, index, and retrieve
|
|
5
|
+
This plugin allows OpenClaw agents to natively understand, profile, index, and retrieve data from diverse data sources (Local, TOS, S3, etc.). It supports advanced multi-modal document ingestion (Text, PDF, Images, Audio, Video) powered by Large AI Services (LAS) and vector similarity search.
|
|
6
6
|
|
|
7
7
|
## Features
|
|
8
8
|
|
|
9
|
-
- **
|
|
10
|
-
- **
|
|
11
|
-
- **
|
|
12
|
-
- **
|
|
13
|
-
- **
|
|
14
|
-
|
|
15
|
-
- Uses `
|
|
16
|
-
-
|
|
9
|
+
- **Data Profiling**: Connect to heterogeneous data sources (TOS/OSS/COS/S3/Local) and auto-profile schemas and media metadata.
|
|
10
|
+
- **Multi-modal Ingestion**: Automatically extracts text, converts PDFs, parses audio (ASR), understands video, and embeds images directly using LAS multi-modal models.
|
|
11
|
+
- **Agentic Tools**: Exposes `contextlake-ingest`, `contextlake-retrieve`, `contextlake-list`, `contextlake-delete`, `las-data-profiler`, and `contextlake-list-datasource` skills directly to the LLM.
|
|
12
|
+
- **Slash Commands**: Quick chat actions (`/contextlake-search`, `/contextlake-ingest`, `/contextlake-profiler`) for fast human-in-the-loop operations.
|
|
13
|
+
- **CLI Management**: Full command-line interface for bulk operations and pipeline integration.
|
|
14
|
+
- **Pluggable Architecture**:
|
|
15
|
+
- Uses `LanceDB` for local embedded multi-modal vector storage.
|
|
16
|
+
- Pluggable storage providers (Local and TOS).
|
|
17
|
+
|
|
18
|
+
## Documentation
|
|
19
|
+
|
|
20
|
+
For a comprehensive guide on installation, initialization (`onboard`), connecting data sources (`connect`/`profiler`), ingestion, and searching, please refer to the [**USAGE.md**](./USAGE.md).
|
|
21
|
+
|
|
22
|
+
For detailed architecture and internal workflows, please refer to [AGENTS.md](./AGENTS.md).
|
|
17
23
|
|
|
18
24
|
## Installation
|
|
19
25
|
|
|
20
26
|
Assuming you are in an OpenClaw environment:
|
|
21
27
|
|
|
22
28
|
```bash
|
|
23
|
-
npm install
|
|
24
|
-
npm run build
|
|
29
|
+
npm install @byted-las/contextlake-openclaw
|
|
25
30
|
```
|
|
26
31
|
|
|
27
32
|
Then register the plugin in your OpenClaw configuration:
|
|
@@ -30,7 +35,7 @@ Then register the plugin in your OpenClaw configuration:
|
|
|
30
35
|
{
|
|
31
36
|
"plugins": [
|
|
32
37
|
{
|
|
33
|
-
"package": "contextlake-openclaw",
|
|
38
|
+
"package": "@byted-las/contextlake-openclaw",
|
|
34
39
|
"config": {
|
|
35
40
|
"metadata_storage": {
|
|
36
41
|
"type": "local",
|
|
@@ -46,19 +51,36 @@ Then register the plugin in your OpenClaw configuration:
|
|
|
46
51
|
}
|
|
47
52
|
```
|
|
48
53
|
|
|
54
|
+
## Quick Start (Slash Commands)
|
|
55
|
+
|
|
56
|
+
1. **Initialize Credentials** (Run in terminal):
|
|
57
|
+
```bash
|
|
58
|
+
openclaw contextlake onboard
|
|
59
|
+
```
|
|
60
|
+
2. **Profile a Data Source**:
|
|
61
|
+
```text
|
|
62
|
+
/contextlake-profiler my_data local /path/to/my/files .
|
|
63
|
+
```
|
|
64
|
+
3. **Ingest the Data Source** (Processes PDF, Audio, Images, Text):
|
|
65
|
+
```text
|
|
66
|
+
/contextlake-ingest my_data
|
|
67
|
+
```
|
|
68
|
+
4. **Search your Knowledge Base**:
|
|
69
|
+
```text
|
|
70
|
+
/contextlake-search How do I configure the API?
|
|
71
|
+
```
|
|
72
|
+
|
|
49
73
|
## Agent Tools (Skills)
|
|
50
74
|
|
|
51
75
|
The LLM agent automatically has access to the following tools:
|
|
52
76
|
|
|
53
|
-
1. **`
|
|
54
|
-
2. **`contextlake-
|
|
55
|
-
3. **`contextlake-
|
|
56
|
-
4. **`contextlake-
|
|
77
|
+
1. **`las-data-profiler`**: Connect to a data source and profile its structure.
|
|
78
|
+
2. **`contextlake-list-datasource`**: List all connected and profiled data sources.
|
|
79
|
+
3. **`contextlake-ingest`**: Process and ingest all profiled files into the knowledge base via multi-modal embeddings.
|
|
80
|
+
4. **`contextlake-retrieve`**: Performs vector search on the indexed documents based on a query.
|
|
81
|
+
5. **`contextlake-list`**: Lists all currently indexed documents.
|
|
82
|
+
6. **`contextlake-delete`**: Removes documents by ID or filter.
|
|
57
83
|
|
|
58
84
|
You can simply talk to the agent:
|
|
59
|
-
> "
|
|
60
|
-
> "在知识湖中检索关于架构设计的文档。"
|
|
61
|
-
|
|
62
|
-
## Development
|
|
63
|
-
|
|
64
|
-
For detailed architecture and internal workflows, please refer to [AGENTS.md](./AGENTS.md).
|
|
85
|
+
> "帮我把 `my_data` 数据源注入到知识库中。"
|
|
86
|
+
> "在知识湖中检索关于架构设计的文档。"
|
|
@@ -97,7 +97,7 @@ class ContextLakeLanceDBClient {
|
|
|
97
97
|
}
|
|
98
98
|
return await fallbackQuery.toArray();
|
|
99
99
|
}
|
|
100
|
-
const vector = await this.embeddingProvider.
|
|
100
|
+
const vector = await this.embeddingProvider.generateMultimodalEmbedding([{ type: 'text', text: query }]);
|
|
101
101
|
// @ts-ignore
|
|
102
102
|
let search = table.vectorSearch(vector).limit(normalizedLimit);
|
|
103
103
|
if (filter) {
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import { ContextLakeConfig } from '../utils/config';
|
|
2
2
|
export declare function getCliCommands(pluginConfig: ContextLakeConfig, logger: any): {
|
|
3
3
|
connectAction: (datasource_name: string, options: any) => Promise<void>;
|
|
4
|
-
ingestAction: (
|
|
4
|
+
ingestAction: (datasource_name: string) => Promise<void>;
|
|
5
5
|
searchAction: (query: any, options: any) => Promise<void>;
|
|
6
6
|
listAction: (options: any) => Promise<void>;
|
|
7
7
|
deleteAction: (options: any) => Promise<void>;
|
|
8
|
+
onboardAction: () => Promise<void>;
|
|
8
9
|
};
|
package/dist/src/commands/cli.js
CHANGED
|
@@ -6,6 +6,7 @@ const ingest_1 = require("../lib/actions/ingest");
|
|
|
6
6
|
const retrieve_1 = require("../lib/actions/retrieve");
|
|
7
7
|
const manage_1 = require("../lib/actions/manage");
|
|
8
8
|
const profiler_1 = require("../lib/actions/profiler");
|
|
9
|
+
const credentials_1 = require("../utils/credentials");
|
|
9
10
|
function parseOptionalInt(value, fallback) {
|
|
10
11
|
const parsed = Number.parseInt(String(value), 10);
|
|
11
12
|
return Number.isFinite(parsed) ? parsed : fallback;
|
|
@@ -67,15 +68,11 @@ function getCliCommands(pluginConfig, logger) {
|
|
|
67
68
|
process.exitCode = 1;
|
|
68
69
|
}
|
|
69
70
|
},
|
|
70
|
-
ingestAction: async (
|
|
71
|
-
logger.info(`[${new Date().toISOString()}] [ContextLake] CLI ingest started`, {
|
|
71
|
+
ingestAction: async (datasource_name) => {
|
|
72
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] CLI ingest started`, { datasource_name });
|
|
72
73
|
try {
|
|
73
|
-
const
|
|
74
|
-
|
|
75
|
-
files,
|
|
76
|
-
metadata,
|
|
77
|
-
chunkSize: parseOptionalInt(options.chunkSize, 500),
|
|
78
|
-
overlap: parseOptionalInt(options.overlap, 50)
|
|
74
|
+
const result = await (0, ingest_1.ingestSource)({
|
|
75
|
+
datasource_name
|
|
79
76
|
}, pluginConfig, logger);
|
|
80
77
|
// eslint-disable-next-line no-console
|
|
81
78
|
console.log(JSON.stringify(result, null, 2));
|
|
@@ -134,6 +131,34 @@ function getCliCommands(pluginConfig, logger) {
|
|
|
134
131
|
console.error('Error:', e.message);
|
|
135
132
|
logger.error(`[${new Date().toISOString()}] [ContextLake] CLI delete failed`, { error: e.message, stack: e.stack });
|
|
136
133
|
}
|
|
134
|
+
},
|
|
135
|
+
onboardAction: async () => {
|
|
136
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] CLI onboard started`);
|
|
137
|
+
try {
|
|
138
|
+
const currentCreds = (0, credentials_1.loadCredentials)();
|
|
139
|
+
// eslint-disable-next-line no-console
|
|
140
|
+
console.log('Welcome to ContextLake Onboarding!');
|
|
141
|
+
// eslint-disable-next-line no-console
|
|
142
|
+
console.log('Please provide your credentials below. Press enter to keep the current value.');
|
|
143
|
+
const lasApiKey = await (0, credentials_1.promptForInput)('LAS_API_KEY', currentCreds.LAS_API_KEY);
|
|
144
|
+
const accessKey = await (0, credentials_1.promptForInput)('ACCESS_KEY', currentCreds.ACCESS_KEY || currentCreds.VOLCENGINE_ACCESS_KEY);
|
|
145
|
+
const secretKey = await (0, credentials_1.promptForInput)('SECRET_KEY', currentCreds.SECRET_KEY || currentCreds.VOLCENGINE_SECRET_KEY);
|
|
146
|
+
const region = await (0, credentials_1.promptForInput)('REGION', currentCreds.REGION || currentCreds.VOLCENGINE_REGION || 'cn-beijing');
|
|
147
|
+
const newCreds = {
|
|
148
|
+
LAS_API_KEY: lasApiKey,
|
|
149
|
+
ACCESS_KEY: accessKey,
|
|
150
|
+
SECRET_KEY: secretKey,
|
|
151
|
+
REGION: region
|
|
152
|
+
};
|
|
153
|
+
(0, credentials_1.saveCredentials)(newCreds);
|
|
154
|
+
// eslint-disable-next-line no-console
|
|
155
|
+
console.log('Credentials saved successfully!');
|
|
156
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] CLI onboard success`);
|
|
157
|
+
}
|
|
158
|
+
catch (e) {
|
|
159
|
+
console.error('Error during onboarding:', e.message);
|
|
160
|
+
logger.error(`[${new Date().toISOString()}] [ContextLake] CLI onboard failed`, { error: e.message, stack: e.stack });
|
|
161
|
+
}
|
|
137
162
|
}
|
|
138
163
|
};
|
|
139
164
|
}
|
|
@@ -20,6 +20,18 @@ function registerAll(ctx, logger) {
|
|
|
20
20
|
logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.deleteTool.name}`);
|
|
21
21
|
ctx.registerTool(tools.lasDataProfilerTool);
|
|
22
22
|
logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.lasDataProfilerTool.name}`);
|
|
23
|
+
ctx.registerTool(tools.listS3ObjectsTool);
|
|
24
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.listS3ObjectsTool.name}`);
|
|
25
|
+
ctx.registerTool(tools.readS3ObjectTool);
|
|
26
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.readS3ObjectTool.name}`);
|
|
27
|
+
ctx.registerTool(tools.writeLanceCatalogTool);
|
|
28
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.writeLanceCatalogTool.name}`);
|
|
29
|
+
ctx.registerTool(tools.listDatasourceTool);
|
|
30
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.listDatasourceTool.name}`);
|
|
31
|
+
for (const lasTool of tools.lasTools) {
|
|
32
|
+
ctx.registerTool(lasTool);
|
|
33
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${lasTool.name}`);
|
|
34
|
+
}
|
|
23
35
|
}
|
|
24
36
|
catch (error) {
|
|
25
37
|
logger.error(`[${new Date().toISOString()}] [ContextLake] Error registering agent tools: ${error.message}${error.stack ? '\\n' + error.stack : ''}`);
|
|
@@ -46,11 +58,8 @@ function registerAll(ctx, logger) {
|
|
|
46
58
|
.option('--sample-rows <number>', 'Number of rows to sample per structured file', '100')
|
|
47
59
|
.action(commands.connectAction);
|
|
48
60
|
// Ingest
|
|
49
|
-
contextlake.command('ingest <
|
|
50
|
-
.description('
|
|
51
|
-
.option('-c, --chunk-size <number>', 'Chunk size for text splitting', '500')
|
|
52
|
-
.option('-o, --overlap <number>', 'Chunk overlap size', '50')
|
|
53
|
-
.option('-m, --metadata <json>', 'JSON metadata to attach to the documents')
|
|
61
|
+
contextlake.command('ingest <datasource_name>')
|
|
62
|
+
.description('Process and ingest all files from a connected data source into the knowledge base')
|
|
54
63
|
.action(commands.ingestAction);
|
|
55
64
|
// Search
|
|
56
65
|
contextlake.command('search <query>')
|
|
@@ -70,6 +79,10 @@ function registerAll(ctx, logger) {
|
|
|
70
79
|
.option('--ids <ids...>', 'List of specific file IDs to delete')
|
|
71
80
|
.option('-f, --filter <string>', 'Filter string to match documents for deletion')
|
|
72
81
|
.action(commands.deleteAction);
|
|
82
|
+
// Onboard
|
|
83
|
+
contextlake.command('onboard')
|
|
84
|
+
.description('Configure credentials for ContextLake')
|
|
85
|
+
.action(commands.onboardAction);
|
|
73
86
|
}, { commands: ['contextlake'] });
|
|
74
87
|
logger.info(`[${new Date().toISOString()}] [ContextLake] CLI commands registered`);
|
|
75
88
|
}
|
|
@@ -86,7 +99,7 @@ function registerAll(ctx, logger) {
|
|
|
86
99
|
const slashCommands = (0, slashcmd_1.getSlashCommands)(pluginConfig, logger);
|
|
87
100
|
ctx.registerCommand({
|
|
88
101
|
name: 'contextlake-ingest',
|
|
89
|
-
description: '
|
|
102
|
+
description: 'Process and ingest all files from a connected data source (usage: /contextlake-ingest <datasource_name>)',
|
|
90
103
|
acceptsArgs: true,
|
|
91
104
|
handler: slashCommands.ingestHandler
|
|
92
105
|
});
|
|
@@ -108,6 +121,18 @@ function registerAll(ctx, logger) {
|
|
|
108
121
|
acceptsArgs: true,
|
|
109
122
|
handler: slashCommands.deleteHandler
|
|
110
123
|
});
|
|
124
|
+
ctx.registerCommand({
|
|
125
|
+
name: 'contextlake-profiler',
|
|
126
|
+
description: 'Connect to a data source and profile its structure (usage: /contextlake-profiler <datasource_name> <vendor> <bucket> <prefix>)',
|
|
127
|
+
acceptsArgs: true,
|
|
128
|
+
handler: slashCommands.profilerHandler
|
|
129
|
+
});
|
|
130
|
+
ctx.registerCommand({
|
|
131
|
+
name: 'contextlake-list-datasource',
|
|
132
|
+
description: 'List all connected and profiled data sources (usage: /contextlake-list-datasource)',
|
|
133
|
+
acceptsArgs: false,
|
|
134
|
+
handler: slashCommands.listDatasourceHandler
|
|
135
|
+
});
|
|
111
136
|
logger.info(`[${new Date().toISOString()}] [ContextLake] Slash commands registered`);
|
|
112
137
|
}
|
|
113
138
|
catch (error) {
|
|
@@ -12,4 +12,10 @@ export declare function getSlashCommands(pluginConfig: ContextLakeConfig, logger
|
|
|
12
12
|
deleteHandler: (commandCtx: any) => Promise<{
|
|
13
13
|
text: string;
|
|
14
14
|
}>;
|
|
15
|
+
profilerHandler: (commandCtx: any) => Promise<{
|
|
16
|
+
text: string;
|
|
17
|
+
}>;
|
|
18
|
+
listDatasourceHandler: (commandCtx: any) => Promise<{
|
|
19
|
+
text: string;
|
|
20
|
+
}>;
|
|
15
21
|
};
|
|
@@ -1,9 +1,46 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
2
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
36
|
exports.getSlashCommands = getSlashCommands;
|
|
4
37
|
const ingest_1 = require("../lib/actions/ingest");
|
|
5
38
|
const retrieve_1 = require("../lib/actions/retrieve");
|
|
6
39
|
const manage_1 = require("../lib/actions/manage");
|
|
40
|
+
const profiler_1 = require("../lib/actions/profiler");
|
|
41
|
+
const fs = __importStar(require("fs"));
|
|
42
|
+
const path = __importStar(require("path"));
|
|
43
|
+
const os = __importStar(require("os"));
|
|
7
44
|
function getSlashCommands(pluginConfig, logger) {
|
|
8
45
|
return {
|
|
9
46
|
ingestHandler: async (commandCtx) => {
|
|
@@ -12,12 +49,16 @@ function getSlashCommands(pluginConfig, logger) {
|
|
|
12
49
|
logger.info(`[${new Date().toISOString()}] [ContextLake] Slash command ingest started`, { args });
|
|
13
50
|
try {
|
|
14
51
|
if (args.length === 0) {
|
|
15
|
-
return { text: `**Error:** Missing
|
|
52
|
+
return { text: `**Error:** Missing datasource_name. Usage: /contextlake-ingest <datasource_name>` };
|
|
16
53
|
}
|
|
17
|
-
const
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
54
|
+
const datasource_name = args[0];
|
|
55
|
+
const BASE_DIR = path.join(os.homedir(), '.openclaw', 'contextlake', 'profiler');
|
|
56
|
+
const dsDir = path.join(BASE_DIR, datasource_name);
|
|
57
|
+
const dbPath = path.join(dsDir, 'catalog_db');
|
|
58
|
+
if (!fs.existsSync(dbPath)) {
|
|
59
|
+
return { text: `**Error:** Data source "${datasource_name}" has not been profiled yet.\n\nPlease run the profiler first using:\n\`/contextlake-profiler <datasource_name> <vendor> <bucket> <prefix> [endpoint] [ak] [sk] [region]\`` };
|
|
60
|
+
}
|
|
61
|
+
const result = await (0, ingest_1.ingestSource)({ datasource_name }, pluginConfig, logger);
|
|
21
62
|
logger.info(`[${new Date().toISOString()}] [ContextLake] Slash command ingest completed`, { resultCount: result.length });
|
|
22
63
|
return { text: `**Ingest Results (${result.length} files processed):**\n\`\`\`json\n${JSON.stringify(result, null, 2)}\n\`\`\`` };
|
|
23
64
|
}
|
|
@@ -86,6 +127,49 @@ function getSlashCommands(pluginConfig, logger) {
|
|
|
86
127
|
logger.error(`[ContextLake] Slash delete failed`, { error: e.message });
|
|
87
128
|
return { text: `**Error executing delete:** ${e.message}` };
|
|
88
129
|
}
|
|
89
|
-
}
|
|
130
|
+
},
|
|
131
|
+
profilerHandler: async (commandCtx) => {
|
|
132
|
+
const rawArgs = commandCtx.args || "";
|
|
133
|
+
const args = rawArgs.split(' ').filter((arg) => arg.trim() !== '');
|
|
134
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] Slash command profiler started`, { args });
|
|
135
|
+
try {
|
|
136
|
+
if (args.length < 4) {
|
|
137
|
+
return { text: `**Error:** Missing arguments. Usage: /contextlake-profiler <datasource_name> <vendor> <bucket> <prefix> [endpoint] [ak] [sk] [region]` };
|
|
138
|
+
}
|
|
139
|
+
const [datasource_name, vendor, bucket, prefix, endpoint, access_key, secret_key, region] = args;
|
|
140
|
+
if (!['volcengine', 'alibaba', 'tencent', 'aws', 'local'].includes(vendor)) {
|
|
141
|
+
return { text: `**Error:** Invalid vendor. Must be one of: volcengine, alibaba, tencent, aws, local` };
|
|
142
|
+
}
|
|
143
|
+
const params = {
|
|
144
|
+
datasource_name,
|
|
145
|
+
vendor: vendor,
|
|
146
|
+
bucket,
|
|
147
|
+
prefix,
|
|
148
|
+
endpoint,
|
|
149
|
+
access_key,
|
|
150
|
+
secret_key,
|
|
151
|
+
region,
|
|
152
|
+
};
|
|
153
|
+
const result = await (0, profiler_1.connectDataSource)(params);
|
|
154
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] Slash command profiler completed`, { result });
|
|
155
|
+
return { text: `**Profiler Results:**\n\`\`\`json\n${JSON.stringify(result, null, 2)}\n\`\`\`` };
|
|
156
|
+
}
|
|
157
|
+
catch (e) {
|
|
158
|
+
logger.error(`[ContextLake] Slash profiler failed`, { error: e.message });
|
|
159
|
+
return { text: `**Error executing profiler:** ${e.message}` };
|
|
160
|
+
}
|
|
161
|
+
},
|
|
162
|
+
listDatasourceHandler: async (commandCtx) => {
|
|
163
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] Slash command list-datasource started`);
|
|
164
|
+
try {
|
|
165
|
+
const result = await (0, profiler_1.listDataSources)();
|
|
166
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] Slash command list-datasource completed`, { result });
|
|
167
|
+
return { text: `**Data Sources:**\n\`\`\`json\n${JSON.stringify(result, null, 2)}\n\`\`\`` };
|
|
168
|
+
}
|
|
169
|
+
catch (e) {
|
|
170
|
+
logger.error(`[ContextLake] Slash list-datasource failed`, { error: e.message });
|
|
171
|
+
return { text: `**Error executing list-datasource:** ${e.message}` };
|
|
172
|
+
}
|
|
173
|
+
},
|
|
90
174
|
};
|
|
91
175
|
}
|
|
@@ -6,4 +6,9 @@ export declare function getAgentTools(pluginConfig: ContextLakeConfig, logger: a
|
|
|
6
6
|
listTool: AnyAgentTool;
|
|
7
7
|
deleteTool: AnyAgentTool;
|
|
8
8
|
lasDataProfilerTool: AnyAgentTool;
|
|
9
|
+
listDatasourceTool: AnyAgentTool;
|
|
10
|
+
listS3ObjectsTool: AnyAgentTool;
|
|
11
|
+
readS3ObjectTool: AnyAgentTool;
|
|
12
|
+
writeLanceCatalogTool: AnyAgentTool;
|
|
13
|
+
lasTools: AnyAgentTool[];
|
|
9
14
|
};
|
|
@@ -5,44 +5,54 @@ const ingest_1 = require("../lib/actions/ingest");
|
|
|
5
5
|
const retrieve_1 = require("../lib/actions/retrieve");
|
|
6
6
|
const manage_1 = require("../lib/actions/manage");
|
|
7
7
|
const profiler_1 = require("../lib/actions/profiler");
|
|
8
|
+
const las_tools_1 = require("../lib/actions/las-tools");
|
|
9
|
+
const s3_tools_1 = require("../lib/actions/s3-tools");
|
|
10
|
+
const lance_tools_1 = require("../lib/actions/lance-tools");
|
|
8
11
|
function getAgentTools(pluginConfig, logger) {
|
|
12
|
+
const lasTools = (0, las_tools_1.getLasTools)(pluginConfig, logger);
|
|
9
13
|
return {
|
|
14
|
+
lasTools,
|
|
15
|
+
listDatasourceTool: {
|
|
16
|
+
name: 'contextlake-list-datasource',
|
|
17
|
+
label: 'ContextLake List Datasources',
|
|
18
|
+
description: `List all connected and profiled data sources.`,
|
|
19
|
+
parameters: {
|
|
20
|
+
type: 'object',
|
|
21
|
+
properties: {},
|
|
22
|
+
required: [],
|
|
23
|
+
additionalProperties: false
|
|
24
|
+
},
|
|
25
|
+
async execute(toolCallId, params) {
|
|
26
|
+
logger.info(`[${new Date().toISOString()}] [ContextLake] Executing list-datasource skill, toolCallId: ${toolCallId}`);
|
|
27
|
+
try {
|
|
28
|
+
const result = await (0, profiler_1.listDataSources)();
|
|
29
|
+
return {
|
|
30
|
+
content: [{ type: "text", text: JSON.stringify(result) }],
|
|
31
|
+
details: result
|
|
32
|
+
};
|
|
33
|
+
}
|
|
34
|
+
catch (error) {
|
|
35
|
+
logger.error(`[${new Date().toISOString()}] [ContextLake] list-datasource skill failed`, { error: error.message });
|
|
36
|
+
return {
|
|
37
|
+
content: [{ type: "text", text: String(error.message) }],
|
|
38
|
+
details: { error: error.message }
|
|
39
|
+
};
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
},
|
|
10
43
|
ingestTool: {
|
|
11
44
|
name: 'contextlake-ingest',
|
|
12
45
|
label: 'ContextLake Ingest',
|
|
13
|
-
description: `
|
|
46
|
+
description: `Process and ingest all files from a connected data source into the knowledge base.
|
|
14
47
|
Use this tool when the user wants to "将知识注入", "上传文件", "入库", "添加文档", "ingest files", or "add knowledge".
|
|
15
|
-
Supports
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
Example User Queries:
|
|
19
|
-
- "帮我把这个文档注入到知识湖中"
|
|
20
|
-
- "上传这份 PDF 到知识库"
|
|
21
|
-
- "Please ingest these documents into ContextLake"
|
|
22
|
-
- "将 /path/to/doc.txt 添加到知识库"`,
|
|
48
|
+
Supports multimodal files (text, images, audio, video, pdf) by using LAS models to understand and embed them.
|
|
49
|
+
Must be called after a data source has been successfully profiled via \`las-data-profiler\`.`,
|
|
23
50
|
parameters: {
|
|
24
51
|
type: 'object',
|
|
25
52
|
properties: {
|
|
26
|
-
|
|
27
|
-
type: 'array',
|
|
28
|
-
items: { type: 'string' },
|
|
29
|
-
description: 'List of file paths to ingest'
|
|
30
|
-
},
|
|
31
|
-
metadata: {
|
|
32
|
-
type: 'object',
|
|
33
|
-
description: 'Optional JSON metadata to attach to documents',
|
|
34
|
-
additionalProperties: true
|
|
35
|
-
},
|
|
36
|
-
chunkSize: {
|
|
37
|
-
type: 'integer',
|
|
38
|
-
description: 'Chunk size for text splitting'
|
|
39
|
-
},
|
|
40
|
-
overlap: {
|
|
41
|
-
type: 'integer',
|
|
42
|
-
description: 'Overlap size for text splitting'
|
|
43
|
-
}
|
|
53
|
+
datasource_name: { type: 'string', description: 'Name of the data source previously profiled' }
|
|
44
54
|
},
|
|
45
|
-
required: ['
|
|
55
|
+
required: ['datasource_name'],
|
|
46
56
|
additionalProperties: false
|
|
47
57
|
},
|
|
48
58
|
async execute(toolCallId, params) {
|
|
@@ -56,21 +66,21 @@ Example User Queries:
|
|
|
56
66
|
catch (e) {
|
|
57
67
|
logger.warn(`[ContextLake] Received string params, possibly toolCallId?`, { params });
|
|
58
68
|
return {
|
|
59
|
-
content: [{ type: "text", text: `Invalid params format: received string "${params}", expected object with '
|
|
60
|
-
details: { error: `Invalid params format: received string "${params}", expected object with '
|
|
69
|
+
content: [{ type: "text", text: `Invalid params format: received string "${params}", expected object with 'datasource_name'.` }],
|
|
70
|
+
details: { error: `Invalid params format: received string "${params}", expected object with 'datasource_name'.` }
|
|
61
71
|
};
|
|
62
72
|
}
|
|
63
73
|
}
|
|
64
|
-
if (!actualParams.
|
|
74
|
+
if (!actualParams.datasource_name && actualParams.params && actualParams.params.datasource_name) {
|
|
65
75
|
actualParams = actualParams.params;
|
|
66
76
|
}
|
|
67
|
-
if (!actualParams.
|
|
77
|
+
if (!actualParams.datasource_name) {
|
|
68
78
|
return {
|
|
69
|
-
content: [{ type: "text", text: `Invalid params: '
|
|
70
|
-
details: { error: `Invalid params: '
|
|
79
|
+
content: [{ type: "text", text: `Invalid params: 'datasource_name' is required. Received keys: ${Object.keys(actualParams)}` }],
|
|
80
|
+
details: { error: `Invalid params: 'datasource_name' is required. Received keys: ${Object.keys(actualParams)}` }
|
|
71
81
|
};
|
|
72
82
|
}
|
|
73
|
-
const result = await (0, ingest_1.
|
|
83
|
+
const result = await (0, ingest_1.ingestSource)(actualParams, pluginConfig, logger);
|
|
74
84
|
logger.info(`[${new Date().toISOString()}] [ContextLake] Ingest skill completed successfully`, { resultSummary: Array.isArray(result) ? `Processed ${result.length} items` : 'Success' });
|
|
75
85
|
return {
|
|
76
86
|
content: [{ type: "text", text: JSON.stringify(result) }],
|
|
@@ -81,8 +91,7 @@ Example User Queries:
|
|
|
81
91
|
logger.error(`[${new Date().toISOString()}] [ContextLake] Ingest skill failed`, { error: error.message, stack: error.stack });
|
|
82
92
|
return {
|
|
83
93
|
content: [{ type: "text", text: String(error.message) }],
|
|
84
|
-
details: { error: error.message
|
|
85
|
-
}
|
|
94
|
+
details: { error: error.message }
|
|
86
95
|
};
|
|
87
96
|
}
|
|
88
97
|
}
|
|
@@ -274,11 +283,97 @@ Example User Queries:
|
|
|
274
283
|
logger.error(`[${new Date().toISOString()}] [ContextLake] las-data-profiler skill failed`, { error: error.message, stack: error.stack });
|
|
275
284
|
return {
|
|
276
285
|
content: [{ type: "text", text: String(error.message) }],
|
|
277
|
-
details: { error: error.message
|
|
278
|
-
}
|
|
286
|
+
details: { error: error.message }
|
|
279
287
|
};
|
|
280
288
|
}
|
|
281
289
|
}
|
|
290
|
+
},
|
|
291
|
+
listS3ObjectsTool: {
|
|
292
|
+
name: 'list-s3-objects',
|
|
293
|
+
label: 'List S3 Objects',
|
|
294
|
+
description: 'List objects in an S3-compatible bucket or local directory',
|
|
295
|
+
parameters: {
|
|
296
|
+
type: 'object',
|
|
297
|
+
properties: {
|
|
298
|
+
vendor: { type: 'string', enum: ['volcengine', 'alibaba', 'tencent', 'aws', 'local'] },
|
|
299
|
+
bucket: { type: 'string' },
|
|
300
|
+
prefix: { type: 'string' },
|
|
301
|
+
endpoint: { type: 'string' },
|
|
302
|
+
access_key: { type: 'string' },
|
|
303
|
+
secret_key: { type: 'string' },
|
|
304
|
+
region: { type: 'string' },
|
|
305
|
+
maxKeys: { type: 'integer' },
|
|
306
|
+
continuationToken: { type: 'string' }
|
|
307
|
+
},
|
|
308
|
+
required: ['vendor', 'bucket'],
|
|
309
|
+
additionalProperties: false
|
|
310
|
+
},
|
|
311
|
+
async execute(toolCallId, params) {
|
|
312
|
+
let actualParams = params.params || params;
|
|
313
|
+
try {
|
|
314
|
+
const result = await (0, s3_tools_1.listS3Objects)(actualParams, actualParams.prefix || '', actualParams.maxKeys, actualParams.continuationToken);
|
|
315
|
+
return { content: [{ type: "text", text: JSON.stringify(result) }], details: result };
|
|
316
|
+
}
|
|
317
|
+
catch (e) {
|
|
318
|
+
return { content: [{ type: "text", text: String(e.message) }], details: { error: e.message } };
|
|
319
|
+
}
|
|
320
|
+
}
|
|
321
|
+
},
|
|
322
|
+
readS3ObjectTool: {
|
|
323
|
+
name: 'read-s3-object',
|
|
324
|
+
label: 'Read S3 Object',
|
|
325
|
+
description: 'Read the contents or headers of an S3 object',
|
|
326
|
+
parameters: {
|
|
327
|
+
type: 'object',
|
|
328
|
+
properties: {
|
|
329
|
+
vendor: { type: 'string', enum: ['volcengine', 'alibaba', 'tencent', 'aws', 'local'] },
|
|
330
|
+
bucket: { type: 'string' },
|
|
331
|
+
key: { type: 'string' },
|
|
332
|
+
endpoint: { type: 'string' },
|
|
333
|
+
access_key: { type: 'string' },
|
|
334
|
+
secret_key: { type: 'string' },
|
|
335
|
+
region: { type: 'string' },
|
|
336
|
+
maxBytes: { type: 'integer' }
|
|
337
|
+
},
|
|
338
|
+
required: ['vendor', 'bucket', 'key'],
|
|
339
|
+
additionalProperties: false
|
|
340
|
+
},
|
|
341
|
+
async execute(toolCallId, params) {
|
|
342
|
+
let actualParams = params.params || params;
|
|
343
|
+
try {
|
|
344
|
+
const buf = await (0, s3_tools_1.readS3Object)(actualParams, actualParams.key, actualParams.maxBytes);
|
|
345
|
+
// Return as base64 string
|
|
346
|
+
return { content: [{ type: "text", text: buf.toString('base64') }], details: { length: buf.length } };
|
|
347
|
+
}
|
|
348
|
+
catch (e) {
|
|
349
|
+
return { content: [{ type: "text", text: String(e.message) }], details: { error: e.message } };
|
|
350
|
+
}
|
|
351
|
+
}
|
|
352
|
+
},
|
|
353
|
+
writeLanceCatalogTool: {
|
|
354
|
+
name: 'write-lance-catalog',
|
|
355
|
+
label: 'Write LanceDB Catalog',
|
|
356
|
+
description: 'Write an array of file records into a local LanceDB table',
|
|
357
|
+
parameters: {
|
|
358
|
+
type: 'object',
|
|
359
|
+
properties: {
|
|
360
|
+
db_path: { type: 'string' },
|
|
361
|
+
table_name: { type: 'string' },
|
|
362
|
+
records: { type: 'array', items: { type: 'object' } }
|
|
363
|
+
},
|
|
364
|
+
required: ['db_path', 'table_name', 'records'],
|
|
365
|
+
additionalProperties: false
|
|
366
|
+
},
|
|
367
|
+
async execute(toolCallId, params) {
|
|
368
|
+
let actualParams = params.params || params;
|
|
369
|
+
try {
|
|
370
|
+
await (0, lance_tools_1.writeLanceCatalog)(actualParams);
|
|
371
|
+
return { content: [{ type: "text", text: "Successfully wrote records to LanceDB" }], details: { count: actualParams.records.length } };
|
|
372
|
+
}
|
|
373
|
+
catch (e) {
|
|
374
|
+
return { content: [{ type: "text", text: String(e.message) }], details: { error: e.message } };
|
|
375
|
+
}
|
|
376
|
+
}
|
|
282
377
|
}
|
|
283
378
|
};
|
|
284
379
|
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { ContextLakeConfig } from '../../utils/config';
|
|
2
|
+
export interface IngestSourceParams {
|
|
3
|
+
datasource_name: string;
|
|
4
|
+
}
|
|
5
|
+
export declare function ingestSource(params: IngestSourceParams, config: ContextLakeConfig, logger?: any): Promise<({
|
|
6
|
+
file: any;
|
|
7
|
+
status: string;
|
|
8
|
+
chunks: number;
|
|
9
|
+
message?: undefined;
|
|
10
|
+
} | {
|
|
11
|
+
file: any;
|
|
12
|
+
status: string;
|
|
13
|
+
message: any;
|
|
14
|
+
chunks?: undefined;
|
|
15
|
+
})[]>;
|