@byted-las/contextlake-openclaw 1.0.4 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/commands/cli.d.ts +1 -1
- package/dist/src/commands/cli.js +4 -10
- package/dist/src/commands/index.js +5 -4
- package/dist/src/commands/slashcmd.js +4 -9
- package/dist/src/commands/tools.d.ts +2 -0
- package/dist/src/commands/tools.js +104 -22
- package/dist/src/lib/actions/lance-tools.d.ts +7 -0
- package/dist/src/lib/actions/lance-tools.js +22 -0
- package/dist/src/lib/actions/las-tools.js +58 -0
- package/dist/src/lib/actions/profiler.d.ts +4 -3
- package/dist/src/lib/actions/profiler.js +43 -7
- package/dist/src/lib/actions/s3-tools.d.ts +5 -2
- package/dist/src/lib/actions/s3-tools.js +54 -0
- package/dist/src/skills/SKILL.md +3 -1
- package/dist/src/skills/contextlake-delete/SKILL.md +2 -0
- package/dist/src/skills/contextlake-ingest/SKILL.md +2 -0
- package/dist/src/skills/contextlake-list/SKILL.md +2 -0
- package/dist/src/skills/contextlake-retrieve/SKILL.md +2 -0
- package/dist/src/skills/las-data-profiler/SKILL.md +3 -1
- package/openclaw.plugin.json +1 -1
- package/package.json +2 -1
- package/src/commands/cli.ts +4 -10
- package/src/commands/index.ts +7 -4
- package/src/commands/slashcmd.ts +4 -10
- package/src/commands/tools.ts +102 -24
- package/src/lib/actions/lance-tools.ts +35 -0
- package/src/lib/actions/las-tools.ts +56 -0
- package/src/lib/actions/profiler.ts +42 -7
- package/src/lib/actions/s3-tools.ts +59 -4
- package/src/skills/contextlake-delete/SKILL.md +2 -0
- package/src/skills/contextlake-ingest/SKILL.md +2 -0
- package/src/skills/contextlake-list/SKILL.md +2 -0
- package/src/skills/contextlake-retrieve/SKILL.md +2 -0
- package/src/skills/las-data-profiler/SKILL.md +3 -1
|
@@ -1,17 +1,51 @@
|
|
|
1
1
|
import { S3Client, ListObjectsV2Command, GetObjectCommand } from '@aws-sdk/client-s3';
|
|
2
|
+
import { getSignedUrl } from '@aws-sdk/s3-request-presigner';
|
|
2
3
|
import * as fs from 'fs';
|
|
3
4
|
import * as path from 'path';
|
|
4
5
|
|
|
5
6
|
/**
 * Connection and location settings accepted by the S3 helper functions in this module.
 *
 * Every field is optional at the type level; `parseS3Url` fills in `vendor`
 * and `bucket` from `url` when a URL is supplied, and rejects inputs where
 * neither source yields both values.
 */
export interface S3Params {
  /**
   * Location to resolve. `parseS3Url` accepts the schemes `tos://`, `oss://`,
   * `cos://`, `s3://` and `file://`, as well as a plain path starting with `/`.
   */
  url?: string;

  /** Storage provider. `'local'` routes the helpers to the filesystem instead of an S3 client. */
  vendor?:
    | 'volcengine'
    | 'alibaba'
    | 'tencent'
    | 'aws'
    | 'local';

  /** Endpoint override read by `createS3Client`. */
  endpoint?: string;

  /** NOTE(review): presumably the access key id used to build the client — confirm in `createS3Client`. */
  access_key?: string;

  /** NOTE(review): presumably the secret paired with `access_key` — confirm in `createS3Client`. */
  secret_key?: string;

  /** NOTE(review): presumably the provider region used for the client — confirm in `createS3Client`. */
  region?: string;

  /** Bucket name; for the `'local'` vendor this holds the root directory path instead. */
  bucket?: string;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Fills in `vendor` and `bucket` on `params` from `params.url`, when a URL is given.
 *
 * - `file://...` or a path starting with `/` selects the `'local'` vendor; the
 *   bucket becomes the path (with the `file://` prefix removed).
 * - `tos://`, `oss://`, `cos://` and `s3://` select volcengine, alibaba, tencent
 *   and aws respectively; the bucket becomes the URL's hostname. Any path
 *   component is ignored here and must be supplied separately as a prefix/key.
 *
 * The object passed in is modified in place and the same reference is returned.
 * Values derived from `url` overwrite any `vendor`/`bucket` already present.
 *
 * @param params - Settings to resolve; mutated in place.
 * @returns The same `params` object, with `vendor` and `bucket` populated.
 * @throws Error if the URL scheme is not one of the supported ones, or if no
 *   vendor/bucket could be determined from either the URL or the explicit fields.
 *   `new URL` may also throw for strings it cannot parse.
 */
export function parseS3Url(params: S3Params): S3Params {
  const rawUrl = params.url;

  if (rawUrl) {
    const hasFileScheme = rawUrl.startsWith('file://');

    if (hasFileScheme || rawUrl.startsWith('/')) {
      // Local filesystem: the "bucket" is simply the root directory path.
      params.vendor = 'local';
      params.bucket = hasFileScheme ? rawUrl.slice(7) : rawUrl;
    } else {
      const target = new URL(rawUrl);
      // URL.protocol carries a trailing colon ("s3:"); drop it to get the bare scheme.
      const scheme = target.protocol.replace(':', '');

      switch (scheme) {
        case 'tos':
          params.vendor = 'volcengine';
          break;
        case 'oss':
          params.vendor = 'alibaba';
          break;
        case 'cos':
          params.vendor = 'tencent';
          break;
        case 's3':
          params.vendor = 'aws';
          break;
        default:
          throw new Error(`Unsupported protocol: ${scheme}`);
      }

      // Only the host part names the bucket; the path is handled by callers.
      params.bucket = target.hostname;
    }
  }

  const resolved = Boolean(params.vendor) && Boolean(params.bucket);
  if (!resolved) {
    throw new Error('Could not determine vendor or bucket. Please provide a valid url or vendor/bucket directly.');
  }

  return params;
}
|
|
13
46
|
|
|
14
47
|
function createS3Client(params: S3Params): S3Client | null {
|
|
48
|
+
params = parseS3Url(params);
|
|
15
49
|
if (params.vendor === 'local') return null;
|
|
16
50
|
|
|
17
51
|
let endpoint = params.endpoint;
|
|
@@ -56,8 +90,9 @@ function createS3Client(params: S3Params): S3Client | null {
|
|
|
56
90
|
}
|
|
57
91
|
|
|
58
92
|
export async function listS3Objects(params: S3Params, prefix: string, maxKeys: number = 1000, continuationToken?: string) {
|
|
93
|
+
params = parseS3Url(params);
|
|
59
94
|
if (params.vendor === 'local') {
|
|
60
|
-
const root = params.bucket;
|
|
95
|
+
const root = params.bucket as string;
|
|
61
96
|
const prefixPath = prefix && prefix !== '.' ? path.join(root, prefix) : root;
|
|
62
97
|
const files: any[] = [];
|
|
63
98
|
|
|
@@ -108,8 +143,9 @@ export async function listS3Objects(params: S3Params, prefix: string, maxKeys: n
|
|
|
108
143
|
}
|
|
109
144
|
|
|
110
145
|
export async function readS3Object(params: S3Params, key: string, maxBytes?: number): Promise<Buffer> {
|
|
146
|
+
params = parseS3Url(params);
|
|
111
147
|
if (params.vendor === 'local') {
|
|
112
|
-
const fullPath = path.join(params.bucket, key);
|
|
148
|
+
const fullPath = path.join(params.bucket as string, key);
|
|
113
149
|
if (maxBytes) {
|
|
114
150
|
const fd = fs.openSync(fullPath, 'r');
|
|
115
151
|
const buffer = Buffer.alloc(maxBytes);
|
|
@@ -146,3 +182,22 @@ export async function readS3Object(params: S3Params, key: string, maxBytes?: num
|
|
|
146
182
|
}
|
|
147
183
|
return Buffer.alloc(0);
|
|
148
184
|
}
|
|
185
|
+
|
|
186
|
+
/**
 * Produces a URL from which the object at `key` can be fetched.
 *
 * For the `'local'` vendor no signing takes place: the result is `file://`
 * followed by the bucket path joined with `key`. For every other vendor a
 * `GetObject` request is presigned through the S3 client.
 *
 * @param params - Connection settings; resolved through `parseS3Url` first.
 * @param key - Object key (or path relative to the local root).
 * @param expiresIn - Value forwarded as the `expiresIn` option of `getSignedUrl`; defaults to 3600.
 * @returns The `file://` URL or the presigned URL.
 * @throws Error if `parseS3Url` rejects `params` or no S3 client could be created.
 */
export async function getPresignedUrl(params: S3Params, key: string, expiresIn: number = 3600): Promise<string> {
  params = parseS3Url(params);

  const isLocal = params.vendor === 'local';
  if (isLocal) {
    // Local objects are addressed directly by their filesystem path.
    return `file://${path.join(params.bucket as string, key)}`;
  }

  const s3 = createS3Client(params);
  if (!s3) {
    throw new Error('Failed to create S3 client');
  }

  const getObject = new GetObjectCommand({ Bucket: params.bucket, Key: key });
  return await getSignedUrl(s3, getObject, { expiresIn });
}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
---
|
|
1
2
|
name: contextlake-delete
|
|
2
3
|
description: |
|
|
3
4
|
Delete documents and assets from the ContextLake Knowledge Base (知识库) / Knowledge Lake (知识湖).
|
|
@@ -34,3 +35,4 @@ parameters:
|
|
|
34
35
|
type: string
|
|
35
36
|
description: SQL-like filter string to identify documents to delete (e.g. "metadata.category = 'obsolete'").
|
|
36
37
|
required: false
|
|
38
|
+
---
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
---
|
|
1
2
|
name: contextlake-ingest
|
|
2
3
|
description: |
|
|
3
4
|
Upload, ingest, and index documents into the ContextLake Knowledge Base (知识库) / Knowledge Lake (知识湖).
|
|
@@ -38,3 +39,4 @@ parameters:
|
|
|
38
39
|
type: integer
|
|
39
40
|
description: Overlap size for text splitting
|
|
40
41
|
required: false
|
|
42
|
+
---
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
---
|
|
1
2
|
name: contextlake-list
|
|
2
3
|
description: |
|
|
3
4
|
List documents and assets currently in the ContextLake Knowledge Base (知识库) / Knowledge Lake (知识湖).
|
|
@@ -20,3 +21,4 @@ parameters:
|
|
|
20
21
|
type: integer
|
|
21
22
|
description: Maximum number of documents to return (default 100).
|
|
22
23
|
required: false
|
|
24
|
+
---
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
---
|
|
1
2
|
name: contextlake-retrieve
|
|
2
3
|
description: |
|
|
3
4
|
Search, query, and retrieve relevant information from the ContextLake Knowledge Base (知识库) / Knowledge Lake (知识湖).
|
|
@@ -35,3 +36,4 @@ parameters:
|
|
|
35
36
|
type: boolean
|
|
36
37
|
description: Whether to include binary content
|
|
37
38
|
required: false
|
|
39
|
+
---
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
---
|
|
1
2
|
name: byted-las-data-profiler
|
|
2
3
|
description: |
|
|
3
4
|
Volcengine TOS Dataset Profiling Tool. Based on the S3-compatible protocol, it scans the file structure in TOS buckets and catalogs them.
|
|
@@ -5,6 +6,7 @@ description: |
|
|
|
5
6
|
|
|
6
7
|
IMPORTANT RULE: You are STRICTLY FORBIDDEN from writing or executing Python scripts to access S3/TOS or LanceDB.
|
|
7
8
|
You MUST exclusively use the provided tools (`list-s3-objects`, `read-s3-object`, `write-lance-catalog`, `las-data-profiler`) to accomplish the profiling tasks.
|
|
9
|
+
---
|
|
8
10
|
|
|
9
11
|
## Trigger Scenarios
|
|
10
12
|
Be sure to use this Skill when the user mentions the following scenarios:
|
|
@@ -34,4 +36,4 @@ If you need to perform custom exploration, you can use `list-s3-objects` to trav
|
|
|
34
36
|
|
|
35
37
|
## Output Location
|
|
36
38
|
- LanceDB table storage path: `~/.openclaw/contextlake/profiler/{datasource_name}/catalog_db`
|
|
37
|
-
- Configuration file: `~/.openclaw/contextlake/profiler/{datasource_name}/env.sh`
|
|
39
|
+
- Configuration file: `~/.openclaw/contextlake/profiler/{datasource_name}/env.sh`
|