@byted-las/contextlake-openclaw 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/dist/src/commands/cli.d.ts +1 -1
  2. package/dist/src/commands/cli.js +4 -10
  3. package/dist/src/commands/index.js +5 -4
  4. package/dist/src/commands/slashcmd.js +4 -9
  5. package/dist/src/commands/tools.d.ts +2 -0
  6. package/dist/src/commands/tools.js +104 -22
  7. package/dist/src/lib/actions/lance-tools.d.ts +7 -0
  8. package/dist/src/lib/actions/lance-tools.js +22 -0
  9. package/dist/src/lib/actions/las-tools.js +58 -0
  10. package/dist/src/lib/actions/profiler.d.ts +4 -3
  11. package/dist/src/lib/actions/profiler.js +43 -7
  12. package/dist/src/lib/actions/s3-tools.d.ts +5 -2
  13. package/dist/src/lib/actions/s3-tools.js +54 -0
  14. package/dist/src/skills/SKILL.md +3 -1
  15. package/dist/src/skills/contextlake-delete/SKILL.md +2 -0
  16. package/dist/src/skills/contextlake-ingest/SKILL.md +2 -0
  17. package/dist/src/skills/contextlake-list/SKILL.md +2 -0
  18. package/dist/src/skills/contextlake-retrieve/SKILL.md +2 -0
  19. package/dist/src/skills/las-data-profiler/SKILL.md +3 -1
  20. package/openclaw.plugin.json +1 -1
  21. package/package.json +2 -1
  22. package/src/commands/cli.ts +4 -10
  23. package/src/commands/index.ts +7 -4
  24. package/src/commands/slashcmd.ts +4 -10
  25. package/src/commands/tools.ts +102 -24
  26. package/src/lib/actions/lance-tools.ts +35 -0
  27. package/src/lib/actions/las-tools.ts +56 -0
  28. package/src/lib/actions/profiler.ts +42 -7
  29. package/src/lib/actions/s3-tools.ts +59 -4
  30. package/src/skills/contextlake-delete/SKILL.md +2 -0
  31. package/src/skills/contextlake-ingest/SKILL.md +2 -0
  32. package/src/skills/contextlake-list/SKILL.md +2 -0
  33. package/src/skills/contextlake-retrieve/SKILL.md +2 -0
  34. package/src/skills/las-data-profiler/SKILL.md +3 -1
@@ -1,17 +1,51 @@
1
1
  import { S3Client, ListObjectsV2Command, GetObjectCommand } from '@aws-sdk/client-s3';
2
+ import { getSignedUrl } from '@aws-sdk/s3-request-presigner';
2
3
  import * as fs from 'fs';
3
4
  import * as path from 'path';
4
5
 
5
6
  export interface S3Params {
6
- vendor: 'volcengine' | 'alibaba' | 'tencent' | 'aws' | 'local';
7
+ url?: string;
8
+ vendor?: 'volcengine' | 'alibaba' | 'tencent' | 'aws' | 'local';
7
9
  endpoint?: string;
8
10
  access_key?: string;
9
11
  secret_key?: string;
10
12
  region?: string;
11
- bucket: string;
13
+ bucket?: string;
14
+ }
15
+
16
/** URL schemes accepted by `parseS3Url`, mapped to the vendor each one selects. */
const VENDOR_BY_SCHEME = new Map<string, NonNullable<S3Params['vendor']>>([
  ['tos', 'volcengine'],
  ['oss', 'alibaba'],
  ['cos', 'tencent'],
  ['s3', 'aws'],
]);

/**
 * Fills in `vendor` and `bucket` from `params.url` (when present) and checks
 * that both are set afterwards.
 *
 * Accepted `url` forms:
 * - `file://<path>` or a string starting with `/`: vendor becomes `'local'` and
 *   `bucket` becomes the path (the text after `file://`, taken verbatim).
 * - `tos://`, `oss://`, `cos://`, `s3://`: vendor becomes `volcengine`,
 *   `alibaba`, `tencent` or `aws` respectively and `bucket` becomes the URL's
 *   host component. The path component is ignored here.
 *
 * When `url` is absent or empty, the `vendor`/`bucket` already on `params` are
 * used unchanged.
 *
 * @param params Connection parameters. This object is updated in place.
 * @returns The same `params` object that was passed in.
 * @throws {TypeError} If `url` is neither a local path nor a parseable URL.
 * @throws {Error} If the URL scheme is not supported, or if vendor/bucket are
 *   still missing after parsing.
 */
export function parseS3Url(params: S3Params): S3Params {
  const { url } = params;

  if (url) {
    if (url.startsWith('file://') || url.startsWith('/')) {
      params.vendor = 'local';
      params.bucket = url.startsWith('file://') ? url.slice('file://'.length) : url;
    } else {
      let parsedUrl: URL;
      try {
        parsedUrl = new URL(url);
      } catch {
        // `new URL` fails with a bare "Invalid URL"; re-throw the same error
        // class with the offending value and the accepted forms spelled out.
        throw new TypeError(
          `Invalid url "${url}". Expected file://<path>, an absolute path, or one of tos://, oss://, cos://, s3:// followed by a bucket name.`
        );
      }

      const protocol = parsedUrl.protocol.replace(':', '');
      const vendor = VENDOR_BY_SCHEME.get(protocol);
      if (vendor === undefined) {
        throw new Error(`Unsupported protocol: ${protocol}`);
      }

      params.vendor = vendor;
      // Only the host is used; callers pass the key prefix separately.
      params.bucket = parsedUrl.hostname;
    }
  }

  if (!params.vendor || !params.bucket) {
    throw new Error('Could not determine vendor or bucket. Please provide a valid url or vendor/bucket directly.');
  }

  return params;
}
13
46
 
14
47
  function createS3Client(params: S3Params): S3Client | null {
48
+ params = parseS3Url(params);
15
49
  if (params.vendor === 'local') return null;
16
50
 
17
51
  let endpoint = params.endpoint;
@@ -56,8 +90,9 @@ function createS3Client(params: S3Params): S3Client | null {
56
90
  }
57
91
 
58
92
  export async function listS3Objects(params: S3Params, prefix: string, maxKeys: number = 1000, continuationToken?: string) {
93
+ params = parseS3Url(params);
59
94
  if (params.vendor === 'local') {
60
- const root = params.bucket;
95
+ const root = params.bucket as string;
61
96
  const prefixPath = prefix && prefix !== '.' ? path.join(root, prefix) : root;
62
97
  const files: any[] = [];
63
98
 
@@ -108,8 +143,9 @@ export async function listS3Objects(params: S3Params, prefix: string, maxKeys: n
108
143
  }
109
144
 
110
145
  export async function readS3Object(params: S3Params, key: string, maxBytes?: number): Promise<Buffer> {
146
+ params = parseS3Url(params);
111
147
  if (params.vendor === 'local') {
112
- const fullPath = path.join(params.bucket, key);
148
+ const fullPath = path.join(params.bucket as string, key);
113
149
  if (maxBytes) {
114
150
  const fd = fs.openSync(fullPath, 'r');
115
151
  const buffer = Buffer.alloc(maxBytes);
@@ -146,3 +182,22 @@ export async function readS3Object(params: S3Params, key: string, maxBytes?: num
146
182
  }
147
183
  return Buffer.alloc(0);
148
184
  }
185
+
186
/**
 * Builds a URL from which the object `key` can be fetched.
 *
 * For the `'local'` vendor no signing takes place: the result is `file://`
 * followed by `key` joined onto the bucket path. For every other vendor a
 * `GetObjectCommand` for the bucket/key pair is signed with `getSignedUrl`.
 *
 * @param params Connection parameters; resolved through `parseS3Url` first.
 * @param key Object key, or a path relative to the local bucket directory.
 * @param expiresIn Forwarded as the `expiresIn` option of `getSignedUrl`
 *   (a lifetime in seconds per the AWS SDK documentation). Defaults to 3600.
 * @returns The `file://` URL or the signed URL.
 * @throws {Error} If `createS3Client` yields no client for a non-local vendor.
 */
export async function getPresignedUrl(params: S3Params, key: string, expiresIn: number = 3600): Promise<string> {
  const resolved = parseS3Url(params);

  // Local "buckets" are plain directories, so there is nothing to sign.
  if (resolved.vendor === 'local') {
    return `file://${path.join(resolved.bucket as string, key)}`;
  }

  const s3 = createS3Client(resolved);
  if (!s3) {
    throw new Error('Failed to create S3 client');
  }

  const getObject = new GetObjectCommand({ Bucket: resolved.bucket, Key: key });
  return await getSignedUrl(s3, getObject, { expiresIn });
}
@@ -1,3 +1,4 @@
1
+ ---
1
2
  name: contextlake-delete
2
3
  description: |
3
4
  Delete documents and assets from the ContextLake Knowledge Base (知识库) / Knowledge Lake (知识湖).
@@ -34,3 +35,4 @@ parameters:
34
35
  type: string
35
36
  description: SQL-like filter string to identify documents to delete (e.g. "metadata.category = 'obsolete'").
36
37
  required: false
38
+ ---
@@ -1,3 +1,4 @@
1
+ ---
1
2
  name: contextlake-ingest
2
3
  description: |
3
4
  Upload, ingest, and index documents into the ContextLake Knowledge Base (知识库) / Knowledge Lake (知识湖).
@@ -38,3 +39,4 @@ parameters:
38
39
  type: integer
39
40
  description: Overlap size for text splitting
40
41
  required: false
42
+ ---
@@ -1,3 +1,4 @@
1
+ ---
1
2
  name: contextlake-list
2
3
  description: |
3
4
  List documents and assets currently in the ContextLake Knowledge Base (知识库) / Knowledge Lake (知识湖).
@@ -20,3 +21,4 @@ parameters:
20
21
  type: integer
21
22
  description: Maximum number of documents to return (default 100).
22
23
  required: false
24
+ ---
@@ -1,3 +1,4 @@
1
+ ---
1
2
  name: contextlake-retrieve
2
3
  description: |
3
4
  Search, query, and retrieve relevant information from the ContextLake Knowledge Base (知识库) / Knowledge Lake (知识湖).
@@ -35,3 +36,4 @@ parameters:
35
36
  type: boolean
36
37
  description: Whether to include binary content
37
38
  required: false
39
+ ---
@@ -1,3 +1,4 @@
1
+ ---
1
2
  name: byted-las-data-profiler
2
3
  description: |
3
4
  Volcengine TOS Dataset Profiling Tool. Based on the S3-compatible protocol, it scans the file structure in TOS buckets and catalogs them.
@@ -5,6 +6,7 @@ description: |
5
6
 
6
7
  IMPORTANT RULE: You are STRICTLY FORBIDDEN from writing or executing Python scripts to access S3/TOS or LanceDB.
7
8
  You MUST exclusively use the provided tools (`list-s3-objects`, `read-s3-object`, `write-lance-catalog`, `las-data-profiler`) to accomplish the profiling tasks.
9
+ ---
8
10
 
9
11
  ## Trigger Scenarios
10
12
  Be sure to use this Skill when the user mentions the following scenarios:
@@ -34,4 +36,4 @@ If you need to perform custom exploration, you can use `list-s3-objects` to trav
34
36
 
35
37
  ## Output Location
36
38
  - LanceDB table storage path: `~/.openclaw/contextlake/profiler/{datasource_name}/catalog_db`
37
- - Configuration file: `~/.openclaw/contextlake/profiler/{datasource_name}/env.sh`
39
+ - Configuration file: `~/.openclaw/contextlake/profiler/{datasource_name}/env.sh`