@byted-las/contextlake-openclaw 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/README.md +64 -0
  2. package/bin/contextlake-openclaw.js +5 -0
  3. package/dist/index.d.ts +113 -0
  4. package/dist/index.js +73 -0
  5. package/dist/src/client/lancedb.d.ts +30 -0
  6. package/dist/src/client/lancedb.js +113 -0
  7. package/dist/src/client/tos.d.ts +19 -0
  8. package/dist/src/client/tos.js +81 -0
  9. package/dist/src/commands/cli.d.ts +6 -0
  10. package/dist/src/commands/cli.js +78 -0
  11. package/dist/src/commands/index.d.ts +1 -0
  12. package/dist/src/commands/index.js +139 -0
  13. package/dist/src/commands/slashcmd.d.ts +14 -0
  14. package/dist/src/commands/slashcmd.js +91 -0
  15. package/dist/src/commands/tools.d.ts +219 -0
  16. package/dist/src/commands/tools.js +286 -0
  17. package/dist/src/lib/actions/ingest.d.ts +8 -0
  18. package/dist/src/lib/actions/ingest.js +123 -0
  19. package/dist/src/lib/actions/manage.d.ts +15 -0
  20. package/dist/src/lib/actions/manage.js +91 -0
  21. package/dist/src/lib/actions/retrieve.d.ts +8 -0
  22. package/dist/src/lib/actions/retrieve.js +73 -0
  23. package/dist/src/processor/loader.d.ts +7 -0
  24. package/dist/src/processor/loader.js +83 -0
  25. package/dist/src/service/embedding/factory.d.ts +2 -0
  26. package/dist/src/service/embedding/factory.js +16 -0
  27. package/dist/src/service/embedding/interface.d.ts +18 -0
  28. package/dist/src/service/embedding/interface.js +2 -0
  29. package/dist/src/service/embedding/local.d.ts +14 -0
  30. package/dist/src/service/embedding/local.js +104 -0
  31. package/dist/src/service/embedding/remote.d.ts +9 -0
  32. package/dist/src/service/embedding/remote.js +42 -0
  33. package/dist/src/service/metadata/factory.d.ts +13 -0
  34. package/dist/src/service/metadata/factory.js +48 -0
  35. package/dist/src/service/metadata/interface.d.ts +17 -0
  36. package/dist/src/service/metadata/interface.js +2 -0
  37. package/dist/src/service/metadata/local.d.ts +13 -0
  38. package/dist/src/service/metadata/local.js +49 -0
  39. package/dist/src/service/storage/factory.d.ts +2 -0
  40. package/dist/src/service/storage/factory.js +19 -0
  41. package/dist/src/service/storage/interface.d.ts +32 -0
  42. package/dist/src/service/storage/interface.js +2 -0
  43. package/dist/src/service/storage/local.d.ts +9 -0
  44. package/dist/src/service/storage/local.js +72 -0
  45. package/dist/src/skills/las-data-profiler/index.d.ts +26 -0
  46. package/dist/src/skills/las-data-profiler/index.js +231 -0
  47. package/dist/src/skills/las-data-profiler/register.d.ts +1 -0
  48. package/dist/src/skills/las-data-profiler/register.js +19 -0
  49. package/dist/src/utils/config.d.ts +1 -0
  50. package/dist/src/utils/config.js +16 -0
  51. package/index.ts +78 -0
  52. package/openclaw.plugin.json +57 -0
  53. package/package.json +52 -0
  54. package/src/client/lancedb.ts +102 -0
  55. package/src/client/tos.ts +100 -0
  56. package/src/commands/cli.ts +77 -0
  57. package/src/commands/index.ts +156 -0
  58. package/src/commands/slashcmd.ts +95 -0
  59. package/src/commands/tools.ts +286 -0
  60. package/src/lib/actions/ingest.ts +103 -0
  61. package/src/lib/actions/manage.ts +107 -0
  62. package/src/lib/actions/retrieve.ts +90 -0
  63. package/src/processor/loader.ts +58 -0
  64. package/src/service/embedding/factory.ts +13 -0
  65. package/src/service/embedding/interface.ts +21 -0
  66. package/src/service/embedding/local.ts +118 -0
  67. package/src/service/embedding/remote.ts +45 -0
  68. package/src/service/metadata/factory.ts +52 -0
  69. package/src/service/metadata/interface.ts +19 -0
  70. package/src/service/metadata/local.ts +60 -0
  71. package/src/service/storage/factory.ts +16 -0
  72. package/src/service/storage/interface.ts +36 -0
  73. package/src/service/storage/local.ts +42 -0
  74. package/src/skills/contextlake-delete/SKILL.md +36 -0
  75. package/src/skills/contextlake-ingest/SKILL.md +40 -0
  76. package/src/skills/contextlake-list/SKILL.md +22 -0
  77. package/src/skills/contextlake-retrieve/SKILL.md +37 -0
  78. package/src/skills/las-data-profiler/SKILL.md +174 -0
  79. package/src/skills/las-data-profiler/index.ts +254 -0
  80. package/src/skills/las-data-profiler/register.ts +19 -0
  81. package/src/skills/las-data-profiler/s3_catalog.py +608 -0
  82. package/src/utils/config.ts +13 -0
@@ -0,0 +1,254 @@
1
+ import * as path from 'path';
2
+ import * as fs from 'fs';
3
+ import * as os from 'os';
4
+ import { execSync, spawn } from 'child_process';
5
+
6
+ // ---------------------------------------------------------------------------
7
+ // Types
8
+ // ---------------------------------------------------------------------------
9
+
10
/**
 * Input accepted by `connectDataSource`.
 */
export interface ConnectParams {
  /** Datasource name; also used as the directory name under the profiler base directory. */
  datasource_name: string;

  /** Storage vendor. `'local'` skips the endpoint and credential checks. */
  vendor: 'volcengine' | 'alibaba' | 'tencent' | 'aws' | 'local';

  /** Service endpoint. Mandatory for every vendor except `'aws'` and `'local'`. */
  endpoint?: string;

  /** Access key. When omitted, `connectDataSource` falls back to environment variables. */
  access_key?: string;

  /** Secret key. When omitted, `connectDataSource` falls back to environment variables. */
  secret_key?: string;

  /** Region, forwarded to the profiling script as `--region` when set. */
  region?: string;

  /** Bucket, forwarded to the profiling script as `--bucket`. */
  bucket: string;

  /** Key prefix, forwarded as `--prefix`. May be an empty string but must be provided. */
  prefix: string;

  /** Forwarded to the profiling script as `--sample-rows` when set to a non-zero value. */
  sample_rows?: number;
}
21
+
22
/**
 * Outcome reported by `connectDataSource` once the profiling script has finished.
 */
interface ConnectResult {
  /** `'error'` when the profiling script did not exit with code 0, otherwise `'success'`. */
  status: 'success' | 'error';

  /** Echo of the requested datasource name. */
  datasource_name: string;

  /** Location of the `catalog_db` directory inside the datasource directory. */
  db_path: string;

  /** Location of the generated `env.sh`. */
  env_path: string;

  /** Catalog table names on success; empty on error. */
  tables: string[];

  /** File counts taken from the script's JSON output, when that output could be parsed. */
  summary?: {
    total_files: number;
    structured_files: number;
    media_files: number;
  };

  /** Failure description; only set when `status` is `'error'`. */
  error?: string;
}
35
+
36
+ // ---------------------------------------------------------------------------
37
+ // Constants
38
+ // ---------------------------------------------------------------------------
39
+
40
/** Directory under the user's home that holds one sub-directory per datasource. */
const BASE_DIR = path.join(os.homedir(), '.openclaw', 'las-data-profiler');

/** Package names handed to pip when the Python import check fails. */
const PYTHON_DEPS = [
  'boto3',
  'lancedb',
  'pyarrow',
  'pandas',
  'Pillow',
  'mutagen',
  'pymupdf',
];
42
+
43
+ // ---------------------------------------------------------------------------
44
+ // Helpers
45
+ // ---------------------------------------------------------------------------
46
+
47
/**
 * Resolve the working directory of a datasource beneath `BASE_DIR`.
 *
 * The name comes from tool input, so it is checked for containment: a name
 * whose joined path resolves to `BASE_DIR` itself or to a location outside of
 * it (for example `..` or `../other`) is rejected instead of silently letting
 * files be written elsewhere. Nested names such as `team/sales` remain valid.
 *
 * @param name - Datasource name.
 * @returns `BASE_DIR` joined with `name`.
 * @throws Error when the resulting path is not strictly inside `BASE_DIR`.
 */
function getDataSourceDir(name: string): string {
  const dir = path.join(BASE_DIR, name);
  const relative = path.relative(BASE_DIR, dir);

  const escapesBase =
    relative === '' ||
    relative === '..' ||
    relative.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relative);

  if (escapesBase) {
    throw new Error(
      `datasource_name "${name}" must resolve to a directory inside ${BASE_DIR}`
    );
  }
  return dir;
}
50
+
51
/**
 * Create `dir`, including any missing parent directories.
 *
 * @param dir - Directory path to create.
 */
function ensureDir(dir: string): void {
  const options = { recursive: true };
  fs.mkdirSync(dir, options);
}
54
+
55
/**
 * Generate env.sh with all connection parameters for this datasource.
 * This file can be sourced to re-run the profiler or for debugging.
 *
 * Values are written as single-quoted shell words (embedded single quotes are
 * escaped as `'\''`), so characters such as `$`, backticks or `"` inside a
 * bucket, prefix or secret are taken literally when the file is sourced.
 * Line breaks in the datasource name are collapsed in the comment header so
 * they cannot start a new shell line.
 *
 * The file holds credentials, so it is restricted to mode 0o600. Because the
 * `mode` option of `writeFileSync` only applies to newly created files, an
 * already existing file is chmod-ed before the new content is written.
 *
 * @param dir - Datasource directory; receives `env.sh` and is the parent of
 *   the exported `LAS_DB_PATH`.
 * @param params - Connection parameters to persist. Optional fields are only
 *   exported when they are truthy.
 * @returns Path of the written file (`<dir>/env.sh`).
 */
function writeEnvFile(dir: string, params: ConnectParams): string {
  const envPath = path.join(dir, 'env.sh');

  const shellQuote = (value: string | number): string =>
    `'${String(value).replace(/'/g, "'\\''")}'`;
  const exportLine = (name: string, value: string | number): string =>
    `export ${name}=${shellQuote(value)}`;
  const singleLine = (text: string): string => String(text).replace(/[\r\n]+/g, ' ');

  const lines: string[] = [
    '#!/usr/bin/env bash',
    '# Auto-generated by las-data-profiler connect',
    `# Datasource: ${singleLine(params.datasource_name)}`,
    `# Created: ${new Date().toISOString()}`,
    '',
    exportLine('LAS_VENDOR', params.vendor),
    exportLine('LAS_BUCKET', params.bucket),
    exportLine('LAS_PREFIX', params.prefix),
  ];

  const optionalEntries: Array<[string, string | number | undefined]> = [
    ['LAS_ENDPOINT', params.endpoint],
    ['LAS_ACCESS_KEY', params.access_key],
    ['LAS_SECRET_KEY', params.secret_key],
    ['LAS_REGION', params.region],
    ['LAS_SAMPLE_ROWS', params.sample_rows],
  ];
  for (const [name, value] of optionalEntries) {
    if (value) {
      lines.push(exportLine(name, value));
    }
  }

  lines.push(exportLine('LAS_DB_PATH', path.join(dir, 'catalog_db')));
  lines.push(exportLine('LAS_DATASOURCE_NAME', params.datasource_name));
  lines.push('');

  // Tighten permissions first so secrets never land in a world-readable file.
  if (fs.existsSync(envPath)) {
    fs.chmodSync(envPath, 0o600);
  }
  fs.writeFileSync(envPath, lines.join('\n'), { mode: 0o600 });
  return envPath;
}
95
+
96
/**
 * Install Python dependencies if not already available.
 *
 * Availability is probed by importing the modules with `python3`. If that
 * command fails for any reason, the packages listed in `PYTHON_DEPS` are
 * installed into the user site.
 *
 * pip is invoked as `python3 -m pip` rather than through a `pip3` executable,
 * so packages are installed for the same interpreter that ran the import
 * check; a `pip3` found on PATH may belong to a different Python installation.
 *
 * @throws Error when the pip invocation itself fails (propagated from `execSync`).
 */
function ensurePythonDeps(): void {
  try {
    execSync(`python3 -c "import boto3, lancedb, pyarrow, pandas, PIL, mutagen, fitz"`, {
      stdio: 'pipe',
    });
    return;
  } catch {
    // Import check failed: fall through and try to install the packages.
  }

  console.log('[las-data-profiler] Installing Python dependencies...');
  execSync(`python3 -m pip install --user ${PYTHON_DEPS.join(' ')}`, {
    stdio: 'inherit',
  });
}
111
+
112
/**
 * Get the path to the bundled Python script.
 *
 * The script is looked up next to this module first. The published package
 * ships `s3_catalog.py` only under `src/skills/las-data-profiler`, so when the
 * compiled module under `dist/src/skills/las-data-profiler` is running, the
 * co-located file is missing; in that case the copy in the package's `src`
 * tree (four directories up from the module directory) is used when present.
 *
 * @param moduleDir - Directory to treat as this module's location. Defaults
 *   to `__dirname`.
 * @returns Path of the first existing candidate, or the co-located path when
 *   no candidate exists (so the eventual failure names the expected location).
 */
function getScriptPath(moduleDir: string = __dirname): string {
  const scriptName = 's3_catalog.py';
  const colocated = path.join(moduleDir, scriptName);

  const packagedSource = path.resolve(
    moduleDir,
    '..',
    '..',
    '..',
    '..',
    'src',
    'skills',
    'las-data-profiler',
    scriptName
  );

  for (const candidate of [colocated, packagedSource]) {
    if (fs.existsSync(candidate)) {
      return candidate;
    }
  }
  return colocated;
}
119
+
120
+ // ---------------------------------------------------------------------------
121
+ // Main Entry
122
+ // ---------------------------------------------------------------------------
123
+
124
/**
 * Extract the `summary` object from the profiling script's stdout.
 *
 * The original greedy match (first `{` to last `}`) is tried first. When that
 * span is not valid JSON, for instance because unrelated log lines contain
 * braces, each single-line JSON object mentioning `"summary"` is tried,
 * starting from the end of the output.
 *
 * @returns The parsed summary; a zero-valued summary when stdout holds no
 *   candidate at all or the parsed object has no `summary`; `undefined` when
 *   candidates exist but none is valid JSON.
 */
function parseProfilerSummary(stdout: string): ConnectResult['summary'] {
  const emptySummary = { total_files: 0, structured_files: 0, media_files: 0 };

  const candidates: string[] = [];
  const greedy = stdout.match(/\{[\s\S]*"summary"[\s\S]*\}/);
  if (greedy) {
    candidates.push(greedy[0]);
  }
  for (const line of stdout.split(/\r?\n/).reverse()) {
    const trimmed = line.trim();
    if (trimmed.startsWith('{') && trimmed.endsWith('}') && trimmed.includes('"summary"')) {
      candidates.push(trimmed);
    }
  }

  if (candidates.length === 0) {
    return emptySummary;
  }
  for (const candidate of candidates) {
    try {
      const parsed = JSON.parse(candidate);
      return parsed.summary || emptySummary;
    } catch {
      // Not valid JSON; try the next candidate.
    }
  }
  return undefined;
}

/**
 * Profile a datasource: validate the parameters, write `env.sh`, make sure the
 * Python dependencies are present, then run the bundled profiling script with
 * `python3` and report its outcome.
 *
 * @param params - Connection parameters. The object is not modified; when
 *   credentials are missing they are taken from `TOS_*`, `S3_*` or `AWS_*`
 *   environment variables and applied to an internal copy.
 * @param _ctx - Unused.
 * @returns A result with `status: 'error'` when the script cannot be started,
 *   is terminated, or exits with a non-zero code; otherwise `status: 'success'`
 *   with the catalog table names and, when available, the parsed summary.
 * @throws Error when a required parameter is missing, and for any failure
 *   raised while preparing the datasource directory or Python dependencies.
 */
export async function connectDataSource(
  params: ConnectParams,
  _ctx?: any
): Promise<ConnectResult> {
  // Validate required params
  if (!params.datasource_name) {
    throw new Error('datasource_name is required');
  }
  if (!params.vendor) {
    throw new Error('vendor is required');
  }
  if (!params.bucket) {
    throw new Error('bucket is required');
  }
  if (params.prefix === undefined || params.prefix === null) {
    throw new Error('prefix is required');
  }

  // Work on a copy so env-derived credentials are never written back into the
  // caller's object.
  const effective: ConnectParams = { ...params };

  // For non-local vendors, validate credentials
  if (effective.vendor !== 'local') {
    if (!effective.endpoint && effective.vendor !== 'aws') {
      throw new Error(`endpoint is required for vendor "${effective.vendor}"`);
    }
    const ak =
      effective.access_key ||
      process.env.TOS_ACCESS_KEY ||
      process.env.S3_ACCESS_KEY ||
      process.env.AWS_ACCESS_KEY_ID;
    const sk =
      effective.secret_key ||
      process.env.TOS_SECRET_KEY ||
      process.env.S3_SECRET_KEY ||
      process.env.AWS_SECRET_ACCESS_KEY;
    if (!ak || !sk) {
      throw new Error(
        'access_key and secret_key are required (via params or env vars TOS_ACCESS_KEY/TOS_SECRET_KEY, S3_ACCESS_KEY/S3_SECRET_KEY, AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY)'
      );
    }
    // Normalise into the copy so env.sh and the CLI args pick them up
    effective.access_key = ak;
    effective.secret_key = sk;
  }

  const dsDir = getDataSourceDir(effective.datasource_name);
  const dbPath = path.join(dsDir, 'catalog_db');

  ensureDir(dsDir);

  // 1. Write env.sh
  const envPath = writeEnvFile(dsDir, effective);

  // 2. Ensure Python dependencies
  ensurePythonDeps();

  // 3. Build CLI args for the Python script
  const args: string[] = [
    getScriptPath(),
    '--vendor', effective.vendor,
    '--bucket', effective.bucket,
    '--prefix', effective.prefix,
    '--db-path', dbPath,
  ];

  if (effective.endpoint) {
    args.push('--endpoint', effective.endpoint);
  }
  // NOTE(review): credentials passed as command-line arguments are visible in
  // process listings. Moving them to the child's environment would need
  // support in s3_catalog.py, which is not visible here — confirm.
  if (effective.access_key) {
    args.push('--ak', effective.access_key);
  }
  if (effective.secret_key) {
    args.push('--sk', effective.secret_key);
  }
  if (effective.region) {
    args.push('--region', effective.region);
  }
  if (effective.sample_rows) {
    args.push('--sample-rows', String(effective.sample_rows));
  }

  const successTables = ['file_catalog', 'structured_schemas', 'media_metadata'];

  // 4. Execute the profiling script
  return new Promise<ConnectResult>((resolve) => {
    let stdout = '';
    let stderr = '';
    let settled = false;

    // 'error' and 'close' can both fire for one child; report only the first.
    const finish = (result: ConnectResult): void => {
      if (!settled) {
        settled = true;
        resolve(result);
      }
    };
    const failure = (message: string): ConnectResult => ({
      status: 'error',
      datasource_name: effective.datasource_name,
      db_path: dbPath,
      env_path: envPath,
      tables: [],
      error: message,
    });

    const proc = spawn('python3', args, {
      cwd: dsDir,
      env: { ...process.env },
    });

    // Decode at the stream level so multi-byte characters split across chunks
    // are reassembled correctly.
    proc.stdout.setEncoding('utf8');
    proc.stderr.setEncoding('utf8');

    proc.stdout.on('data', (chunk: string) => {
      stdout += chunk;
    });

    proc.stderr.on('data', (chunk: string) => {
      stderr += chunk;
    });

    // Without this listener a spawn failure (e.g. python3 not installed)
    // would surface as an unhandled 'error' event instead of a result.
    proc.on('error', (err: Error) => {
      finish(failure(`Failed to run python3: ${err.message}`));
    });

    proc.on('close', (code: number | null, signal: string | null) => {
      if (code !== 0) {
        const fallback =
          code === null
            ? `Python script was terminated by signal ${signal}`
            : `Python script exited with code ${code}`;
        finish(failure(stderr || fallback));
        return;
      }

      // Try to parse structured output from the script
      const summary = parseProfilerSummary(stdout);
      finish({
        status: 'success',
        datasource_name: effective.datasource_name,
        db_path: dbPath,
        env_path: envPath,
        tables: successTables,
        ...(summary ? { summary } : {}),
      });
    });
  });
}
@@ -0,0 +1,19 @@
1
+ // @ts-ignore
2
+ import { PluginContext } from 'openclaw/plugin-sdk';
3
+ import { connectDataSource } from './index';
4
+
5
/**
 * Register the `las-data-profiler` skill with the host plugin context.
 *
 * `ctx.registerTool` is preferred; `ctx.registerSkill` is used when the former
 * is not a function. When the context offers neither (or is nullish), a
 * warning is logged instead of silently skipping registration.
 *
 * @param ctx - Host plugin context. It is also forwarded to
 *   `connectDataSource` on every execution.
 */
export function registerLasDataProfilerSkill(ctx: any) {
  const definition = {
    name: 'las-data-profiler',
    description: 'Connect to a data source (TOS/OSS/COS/S3/Local) and profile its structure, schemas, and media metadata into LanceDB',
    async execute(params: any) {
      return connectDataSource(params, ctx);
    }
  };

  if (typeof ctx?.registerTool === 'function') {
    ctx.registerTool(definition);
    return;
  }
  if (typeof ctx?.registerSkill === 'function') {
    ctx.registerSkill(definition);
    return;
  }

  console.warn(
    '[las-data-profiler] Plugin context exposes neither registerTool nor registerSkill; skill was not registered'
  );
}