@byted-las/contextlake-openclaw 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +64 -0
- package/bin/contextlake-openclaw.js +5 -0
- package/dist/index.d.ts +113 -0
- package/dist/index.js +73 -0
- package/dist/src/client/lancedb.d.ts +30 -0
- package/dist/src/client/lancedb.js +113 -0
- package/dist/src/client/tos.d.ts +19 -0
- package/dist/src/client/tos.js +81 -0
- package/dist/src/commands/cli.d.ts +6 -0
- package/dist/src/commands/cli.js +78 -0
- package/dist/src/commands/index.d.ts +1 -0
- package/dist/src/commands/index.js +139 -0
- package/dist/src/commands/slashcmd.d.ts +14 -0
- package/dist/src/commands/slashcmd.js +91 -0
- package/dist/src/commands/tools.d.ts +219 -0
- package/dist/src/commands/tools.js +286 -0
- package/dist/src/lib/actions/ingest.d.ts +8 -0
- package/dist/src/lib/actions/ingest.js +123 -0
- package/dist/src/lib/actions/manage.d.ts +15 -0
- package/dist/src/lib/actions/manage.js +91 -0
- package/dist/src/lib/actions/retrieve.d.ts +8 -0
- package/dist/src/lib/actions/retrieve.js +73 -0
- package/dist/src/processor/loader.d.ts +7 -0
- package/dist/src/processor/loader.js +83 -0
- package/dist/src/service/embedding/factory.d.ts +2 -0
- package/dist/src/service/embedding/factory.js +16 -0
- package/dist/src/service/embedding/interface.d.ts +18 -0
- package/dist/src/service/embedding/interface.js +2 -0
- package/dist/src/service/embedding/local.d.ts +14 -0
- package/dist/src/service/embedding/local.js +104 -0
- package/dist/src/service/embedding/remote.d.ts +9 -0
- package/dist/src/service/embedding/remote.js +42 -0
- package/dist/src/service/metadata/factory.d.ts +13 -0
- package/dist/src/service/metadata/factory.js +48 -0
- package/dist/src/service/metadata/interface.d.ts +17 -0
- package/dist/src/service/metadata/interface.js +2 -0
- package/dist/src/service/metadata/local.d.ts +13 -0
- package/dist/src/service/metadata/local.js +49 -0
- package/dist/src/service/storage/factory.d.ts +2 -0
- package/dist/src/service/storage/factory.js +19 -0
- package/dist/src/service/storage/interface.d.ts +32 -0
- package/dist/src/service/storage/interface.js +2 -0
- package/dist/src/service/storage/local.d.ts +9 -0
- package/dist/src/service/storage/local.js +72 -0
- package/dist/src/skills/las-data-profiler/index.d.ts +26 -0
- package/dist/src/skills/las-data-profiler/index.js +231 -0
- package/dist/src/skills/las-data-profiler/register.d.ts +1 -0
- package/dist/src/skills/las-data-profiler/register.js +19 -0
- package/dist/src/utils/config.d.ts +1 -0
- package/dist/src/utils/config.js +16 -0
- package/index.ts +78 -0
- package/openclaw.plugin.json +57 -0
- package/package.json +52 -0
- package/src/client/lancedb.ts +102 -0
- package/src/client/tos.ts +100 -0
- package/src/commands/cli.ts +77 -0
- package/src/commands/index.ts +156 -0
- package/src/commands/slashcmd.ts +95 -0
- package/src/commands/tools.ts +286 -0
- package/src/lib/actions/ingest.ts +103 -0
- package/src/lib/actions/manage.ts +107 -0
- package/src/lib/actions/retrieve.ts +90 -0
- package/src/processor/loader.ts +58 -0
- package/src/service/embedding/factory.ts +13 -0
- package/src/service/embedding/interface.ts +21 -0
- package/src/service/embedding/local.ts +118 -0
- package/src/service/embedding/remote.ts +45 -0
- package/src/service/metadata/factory.ts +52 -0
- package/src/service/metadata/interface.ts +19 -0
- package/src/service/metadata/local.ts +60 -0
- package/src/service/storage/factory.ts +16 -0
- package/src/service/storage/interface.ts +36 -0
- package/src/service/storage/local.ts +42 -0
- package/src/skills/contextlake-delete/SKILL.md +36 -0
- package/src/skills/contextlake-ingest/SKILL.md +40 -0
- package/src/skills/contextlake-list/SKILL.md +22 -0
- package/src/skills/contextlake-retrieve/SKILL.md +37 -0
- package/src/skills/las-data-profiler/SKILL.md +174 -0
- package/src/skills/las-data-profiler/index.ts +254 -0
- package/src/skills/las-data-profiler/register.ts +19 -0
- package/src/skills/las-data-profiler/s3_catalog.py +608 -0
- package/src/utils/config.ts +13 -0
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.connectDataSource = connectDataSource;
|
|
37
|
+
const path = __importStar(require("path"));
|
|
38
|
+
const fs = __importStar(require("fs"));
|
|
39
|
+
const os = __importStar(require("os"));
|
|
40
|
+
const child_process_1 = require("child_process");
|
|
41
|
+
// ---------------------------------------------------------------------------
|
|
42
|
+
// Constants
|
|
43
|
+
// ---------------------------------------------------------------------------
|
|
44
|
+
/** Root directory under which every datasource gets its own working directory. */
const BASE_DIR = path.join(os.homedir(), '.openclaw', 'las-data-profiler');
/** pip package names installed by ensurePythonDeps() when its import probe fails. */
const PYTHON_DEPS = ['boto3', 'lancedb', 'pyarrow', 'pandas', 'Pillow', 'mutagen', 'pymupdf'];
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Resolve the working directory of a datasource.
 *
 * @param name Datasource name; joined onto BASE_DIR.
 * @returns `path.join(BASE_DIR, name)`.
 * @throws Error when the joined path would land outside BASE_DIR (for
 *         example a name containing `..` segments), because the caller goes
 *         on to create that directory and write credentials into it.
 */
function getDataSourceDir(name) {
    const dir = path.join(BASE_DIR, name);
    const relative = path.relative(BASE_DIR, dir);
    // Only a leading `..` *segment* escapes; a name such as "..hidden" is still inside.
    const escapes = relative === '..' || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
    if (escapes) {
        throw new Error(`Invalid datasource_name "${name}": it must not resolve outside ${BASE_DIR}`);
    }
    return dir;
}
/**
 * Create `dir` and any missing parent directories; a no-op when it already exists.
 *
 * @param dir Directory path to create.
 */
function ensureDir(dir) {
    fs.mkdirSync(dir, { recursive: true });
}
|
|
55
|
+
/**
 * Generate env.sh with all connection parameters for this datasource.
 * This file can be sourced to re-run the profiler or for debugging.
 *
 * Every value is written as a single-quoted shell word, so characters such as
 * `"`, `$`, backticks and backslashes inside a bucket, prefix or secret stay
 * literal when the file is sourced instead of being expanded or executed.
 *
 * @param dir    Directory that receives `env.sh`.
 * @param params Connection parameters. `datasource_name`, `vendor`, `bucket`
 *               and `prefix` are always written; `endpoint`, `access_key`,
 *               `secret_key`, `region` and `sample_rows` only when truthy.
 * @returns Path of the written `env.sh`.
 */
function writeEnvFile(dir, params) {
    // POSIX single-quote escaping: close the quote, emit an escaped quote, reopen.
    const shellQuote = (value) => "'" + String(value).replace(/'/g, "'\\''") + "'";
    // A comment must stay on one line, otherwise the remainder would run as a command.
    const singleLine = (value) => String(value).replace(/[\r\n]+/g, ' ');
    const exportLine = (name, value) => `export ${name}=${shellQuote(value)}`;
    const envPath = path.join(dir, 'env.sh');
    const lines = [
        '#!/usr/bin/env bash',
        '# Auto-generated by las-data-profiler connect',
        `# Datasource: ${singleLine(params.datasource_name)}`,
        `# Created: ${new Date().toISOString()}`,
        '',
        exportLine('LAS_VENDOR', params.vendor),
        exportLine('LAS_BUCKET', params.bucket),
        exportLine('LAS_PREFIX', params.prefix),
    ];
    const optional = [
        ['LAS_ENDPOINT', params.endpoint],
        ['LAS_ACCESS_KEY', params.access_key],
        ['LAS_SECRET_KEY', params.secret_key],
        ['LAS_REGION', params.region],
        ['LAS_SAMPLE_ROWS', params.sample_rows],
    ];
    for (const [name, value] of optional) {
        if (value) {
            lines.push(exportLine(name, value));
        }
    }
    lines.push(exportLine('LAS_DB_PATH', path.join(dir, 'catalog_db')));
    lines.push(exportLine('LAS_DATASOURCE_NAME', params.datasource_name));
    lines.push('');
    fs.writeFileSync(envPath, lines.join('\n'), { mode: 0o600 });
    // `mode` is only honoured when the file is created; an env.sh left over from an
    // earlier run keeps its old permissions unless they are tightened explicitly.
    fs.chmodSync(envPath, 0o600);
    return envPath;
}
|
|
92
|
+
/**
 * Install Python dependencies if not already available.
 *
 * Probes by importing every required module with `python3`. If the probe
 * fails for any reason, the packages in PYTHON_DEPS are installed into the
 * user site. An installation failure propagates to the caller as the
 * exception thrown by `execSync`.
 */
function ensurePythonDeps() {
    try {
        child_process_1.execSync(`python3 -c "import boto3, lancedb, pyarrow, pandas, PIL, mutagen, fitz"`, {
            stdio: 'pipe',
        });
    }
    catch {
        console.log('[las-data-profiler] Installing Python dependencies...');
        // Run pip through the interpreter that was just probed: a bare `pip3` on
        // PATH can belong to a different Python installation, in which case the
        // packages would be installed where `python3` cannot import them.
        child_process_1.execSync(`python3 -m pip install --user ${PYTHON_DEPS.join(' ')}`, {
            stdio: 'inherit',
        });
    }
}
|
|
108
|
+
/**
 * Get the path to the bundled Python script.
 *
 * The script is looked up next to this module first. The published package
 * ships `s3_catalog.py` only under `src/skills/las-data-profiler/`, while this
 * module is loaded from `dist/src/skills/las-data-profiler/`, so the source
 * location is used as a fallback when no co-located copy exists.
 *
 * @returns Path of the first candidate that exists, or the co-located path
 *          when neither does.
 */
function getScriptPath() {
    const scriptName = 's3_catalog.py';
    const colocated = path.join(__dirname, scriptName);
    if (fs.existsSync(colocated)) {
        return colocated;
    }
    // <pkg>/dist/src/skills/las-data-profiler -> <pkg>/src/skills/las-data-profiler
    const fromSource = path.join(__dirname, '..', '..', '..', '..', 'src', 'skills', 'las-data-profiler', scriptName);
    return fs.existsSync(fromSource) ? fromSource : colocated;
}
|
|
115
|
+
// ---------------------------------------------------------------------------
|
|
116
|
+
// Main Entry
|
|
117
|
+
// ---------------------------------------------------------------------------
|
|
118
|
+
async function connectDataSource(params, _ctx) {
|
|
119
|
+
// Validate required params
|
|
120
|
+
if (!params.datasource_name) {
|
|
121
|
+
throw new Error('datasource_name is required');
|
|
122
|
+
}
|
|
123
|
+
if (!params.vendor) {
|
|
124
|
+
throw new Error('vendor is required');
|
|
125
|
+
}
|
|
126
|
+
if (!params.bucket) {
|
|
127
|
+
throw new Error('bucket is required');
|
|
128
|
+
}
|
|
129
|
+
if (params.prefix === undefined || params.prefix === null) {
|
|
130
|
+
throw new Error('prefix is required');
|
|
131
|
+
}
|
|
132
|
+
// For non-local vendors, validate credentials
|
|
133
|
+
if (params.vendor !== 'local') {
|
|
134
|
+
if (!params.endpoint && params.vendor !== 'aws') {
|
|
135
|
+
throw new Error(`endpoint is required for vendor "${params.vendor}"`);
|
|
136
|
+
}
|
|
137
|
+
const ak = params.access_key || process.env.TOS_ACCESS_KEY || process.env.S3_ACCESS_KEY || process.env.AWS_ACCESS_KEY_ID;
|
|
138
|
+
const sk = params.secret_key || process.env.TOS_SECRET_KEY || process.env.S3_SECRET_KEY || process.env.AWS_SECRET_ACCESS_KEY;
|
|
139
|
+
if (!ak || !sk) {
|
|
140
|
+
throw new Error('access_key and secret_key are required (via params or env vars TOS_ACCESS_KEY/TOS_SECRET_KEY, S3_ACCESS_KEY/S3_SECRET_KEY, AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY)');
|
|
141
|
+
}
|
|
142
|
+
// Normalise into params so env.sh picks them up
|
|
143
|
+
params.access_key = ak;
|
|
144
|
+
params.secret_key = sk;
|
|
145
|
+
}
|
|
146
|
+
const dsDir = getDataSourceDir(params.datasource_name);
|
|
147
|
+
const dbPath = path.join(dsDir, 'catalog_db');
|
|
148
|
+
ensureDir(dsDir);
|
|
149
|
+
// 1. Write env.sh
|
|
150
|
+
const envPath = writeEnvFile(dsDir, params);
|
|
151
|
+
// 2. Ensure Python dependencies
|
|
152
|
+
ensurePythonDeps();
|
|
153
|
+
// 3. Build CLI args for the Python script
|
|
154
|
+
const scriptPath = getScriptPath();
|
|
155
|
+
const args = [
|
|
156
|
+
scriptPath,
|
|
157
|
+
'--vendor', params.vendor,
|
|
158
|
+
'--bucket', params.bucket,
|
|
159
|
+
'--prefix', params.prefix,
|
|
160
|
+
'--db-path', dbPath,
|
|
161
|
+
];
|
|
162
|
+
if (params.endpoint) {
|
|
163
|
+
args.push('--endpoint', params.endpoint);
|
|
164
|
+
}
|
|
165
|
+
if (params.access_key) {
|
|
166
|
+
args.push('--ak', params.access_key);
|
|
167
|
+
}
|
|
168
|
+
if (params.secret_key) {
|
|
169
|
+
args.push('--sk', params.secret_key);
|
|
170
|
+
}
|
|
171
|
+
if (params.region) {
|
|
172
|
+
args.push('--region', params.region);
|
|
173
|
+
}
|
|
174
|
+
if (params.sample_rows) {
|
|
175
|
+
args.push('--sample-rows', String(params.sample_rows));
|
|
176
|
+
}
|
|
177
|
+
// 4. Execute the profiling script
|
|
178
|
+
return new Promise((resolve) => {
|
|
179
|
+
let stdout = '';
|
|
180
|
+
let stderr = '';
|
|
181
|
+
const proc = (0, child_process_1.spawn)('python3', args, {
|
|
182
|
+
cwd: dsDir,
|
|
183
|
+
env: { ...process.env },
|
|
184
|
+
});
|
|
185
|
+
proc.stdout.on('data', (data) => {
|
|
186
|
+
stdout += data.toString();
|
|
187
|
+
});
|
|
188
|
+
proc.stderr.on('data', (data) => {
|
|
189
|
+
stderr += data.toString();
|
|
190
|
+
});
|
|
191
|
+
proc.on('close', (code) => {
|
|
192
|
+
if (code !== 0) {
|
|
193
|
+
resolve({
|
|
194
|
+
status: 'error',
|
|
195
|
+
datasource_name: params.datasource_name,
|
|
196
|
+
db_path: dbPath,
|
|
197
|
+
env_path: envPath,
|
|
198
|
+
tables: [],
|
|
199
|
+
error: stderr || `Python script exited with code ${code}`,
|
|
200
|
+
});
|
|
201
|
+
return;
|
|
202
|
+
}
|
|
203
|
+
// Try to parse structured output from the script
|
|
204
|
+
try {
|
|
205
|
+
const jsonMatch = stdout.match(/\{[\s\S]*"summary"[\s\S]*\}/);
|
|
206
|
+
const result = jsonMatch ? JSON.parse(jsonMatch[0]) : {};
|
|
207
|
+
resolve({
|
|
208
|
+
status: 'success',
|
|
209
|
+
datasource_name: params.datasource_name,
|
|
210
|
+
db_path: dbPath,
|
|
211
|
+
env_path: envPath,
|
|
212
|
+
tables: ['file_catalog', 'structured_schemas', 'media_metadata'],
|
|
213
|
+
summary: result.summary || {
|
|
214
|
+
total_files: 0,
|
|
215
|
+
structured_files: 0,
|
|
216
|
+
media_files: 0,
|
|
217
|
+
},
|
|
218
|
+
});
|
|
219
|
+
}
|
|
220
|
+
catch {
|
|
221
|
+
resolve({
|
|
222
|
+
status: 'success',
|
|
223
|
+
datasource_name: params.datasource_name,
|
|
224
|
+
db_path: dbPath,
|
|
225
|
+
env_path: envPath,
|
|
226
|
+
tables: ['file_catalog', 'structured_schemas', 'media_metadata'],
|
|
227
|
+
});
|
|
228
|
+
}
|
|
229
|
+
});
|
|
230
|
+
});
|
|
231
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function registerLasDataProfilerSkill(ctx: any): void;
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.registerLasDataProfilerSkill = registerLasDataProfilerSkill;
|
|
4
|
+
const index_1 = require("./index");
|
|
5
|
+
function registerLasDataProfilerSkill(ctx) {
|
|
6
|
+
const definition = {
|
|
7
|
+
name: 'las-data-profiler',
|
|
8
|
+
description: 'Connect to a data source (TOS/OSS/COS/S3/Local) and profile its structure, schemas, and media metadata into LanceDB',
|
|
9
|
+
async execute(params) {
|
|
10
|
+
return await (0, index_1.connectDataSource)(params, ctx);
|
|
11
|
+
}
|
|
12
|
+
};
|
|
13
|
+
if (typeof ctx.registerTool === 'function') {
|
|
14
|
+
ctx.registerTool(definition);
|
|
15
|
+
}
|
|
16
|
+
else if (typeof ctx.registerSkill === 'function') {
|
|
17
|
+
ctx.registerSkill(definition);
|
|
18
|
+
}
|
|
19
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function getPluginConfig(ctx: any): any;
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.getPluginConfig = getPluginConfig;
|
|
4
|
+
function getPluginConfig(ctx) {
|
|
5
|
+
return ctx.config?.plugins?.entries?.['contextlake-openclaw']?.config || {
|
|
6
|
+
metadata_storage: {
|
|
7
|
+
type: 'local',
|
|
8
|
+
lancedb_uri: require('path').join(require('os').homedir(), '.openclaw', 'contextlake', 'data'),
|
|
9
|
+
embedding: {
|
|
10
|
+
provider: 'local',
|
|
11
|
+
model_name: 'hf:CompendiumLabs/bge-small-zh-v1.5-gguf/bge-small-zh-v1.5-f16.gguf'
|
|
12
|
+
}
|
|
13
|
+
},
|
|
14
|
+
file_storage: { type: 'local', local_base_dir: require('path').join(require('os').homedir(), '.openclaw', 'contextlake', 'files') }
|
|
15
|
+
};
|
|
16
|
+
}
|
package/index.ts
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
// @ts-ignore
|
|
2
|
+
import { PluginContext } from 'openclaw/plugin-sdk';
|
|
3
|
+
import { registerAll } from './src/commands';
|
|
4
|
+
|
|
5
|
+
/** Embedding model used by both the `model_name` default and the `embedding` object default. */
const DEFAULT_EMBEDDING_MODEL = 'hf:CompendiumLabs/bge-small-zh-v1.5-gguf/bge-small-zh-v1.5-f16.gguf';

/** JSON schema of the `metadata_storage.embedding` section. */
const embeddingSchema = {
  type: 'object',
  properties: {
    provider: { type: 'string', enum: ['local', 'remote', 'openai'], default: 'local' },
    model_name: { type: 'string', default: DEFAULT_EMBEDDING_MODEL },
    api_key: { type: 'string' },
    api_base: { type: 'string' }
  },
  default: { provider: 'local', model_name: DEFAULT_EMBEDDING_MODEL }
};

/** JSON schema of the `file_storage.tos` section. */
const tosSchema = {
  type: 'object',
  properties: {
    access_key: { type: 'string' },
    secret_key: { type: 'string' },
    region: { type: 'string' },
    path: { type: 'string', description: 'TOS path in format tos://bucket/base_path/' },
    endpoint: { type: 'string' },
    sts_token: { type: 'string' }
  }
};

/**
 * Plugin descriptor: identity, configuration schema and the `register` hook
 * that hands all registrations over to `registerAll`.
 */
const plugin = {
  id: 'contextlake-openclaw',
  name: 'ContextLake',
  version: '1.1.0',
  description: 'A lightweight knowledge base plugin for OpenClaw using LanceDB and TOS, with data profiling support',
  configSchema: {
    type: 'object',
    properties: {
      metadata_storage: {
        type: 'object',
        properties: {
          type: { type: 'string', enum: ['local', 'remote'], default: 'local' },
          lancedb_uri: { type: 'string', default: './data/contextlake' },
          remote_api_endpoint: { type: 'string' },
          remote_api_key: { type: 'string' },
          embedding: embeddingSchema
        },
        default: { type: 'local', lancedb_uri: './data/contextlake' }
      },
      file_storage: {
        type: 'object',
        properties: {
          type: { type: 'string', enum: ['local', 'tos'], default: 'local' },
          local_base_dir: { type: 'string', default: './data/files' },
          tos: tosSchema
        },
        default: { type: 'local', local_base_dir: './data/files' }
      },
      storage_policy: {
        type: 'object',
        properties: {
          max_inline_size_kb: { type: 'number', default: 1024, description: 'Files smaller than this size (in KB) will be stored directly in LanceDB' }
        }
      }
    }
  },
  /**
   * Entry point called by the host. Uses `ctx.logger` when the host supplies
   * one and a console-backed logger otherwise, then delegates to `registerAll`.
   */
  register(ctx: any) {
    let logger = ctx.logger;
    if (!logger) {
      logger = {
        info: (msg: string, ...args: any[]) => console.log(msg, ...args),
        warn: (msg: string, ...args: any[]) => console.warn(msg, ...args),
        error: (msg: string, ...args: any[]) => console.error(msg, ...args),
        debug: (msg: string, ...args: any[]) => console.debug(msg, ...args),
      };
    }

    logger.info(`[${new Date().toISOString()}] [ContextLake] Plugin register started`);

    // All tool, command and skill registrations live behind registerAll.
    registerAll(ctx, logger);

    logger.info(`[${new Date().toISOString()}] [ContextLake] Plugin register completed`);
  }
};
|
|
77
|
+
|
|
78
|
+
export default plugin;
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "contextlake-openclaw",
|
|
3
|
+
"name": "ContextLake",
|
|
4
|
+
"version": "1.1.0",
|
|
5
|
+
"description": "A lightweight knowledge base plugin for OpenClaw using LanceDB and TOS, with data profiling support",
|
|
6
|
+
"skills": ["./src/skills"],
|
|
7
|
+
"configSchema": {
|
|
8
|
+
"type": "object",
|
|
9
|
+
"properties": {
|
|
10
|
+
"metadata_storage": {
|
|
11
|
+
"type": "object",
|
|
12
|
+
"properties": {
|
|
13
|
+
"type": { "type": "string", "enum": ["local", "remote"], "default": "local" },
|
|
14
|
+
"lancedb_uri": { "type": "string", "default": "./data/contextlake" },
|
|
15
|
+
"remote_api_endpoint": { "type": "string" },
|
|
16
|
+
"remote_api_key": { "type": "string" },
|
|
17
|
+
"embedding": {
|
|
18
|
+
"type": "object",
|
|
19
|
+
"properties": {
|
|
20
|
+
"provider": { "type": "string", "enum": ["local", "remote", "openai"], "default": "local" },
|
|
21
|
+
"model_name": { "type": "string", "default": "hf:CompendiumLabs/bge-small-zh-v1.5-gguf/bge-small-zh-v1.5-f16.gguf" },
|
|
22
|
+
"api_key": { "type": "string" },
|
|
23
|
+
"api_base": { "type": "string", "description": "Base URL for remote API (e.g. https://ark.cn-beijing.volces.com/api/v3)" }
|
|
24
|
+
},
|
|
25
|
+
"default": { "provider": "local", "model_name": "hf:CompendiumLabs/bge-small-zh-v1.5-gguf/bge-small-zh-v1.5-f16.gguf" }
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
"default": { "type": "local", "lancedb_uri": "./data/contextlake" }
|
|
29
|
+
},
|
|
30
|
+
"file_storage": {
|
|
31
|
+
"type": "object",
|
|
32
|
+
"properties": {
|
|
33
|
+
"type": { "type": "string", "enum": ["local", "tos"], "default": "local" },
|
|
34
|
+
"local_base_dir": { "type": "string", "default": "./data/files" },
|
|
35
|
+
"tos": {
|
|
36
|
+
"type": "object",
|
|
37
|
+
"properties": {
|
|
38
|
+
"access_key": { "type": "string" },
|
|
39
|
+
"secret_key": { "type": "string" },
|
|
40
|
+
"region": { "type": "string" },
|
|
41
|
+
"path": { "type": "string", "description": "TOS path in format tos://bucket/base_path/" },
|
|
42
|
+
"endpoint": { "type": "string" },
|
|
43
|
+
"sts_token": { "type": "string" }
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
},
|
|
47
|
+
"default": { "type": "local", "local_base_dir": "./data/files" }
|
|
48
|
+
},
|
|
49
|
+
"storage_policy": {
|
|
50
|
+
"type": "object",
|
|
51
|
+
"properties": {
|
|
52
|
+
"max_inline_size_kb": { "type": "number", "default": 1024, "description": "Files smaller than this size (in KB) will be stored directly in LanceDB" }
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
}
|
package/package.json
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@byted-las/contextlake-openclaw",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "ContextLake OpenClaw Plugin for managing knowledge base",
|
|
5
|
+
"main": "index.ts",
|
|
6
|
+
"files": [
|
|
7
|
+
"dist",
|
|
8
|
+
"bin",
|
|
9
|
+
"index.ts",
|
|
10
|
+
"src",
|
|
11
|
+
"openclaw.plugin.json"
|
|
12
|
+
],
|
|
13
|
+
"bin": {
|
|
14
|
+
"contextlake-openclaw": "./bin/contextlake-openclaw.js"
|
|
15
|
+
},
|
|
16
|
+
"openclaw": {
|
|
17
|
+
"extensions": [
|
|
18
|
+
"./dist/index.js"
|
|
19
|
+
]
|
|
20
|
+
},
|
|
21
|
+
"scripts": {
|
|
22
|
+
"build": "tsc",
|
|
23
|
+
"test": "vitest --reporter verbose",
|
|
24
|
+
"test:local": "npx ts-node scripts/local-test.ts",
|
|
25
|
+
"test:profiler": "npx ts-node scripts/local-profiler-test.ts",
|
|
26
|
+
"cli": "npx ts-node scripts/cli.ts"
|
|
27
|
+
},
|
|
28
|
+
"keywords": ["openclaw", "contextlake", "plugin"],
|
|
29
|
+
"author": "byted-las",
|
|
30
|
+
"license": "ISC",
|
|
31
|
+
"engines": {
|
|
32
|
+
"node": ">=20.17.0"
|
|
33
|
+
},
|
|
34
|
+
"dependencies": {
|
|
35
|
+
"@lancedb/lancedb": "^0.26.2",
|
|
36
|
+
"@volcengine/tos-sdk": "^2.9.0",
|
|
37
|
+
"commander": "^14.0.3",
|
|
38
|
+
"mammoth": "^1.12.0",
|
|
39
|
+
"node-llama-cpp": "^3.16.2",
|
|
40
|
+
"openclaw": "^2026.3.13",
|
|
41
|
+
"pdf-parse": "^2.4.5",
|
|
42
|
+
"uuid": "^13.0.0"
|
|
43
|
+
},
|
|
44
|
+
"devDependencies": {
|
|
45
|
+
"@types/node": "^25.5.0",
|
|
46
|
+
"@types/pdf-parse": "^1.1.5",
|
|
47
|
+
"@types/uuid": "^10.0.0",
|
|
48
|
+
"ts-node": "^10.9.2",
|
|
49
|
+
"typescript": "^5.9.3",
|
|
50
|
+
"vitest": "^4.1.0"
|
|
51
|
+
}
|
|
52
|
+
}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import * as lancedb from '@lancedb/lancedb';
|
|
2
|
+
import { EmbeddingProvider } from '../service/embedding/interface';
|
|
3
|
+
|
|
4
|
+
/** Connection settings for {@link ContextLakeLanceDBClient}. */
export interface LanceDBConfig {
  /** URI passed to `lancedb.connect`. */
  uri: string;
}

/** Row shape stored in the documents table. */
export interface DocumentSchema {
  id: string;
  vector: number[];
  text: string;
  source: string;
  file_type: string;
  storage_type: string;
  url: string;
  metadata: string; // JSON string
  created_at: number; // Unix timestamp
  binary_data?: Buffer; // Optional direct binary storage
}

/**
 * Thin wrapper around a LanceDB connection that embeds queries with the
 * supplied provider and reads/writes {@link DocumentSchema} rows.
 */
export class ContextLakeLanceDBClient {
  private db: lancedb.Connection | null = null;
  // Handles are cached per table name; a single shared handle would be
  // returned for every name after the first call.
  private tables = new Map<string, lancedb.Table>();
  private config: LanceDBConfig;
  private embeddingProvider: EmbeddingProvider;

  constructor(config: LanceDBConfig, embeddingProvider: EmbeddingProvider) {
    this.config = config;
    this.embeddingProvider = embeddingProvider;
  }

  /** Open the connection on first use; later calls reuse it. */
  async connect() {
    if (!this.db) {
      this.db = await lancedb.connect(this.config.uri);
    }
  }

  /**
   * Open `tableName`, creating it when it does not exist yet.
   *
   * @param tableName Table to open or create.
   * @param dim Vector dimension used when the table has to be created; when
   *            not positive, it is inferred by embedding a probe string.
   * @returns The table handle (cached per table name).
   */
  async getTable(tableName: string = 'documents', dim: number = 0) {
    const cached = this.tables.get(tableName);
    if (cached) return cached;
    await this.connect();

    const tableNames = await this.db!.tableNames();
    let table: lancedb.Table;
    if (tableNames.includes(tableName)) {
      table = await this.db!.openTable(tableName);
    } else {
      if (dim <= 0) {
        // Fallback: use embedding provider to infer dimension only if needed
        const dummyVec = await this.embeddingProvider.generateEmbedding("init");
        dim = dummyVec.length;
      }

      // The schema is derived from one placeholder row, which is removed
      // again right after the table has been created.
      // @ts-ignore
      table = await this.db!.createTable(tableName, [
        {
          id: 'schema_init',
          vector: Array(dim).fill(0),
          text: '',
          source: '',
          file_type: '',
          storage_type: '',
          url: '',
          metadata: '{}',
          created_at: 0,
          binary_data: Buffer.from('')
        }
      ]);
      await table.delete('id = "schema_init"');
    }
    this.tables.set(tableName, table);
    return table;
  }

  /**
   * Append rows to the default table.
   *
   * @param docs Rows to add; an empty array is a no-op.
   */
  async addAssets(docs: DocumentSchema[]) {
    if (docs.length === 0) return;
    const table = await this.getTable();
    // @ts-ignore
    await table.add(docs);
  }

  /**
   * Vector search over the default table.
   *
   * @param query Text that is embedded and used as the search vector.
   * @param limit Maximum number of rows to return.
   * @param filter Optional filter expression passed to `where`.
   * @returns Matching rows as an array.
   */
  async search(query: string, limit: number = 5, filter?: string) {
    const vector = await this.embeddingProvider.generateEmbedding(query);
    const table = await this.getTable();
    // @ts-ignore
    let search = table.vectorSearch(vector).limit(limit);
    if (filter) {
      search = search.where(filter);
    }
    return await search.toArray();
  }

  /**
   * Delete rows of the default table.
   *
   * @param filter Filter expression selecting the rows to delete.
   */
  async delete(filter: string) {
    const table = await this.getTable();
    await table.delete(filter);
  }

  /**
   * List rows of the default table without a vector search.
   *
   * @param limit Maximum number of rows to return.
   * @param filter Optional filter expression passed to `where`.
   * @returns Matching rows as an array.
   */
  async list(limit: number = 100, filter?: string) {
    const table = await this.getTable();
    let query = table.query().limit(limit);
    if (filter) {
      query = query.where(filter);
    }
    return await query.toArray();
  }
}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import { TosClient } from '@volcengine/tos-sdk';
|
|
2
|
+
import { StorageProvider } from '../service/storage/interface';
|
|
3
|
+
|
|
4
|
+
/** Settings for {@link ContextLakeTosClient}. */
export interface TosConfig {
  access_key?: string;
  secret_key?: string;
  region: string;
  endpoint?: string;
  /** Storage root in the form `tos://bucket/base_path/`. */
  path: string;
  sts_token?: string;
}

/**
 * {@link StorageProvider} backed by a single TOS bucket. Object keys are
 * stored under the base path taken from `config.path`.
 */
export class ContextLakeTosClient implements StorageProvider {
  private client: TosClient;
  private bucket: string;
  private basePath: string;

  /**
   * @param config TOS credentials and the `tos://bucket/base_path/` root.
   * @throws Error when `path` is missing, does not start with `tos://`, or
   *         names no bucket.
   */
  constructor(config: TosConfig) {
    // Validate the path before creating the SDK client so a bad
    // configuration is reported with the most specific message.
    if (!config.path) {
      throw new Error('TOS configuration requires "path" (e.g. tos://bucket/path/)');
    }
    if (!config.path.startsWith('tos://')) {
      throw new Error('TOS path must start with tos://');
    }
    const [bucket, ...baseParts] = config.path.substring(6).split('/');
    if (!bucket) {
      throw new Error('TOS path must include a bucket name (e.g. tos://bucket/path/)');
    }
    this.bucket = bucket;
    this.basePath = baseParts.join('/');
    if (this.basePath && !this.basePath.endsWith('/')) {
      this.basePath += '/';
    }

    const tosConfig: any = {
      accessKeyId: config.access_key,
      accessKeySecret: config.secret_key,
      region: config.region,
      endpoint: config.endpoint,
      securityToken: config.sts_token,
    };
    this.client = new TosClient(tosConfig);
  }

  /**
   * Turn a `tos://bucket/key` URL into the bare object key; any other string
   * is returned unchanged.
   *
   * @throws Error when the URL names a bucket other than the configured one,
   *         since the request would otherwise hit the same key in the wrong bucket.
   */
  private parseTosUrl(key: string): string {
    if (!key.startsWith('tos://')) {
      return key;
    }
    const [bucket, ...keyParts] = key.substring(6).split('/');
    if (bucket !== this.bucket) {
      throw new Error(`TOS URL "${key}" refers to bucket "${bucket}", but this client is configured for "${this.bucket}"`);
    }
    return keyParts.join('/');
  }

  /**
   * Upload `buffer` under the base path.
   *
   * @returns The `tos://bucket/key` URL of the stored object.
   */
  async uploadFile(fileName: string, buffer: Buffer): Promise<string> {
    const key = `${this.basePath}${fileName}`;
    await this.client.putObject({
      bucket: this.bucket,
      key,
      body: buffer,
    });
    return `tos://${this.bucket}/${key}`;
  }

  /**
   * Download an object into memory.
   *
   * @param key Object key or `tos://` URL.
   * @returns The object's bytes.
   */
  async downloadFile(key: string): Promise<Buffer> {
    const actualKey = this.parseTosUrl(key);

    const result = await this.client.getObject({
      bucket: this.bucket,
      key: actualKey,
    });

    // Check if result.data is a stream or buffer
    if (Buffer.isBuffer(result.data)) {
      return result.data;
    }

    // Read stream to buffer
    // @ts-ignore
    const stream: any = result.data.content || result.data;
    if (stream && typeof stream.toArray === 'function') {
      // Streams that can collect their own chunks
      return Buffer.concat(await stream.toArray());
    }

    if (stream && stream[Symbol.asyncIterator]) {
      const chunks: Buffer[] = [];
      for await (const chunk of stream) {
        chunks.push(Buffer.from(chunk));
      }
      return Buffer.concat(chunks);
    }
    return Buffer.from(result.data as any);
  }

  /**
   * Delete an object.
   *
   * @param key Object key or `tos://` URL.
   */
  async deleteFile(key: string): Promise<void> {
    const actualKey = this.parseTosUrl(key);
    await this.client.deleteObject({
      bucket: this.bucket,
      key: actualKey,
    });
  }
}
|