@byted-las/contextlake-openclaw 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/README.md +64 -0
  2. package/bin/contextlake-openclaw.js +5 -0
  3. package/dist/index.d.ts +113 -0
  4. package/dist/index.js +73 -0
  5. package/dist/src/client/lancedb.d.ts +30 -0
  6. package/dist/src/client/lancedb.js +113 -0
  7. package/dist/src/client/tos.d.ts +19 -0
  8. package/dist/src/client/tos.js +81 -0
  9. package/dist/src/commands/cli.d.ts +6 -0
  10. package/dist/src/commands/cli.js +78 -0
  11. package/dist/src/commands/index.d.ts +1 -0
  12. package/dist/src/commands/index.js +139 -0
  13. package/dist/src/commands/slashcmd.d.ts +14 -0
  14. package/dist/src/commands/slashcmd.js +91 -0
  15. package/dist/src/commands/tools.d.ts +219 -0
  16. package/dist/src/commands/tools.js +286 -0
  17. package/dist/src/lib/actions/ingest.d.ts +8 -0
  18. package/dist/src/lib/actions/ingest.js +123 -0
  19. package/dist/src/lib/actions/manage.d.ts +15 -0
  20. package/dist/src/lib/actions/manage.js +91 -0
  21. package/dist/src/lib/actions/retrieve.d.ts +8 -0
  22. package/dist/src/lib/actions/retrieve.js +73 -0
  23. package/dist/src/processor/loader.d.ts +7 -0
  24. package/dist/src/processor/loader.js +83 -0
  25. package/dist/src/service/embedding/factory.d.ts +2 -0
  26. package/dist/src/service/embedding/factory.js +16 -0
  27. package/dist/src/service/embedding/interface.d.ts +18 -0
  28. package/dist/src/service/embedding/interface.js +2 -0
  29. package/dist/src/service/embedding/local.d.ts +14 -0
  30. package/dist/src/service/embedding/local.js +104 -0
  31. package/dist/src/service/embedding/remote.d.ts +9 -0
  32. package/dist/src/service/embedding/remote.js +42 -0
  33. package/dist/src/service/metadata/factory.d.ts +13 -0
  34. package/dist/src/service/metadata/factory.js +48 -0
  35. package/dist/src/service/metadata/interface.d.ts +17 -0
  36. package/dist/src/service/metadata/interface.js +2 -0
  37. package/dist/src/service/metadata/local.d.ts +13 -0
  38. package/dist/src/service/metadata/local.js +49 -0
  39. package/dist/src/service/storage/factory.d.ts +2 -0
  40. package/dist/src/service/storage/factory.js +19 -0
  41. package/dist/src/service/storage/interface.d.ts +32 -0
  42. package/dist/src/service/storage/interface.js +2 -0
  43. package/dist/src/service/storage/local.d.ts +9 -0
  44. package/dist/src/service/storage/local.js +72 -0
  45. package/dist/src/skills/las-data-profiler/index.d.ts +26 -0
  46. package/dist/src/skills/las-data-profiler/index.js +231 -0
  47. package/dist/src/skills/las-data-profiler/register.d.ts +1 -0
  48. package/dist/src/skills/las-data-profiler/register.js +19 -0
  49. package/dist/src/utils/config.d.ts +1 -0
  50. package/dist/src/utils/config.js +16 -0
  51. package/index.ts +78 -0
  52. package/openclaw.plugin.json +57 -0
  53. package/package.json +52 -0
  54. package/src/client/lancedb.ts +102 -0
  55. package/src/client/tos.ts +100 -0
  56. package/src/commands/cli.ts +77 -0
  57. package/src/commands/index.ts +156 -0
  58. package/src/commands/slashcmd.ts +95 -0
  59. package/src/commands/tools.ts +286 -0
  60. package/src/lib/actions/ingest.ts +103 -0
  61. package/src/lib/actions/manage.ts +107 -0
  62. package/src/lib/actions/retrieve.ts +90 -0
  63. package/src/processor/loader.ts +58 -0
  64. package/src/service/embedding/factory.ts +13 -0
  65. package/src/service/embedding/interface.ts +21 -0
  66. package/src/service/embedding/local.ts +118 -0
  67. package/src/service/embedding/remote.ts +45 -0
  68. package/src/service/metadata/factory.ts +52 -0
  69. package/src/service/metadata/interface.ts +19 -0
  70. package/src/service/metadata/local.ts +60 -0
  71. package/src/service/storage/factory.ts +16 -0
  72. package/src/service/storage/interface.ts +36 -0
  73. package/src/service/storage/local.ts +42 -0
  74. package/src/skills/contextlake-delete/SKILL.md +36 -0
  75. package/src/skills/contextlake-ingest/SKILL.md +40 -0
  76. package/src/skills/contextlake-list/SKILL.md +22 -0
  77. package/src/skills/contextlake-retrieve/SKILL.md +37 -0
  78. package/src/skills/las-data-profiler/SKILL.md +174 -0
  79. package/src/skills/las-data-profiler/index.ts +254 -0
  80. package/src/skills/las-data-profiler/register.ts +19 -0
  81. package/src/skills/las-data-profiler/s3_catalog.py +608 -0
  82. package/src/utils/config.ts +13 -0
@@ -0,0 +1,231 @@
1
+ "use strict";
2
// TypeScript-emitted interop helper; an existing `this.__createBinding` is reused when present.
// Exposes property `k` of module `m` on object `o` under the name `k2` (defaults to `k`).
// Where Object.create exists the property is defined via a descriptor; otherwise it is plainly assigned.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
// No descriptor, an accessor on a non-ES module, or a writable/configurable data property:
// replace the descriptor with an enumerable getter that reads m[k] at access time.
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
// TypeScript-emitted interop helper; an existing `this.__setModuleDefault` is reused when present.
// Stores `v` as the `default` member of `o` (enumerable property when Object.create exists,
// plain assignment otherwise).
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
// TypeScript-emitted interop helper; an existing `this.__importStar` is reused when present.
// Returns `mod` unchanged when it is flagged `__esModule`; otherwise builds a new object that
// re-exposes every own key except "default" and sets `default` to `mod` itself.
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
// On first call `ownKeys` replaces itself with Object.getOwnPropertyNames, or with a
// for-in/hasOwnProperty fallback when that function is unavailable.
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.connectDataSource = connectDataSource;
37
+ const path = __importStar(require("path"));
38
+ const fs = __importStar(require("fs"));
39
+ const os = __importStar(require("os"));
40
+ const child_process_1 = require("child_process");
41
+ // ---------------------------------------------------------------------------
42
+ // Constants
43
+ // ---------------------------------------------------------------------------
44
// Root directory (under the user's home) that getDataSourceDir() joins datasource names onto.
+ const BASE_DIR = path.join(os.homedir(), '.openclaw', 'las-data-profiler');
45
// Package names handed to `pip3 install --user` by ensurePythonDeps() when its import probe fails.
+ const PYTHON_DEPS = ['boto3', 'lancedb', 'pyarrow', 'pandas', 'Pillow', 'mutagen', 'pymupdf'];
46
+ // ---------------------------------------------------------------------------
47
+ // Helpers
48
+ // ---------------------------------------------------------------------------
49
/**
 * Resolve the working directory of a datasource underneath BASE_DIR.
 *
 * The name is supplied by the tool caller, so the joined path is checked to
 * stay strictly inside BASE_DIR. Without this check a name such as
 * `../../somewhere` would make the caller create directories and write
 * env.sh (which holds credentials) outside the profiler's own tree.
 * Nested names such as `team/ds1` remain valid.
 *
 * @param name Datasource name, joined onto BASE_DIR.
 * @returns The normalised directory path for that datasource.
 * @throws Error when the name resolves to BASE_DIR itself or to a location outside it.
 */
function getDataSourceDir(name) {
    const dir = path.join(BASE_DIR, name);
    const rel = path.relative(BASE_DIR, dir);
    const escapesBase = rel === '' ||
        rel === '..' ||
        rel.startsWith('..' + path.sep) ||
        path.isAbsolute(rel); // e.g. a different drive on Windows
    if (escapesBase) {
        throw new Error(`Invalid datasource_name "${name}": it must resolve to a sub-directory of ${BASE_DIR}`);
    }
    return dir;
}
52
/**
 * Create `dir`, including any missing parent directories.
 *
 * @param dir Directory path to create.
 */
function ensureDir(dir) {
    const mkdirOptions = { recursive: true };
    fs.mkdirSync(dir, mkdirOptions);
}
55
/**
 * Generate env.sh with all connection parameters for this datasource.
 * This file can be sourced to re-run the profiler or for debugging.
 *
 * Every value is written as a single-quoted shell word. Inside single quotes
 * the shell performs no expansion, so credentials or prefixes containing
 * `"`, `$`, backticks or backslashes are preserved literally instead of
 * breaking the file or being executed when it is sourced.
 *
 * @param dir    Directory that receives `env.sh` (must already exist).
 * @param params Connection parameters; `datasource_name`, `vendor`, `bucket`
 *               and `prefix` are always written, while `endpoint`,
 *               `access_key`, `secret_key`, `region` and `sample_rows` are
 *               written only when truthy.
 * @returns Path of the written `env.sh`.
 */
function writeEnvFile(dir, params) {
    // POSIX quoting: wrap in '...' and spell an embedded quote as '\''.
    const shellQuote = (value) => `'${String(value).replace(/'/g, "'\\''")}'`;
    // A line break inside a `#` comment would turn the remainder into a command.
    const oneLine = (value) => String(value).replace(/[\r\n]+/g, ' ');
    const exportLine = (key, value) => `export ${key}=${shellQuote(value)}`;
    const envPath = path.join(dir, 'env.sh');
    const lines = [
        '#!/usr/bin/env bash',
        '# Auto-generated by las-data-profiler connect',
        `# Datasource: ${oneLine(params.datasource_name)}`,
        `# Created: ${new Date().toISOString()}`,
        '',
        exportLine('LAS_VENDOR', params.vendor),
        exportLine('LAS_BUCKET', params.bucket),
        exportLine('LAS_PREFIX', params.prefix),
    ];
    const optionalEntries = [
        ['LAS_ENDPOINT', params.endpoint],
        ['LAS_ACCESS_KEY', params.access_key],
        ['LAS_SECRET_KEY', params.secret_key],
        ['LAS_REGION', params.region],
        ['LAS_SAMPLE_ROWS', params.sample_rows],
    ];
    for (const [key, value] of optionalEntries) {
        if (value) {
            lines.push(exportLine(key, value));
        }
    }
    lines.push(exportLine('LAS_DB_PATH', path.join(dir, 'catalog_db')));
    lines.push(exportLine('LAS_DATASOURCE_NAME', params.datasource_name));
    lines.push('');
    fs.writeFileSync(envPath, lines.join('\n'), { mode: 0o600 });
    // `mode` is only honoured when the file is created; a pre-existing env.sh
    // keeps its old permissions, so restrict it explicitly (it holds secrets).
    fs.chmodSync(envPath, 0o600);
    return envPath;
}
92
/**
 * Install Python dependencies if not already available.
 *
 * The probe imports every required module with `python3`. When the probe
 * fails, the packages in PYTHON_DEPS are installed with `python3 -m pip`,
 * so they land in the same interpreter that was probed and that later runs
 * the profiler. A bare `pip3` may belong to a different Python installation.
 * Both commands are run without a shell (argument arrays).
 *
 * @throws Error when `python3` cannot be started or the installation fails.
 */
function ensurePythonDeps() {
    try {
        child_process_1.execFileSync('python3', ['-c', 'import boto3, lancedb, pyarrow, pandas, PIL, mutagen, fitz'], {
            stdio: 'pipe',
        });
    }
    catch {
        console.log('[las-data-profiler] Installing Python dependencies...');
        child_process_1.execFileSync('python3', ['-m', 'pip', 'install', '--user', ...PYTHON_DEPS], {
            stdio: 'inherit',
        });
    }
}
108
/**
 * Get the path to the bundled Python script.
 *
 * The script is looked up next to this module first. The published package
 * lists `s3_catalog.py` only under `src/skills/las-data-profiler`, so when
 * this module runs from `dist/src/skills/las-data-profiler` the co-located
 * file does not exist; in that case the copy in the package's `src` tree is
 * used instead.
 *
 * @returns The first candidate that exists on disk, or the co-located path
 *          when none exists (so the failure message names the expected file).
 */
function getScriptPath() {
    const scriptName = 's3_catalog.py';
    const colocated = path.join(__dirname, scriptName);
    const candidates = [
        colocated,
        // dist/src/skills/las-data-profiler -> package root -> src/skills/las-data-profiler
        path.resolve(__dirname, '..', '..', '..', '..', 'src', 'skills', 'las-data-profiler', scriptName),
    ];
    return candidates.find((candidate) => fs.existsSync(candidate)) || colocated;
}
115
+ // ---------------------------------------------------------------------------
116
+ // Main Entry
117
+ // ---------------------------------------------------------------------------
118
+ async function connectDataSource(params, _ctx) {
119
+ // Validate required params
120
+ if (!params.datasource_name) {
121
+ throw new Error('datasource_name is required');
122
+ }
123
+ if (!params.vendor) {
124
+ throw new Error('vendor is required');
125
+ }
126
+ if (!params.bucket) {
127
+ throw new Error('bucket is required');
128
+ }
129
+ if (params.prefix === undefined || params.prefix === null) {
130
+ throw new Error('prefix is required');
131
+ }
132
+ // For non-local vendors, validate credentials
133
+ if (params.vendor !== 'local') {
134
+ if (!params.endpoint && params.vendor !== 'aws') {
135
+ throw new Error(`endpoint is required for vendor "${params.vendor}"`);
136
+ }
137
+ const ak = params.access_key || process.env.TOS_ACCESS_KEY || process.env.S3_ACCESS_KEY || process.env.AWS_ACCESS_KEY_ID;
138
+ const sk = params.secret_key || process.env.TOS_SECRET_KEY || process.env.S3_SECRET_KEY || process.env.AWS_SECRET_ACCESS_KEY;
139
+ if (!ak || !sk) {
140
+ throw new Error('access_key and secret_key are required (via params or env vars TOS_ACCESS_KEY/TOS_SECRET_KEY, S3_ACCESS_KEY/S3_SECRET_KEY, AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY)');
141
+ }
142
+ // Normalise into params so env.sh picks them up
143
+ params.access_key = ak;
144
+ params.secret_key = sk;
145
+ }
146
+ const dsDir = getDataSourceDir(params.datasource_name);
147
+ const dbPath = path.join(dsDir, 'catalog_db');
148
+ ensureDir(dsDir);
149
+ // 1. Write env.sh
150
+ const envPath = writeEnvFile(dsDir, params);
151
+ // 2. Ensure Python dependencies
152
+ ensurePythonDeps();
153
+ // 3. Build CLI args for the Python script
154
+ const scriptPath = getScriptPath();
155
+ const args = [
156
+ scriptPath,
157
+ '--vendor', params.vendor,
158
+ '--bucket', params.bucket,
159
+ '--prefix', params.prefix,
160
+ '--db-path', dbPath,
161
+ ];
162
+ if (params.endpoint) {
163
+ args.push('--endpoint', params.endpoint);
164
+ }
165
+ if (params.access_key) {
166
+ args.push('--ak', params.access_key);
167
+ }
168
+ if (params.secret_key) {
169
+ args.push('--sk', params.secret_key);
170
+ }
171
+ if (params.region) {
172
+ args.push('--region', params.region);
173
+ }
174
+ if (params.sample_rows) {
175
+ args.push('--sample-rows', String(params.sample_rows));
176
+ }
177
+ // 4. Execute the profiling script
178
+ return new Promise((resolve) => {
179
+ let stdout = '';
180
+ let stderr = '';
181
+ const proc = (0, child_process_1.spawn)('python3', args, {
182
+ cwd: dsDir,
183
+ env: { ...process.env },
184
+ });
185
+ proc.stdout.on('data', (data) => {
186
+ stdout += data.toString();
187
+ });
188
+ proc.stderr.on('data', (data) => {
189
+ stderr += data.toString();
190
+ });
191
+ proc.on('close', (code) => {
192
+ if (code !== 0) {
193
+ resolve({
194
+ status: 'error',
195
+ datasource_name: params.datasource_name,
196
+ db_path: dbPath,
197
+ env_path: envPath,
198
+ tables: [],
199
+ error: stderr || `Python script exited with code ${code}`,
200
+ });
201
+ return;
202
+ }
203
+ // Try to parse structured output from the script
204
+ try {
205
+ const jsonMatch = stdout.match(/\{[\s\S]*"summary"[\s\S]*\}/);
206
+ const result = jsonMatch ? JSON.parse(jsonMatch[0]) : {};
207
+ resolve({
208
+ status: 'success',
209
+ datasource_name: params.datasource_name,
210
+ db_path: dbPath,
211
+ env_path: envPath,
212
+ tables: ['file_catalog', 'structured_schemas', 'media_metadata'],
213
+ summary: result.summary || {
214
+ total_files: 0,
215
+ structured_files: 0,
216
+ media_files: 0,
217
+ },
218
+ });
219
+ }
220
+ catch {
221
+ resolve({
222
+ status: 'success',
223
+ datasource_name: params.datasource_name,
224
+ db_path: dbPath,
225
+ env_path: envPath,
226
+ tables: ['file_catalog', 'structured_schemas', 'media_metadata'],
227
+ });
228
+ }
229
+ });
230
+ });
231
+ }
@@ -0,0 +1 @@
1
/**
 * Registers the `las-data-profiler` definition on the given plugin context.
 * The implementation calls `ctx.registerTool` when it is a function, otherwise
 * `ctx.registerSkill` when that is a function, and does nothing if neither exists.
 */
+ export declare function registerLasDataProfilerSkill(ctx: any): void;
@@ -0,0 +1,19 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.registerLasDataProfilerSkill = registerLasDataProfilerSkill;
4
+ const index_1 = require("./index");
5
+ function registerLasDataProfilerSkill(ctx) {
6
+ const definition = {
7
+ name: 'las-data-profiler',
8
+ description: 'Connect to a data source (TOS/OSS/COS/S3/Local) and profile its structure, schemas, and media metadata into LanceDB',
9
+ async execute(params) {
10
+ return await (0, index_1.connectDataSource)(params, ctx);
11
+ }
12
+ };
13
+ if (typeof ctx.registerTool === 'function') {
14
+ ctx.registerTool(definition);
15
+ }
16
+ else if (typeof ctx.registerSkill === 'function') {
17
+ ctx.registerSkill(definition);
18
+ }
19
+ }
@@ -0,0 +1 @@
1
/**
 * Returns the value at `ctx.config.plugins.entries['contextlake-openclaw'].config`,
 * or, when that value is missing or falsy, a built-in default that uses local
 * metadata and file storage under `~/.openclaw/contextlake`.
 */
+ export declare function getPluginConfig(ctx: any): any;
@@ -0,0 +1,16 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.getPluginConfig = getPluginConfig;
4
/**
 * Look up this plugin's configuration on the host context.
 *
 * @param ctx Host context; the configuration is read from
 *            `ctx.config.plugins.entries['contextlake-openclaw'].config`.
 * @returns The configured object when it is truthy; otherwise a default
 *          configuration that keeps both LanceDB data and files under
 *          `~/.openclaw/contextlake` and uses the local embedding provider.
 */
function getPluginConfig(ctx) {
    const configured = ctx.config?.plugins?.entries?.['contextlake-openclaw']?.config;
    if (configured) {
        return configured;
    }
    // Defaults are only built when nothing is configured.
    const underContextLakeHome = (leaf) => require('path').join(require('os').homedir(), '.openclaw', 'contextlake', leaf);
    const metadataStorage = {
        type: 'local',
        lancedb_uri: underContextLakeHome('data'),
        embedding: {
            provider: 'local',
            model_name: 'hf:CompendiumLabs/bge-small-zh-v1.5-gguf/bge-small-zh-v1.5-f16.gguf'
        }
    };
    const fileStorage = { type: 'local', local_base_dir: underContextLakeHome('files') };
    return {
        metadata_storage: metadataStorage,
        file_storage: fileStorage
    };
}
package/index.ts ADDED
@@ -0,0 +1,78 @@
1
+ // @ts-ignore
2
+ import { PluginContext } from 'openclaw/plugin-sdk';
3
+ import { registerAll } from './src/commands';
4
+
5
// Literals that appear both as per-property defaults and inside object-level defaults.
const DEFAULT_EMBEDDING_MODEL = 'hf:CompendiumLabs/bge-small-zh-v1.5-gguf/bge-small-zh-v1.5-f16.gguf';
const DEFAULT_LANCEDB_URI = './data/contextlake';
const DEFAULT_FILES_DIR = './data/files';

/** Builds a logger method that forwards its arguments to the named console method at call time. */
const forwardToConsole = (method: 'log' | 'warn' | 'error' | 'debug') =>
  (msg: string, ...args: any[]) => console[method](msg, ...args);

/**
 * Plugin descriptor: identity fields, the JSON-schema description of the
 * accepted configuration, and the `register` entry point.
 *
 * NOTE(review): `version` is '1.1.0' here while package.json in the same
 * release declares 1.0.0 — confirm which value is intended.
 */
const plugin = {
  id: 'contextlake-openclaw',
  name: 'ContextLake',
  version: '1.1.0',
  description: 'A lightweight knowledge base plugin for OpenClaw using LanceDB and TOS, with data profiling support',
  configSchema: {
    type: 'object',
    properties: {
      // Where document metadata and vectors are kept, and how embeddings are produced.
      metadata_storage: {
        type: 'object',
        properties: {
          type: { type: 'string', enum: ['local', 'remote'], default: 'local' },
          lancedb_uri: { type: 'string', default: DEFAULT_LANCEDB_URI },
          remote_api_endpoint: { type: 'string' },
          remote_api_key: { type: 'string' },
          embedding: {
            type: 'object',
            properties: {
              provider: { type: 'string', enum: ['local', 'remote', 'openai'], default: 'local' },
              model_name: { type: 'string', default: DEFAULT_EMBEDDING_MODEL },
              api_key: { type: 'string' },
              api_base: { type: 'string' }
            },
            default: { provider: 'local', model_name: DEFAULT_EMBEDDING_MODEL }
          }
        },
        default: { type: 'local', lancedb_uri: DEFAULT_LANCEDB_URI }
      },
      // Where original files are kept.
      file_storage: {
        type: 'object',
        properties: {
          type: { type: 'string', enum: ['local', 'tos'], default: 'local' },
          local_base_dir: { type: 'string', default: DEFAULT_FILES_DIR },
          tos: {
            type: 'object',
            properties: {
              access_key: { type: 'string' },
              secret_key: { type: 'string' },
              region: { type: 'string' },
              path: { type: 'string', description: 'TOS path in format tos://bucket/base_path/' },
              endpoint: { type: 'string' },
              sts_token: { type: 'string' }
            }
          }
        },
        default: { type: 'local', local_base_dir: DEFAULT_FILES_DIR }
      },
      storage_policy: {
        type: 'object',
        properties: {
          max_inline_size_kb: { type: 'number', default: 1024, description: 'Files smaller than this size (in KB) will be stored directly in LanceDB' }
        }
      }
    }
  },
  /**
   * Entry point invoked with the plugin context. Uses `ctx.logger` when it is
   * provided and a console-backed logger otherwise, then hands every
   * registration over to `registerAll`.
   */
  register(ctx: any) {
    const logger = ctx.logger || {
      info: forwardToConsole('log'),
      warn: forwardToConsole('warn'),
      error: forwardToConsole('error'),
      debug: forwardToConsole('debug'),
    };

    logger.info(`[${new Date().toISOString()}] [ContextLake] Plugin register started`);

    // All registrations live in commands/index.ts.
    registerAll(ctx, logger);

    logger.info(`[${new Date().toISOString()}] [ContextLake] Plugin register completed`);
  }
};
77
+
78
+ export default plugin;
@@ -0,0 +1,57 @@
1
+ {
2
+ "id": "contextlake-openclaw",
3
+ "name": "ContextLake",
4
+ "version": "1.1.0",
5
+ "description": "A lightweight knowledge base plugin for OpenClaw using LanceDB and TOS, with data profiling support",
6
+ "skills": ["./src/skills"],
7
+ "configSchema": {
8
+ "type": "object",
9
+ "properties": {
10
+ "metadata_storage": {
11
+ "type": "object",
12
+ "properties": {
13
+ "type": { "type": "string", "enum": ["local", "remote"], "default": "local" },
14
+ "lancedb_uri": { "type": "string", "default": "./data/contextlake" },
15
+ "remote_api_endpoint": { "type": "string" },
16
+ "remote_api_key": { "type": "string" },
17
+ "embedding": {
18
+ "type": "object",
19
+ "properties": {
20
+ "provider": { "type": "string", "enum": ["local", "remote", "openai"], "default": "local" },
21
+ "model_name": { "type": "string", "default": "hf:CompendiumLabs/bge-small-zh-v1.5-gguf/bge-small-zh-v1.5-f16.gguf" },
22
+ "api_key": { "type": "string" },
23
+ "api_base": { "type": "string", "description": "Base URL for remote API (e.g. https://ark.cn-beijing.volces.com/api/v3)" }
24
+ },
25
+ "default": { "provider": "local", "model_name": "hf:CompendiumLabs/bge-small-zh-v1.5-gguf/bge-small-zh-v1.5-f16.gguf" }
26
+ }
27
+ },
28
+ "default": { "type": "local", "lancedb_uri": "./data/contextlake" }
29
+ },
30
+ "file_storage": {
31
+ "type": "object",
32
+ "properties": {
33
+ "type": { "type": "string", "enum": ["local", "tos"], "default": "local" },
34
+ "local_base_dir": { "type": "string", "default": "./data/files" },
35
+ "tos": {
36
+ "type": "object",
37
+ "properties": {
38
+ "access_key": { "type": "string" },
39
+ "secret_key": { "type": "string" },
40
+ "region": { "type": "string" },
41
+ "path": { "type": "string", "description": "TOS path in format tos://bucket/base_path/" },
42
+ "endpoint": { "type": "string" },
43
+ "sts_token": { "type": "string" }
44
+ }
45
+ }
46
+ },
47
+ "default": { "type": "local", "local_base_dir": "./data/files" }
48
+ },
49
+ "storage_policy": {
50
+ "type": "object",
51
+ "properties": {
52
+ "max_inline_size_kb": { "type": "number", "default": 1024, "description": "Files smaller than this size (in KB) will be stored directly in LanceDB" }
53
+ }
54
+ }
55
+ }
56
+ }
57
+ }
package/package.json ADDED
@@ -0,0 +1,52 @@
1
+ {
2
+ "name": "@byted-las/contextlake-openclaw",
3
+ "version": "1.0.0",
4
+ "description": "ContextLake OpenClaw Plugin for managing knowledge base",
5
+ "main": "index.ts",
6
+ "files": [
7
+ "dist",
8
+ "bin",
9
+ "index.ts",
10
+ "src",
11
+ "openclaw.plugin.json"
12
+ ],
13
+ "bin": {
14
+ "contextlake-openclaw": "./bin/contextlake-openclaw.js"
15
+ },
16
+ "openclaw": {
17
+ "extensions": [
18
+ "./dist/index.js"
19
+ ]
20
+ },
21
+ "scripts": {
22
+ "build": "tsc",
23
+ "test": "vitest --reporter verbose",
24
+ "test:local": "npx ts-node scripts/local-test.ts",
25
+ "test:profiler": "npx ts-node scripts/local-profiler-test.ts",
26
+ "cli": "npx ts-node scripts/cli.ts"
27
+ },
28
+ "keywords": ["openclaw", "contextlake", "plugin"],
29
+ "author": "byted-las",
30
+ "license": "ISC",
31
+ "engines": {
32
+ "node": ">=20.17.0"
33
+ },
34
+ "dependencies": {
35
+ "@lancedb/lancedb": "^0.26.2",
36
+ "@volcengine/tos-sdk": "^2.9.0",
37
+ "commander": "^14.0.3",
38
+ "mammoth": "^1.12.0",
39
+ "node-llama-cpp": "^3.16.2",
40
+ "openclaw": "^2026.3.13",
41
+ "pdf-parse": "^2.4.5",
42
+ "uuid": "^13.0.0"
43
+ },
44
+ "devDependencies": {
45
+ "@types/node": "^25.5.0",
46
+ "@types/pdf-parse": "^1.1.5",
47
+ "@types/uuid": "^10.0.0",
48
+ "ts-node": "^10.9.2",
49
+ "typescript": "^5.9.3",
50
+ "vitest": "^4.1.0"
51
+ }
52
+ }
@@ -0,0 +1,102 @@
1
+ import * as lancedb from '@lancedb/lancedb';
2
+ import { EmbeddingProvider } from '../service/embedding/interface';
3
+
4
/** Connection settings for {@link ContextLakeLanceDBClient}. */
export interface LanceDBConfig {
  /** Location handed unchanged to `lancedb.connect`. */
  uri: string;
}
7
+
8
/** Row shape of the table managed by the LanceDB client. */
export interface DocumentSchema {
  /** Row identifier. */
  id: string;
  /** Embedding vector used for vector search. */
  vector: number[];
  /** Text content of the row. */
  text: string;
  /** Origin of the content — exact format is defined by the ingest code (not visible here). */
  source: string;
  /** File type label — presumably an extension or MIME-like tag; confirm against the ingest code. */
  file_type: string;
  /** Storage backend label — presumably matches the storage provider in use; confirm. */
  storage_type: string;
  /** Location of the stored file. */
  url: string;
  /** Additional metadata, serialised as a JSON string. */
  metadata: string;
  /** Creation time as a Unix timestamp. */
  created_at: number;
  /** Optional direct binary storage of the file content. */
  binary_data?: Buffer;
}
20
+
21
/**
 * Thin wrapper around a LanceDB connection that stores and searches
 * {@link DocumentSchema} rows, using an embedding provider to turn query
 * text into vectors.
 */
export class ContextLakeLanceDBClient {
  private db: lancedb.Connection | null = null;
  /**
   * Opened tables keyed by name. A single cached handle would be returned for
   * every name, so a later request for a different table would silently get
   * the first one.
   */
  private readonly tables = new Map<string, lancedb.Table>();
  private config: LanceDBConfig;
  private embeddingProvider: EmbeddingProvider;

  /**
   * @param config            Connection settings (`uri`).
   * @param embeddingProvider Used for query embeddings and, when a table has
   *                          to be created without an explicit dimension, to
   *                          infer the vector width.
   */
  constructor(config: LanceDBConfig, embeddingProvider: EmbeddingProvider) {
    this.config = config;
    this.embeddingProvider = embeddingProvider;
  }

  /** Open the connection on first use; later calls are no-ops. */
  async connect() {
    if (!this.db) {
      this.db = await lancedb.connect(this.config.uri);
    }
  }

  /**
   * Return the table called `tableName`, opening it or creating it on demand.
   *
   * @param tableName Table to open or create (default `documents`).
   * @param dim       Vector width used when the table must be created; when
   *                  it is not positive the width is taken from a probe embedding.
   * @returns The opened (and cached) table handle.
   * @throws Error when the table must be created and no positive vector
   *         dimension can be determined.
   */
  async getTable(tableName: string = 'documents', dim: number = 0) {
    const cached = this.tables.get(tableName);
    if (cached) return cached;
    await this.connect();
    const db = this.db!;

    let table: lancedb.Table;
    const tableNames = await db.tableNames();
    if (tableNames.includes(tableName)) {
      table = await db.openTable(tableName);
    } else {
      if (dim <= 0) {
        // Fallback: use embedding provider to infer dimension only if needed
        const probe = await this.embeddingProvider.generateEmbedding("init");
        dim = probe.length;
      }
      if (dim <= 0) {
        // A zero-width vector column would give the table an unusable schema.
        throw new Error(`Cannot create table "${tableName}": embedding dimension could not be determined`);
      }

      // The schema is inferred from one placeholder row, which is removed right after creation.
      // @ts-ignore
      table = await db.createTable(tableName, [
        {
          id: 'schema_init',
          vector: Array(dim).fill(0),
          text: '',
          source: '',
          file_type: '',
          storage_type: '',
          url: '',
          metadata: '{}',
          created_at: 0,
          binary_data: Buffer.from('')
        }
      ]);
      await table.delete('id = "schema_init"');
    }
    this.tables.set(tableName, table);
    return table;
  }

  /** Append rows to the default table. */
  async addAssets(docs: DocumentSchema[]) {
    const table = await this.getTable();
    // @ts-ignore
    await table.add(docs);
  }

  /**
   * Vector search over the default table.
   *
   * @param query  Text that is embedded and used as the search vector.
   * @param limit  Maximum number of rows returned (default 5).
   * @param filter Optional filter expression passed to `where`.
   */
  async search(query: string, limit: number = 5, filter?: string) {
    const vector = await this.embeddingProvider.generateEmbedding(query);
    const table = await this.getTable();
    // @ts-ignore
    let search = table.vectorSearch(vector).limit(limit);
    if (filter) {
      search = search.where(filter);
    }
    return await search.toArray();
  }

  /** Delete the rows of the default table that match `filter`. */
  async delete(filter: string) {
    const table = await this.getTable();
    await table.delete(filter);
  }

  /**
   * List rows of the default table.
   *
   * @param limit  Maximum number of rows returned (default 100).
   * @param filter Optional filter expression passed to `where`.
   */
  async list(limit: number = 100, filter?: string) {
    const table = await this.getTable();
    let query = table.query().limit(limit);
    if (filter) {
      query = query.where(filter);
    }
    return await query.toArray();
  }
}
@@ -0,0 +1,100 @@
1
+ import { TosClient } from '@volcengine/tos-sdk';
2
+ import { StorageProvider } from '../service/storage/interface';
3
+
4
/** Settings accepted by {@link ContextLakeTosClient}. */
export interface TosConfig {
  /** Access key id forwarded to the TOS SDK client. */
  access_key?: string;
  /** Access key secret forwarded to the TOS SDK client. */
  secret_key?: string;
  /** Region forwarded to the TOS SDK client. */
  region: string;
  /** Optional endpoint forwarded to the TOS SDK client. */
  endpoint?: string;
  /** Target location in the form `tos://bucket/base_path/`. */
  path: string;
  /** Optional security token forwarded to the TOS SDK client. */
  sts_token?: string;
}
12
+
13
/**
 * {@link StorageProvider} backed by a TOS bucket. Objects are stored under
 * the bucket and base path parsed from `config.path`.
 */
export class ContextLakeTosClient implements StorageProvider {
  private client: TosClient;
  private bucket: string;
  private basePath: string;

  /**
   * @param config TOS settings; `path` must have the form `tos://bucket/base_path/`.
   * @throws Error when `path` is missing, does not start with `tos://`, or
   *         contains no bucket name.
   */
  constructor(config: TosConfig) {
    // Validate the path before building the SDK client so that a bad
    // configuration is reported with a clear message.
    if (!config.path) {
      throw new Error('TOS configuration requires "path" (e.g. tos://bucket/path/)');
    }

    if (!config.path.startsWith('tos://')) {
      throw new Error('TOS path must start with tos://');
    }
    const [bucket, ...baseSegments] = config.path.substring(6).split('/');
    if (!bucket) {
      // "tos://" or "tos:///x" would otherwise yield an empty bucket name.
      throw new Error('TOS path must include a bucket name (e.g. tos://bucket/path/)');
    }
    this.bucket = bucket;
    this.basePath = baseSegments.join('/');
    if (this.basePath && !this.basePath.endsWith('/')) {
      this.basePath += '/';
    }

    const tosConfig: any = {
      accessKeyId: config.access_key,
      accessKeySecret: config.secret_key,
      region: config.region,
      endpoint: config.endpoint,
      securityToken: config.sts_token,
    };
    this.client = new TosClient(tosConfig);
  }

  /** Normalise a stream chunk to a Buffer (Buffers are passed through unchanged). */
  private static toBuffer(chunk: any): Buffer {
    return Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
  }

  /**
   * Turn a `tos://bucket/key` URL into the object key; any other string is
   * returned unchanged.
   *
   * NOTE(review): the bucket segment of the URL is discarded, so the object
   * is always looked up in the configured bucket — confirm this is intended.
   */
  private parseTosUrl(key: string): string {
    if (key.startsWith('tos://')) {
      // "tos:", "", "<bucket>" are the first three segments.
      const pathParts = key.split('/').slice(3);
      return pathParts.join('/');
    }
    return key;
  }

  /**
   * Upload `buffer` as `<basePath><fileName>`.
   *
   * @returns The `tos://bucket/key` URL of the stored object.
   */
  async uploadFile(fileName: string, buffer: Buffer): Promise<string> {
    const key = `${this.basePath}${fileName}`;
    await this.client.putObject({
      bucket: this.bucket,
      key,
      body: buffer,
    });
    return `tos://${this.bucket}/${key}`;
  }

  /**
   * Download an object and return its full content.
   *
   * @param key Object key or `tos://` URL.
   */
  async downloadFile(key: string): Promise<Buffer> {
    const actualKey = this.parseTosUrl(key);

    const result = await this.client.getObject({
      bucket: this.bucket,
      key: actualKey,
    });

    // Check if result.data is a stream or buffer
    if (Buffer.isBuffer(result.data)) {
      return result.data;
    }

    // Read stream to buffer
    // @ts-ignore
    const stream: any = result.data.content || result.data;
    if (stream && stream.toArray) {
      // Fast path for streams that can collect themselves. Chunks are
      // normalised because Buffer.concat rejects string chunks.
      const collected: unknown[] = await stream.toArray();
      return Buffer.concat(collected.map(ContextLakeTosClient.toBuffer));
    }

    if (stream && stream[Symbol.asyncIterator]) {
      const chunks: Buffer[] = [];
      for await (const chunk of stream) {
        chunks.push(ContextLakeTosClient.toBuffer(chunk));
      }
      return Buffer.concat(chunks);
    }
    return Buffer.from(result.data as any);
  }

  /**
   * Delete an object.
   *
   * @param key Object key or `tos://` URL.
   */
  async deleteFile(key: string): Promise<void> {
    const actualKey = this.parseTosUrl(key);
    await this.client.deleteObject({
      bucket: this.bucket,
      key: actualKey,
    });
  }
}
+ }