@byted-las/contextlake-openclaw 1.0.0 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +2 -1
- package/dist/index.js +5 -5
- package/dist/src/client/lancedb.js +13 -4
- package/dist/src/commands/cli.d.ts +5 -2
- package/dist/src/commands/cli.js +94 -10
- package/dist/src/commands/index.d.ts +2 -1
- package/dist/src/commands/index.js +31 -35
- package/dist/src/commands/slashcmd.d.ts +8 -1
- package/dist/src/commands/slashcmd.js +90 -6
- package/dist/src/commands/tools.d.ts +10 -218
- package/dist/src/commands/tools.js +109 -104
- package/dist/src/lib/actions/ingest-source.d.ts +15 -0
- package/dist/src/lib/actions/ingest-source.js +193 -0
- package/dist/src/lib/actions/ingest.d.ts +14 -7
- package/dist/src/lib/actions/ingest.js +133 -63
- package/dist/src/lib/actions/las-api.d.ts +13 -0
- package/dist/src/lib/actions/las-api.js +105 -0
- package/dist/src/lib/actions/las-tools.d.ts +3 -0
- package/dist/src/lib/actions/las-tools.js +194 -0
- package/dist/src/lib/actions/las.d.ts +64 -0
- package/dist/src/lib/actions/las.js +72 -0
- package/dist/src/lib/actions/manage.d.ts +3 -2
- package/dist/src/{skills/las-data-profiler/index.d.ts → lib/actions/profiler.d.ts} +4 -2
- package/dist/src/{skills/las-data-profiler/index.js → lib/actions/profiler.js} +19 -3
- package/dist/src/lib/actions/retrieve.d.ts +2 -1
- package/dist/src/lib/actions/retrieve.js +2 -18
- package/{src/skills/las-data-profiler → dist/src/lib/scripts}/s3_catalog.py +10 -1
- package/dist/src/processor/loader.js +9 -2
- package/dist/src/service/embedding/factory.js +1 -10
- package/dist/src/service/embedding/interface.d.ts +8 -1
- package/dist/src/service/embedding/local.js +16 -13
- package/dist/src/service/embedding/remote.d.ts +7 -0
- package/dist/src/service/embedding/remote.js +108 -7
- package/dist/src/service/metadata/interface.d.ts +1 -0
- package/dist/src/service/metadata/local.d.ts +1 -0
- package/dist/src/service/metadata/local.js +6 -0
- package/dist/src/skills/SKILL.md +174 -0
- package/dist/src/skills/contextlake-delete/SKILL.md +36 -0
- package/dist/src/skills/contextlake-ingest/SKILL.md +40 -0
- package/dist/src/skills/contextlake-list/SKILL.md +22 -0
- package/dist/src/skills/contextlake-retrieve/SKILL.md +37 -0
- package/dist/src/skills/las-data-profiler/SKILL.md +174 -0
- package/dist/src/utils/config.d.ts +34 -1
- package/dist/src/utils/config.js +16 -3
- package/dist/src/utils/credentials.d.ts +8 -0
- package/dist/src/utils/credentials.js +77 -0
- package/index.ts +8 -8
- package/openclaw.plugin.json +1 -1
- package/package.json +8 -7
- package/src/client/lancedb.ts +32 -21
- package/src/commands/cli.ts +105 -13
- package/src/commands/index.ts +45 -42
- package/src/commands/slashcmd.ts +69 -10
- package/src/commands/tools.ts +142 -117
- package/src/lib/actions/ingest.ts +151 -75
- package/src/lib/actions/las-api.ts +119 -0
- package/src/lib/actions/las-tools.ts +196 -0
- package/src/lib/actions/manage.ts +6 -5
- package/src/{skills/las-data-profiler/index.ts → lib/actions/profiler.ts} +21 -4
- package/src/lib/actions/retrieve.ts +16 -34
- package/src/lib/scripts/s3_catalog.py +617 -0
- package/src/processor/loader.ts +12 -4
- package/src/service/embedding/factory.ts +1 -8
- package/src/service/embedding/interface.ts +9 -1
- package/src/service/embedding/remote.ts +133 -13
- package/src/service/metadata/interface.ts +1 -0
- package/src/service/metadata/local.ts +7 -0
- package/src/service/storage/factory.ts +2 -2
- package/src/utils/config.ts +61 -8
- package/src/utils/credentials.ts +50 -0
- package/bin/contextlake-openclaw.js +0 -5
- package/dist/src/skills/las-data-profiler/register.d.ts +0 -1
- package/dist/src/skills/las-data-profiler/register.js +0 -19
- package/src/service/embedding/local.ts +0 -118
- package/src/skills/las-data-profiler/register.ts +0 -19
|
@@ -1,45 +1,165 @@
|
|
|
1
1
|
import { EmbeddingProvider, EmbeddingConfig } from './interface';
|
|
2
2
|
|
|
3
|
+
/** Wire protocol spoken by the remote embedding endpoint. */
type RemoteApiMode = 'openai-compatible' | 'las-multimodal';

/**
 * Embedding provider backed by a remote HTTP API.
 *
 * Two protocols are supported:
 * - `openai-compatible`: one batched POST to `{apiBase}/embeddings`.
 * - `las-multimodal`: one POST per input to `{apiBase}/api/v1/embeddings/multimodal`.
 *
 * The protocol is chosen once, in the constructor, from the configured
 * provider name and API base URL.
 */
export class RemoteEmbeddingProvider implements EmbeddingProvider {
  /** Path suffix of the LAS multimodal embedding endpoint. */
  private static readonly LAS_ENDPOINT_PATH = '/api/v1/embeddings/multimodal';

  /** Upper bound on simultaneous LAS requests issued for one batch of texts. */
  private static readonly LAS_MAX_CONCURRENCY = 8;

  private apiKey: string;
  private modelName: string;
  private apiBase: string;
  private dimensions?: number;
  private encodingFormat: 'float' | 'base64';
  private mode: RemoteApiMode;

  /**
   * @param config Embedding settings; `api_key` is mandatory.
   * @throws Error when `config.api_key` is missing or empty.
   */
  constructor(config: EmbeddingConfig) {
    if (!config.api_key) {
      throw new Error('Remote embedding API key is required');
    }

    this.apiKey = config.api_key;
    this.modelName = config.model_name;
    // Strip every trailing slash so joining a path never yields "//".
    this.apiBase = (config.api_base || 'https://api.openai.com/v1').replace(/\/+$/, '');
    this.dimensions = config.dimensions;
    this.encodingFormat = config.encoding_format || 'float';
    this.mode = this.detectMode(this.apiBase, config.provider);
  }

  /**
   * Picks the wire protocol: LAS when the provider is `'las'` or the base URL
   * contains the LAS endpoint path or `operator.las`; OpenAI-compatible otherwise.
   */
  private detectMode(apiBase: string, provider: EmbeddingConfig['provider']): RemoteApiMode {
    const looksLikeLas =
      provider === 'las' ||
      apiBase.includes(RemoteEmbeddingProvider.LAS_ENDPOINT_PATH) ||
      apiBase.includes('operator.las');
    return looksLikeLas ? 'las-multimodal' : 'openai-compatible';
  }

  /**
   * Embeds a single text.
   *
   * @throws Error when the text is empty or the API returns no embedding.
   */
  async generateEmbedding(text: string): Promise<number[]> {
    const embeddings = await this.generateEmbeddings([text]);
    if (!embeddings.length) {
      throw new Error('No embedding returned for input text');
    }
    return embeddings[0];
  }

  /**
   * Embeds a list of texts; the result has one vector per input, in input order.
   *
   * @returns `[]` when `texts` is not an array or is empty.
   * @throws Error when any entry is not a non-blank string, or the API call fails.
   */
  async generateEmbeddings(texts: string[]): Promise<number[][]> {
    if (!Array.isArray(texts) || texts.length === 0) {
      return [];
    }

    for (const text of texts) {
      if (typeof text !== 'string' || !text.trim()) {
        throw new Error('Embedding input text must be a non-empty string');
      }
    }

    return this.mode === 'las-multimodal'
      ? this.generateLasEmbeddings(texts)
      : this.generateOpenAICompatibleEmbeddings(texts);
  }

  /** Sends all texts in one request to `{apiBase}/embeddings` and returns vectors ordered by `index`. */
  private async generateOpenAICompatibleEmbeddings(texts: string[]): Promise<number[][]> {
    const response = await fetch(`${this.apiBase}/embeddings`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${this.apiKey}`
      },
      body: JSON.stringify({
        input: texts,
        model: this.modelName,
        ...(this.dimensions ? { dimensions: this.dimensions } : {})
      })
    });

    if (!response.ok) {
      const error = await response.text();
      throw new Error(`Remote API error: ${response.status} ${error}`);
    }

    const data = await response.json();
    const items: unknown = data?.data;
    // A count mismatch would silently pair vectors with the wrong inputs, so reject it.
    if (!Array.isArray(items) || items.length !== texts.length) {
      throw new Error('Unexpected OpenAI-compatible embedding response format');
    }

    // Sort a copy: the parsed response object is left untouched.
    return [...items]
      .sort((a: any, b: any) => a.index - b.index)
      .map((item: any) => {
        if (!Array.isArray(item?.embedding)) {
          throw new Error('Unexpected OpenAI-compatible embedding response format');
        }
        return item.embedding as number[];
      });
  }

  /**
   * Embeds each text with its own LAS request, at most
   * `LAS_MAX_CONCURRENCY` in flight at a time, preserving input order.
   */
  private async generateLasEmbeddings(texts: string[]): Promise<number[][]> {
    const limit = RemoteEmbeddingProvider.LAS_MAX_CONCURRENCY;
    const vectors: number[][] = [];

    for (let start = 0; start < texts.length; start += limit) {
      const batch = texts.slice(start, start + limit);
      const batchVectors = await Promise.all(
        batch.map((text) => this.requestLasEmbedding([{ type: 'text', text }]))
      );
      vectors.push(...batchVectors);
    }

    return vectors;
  }

  /**
   * Embeds one multimodal input (an array of LAS input parts) into a single vector.
   *
   * @throws Error when the provider is not in LAS mode, `input` is empty,
   *         or the API call fails.
   */
  async generateMultimodalEmbedding(input: any[]): Promise<number[]> {
    if (this.mode !== 'las-multimodal') {
      throw new Error('generateMultimodalEmbedding requires LAS multimodal provider');
    }
    if (!Array.isArray(input) || input.length === 0) {
      throw new Error('Multimodal embedding input must be a non-empty array');
    }

    return this.requestLasEmbedding(input);
  }

  /** Full LAS endpoint URL; tolerates an `apiBase` that already includes the endpoint path. */
  private lasEndpoint(): string {
    const suffix = RemoteEmbeddingProvider.LAS_ENDPOINT_PATH;
    return this.apiBase.endsWith(suffix) ? this.apiBase : `${this.apiBase}${suffix}`;
  }

  /** Performs one LAS embedding request and extracts `data.embedding` from the reply. */
  private async requestLasEmbedding(input: unknown[]): Promise<number[]> {
    const response = await fetch(this.lasEndpoint(), {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${this.apiKey}`
      },
      body: JSON.stringify({
        model: this.modelName,
        encoding_format: this.encodingFormat,
        ...(this.dimensions ? { dimensions: this.dimensions } : {}),
        input
      })
    });

    if (!response.ok) {
      const error = await response.text();
      throw new Error(`LAS embedding API error: ${response.status} ${error}`);
    }

    const data = await response.json();
    const embedding = data?.data?.embedding;
    if (!Array.isArray(embedding) || embedding.length === 0) {
      throw new Error('Unexpected LAS embedding response format');
    }
    return embedding;
  }
}
|
|
@@ -8,6 +8,7 @@ export interface MetadataProvider {
|
|
|
8
8
|
list(limit?: number, filter?: string): Promise<DocumentSchema[]>;
|
|
9
9
|
delete(filter: string): Promise<void>;
|
|
10
10
|
generateEmbedding(text: string): Promise<number[]>;
|
|
11
|
+
generateMultimodalEmbedding?(input: any[]): Promise<number[]>;
|
|
11
12
|
}
|
|
12
13
|
|
|
13
14
|
export interface MetadataConfig {
|
|
@@ -57,4 +57,11 @@ export class LocalMetadataProvider implements MetadataProvider {
|
|
|
57
57
|
/** Embeds `text` by delegating to the configured embedding provider. */
async generateEmbedding(text: string): Promise<number[]> {
  const vector = await this.embeddingProvider.generateEmbedding(text);
  return vector;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Embeds a multimodal input via the configured embedding provider.
 *
 * @throws Error when the provider does not implement `generateMultimodalEmbedding`.
 */
async generateMultimodalEmbedding(input: any[]): Promise<number[]> {
  const provider = this.embeddingProvider;
  if (!provider.generateMultimodalEmbedding) {
    throw new Error('generateMultimodalEmbedding not supported by this embedding provider');
  }
  return await provider.generateMultimodalEmbedding(input);
}
|
|
60
67
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { StorageProvider, StorageConfig } from './interface';
|
|
2
2
|
import { LocalStorageProvider } from './local';
|
|
3
|
-
import { ContextLakeTosClient } from '../../client/tos';
|
|
3
|
+
import { ContextLakeTosClient, TosConfig } from '../../client/tos';
|
|
4
4
|
|
|
5
5
|
export function createStorageProvider(config: StorageConfig): StorageProvider {
|
|
6
6
|
if (config.type === 'local') {
|
|
@@ -9,7 +9,7 @@ export function createStorageProvider(config: StorageConfig): StorageProvider {
|
|
|
9
9
|
if (!config.tos || !config.tos.region || !config.tos.path) {
|
|
10
10
|
throw new Error('Missing TOS configuration: region and path required');
|
|
11
11
|
}
|
|
12
|
-
return new ContextLakeTosClient(config.tos as
|
|
12
|
+
return new ContextLakeTosClient(config.tos as TosConfig);
|
|
13
13
|
} else {
|
|
14
14
|
throw new Error(`Unsupported storage type: ${config.type}`);
|
|
15
15
|
}
|
package/src/utils/config.ts
CHANGED
|
@@ -1,13 +1,66 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
1
|
+
import * as os from 'os';
import * as path from 'path';

import { loadCredentials } from './credentials';
|
|
2
|
+
|
|
3
|
+
/** Settings for the embedding backend used by metadata storage. */
export interface EmbeddingConfig {
  /** Backend identifier; the built-in default configuration uses `'las'`. */
  provider: string;
  /** Name of the embedding model to request. */
  model_name: string;
  /** API key for the embedding service, when one is needed. */
  api_key?: string;
  /** Base URL of the embedding service. */
  api_base?: string;
  /** Requested embedding vector size; the built-in default is 2048. */
  dimensions?: number;
  /** Requested encoding of returned vectors; the built-in default is `'float'`. */
  encoding_format?: string;
}
|
|
11
|
+
|
|
12
|
+
/** Where document metadata is stored and how its embeddings are produced. */
export interface MetadataStorageConfig {
  /** Storage backend kind. */
  type: 'local' | 'remote';
  /** LanceDB location; the built-in default points under `~/.openclaw/contextlake/data`. */
  lancedb_uri?: string;
  /** Embedding backend settings. */
  embedding?: EmbeddingConfig;
}
|
|
17
|
+
|
|
18
|
+
/** Where raw files are stored: on local disk or in TOS. */
export interface FileStorageConfig {
  /** Storage backend kind. */
  type: 'local' | 'tos';
  /** Root directory for the `'local'` backend. */
  local_base_dir?: string;
  /** Connection settings for the `'tos'` backend. */
  tos?: {
    access_key?: string;
    secret_key?: string;
    region?: string;
    path?: string;
    endpoint?: string;
    sts_token?: string;
  };
}
|
|
30
|
+
|
|
31
|
+
/** Tunables governing how content is stored. */
export interface StoragePolicyConfig {
  /** Size threshold in kilobytes for inline storage (presumably larger content goes to file storage — confirm against the ingest code). */
  max_inline_size_kb?: number;
}
|
|
34
|
+
|
|
35
|
+
/** Top-level configuration of the `contextlake-openclaw` plugin. */
export interface ContextLakeConfig {
  /** Metadata store and embedding settings. */
  metadata_storage?: MetadataStorageConfig;
  /** Raw file storage settings. */
  file_storage?: FileStorageConfig;
  /** Storage policy tunables. */
  storage_policy?: StoragePolicyConfig;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Resolves the plugin configuration for `contextlake-openclaw`.
 *
 * If the host context carries a config entry for the plugin, that object is
 * returned as-is. Otherwise a default configuration is built: local LanceDB
 * metadata storage with LAS embeddings, and local file storage, with secrets
 * taken from environment variables first and the saved credentials file second.
 *
 * The credentials file is only read when the defaults are actually needed.
 *
 * @param ctx Host plugin context; may be null/undefined, in which case defaults are used.
 * @returns The host-supplied configuration, or the built-in defaults.
 */
export function getPluginConfig(ctx: any): ContextLakeConfig {
  const configured = ctx?.config?.plugins?.entries?.['contextlake-openclaw']?.config as
    | ContextLakeConfig
    | undefined;
  if (configured) {
    return configured;
  }

  const creds = loadCredentials();
  const home = os.homedir();

  return {
    metadata_storage: {
      type: 'local',
      lancedb_uri: path.join(home, '.openclaw', 'contextlake', 'data'),
      embedding: {
        provider: 'las',
        model_name: 'doubao-embedding-vision-250615',
        api_key: process.env.LAS_API_KEY || creds.LAS_API_KEY,
        api_base: process.env.LAS_BASE_URL || 'https://operator.las.cn-beijing.volces.com',
        dimensions: 2048,
        encoding_format: 'float'
      }
    },
    file_storage: {
      type: 'local',
      local_base_dir: path.join(home, '.openclaw', 'contextlake', 'files'),
      tos: {
        access_key: process.env.VOLCENGINE_ACCESS_KEY || creds.VOLCENGINE_ACCESS_KEY,
        secret_key: process.env.VOLCENGINE_SECRET_KEY || creds.VOLCENGINE_SECRET_KEY
      }
    }
  };
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import * as fs from 'fs';
|
|
2
|
+
import * as path from 'path';
|
|
3
|
+
import * as os from 'os';
|
|
4
|
+
import * as readline from 'readline';
|
|
5
|
+
|
|
6
|
+
/** Directory under the user's home where ContextLake keeps its state. */
const CONFIG_DIR = path.join(os.homedir(), '.openclaw', 'contextlake');

/** JSON file inside `CONFIG_DIR` that holds the saved credentials. */
const CONFIG_FILE = path.join(CONFIG_DIR, 'credentials.json');
|
|
8
|
+
|
|
9
|
+
/** Secrets persisted in the credentials file; every field is optional. */
export interface Credentials {
  /** API key for the LAS service. */
  LAS_API_KEY?: string;
  /** Volcengine access key. */
  VOLCENGINE_ACCESS_KEY?: string;
  /** Volcengine secret key. */
  VOLCENGINE_SECRET_KEY?: string;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Reads the saved credentials from `CONFIG_FILE`.
 *
 * Loading is best-effort and never throws: a missing file yields `{}`, and an
 * unreadable or malformed file yields `{}` after a warning on stderr. The
 * warning never includes file contents, since they may hold secrets.
 *
 * @returns The stored credentials, or `{}` when none can be loaded.
 */
export function loadCredentials(): Credentials {
  let raw: string;
  try {
    // Read directly instead of existsSync + read, which races with deletion.
    raw = fs.readFileSync(CONFIG_FILE, 'utf-8');
  } catch (err: unknown) {
    const code = (err as { code?: string } | null)?.code;
    if (code !== 'ENOENT') {
      console.warn(`[contextlake] Could not read credentials file ${CONFIG_FILE} (${code ?? 'unknown error'})`);
    }
    return {};
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // The parser message can quote file contents, so it is deliberately not logged.
    console.warn(`[contextlake] Credentials file ${CONFIG_FILE} is not valid JSON; ignoring it`);
    return {};
  }

  // Only a plain JSON object is a valid credentials document.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    console.warn(`[contextlake] Credentials file ${CONFIG_FILE} does not contain a JSON object; ignoring it`);
    return {};
  }
  return parsed as Credentials;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Merges `creds` into the saved credentials and writes them to `CONFIG_FILE`.
 *
 * Fields present in `creds` overwrite stored ones; other stored fields are kept.
 * The file ends up with mode 0o600 whether it was just created or already existed.
 *
 * @param creds Credential fields to add or replace.
 * @throws Whatever the underlying `fs` calls throw (e.g. on permission errors).
 */
export function saveCredentials(creds: Credentials) {
  // Recursive mkdir is a no-op when the directory already exists.
  fs.mkdirSync(CONFIG_DIR, { recursive: true });

  const merged = { ...loadCredentials(), ...creds };
  fs.writeFileSync(CONFIG_FILE, JSON.stringify(merged, null, 2), { mode: 0o600 });
  // `mode` above only applies when the file is created; tighten a pre-existing file too.
  fs.chmodSync(CONFIG_FILE, 0o600);
}
|
|
35
|
+
|
|
36
|
+
/**
 * Asks a question on the terminal and resolves with the trimmed answer.
 *
 * When the answer is empty, `defaultVal` is used, falling back to `''`. If the
 * input stream closes before an answer arrives (e.g. piped stdin hits EOF),
 * the promise resolves with the default instead of never settling.
 *
 * @param promptText Text shown to the user.
 * @param defaultVal Value used for an empty answer; shown in brackets in the prompt.
 * @returns The trimmed answer, the default, or `''`.
 */
export async function promptForInput(promptText: string, defaultVal?: string): Promise<string> {
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  const displayPrompt = defaultVal ? `${promptText} [${defaultVal}]: ` : `${promptText}: `;

  return new Promise<string>((resolve) => {
    // Fires on EOF; after a normal answer the promise is already settled, so this is a no-op.
    rl.once('close', () => resolve(defaultVal || ''));

    rl.question(displayPrompt, (answer) => {
      resolve(answer.trim() || defaultVal || '');
      rl.close();
    });
  });
}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export declare function registerLasDataProfilerSkill(ctx: any): void;
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.registerLasDataProfilerSkill = registerLasDataProfilerSkill;
|
|
4
|
-
const index_1 = require("./index");
|
|
5
|
-
function registerLasDataProfilerSkill(ctx) {
|
|
6
|
-
const definition = {
|
|
7
|
-
name: 'las-data-profiler',
|
|
8
|
-
description: 'Connect to a data source (TOS/OSS/COS/S3/Local) and profile its structure, schemas, and media metadata into LanceDB',
|
|
9
|
-
async execute(params) {
|
|
10
|
-
return await (0, index_1.connectDataSource)(params, ctx);
|
|
11
|
-
}
|
|
12
|
-
};
|
|
13
|
-
if (typeof ctx.registerTool === 'function') {
|
|
14
|
-
ctx.registerTool(definition);
|
|
15
|
-
}
|
|
16
|
-
else if (typeof ctx.registerSkill === 'function') {
|
|
17
|
-
ctx.registerSkill(definition);
|
|
18
|
-
}
|
|
19
|
-
}
|
|
@@ -1,118 +0,0 @@
|
|
|
1
|
-
import { EmbeddingProvider, EmbeddingConfig } from './interface';
|
|
2
|
-
// import type { Llama, LlamaEmbeddingContext, LlamaModel } from 'node-llama-cpp';
|
|
3
|
-
|
|
4
|
-
const DEFAULT_LOCAL_MODEL = "hf:CompendiumLabs/bge-small-zh-v1.5-gguf/bge-small-zh-v1.5-f16.gguf";
|
|
5
|
-
|
|
6
|
-
let nodeLlamaImportPromise: Promise<any> | null = null;
|
|
7
|
-
|
|
8
|
-
export const setNodeLlamaCppImporter = (importer: () => Promise<any>) => {
|
|
9
|
-
nodeLlamaImportPromise = importer();
|
|
10
|
-
};
|
|
11
|
-
|
|
12
|
-
const importNodeLlamaCpp = async (): Promise<any> => {
|
|
13
|
-
if (!nodeLlamaImportPromise) {
|
|
14
|
-
nodeLlamaImportPromise = import("node-llama-cpp");
|
|
15
|
-
}
|
|
16
|
-
return nodeLlamaImportPromise;
|
|
17
|
-
};
|
|
18
|
-
|
|
19
|
-
export class LocalEmbeddingProvider implements EmbeddingProvider {
|
|
20
|
-
private llama: any = null;
|
|
21
|
-
private model: any = null;
|
|
22
|
-
private context: any = null;
|
|
23
|
-
private initPromise: Promise<void> | null = null;
|
|
24
|
-
private modelPath: string;
|
|
25
|
-
|
|
26
|
-
constructor(config: EmbeddingConfig) {
|
|
27
|
-
// Override transformers.js default with node-llama-cpp default
|
|
28
|
-
this.modelPath = config.model_name === 'Xenova/all-MiniLM-L6-v2'
|
|
29
|
-
? DEFAULT_LOCAL_MODEL
|
|
30
|
-
: (config.model_name || DEFAULT_LOCAL_MODEL);
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
private async ensureInitialized(): Promise<void> {
|
|
34
|
-
if (this.context) {
|
|
35
|
-
return;
|
|
36
|
-
}
|
|
37
|
-
if (this.initPromise) {
|
|
38
|
-
return this.initPromise;
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
this.initPromise = this.doInitialize();
|
|
42
|
-
return this.initPromise;
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
private async doInitialize(): Promise<void> {
|
|
46
|
-
try {
|
|
47
|
-
const { getLlama, resolveModelFile, LlamaLogLevel } = await importNodeLlamaCpp();
|
|
48
|
-
|
|
49
|
-
if (!this.llama) {
|
|
50
|
-
this.llama = await getLlama({ logLevel: LlamaLogLevel.error });
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
if (!this.model) {
|
|
54
|
-
const resolved = await resolveModelFile(this.modelPath);
|
|
55
|
-
this.model = await this.llama.loadModel({ modelPath: resolved });
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
if (!this.context) {
|
|
59
|
-
this.context = await this.model.createEmbeddingContext();
|
|
60
|
-
}
|
|
61
|
-
} catch (err) {
|
|
62
|
-
const detail = err instanceof Error ? err.message : String(err);
|
|
63
|
-
throw new Error(`Local embeddings unavailable. Reason: ${detail}`, {
|
|
64
|
-
cause: err,
|
|
65
|
-
});
|
|
66
|
-
}
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
async generateEmbedding(text: string): Promise<number[]> {
|
|
70
|
-
await this.ensureInitialized();
|
|
71
|
-
|
|
72
|
-
const embedding = await this.context!.getEmbeddingFor(text);
|
|
73
|
-
const vector = embedding.vector; // TypedArray
|
|
74
|
-
|
|
75
|
-
// Optimized normalization loop
|
|
76
|
-
let sumSq = 0;
|
|
77
|
-
const len = vector.length;
|
|
78
|
-
|
|
79
|
-
// First pass: Calculate magnitude and sanitize (implicitly handled by JS numbers usually, but keeping finite check if needed)
|
|
80
|
-
// For performance, we assume node-llama-cpp returns valid floats.
|
|
81
|
-
// If strict sanitization is needed, it can be combined.
|
|
82
|
-
for (let i = 0; i < len; i++) {
|
|
83
|
-
const val = vector[i];
|
|
84
|
-
if (Number.isFinite(val)) {
|
|
85
|
-
sumSq += val * val;
|
|
86
|
-
} else {
|
|
87
|
-
vector[i] = 0;
|
|
88
|
-
}
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
const magnitude = Math.sqrt(sumSq);
|
|
92
|
-
const result = new Array(len);
|
|
93
|
-
|
|
94
|
-
if (magnitude > 0) {
|
|
95
|
-
const scale = 1.0 / magnitude;
|
|
96
|
-
for (let i = 0; i < len; i++) {
|
|
97
|
-
result[i] = vector[i] * scale;
|
|
98
|
-
}
|
|
99
|
-
} else {
|
|
100
|
-
// Zero vector case
|
|
101
|
-
for (let i = 0; i < len; i++) {
|
|
102
|
-
result[i] = vector[i]; // or 0
|
|
103
|
-
}
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
return result;
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
// Optimized batch processing for local embedding
|
|
110
|
-
async generateEmbeddings(texts: string[]): Promise<number[][]> {
|
|
111
|
-
await this.ensureInitialized();
|
|
112
|
-
// node-llama-cpp's createEmbeddingContext might not support batch directly yet depending on version,
|
|
113
|
-
// but we can at least optimize the loop.
|
|
114
|
-
// If newer version supports batch, we should use it. For now, we parallelize with limit.
|
|
115
|
-
|
|
116
|
-
return Promise.all(texts.map(text => this.generateEmbedding(text)));
|
|
117
|
-
}
|
|
118
|
-
}
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
// @ts-ignore
|
|
2
|
-
import { PluginContext } from 'openclaw/plugin-sdk';
|
|
3
|
-
import { connectDataSource } from './index';
|
|
4
|
-
|
|
5
|
-
export function registerLasDataProfilerSkill(ctx: any) {
|
|
6
|
-
const definition = {
|
|
7
|
-
name: 'las-data-profiler',
|
|
8
|
-
description: 'Connect to a data source (TOS/OSS/COS/S3/Local) and profile its structure, schemas, and media metadata into LanceDB',
|
|
9
|
-
async execute(params: any) {
|
|
10
|
-
return await connectDataSource(params, ctx);
|
|
11
|
-
}
|
|
12
|
-
};
|
|
13
|
-
|
|
14
|
-
if (typeof ctx.registerTool === 'function') {
|
|
15
|
-
ctx.registerTool(definition);
|
|
16
|
-
} else if (typeof ctx.registerSkill === 'function') {
|
|
17
|
-
ctx.registerSkill(definition);
|
|
18
|
-
}
|
|
19
|
-
}
|