@byted-las/contextlake-openclaw 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +64 -0
- package/bin/contextlake-openclaw.js +5 -0
- package/dist/index.d.ts +113 -0
- package/dist/index.js +73 -0
- package/dist/src/client/lancedb.d.ts +30 -0
- package/dist/src/client/lancedb.js +113 -0
- package/dist/src/client/tos.d.ts +19 -0
- package/dist/src/client/tos.js +81 -0
- package/dist/src/commands/cli.d.ts +6 -0
- package/dist/src/commands/cli.js +78 -0
- package/dist/src/commands/index.d.ts +1 -0
- package/dist/src/commands/index.js +139 -0
- package/dist/src/commands/slashcmd.d.ts +14 -0
- package/dist/src/commands/slashcmd.js +91 -0
- package/dist/src/commands/tools.d.ts +219 -0
- package/dist/src/commands/tools.js +286 -0
- package/dist/src/lib/actions/ingest.d.ts +8 -0
- package/dist/src/lib/actions/ingest.js +123 -0
- package/dist/src/lib/actions/manage.d.ts +15 -0
- package/dist/src/lib/actions/manage.js +91 -0
- package/dist/src/lib/actions/retrieve.d.ts +8 -0
- package/dist/src/lib/actions/retrieve.js +73 -0
- package/dist/src/processor/loader.d.ts +7 -0
- package/dist/src/processor/loader.js +83 -0
- package/dist/src/service/embedding/factory.d.ts +2 -0
- package/dist/src/service/embedding/factory.js +16 -0
- package/dist/src/service/embedding/interface.d.ts +18 -0
- package/dist/src/service/embedding/interface.js +2 -0
- package/dist/src/service/embedding/local.d.ts +14 -0
- package/dist/src/service/embedding/local.js +104 -0
- package/dist/src/service/embedding/remote.d.ts +9 -0
- package/dist/src/service/embedding/remote.js +42 -0
- package/dist/src/service/metadata/factory.d.ts +13 -0
- package/dist/src/service/metadata/factory.js +48 -0
- package/dist/src/service/metadata/interface.d.ts +17 -0
- package/dist/src/service/metadata/interface.js +2 -0
- package/dist/src/service/metadata/local.d.ts +13 -0
- package/dist/src/service/metadata/local.js +49 -0
- package/dist/src/service/storage/factory.d.ts +2 -0
- package/dist/src/service/storage/factory.js +19 -0
- package/dist/src/service/storage/interface.d.ts +32 -0
- package/dist/src/service/storage/interface.js +2 -0
- package/dist/src/service/storage/local.d.ts +9 -0
- package/dist/src/service/storage/local.js +72 -0
- package/dist/src/skills/las-data-profiler/index.d.ts +26 -0
- package/dist/src/skills/las-data-profiler/index.js +231 -0
- package/dist/src/skills/las-data-profiler/register.d.ts +1 -0
- package/dist/src/skills/las-data-profiler/register.js +19 -0
- package/dist/src/utils/config.d.ts +1 -0
- package/dist/src/utils/config.js +16 -0
- package/index.ts +78 -0
- package/openclaw.plugin.json +57 -0
- package/package.json +52 -0
- package/src/client/lancedb.ts +102 -0
- package/src/client/tos.ts +100 -0
- package/src/commands/cli.ts +77 -0
- package/src/commands/index.ts +156 -0
- package/src/commands/slashcmd.ts +95 -0
- package/src/commands/tools.ts +286 -0
- package/src/lib/actions/ingest.ts +103 -0
- package/src/lib/actions/manage.ts +107 -0
- package/src/lib/actions/retrieve.ts +90 -0
- package/src/processor/loader.ts +58 -0
- package/src/service/embedding/factory.ts +13 -0
- package/src/service/embedding/interface.ts +21 -0
- package/src/service/embedding/local.ts +118 -0
- package/src/service/embedding/remote.ts +45 -0
- package/src/service/metadata/factory.ts +52 -0
- package/src/service/metadata/interface.ts +19 -0
- package/src/service/metadata/local.ts +60 -0
- package/src/service/storage/factory.ts +16 -0
- package/src/service/storage/interface.ts +36 -0
- package/src/service/storage/local.ts +42 -0
- package/src/skills/contextlake-delete/SKILL.md +36 -0
- package/src/skills/contextlake-ingest/SKILL.md +40 -0
- package/src/skills/contextlake-list/SKILL.md +22 -0
- package/src/skills/contextlake-retrieve/SKILL.md +37 -0
- package/src/skills/las-data-profiler/SKILL.md +174 -0
- package/src/skills/las-data-profiler/index.ts +254 -0
- package/src/skills/las-data-profiler/register.ts +19 -0
- package/src/skills/las-data-profiler/s3_catalog.py +608 -0
- package/src/utils/config.ts +13 -0
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Re-export helper: makes o[k2] mirror m[k] (k2 defaults to k).
// An already-installed this.__createBinding is reused when one exists.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
// Use a forwarding getter when m has no own descriptor for k, when k is an accessor on a module not flagged __esModule, or when k is a writable/configurable data property.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
// Variant chosen when Object.create is falsy: copy the current value instead of defining a property.
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
// Attaches v to o as an enumerable "default" property (plain assignment when Object.create is falsy).
// An already-installed this.__setModuleDefault is reused when one exists.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
// Namespace-import helper. A module flagged __esModule is returned unchanged; any other
// value is wrapped in a new object that gets a binding for each own property except
// "default", and whose "default" is the original value itself.
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
// ownKeys replaces itself on first call with Object.getOwnPropertyNames (or a for-in fallback).
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.processFile = processFile;
|
|
37
|
+
exports.splitText = splitText;
|
|
38
|
+
const fs = __importStar(require("fs"));
|
|
39
|
+
const path = __importStar(require("path"));
|
|
40
|
+
// @ts-ignore
|
|
41
|
+
const pdf = __importStar(require("pdf-parse"));
|
|
42
|
+
const mammoth = __importStar(require("mammoth"));
|
|
43
|
+
/**
 * Read a file from disk and extract its plain-text content.
 *
 * `.pdf` files go through `pdf-parse` and `.docx` files through `mammoth`;
 * every other extension (including `.txt` and `.md`) is decoded as UTF-8.
 * Runs of whitespace in the extracted text are collapsed to a single space
 * and the result is trimmed.
 *
 * @param filePath - Path of the file to load.
 * @returns An object with `buffer` (the raw bytes), `text` (the cleaned text)
 *          and `type` (the lower-cased extension without its leading dot,
 *          `''` when the path has no extension).
 * @throws Rejects when the file cannot be read or a parser fails.
 */
async function processFile(filePath) {
    // Non-blocking read; a failure still surfaces as a rejected promise.
    const buffer = await fs.promises.readFile(filePath);
    const ext = path.extname(filePath).toLowerCase();
    let text = '';
    switch (ext) {
        case '.pdf': {
            // `pdf` is bound through __importStar. When the required module is a
            // bare function without __esModule, that helper returns a wrapper
            // object and the callable lives on `.default`, so calling `pdf`
            // directly throws. Accept either shape.
            const parsePdf = typeof pdf === 'function' ? pdf : pdf.default;
            if (typeof parsePdf !== 'function') {
                throw new Error('pdf-parse did not export a callable parser');
            }
            const pdfData = await parsePdf(buffer);
            text = pdfData.text;
            break;
        }
        case '.docx': {
            const result = await mammoth.extractRawText({ buffer });
            text = result.value;
            break;
        }
        default:
            // '.txt', '.md' and any unrecognised extension fall back to UTF-8 text.
            text = buffer.toString('utf-8');
    }
    // Basic cleaning
    text = text.replace(/\s+/g, ' ').trim();
    return {
        buffer,
        text,
        // `ext` is either '' or starts with its only dot, so dropping one char removes it.
        type: ext.slice(1)
    };
}
|
|
71
|
+
/**
 * Split text into fixed-size character windows that overlap their neighbours.
 *
 * Each chunk starts `chunkSize - overlap` characters after the previous one.
 * Splitting stops as soon as a chunk reaches the end of the text, so no
 * trailing chunk is emitted that is wholly contained in the one before it.
 *
 * @param text - Text to split; an empty or missing value yields `[]`.
 * @param chunkSize - Maximum number of characters (UTF-16 code units) per chunk.
 * @param overlap - Number of characters each chunk shares with the previous one.
 * @returns The chunks in document order.
 * @throws RangeError when `chunkSize` is not a finite number >= 1, or when
 *         `overlap` is negative or not smaller than `chunkSize` (the window
 *         would never advance).
 */
function splitText(text, chunkSize = 500, overlap = 50) {
    const chunks = [];
    if (!text)
        return chunks;
    if (!Number.isFinite(chunkSize) || chunkSize < 1) {
        throw new RangeError(`chunkSize must be a finite number >= 1, got ${chunkSize}`);
    }
    if (!Number.isFinite(overlap) || overlap < 0 || overlap >= chunkSize) {
        throw new RangeError(`overlap must satisfy 0 <= overlap < chunkSize, got ${overlap}`);
    }
    const step = chunkSize - overlap;
    for (let start = 0; start < text.length; start += step) {
        const end = Math.min(start + chunkSize, text.length);
        chunks.push(text.slice(start, end));
        // The text is fully covered; a further window would only repeat the tail.
        if (end === text.length)
            break;
    }
    return chunks;
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.createEmbeddingProvider = createEmbeddingProvider;
|
|
4
|
+
const local_1 = require("./local");
|
|
5
|
+
const remote_1 = require("./remote");
|
|
6
|
+
/**
 * Build the embedding provider selected by `config.provider`.
 *
 * `'local'` yields a LocalEmbeddingProvider; `'openai'` and `'remote'` both
 * yield a RemoteEmbeddingProvider. The config object is passed through as-is.
 *
 * @param config - Embedding configuration.
 * @returns The provider instance for the requested backend.
 * @throws Error when `config.provider` is none of the supported values.
 */
function createEmbeddingProvider(config) {
    switch (config.provider) {
        case 'local':
            return new local_1.LocalEmbeddingProvider(config);
        case 'openai':
        case 'remote':
            return new remote_1.RemoteEmbeddingProvider(config);
        default:
            throw new Error(`Unsupported embedding provider: ${config.provider}`);
    }
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/** Contract shared by the embedding backends: text in, numeric vectors out. */
export interface EmbeddingProvider {
|
|
2
|
+
/**
|
|
3
|
+
* Generate embedding for text
|
|
4
|
+
* @param text - Input text
* @returns Promise resolving to the embedding vector for `text`
|
|
5
|
+
*/
|
|
6
|
+
generateEmbedding(text: string): Promise<number[]>;
|
|
7
|
+
/**
|
|
8
|
+
* Generate embeddings for multiple texts
|
|
9
|
+
* @param texts - Array of input texts
* @returns Promise resolving to an array of embedding vectors
|
|
10
|
+
*/
|
|
11
|
+
generateEmbeddings(texts: string[]): Promise<number[][]>;
|
|
12
|
+
}
|
|
13
|
+
/** Settings used to construct an embedding provider. */
export interface EmbeddingConfig {
|
|
14
|
+
/** Backend selector; the compiled factory routes 'local' to the local provider and 'remote'/'openai' to the remote one. */
provider: 'local' | 'remote' | 'openai';
|
|
15
|
+
/** Model identifier handed to the selected backend. */
model_name: string;
|
|
16
|
+
/** Optional credential; the compiled remote provider sends it as a Bearer token. */
api_key?: string;
|
|
17
|
+
/** Optional API base URL; the compiled remote provider falls back to 'https://api.openai.com/v1'. */
api_base?: string;
|
|
18
|
+
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { EmbeddingProvider, EmbeddingConfig } from './interface';
|
|
2
|
+
/** Overrides how node-llama-cpp is loaded; the compiled implementation calls `importer` immediately and caches the promise it returns. */
export declare const setNodeLlamaCppImporter: (importer: () => Promise<any>) => void;
|
|
3
|
+
/** Embedding provider that runs a model in-process through node-llama-cpp; its runtime state is created on first use. */
export declare class LocalEmbeddingProvider implements EmbeddingProvider {
|
|
4
|
+
private llama;
|
|
5
|
+
private model;
|
|
6
|
+
private context;
|
|
7
|
+
private initPromise;
|
|
8
|
+
private modelPath;
|
|
9
|
+
/** @param config - Embedding settings; the compiled constructor reads only `model_name`. */
constructor(config: EmbeddingConfig);
|
|
10
|
+
private ensureInitialized;
|
|
11
|
+
private doInitialize;
|
|
12
|
+
/** Embeds one text; the compiled implementation returns the vector scaled to unit length. */
generateEmbedding(text: string): Promise<number[]>;
|
|
13
|
+
/** Embeds each text, returning vectors in input order. */
generateEmbeddings(texts: string[]): Promise<number[][]>;
|
|
14
|
+
}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.LocalEmbeddingProvider = exports.setNodeLlamaCppImporter = void 0;
|
|
4
|
+
// import type { Llama, LlamaEmbeddingContext, LlamaModel } from 'node-llama-cpp';
|
|
5
|
+
const DEFAULT_LOCAL_MODEL = "hf:CompendiumLabs/bge-small-zh-v1.5-gguf/bge-small-zh-v1.5-f16.gguf";
|
|
6
|
+
let nodeLlamaImportPromise = null;
|
|
7
|
+
/**
 * Replace the loader used for node-llama-cpp.
 * The importer is invoked right away and the promise it returns becomes the
 * cached module promise that later loads reuse.
 *
 * @param importer - Zero-argument function returning a promise of the module.
 */
const setNodeLlamaCppImporter = (importer) => {
    const pendingModule = importer();
    nodeLlamaImportPromise = pendingModule;
};
|
|
10
|
+
exports.setNodeLlamaCppImporter = setNodeLlamaCppImporter;
|
|
11
|
+
/**
 * Resolve the node-llama-cpp module, starting the dynamic import only when no
 * module promise has been cached yet.
 *
 * @returns The cached promise's resolved module.
 */
const importNodeLlamaCpp = async () => {
    nodeLlamaImportPromise = nodeLlamaImportPromise || import("node-llama-cpp");
    return nodeLlamaImportPromise;
};
|
|
17
|
+
/**
 * Embedding provider that runs a model in-process through node-llama-cpp.
 * The runtime, the model and the embedding context are created on first use
 * and reused by every later call.
 */
class LocalEmbeddingProvider {
    llama = null;
    model = null;
    context = null;
    initPromise = null;
    modelPath;
    /**
     * @param config - Embedding settings; only `model_name` is read here.
     */
    constructor(config) {
        // Override transformers.js default with node-llama-cpp default
        this.modelPath = config.model_name === 'Xenova/all-MiniLM-L6-v2'
            ? DEFAULT_LOCAL_MODEL
            : (config.model_name || DEFAULT_LOCAL_MODEL);
    }
    /**
     * Make sure the embedding context exists, sharing one in-flight
     * initialization between concurrent callers.
     */
    async ensureInitialized() {
        if (this.context) {
            return;
        }
        if (!this.initPromise) {
            this.initPromise = this.doInitialize().catch((err) => {
                // Forget the failed attempt so the next call can retry instead of
                // replaying the same rejection forever.
                this.initPromise = null;
                throw err;
            });
        }
        return this.initPromise;
    }
    /**
     * Load the runtime, the model file and the embedding context, skipping any
     * stage a previous attempt already completed.
     *
     * @throws Error "Local embeddings unavailable. Reason: ..." wrapping the
     *         underlying failure as `cause`.
     */
    async doInitialize() {
        try {
            const { getLlama, resolveModelFile, LlamaLogLevel } = await importNodeLlamaCpp();
            if (!this.llama) {
                this.llama = await getLlama({ logLevel: LlamaLogLevel.error });
            }
            if (!this.model) {
                const resolved = await resolveModelFile(this.modelPath);
                this.model = await this.llama.loadModel({ modelPath: resolved });
            }
            if (!this.context) {
                this.context = await this.model.createEmbeddingContext();
            }
        }
        catch (err) {
            const detail = err instanceof Error ? err.message : String(err);
            throw new Error(`Local embeddings unavailable. Reason: ${detail}`, {
                cause: err,
            });
        }
    }
    /**
     * Embed one text and scale the vector to unit length.
     *
     * Non-finite components are treated as 0. An all-zero vector is returned
     * unscaled. The vector object returned by the context is never modified.
     *
     * @param text - Input text.
     * @returns A plain array with the normalized components.
     */
    async generateEmbedding(text) {
        await this.ensureInitialized();
        const { vector } = await this.context.getEmbeddingFor(text);
        // Work on a copy so the library-owned vector stays untouched.
        const cleaned = Array.from(vector, (value) => (Number.isFinite(value) ? value : 0));
        let sumSq = 0;
        for (const value of cleaned) {
            sumSq += value * value;
        }
        const magnitude = Math.sqrt(sumSq);
        if (!(magnitude > 0)) {
            // Zero vector case
            return cleaned;
        }
        const scale = 1.0 / magnitude;
        return cleaned.map((value) => value * scale);
    }
    /**
     * Embed several texts, returning vectors in input order.
     * All requests are issued at once through Promise.all; no concurrency
     * limit is applied here.
     *
     * @param texts - Input texts.
     */
    async generateEmbeddings(texts) {
        await this.ensureInitialized();
        return Promise.all(texts.map(text => this.generateEmbedding(text)));
    }
}
|
|
104
|
+
exports.LocalEmbeddingProvider = LocalEmbeddingProvider;
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { EmbeddingProvider, EmbeddingConfig } from './interface';
|
|
2
|
+
/** Embedding provider that obtains vectors from an HTTP embeddings endpoint. */
export declare class RemoteEmbeddingProvider implements EmbeddingProvider {
|
|
3
|
+
private apiKey;
|
|
4
|
+
private modelName;
|
|
5
|
+
private apiBase;
|
|
6
|
+
/** @param config - Embedding settings; the compiled constructor reads `api_key`, `model_name` and `api_base`. */
constructor(config: EmbeddingConfig);
|
|
7
|
+
/** Embeds one text; the compiled implementation delegates to `generateEmbeddings`. */
generateEmbedding(text: string): Promise<number[]>;
|
|
8
|
+
/** Embeds several texts in one request, returning vectors ordered by the response's `index` field. */
generateEmbeddings(texts: string[]): Promise<number[][]>;
|
|
9
|
+
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.RemoteEmbeddingProvider = void 0;
|
|
4
|
+
/**
 * Embedding provider that POSTs texts to `<api_base>/embeddings` and reads the
 * vectors from the `data` array of the JSON response.
 */
class RemoteEmbeddingProvider {
    apiKey;
    modelName;
    apiBase;
    /**
     * @param config - Embedding settings. `api_key` defaults to `''` and
     *                 `api_base` to `'https://api.openai.com/v1'`.
     */
    constructor(config) {
        this.apiKey = config.api_key || '';
        this.modelName = config.model_name;
        // Drop trailing slashes so the request URL never contains '//embeddings'.
        this.apiBase = (config.api_base || 'https://api.openai.com/v1').replace(/\/+$/, '');
    }
    /**
     * Embed a single text.
     *
     * @param text - Input text.
     * @returns The embedding vector for `text`.
     */
    async generateEmbedding(text) {
        const [embedding] = await this.generateEmbeddings([text]);
        return embedding;
    }
    /**
     * Embed several texts with one request.
     *
     * @param texts - Input texts; an empty array resolves to `[]` without a request.
     * @returns One vector per input, ordered by the `index` field of each item.
     * @throws Error "Remote API error: <status> <body>" on a non-OK response and
     *         "Unexpected API response format" when `data` is not an array
     *         holding exactly one item per input.
     */
    async generateEmbeddings(texts) {
        if (texts.length === 0) {
            return [];
        }
        const headers = { 'Content-Type': 'application/json' };
        if (this.apiKey) {
            // An empty key would produce a malformed "Bearer " header, so omit it.
            headers['Authorization'] = `Bearer ${this.apiKey}`;
        }
        const response = await fetch(`${this.apiBase}/embeddings`, {
            method: 'POST',
            headers,
            body: JSON.stringify({
                input: texts,
                model: this.modelName
            })
        });
        if (!response.ok) {
            const error = await response.text();
            throw new Error(`Remote API error: ${response.status} ${error}`);
        }
        const data = await response.json();
        const items = data ? data.data : undefined;
        if (!Array.isArray(items) || items.length !== texts.length) {
            throw new Error('Unexpected API response format');
        }
        // Ensure order matches input; sort a copy so the parsed response is left untouched.
        return [...items].sort((a, b) => a.index - b.index).map((item) => item.embedding);
    }
}
|
|
42
|
+
exports.RemoteEmbeddingProvider = RemoteEmbeddingProvider;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { MetadataProvider, MetadataConfig } from './interface';
|
|
2
|
+
import { DocumentSchema } from '../../client/lancedb';
|
|
3
|
+
/**
 * Metadata provider bound to a remote API endpoint.
 *
 * In the compiled implementation the constructor throws when
 * `remote_api_endpoint` is missing, and every data operation currently rejects
 * with "Remote metadata service not implemented yet".
 */
export declare class RemoteMetadataProvider implements MetadataProvider {
    private config;
    constructor(config: MetadataConfig);
    connect(): Promise<void>;
    addAssets(docs: DocumentSchema[]): Promise<void>;
    search(query: string, limit?: number, filter?: string): Promise<DocumentSchema[]>;
    /** Accepts `filter` so the signature matches MetadataProvider.list. */
    list(limit?: number, filter?: string): Promise<DocumentSchema[]>;
    delete(filter: string): Promise<void>;
    generateEmbedding(text: string): Promise<number[]>;
}
|
|
13
|
+
/** Builds the metadata provider selected by `config.type`; the compiled implementation treats a missing type as 'local' and throws on a missing config or an unsupported type. */
export declare function createMetadataProvider(config: MetadataConfig): MetadataProvider;
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.RemoteMetadataProvider = void 0;
|
|
4
|
+
exports.createMetadataProvider = createMetadataProvider;
|
|
5
|
+
const local_1 = require("./local");
|
|
6
|
+
/**
 * Metadata provider bound to a remote API endpoint.
 * Only construction and `connect` do anything today; every data operation
 * rejects with "Remote metadata service not implemented yet".
 */
class RemoteMetadataProvider {
    config;
    /**
     * @param config - Metadata configuration.
     * @throws Error when `config.remote_api_endpoint` is missing.
     */
    constructor(config) {
        this.config = config;
        if (!config.remote_api_endpoint) {
            throw new Error("Missing remote API endpoint configuration");
        }
    }
    /** Logs the configured endpoint; no request is made. */
    async connect() {
        // TODO: Implement health check or auth verification
        console.log("Connected to remote metadata service at", this.config.remote_api_endpoint);
    }
    /** Not implemented; always rejects. */
    async addAssets(docs) {
        throw new Error('Remote metadata service not implemented yet');
    }
    /** Not implemented; always rejects. */
    async search(query, limit, filter) {
        throw new Error('Remote metadata service not implemented yet');
    }
    /**
     * Not implemented; always rejects.
     * Takes `filter` so the signature matches the other providers' `list`.
     */
    async list(limit, filter) {
        throw new Error('Remote metadata service not implemented yet');
    }
    /** Not implemented; always rejects. */
    async delete(filter) {
        throw new Error('Remote metadata service not implemented yet');
    }
    /** Not implemented; always rejects. */
    async generateEmbedding(text) {
        throw new Error('Remote metadata service not implemented yet');
    }
}
|
|
34
|
+
exports.RemoteMetadataProvider = RemoteMetadataProvider;
|
|
35
|
+
/**
 * Build the metadata provider selected by `config.type`.
 *
 * @param config - Metadata configuration; a missing `type` is treated as 'local'.
 * @returns A LocalMetadataProvider or a RemoteMetadataProvider.
 * @throws Error when `config` is missing or `config.type` is unsupported.
 */
function createMetadataProvider(config) {
    if (!config) {
        throw new Error('Metadata configuration is missing');
    }
    const kind = config.type;
    // Default to local if type is missing but config exists
    if (!kind || kind === 'local') {
        return new local_1.LocalMetadataProvider(config);
    }
    if (kind === 'remote') {
        return new RemoteMetadataProvider(config);
    }
    throw new Error(`Unsupported metadata storage type: ${config.type}`);
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { DocumentSchema } from '../../client/lancedb';
|
|
2
|
+
import { EmbeddingConfig } from '../embedding/interface';
|
|
3
|
+
/** Contract for the document metadata store: connect, add, search, list, delete and embed. */
export interface MetadataProvider {
|
|
4
|
+
/** Prepares the provider for use. */
connect(): Promise<void>;
|
|
5
|
+
/** Stores the given documents. */
addAssets(docs: DocumentSchema[]): Promise<void>;
|
|
6
|
+
/** Returns documents matching `query`, optionally capped by `limit` and narrowed by `filter` (filter syntax is backend-defined — TODO confirm). */
search(query: string, limit?: number, filter?: string): Promise<DocumentSchema[]>;
|
|
7
|
+
/** Returns stored documents, optionally capped by `limit` and narrowed by `filter`. */
list(limit?: number, filter?: string): Promise<DocumentSchema[]>;
|
|
8
|
+
/** Deletes the documents selected by `filter`. */
delete(filter: string): Promise<void>;
|
|
9
|
+
/** Returns the embedding vector for `text`. */
generateEmbedding(text: string): Promise<number[]>;
|
|
10
|
+
}
|
|
11
|
+
/** Settings used to construct a metadata provider. */
export interface MetadataConfig {
|
|
12
|
+
/** Backend selector. */
type: 'local' | 'remote';
|
|
13
|
+
/** LanceDB location for the local backend; the compiled local provider defaults it to ~/.openclaw/contextlake/data. */
lancedb_uri?: string;
|
|
14
|
+
/** Endpoint of the remote backend; the compiled remote provider requires it. */
remote_api_endpoint?: string;
|
|
15
|
+
/** Credential for the remote backend (not read by any code visible here). */
remote_api_key?: string;
|
|
16
|
+
/** Embedding settings; the compiled local provider defaults to a local bge-small-zh GGUF model. */
embedding?: EmbeddingConfig;
|
|
17
|
+
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { MetadataProvider, MetadataConfig } from './interface';
|
|
2
|
+
import { DocumentSchema } from '../../client/lancedb';
|
|
3
|
+
/** Metadata provider backed by a LanceDB client plus an embedding provider; each data method forwards to the client. */
export declare class LocalMetadataProvider implements MetadataProvider {
|
|
4
|
+
private client;
|
|
5
|
+
private embeddingProvider;
|
|
6
|
+
/** @param config - Metadata settings; `lancedb_uri` and `embedding` get defaults in the compiled constructor when omitted. */
constructor(config: MetadataConfig);
|
|
7
|
+
connect(): Promise<void>;
|
|
8
|
+
addAssets(docs: DocumentSchema[]): Promise<void>;
|
|
9
|
+
search(query: string, limit?: number, filter?: string): Promise<DocumentSchema[]>;
|
|
10
|
+
list(limit?: number, filter?: string): Promise<DocumentSchema[]>;
|
|
11
|
+
delete(filter: string): Promise<void>;
|
|
12
|
+
/** Forwards to the embedding provider rather than the client. */
generateEmbedding(text: string): Promise<number[]>;
|
|
13
|
+
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.LocalMetadataProvider = void 0;
|
|
4
|
+
const lancedb_1 = require("../../client/lancedb");
|
|
5
|
+
const factory_1 = require("../embedding/factory");
|
|
6
|
+
/**
 * Metadata provider backed by a LanceDB client and an embedding provider.
 * Every data method forwards to the client; `generateEmbedding` forwards to
 * the embedding provider.
 */
class LocalMetadataProvider {
    client;
    embeddingProvider;
    /**
     * @param config - Metadata configuration. It is only read, never modified:
     *                 defaults for `lancedb_uri` and `embedding` are applied to
     *                 local values so a shared or frozen config stays intact.
     */
    constructor(config) {
        let lancedbUri = config.lancedb_uri;
        if (!lancedbUri) {
            // Fallback to default if somehow not passed
            // Use an absolute path or path relative to home to avoid issues when running in different cwds via OpenClaw daemon
            const os = require('os');
            const path = require('path');
            lancedbUri = path.join(os.homedir(), '.openclaw', 'contextlake', 'data');
        }
        // Ensure embedding config exists
        const embedding = config.embedding || {
            provider: 'local',
            model_name: 'hf:CompendiumLabs/bge-small-zh-v1.5-gguf/bge-small-zh-v1.5-f16.gguf'
        };
        this.embeddingProvider = (0, factory_1.createEmbeddingProvider)(embedding);
        this.client = new lancedb_1.ContextLakeLanceDBClient({ uri: lancedbUri }, this.embeddingProvider);
    }
    /** Opens the underlying client connection. */
    async connect() {
        await this.client.connect();
    }
    /** Stores the given documents through the client. */
    async addAssets(docs) {
        await this.client.addAssets(docs);
    }
    /** Forwards a search to the client and returns its result. */
    async search(query, limit, filter) {
        return await this.client.search(query, limit, filter);
    }
    /** Forwards a listing request to the client and returns its result. */
    async list(limit, filter) {
        return await this.client.list(limit, filter);
    }
    /** Deletes the documents selected by `filter` through the client. */
    async delete(filter) {
        await this.client.delete(filter);
    }
    /** Returns the embedding provider's vector for `text`. */
    async generateEmbedding(text) {
        return await this.embeddingProvider.generateEmbedding(text);
    }
}
|
|
49
|
+
exports.LocalMetadataProvider = LocalMetadataProvider;
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.createStorageProvider = createStorageProvider;
|
|
4
|
+
const local_1 = require("./local");
|
|
5
|
+
const tos_1 = require("../../client/tos");
|
|
6
|
+
/**
 * Build the storage provider selected by `config.type`.
 *
 * @param config - Storage configuration. `'local'` uses `local_base_dir`;
 *                 `'tos'` requires `tos.region` and `tos.path`.
 * @returns A LocalStorageProvider or a ContextLakeTosClient.
 * @throws Error when the TOS settings are incomplete or the type is unsupported.
 */
function createStorageProvider(config) {
    switch (config.type) {
        case 'local':
            return new local_1.LocalStorageProvider(config.local_base_dir);
        case 'tos': {
            const tos = config.tos;
            if (!(tos && tos.region && tos.path)) {
                throw new Error('Missing TOS configuration: region and path required');
            }
            return new tos_1.ContextLakeTosClient(tos);
        }
        default:
            throw new Error(`Unsupported storage type: ${config.type}`);
    }
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/** Contract for file storage backends: upload, download and delete by storage URL. */
export interface StorageProvider {
|
|
2
|
+
/**
|
|
3
|
+
* Upload file content
|
|
4
|
+
* @param fileName - Name of the file
|
|
5
|
+
* @param buffer - File content
|
|
6
|
+
* @returns The storage URL (e.g. file:///... or tos://...)
|
|
7
|
+
*/
|
|
8
|
+
uploadFile(fileName: string, buffer: Buffer): Promise<string>;
|
|
9
|
+
/**
|
|
10
|
+
* Download file content
|
|
11
|
+
* @param url - Storage URL
|
|
12
|
+
* @returns File content buffer
|
|
13
|
+
*/
|
|
14
|
+
downloadFile(url: string): Promise<Buffer>;
|
|
15
|
+
/**
|
|
16
|
+
* Delete file
|
|
17
|
+
* @param url - Storage URL
|
|
18
|
+
*/
|
|
19
|
+
deleteFile(url: string): Promise<void>;
|
|
20
|
+
}
|
|
21
|
+
/** Settings used to construct a storage provider. */
export interface StorageConfig {
|
|
22
|
+
/** Backend selector. */
type: 'local' | 'tos';
|
|
23
|
+
/** Base directory for the local backend; the compiled local provider defaults to './data/files'. */
local_base_dir?: string;
|
|
24
|
+
/** Settings for the 'tos' backend; the compiled factory requires `region` and `path`. */
tos?: {
|
|
25
|
+
access_key?: string;
|
|
26
|
+
|
|
27
|
+
secret_key?: string;
|
|
28
|
+
region?: string;
|
|
29
|
+
path?: string;
|
|
30
|
+
endpoint?: string;
|
|
31
|
+
sts_token?: string;
|
|
32
|
+
};
|
|
33
|
+
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { StorageProvider } from './interface';
|
|
2
|
+
/** Storage provider that keeps files in a directory on the local file system and addresses them with `file://` URLs. */
export declare class LocalStorageProvider implements StorageProvider {
|
|
3
|
+
private baseDir;
|
|
4
|
+
/** @param baseDir - Directory for stored files; the compiled constructor defaults to './data/files' and creates it when missing. */
constructor(baseDir?: string);
|
|
5
|
+
uploadFile(fileName: string, buffer: Buffer): Promise<string>;
|
|
6
|
+
downloadFile(url: string): Promise<Buffer>;
|
|
7
|
+
deleteFile(url: string): Promise<void>;
|
|
8
|
+
private parseUrl;
|
|
9
|
+
}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Re-export helper: makes o[k2] mirror m[k] (k2 defaults to k).
// An already-installed this.__createBinding is reused when one exists.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
// Use a forwarding getter when m has no own descriptor for k, when k is an accessor on a module not flagged __esModule, or when k is a writable/configurable data property.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
// Variant chosen when Object.create is falsy: copy the current value instead of defining a property.
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.LocalStorageProvider = void 0;
|
|
37
|
+
const fs = __importStar(require("fs"));
|
|
38
|
+
const path = __importStar(require("path"));
|
|
39
|
+
/**
 * Storage provider that keeps files under one base directory on the local
 * file system and addresses them with `file://<absolute path>` URLs.
 */
class LocalStorageProvider {
    baseDir;
    /**
     * @param baseDir - Directory for stored files; resolved to an absolute path
     *                  and created when it does not exist yet.
     */
    constructor(baseDir = './data/files') {
        this.baseDir = path.resolve(baseDir);
        if (!fs.existsSync(this.baseDir)) {
            fs.mkdirSync(this.baseDir, { recursive: true });
        }
    }
    /**
     * Write `buffer` to `<baseDir>/<fileName>`, creating missing parent directories.
     *
     * @param fileName - File name, optionally with sub-directories, relative to the base directory.
     * @param buffer - File content.
     * @returns The `file://` URL of the written file.
     * @throws Error "Invalid file name: ..." when the name resolves to the base
     *         directory itself or to a location outside it (e.g. via `..`).
     */
    async uploadFile(fileName, buffer) {
        const filePath = path.join(this.baseDir, fileName);
        const relative = path.relative(this.baseDir, filePath);
        const escapesBase = relative === '..'
            || relative.startsWith(`..${path.sep}`)
            || path.isAbsolute(relative);
        if (relative === '' || escapesBase) {
            throw new Error(`Invalid file name: ${fileName}`);
        }
        // Also recreates the base directory if it was removed after construction.
        await fs.promises.mkdir(path.dirname(filePath), { recursive: true });
        await fs.promises.writeFile(filePath, buffer);
        return `file://${filePath}`;
    }
    /**
     * Read the file behind a `file://` URL.
     *
     * @param url - Storage URL.
     * @returns The file content.
     */
    async downloadFile(url) {
        const filePath = this.parseUrl(url);
        return await fs.promises.readFile(filePath);
    }
    /**
     * Delete the file behind a `file://` URL; a file that does not exist is ignored.
     *
     * @param url - Storage URL.
     */
    async deleteFile(url) {
        const filePath = this.parseUrl(url);
        try {
            await fs.promises.unlink(filePath);
        }
        catch (err) {
            // Unlinking directly avoids an exists-then-unlink race; "not there" stays a no-op.
            const code = err ? err.code : undefined;
            if (code !== 'ENOENT' && code !== 'ENOTDIR') {
                throw err;
            }
        }
    }
    /**
     * Strip the `file://` scheme from a storage URL.
     *
     * @param url - Storage URL.
     * @returns The path portion of the URL.
     * @throws Error "Invalid local file URL: ..." when the URL does not start with `file://`.
     */
    parseUrl(url) {
        const scheme = 'file://';
        if (!url.startsWith(scheme)) {
            throw new Error(`Invalid local file URL: ${url}`);
        }
        return url.slice(scheme.length);
    }
}
|
|
72
|
+
exports.LocalStorageProvider = LocalStorageProvider;
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/** Input accepted by `connectDataSource`. Field meanings below are read off the names and types only — the implementation is not visible here; TODO confirm. */
export interface ConnectParams {
|
|
2
|
+
datasource_name: string;
|
|
3
|
+
/** One of the listed vendor identifiers. */
vendor: 'volcengine' | 'alibaba' | 'tencent' | 'aws' | 'local';
|
|
4
|
+
endpoint?: string;
|
|
5
|
+
access_key?: string;
|
|
6
|
+
secret_key?: string;
|
|
7
|
+
region?: string;
|
|
8
|
+
/** Required, together with `prefix`. */
bucket: string;
|
|
9
|
+
prefix: string;
|
|
10
|
+
/** Optional; presumably a per-file row sampling limit — verify against the implementation. */
sample_rows?: number;
|
|
11
|
+
}
|
|
12
|
+
/** Result resolved by `connectDataSource`; not exported from this module. */
interface ConnectResult {
|
|
13
|
+
/** Outcome flag; `error` is the optional message field that presumably accompanies 'error' — TODO confirm. */
status: 'success' | 'error';
|
|
14
|
+
datasource_name: string;
|
|
15
|
+
db_path: string;
|
|
16
|
+
env_path: string;
|
|
17
|
+
tables: string[];
|
|
18
|
+
/** Optional file counts. */
summary?: {
|
|
19
|
+
total_files: number;
|
|
20
|
+
structured_files: number;
|
|
21
|
+
media_files: number;
|
|
22
|
+
};
|
|
23
|
+
error?: string;
|
|
24
|
+
}
|
|
25
|
+
/** Takes connection parameters (plus an optional, untyped context argument) and resolves to a ConnectResult. */
export declare function connectDataSource(params: ConnectParams, _ctx?: any): Promise<ConnectResult>;
|
|
26
|
+
export {};
|