@byted-las/contextlake-openclaw 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/README.md +64 -0
  2. package/bin/contextlake-openclaw.js +5 -0
  3. package/dist/index.d.ts +113 -0
  4. package/dist/index.js +73 -0
  5. package/dist/src/client/lancedb.d.ts +30 -0
  6. package/dist/src/client/lancedb.js +113 -0
  7. package/dist/src/client/tos.d.ts +19 -0
  8. package/dist/src/client/tos.js +81 -0
  9. package/dist/src/commands/cli.d.ts +6 -0
  10. package/dist/src/commands/cli.js +78 -0
  11. package/dist/src/commands/index.d.ts +1 -0
  12. package/dist/src/commands/index.js +139 -0
  13. package/dist/src/commands/slashcmd.d.ts +14 -0
  14. package/dist/src/commands/slashcmd.js +91 -0
  15. package/dist/src/commands/tools.d.ts +219 -0
  16. package/dist/src/commands/tools.js +286 -0
  17. package/dist/src/lib/actions/ingest.d.ts +8 -0
  18. package/dist/src/lib/actions/ingest.js +123 -0
  19. package/dist/src/lib/actions/manage.d.ts +15 -0
  20. package/dist/src/lib/actions/manage.js +91 -0
  21. package/dist/src/lib/actions/retrieve.d.ts +8 -0
  22. package/dist/src/lib/actions/retrieve.js +73 -0
  23. package/dist/src/processor/loader.d.ts +7 -0
  24. package/dist/src/processor/loader.js +83 -0
  25. package/dist/src/service/embedding/factory.d.ts +2 -0
  26. package/dist/src/service/embedding/factory.js +16 -0
  27. package/dist/src/service/embedding/interface.d.ts +18 -0
  28. package/dist/src/service/embedding/interface.js +2 -0
  29. package/dist/src/service/embedding/local.d.ts +14 -0
  30. package/dist/src/service/embedding/local.js +104 -0
  31. package/dist/src/service/embedding/remote.d.ts +9 -0
  32. package/dist/src/service/embedding/remote.js +42 -0
  33. package/dist/src/service/metadata/factory.d.ts +13 -0
  34. package/dist/src/service/metadata/factory.js +48 -0
  35. package/dist/src/service/metadata/interface.d.ts +17 -0
  36. package/dist/src/service/metadata/interface.js +2 -0
  37. package/dist/src/service/metadata/local.d.ts +13 -0
  38. package/dist/src/service/metadata/local.js +49 -0
  39. package/dist/src/service/storage/factory.d.ts +2 -0
  40. package/dist/src/service/storage/factory.js +19 -0
  41. package/dist/src/service/storage/interface.d.ts +32 -0
  42. package/dist/src/service/storage/interface.js +2 -0
  43. package/dist/src/service/storage/local.d.ts +9 -0
  44. package/dist/src/service/storage/local.js +72 -0
  45. package/dist/src/skills/las-data-profiler/index.d.ts +26 -0
  46. package/dist/src/skills/las-data-profiler/index.js +231 -0
  47. package/dist/src/skills/las-data-profiler/register.d.ts +1 -0
  48. package/dist/src/skills/las-data-profiler/register.js +19 -0
  49. package/dist/src/utils/config.d.ts +1 -0
  50. package/dist/src/utils/config.js +16 -0
  51. package/index.ts +78 -0
  52. package/openclaw.plugin.json +57 -0
  53. package/package.json +52 -0
  54. package/src/client/lancedb.ts +102 -0
  55. package/src/client/tos.ts +100 -0
  56. package/src/commands/cli.ts +77 -0
  57. package/src/commands/index.ts +156 -0
  58. package/src/commands/slashcmd.ts +95 -0
  59. package/src/commands/tools.ts +286 -0
  60. package/src/lib/actions/ingest.ts +103 -0
  61. package/src/lib/actions/manage.ts +107 -0
  62. package/src/lib/actions/retrieve.ts +90 -0
  63. package/src/processor/loader.ts +58 -0
  64. package/src/service/embedding/factory.ts +13 -0
  65. package/src/service/embedding/interface.ts +21 -0
  66. package/src/service/embedding/local.ts +118 -0
  67. package/src/service/embedding/remote.ts +45 -0
  68. package/src/service/metadata/factory.ts +52 -0
  69. package/src/service/metadata/interface.ts +19 -0
  70. package/src/service/metadata/local.ts +60 -0
  71. package/src/service/storage/factory.ts +16 -0
  72. package/src/service/storage/interface.ts +36 -0
  73. package/src/service/storage/local.ts +42 -0
  74. package/src/skills/contextlake-delete/SKILL.md +36 -0
  75. package/src/skills/contextlake-ingest/SKILL.md +40 -0
  76. package/src/skills/contextlake-list/SKILL.md +22 -0
  77. package/src/skills/contextlake-retrieve/SKILL.md +37 -0
  78. package/src/skills/las-data-profiler/SKILL.md +174 -0
  79. package/src/skills/las-data-profiler/index.ts +254 -0
  80. package/src/skills/las-data-profiler/register.ts +19 -0
  81. package/src/skills/las-data-profiler/s3_catalog.py +608 -0
  82. package/src/utils/config.ts +13 -0
@@ -0,0 +1,83 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { // Interop helper (an existing this.__createBinding is reused): exposes m[k] on o under key k2, which defaults to k.
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { // No usable descriptor: fall back to a live getter so later changes to m[k] stay visible.
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) { // Fallback when Object.create is unavailable: plain value copy.
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { // Interop helper: stores v as the enumerable "default" property of o.
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () { // Interop helper: turns a required module into a namespace-style object.
19
+ var ownKeys = function(o) { // Replaces itself on first call with Object.getOwnPropertyNames or a for-in based equivalent.
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod; // Modules flagged __esModule are returned untouched.
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod); // The original export (e.g. a function) is reachable only as result.default.
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.processFile = processFile;
37
+ exports.splitText = splitText;
38
+ const fs = __importStar(require("fs"));
39
+ const path = __importStar(require("path"));
40
+ // @ts-ignore
41
+ const pdf = __importStar(require("pdf-parse"));
42
+ const mammoth = __importStar(require("mammoth"));
43
/**
 * Read a file from disk and extract its plain-text content.
 *
 * `.pdf` files are parsed with pdf-parse and `.docx` files with mammoth's
 * `extractRawText`; every other extension (including `.txt` and `.md`) is
 * decoded as UTF-8. Whitespace runs are then collapsed to single spaces and
 * the result is trimmed.
 *
 * @param filePath - Path of the file to load.
 * @returns An object with `buffer` (the raw bytes), `text` (the cleaned text)
 *   and `type` (the lower-cased extension without its leading dot; empty when
 *   the path has no extension).
 * @throws Rejects with whatever the file read or the underlying parser throws.
 */
async function processFile(filePath) {
    // Asynchronous read so a large file does not block the event loop.
    const buffer = await fs.promises.readFile(filePath);
    const ext = path.extname(filePath).toLowerCase();
    let text;
    switch (ext) {
        case '.pdf': {
            // pdf-parse is loaded through __importStar. For a CommonJS module that
            // exports a bare function, that helper returns a namespace object whose
            // callable lives on `.default`, so calling `pdf(...)` directly throws.
            const parsePdf = typeof pdf === 'function' ? pdf : pdf.default;
            const pdfData = await parsePdf(buffer);
            text = pdfData.text;
            break;
        }
        case '.docx': {
            const extracted = await mammoth.extractRawText({ buffer });
            text = extracted.value;
            break;
        }
        default:
            // '.txt', '.md' and any unknown extension are treated as UTF-8 text.
            text = buffer.toString('utf-8');
    }
    // Basic cleaning; a parser that yields no text results in an empty string.
    text = (text ?? '').replace(/\s+/g, ' ').trim();
    return {
        buffer,
        text,
        type: ext.replace('.', '')
    };
}
71
/**
 * Split text into fixed-size character windows that overlap their neighbour.
 *
 * Each chunk starts `chunkSize - overlap` characters after the previous one.
 * Splitting stops as soon as a chunk reaches the end of the text, so the
 * result never ends with a short chunk that is fully contained in the
 * previous chunk's overlap region.
 *
 * @param text - Text to split; an empty or missing value yields `[]`.
 * @param chunkSize - Maximum characters per chunk (must be greater than 0).
 * @param overlap - Characters shared between consecutive chunks (must be
 *   smaller than `chunkSize`).
 * @returns The chunks in document order.
 * @throws RangeError when `chunkSize` is not positive or the window would not
 *   advance (`overlap >= chunkSize`), which would otherwise loop forever.
 */
function splitText(text, chunkSize = 500, overlap = 50) {
    const chunks = [];
    if (!text)
        return chunks;
    if (!(chunkSize > 0)) {
        throw new RangeError(`chunkSize must be greater than 0, got ${chunkSize}`);
    }
    const step = chunkSize - overlap;
    // `!(step > 0)` also rejects NaN, which would never advance the window.
    if (!(step > 0)) {
        throw new RangeError(`overlap (${overlap}) must be smaller than chunkSize (${chunkSize})`);
    }
    for (let start = 0; start < text.length; start += step) {
        const end = Math.min(start + chunkSize, text.length);
        chunks.push(text.slice(start, end));
        // The text is fully covered; another window would only repeat the tail.
        if (end === text.length)
            break;
    }
    return chunks;
}
@@ -0,0 +1,2 @@
1
+ import { EmbeddingProvider, EmbeddingConfig } from './interface';
2
+ export declare function createEmbeddingProvider(config: EmbeddingConfig): EmbeddingProvider; // Factory: returns the provider selected by config.provider and throws for an unsupported value.
@@ -0,0 +1,16 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.createEmbeddingProvider = createEmbeddingProvider;
4
+ const local_1 = require("./local");
5
+ const remote_1 = require("./remote");
6
/**
 * Build the embedding provider selected by `config.provider`.
 *
 * @param config - Embedding settings; `provider` picks the implementation.
 * @returns A local provider for `'local'`, a remote provider for `'openai'`
 *   or `'remote'`.
 * @throws Error for any other provider value.
 */
function createEmbeddingProvider(config) {
    switch (config.provider) {
        case 'local':
            return new local_1.LocalEmbeddingProvider(config);
        case 'openai':
        case 'remote':
            // Both names map to the same HTTP-based implementation.
            return new remote_1.RemoteEmbeddingProvider(config);
        default:
            throw new Error(`Unsupported embedding provider: ${config.provider}`);
    }
}
@@ -0,0 +1,18 @@
1
+ export interface EmbeddingProvider { // Contract implemented by LocalEmbeddingProvider and RemoteEmbeddingProvider.
2
+ /**
3
+ * Generate the embedding vector for a single text.
4
+ * @param text - Input text to embed
5
+ */
6
+ generateEmbedding(text: string): Promise<number[]>;
7
+ /**
8
+ * Generate embedding vectors for several texts; element i of the result belongs to texts[i].
9
+ * @param texts - Array of input texts to embed
10
+ */
11
+ generateEmbeddings(texts: string[]): Promise<number[][]>;
12
+ }
13
+ export interface EmbeddingConfig { // Settings consumed by createEmbeddingProvider and the provider constructors.
14
+ provider: 'local' | 'remote' | 'openai'; // 'local' selects LocalEmbeddingProvider; 'remote' and 'openai' both select RemoteEmbeddingProvider.
15
+ model_name: string; // Sent as `model` by the remote provider; used as the model file reference by the local provider.
16
+ api_key?: string; // Remote provider only: sent as a Bearer token ('' when omitted).
17
+ api_base?: string; // Remote provider only: base URL, defaulting to 'https://api.openai.com/v1'.
18
+ }
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -0,0 +1,14 @@
1
+ import { EmbeddingProvider, EmbeddingConfig } from './interface';
2
+ export declare const setNodeLlamaCppImporter: (importer: () => Promise<any>) => void; // Overrides how the node-llama-cpp module is obtained; the importer is invoked immediately and its promise is cached.
3
+ export declare class LocalEmbeddingProvider implements EmbeddingProvider { // Embedding backend that loads a model in-process through node-llama-cpp.
4
+ private llama; // Runtime handle returned by getLlama; null until initialized.
5
+ private model; // Model returned by loadModel; null until initialized.
6
+ private context; // Embedding context used for every request; null until initialized.
7
+ private initPromise; // Initialization promise shared by concurrent callers.
8
+ private modelPath; // Model reference handed to resolveModelFile.
9
+ constructor(config: EmbeddingConfig);
10
+ private ensureInitialized; // Starts initialization on first use and awaits it afterwards.
11
+ private doInitialize; // Loads runtime, model and context; failures are rethrown as "Local embeddings unavailable".
12
+ generateEmbedding(text: string): Promise<number[]>; // Resolves to the unit-length embedding; non-finite components count as 0.
13
+ generateEmbeddings(texts: string[]): Promise<number[][]>; // Embeds every text; results keep input order.
14
+ }
@@ -0,0 +1,104 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.LocalEmbeddingProvider = exports.setNodeLlamaCppImporter = void 0;
4
+ // import type { Llama, LlamaEmbeddingContext, LlamaModel } from 'node-llama-cpp';
5
+ const DEFAULT_LOCAL_MODEL = "hf:CompendiumLabs/bge-small-zh-v1.5-gguf/bge-small-zh-v1.5-f16.gguf"; // Model reference used when the config names no model or names 'Xenova/all-MiniLM-L6-v2'.
6
+ let nodeLlamaImportPromise = null; // Module-wide cache of the node-llama-cpp import promise; null until requested or injected.
7
+ const setNodeLlamaCppImporter = (importer) => { // Inject a custom loader: it is called right away and its promise replaces the cache.
8
+ nodeLlamaImportPromise = importer();
9
+ };
10
+ exports.setNodeLlamaCppImporter = setNodeLlamaCppImporter;
11
+ const importNodeLlamaCpp = async () => { // Resolve the node-llama-cpp module, importing it at most once unless a loader was injected.
12
+ if (!nodeLlamaImportPromise) {
13
+ nodeLlamaImportPromise = import("node-llama-cpp");
14
+ }
15
+ return nodeLlamaImportPromise;
16
+ };
17
/**
 * Embedding provider that runs a model in-process through node-llama-cpp.
 *
 * The runtime, model and embedding context are created lazily on first use
 * and shared by all later calls. Returned vectors are scaled to unit length.
 */
class LocalEmbeddingProvider {
    /** Upper bound on embedding requests issued at once by generateEmbeddings. */
    static MAX_CONCURRENCY = 4;
    llama = null;
    model = null;
    context = null;
    initPromise = null;
    modelPath;
    /**
     * @param config - Embedding settings. `model_name` is the model reference;
     *   a missing value, or the transformers.js default
     *   'Xenova/all-MiniLM-L6-v2', is replaced by DEFAULT_LOCAL_MODEL.
     */
    constructor(config) {
        // Override transformers.js default with node-llama-cpp default
        this.modelPath = config.model_name === 'Xenova/all-MiniLM-L6-v2'
            ? DEFAULT_LOCAL_MODEL
            : (config.model_name || DEFAULT_LOCAL_MODEL);
    }
    /**
     * Make sure the embedding context exists, sharing one in-flight
     * initialization between concurrent callers.
     *
     * A failed attempt is not cached: the stored promise is cleared so the next
     * call retries instead of replaying the same rejection forever.
     */
    async ensureInitialized() {
        if (this.context) {
            return;
        }
        if (!this.initPromise) {
            this.initPromise = this.doInitialize().catch((err) => {
                this.initPromise = null;
                throw err;
            });
        }
        return this.initPromise;
    }
    /**
     * Load the runtime, resolve and load the model, then create the embedding
     * context. Steps that already succeeded are skipped, so a retry resumes
     * where the previous attempt stopped.
     *
     * @throws Error "Local embeddings unavailable. Reason: ..." wrapping the
     *   underlying failure as `cause`.
     */
    async doInitialize() {
        try {
            const { getLlama, resolveModelFile, LlamaLogLevel } = await importNodeLlamaCpp();
            if (!this.llama) {
                this.llama = await getLlama({ logLevel: LlamaLogLevel.error });
            }
            if (!this.model) {
                const resolved = await resolveModelFile(this.modelPath);
                this.model = await this.llama.loadModel({ modelPath: resolved });
            }
            if (!this.context) {
                this.context = await this.model.createEmbeddingContext();
            }
        }
        catch (err) {
            const detail = err instanceof Error ? err.message : String(err);
            throw new Error(`Local embeddings unavailable. Reason: ${detail}`, {
                cause: err,
            });
        }
    }
    /**
     * Embed one text.
     *
     * @param text - Input text.
     * @returns A plain array holding the embedding scaled to unit length.
     *   Non-finite components are treated as 0; an all-zero vector is returned
     *   unchanged.
     */
    async generateEmbedding(text) {
        await this.ensureInitialized();
        const embedding = await this.context.getEmbeddingFor(text);
        const vector = embedding.vector; // TypedArray
        const len = vector.length;
        const result = new Array(len);
        let sumSq = 0;
        // Sanitize into the result array instead of writing into the library's
        // vector, which this code does not own.
        for (let i = 0; i < len; i++) {
            const val = Number.isFinite(vector[i]) ? vector[i] : 0;
            result[i] = val;
            sumSq += val * val;
        }
        const magnitude = Math.sqrt(sumSq);
        if (magnitude > 0) {
            const scale = 1.0 / magnitude;
            for (let i = 0; i < len; i++) {
                result[i] *= scale;
            }
        }
        return result;
    }
    /**
     * Embed several texts.
     *
     * At most MAX_CONCURRENCY requests are in flight at once, so a large batch
     * does not queue every text on the embedding context simultaneously.
     *
     * @param texts - Input texts.
     * @returns One embedding per text, in input order.
     */
    async generateEmbeddings(texts) {
        await this.ensureInitialized();
        const results = new Array(texts.length);
        let next = 0;
        // Each worker claims the next unprocessed index until none are left.
        const worker = async () => {
            while (next < texts.length) {
                const index = next++;
                results[index] = await this.generateEmbedding(texts[index]);
            }
        };
        const workerCount = Math.min(LocalEmbeddingProvider.MAX_CONCURRENCY, texts.length);
        await Promise.all(Array.from({ length: workerCount }, () => worker()));
        return results;
    }
}
104
+ exports.LocalEmbeddingProvider = LocalEmbeddingProvider;
@@ -0,0 +1,9 @@
1
+ import { EmbeddingProvider, EmbeddingConfig } from './interface';
2
+ export declare class RemoteEmbeddingProvider implements EmbeddingProvider { // Embedding backend that POSTs to `<api_base>/embeddings` over HTTP.
3
+ private apiKey; // Sent as a Bearer token; '' when config.api_key is unset.
4
+ private modelName; // Sent as the `model` field of each request.
5
+ private apiBase; // Base URL; defaults to 'https://api.openai.com/v1'.
6
+ constructor(config: EmbeddingConfig);
7
+ generateEmbedding(text: string): Promise<number[]>; // Embeds one text by delegating to generateEmbeddings.
8
+ generateEmbeddings(texts: string[]): Promise<number[][]>; // Sends all texts in one request; results are ordered by the response `index` field.
9
+ }
@@ -0,0 +1,42 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.RemoteEmbeddingProvider = void 0;
4
/**
 * Embedding provider backed by an HTTP embeddings endpoint
 * (`POST <api_base>/embeddings` with `{ input, model }`).
 */
class RemoteEmbeddingProvider {
    apiKey;
    modelName;
    apiBase;
    /**
     * @param config - Embedding settings: `api_key` (defaults to ''),
     *   `model_name`, and `api_base` (defaults to 'https://api.openai.com/v1').
     */
    constructor(config) {
        this.apiKey = config.api_key || '';
        this.modelName = config.model_name;
        // Trailing slashes are dropped so the request URL never contains '//embeddings'.
        this.apiBase = (config.api_base || 'https://api.openai.com/v1').replace(/\/+$/, '');
    }
    /**
     * Embed a single text.
     *
     * @param text - Input text.
     * @returns The embedding vector for `text`.
     */
    async generateEmbedding(text) {
        const [embedding] = await this.generateEmbeddings([text]);
        return embedding;
    }
    /**
     * Embed several texts with one request.
     *
     * @param texts - Input texts; an empty array resolves to `[]` without a
     *   network call.
     * @returns One embedding per text, in input order.
     * @throws Error "Remote API error: <status> <body>" on a non-2xx response.
     * @throws Error "Unexpected API response format" when the payload has no
     *   `data` array or its length differs from `texts.length`.
     */
    async generateEmbeddings(texts) {
        if (texts.length === 0) {
            return [];
        }
        const response = await fetch(`${this.apiBase}/embeddings`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'Authorization': `Bearer ${this.apiKey}`
            },
            body: JSON.stringify({
                input: texts,
                model: this.modelName
            })
        });
        if (!response.ok) {
            const error = await response.text();
            throw new Error(`Remote API error: ${response.status} ${error}`);
        }
        const data = await response.json();
        const items = data ? data.data : undefined;
        // A count mismatch would silently pair embeddings with the wrong texts.
        if (!Array.isArray(items) || items.length !== texts.length) {
            throw new Error('Unexpected API response format');
        }
        // Ensure order matches input (sorting a copy leaves the payload untouched).
        return [...items].sort((a, b) => a.index - b.index).map((item) => item.embedding);
    }
}
42
+ exports.RemoteEmbeddingProvider = RemoteEmbeddingProvider;
@@ -0,0 +1,13 @@
1
+ import { MetadataProvider, MetadataConfig } from './interface';
2
+ import { DocumentSchema } from '../../client/lancedb';
3
+ export declare class RemoteMetadataProvider implements MetadataProvider { // Placeholder remote backend: every data method currently rejects with "not implemented yet".
4
+ private config;
5
+ constructor(config: MetadataConfig); // Throws when config.remote_api_endpoint is missing.
6
+ connect(): Promise<void>; // Only logs the configured endpoint; no request is made yet.
7
+ addAssets(docs: DocumentSchema[]): Promise<void>;
8
+ search(query: string, limit?: number, filter?: string): Promise<DocumentSchema[]>;
9
+ list(limit?: number): Promise<DocumentSchema[]>; // NOTE(review): MetadataProvider.list also accepts an optional `filter`; this signature omits it.
10
+ delete(filter: string): Promise<void>;
11
+ generateEmbedding(text: string): Promise<number[]>;
12
+ }
13
+ export declare function createMetadataProvider(config: MetadataConfig): MetadataProvider; // Factory: local provider when config.type is 'local' or unset, RemoteMetadataProvider for 'remote'; throws for a missing config or any other type.
@@ -0,0 +1,48 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.RemoteMetadataProvider = void 0;
4
+ exports.createMetadataProvider = createMetadataProvider;
5
+ const local_1 = require("./local");
6
/**
 * Placeholder for a metadata backend served by a remote API.
 *
 * Only construction and `connect` do anything today; every data method
 * rejects with "Remote metadata service not implemented yet".
 */
class RemoteMetadataProvider {
    config;
    /**
     * @param config - Metadata settings; `remote_api_endpoint` is required.
     * @throws Error when `remote_api_endpoint` is missing.
     */
    constructor(config) {
        this.config = config;
        const endpoint = config.remote_api_endpoint;
        if (!endpoint) {
            throw new Error("Missing remote API endpoint configuration");
        }
    }
    /** Shared failure path for the operations that are still stubs. */
    #notImplemented() {
        throw new Error('Remote metadata service not implemented yet');
    }
    /** Logs the configured endpoint; no request is made. */
    async connect() {
        // TODO: Implement health check or auth verification
        const { remote_api_endpoint: endpoint } = this.config;
        console.log("Connected to remote metadata service at", endpoint);
    }
    /** Not implemented: always rejects. */
    async addAssets(docs) {
        this.#notImplemented();
    }
    /** Not implemented: always rejects. */
    async search(query, limit, filter) {
        this.#notImplemented();
    }
    /** Not implemented: always rejects. */
    async list(limit) {
        this.#notImplemented();
    }
    /** Not implemented: always rejects. */
    async delete(filter) {
        this.#notImplemented();
    }
    /** Not implemented: always rejects. */
    async generateEmbedding(text) {
        this.#notImplemented();
    }
}
34
+ exports.RemoteMetadataProvider = RemoteMetadataProvider;
35
/**
 * Build the metadata provider selected by `config.type`.
 *
 * @param config - Metadata settings.
 * @returns A LocalMetadataProvider when the type is 'local' or not set, a
 *   RemoteMetadataProvider when it is 'remote'.
 * @throws Error when `config` is missing or the type is not supported.
 */
function createMetadataProvider(config) {
    if (!config) {
        throw new Error('Metadata configuration is missing');
    }
    const kind = config.type;
    // Default to local if type is missing but config exists
    if (!kind || kind === 'local') {
        return new local_1.LocalMetadataProvider(config);
    }
    if (kind === 'remote') {
        return new RemoteMetadataProvider(config);
    }
    throw new Error(`Unsupported metadata storage type: ${config.type}`);
}
@@ -0,0 +1,17 @@
1
+ import { DocumentSchema } from '../../client/lancedb';
2
+ import { EmbeddingConfig } from '../embedding/interface';
3
+ export interface MetadataProvider { // Contract shared by LocalMetadataProvider and RemoteMetadataProvider.
4
+ connect(): Promise<void>; // Prepare the backend; presumably must complete before the other calls - TODO confirm.
5
+ addAssets(docs: DocumentSchema[]): Promise<void>; // Store the given documents.
6
+ search(query: string, limit?: number, filter?: string): Promise<DocumentSchema[]>; // Query by text; `filter` syntax is backend-defined (the local provider forwards it unchanged to its LanceDB client).
7
+ list(limit?: number, filter?: string): Promise<DocumentSchema[]>; // Enumerate stored documents, optionally limited and filtered.
8
+ delete(filter: string): Promise<void>; // Remove the documents matched by `filter`.
9
+ generateEmbedding(text: string): Promise<number[]>; // Embed `text` with the provider's embedding backend.
10
+ }
11
+ export interface MetadataConfig { // Settings consumed by createMetadataProvider and the provider constructors.
12
+ type: 'local' | 'remote'; // Backend selector; createMetadataProvider treats a missing value as 'local'.
13
+ lancedb_uri?: string; // Local store location; when unset the local provider uses <home>/.openclaw/contextlake/data.
14
+ remote_api_endpoint?: string; // Required by RemoteMetadataProvider, whose constructor throws without it.
15
+ remote_api_key?: string; // Presumably the credential for the remote service; no code visible here reads it - TODO confirm.
16
+ embedding?: EmbeddingConfig; // Embedding backend settings; when unset the local provider uses provider 'local' with the bge-small-zh GGUF model.
17
+ }
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -0,0 +1,13 @@
1
+ import { MetadataProvider, MetadataConfig } from './interface';
2
+ import { DocumentSchema } from '../../client/lancedb';
3
+ export declare class LocalMetadataProvider implements MetadataProvider { // Metadata backend that delegates to a ContextLakeLanceDBClient plus an embedding provider.
4
+ private client; // LanceDB client that performs every storage operation.
5
+ private embeddingProvider; // Provider built by createEmbeddingProvider; also handed to the client.
6
+ constructor(config: MetadataConfig); // Fills in a default store location and embedding settings when they are absent.
7
+ connect(): Promise<void>; // Delegates to client.connect().
8
+ addAssets(docs: DocumentSchema[]): Promise<void>; // Delegates to client.addAssets().
9
+ search(query: string, limit?: number, filter?: string): Promise<DocumentSchema[]>; // Delegates to client.search() with the arguments unchanged.
10
+ list(limit?: number, filter?: string): Promise<DocumentSchema[]>; // Delegates to client.list().
11
+ delete(filter: string): Promise<void>; // Delegates to client.delete().
12
+ generateEmbedding(text: string): Promise<number[]>; // Delegates to the embedding provider.
13
+ }
@@ -0,0 +1,49 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.LocalMetadataProvider = void 0;
4
+ const lancedb_1 = require("../../client/lancedb");
5
+ const factory_1 = require("../embedding/factory");
6
/**
 * Metadata provider backed by a local LanceDB store.
 *
 * Every operation is delegated to a ContextLakeLanceDBClient; embeddings come
 * from the provider built by createEmbeddingProvider.
 */
class LocalMetadataProvider {
    client;
    embeddingProvider;
    /**
     * @param config - Metadata settings. `lancedb_uri` defaults to
     *   `<home>/.openclaw/contextlake/data` and `embedding` defaults to the
     *   local bge-small-zh GGUF model. The object passed in is left unmodified:
     *   defaults are applied to local values only.
     */
    constructor(config) {
        let uri = config.lancedb_uri;
        if (!uri) {
            // Use a path relative to home to avoid issues when running in different
            // cwds via OpenClaw daemon.
            const os = require('os');
            const path = require('path');
            uri = path.join(os.homedir(), '.openclaw', 'contextlake', 'data');
        }
        const embedding = config.embedding || {
            provider: 'local',
            model_name: 'hf:CompendiumLabs/bge-small-zh-v1.5-gguf/bge-small-zh-v1.5-f16.gguf'
        };
        this.embeddingProvider = (0, factory_1.createEmbeddingProvider)(embedding);
        this.client = new lancedb_1.ContextLakeLanceDBClient({ uri }, this.embeddingProvider);
    }
    /** Open the underlying LanceDB client. */
    async connect() {
        await this.client.connect();
    }
    /**
     * Store documents.
     * @param docs - Documents to add.
     */
    async addAssets(docs) {
        await this.client.addAssets(docs);
    }
    /**
     * Search the store.
     * @param query - Query text.
     * @param limit - Optional maximum number of results.
     * @param filter - Optional filter, forwarded unchanged to the client.
     * @returns The matching documents.
     */
    async search(query, limit, filter) {
        return await this.client.search(query, limit, filter);
    }
    /**
     * List stored documents.
     * @param limit - Optional maximum number of results.
     * @param filter - Optional filter, forwarded unchanged to the client.
     * @returns The listed documents.
     */
    async list(limit, filter) {
        return await this.client.list(limit, filter);
    }
    /**
     * Delete documents.
     * @param filter - Filter selecting the documents to remove.
     */
    async delete(filter) {
        await this.client.delete(filter);
    }
    /**
     * Embed a text with the configured embedding provider.
     * @param text - Input text.
     * @returns The embedding vector.
     */
    async generateEmbedding(text) {
        return await this.embeddingProvider.generateEmbedding(text);
    }
}
49
+ exports.LocalMetadataProvider = LocalMetadataProvider;
@@ -0,0 +1,2 @@
1
+ import { StorageProvider, StorageConfig } from './interface';
2
+ export declare function createStorageProvider(config: StorageConfig): StorageProvider; // Factory: LocalStorageProvider for 'local', ContextLakeTosClient for 'tos' (needs tos.region and tos.path); throws otherwise.
@@ -0,0 +1,19 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.createStorageProvider = createStorageProvider;
4
+ const local_1 = require("./local");
5
+ const tos_1 = require("../../client/tos");
6
/**
 * Build the storage provider selected by `config.type`.
 *
 * @param config - Storage settings.
 * @returns A LocalStorageProvider rooted at `config.local_base_dir` for
 *   'local', or a ContextLakeTosClient built from `config.tos` for 'tos'.
 * @throws Error when the 'tos' settings lack `region` or `path`, or when the
 *   type is not supported.
 */
function createStorageProvider(config) {
    switch (config.type) {
        case 'local':
            return new local_1.LocalStorageProvider(config.local_base_dir);
        case 'tos': {
            const tos = config.tos;
            const usable = Boolean(tos && tos.region && tos.path);
            if (!usable) {
                throw new Error('Missing TOS configuration: region and path required');
            }
            return new tos_1.ContextLakeTosClient(tos);
        }
        default:
            throw new Error(`Unsupported storage type: ${config.type}`);
    }
}
@@ -0,0 +1,32 @@
1
+ export interface StorageProvider { // Contract for file storage backends (implemented by LocalStorageProvider).
2
+ /**
3
+ * Upload file content
4
+ * @param fileName - Name of the file
5
+ * @param buffer - File content
6
+ * @returns The storage URL (e.g. file:///... or tos://...); pass it to downloadFile or deleteFile later
7
+ */
8
+ uploadFile(fileName: string, buffer: Buffer): Promise<string>;
9
+ /**
10
+ * Download file content
11
+ * @param url - Storage URL, as returned by uploadFile
12
+ * @returns File content buffer
13
+ */
14
+ downloadFile(url: string): Promise<Buffer>;
15
+ /**
16
+ * Delete file
17
+ * @param url - Storage URL, as returned by uploadFile
18
+ */
19
+ deleteFile(url: string): Promise<void>;
20
+ }
21
+ export interface StorageConfig { // Settings consumed by createStorageProvider.
22
+ type: 'local' | 'tos'; // 'local' selects LocalStorageProvider; 'tos' selects ContextLakeTosClient.
23
+ local_base_dir?: string; // Root directory for local storage; LocalStorageProvider defaults to './data/files'.
24
+ tos?: { // Required when type is 'tos'; createStorageProvider insists on `region` and `path`.
25
+ access_key?: string; // Presumably the TOS credentials (with secret_key); not read by code visible here.
26
+ secret_key?: string;
27
+ region?: string; // Must be set for 'tos'.
28
+ path?: string; // Must be set for 'tos'; exact format is defined by ContextLakeTosClient - TODO confirm.
29
+ endpoint?: string;
30
+ sts_token?: string; // Presumably a temporary session token - TODO confirm.
31
+ };
32
+ }
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -0,0 +1,9 @@
1
+ import { StorageProvider } from './interface';
2
+ export declare class LocalStorageProvider implements StorageProvider { // Stores files in a directory on the local filesystem.
3
+ private baseDir; // Absolute path of the storage directory.
4
+ constructor(baseDir?: string); // Defaults to './data/files'; the directory is created when missing.
5
+ uploadFile(fileName: string, buffer: Buffer): Promise<string>; // Writes the file under baseDir and resolves to its `file://` URL.
6
+ downloadFile(url: string): Promise<Buffer>; // Reads the file behind a `file://` URL.
7
+ deleteFile(url: string): Promise<void>; // Removes the file behind a `file://` URL; a missing file is not an error.
8
+ private parseUrl; // Strips the `file://` prefix; throws for any other URL.
9
+ }
@@ -0,0 +1,72 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { // Interop helper (an existing this.__createBinding is reused): exposes m[k] on o under key k2, which defaults to k.
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { // No usable descriptor: fall back to a live getter so later changes to m[k] stay visible.
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) { // Fallback when Object.create is unavailable: plain value copy.
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { // Interop helper: stores v as the enumerable "default" property of o.
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () { // Interop helper: turns a required module into a namespace-style object.
19
+ var ownKeys = function(o) { // Replaces itself on first call with Object.getOwnPropertyNames or a for-in based equivalent.
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod; // Modules flagged __esModule are returned untouched.
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod); // The original export is reachable as result.default.
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.LocalStorageProvider = void 0;
37
+ const fs = __importStar(require("fs"));
38
+ const path = __importStar(require("path"));
39
/**
 * Storage provider that keeps files in a directory on the local filesystem
 * and identifies them by `file://<absolute path>` URLs.
 */
class LocalStorageProvider {
    baseDir;
    /**
     * @param baseDir - Storage directory (resolved against the current working
     *   directory); created when it does not exist.
     */
    constructor(baseDir = './data/files') {
        this.baseDir = path.resolve(baseDir);
        if (!fs.existsSync(this.baseDir)) {
            fs.mkdirSync(this.baseDir, { recursive: true });
        }
    }
    /**
     * Write a file below the storage directory.
     *
     * @param fileName - File name, optionally with sub-directories, relative to
     *   the storage directory.
     * @param buffer - File content.
     * @returns The `file://` URL of the stored file.
     * @throws Error "Invalid file name: ..." when the name resolves to the
     *   storage directory itself or to a location outside it (e.g. via `..`).
     */
    async uploadFile(fileName, buffer) {
        const filePath = path.join(this.baseDir, fileName);
        const relative = path.relative(this.baseDir, filePath);
        // Reject names that escape the storage directory so a crafted file name
        // cannot overwrite arbitrary files.
        const escapes = relative === '..' || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
        if (relative === '' || escapes) {
            throw new Error(`Invalid file name: ${fileName}`);
        }
        // Also covers nested names and a storage directory removed after construction.
        await fs.promises.mkdir(path.dirname(filePath), { recursive: true });
        await fs.promises.writeFile(filePath, buffer);
        return `file://${filePath}`;
    }
    /**
     * Read a stored file.
     *
     * @param url - `file://` URL as returned by uploadFile.
     * @returns The file content.
     * @throws Error for a non-`file://` URL; rejects with the read error when
     *   the file cannot be read.
     */
    async downloadFile(url) {
        const filePath = this.parseUrl(url);
        return await fs.promises.readFile(filePath);
    }
    /**
     * Delete a stored file; a file that is already gone is not an error.
     *
     * @param url - `file://` URL as returned by uploadFile.
     * @throws Error for a non-`file://` URL.
     */
    async deleteFile(url) {
        const filePath = this.parseUrl(url);
        try {
            // Unlink directly instead of checking existence first, which would
            // race with a concurrent delete.
            await fs.promises.unlink(filePath);
        }
        catch (err) {
            if (!err || err.code !== 'ENOENT') {
                throw err;
            }
        }
    }
    /**
     * Convert a `file://` URL into a filesystem path by dropping the prefix.
     *
     * @param url - Storage URL.
     * @returns The path after the `file://` prefix.
     * @throws Error "Invalid local file URL: ..." for any other scheme.
     */
    parseUrl(url) {
        const prefix = 'file://';
        if (!url.startsWith(prefix)) {
            throw new Error(`Invalid local file URL: ${url}`);
        }
        return url.slice(prefix.length);
    }
}
72
+ exports.LocalStorageProvider = LocalStorageProvider;
@@ -0,0 +1,26 @@
1
+ export interface ConnectParams { // Input accepted by connectDataSource.
2
+ datasource_name: string; // Name of the data source; the result type carries the same field.
3
+ vendor: 'volcengine' | 'alibaba' | 'tencent' | 'aws' | 'local'; // Storage vendor; presumably selects the client used to read the bucket - TODO confirm.
4
+ endpoint?: string;
5
+ access_key?: string; // Presumably the vendor credentials (with secret_key) - TODO confirm.
6
+ secret_key?: string;
7
+ region?: string;
8
+ bucket: string;
9
+ prefix: string; // Presumably the key prefix inside `bucket` to scan - TODO confirm.
10
+ sample_rows?: number; // Presumably how many rows to sample per structured file - TODO confirm.
11
+ }
12
+ interface ConnectResult { // Shape resolved by connectDataSource (not exported from this module).
13
+ status: 'success' | 'error'; // Outcome flag; presumably `error` is populated when this is 'error' - TODO confirm.
14
+ datasource_name: string;
15
+ db_path: string; // Presumably where the generated catalog database was written - TODO confirm.
16
+ env_path: string;
17
+ tables: string[];
18
+ summary?: { // Optional file counts.
19
+ total_files: number;
20
+ structured_files: number;
21
+ media_files: number;
22
+ };
23
+ error?: string; // Optional error description.
24
+ }
25
+ export declare function connectDataSource(params: ConnectParams, _ctx?: any): Promise<ConnectResult>; // Entry point of the data-profiler skill: takes connection parameters and resolves to a ConnectResult; `_ctx` is optional and, judging by its underscore name, presumably unused - TODO confirm.
26
+ export {};