@memvid/sdk 2.0.113
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/README.md +244 -0
- package/dist/__tests__/basic.test.d.ts +1 -0
- package/dist/__tests__/basic.test.js +41 -0
- package/dist/adapters/autogen.d.ts +23 -0
- package/dist/adapters/autogen.js +163 -0
- package/dist/adapters/basic.d.ts +1 -0
- package/dist/adapters/basic.js +11 -0
- package/dist/adapters/crewai.d.ts +23 -0
- package/dist/adapters/crewai.js +160 -0
- package/dist/adapters/google_adk.d.ts +25 -0
- package/dist/adapters/google_adk.js +158 -0
- package/dist/adapters/haystack.d.ts +1 -0
- package/dist/adapters/haystack.js +11 -0
- package/dist/adapters/langchain.d.ts +28 -0
- package/dist/adapters/langchain.js +156 -0
- package/dist/adapters/langgraph.d.ts +1 -0
- package/dist/adapters/langgraph.js +11 -0
- package/dist/adapters/llamaindex.d.ts +33 -0
- package/dist/adapters/llamaindex.js +195 -0
- package/dist/adapters/mcp.d.ts +1 -0
- package/dist/adapters/mcp.js +11 -0
- package/dist/adapters/openai.d.ts +26 -0
- package/dist/adapters/openai.js +169 -0
- package/dist/adapters/semantic_kernel.d.ts +1 -0
- package/dist/adapters/semantic_kernel.js +11 -0
- package/dist/adapters/vercel_ai.d.ts +27 -0
- package/dist/adapters/vercel_ai.js +148 -0
- package/dist/clip.d.ts +182 -0
- package/dist/clip.js +371 -0
- package/dist/embeddings.d.ts +156 -0
- package/dist/embeddings.js +289 -0
- package/dist/entities.d.ts +251 -0
- package/dist/entities.js +489 -0
- package/dist/error.d.ts +91 -0
- package/dist/error.js +203 -0
- package/dist/index.d.ts +53 -0
- package/dist/index.js +458 -0
- package/dist/noop.d.ts +2 -0
- package/dist/noop.js +55 -0
- package/dist/registry.d.ts +5 -0
- package/dist/registry.js +53 -0
- package/dist/types.d.ts +275 -0
- package/dist/types.js +2 -0
- package/index.node +0 -0
- package/package.json +81 -0
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* External embedding provider support for Memvid SDK (Node.js).
|
|
3
|
+
*
|
|
4
|
+
* This module provides classes for generating embeddings using external providers
|
|
5
|
+
* like OpenAI, allowing users to use their own embedding models with Memvid.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* ```typescript
|
|
9
|
+
* import { create } from '@memvid/sdk';
|
|
10
|
+
* import { OpenAIEmbeddings } from '@memvid/sdk/embeddings';
|
|
11
|
+
*
|
|
12
|
+
* // Initialize embedding provider
|
|
13
|
+
* const embedder = new OpenAIEmbeddings(); // Uses OPENAI_API_KEY env var
|
|
14
|
+
*
|
|
15
|
+
* // Create memory with external embeddings
|
|
16
|
+
* const mem = await create('knowledge.mv2', 'basic');
|
|
17
|
+
*
|
|
18
|
+
* // Store documents with embeddings
|
|
19
|
+
* const docs = [
|
|
20
|
+
* { title: 'Doc 1', label: 'notes', text: 'Content 1...' },
|
|
21
|
+
* { title: 'Doc 2', label: 'notes', text: 'Content 2...' },
|
|
22
|
+
* ];
|
|
23
|
+
* const embeddings = await embedder.embedDocuments(docs.map(d => d.text));
|
|
24
|
+
* // Use embeddings with putMany when available
|
|
25
|
+
* ```
|
|
26
|
+
*/
|
|
27
|
+
/**
 * Lookup table from embedding model name to the length of the vectors that
 * model produces. The provider classes consult it to report `dimension`.
 */
export declare const MODEL_DIMENSIONS: {
    [modelName: string]: number;
};
|
|
31
|
+
/**
 * Contract every embedding backend fulfils: it reports which model it uses,
 * how long that model's vectors are, and turns text into vectors.
 */
export interface EmbeddingProvider {
    /** Length of each vector produced by the configured model. */
    readonly dimension: number;
    /** Identifier of the configured model. */
    readonly modelName: string;
    /**
     * Embed a list of documents.
     * @param texts - Documents to embed
     * @returns Promise resolving to one embedding vector per input text
     */
    embedDocuments(texts: Array<string>): Promise<Array<number[]>>;
    /**
     * Embed a single search query.
     * @param text - Query text to embed
     * @returns Promise resolving to the query's embedding vector
     */
    embedQuery(text: string): Promise<Array<number>>;
}
|
|
52
|
+
/**
 * Options accepted by the OpenAI embedding provider. Every field is optional.
 */
export interface OpenAIEmbeddingsConfig {
    /** OpenAI API key. Falls back to the OPENAI_API_KEY environment variable. */
    apiKey?: string;
    /** Embedding model name. Default: 'text-embedding-3-small' */
    model?: string;
    /** How many texts are sent per API request. Default: 100 */
    batchSize?: number;
}
|
|
63
|
+
/**
 * Embedding provider backed by OpenAI's text-embedding models.
 *
 * Works with text-embedding-3-small, text-embedding-3-large and
 * text-embedding-ada-002.
 *
 * @example
 * ```typescript
 * const embedder = new OpenAIEmbeddings(); // Uses OPENAI_API_KEY
 * const embedder = new OpenAIEmbeddings({ model: 'text-embedding-3-large' });
 * const vectors = await embedder.embedDocuments(['Hello', 'World']);
 * ```
 */
export declare class OpenAIEmbeddings implements EmbeddingProvider {
    private readonly _apiKey;
    private readonly _model;
    private readonly _batchSize;
    /** @param config - Optional API key, model name and batch size. */
    constructor(config?: OpenAIEmbeddingsConfig);
    /** Vector length of the configured model. */
    get dimension(): number;
    /** Name of the configured model. */
    get modelName(): string;
    /** Embed several documents; resolves to one vector per input text. */
    embedDocuments(texts: Array<string>): Promise<Array<number[]>>;
    /** Embed a single query string. */
    embedQuery(text: string): Promise<Array<number>>;
}
|
|
86
|
+
/**
 * Options accepted by the Cohere embedding provider. Every field is optional.
 */
export interface CohereEmbeddingsConfig {
    /** Cohere API key. Falls back to the COHERE_API_KEY environment variable. */
    apiKey?: string;
    /** Embedding model name. Default: 'embed-english-v3.0' */
    model?: string;
    /** Input type sent when embedding documents. Default: 'search_document' */
    inputType?: 'search_document' | 'search_query' | 'classification' | 'clustering';
}
|
|
97
|
+
/**
 * Embedding provider backed by Cohere's embed models.
 *
 * @example
 * ```typescript
 * const embedder = new CohereEmbeddings(); // Uses COHERE_API_KEY
 * const vectors = await embedder.embedDocuments(['Hello', 'World']);
 * ```
 */
export declare class CohereEmbeddings implements EmbeddingProvider {
    private readonly _apiKey;
    private readonly _model;
    private readonly _inputType;
    /** @param config - Optional API key, model name and document input type. */
    constructor(config?: CohereEmbeddingsConfig);
    /** Vector length of the configured model. */
    get dimension(): number;
    /** Name of the configured model. */
    get modelName(): string;
    /** Embed several documents; resolves to one vector per input text. */
    embedDocuments(texts: Array<string>): Promise<Array<number[]>>;
    /** Embed a single query string. */
    embedQuery(text: string): Promise<Array<number>>;
}
|
|
116
|
+
/**
 * Options accepted by the Voyage AI embedding provider. Every field is optional.
 */
export interface VoyageEmbeddingsConfig {
    /** Voyage API key. Falls back to the VOYAGE_API_KEY environment variable. */
    apiKey?: string;
    /** Embedding model name. Default: 'voyage-3' */
    model?: string;
}
|
|
125
|
+
/**
 * Embedding provider backed by Voyage AI's embedding models.
 *
 * @example
 * ```typescript
 * const embedder = new VoyageEmbeddings(); // Uses VOYAGE_API_KEY
 * const vectors = await embedder.embedDocuments(['Hello', 'World']);
 * ```
 */
export declare class VoyageEmbeddings implements EmbeddingProvider {
    private readonly _apiKey;
    private readonly _model;
    /** @param config - Optional API key and model name. */
    constructor(config?: VoyageEmbeddingsConfig);
    /** Vector length of the configured model. */
    get dimension(): number;
    /** Name of the configured model. */
    get modelName(): string;
    /** Embed several documents; resolves to one vector per input text. */
    embedDocuments(texts: Array<string>): Promise<Array<number[]>>;
    /** Embed a single query string. */
    embedQuery(text: string): Promise<Array<number>>;
}
|
|
143
|
+
/**
 * Factory function to create an embedding provider.
 *
 * The provider-specific overloads accept the matching exported config
 * interface and return the concrete provider class. The final, general
 * overload is the original signature and is kept so existing callers (for
 * example ones passing a provider name held in a union-typed variable, or a
 * loosely typed config object) continue to compile.
 *
 * @param provider - One of: 'openai', 'cohere', 'voyage'
 * @param config - Provider-specific configuration
 * @returns EmbeddingProvider instance
 *
 * @example
 * ```typescript
 * const embedder = getEmbedder('openai');
 * const embedder = getEmbedder('cohere', { model: 'embed-multilingual-v3.0' });
 * ```
 */
export declare function getEmbedder(provider: 'openai', config?: OpenAIEmbeddingsConfig): OpenAIEmbeddings;
export declare function getEmbedder(provider: 'cohere', config?: CohereEmbeddingsConfig): CohereEmbeddings;
export declare function getEmbedder(provider: 'voyage', config?: VoyageEmbeddingsConfig): VoyageEmbeddings;
export declare function getEmbedder(provider: 'openai' | 'cohere' | 'voyage', config?: Record<string, unknown>): EmbeddingProvider;
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* External embedding provider support for Memvid SDK (Node.js).
|
|
4
|
+
*
|
|
5
|
+
* This module provides classes for generating embeddings using external providers
|
|
6
|
+
* like OpenAI, allowing users to use their own embedding models with Memvid.
|
|
7
|
+
*
|
|
8
|
+
* @example
|
|
9
|
+
* ```typescript
|
|
10
|
+
* import { create } from '@memvid/sdk';
|
|
11
|
+
* import { OpenAIEmbeddings } from '@memvid/sdk/embeddings';
|
|
12
|
+
*
|
|
13
|
+
* // Initialize embedding provider
|
|
14
|
+
* const embedder = new OpenAIEmbeddings(); // Uses OPENAI_API_KEY env var
|
|
15
|
+
*
|
|
16
|
+
* // Create memory with external embeddings
|
|
17
|
+
* const mem = await create('knowledge.mv2', 'basic');
|
|
18
|
+
*
|
|
19
|
+
* // Store documents with embeddings
|
|
20
|
+
* const docs = [
|
|
21
|
+
* { title: 'Doc 1', label: 'notes', text: 'Content 1...' },
|
|
22
|
+
* { title: 'Doc 2', label: 'notes', text: 'Content 2...' },
|
|
23
|
+
* ];
|
|
24
|
+
* const embeddings = await embedder.embedDocuments(docs.map(d => d.text));
|
|
25
|
+
* // Use embeddings with putMany when available
|
|
26
|
+
* ```
|
|
27
|
+
*/
|
|
28
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
29
|
+
exports.VoyageEmbeddings = exports.CohereEmbeddings = exports.OpenAIEmbeddings = exports.MODEL_DIMENSIONS = void 0;
|
|
30
|
+
exports.getEmbedder = getEmbedder;
|
|
31
|
+
/**
|
|
32
|
+
* Model dimension mappings for common embedding models.
|
|
33
|
+
*/
|
|
34
|
+
exports.MODEL_DIMENSIONS = {
|
|
35
|
+
// OpenAI models
|
|
36
|
+
'text-embedding-3-small': 1536,
|
|
37
|
+
'text-embedding-3-large': 3072,
|
|
38
|
+
'text-embedding-ada-002': 1536,
|
|
39
|
+
// Cohere models
|
|
40
|
+
'embed-english-v3.0': 1024,
|
|
41
|
+
'embed-multilingual-v3.0': 1024,
|
|
42
|
+
'embed-english-light-v3.0': 384,
|
|
43
|
+
'embed-multilingual-light-v3.0': 384,
|
|
44
|
+
// Voyage models
|
|
45
|
+
'voyage-3': 1024,
|
|
46
|
+
'voyage-3-lite': 512,
|
|
47
|
+
'voyage-code-3': 1024,
|
|
48
|
+
};
|
|
49
|
+
/**
 * OpenAI embedding provider.
 *
 * Posts texts to OpenAI's `/v1/embeddings` endpoint and returns the vectors.
 * Compatible with text-embedding-3-small, text-embedding-3-large, and
 * text-embedding-ada-002; an unrecognised model name is still sent to the API
 * and is reported with a fallback dimension of 1536.
 *
 * @example
 * ```typescript
 * const embedder = new OpenAIEmbeddings(); // Uses OPENAI_API_KEY
 * const embedder = new OpenAIEmbeddings({ model: 'text-embedding-3-large' });
 * const vectors = await embedder.embedDocuments(['Hello', 'World']);
 * ```
 */
class OpenAIEmbeddings {
    /**
     * @param config - Optional settings: `apiKey`, `model`, `batchSize`.
     * @throws Error when no API key is passed and OPENAI_API_KEY is not set.
     */
    constructor(config = {}) {
        this._apiKey = config.apiKey || process.env.OPENAI_API_KEY || '';
        if (!this._apiKey) {
            throw new Error('OpenAI API key required. Pass apiKey or set OPENAI_API_KEY environment variable.');
        }
        this._model = config.model || 'text-embedding-3-small';
        // A batch size below 1 (negative, zero, fractional < 1, NaN) would make
        // embedDocuments() send empty batches or never advance, so any such
        // value falls back to the documented default of 100.
        const requested = Number(config.batchSize);
        this._batchSize = requested >= 1 ? Math.floor(requested) : 100;
    }
    /** Vector length for the configured model (1536 when the model is unknown). */
    get dimension() {
        const table = exports.MODEL_DIMENSIONS;
        // Own-property check so a model named e.g. 'constructor' cannot pick up
        // an inherited Object.prototype member instead of a number.
        const known = Object.prototype.hasOwnProperty.call(table, this._model)
            ? table[this._model]
            : undefined;
        return known || 1536;
    }
    /** Name of the configured model. */
    get modelName() {
        return this._model;
    }
    /**
     * Embed a list of documents, sending at most `batchSize` texts per request.
     *
     * @param texts - Documents to embed
     * @returns One embedding per input text, in input order
     * @throws Error on a non-2xx API response or a malformed response body
     */
    async embedDocuments(texts) {
        if (texts.length === 0) {
            return [];
        }
        const allEmbeddings = [];
        for (let start = 0; start < texts.length; start += this._batchSize) {
            const batch = texts.slice(start, start + this._batchSize);
            const vectors = await this._requestEmbeddings(batch);
            if (vectors.length !== batch.length) {
                // A short or long reply would silently misalign vectors and texts.
                throw new Error(`OpenAI API error: expected ${batch.length} embeddings but received ${vectors.length}`);
            }
            // Plain loop rather than push(...spread): spreading a very large
            // batch can exceed the engine's argument limit.
            for (const vector of vectors) {
                allEmbeddings.push(vector);
            }
        }
        return allEmbeddings;
    }
    /**
     * Embed a single query string.
     *
     * @param text - Query text to embed
     * @returns The query's embedding vector
     * @throws Error on a non-2xx API response or a malformed response body
     */
    async embedQuery(text) {
        const vectors = await this._requestEmbeddings(text);
        if (vectors.length === 0) {
            throw new Error('OpenAI API error: response contained no embeddings');
        }
        return vectors[0];
    }
    /**
     * Internal helper: issue one embeddings request and return the vectors
     * ordered by the `index` field of each returned item.
     *
     * @param input - A single string or an array of strings
     */
    async _requestEmbeddings(input) {
        const response = await fetch('https://api.openai.com/v1/embeddings', {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${this._apiKey}`,
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                model: this._model,
                input: input,
            }),
        });
        if (!response.ok) {
            const error = await response.text();
            throw new Error(`OpenAI API error: ${response.status} ${error}`);
        }
        const payload = await response.json();
        const items = payload && payload.data;
        if (!Array.isArray(items)) {
            throw new Error('OpenAI API error: response did not contain a "data" array');
        }
        // Sort a copy by index to ensure correct order without mutating the payload.
        return items
            .slice()
            .sort((a, b) => a.index - b.index)
            .map(item => item.embedding);
    }
}
|
|
127
|
+
exports.OpenAIEmbeddings = OpenAIEmbeddings;
|
|
128
|
+
/**
 * Cohere embedding provider.
 *
 * Posts texts to Cohere's `/v1/embed` endpoint. Documents are embedded with
 * the configured `inputType` (default 'search_document'); queries are always
 * embedded with input type 'search_query'.
 *
 * @example
 * ```typescript
 * const embedder = new CohereEmbeddings(); // Uses COHERE_API_KEY
 * const vectors = await embedder.embedDocuments(['Hello', 'World']);
 * ```
 */
class CohereEmbeddings {
    /**
     * @param config - Optional settings: `apiKey`, `model`, `inputType`.
     * @throws Error when no API key is passed and COHERE_API_KEY is not set.
     */
    constructor(config = {}) {
        this._apiKey = config.apiKey || process.env.COHERE_API_KEY || '';
        if (!this._apiKey) {
            throw new Error('Cohere API key required. Pass apiKey or set COHERE_API_KEY environment variable.');
        }
        this._model = config.model || 'embed-english-v3.0';
        this._inputType = config.inputType || 'search_document';
    }
    /** Vector length for the configured model (1024 when the model is unknown). */
    get dimension() {
        const table = exports.MODEL_DIMENSIONS;
        // Own-property check so a model named e.g. 'constructor' cannot pick up
        // an inherited Object.prototype member instead of a number.
        const known = Object.prototype.hasOwnProperty.call(table, this._model)
            ? table[this._model]
            : undefined;
        return known || 1024;
    }
    /** Name of the configured model. */
    get modelName() {
        return this._model;
    }
    /**
     * Embed a list of documents.
     *
     * Requests are split into chunks because the Cohere embed endpoint caps
     * the number of texts per call (96 per the Cohere API reference); sending
     * a longer list in one request is rejected by the API.
     *
     * @param texts - Documents to embed
     * @returns One embedding per input text, in input order
     * @throws Error on a non-2xx API response or a malformed response body
     */
    async embedDocuments(texts) {
        if (texts.length === 0) {
            return [];
        }
        const maxTextsPerRequest = 96;
        const allEmbeddings = [];
        for (let start = 0; start < texts.length; start += maxTextsPerRequest) {
            const batch = texts.slice(start, start + maxTextsPerRequest);
            const vectors = await this._requestEmbeddings(batch, this._inputType);
            if (vectors.length !== batch.length) {
                // A short or long reply would silently misalign vectors and texts.
                throw new Error(`Cohere API error: expected ${batch.length} embeddings but received ${vectors.length}`);
            }
            for (const vector of vectors) {
                allEmbeddings.push(vector);
            }
        }
        return allEmbeddings;
    }
    /**
     * Embed a single query string using input type 'search_query'.
     *
     * @param text - Query text to embed
     * @returns The query's embedding vector
     * @throws Error on a non-2xx API response or a malformed response body
     */
    async embedQuery(text) {
        const vectors = await this._requestEmbeddings([text], 'search_query');
        if (vectors.length === 0) {
            throw new Error('Cohere API error: response contained no embeddings');
        }
        return vectors[0];
    }
    /**
     * Internal helper: issue one embed request and return its `embeddings` array.
     *
     * @param texts - Texts for this request
     * @param inputType - Value sent as `input_type`
     */
    async _requestEmbeddings(texts, inputType) {
        const response = await fetch('https://api.cohere.ai/v1/embed', {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${this._apiKey}`,
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                model: this._model,
                texts: texts,
                input_type: inputType,
            }),
        });
        if (!response.ok) {
            const error = await response.text();
            throw new Error(`Cohere API error: ${response.status} ${error}`);
        }
        const payload = await response.json();
        const vectors = payload && payload.embeddings;
        if (!Array.isArray(vectors)) {
            throw new Error('Cohere API error: response did not contain an "embeddings" array');
        }
        return vectors;
    }
}
|
|
196
|
+
exports.CohereEmbeddings = CohereEmbeddings;
|
|
197
|
+
/**
 * Voyage AI embedding provider.
 *
 * Posts texts to Voyage's `/v1/embeddings` endpoint. Documents are sent with
 * input type 'document' and queries with input type 'query'.
 *
 * @example
 * ```typescript
 * const embedder = new VoyageEmbeddings(); // Uses VOYAGE_API_KEY
 * const vectors = await embedder.embedDocuments(['Hello', 'World']);
 * ```
 */
class VoyageEmbeddings {
    /**
     * @param config - Optional settings: `apiKey`, `model`.
     * @throws Error when no API key is passed and VOYAGE_API_KEY is not set.
     */
    constructor(config = {}) {
        this._apiKey = config.apiKey || process.env.VOYAGE_API_KEY || '';
        if (!this._apiKey) {
            throw new Error('Voyage API key required. Pass apiKey or set VOYAGE_API_KEY environment variable.');
        }
        this._model = config.model || 'voyage-3';
    }
    /** Vector length for the configured model (1024 when the model is unknown). */
    get dimension() {
        const table = exports.MODEL_DIMENSIONS;
        // Own-property check so a model named e.g. 'constructor' cannot pick up
        // an inherited Object.prototype member instead of a number.
        const known = Object.prototype.hasOwnProperty.call(table, this._model)
            ? table[this._model]
            : undefined;
        return known || 1024;
    }
    /** Name of the configured model. */
    get modelName() {
        return this._model;
    }
    /**
     * Embed a list of documents.
     *
     * Requests are split into chunks of 128 texts. NOTE(review): 128 is a
     * conservative per-request list size chosen from the Voyage API
     * reference; confirm against the current limits for the model in use.
     *
     * @param texts - Documents to embed
     * @returns One embedding per input text, in input order
     * @throws Error on a non-2xx API response or a malformed response body
     */
    async embedDocuments(texts) {
        if (texts.length === 0) {
            return [];
        }
        const maxTextsPerRequest = 128;
        const allEmbeddings = [];
        for (let start = 0; start < texts.length; start += maxTextsPerRequest) {
            const batch = texts.slice(start, start + maxTextsPerRequest);
            const vectors = await this._requestEmbeddings(batch, 'document');
            if (vectors.length !== batch.length) {
                // A short or long reply would silently misalign vectors and texts.
                throw new Error(`Voyage API error: expected ${batch.length} embeddings but received ${vectors.length}`);
            }
            for (const vector of vectors) {
                allEmbeddings.push(vector);
            }
        }
        return allEmbeddings;
    }
    /**
     * Embed a single query string using input type 'query'.
     *
     * @param text - Query text to embed
     * @returns The query's embedding vector
     * @throws Error on a non-2xx API response or a malformed response body
     */
    async embedQuery(text) {
        const vectors = await this._requestEmbeddings([text], 'query');
        if (vectors.length === 0) {
            throw new Error('Voyage API error: response contained no embeddings');
        }
        return vectors[0];
    }
    /**
     * Internal helper: issue one embeddings request and return the vectors.
     * When every returned item carries a numeric `index`, the items are
     * ordered by it (matching the OpenAI provider); otherwise the response
     * order is kept as-is.
     *
     * @param input - Texts for this request
     * @param inputType - Value sent as `input_type`
     */
    async _requestEmbeddings(input, inputType) {
        const response = await fetch('https://api.voyageai.com/v1/embeddings', {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${this._apiKey}`,
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                model: this._model,
                input: input,
                input_type: inputType,
            }),
        });
        if (!response.ok) {
            const error = await response.text();
            throw new Error(`Voyage API error: ${response.status} ${error}`);
        }
        const payload = await response.json();
        const items = payload && payload.data;
        if (!Array.isArray(items)) {
            throw new Error('Voyage API error: response did not contain a "data" array');
        }
        const indexed = items.every(item => item && typeof item.index === 'number');
        const ordered = indexed ? items.slice().sort((a, b) => a.index - b.index) : items;
        return ordered.map(item => item.embedding);
    }
}
|
|
264
|
+
exports.VoyageEmbeddings = VoyageEmbeddings;
|
|
265
|
+
/**
 * Build an embedding provider from its short name.
 *
 * The name is matched case-insensitively (it is lower-cased before lookup).
 *
 * @param provider - One of: 'openai', 'cohere', 'voyage'
 * @param config - Provider-specific configuration, forwarded to the constructor
 * @returns EmbeddingProvider instance
 * @throws Error when the provider name is not recognised
 *
 * @example
 * ```typescript
 * const embedder = getEmbedder('openai');
 * const embedder = getEmbedder('cohere', { model: 'embed-multilingual-v3.0' });
 * ```
 */
function getEmbedder(provider, config) {
    const key = provider.toLowerCase();
    // A Map (not an object literal) so names such as 'constructor' cannot
    // resolve to an inherited member.
    const registry = new Map([
        ['openai', OpenAIEmbeddings],
        ['cohere', CohereEmbeddings],
        ['voyage', VoyageEmbeddings],
    ]);
    const Provider = registry.get(key);
    if (Provider === undefined) {
        throw new Error(`Unknown provider: ${provider}. Supported: openai, cohere, voyage`);
    }
    return new Provider(config);
}
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Entity extraction (NER) provider support for Memvid SDK (Node.js).
|
|
3
|
+
*
|
|
4
|
+
* Providers:
|
|
5
|
+
* - LocalNER: DistilBERT-NER (ONNX, offline)
|
|
6
|
+
* - OpenAIEntities: OpenAI GPT-4 (cloud, custom entity types)
|
|
7
|
+
* - ClaudeEntities: Anthropic Claude (cloud, custom entity types)
|
|
8
|
+
* - GeminiEntities: Google Gemini (cloud, custom entity types)
|
|
9
|
+
*
|
|
10
|
+
* @example
|
|
11
|
+
* ```typescript
|
|
12
|
+
* import { create } from '@memvid/sdk';
|
|
13
|
+
* import { getEntityExtractor, LocalNER, OpenAIEntities } from '@memvid/sdk/entities';
|
|
14
|
+
*
|
|
15
|
+
* // Local NER (default)
|
|
16
|
+
* const ner = getEntityExtractor('local');
|
|
17
|
+
*
|
|
18
|
+
* // Or with cloud provider for custom entity types
|
|
19
|
+
* const ner = getEntityExtractor('openai', {
|
|
20
|
+
* entityTypes: ['COMPANY', 'PRODUCT', 'EXECUTIVE'],
|
|
21
|
+
* });
|
|
22
|
+
*
|
|
23
|
+
* // Extract entities
|
|
24
|
+
* const text = "Microsoft CEO Satya Nadella announced the new Surface Pro in Seattle.";
|
|
25
|
+
* const entities = await ner.extract(text);
|
|
26
|
+
* // [
|
|
27
|
+
* // { name: "Microsoft", type: "ORG", confidence: 0.99 },
|
|
28
|
+
* // { name: "Satya Nadella", type: "PERSON", confidence: 0.97 },
|
|
29
|
+
* // { name: "Seattle", type: "LOCATION", confidence: 0.98 },
|
|
30
|
+
* // ]
|
|
31
|
+
*
|
|
32
|
+
* // Store with entities
|
|
33
|
+
* const mem = await create('knowledge.mv2', 'basic');
|
|
34
|
+
* await mem.put({ title: 'Tech News', text, entities });
|
|
35
|
+
* ```
|
|
36
|
+
*/
|
|
37
|
+
/**
 * A single entity found in a piece of text.
 */
export interface Entity {
    /** The entity's name exactly as it occurs in the text. */
    name: string;
    /** Category label, e.g. PERSON, ORG, LOCATION. */
    type: string;
    /** Extraction confidence, from 0.0 to 1.0. */
    confidence: number;
}
|
|
48
|
+
/**
 * A directed link between two extracted entities.
 */
export interface Relationship {
    /** Name of the entity the relationship starts from. */
    source: string;
    /** Name of the entity the relationship points to. */
    target: string;
    /** Relationship label, e.g. WORKS_FOR, LOCATED_IN. */
    type: string;
    /** Extraction confidence, from 0.0 to 1.0. */
    confidence: number;
}
|
|
61
|
+
/**
 * Output of an extraction run: the entities found and, when the provider
 * supplies them, the relationships between those entities.
 */
export interface EntityExtractionResult {
    /** Entities found in the text. */
    entities: Array<Entity>;
    /** Relationships between entities; may be absent. */
    relationships?: Array<Relationship>;
}
|
|
70
|
+
/**
 * Contract every entity-extraction backend fulfils.
 */
export interface EntityExtractor {
    /** Provider identifier (e.g., 'local:distilbert-ner'). */
    readonly name: string;
    /** Entity types this provider can emit. */
    readonly entityTypes: Array<string>;
    /**
     * Extract entities from one text.
     * @param text - Text to analyse
     * @param minConfidence - Minimum confidence threshold (0.0-1.0)
     * @returns Promise resolving to the entities found
     */
    extract(text: string, minConfidence?: number): Promise<Array<Entity>>;
    /**
     * Extract entities from several texts.
     * @param texts - Texts to analyse
     * @param minConfidence - Minimum confidence threshold
     * @returns Promise resolving to one entity list per input text
     */
    extractBatch(texts: Array<string>, minConfidence?: number): Promise<Array<Entity[]>>;
}
|
|
93
|
+
/**
 * Entity types the cloud providers use by default.
 */
export declare const DEFAULT_ENTITY_TYPES: Array<string>;
|
|
97
|
+
/**
 * Options accepted by the LocalNER provider.
 */
export interface LocalNERConfig {
    /** Model name. Default: 'distilbert-ner' */
    model?: string;
}
|
|
104
|
+
/**
 * Local entity extractor built on DistilBERT-NER (ONNX).
 *
 * The set of entity types is fixed:
 * - PERSON: People's names
 * - ORG: Organizations
 * - LOCATION: Places
 * - MISC: Miscellaneous
 */
export declare class LocalNER implements EntityExtractor {
    private readonly _model;
    private _nativeModel;
    /** @param config - Optional model selection. */
    constructor(config?: LocalNERConfig);
    /** Provider identifier. */
    get name(): string;
    /** Entity types this provider can emit. */
    get entityTypes(): Array<string>;
    private _getModel;
    /** Extract entities from one text, optionally filtered by confidence. */
    extract(text: string, minConfidence?: number): Promise<Array<Entity>>;
    /** Extract entities from several texts; one result list per input. */
    extractBatch(texts: Array<string>, minConfidence?: number): Promise<Array<Entity[]>>;
}
|
|
123
|
+
/**
 * Options accepted by the OpenAIEntities provider. Every field is optional.
 */
export interface OpenAIEntitiesConfig {
    /** OpenAI API key. Falls back to the OPENAI_API_KEY environment variable. */
    apiKey?: string;
    /** Model name. Default: 'gpt-4o-mini' */
    model?: string;
    /** Entity types to extract. Default: standard NER types */
    entityTypes?: Array<string>;
    /** Replacement for the built-in extraction prompt. */
    prompt?: string;
}
|
|
136
|
+
/**
 * Entity extractor backed by OpenAI GPT-4 models.
 *
 * Accepts custom entity types and can also extract relationships.
 */
export declare class OpenAIEntities implements EntityExtractor {
    private readonly _apiKey;
    private readonly _model;
    private readonly _entityTypes;
    private readonly _prompt;
    private static readonly DEFAULT_PROMPT;
    /** @param config - Optional API key, model, entity types and prompt. */
    constructor(config?: OpenAIEntitiesConfig);
    /** Provider identifier. */
    get name(): string;
    /** Entity types this provider is configured to emit. */
    get entityTypes(): Array<string>;
    /** Extract entities from one text, optionally filtered by confidence. */
    extract(text: string, minConfidence?: number): Promise<Array<Entity>>;
    /** Extract entities from several texts; one result list per input. */
    extractBatch(texts: Array<string>, minConfidence?: number): Promise<Array<Entity[]>>;
    /**
     * Extract both entities and the relationships between them from one text.
     */
    extractWithRelationships(text: string, minConfidence?: number): Promise<EntityExtractionResult>;
}
|
|
157
|
+
/**
 * Options accepted by the ClaudeEntities provider. Every field is optional.
 */
export interface ClaudeEntitiesConfig {
    /** Anthropic API key. Falls back to the ANTHROPIC_API_KEY environment variable. */
    apiKey?: string;
    /** Model name. Default: 'claude-3-5-sonnet-20241022' */
    model?: string;
    /** Entity types to extract. */
    entityTypes?: Array<string>;
}
|
|
168
|
+
/**
 * Entity extractor backed by Anthropic Claude.
 */
export declare class ClaudeEntities implements EntityExtractor {
    private readonly _apiKey;
    private readonly _model;
    private readonly _entityTypes;
    /** @param config - Optional API key, model and entity types. */
    constructor(config?: ClaudeEntitiesConfig);
    /** Provider identifier. */
    get name(): string;
    /** Entity types this provider is configured to emit. */
    get entityTypes(): Array<string>;
    /** Extract entities from one text, optionally filtered by confidence. */
    extract(text: string, minConfidence?: number): Promise<Array<Entity>>;
    /** Extract entities from several texts; one result list per input. */
    extractBatch(texts: Array<string>, minConfidence?: number): Promise<Array<Entity[]>>;
}
|
|
181
|
+
/**
 * Options accepted by the GeminiEntities provider. Every field is optional.
 */
export interface GeminiEntitiesConfig {
    /** Google AI API key. Falls back to the GEMINI_API_KEY environment variable. */
    apiKey?: string;
    /** Model name. Default: 'gemini-2.0-flash' */
    model?: string;
    /** Entity types to extract. */
    entityTypes?: Array<string>;
}
|
|
192
|
+
/**
 * Entity extractor backed by Google Gemini.
 */
export declare class GeminiEntities implements EntityExtractor {
    private readonly _apiKey;
    private readonly _model;
    private readonly _entityTypes;
    /** @param config - Optional API key, model and entity types. */
    constructor(config?: GeminiEntitiesConfig);
    /** Provider identifier. */
    get name(): string;
    /** Entity types this provider is configured to emit. */
    get entityTypes(): Array<string>;
    /** Extract entities from one text, optionally filtered by confidence. */
    extract(text: string, minConfidence?: number): Promise<Array<Entity>>;
    /** Extract entities from several texts; one result list per input. */
    extractBatch(texts: Array<string>, minConfidence?: number): Promise<Array<Entity[]>>;
}
|
|
205
|
+
/**
 * Options accepted by the getEntityExtractor factory. Every field is optional.
 */
export interface EntityExtractorConfig {
    /** Model name; meaning depends on the provider. */
    model?: string;
    /** API key, used by the cloud providers. */
    apiKey?: string;
    /** Entity types to extract (cloud providers only). */
    entityTypes?: Array<string>;
    /** Replacement extraction prompt (OpenAI only). */
    prompt?: string;
}
|
|
218
|
+
/**
 * Build an entity-extraction provider from a provider specification.
 *
 * @param provider - Either a bare provider name ('local', 'openai', 'claude',
 *   'gemini') or a name followed by a model, e.g. 'openai:gpt-4o-mini' or
 *   'claude:claude-3-5-sonnet-20241022'
 * @param config - Provider-specific configuration
 * @returns EntityExtractor instance
 *
 * @example
 * ```typescript
 * // Simple provider
 * const ner = getEntityExtractor('local');
 * const ner = getEntityExtractor('openai');
 *
 * // Provider with model specification
 * const ner = getEntityExtractor('openai:gpt-4o-mini');
 * const ner = getEntityExtractor('claude:claude-3-5-sonnet-20241022');
 * const ner = getEntityExtractor('gemini:gemini-2.0-flash');
 *
 * // With config for custom entity types
 * const ner = getEntityExtractor('openai', { entityTypes: ['COMPANY', 'PRODUCT'] });
 * ```
 */
export declare function getEntityExtractor(
    provider?: string,
    config?: EntityExtractorConfig,
): EntityExtractor;
|
|
243
|
+
/**
 * Default export: the provider classes, the factory function and the default
 * entity-type list gathered into one object.
 */
declare const _default: {
    LocalNER: typeof LocalNER;
    OpenAIEntities: typeof OpenAIEntities;
    ClaudeEntities: typeof ClaudeEntities;
    GeminiEntities: typeof GeminiEntities;
    getEntityExtractor: typeof getEntityExtractor;
    DEFAULT_ENTITY_TYPES: Array<string>;
};
export default _default;
|