@memvid/sdk 2.0.113
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/README.md +244 -0
- package/dist/__tests__/basic.test.d.ts +1 -0
- package/dist/__tests__/basic.test.js +41 -0
- package/dist/adapters/autogen.d.ts +23 -0
- package/dist/adapters/autogen.js +163 -0
- package/dist/adapters/basic.d.ts +1 -0
- package/dist/adapters/basic.js +11 -0
- package/dist/adapters/crewai.d.ts +23 -0
- package/dist/adapters/crewai.js +160 -0
- package/dist/adapters/google_adk.d.ts +25 -0
- package/dist/adapters/google_adk.js +158 -0
- package/dist/adapters/haystack.d.ts +1 -0
- package/dist/adapters/haystack.js +11 -0
- package/dist/adapters/langchain.d.ts +28 -0
- package/dist/adapters/langchain.js +156 -0
- package/dist/adapters/langgraph.d.ts +1 -0
- package/dist/adapters/langgraph.js +11 -0
- package/dist/adapters/llamaindex.d.ts +33 -0
- package/dist/adapters/llamaindex.js +195 -0
- package/dist/adapters/mcp.d.ts +1 -0
- package/dist/adapters/mcp.js +11 -0
- package/dist/adapters/openai.d.ts +26 -0
- package/dist/adapters/openai.js +169 -0
- package/dist/adapters/semantic_kernel.d.ts +1 -0
- package/dist/adapters/semantic_kernel.js +11 -0
- package/dist/adapters/vercel_ai.d.ts +27 -0
- package/dist/adapters/vercel_ai.js +148 -0
- package/dist/clip.d.ts +182 -0
- package/dist/clip.js +371 -0
- package/dist/embeddings.d.ts +156 -0
- package/dist/embeddings.js +289 -0
- package/dist/entities.d.ts +251 -0
- package/dist/entities.js +489 -0
- package/dist/error.d.ts +91 -0
- package/dist/error.js +203 -0
- package/dist/index.d.ts +53 -0
- package/dist/index.js +458 -0
- package/dist/noop.d.ts +2 -0
- package/dist/noop.js +55 -0
- package/dist/registry.d.ts +5 -0
- package/dist/registry.js +53 -0
- package/dist/types.d.ts +275 -0
- package/dist/types.js +2 -0
- package/index.node +0 -0
- package/package.json +81 -0
package/dist/entities.js
ADDED
|
@@ -0,0 +1,489 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Entity extraction (NER) provider support for Memvid SDK (Node.js).
|
|
4
|
+
*
|
|
5
|
+
* Providers:
|
|
6
|
+
* - LocalNER: DistilBERT-NER (ONNX, offline)
|
|
7
|
+
* - OpenAIEntities: OpenAI GPT-4 (cloud, custom entity types)
|
|
8
|
+
* - ClaudeEntities: Anthropic Claude (cloud, custom entity types)
|
|
9
|
+
* - GeminiEntities: Google Gemini (cloud, custom entity types)
|
|
10
|
+
*
|
|
11
|
+
* @example
|
|
12
|
+
* ```typescript
|
|
13
|
+
* import { create } from 'memvid-sdk';
|
|
14
|
+
* import { getEntityExtractor, LocalNER, OpenAIEntities } from 'memvid-sdk/entities';
|
|
15
|
+
*
|
|
16
|
+
* // Local NER (default)
|
|
17
|
+
* const ner = getEntityExtractor('local');
|
|
18
|
+
*
|
|
19
|
+
* // Or with cloud provider for custom entity types
|
|
20
|
+
* const ner = getEntityExtractor('openai', {
|
|
21
|
+
* entityTypes: ['COMPANY', 'PRODUCT', 'EXECUTIVE'],
|
|
22
|
+
* });
|
|
23
|
+
*
|
|
24
|
+
* // Extract entities
|
|
25
|
+
* const text = "Microsoft CEO Satya Nadella announced the new Surface Pro in Seattle.";
|
|
26
|
+
* const entities = await ner.extract(text);
|
|
27
|
+
* // [
|
|
28
|
+
* // { name: "Microsoft", type: "ORG", confidence: 0.99 },
|
|
29
|
+
* // { name: "Satya Nadella", type: "PERSON", confidence: 0.97 },
|
|
30
|
+
* // { name: "Seattle", type: "LOCATION", confidence: 0.98 },
|
|
31
|
+
* // ]
|
|
32
|
+
*
|
|
33
|
+
* // Store with entities
|
|
34
|
+
* const mem = await create('knowledge.mv2', 'basic');
|
|
35
|
+
* await mem.put({ title: 'Tech News', text, entities });
|
|
36
|
+
* ```
|
|
37
|
+
*/
|
|
38
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
+
exports.GeminiEntities = exports.ClaudeEntities = exports.OpenAIEntities = exports.LocalNER = exports.DEFAULT_ENTITY_TYPES = void 0;
|
|
40
|
+
exports.getEntityExtractor = getEntityExtractor;
|
|
41
|
+
/**
|
|
42
|
+
* Entity types requested from the cloud providers (OpenAI, Claude, Gemini) when the caller passes no `entityTypes`; the providers keep a reference to this same array rather than a copy.
|
|
43
|
+
*/
|
|
44
|
+
exports.DEFAULT_ENTITY_TYPES = [
|
|
45
|
+
'PERSON',
|
|
46
|
+
'ORG',
|
|
47
|
+
'LOCATION',
|
|
48
|
+
'DATE',
|
|
49
|
+
'PRODUCT',
|
|
50
|
+
'EVENT',
|
|
51
|
+
'OTHER',
|
|
52
|
+
];
|
|
53
|
+
/**
 * Local NER provider using DistilBERT-NER (ONNX) through the SDK's native binding.
 *
 * Supported entity types (fixed):
 * - PERSON: People's names
 * - ORG: Organizations
 * - LOCATION: Places
 * - MISC: Miscellaneous
 */
class LocalNER {
    /**
     * @param config - Optional settings. `config.model` feeds the `name`
     *   getter and defaults to 'distilbert-ner'.
     */
    constructor(config = {}) {
        // Native model handle; stays null until the first extraction needs it.
        this._nativeModel = null;
        this._model = config.model || 'distilbert-ner';
    }
    /** Provider identifier in `local:<model>` form. */
    get name() {
        return `local:${this._model}`;
    }
    /** The fixed list of entity types listed as supported by this provider. */
    get entityTypes() {
        return ['PERSON', 'ORG', 'LOCATION', 'MISC'];
    }
    /**
     * Lazily load the native binding and instantiate its `NerModel`.
     *
     * @returns The cached native model instance.
     * @throws Error when the binding cannot be loaded or does not export
     *   `NerModel`. The underlying failure is embedded in the message and is
     *   also attached as the error's `cause`, so its stack is not lost.
     */
    async _getModel() {
        if (this._nativeModel === null) {
            try {
                // Import native bindings - use relative path for development
                const native = require('../index.node');
                if (!native.NerModel) {
                    throw new Error('NerModel not exported from native module');
                }
                // NOTE: the configured model name is not passed to the native constructor.
                this._nativeModel = new native.NerModel();
            }
            catch (e) {
                const wrapped = new Error(`Local NER support requires memvid-sdk with NER feature. ` +
                    `The model will auto-download on first use (~261 MB). Error: ${e}`);
                // Keep the original failure reachable for debugging (non-enumerable,
                // like the built-in `cause` property).
                Object.defineProperty(wrapped, 'cause', {
                    value: e,
                    writable: true,
                    configurable: true,
                    enumerable: false,
                });
                throw wrapped;
            }
        }
        return this._nativeModel;
    }
    /**
     * Extract entities from a single text.
     *
     * @param text - Text handed to the native model.
     * @param minConfidence - Entities with a lower confidence are dropped (default 0.5).
     * @returns Array of `{ name, type, confidence }` objects.
     * @throws Error when the native model cannot be loaded.
     */
    async extract(text, minConfidence = 0.5) {
        const model = await this._getModel();
        const rawEntities = await model.extract(text);
        return rawEntities
            .filter((e) => e.confidence >= minConfidence)
            .map((e) => ({
            name: e.name,
            type: e.type,
            confidence: e.confidence,
        }));
    }
    /**
     * Extract entities from several texts, one after another.
     *
     * @param texts - Texts to process, in order.
     * @param minConfidence - Forwarded to `extract` for every text.
     * @returns One entity array per input text, in input order.
     */
    async extractBatch(texts, minConfidence = 0.5) {
        const results = [];
        for (const text of texts) {
            results.push(await this.extract(text, minConfidence));
        }
        return results;
    }
}
|
|
109
|
+
exports.LocalNER = LocalNER;
|
|
110
|
+
/**
 * OpenAI chat-completions entity extraction provider (default model: gpt-4o-mini).
 *
 * Supports custom entity types and relationship extraction.
 */
class OpenAIEntities {
    /**
     * @param config - Optional settings:
     *   - `apiKey`: falls back to the OPENAI_API_KEY environment variable.
     *   - `model`: defaults to 'gpt-4o-mini'.
     *   - `entityTypes`: defaults to DEFAULT_ENTITY_TYPES.
     *   - `prompt`: template containing `{entity_types}` and `{text}`
     *     placeholders; defaults to OpenAIEntities.DEFAULT_PROMPT.
     * @throws Error when no API key is available.
     */
    constructor(config = {}) {
        this._apiKey = config.apiKey || process.env.OPENAI_API_KEY || '';
        if (!this._apiKey) {
            throw new Error('OpenAI API key required. Pass apiKey or set OPENAI_API_KEY environment variable.');
        }
        this._model = config.model || 'gpt-4o-mini';
        this._entityTypes = config.entityTypes || exports.DEFAULT_ENTITY_TYPES;
        this._prompt = config.prompt || OpenAIEntities.DEFAULT_PROMPT;
    }
    /** Provider identifier in `openai:<model>` form. */
    get name() {
        return `openai:${this._model}`;
    }
    /** Entity types this instance asks the model to extract. */
    get entityTypes() {
        return this._entityTypes;
    }
    /**
     * Send one system + user message pair to the chat completions endpoint.
     *
     * @param systemMessage - Content of the system message.
     * @param userMessage - Content of the user message.
     * @param maxTokens - Value sent as `max_tokens`.
     * @returns The first choice's message content, or '{}' when it is missing or empty.
     * @throws Error carrying the HTTP status and response body when the response is not OK.
     */
    async _chat(systemMessage, userMessage, maxTokens) {
        const response = await fetch('https://api.openai.com/v1/chat/completions', {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${this._apiKey}`,
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                model: this._model,
                messages: [
                    { role: 'system', content: systemMessage },
                    { role: 'user', content: userMessage },
                ],
                temperature: 0.1,
                max_tokens: maxTokens,
            }),
        });
        if (!response.ok) {
            const error = await response.text();
            throw new Error(`OpenAI API error: ${response.status} ${error}`);
        }
        const data = await response.json();
        return data.choices?.[0]?.message?.content || '{}';
    }
    /**
     * Parse a model reply as JSON, unwrapping a markdown code fence if present.
     *
     * @param content - Raw reply text.
     * @returns The parsed JSON value.
     * @throws SyntaxError (from JSON.parse) when the payload is not valid JSON.
     */
    static _parseJsonReply(content) {
        let payload = content;
        if (payload.includes('```json')) {
            payload = payload.split('```json')[1].split('```')[0];
        }
        else if (payload.includes('```')) {
            payload = payload.split('```')[1].split('```')[0];
        }
        return JSON.parse(payload);
    }
    /**
     * Keep the items whose confidence reaches the threshold.
     * A missing (null/undefined) confidence counts as 0.8; an explicit 0 stays 0.
     */
    static _keepConfident(items, minConfidence) {
        return (items || []).filter((item) => (item.confidence ?? 0.8) >= minConfidence);
    }
    /** Normalise a raw entity into `{ name, type, confidence }`, filling in defaults. */
    static _toEntity(e) {
        return {
            name: e.name || '',
            type: e.type || 'OTHER',
            confidence: e.confidence ?? 0.8,
        };
    }
    /**
     * Extract entities from a single text.
     *
     * @param text - Text substituted into the prompt template.
     * @param minConfidence - Entities with a lower confidence are dropped (default 0.5).
     * @returns Array of `{ name, type, confidence }`; empty when the reply cannot be parsed.
     * @throws Error when the API responds with a non-OK status.
     */
    async extract(text, minConfidence = 0.5) {
        const typeList = this._entityTypes.join(', ');
        // Replacer functions insert the values verbatim; a plain string replacement
        // would expand `$&`, `$'`, `$$` etc. that happen to occur in the user text.
        const prompt = this._prompt
            .replace('{entity_types}', () => typeList)
            .replace('{text}', () => text);
        const content = await this._chat('You are an expert at extracting named entities from text. Always return valid JSON.', prompt, 1000);
        try {
            const parsed = OpenAIEntities._parseJsonReply(content);
            return OpenAIEntities._keepConfident(parsed.entities, minConfidence)
                .map((e) => OpenAIEntities._toEntity(e));
        }
        catch {
            // Best effort: an unparseable or malformed reply yields no entities.
            return [];
        }
    }
    /**
     * Extract entities from several texts, one request after another.
     *
     * @param texts - Texts to process, in order.
     * @param minConfidence - Forwarded to `extract` for every text.
     * @returns One entity array per input text, in input order.
     */
    async extractBatch(texts, minConfidence = 0.5) {
        const results = [];
        for (const text of texts) {
            results.push(await this.extract(text, minConfidence));
        }
        return results;
    }
    /**
     * Extract entities AND relationships from text.
     *
     * @param text - Text appended to the built-in relationship prompt.
     * @param minConfidence - Entities and relationships below this confidence are dropped.
     * @returns `{ entities, relationships }`; both empty when the reply cannot be parsed.
     * @throws Error when the API responds with a non-OK status.
     */
    async extractWithRelationships(text, minConfidence = 0.5) {
        const prompt = `Extract named entities AND their relationships from the text.

Return a JSON object with:
1. "entities": array of {"name": "...", "type": "...", "confidence": 0.9}
2. "relationships": array of {"source": "entity name", "target": "entity name", "type": "RELATIONSHIP_TYPE", "confidence": 0.9}

Entity types: ${this._entityTypes.join(', ')}
Relationship types: WORKS_FOR, LOCATED_IN, OWNS, PRODUCES, PARTNER_OF, SUBSIDIARY_OF, CEO_OF, FOUNDED, ACQUIRED, OTHER

Return ONLY valid JSON.

Text: ${text}`;
        const content = await this._chat('You are an expert at extracting entities and relationships. Always return valid JSON.', prompt, 2000);
        try {
            const parsed = OpenAIEntities._parseJsonReply(content);
            const entities = OpenAIEntities._keepConfident(parsed.entities, minConfidence)
                .map((e) => OpenAIEntities._toEntity(e));
            // Relationships are returned as the model produced them, only filtered.
            const relationships = OpenAIEntities._keepConfident(parsed.relationships, minConfidence);
            return { entities, relationships };
        }
        catch {
            return { entities: [], relationships: [] };
        }
    }
}
|
|
255
|
+
exports.OpenAIEntities = OpenAIEntities;
|
|
256
|
+
// Default prompt template for OpenAIEntities#extract, used when no `prompt` is configured.
// extract() substitutes the `{entity_types}` and `{text}` placeholders before sending it.
OpenAIEntities.DEFAULT_PROMPT = `Extract named entities from the provided text. Return a JSON object with an "entities" array.
|
|
257
|
+
|
|
258
|
+
Each entity should have:
|
|
259
|
+
- "name": The entity name as it appears in the text
|
|
260
|
+
- "type": The entity type from the allowed types
|
|
261
|
+
- "confidence": A number between 0.0 and 1.0 indicating your confidence
|
|
262
|
+
|
|
263
|
+
Return ONLY valid JSON, no explanations or markdown.
|
|
264
|
+
|
|
265
|
+
Allowed entity types: {entity_types}
|
|
266
|
+
|
|
267
|
+
Text to analyze:
|
|
268
|
+
{text}`;
|
|
269
|
+
/**
 * Anthropic Claude entity extraction provider (Messages API).
 */
class ClaudeEntities {
    /**
     * @param config - Optional settings:
     *   - `apiKey`: falls back to the ANTHROPIC_API_KEY environment variable.
     *   - `model`: defaults to 'claude-3-5-sonnet-20241022'.
     *   - `entityTypes`: defaults to DEFAULT_ENTITY_TYPES.
     * @throws Error when no API key is available.
     */
    constructor(config = {}) {
        this._apiKey = config.apiKey || process.env.ANTHROPIC_API_KEY || '';
        if (!this._apiKey) {
            throw new Error('Anthropic API key required. Pass apiKey or set ANTHROPIC_API_KEY environment variable.');
        }
        this._model = config.model || 'claude-3-5-sonnet-20241022';
        this._entityTypes = config.entityTypes || exports.DEFAULT_ENTITY_TYPES;
    }
    /** Provider identifier in `claude:<model>` form. */
    get name() {
        return `claude:${this._model}`;
    }
    /** Entity types this instance asks the model to extract. */
    get entityTypes() {
        return this._entityTypes;
    }
    /**
     * Extract entities from a single text.
     *
     * @param text - Text appended to the built-in prompt.
     * @param minConfidence - Entities with a lower confidence are dropped (default 0.5).
     * @returns Array of `{ name, type, confidence }`; empty when the reply cannot be parsed.
     * @throws Error carrying the HTTP status and response body when the response is not OK.
     */
    async extract(text, minConfidence = 0.5) {
        const prompt = `Extract named entities from this text. Return ONLY a JSON object with an "entities" array.

Each entity: {"name": "exact text", "type": "TYPE", "confidence": 0.9}
Types: ${this._entityTypes.join(', ')}

Text: ${text}`;
        const response = await fetch('https://api.anthropic.com/v1/messages', {
            method: 'POST',
            headers: {
                'x-api-key': this._apiKey,
                'anthropic-version': '2023-06-01',
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                model: this._model,
                max_tokens: 1000,
                messages: [{ role: 'user', content: prompt }],
            }),
        });
        if (!response.ok) {
            const error = await response.text();
            throw new Error(`Claude API error: ${response.status} ${error}`);
        }
        const data = await response.json();
        let content = data.content?.[0]?.text || '{}';
        try {
            // Unwrap a markdown code fence if the model added one.
            if (content.includes('```json')) {
                content = content.split('```json')[1].split('```')[0];
            }
            else if (content.includes('```')) {
                content = content.split('```')[1].split('```')[0];
            }
            const parsed = JSON.parse(content);
            const rawEntities = parsed.entities || [];
            // `??` (not `||`) so that an explicit confidence of 0 is respected;
            // only a missing confidence falls back to 0.8.
            return rawEntities
                .filter((e) => (e.confidence ?? 0.8) >= minConfidence)
                .map((e) => ({
                name: e.name || '',
                type: e.type || 'OTHER',
                confidence: e.confidence ?? 0.8,
            }));
        }
        catch {
            // Best effort: an unparseable or malformed reply yields no entities.
            return [];
        }
    }
    /**
     * Extract entities from several texts, one request after another.
     *
     * @param texts - Texts to process, in order.
     * @param minConfidence - Forwarded to `extract` for every text.
     * @returns One entity array per input text, in input order.
     */
    async extractBatch(texts, minConfidence = 0.5) {
        const results = [];
        for (const text of texts) {
            results.push(await this.extract(text, minConfidence));
        }
        return results;
    }
}
|
|
342
|
+
exports.ClaudeEntities = ClaudeEntities;
|
|
343
|
+
/**
 * Google Gemini entity extraction provider (generateContent endpoint).
 */
class GeminiEntities {
    /**
     * @param config - Optional settings:
     *   - `apiKey`: falls back to the GEMINI_API_KEY environment variable.
     *   - `model`: defaults to 'gemini-2.0-flash'.
     *   - `entityTypes`: defaults to DEFAULT_ENTITY_TYPES.
     * @throws Error when no API key is available.
     */
    constructor(config = {}) {
        this._apiKey = config.apiKey || process.env.GEMINI_API_KEY || '';
        if (!this._apiKey) {
            throw new Error('Gemini API key required. Pass apiKey or set GEMINI_API_KEY environment variable.');
        }
        this._model = config.model || 'gemini-2.0-flash';
        this._entityTypes = config.entityTypes || exports.DEFAULT_ENTITY_TYPES;
    }
    /** Provider identifier in `gemini:<model>` form. */
    get name() {
        return `gemini:${this._model}`;
    }
    /** Entity types this instance asks the model to extract. */
    get entityTypes() {
        return this._entityTypes;
    }
    /**
     * Extract entities from a single text.
     *
     * @param text - Text appended to the built-in prompt.
     * @param minConfidence - Entities with a lower confidence are dropped (default 0.5).
     * @returns Array of `{ name, type, confidence }`; empty when the reply cannot be parsed.
     * @throws Error carrying the HTTP status and response body when the response is not OK.
     */
    async extract(text, minConfidence = 0.5) {
        const prompt = `Extract named entities from this text. Return ONLY a JSON object with an "entities" array.

Each entity: {"name": "exact text", "type": "TYPE", "confidence": 0.9}
Types: ${this._entityTypes.join(', ')}

Text: ${text}`;
        // The API key travels in the `x-goog-api-key` header rather than the query
        // string, so it does not end up in URLs captured by logs or proxies.
        const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/${this._model}:generateContent`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'x-goog-api-key': this._apiKey,
            },
            body: JSON.stringify({
                contents: [{ parts: [{ text: prompt }] }],
            }),
        });
        if (!response.ok) {
            const error = await response.text();
            throw new Error(`Gemini API error: ${response.status} ${error}`);
        }
        const data = await response.json();
        let content = data.candidates?.[0]?.content?.parts?.[0]?.text || '{}';
        try {
            // Unwrap a markdown code fence if the model added one.
            if (content.includes('```json')) {
                content = content.split('```json')[1].split('```')[0];
            }
            else if (content.includes('```')) {
                content = content.split('```')[1].split('```')[0];
            }
            const parsed = JSON.parse(content);
            const rawEntities = parsed.entities || [];
            // `??` (not `||`) so that an explicit confidence of 0 is respected;
            // only a missing confidence falls back to 0.8.
            return rawEntities
                .filter((e) => (e.confidence ?? 0.8) >= minConfidence)
                .map((e) => ({
                name: e.name || '',
                type: e.type || 'OTHER',
                confidence: e.confidence ?? 0.8,
            }));
        }
        catch {
            // Best effort: an unparseable or malformed reply yields no entities.
            return [];
        }
    }
    /**
     * Extract entities from several texts, one request after another.
     *
     * @param texts - Texts to process, in order.
     * @param minConfidence - Forwarded to `extract` for every text.
     * @returns One entity array per input text, in input order.
     */
    async extractBatch(texts, minConfidence = 0.5) {
        const results = [];
        for (const text of texts) {
            results.push(await this.extract(text, minConfidence));
        }
        return results;
    }
}
|
|
410
|
+
exports.GeminiEntities = GeminiEntities;
|
|
411
|
+
/**
 * Parse a `provider` or `provider:model` specification.
 *
 * The provider is the part before the first ':' (lower-cased); everything after
 * it is the model. Surrounding whitespace is trimmed from both parts, and an
 * empty model (e.g. "openai:") is reported as `undefined`.
 *
 * Examples:
 *   "openai" -> ["openai", undefined]
 *   "openai:gpt-4o-mini" -> ["openai", "gpt-4o-mini"]
 *   "claude:claude-3-5-sonnet-20241022" -> ["claude", "claude-3-5-sonnet-20241022"]
 *   "gemini:gemini-2.0-flash" -> ["gemini", "gemini-2.0-flash"]
 *
 * @param spec - Provider specification string.
 * @returns A `[provider, model]` pair; `model` is `undefined` when absent.
 */
function parseProviderModel(spec) {
    const idx = spec.indexOf(':');
    if (idx === -1) {
        return [spec.trim().toLowerCase(), undefined];
    }
    const provider = spec.substring(0, idx).trim().toLowerCase();
    const model = spec.substring(idx + 1).trim();
    return [provider, model === '' ? undefined : model];
}
|
|
426
|
+
/**
 * Build an entity extraction provider from a provider specification.
 *
 * @param provider - Which provider to create (defaults to 'local'). Either
 *   - a bare provider name: 'local', 'openai', 'claude' (alias 'anthropic'), 'gemini'; or
 *   - a name plus model: 'openai:gpt-4o-mini', 'claude:claude-3-5-sonnet-20241022'.
 * @param config - Provider-specific configuration. `config.model`, when set,
 *   takes precedence over a model embedded in `provider`.
 * @returns EntityExtractor instance
 * @throws Error when the provider name is not recognised.
 *
 * @example
 * ```typescript
 * // Simple provider
 * const ner = getEntityExtractor('local');
 * const ner = getEntityExtractor('openai');
 *
 * // Provider with model specification
 * const ner = getEntityExtractor('openai:gpt-4o-mini');
 * const ner = getEntityExtractor('claude:claude-3-5-sonnet-20241022');
 * const ner = getEntityExtractor('gemini:gemini-2.0-flash');
 *
 * // With config for custom entity types
 * const ner = getEntityExtractor('openai', { entityTypes: ['COMPANY', 'PRODUCT'] });
 * ```
 */
function getEntityExtractor(provider = 'local', config = {}) {
    const [kind, modelFromSpec] = parseProviderModel(provider);
    // An explicit config.model wins over the model parsed from the spec.
    const model = config.model ?? modelFromSpec;
    if (kind === 'local') {
        return new LocalNER({ model });
    }
    if (kind === 'openai') {
        return new OpenAIEntities({
            apiKey: config.apiKey,
            model,
            entityTypes: config.entityTypes,
            prompt: config.prompt,
        });
    }
    if (kind === 'claude' || kind === 'anthropic') {
        return new ClaudeEntities({
            apiKey: config.apiKey,
            model,
            entityTypes: config.entityTypes,
        });
    }
    if (kind === 'gemini') {
        return new GeminiEntities({
            apiKey: config.apiKey,
            model,
            entityTypes: config.entityTypes,
        });
    }
    throw new Error(`Unknown provider: ${kind}. Supported: local, openai, claude, gemini`);
}
|
|
482
|
+
// Default export: one object bundling the provider classes, the factory function
// and the default entity type list that this module also exports by name.
exports.default = {
|
|
483
|
+
LocalNER,
|
|
484
|
+
OpenAIEntities,
|
|
485
|
+
ClaudeEntities,
|
|
486
|
+
GeminiEntities,
|
|
487
|
+
getEntityExtractor,
|
|
488
|
+
DEFAULT_ENTITY_TYPES: exports.DEFAULT_ENTITY_TYPES,
|
|
489
|
+
};
|
package/dist/error.d.ts
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import type { MemvidErrorCode, MemvidErrorDetails } from "./types";
|
|
2
|
+
/**
|
|
3
|
+
* Structured error class for Memvid SDK operations.
|
|
4
|
+
*
|
|
5
|
+
* All errors from the Memvid SDK are instances of this class,
|
|
6
|
+
* providing consistent error handling with:
|
|
7
|
+
* - `.code`: Machine-readable error code (e.g., "MV001")
|
|
8
|
+
* - `.message`: Human-readable error message
|
|
9
|
+
* - `.details`: Optional structured details object
|
|
10
|
+
*
|
|
11
|
+
* @example
|
|
12
|
+
* ```typescript
|
|
13
|
+
* import { use, MemvidError } from "@memvid/sdk";
|
|
14
|
+
*
|
|
15
|
+
* try {
|
|
16
|
+
* const mem = await use("basic", "file.mv2");
|
|
17
|
+
* await mem.put({ title: "Doc", label: "test", text: "..." });
|
|
18
|
+
* } catch (err) {
|
|
19
|
+
* if (err instanceof MemvidError) {
|
|
20
|
+
* switch (err.code) {
|
|
21
|
+
* case "MV001":
|
|
22
|
+
* console.error("Capacity exceeded:", err.message);
|
|
23
|
+
* break;
|
|
24
|
+
* case "MV007":
|
|
25
|
+
* console.error("File locked:", err.details?.owner);
|
|
26
|
+
* break;
|
|
27
|
+
* default:
|
|
28
|
+
* console.error(`Error ${err.code}: ${err.message}`);
|
|
29
|
+
* }
|
|
30
|
+
* }
|
|
31
|
+
* }
|
|
32
|
+
* ```
|
|
33
|
+
*/
|
|
34
|
+
export declare class MemvidError extends Error implements MemvidErrorDetails {
|
|
35
|
+
/** Machine-readable error code (e.g., "MV001"). */
readonly code: MemvidErrorCode;
|
|
36
|
+
/** Optional structured details object accompanying the error. */
readonly details?: Record<string, unknown>;
|
|
37
|
+
/** Builds an error from a code, a human-readable message and optional structured details. */
constructor(code: MemvidErrorCode, message: string, details?: Record<string, unknown>);
|
|
38
|
+
/**
|
|
39
|
+
* Create a MemvidError from an error raised by the native binding.
|
|
40
|
+
* Parses the "MV001: message" format from the Rust binding.
|
|
41
|
+
*/
|
|
42
|
+
static fromNative(err: Error): MemvidError;
|
|
43
|
+
/**
|
|
44
|
+
* Type guard: check if an arbitrary value is a MemvidError.
|
|
45
|
+
*/
|
|
46
|
+
static isMemvidError(err: unknown): err is MemvidError;
|
|
47
|
+
/**
|
|
48
|
+
* Convert to a plain MemvidErrorDetails object for logging/serialization.
|
|
49
|
+
*/
|
|
50
|
+
toJSON(): MemvidErrorDetails;
|
|
51
|
+
}
|
|
52
|
+
/**
 * Specialised MemvidError subclasses, one per failure category.
 *
 * Each constructor takes only a message and optional details, with no `code`
 * argument. NOTE(review): each subclass presumably fixes its own error code in
 * the implementation (error.js, not visible here) — confirm there before
 * relying on a specific code for a specific class.
 */
export declare class CapacityExceededError extends MemvidError {
|
|
53
|
+
constructor(message: string, details?: Record<string, unknown>);
|
|
54
|
+
}
|
|
55
|
+
export declare class TicketInvalidError extends MemvidError {
|
|
56
|
+
constructor(message: string, details?: Record<string, unknown>);
|
|
57
|
+
}
|
|
58
|
+
export declare class TicketReplayError extends MemvidError {
|
|
59
|
+
constructor(message: string, details?: Record<string, unknown>);
|
|
60
|
+
}
|
|
61
|
+
export declare class LexIndexDisabledError extends MemvidError {
|
|
62
|
+
constructor(message: string, details?: Record<string, unknown>);
|
|
63
|
+
}
|
|
64
|
+
export declare class TimeIndexMissingError extends MemvidError {
|
|
65
|
+
constructor(message: string, details?: Record<string, unknown>);
|
|
66
|
+
}
|
|
67
|
+
export declare class VerificationFailedError extends MemvidError {
|
|
68
|
+
constructor(message: string, details?: Record<string, unknown>);
|
|
69
|
+
}
|
|
70
|
+
export declare class LockedError extends MemvidError {
|
|
71
|
+
constructor(message: string, details?: Record<string, unknown>);
|
|
72
|
+
}
|
|
73
|
+
export declare class ApiKeyRequiredError extends MemvidError {
|
|
74
|
+
constructor(message: string, details?: Record<string, unknown>);
|
|
75
|
+
}
|
|
76
|
+
export declare class MemoryAlreadyBoundError extends MemvidError {
|
|
77
|
+
constructor(message: string, details?: Record<string, unknown>);
|
|
78
|
+
}
|
|
79
|
+
export declare class FrameNotFoundError extends MemvidError {
|
|
80
|
+
constructor(message: string, details?: Record<string, unknown>);
|
|
81
|
+
}
|
|
82
|
+
export declare class VecIndexDisabledError extends MemvidError {
|
|
83
|
+
constructor(message: string, details?: Record<string, unknown>);
|
|
84
|
+
}
|
|
85
|
+
export declare class CorruptFileError extends MemvidError {
|
|
86
|
+
constructor(message: string, details?: Record<string, unknown>);
|
|
87
|
+
}
|
|
88
|
+
/**
|
|
89
|
+
* Wrap a native error into the appropriate MemvidError subclass.
 *
 * @param err - The error to wrap (per the name, one coming from the native binding).
 * @returns A MemvidError, or one of its subclasses, representing `err`.
|
|
90
|
+
*/
|
|
91
|
+
export declare function wrapNativeError(err: Error): MemvidError;
|