@memvid/sdk 2.0.113

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/LICENSE +190 -0
  2. package/README.md +244 -0
  3. package/dist/__tests__/basic.test.d.ts +1 -0
  4. package/dist/__tests__/basic.test.js +41 -0
  5. package/dist/adapters/autogen.d.ts +23 -0
  6. package/dist/adapters/autogen.js +163 -0
  7. package/dist/adapters/basic.d.ts +1 -0
  8. package/dist/adapters/basic.js +11 -0
  9. package/dist/adapters/crewai.d.ts +23 -0
  10. package/dist/adapters/crewai.js +160 -0
  11. package/dist/adapters/google_adk.d.ts +25 -0
  12. package/dist/adapters/google_adk.js +158 -0
  13. package/dist/adapters/haystack.d.ts +1 -0
  14. package/dist/adapters/haystack.js +11 -0
  15. package/dist/adapters/langchain.d.ts +28 -0
  16. package/dist/adapters/langchain.js +156 -0
  17. package/dist/adapters/langgraph.d.ts +1 -0
  18. package/dist/adapters/langgraph.js +11 -0
  19. package/dist/adapters/llamaindex.d.ts +33 -0
  20. package/dist/adapters/llamaindex.js +195 -0
  21. package/dist/adapters/mcp.d.ts +1 -0
  22. package/dist/adapters/mcp.js +11 -0
  23. package/dist/adapters/openai.d.ts +26 -0
  24. package/dist/adapters/openai.js +169 -0
  25. package/dist/adapters/semantic_kernel.d.ts +1 -0
  26. package/dist/adapters/semantic_kernel.js +11 -0
  27. package/dist/adapters/vercel_ai.d.ts +27 -0
  28. package/dist/adapters/vercel_ai.js +148 -0
  29. package/dist/clip.d.ts +182 -0
  30. package/dist/clip.js +371 -0
  31. package/dist/embeddings.d.ts +156 -0
  32. package/dist/embeddings.js +289 -0
  33. package/dist/entities.d.ts +251 -0
  34. package/dist/entities.js +489 -0
  35. package/dist/error.d.ts +91 -0
  36. package/dist/error.js +203 -0
  37. package/dist/index.d.ts +53 -0
  38. package/dist/index.js +458 -0
  39. package/dist/noop.d.ts +2 -0
  40. package/dist/noop.js +55 -0
  41. package/dist/registry.d.ts +5 -0
  42. package/dist/registry.js +53 -0
  43. package/dist/types.d.ts +275 -0
  44. package/dist/types.js +2 -0
  45. package/index.node +0 -0
  46. package/package.json +81 -0
@@ -0,0 +1,156 @@
1
+ /**
2
+ * External embedding provider support for Memvid SDK (Node.js).
3
+ *
4
+ * This module provides classes for generating embeddings using external providers
5
+ * like OpenAI, allowing users to use their own embedding models with Memvid.
6
+ *
7
+ * @example
8
+ * ```typescript
9
+ * import { create } from '@memvid/sdk';
10
+ * import { OpenAIEmbeddings } from '@memvid/sdk/embeddings';
11
+ *
12
+ * // Initialize embedding provider
13
+ * const embedder = new OpenAIEmbeddings(); // Uses OPENAI_API_KEY env var
14
+ *
15
+ * // Create memory with external embeddings
16
+ * const mem = await create('knowledge.mv2', 'basic');
17
+ *
18
+ * // Store documents with embeddings
19
+ * const docs = [
20
+ * { title: 'Doc 1', label: 'notes', text: 'Content 1...' },
21
+ * { title: 'Doc 2', label: 'notes', text: 'Content 2...' },
22
+ * ];
23
+ * const embeddings = await embedder.embedDocuments(docs.map(d => d.text));
24
+ * // Use embeddings with putMany when available
25
+ * ```
26
+ */
27
+ /**
28
+ * Model dimension mappings for common embedding models.
29
+ */
30
+ export declare const MODEL_DIMENSIONS: Record<string, number>;
31
+ /**
32
+ * Abstract interface for embedding providers.
33
+ */
34
+ export interface EmbeddingProvider {
35
+ /** Embedding dimension for this model. */
36
+ readonly dimension: number;
37
+ /** Model name/identifier. */
38
+ readonly modelName: string;
39
+ /**
40
+ * Generate embeddings for a list of documents.
41
+ * @param texts - List of text documents to embed
42
+ * @returns Promise resolving to list of embedding vectors
43
+ */
44
+ embedDocuments(texts: string[]): Promise<number[][]>;
45
+ /**
46
+ * Generate embedding for a single query.
47
+ * @param text - Query text to embed
48
+ * @returns Promise resolving to embedding vector
49
+ */
50
+ embedQuery(text: string): Promise<number[]>;
51
+ }
52
+ /**
53
+ * OpenAI embedding provider configuration.
54
+ */
55
+ export interface OpenAIEmbeddingsConfig {
56
+ /** OpenAI API key. If not provided, uses OPENAI_API_KEY env var. */
57
+ apiKey?: string;
58
+ /** Model to use. Default: 'text-embedding-3-small' */
59
+ model?: string;
60
+ /** Number of texts to embed in a single API call. Default: 100 */
61
+ batchSize?: number;
62
+ }
63
+ /**
64
+ * OpenAI embedding provider.
65
+ *
66
+ * Uses OpenAI's text-embedding models to generate embeddings.
67
+ * Compatible with text-embedding-3-small, text-embedding-3-large, and text-embedding-ada-002.
68
+ *
69
+ * @example
70
+ * ```typescript
71
+ * const embedder = new OpenAIEmbeddings(); // Uses OPENAI_API_KEY
72
+ * const embedder = new OpenAIEmbeddings({ model: 'text-embedding-3-large' });
73
+ * const vectors = await embedder.embedDocuments(['Hello', 'World']);
74
+ * ```
75
+ */
76
+ export declare class OpenAIEmbeddings implements EmbeddingProvider {
77
+ private readonly _apiKey; // config.apiKey, else the OPENAI_API_KEY environment variable
78
+ private readonly _model; // config.model, else 'text-embedding-3-small'
79
+ private readonly _batchSize; // config.batchSize, else 100
80
+ constructor(config?: OpenAIEmbeddingsConfig); // throws an Error when no API key is configured
81
+ get dimension(): number; // MODEL_DIMENSIONS entry for the model; 1536 when the model is not listed
82
+ get modelName(): string; // the configured model identifier
83
+ embedDocuments(texts: string[]): Promise<number[][]>; // resolves to [] for empty input; otherwise requests texts in batches; rejects on a non-OK HTTP response
84
+ embedQuery(text: string): Promise<number[]>; // embeds one text with a single request; rejects on a non-OK HTTP response
85
+ }
86
+ /**
87
+ * Cohere embedding provider configuration.
88
+ */
89
+ export interface CohereEmbeddingsConfig {
90
+ /** Cohere API key. If not provided, uses COHERE_API_KEY env var. */
91
+ apiKey?: string;
92
+ /** Model to use. Default: 'embed-english-v3.0' */
93
+ model?: string;
94
+ /** Input type for documents. Default: 'search_document' */
95
+ inputType?: 'search_document' | 'search_query' | 'classification' | 'clustering';
96
+ }
97
+ /**
98
+ * Cohere embedding provider.
99
+ *
100
+ * @example
101
+ * ```typescript
102
+ * const embedder = new CohereEmbeddings(); // Uses COHERE_API_KEY
103
+ * const vectors = await embedder.embedDocuments(['Hello', 'World']);
104
+ * ```
105
+ */
106
+ export declare class CohereEmbeddings implements EmbeddingProvider {
107
+ private readonly _apiKey; // config.apiKey, else the COHERE_API_KEY environment variable
108
+ private readonly _model; // config.model, else 'embed-english-v3.0'
109
+ private readonly _inputType; // input_type sent by embedDocuments; config.inputType, else 'search_document'
110
+ constructor(config?: CohereEmbeddingsConfig); // throws an Error when no API key is configured
111
+ get dimension(): number; // MODEL_DIMENSIONS entry for the model; 1024 when the model is not listed
112
+ get modelName(): string; // the configured model identifier
113
+ embedDocuments(texts: string[]): Promise<number[][]>; // resolves to [] for empty input; rejects on a non-OK HTTP response
114
+ embedQuery(text: string): Promise<number[]>; // always sends input_type 'search_query', regardless of config.inputType
115
+ }
116
+ /**
117
+ * Voyage AI embedding provider configuration.
118
+ */
119
+ export interface VoyageEmbeddingsConfig {
120
+ /** Voyage API key. If not provided, uses VOYAGE_API_KEY env var. */
121
+ apiKey?: string;
122
+ /** Model to use. Default: 'voyage-3' */
123
+ model?: string;
124
+ }
125
+ /**
126
+ * Voyage AI embedding provider.
127
+ *
128
+ * @example
129
+ * ```typescript
130
+ * const embedder = new VoyageEmbeddings(); // Uses VOYAGE_API_KEY
131
+ * const vectors = await embedder.embedDocuments(['Hello', 'World']);
132
+ * ```
133
+ */
134
+ export declare class VoyageEmbeddings implements EmbeddingProvider {
135
+ private readonly _apiKey; // config.apiKey, else the VOYAGE_API_KEY environment variable
136
+ private readonly _model; // config.model, else 'voyage-3'
137
+ constructor(config?: VoyageEmbeddingsConfig); // throws an Error when no API key is configured
138
+ get dimension(): number; // MODEL_DIMENSIONS entry for the model; 1024 when the model is not listed
139
+ get modelName(): string; // the configured model identifier
140
+ embedDocuments(texts: string[]): Promise<number[][]>; // sends input_type 'document'; resolves to [] for empty input
141
+ embedQuery(text: string): Promise<number[]>; // sends input_type 'query' with the text as a one-element list
142
+ }
143
+ /**
144
+ * Factory function to create an embedding provider.
145
+ *
146
+ * @param provider - One of: 'openai', 'cohere', 'voyage'
147
+ * @param config - Provider-specific configuration
148
+ * @returns EmbeddingProvider instance
149
+ *
150
+ * @example
151
+ * ```typescript
152
+ * const embedder = getEmbedder('openai');
153
+ * const embedder = getEmbedder('cohere', { model: 'embed-multilingual-v3.0' });
154
+ * ```
155
+ */
156
+ export declare function getEmbedder(provider: 'openai' | 'cohere' | 'voyage', config?: Record<string, unknown>): EmbeddingProvider;
@@ -0,0 +1,289 @@
1
+ "use strict";
2
+ /**
3
+ * External embedding provider support for Memvid SDK (Node.js).
4
+ *
5
+ * This module provides classes for generating embeddings using external providers
6
+ * like OpenAI, allowing users to use their own embedding models with Memvid.
7
+ *
8
+ * @example
9
+ * ```typescript
10
+ * import { create } from '@memvid/sdk';
11
+ * import { OpenAIEmbeddings } from '@memvid/sdk/embeddings';
12
+ *
13
+ * // Initialize embedding provider
14
+ * const embedder = new OpenAIEmbeddings(); // Uses OPENAI_API_KEY env var
15
+ *
16
+ * // Create memory with external embeddings
17
+ * const mem = await create('knowledge.mv2', 'basic');
18
+ *
19
+ * // Store documents with embeddings
20
+ * const docs = [
21
+ * { title: 'Doc 1', label: 'notes', text: 'Content 1...' },
22
+ * { title: 'Doc 2', label: 'notes', text: 'Content 2...' },
23
+ * ];
24
+ * const embeddings = await embedder.embedDocuments(docs.map(d => d.text));
25
+ * // Use embeddings with putMany when available
26
+ * ```
27
+ */
28
+ Object.defineProperty(exports, "__esModule", { value: true });
29
+ exports.VoyageEmbeddings = exports.CohereEmbeddings = exports.OpenAIEmbeddings = exports.MODEL_DIMENSIONS = void 0;
30
+ exports.getEmbedder = getEmbedder;
31
+ /**
32
+ * Model dimension mappings for common embedding models.
33
+ */
34
+ exports.MODEL_DIMENSIONS = {
35
+ // OpenAI models
36
+ 'text-embedding-3-small': 1536,
37
+ 'text-embedding-3-large': 3072,
38
+ 'text-embedding-ada-002': 1536,
39
+ // Cohere models
40
+ 'embed-english-v3.0': 1024,
41
+ 'embed-multilingual-v3.0': 1024,
42
+ 'embed-english-light-v3.0': 384,
43
+ 'embed-multilingual-light-v3.0': 384,
44
+ // Voyage models
45
+ 'voyage-3': 1024,
46
+ 'voyage-3-lite': 512,
47
+ 'voyage-code-3': 1024,
48
+ };
49
+ /**
50
+ * OpenAI embedding provider.
51
+ *
52
+ * Uses OpenAI's text-embedding models to generate embeddings.
53
+ * Compatible with text-embedding-3-small, text-embedding-3-large, and text-embedding-ada-002.
54
+ *
55
+ * @example
56
+ * ```typescript
57
+ * const embedder = new OpenAIEmbeddings(); // Uses OPENAI_API_KEY
58
+ * const embedder = new OpenAIEmbeddings({ model: 'text-embedding-3-large' });
59
+ * const vectors = await embedder.embedDocuments(['Hello', 'World']);
60
+ * ```
61
+ */
62
+ class OpenAIEmbeddings {
63
+ constructor(config = {}) {
64
+ this._apiKey = config.apiKey || process.env.OPENAI_API_KEY || '';
65
+ if (!this._apiKey) {
66
+ throw new Error('OpenAI API key required. Pass apiKey or set OPENAI_API_KEY environment variable.');
67
+ }
68
+ this._model = config.model || 'text-embedding-3-small';
69
+ this._batchSize = config.batchSize || 100;
70
+ }
71
+ get dimension() {
72
+ return exports.MODEL_DIMENSIONS[this._model] || 1536;
73
+ }
74
+ get modelName() {
75
+ return this._model;
76
+ }
77
+ async embedDocuments(texts) {
78
+ if (texts.length === 0) {
79
+ return [];
80
+ }
81
+ const allEmbeddings = [];
82
+ // Process in batches
83
+ for (let i = 0; i < texts.length; i += this._batchSize) {
84
+ const batch = texts.slice(i, i + this._batchSize);
85
+ const response = await fetch('https://api.openai.com/v1/embeddings', {
86
+ method: 'POST',
87
+ headers: {
88
+ 'Authorization': `Bearer ${this._apiKey}`,
89
+ 'Content-Type': 'application/json',
90
+ },
91
+ body: JSON.stringify({
92
+ model: this._model,
93
+ input: batch,
94
+ }),
95
+ });
96
+ if (!response.ok) {
97
+ const error = await response.text();
98
+ throw new Error(`OpenAI API error: ${response.status} ${error}`);
99
+ }
100
+ const data = await response.json();
101
+ // Sort by index to ensure correct order
102
+ const sorted = data.data.sort((a, b) => a.index - b.index);
103
+ allEmbeddings.push(...sorted.map(e => e.embedding));
104
+ }
105
+ return allEmbeddings;
106
+ }
107
+ async embedQuery(text) {
108
+ const response = await fetch('https://api.openai.com/v1/embeddings', {
109
+ method: 'POST',
110
+ headers: {
111
+ 'Authorization': `Bearer ${this._apiKey}`,
112
+ 'Content-Type': 'application/json',
113
+ },
114
+ body: JSON.stringify({
115
+ model: this._model,
116
+ input: text,
117
+ }),
118
+ });
119
+ if (!response.ok) {
120
+ const error = await response.text();
121
+ throw new Error(`OpenAI API error: ${response.status} ${error}`);
122
+ }
123
+ const data = await response.json();
124
+ return data.data[0].embedding;
125
+ }
126
+ }
127
+ exports.OpenAIEmbeddings = OpenAIEmbeddings;
128
+ /**
129
+ * Cohere embedding provider.
130
+ *
131
+ * @example
132
+ * ```typescript
133
+ * const embedder = new CohereEmbeddings(); // Uses COHERE_API_KEY
134
+ * const vectors = await embedder.embedDocuments(['Hello', 'World']);
135
+ * ```
136
+ */
137
+ class CohereEmbeddings {
138
+ constructor(config = {}) {
139
+ this._apiKey = config.apiKey || process.env.COHERE_API_KEY || '';
140
+ if (!this._apiKey) {
141
+ throw new Error('Cohere API key required. Pass apiKey or set COHERE_API_KEY environment variable.');
142
+ }
143
+ this._model = config.model || 'embed-english-v3.0';
144
+ this._inputType = config.inputType || 'search_document';
145
+ }
146
+ get dimension() {
147
+ return exports.MODEL_DIMENSIONS[this._model] || 1024;
148
+ }
149
+ get modelName() {
150
+ return this._model;
151
+ }
152
+ async embedDocuments(texts) {
153
+ if (texts.length === 0) {
154
+ return [];
155
+ }
156
+ const response = await fetch('https://api.cohere.ai/v1/embed', {
157
+ method: 'POST',
158
+ headers: {
159
+ 'Authorization': `Bearer ${this._apiKey}`,
160
+ 'Content-Type': 'application/json',
161
+ },
162
+ body: JSON.stringify({
163
+ model: this._model,
164
+ texts: texts,
165
+ input_type: this._inputType,
166
+ }),
167
+ });
168
+ if (!response.ok) {
169
+ const error = await response.text();
170
+ throw new Error(`Cohere API error: ${response.status} ${error}`);
171
+ }
172
+ const data = await response.json();
173
+ return data.embeddings;
174
+ }
175
+ async embedQuery(text) {
176
+ const response = await fetch('https://api.cohere.ai/v1/embed', {
177
+ method: 'POST',
178
+ headers: {
179
+ 'Authorization': `Bearer ${this._apiKey}`,
180
+ 'Content-Type': 'application/json',
181
+ },
182
+ body: JSON.stringify({
183
+ model: this._model,
184
+ texts: [text],
185
+ input_type: 'search_query',
186
+ }),
187
+ });
188
+ if (!response.ok) {
189
+ const error = await response.text();
190
+ throw new Error(`Cohere API error: ${response.status} ${error}`);
191
+ }
192
+ const data = await response.json();
193
+ return data.embeddings[0];
194
+ }
195
+ }
196
+ exports.CohereEmbeddings = CohereEmbeddings;
197
+ /**
198
+ * Voyage AI embedding provider.
199
+ *
200
+ * @example
201
+ * ```typescript
202
+ * const embedder = new VoyageEmbeddings(); // Uses VOYAGE_API_KEY
203
+ * const vectors = await embedder.embedDocuments(['Hello', 'World']);
204
+ * ```
205
+ */
206
+ class VoyageEmbeddings {
207
+ constructor(config = {}) {
208
+ this._apiKey = config.apiKey || process.env.VOYAGE_API_KEY || '';
209
+ if (!this._apiKey) {
210
+ throw new Error('Voyage API key required. Pass apiKey or set VOYAGE_API_KEY environment variable.');
211
+ }
212
+ this._model = config.model || 'voyage-3';
213
+ }
214
+ get dimension() {
215
+ return exports.MODEL_DIMENSIONS[this._model] || 1024;
216
+ }
217
+ get modelName() {
218
+ return this._model;
219
+ }
220
+ async embedDocuments(texts) {
221
+ if (texts.length === 0) {
222
+ return [];
223
+ }
224
+ const response = await fetch('https://api.voyageai.com/v1/embeddings', {
225
+ method: 'POST',
226
+ headers: {
227
+ 'Authorization': `Bearer ${this._apiKey}`,
228
+ 'Content-Type': 'application/json',
229
+ },
230
+ body: JSON.stringify({
231
+ model: this._model,
232
+ input: texts,
233
+ input_type: 'document',
234
+ }),
235
+ });
236
+ if (!response.ok) {
237
+ const error = await response.text();
238
+ throw new Error(`Voyage API error: ${response.status} ${error}`);
239
+ }
240
+ const data = await response.json();
241
+ return data.data.map(d => d.embedding);
242
+ }
243
+ async embedQuery(text) {
244
+ const response = await fetch('https://api.voyageai.com/v1/embeddings', {
245
+ method: 'POST',
246
+ headers: {
247
+ 'Authorization': `Bearer ${this._apiKey}`,
248
+ 'Content-Type': 'application/json',
249
+ },
250
+ body: JSON.stringify({
251
+ model: this._model,
252
+ input: [text],
253
+ input_type: 'query',
254
+ }),
255
+ });
256
+ if (!response.ok) {
257
+ const error = await response.text();
258
+ throw new Error(`Voyage API error: ${response.status} ${error}`);
259
+ }
260
+ const data = await response.json();
261
+ return data.data[0].embedding;
262
+ }
263
+ }
264
+ exports.VoyageEmbeddings = VoyageEmbeddings;
265
+ /**
266
+ * Factory function to create an embedding provider.
267
+ *
268
+ * @param provider - One of: 'openai', 'cohere', 'voyage'
269
+ * @param config - Provider-specific configuration
270
+ * @returns EmbeddingProvider instance
271
+ *
272
+ * @example
273
+ * ```typescript
274
+ * const embedder = getEmbedder('openai');
275
+ * const embedder = getEmbedder('cohere', { model: 'embed-multilingual-v3.0' });
276
+ * ```
277
+ */
278
+ function getEmbedder(provider, config) {
279
+ switch (provider.toLowerCase()) {
280
+ case 'openai':
281
+ return new OpenAIEmbeddings(config);
282
+ case 'cohere':
283
+ return new CohereEmbeddings(config);
284
+ case 'voyage':
285
+ return new VoyageEmbeddings(config);
286
+ default:
287
+ throw new Error(`Unknown provider: ${provider}. Supported: openai, cohere, voyage`);
288
+ }
289
+ }
@@ -0,0 +1,251 @@
1
+ /**
2
+ * Entity extraction (NER) provider support for Memvid SDK (Node.js).
3
+ *
4
+ * Providers:
5
+ * - LocalNER: DistilBERT-NER (ONNX, offline)
6
+ * - OpenAIEntities: OpenAI GPT-4 (cloud, custom entity types)
7
+ * - ClaudeEntities: Anthropic Claude (cloud, custom entity types)
8
+ * - GeminiEntities: Google Gemini (cloud, custom entity types)
9
+ *
10
+ * @example
11
+ * ```typescript
12
+ * import { create } from '@memvid/sdk';
13
+ * import { getEntityExtractor, LocalNER, OpenAIEntities } from '@memvid/sdk/entities';
14
+ *
15
+ * // Local NER (default)
16
+ * const ner = getEntityExtractor('local');
17
+ *
18
+ * // Or with cloud provider for custom entity types
19
+ * const ner = getEntityExtractor('openai', {
20
+ * entityTypes: ['COMPANY', 'PRODUCT', 'EXECUTIVE'],
21
+ * });
22
+ *
23
+ * // Extract entities
24
+ * const text = "Microsoft CEO Satya Nadella announced the new Surface Pro in Seattle.";
25
+ * const entities = await ner.extract(text);
26
+ * // [
27
+ * // { name: "Microsoft", type: "ORG", confidence: 0.99 },
28
+ * // { name: "Satya Nadella", type: "PERSON", confidence: 0.97 },
29
+ * // { name: "Seattle", type: "LOCATION", confidence: 0.98 },
30
+ * // ]
31
+ *
32
+ * // Store with entities
33
+ * const mem = await create('knowledge.mv2', 'basic');
34
+ * await mem.put({ title: 'Tech News', text, entities });
35
+ * ```
36
+ */
37
+ /**
38
+ * Extracted entity structure.
39
+ */
40
+ export interface Entity {
41
+ /** Entity name as it appears in text. */
42
+ name: string;
43
+ /** Entity type (e.g., PERSON, ORG, LOCATION). */
44
+ type: string;
45
+ /** Confidence score between 0.0 and 1.0. */
46
+ confidence: number;
47
+ }
48
+ /**
49
+ * Relationship between entities.
50
+ */
51
+ export interface Relationship {
52
+ /** Source entity name. */
53
+ source: string;
54
+ /** Target entity name. */
55
+ target: string;
56
+ /** Relationship type (e.g., WORKS_FOR, LOCATED_IN). */
57
+ type: string;
58
+ /** Confidence score between 0.0 and 1.0. */
59
+ confidence: number;
60
+ }
61
+ /**
62
+ * Result from entity extraction with optional relationships.
63
+ */
64
+ export interface EntityExtractionResult {
65
+ /** Extracted entities. */
66
+ entities: Entity[];
67
+ /** Extracted relationships (optional). */
68
+ relationships?: Relationship[];
69
+ }
70
+ /**
71
+ * Abstract interface for entity extraction providers.
72
+ */
73
+ export interface EntityExtractor {
74
+ /** Provider name (e.g., 'local:distilbert-ner'). */
75
+ readonly name: string;
76
+ /** Supported entity types. */
77
+ readonly entityTypes: string[];
78
+ /**
79
+ * Extract entities from text.
80
+ * @param text - Text to extract entities from
81
+ * @param minConfidence - Minimum confidence threshold (0.0-1.0)
82
+ * @returns Promise resolving to list of entities
83
+ */
84
+ extract(text: string, minConfidence?: number): Promise<Entity[]>;
85
+ /**
86
+ * Extract entities from multiple texts.
87
+ * @param texts - Texts to extract entities from
88
+ * @param minConfidence - Minimum confidence threshold
89
+ * @returns Promise resolving to list of entity arrays
90
+ */
91
+ extractBatch(texts: string[], minConfidence?: number): Promise<Entity[][]>;
92
+ }
93
+ /**
94
+ * Default entity types for cloud providers.
95
+ */
96
+ export declare const DEFAULT_ENTITY_TYPES: string[];
97
+ /**
98
+ * LocalNER configuration options.
99
+ */
100
+ export interface LocalNERConfig {
101
+ /** Model to use. Default: 'distilbert-ner' */
102
+ model?: string;
103
+ }
104
+ /**
105
+ * Local NER provider using DistilBERT-NER (ONNX).
106
+ *
107
+ * Supported entity types (fixed):
108
+ * - PERSON: People's names
109
+ * - ORG: Organizations
110
+ * - LOCATION: Places
111
+ * - MISC: Miscellaneous
112
+ */
113
+ export declare class LocalNER implements EntityExtractor {
114
+ private readonly _model;
115
+ private _nativeModel;
116
+ constructor(config?: LocalNERConfig);
117
+ get name(): string;
118
+ get entityTypes(): string[];
119
+ private _getModel;
120
+ extract(text: string, minConfidence?: number): Promise<Entity[]>;
121
+ extractBatch(texts: string[], minConfidence?: number): Promise<Entity[][]>;
122
+ }
123
+ /**
124
+ * OpenAIEntities configuration options.
125
+ */
126
+ export interface OpenAIEntitiesConfig {
127
+ /** OpenAI API key. If not provided, uses OPENAI_API_KEY env var. */
128
+ apiKey?: string;
129
+ /** Model to use. Default: 'gpt-4o-mini' */
130
+ model?: string;
131
+ /** Custom entity types. Default: standard NER types */
132
+ entityTypes?: string[];
133
+ /** Custom extraction prompt. */
134
+ prompt?: string;
135
+ }
136
+ /**
137
+ * OpenAI chat-model entity extraction provider (default model: 'gpt-4o-mini').
138
+ *
139
+ * Supports custom entity types and relationship extraction.
140
+ */
141
+ export declare class OpenAIEntities implements EntityExtractor {
142
+ private readonly _apiKey;
143
+ private readonly _model;
144
+ private readonly _entityTypes;
145
+ private readonly _prompt;
146
+ private static readonly DEFAULT_PROMPT;
147
+ constructor(config?: OpenAIEntitiesConfig);
148
+ get name(): string;
149
+ get entityTypes(): string[];
150
+ extract(text: string, minConfidence?: number): Promise<Entity[]>;
151
+ extractBatch(texts: string[], minConfidence?: number): Promise<Entity[][]>;
152
+ /**
153
+ * Extract entities AND relationships from text.
154
+ */
155
+ extractWithRelationships(text: string, minConfidence?: number): Promise<EntityExtractionResult>;
156
+ }
157
+ /**
158
+ * ClaudeEntities configuration options.
159
+ */
160
+ export interface ClaudeEntitiesConfig {
161
+ /** Anthropic API key. If not provided, uses ANTHROPIC_API_KEY env var. */
162
+ apiKey?: string;
163
+ /** Model to use. Default: 'claude-3-5-sonnet-20241022' */
164
+ model?: string;
165
+ /** Custom entity types. */
166
+ entityTypes?: string[];
167
+ }
168
+ /**
169
+ * Anthropic Claude entity extraction provider.
170
+ */
171
+ export declare class ClaudeEntities implements EntityExtractor {
172
+ private readonly _apiKey;
173
+ private readonly _model;
174
+ private readonly _entityTypes;
175
+ constructor(config?: ClaudeEntitiesConfig);
176
+ get name(): string;
177
+ get entityTypes(): string[];
178
+ extract(text: string, minConfidence?: number): Promise<Entity[]>;
179
+ extractBatch(texts: string[], minConfidence?: number): Promise<Entity[][]>;
180
+ }
181
+ /**
182
+ * GeminiEntities configuration options.
183
+ */
184
+ export interface GeminiEntitiesConfig {
185
+ /** Google AI API key. If not provided, uses GEMINI_API_KEY env var. */
186
+ apiKey?: string;
187
+ /** Model to use. Default: 'gemini-2.0-flash' */
188
+ model?: string;
189
+ /** Custom entity types. */
190
+ entityTypes?: string[];
191
+ }
192
+ /**
193
+ * Google Gemini entity extraction provider.
194
+ */
195
+ export declare class GeminiEntities implements EntityExtractor {
196
+ private readonly _apiKey;
197
+ private readonly _model;
198
+ private readonly _entityTypes;
199
+ constructor(config?: GeminiEntitiesConfig);
200
+ get name(): string;
201
+ get entityTypes(): string[];
202
+ extract(text: string, minConfidence?: number): Promise<Entity[]>;
203
+ extractBatch(texts: string[], minConfidence?: number): Promise<Entity[][]>;
204
+ }
205
+ /**
206
+ * Configuration options for getEntityExtractor factory.
207
+ */
208
+ export interface EntityExtractorConfig {
209
+ /** Model name (provider-specific). */
210
+ model?: string;
211
+ /** API key for cloud providers. */
212
+ apiKey?: string;
213
+ /** Custom entity types (cloud providers only). */
214
+ entityTypes?: string[];
215
+ /** Custom extraction prompt (OpenAI only). */
216
+ prompt?: string;
217
+ }
218
+ /**
219
+ * Factory function to create an entity extraction provider.
220
+ *
221
+ * @param provider - Provider specification. Can be:
222
+ * - Simple: 'local', 'openai', 'claude', 'gemini'
223
+ * - With model: 'openai:gpt-4o-mini', 'claude:claude-3-5-sonnet-20241022'
224
+ * @param config - Provider-specific configuration
225
+ * @returns EntityExtractor instance
226
+ *
227
+ * @example
228
+ * ```typescript
229
+ * // Simple provider
230
+ * const ner = getEntityExtractor('local');
231
+ * const ner = getEntityExtractor('openai');
232
+ *
233
+ * // Provider with model specification
234
+ * const ner = getEntityExtractor('openai:gpt-4o-mini');
235
+ * const ner = getEntityExtractor('claude:claude-3-5-sonnet-20241022');
236
+ * const ner = getEntityExtractor('gemini:gemini-2.0-flash');
237
+ *
238
+ * // With config for custom entity types
239
+ * const ner = getEntityExtractor('openai', { entityTypes: ['COMPANY', 'PRODUCT'] });
240
+ * ```
241
+ */
242
+ export declare function getEntityExtractor(provider?: string, config?: EntityExtractorConfig): EntityExtractor;
243
+ declare const _default: {
244
+ LocalNER: typeof LocalNER;
245
+ OpenAIEntities: typeof OpenAIEntities;
246
+ ClaudeEntities: typeof ClaudeEntities;
247
+ GeminiEntities: typeof GeminiEntities;
248
+ getEntityExtractor: typeof getEntityExtractor;
249
+ DEFAULT_ENTITY_TYPES: string[];
250
+ };
251
+ export default _default;