@lov3kaizen/agentsea-embeddings 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,37 @@
1
+ import {
2
+ BaseChunker,
3
+ CodeChunker,
4
+ FixedChunker,
5
+ MarkdownChunker,
6
+ RecursiveChunker,
7
+ SemanticChunker,
8
+ chunk,
9
+ createChunker,
10
+ createCodeChunker,
11
+ createFixedChunker,
12
+ createMarkdownChunker,
13
+ createRecursiveChunker,
14
+ createSemanticChunker,
15
+ defaultTokenCounter,
16
+ mergeSmallChunks,
17
+ splitLargeChunks
18
+ } from "../chunk-DJAURHAS.mjs";
19
+ import "../chunk-QAITLJ2E.mjs";
20
+ export { // Re-exports, under the same names, the chunker bindings imported from "../chunk-DJAURHAS.mjs" above.
21
+ BaseChunker,
22
+ CodeChunker,
23
+ FixedChunker,
24
+ MarkdownChunker,
25
+ RecursiveChunker,
26
+ SemanticChunker,
27
+ chunk,
28
+ createChunker,
29
+ createCodeChunker,
30
+ createFixedChunker,
31
+ createMarkdownChunker,
32
+ createRecursiveChunker,
33
+ createSemanticChunker,
34
+ defaultTokenCounter,
35
+ mergeSmallChunks,
36
+ splitLargeChunks
37
+ };
@@ -0,0 +1,102 @@
1
+ type EmbeddingVector = number[]; // An embedding is represented as a plain array of numbers.
2
+ interface EmbeddingResult { // Shape of the result for one embedded text.
3
+ vector: EmbeddingVector;
4
+ text: string;
5
+ tokenCount: number;
6
+ cached: boolean; // presumably true when the result was served from a cache — confirm against the implementation
7
+ model: string;
8
+ dimensions: number; // presumably equals vector.length — not enforced by this type
9
+ latencyMs: number;
10
+ metadata?: Record<string, unknown>; // optional free-form data
11
+ }
12
+ interface BatchEmbeddingResult { // Aggregate result for a batch: per-text results plus batch-level counters.
13
+ results: EmbeddingResult[];
14
+ totalTokens: number;
15
+ totalLatencyMs: number;
16
+ cacheHits: number;
17
+ cacheMisses: number;
18
+ failures: number; // NOTE(review): whether failed inputs still appear in `results` is not visible here
19
+ }
20
+ interface EmbeddedChunk { // A text chunk together with its embedding vector, position fields and metadata.
21
+ id: string;
22
+ text: string;
23
+ vector: EmbeddingVector;
24
+ index: number;
25
+ startPosition: number; // NOTE(review): unit (characters vs. tokens) and inclusivity are not specified here
26
+ endPosition: number;
27
+ tokenCount: number;
28
+ metadata: ChunkMetadata; // required here, unlike the optional metadata on EmbeddingResult
29
+ }
30
+ interface ChunkMetadata { // Optional well-known keys plus an open index signature for arbitrary extra keys.
31
+ documentId?: string;
32
+ source?: string;
33
+ type?: string;
34
+ page?: number;
35
+ section?: string;
36
+ [key: string]: unknown; // any other string key is allowed; values are `unknown` and must be narrowed before use
37
+ }
38
+ interface EmbeddingModelInfo { // Descriptive record for an embedding model: identity, size limits, optional cost and description.
39
+ name: string;
40
+ provider: string;
41
+ dimensions: number;
42
+ maxTokens: number;
43
+ maxBatchSize: number;
44
+ costPer1K?: number; // NOTE(review): currency and unit (per 1K tokens?) are assumed from the name — confirm
45
+ description?: string;
46
+ }
47
+ interface EmbeddingOptions { // Per-call options; every field is optional.
48
+ metadata?: Record<string, unknown>;
49
+ user?: string;
50
+ model?: string;
51
+ skipCache?: boolean;
52
+ forceCache?: boolean; // NOTE(review): precedence when both skipCache and forceCache are set is not defined here
53
+ timeout?: number; // NOTE(review): unit is not specified in this declaration — confirm
54
+ }
55
+ interface BatchEmbeddingOptions extends EmbeddingOptions { // Adds batch-only controls on top of EmbeddingOptions; every field is optional.
56
+ concurrency?: number;
57
+ onProgress?: (progress: BatchProgress) => void; // callback that receives a BatchProgress value; its return value is ignored
58
+ continueOnError?: boolean;
59
+ retryFailed?: boolean;
60
+ maxRetries?: number;
61
+ }
62
+ interface BatchProgress { // Progress value handed to BatchEmbeddingOptions.onProgress.
63
+ percent: number; // NOTE(review): scale (0-1 vs. 0-100) is not stated here — confirm
64
+ processed: number;
65
+ total: number;
66
+ current?: string;
67
+ elapsedMs: number;
68
+ estimatedRemainingMs?: number;
69
+ }
70
+ interface DocumentEmbeddingOptions extends BatchEmbeddingOptions { // Batch options plus document-level fields; documentId/source/type share names and types with ChunkMetadata keys.
71
+ documentId?: string;
72
+ source?: string;
73
+ type?: string;
74
+ chunkMetadata?: Record<string, unknown>;
75
+ }
76
+ interface SearchResult { // One search hit: id, text, score, metadata and an optional distance.
77
+ id: string;
78
+ text: string;
79
+ score: number; // NOTE(review): range and direction (higher = better?) are not specified here
80
+ metadata: ChunkMetadata;
81
+ distance?: number;
82
+ }
83
+ interface SearchOptions { // Query-time options for a search; every field is optional.
84
+ topK?: number;
85
+ minScore?: number;
86
+ filter?: Record<string, unknown>;
87
+ includeVectors?: boolean;
88
+ includeMetadata?: boolean;
89
+ namespace?: string;
90
+ }
91
+ type SimilarityMetric = 'cosine' | 'euclidean' | 'dot_product'; // Closed set of metric names as string literals.
92
+ interface EmbeddingStats { // Aggregate usage counters and derived figures; all fields are required numbers.
93
+ totalEmbeddings: number;
94
+ totalTokens: number;
95
+ avgLatencyMs: number;
96
+ cacheHitRate: number; // NOTE(review): fraction vs. percentage is not stated here
97
+ apiCalls: number;
98
+ errors: number;
99
+ estimatedCostUSD: number;
100
+ }
101
+
102
+ export type { BatchEmbeddingOptions as B, ChunkMetadata as C, DocumentEmbeddingOptions as D, EmbeddingVector as E, SearchOptions as S, EmbeddingResult as a, EmbeddingOptions as b, BatchEmbeddingResult as c, EmbeddedChunk as d, SearchResult as e, EmbeddingStats as f, EmbeddingModelInfo as g, BatchProgress as h, SimilarityMetric as i }; // Type-only export of the declarations above, each under a short alias.
@@ -0,0 +1,102 @@
1
+ type EmbeddingVector = number[]; // An embedding is represented as a plain array of numbers.
2
+ interface EmbeddingResult { // Shape of the result for one embedded text.
3
+ vector: EmbeddingVector;
4
+ text: string;
5
+ tokenCount: number;
6
+ cached: boolean; // presumably true when the result was served from a cache — confirm against the implementation
7
+ model: string;
8
+ dimensions: number; // presumably equals vector.length — not enforced by this type
9
+ latencyMs: number;
10
+ metadata?: Record<string, unknown>; // optional free-form data
11
+ }
12
+ interface BatchEmbeddingResult { // Aggregate result for a batch: per-text results plus batch-level counters.
13
+ results: EmbeddingResult[];
14
+ totalTokens: number;
15
+ totalLatencyMs: number;
16
+ cacheHits: number;
17
+ cacheMisses: number;
18
+ failures: number; // NOTE(review): whether failed inputs still appear in `results` is not visible here
19
+ }
20
+ interface EmbeddedChunk { // A text chunk together with its embedding vector, position fields and metadata.
21
+ id: string;
22
+ text: string;
23
+ vector: EmbeddingVector;
24
+ index: number;
25
+ startPosition: number; // NOTE(review): unit (characters vs. tokens) and inclusivity are not specified here
26
+ endPosition: number;
27
+ tokenCount: number;
28
+ metadata: ChunkMetadata; // required here, unlike the optional metadata on EmbeddingResult
29
+ }
30
+ interface ChunkMetadata { // Optional well-known keys plus an open index signature for arbitrary extra keys.
31
+ documentId?: string;
32
+ source?: string;
33
+ type?: string;
34
+ page?: number;
35
+ section?: string;
36
+ [key: string]: unknown; // any other string key is allowed; values are `unknown` and must be narrowed before use
37
+ }
38
+ interface EmbeddingModelInfo { // Descriptive record for an embedding model: identity, size limits, optional cost and description.
39
+ name: string;
40
+ provider: string;
41
+ dimensions: number;
42
+ maxTokens: number;
43
+ maxBatchSize: number;
44
+ costPer1K?: number; // NOTE(review): currency and unit (per 1K tokens?) are assumed from the name — confirm
45
+ description?: string;
46
+ }
47
+ interface EmbeddingOptions { // Per-call options; every field is optional.
48
+ metadata?: Record<string, unknown>;
49
+ user?: string;
50
+ model?: string;
51
+ skipCache?: boolean;
52
+ forceCache?: boolean; // NOTE(review): precedence when both skipCache and forceCache are set is not defined here
53
+ timeout?: number; // NOTE(review): unit is not specified in this declaration — confirm
54
+ }
55
+ interface BatchEmbeddingOptions extends EmbeddingOptions { // Adds batch-only controls on top of EmbeddingOptions; every field is optional.
56
+ concurrency?: number;
57
+ onProgress?: (progress: BatchProgress) => void; // callback that receives a BatchProgress value; its return value is ignored
58
+ continueOnError?: boolean;
59
+ retryFailed?: boolean;
60
+ maxRetries?: number;
61
+ }
62
+ interface BatchProgress { // Progress value handed to BatchEmbeddingOptions.onProgress.
63
+ percent: number; // NOTE(review): scale (0-1 vs. 0-100) is not stated here — confirm
64
+ processed: number;
65
+ total: number;
66
+ current?: string;
67
+ elapsedMs: number;
68
+ estimatedRemainingMs?: number;
69
+ }
70
+ interface DocumentEmbeddingOptions extends BatchEmbeddingOptions { // Batch options plus document-level fields; documentId/source/type share names and types with ChunkMetadata keys.
71
+ documentId?: string;
72
+ source?: string;
73
+ type?: string;
74
+ chunkMetadata?: Record<string, unknown>;
75
+ }
76
+ interface SearchResult { // One search hit: id, text, score, metadata and an optional distance.
77
+ id: string;
78
+ text: string;
79
+ score: number; // NOTE(review): range and direction (higher = better?) are not specified here
80
+ metadata: ChunkMetadata;
81
+ distance?: number;
82
+ }
83
+ interface SearchOptions { // Query-time options for a search; every field is optional.
84
+ topK?: number;
85
+ minScore?: number;
86
+ filter?: Record<string, unknown>;
87
+ includeVectors?: boolean;
88
+ includeMetadata?: boolean;
89
+ namespace?: string;
90
+ }
91
+ type SimilarityMetric = 'cosine' | 'euclidean' | 'dot_product'; // Closed set of metric names as string literals.
92
+ interface EmbeddingStats { // Aggregate usage counters and derived figures; all fields are required numbers.
93
+ totalEmbeddings: number;
94
+ totalTokens: number;
95
+ avgLatencyMs: number;
96
+ cacheHitRate: number; // NOTE(review): fraction vs. percentage is not stated here
97
+ apiCalls: number;
98
+ errors: number;
99
+ estimatedCostUSD: number;
100
+ }
101
+
102
+ export type { BatchEmbeddingOptions as B, ChunkMetadata as C, DocumentEmbeddingOptions as D, EmbeddingVector as E, SearchOptions as S, EmbeddingResult as a, EmbeddingOptions as b, BatchEmbeddingResult as c, EmbeddedChunk as d, SearchResult as e, EmbeddingStats as f, EmbeddingModelInfo as g, BatchProgress as h, SimilarityMetric as i }; // Type-only export of the declarations above, each under a short alias.
@@ -0,0 +1,297 @@
1
+ import { E as EmbeddingVector, a as EmbeddingResult, c as BatchEmbeddingResult, g as EmbeddingModelInfo, b as EmbeddingOptions, B as BatchEmbeddingOptions } from './embedding.types-CCgPVxt1.mjs';
2
+
3
+ type EmbeddingProviderType = 'openai' | 'cohere' | 'voyage' | 'local' | 'huggingface' | 'anthropic' | 'google' | 'custom'; // Provider tags; NOTE(review): no 'anthropic'- or 'google'-specific config interface is declared in this file.
4
+ interface ProviderConfig { // Base configuration shared by all provider configs; only `type` is required.
5
+ type: EmbeddingProviderType; // narrowed to a single literal by each provider-specific config below
6
+ apiKey?: string;
7
+ baseUrl?: string;
8
+ model?: string;
9
+ timeout?: number; // NOTE(review): unit is not specified in this declaration
10
+ maxRetries?: number;
11
+ retryDelay?: number; // NOTE(review): unit is not specified in this declaration
12
+ headers?: Record<string, string>;
13
+ options?: Record<string, unknown>;
14
+ }
15
+ interface OpenAIProviderConfig extends ProviderConfig { // ProviderConfig narrowed to type 'openai' plus OpenAI-specific optional fields.
16
+ type: 'openai';
17
+ organization?: string;
18
+ apiVersion?: string;
19
+ model?: string; // redeclares the inherited optional `model` with the same type
20
+ encodingFormat?: 'float' | 'base64';
21
+ dimensions?: number;
22
+ }
23
+ interface CohereProviderConfig extends ProviderConfig { // ProviderConfig narrowed to type 'cohere' plus input-type and truncation settings.
24
+ type: 'cohere';
25
+ model?: string; // redeclares the inherited optional `model` with the same type
26
+ inputType?: 'search_document' | 'search_query' | 'classification' | 'clustering';
27
+ truncate?: 'NONE' | 'START' | 'END';
28
+ }
29
+ interface VoyageProviderConfig extends ProviderConfig { // ProviderConfig narrowed to type 'voyage' plus input-type and truncation settings.
30
+ type: 'voyage';
31
+ model?: string; // redeclares the inherited optional `model` with the same type
32
+ inputType?: 'document' | 'query';
33
+ truncation?: boolean;
34
+ }
35
+ interface LocalProviderConfig extends ProviderConfig { // ProviderConfig narrowed to type 'local' plus optional model-loading and post-processing settings.
36
+ type: 'local';
37
+ modelPath?: string;
38
+ modelType?: 'onnx' | 'transformers' | 'sentence-transformers';
39
+ device?: 'cpu' | 'cuda' | 'mps';
40
+ batchSize?: number;
41
+ normalize?: boolean;
42
+ pooling?: 'mean' | 'cls' | 'max';
43
+ }
44
+ interface HuggingFaceProviderConfig extends ProviderConfig { // ProviderConfig narrowed to type 'huggingface' plus two optional boolean switches.
45
+ type: 'huggingface';
46
+ model?: string; // redeclares the inherited optional `model` with the same type
47
+ useInferenceApi?: boolean;
48
+ waitForModel?: boolean;
49
+ }
50
+ interface EmbeddingRequest { // Request shape for a single text; only `text` is required.
51
+ text: string;
52
+ model?: string;
53
+ user?: string;
54
+ options?: Record<string, unknown>;
55
+ }
56
+ interface BatchEmbeddingRequest { // Request shape for several texts; same optional fields as EmbeddingRequest, with `texts` as an array.
57
+ texts: string[];
58
+ model?: string;
59
+ user?: string;
60
+ options?: Record<string, unknown>;
61
+ }
62
+ interface ProviderResponse { // Response shape: embedding vectors, model name, token usage and optional metadata.
63
+ embeddings: EmbeddingVector[]; // presumably one vector per input text, in input order — confirm
64
+ model: string;
65
+ usage: TokenUsage;
66
+ metadata?: Record<string, unknown>;
67
+ }
68
+ interface TokenUsage { // Token counters carried in ProviderResponse.usage.
69
+ promptTokens: number;
70
+ totalTokens: number;
71
+ }
72
+ interface ProviderError { // Plain-object error description (an interface, not an Error subclass).
73
+ code: string;
74
+ message: string;
75
+ status?: number; // presumably an HTTP status code — confirm
76
+ retryable: boolean;
77
+ retryAfter?: number; // NOTE(review): unit (seconds vs. milliseconds) is not specified here
78
+ details?: Record<string, unknown>;
79
+ }
80
+ interface ProviderHealth { // Health record returned by BaseProvider.getHealth() and checkHealth().
81
+ healthy: boolean;
82
+ latencyMs: number;
83
+ lastCheck: number; // presumably a timestamp; epoch unit is not specified here — confirm
84
+ error?: string;
85
+ rateLimit?: RateLimitInfo;
86
+ }
87
+ interface RateLimitInfo { // Rate-limit counters attached to ProviderHealth.rateLimit.
88
+ remaining: number;
89
+ limit: number;
90
+ reset: number; // NOTE(review): absolute timestamp vs. relative delay is not specified here
91
+ retryAfter?: number; // NOTE(review): unit is not specified here
92
+ }
93
+ interface ProviderMetrics { // Per-provider counters and latency figures; all fields are required.
94
+ provider: EmbeddingProviderType;
95
+ totalRequests: number;
96
+ successfulRequests: number;
97
+ failedRequests: number;
98
+ totalTokens: number;
99
+ avgLatencyMs: number;
100
+ p50LatencyMs: number;
101
+ p95LatencyMs: number;
102
+ p99LatencyMs: number;
103
+ errorRate: number; // NOTE(review): fraction vs. percentage is not stated here
104
+ rateLimitHits: number;
105
+ estimatedCostUSD: number;
106
+ }
107
+ interface ModelInfo { // Model descriptor keyed by `id`; unlike the imported EmbeddingModelInfo, `provider` is typed as EmbeddingProviderType and lifecycle fields are included.
108
+ id: string;
109
+ name: string;
110
+ provider: EmbeddingProviderType;
111
+ dimensions: number;
112
+ maxTokens: number;
113
+ maxBatchSize: number;
114
+ costPer1K?: number;
115
+ description?: string;
116
+ releaseDate?: string; // NOTE(review): date format is not specified here
117
+ deprecated?: boolean;
118
+ replacement?: string; // presumably the id or name of the successor model — confirm
119
+ }
120
+ interface ProviderCapabilities { // Feature flags and limits describing what a provider supports; all fields are required.
121
+ batch: boolean;
122
+ maxBatchSize: number;
123
+ streaming: boolean;
124
+ customDimensions: boolean;
125
+ inputTypes: string[];
126
+ models: string[];
127
+ }
128
+ interface ProviderFactoryOptions { // Options for a provider factory; every field is optional.
129
+ defaultProvider?: EmbeddingProviderType;
130
+ providers?: Partial<Record<EmbeddingProviderType, ProviderConfig>>; // Partial so a caller can configure only the providers it uses; a bare Record over the union key demands an entry for all eight provider types
131
+ enableMetrics?: boolean;
132
+ enableHealthChecks?: boolean;
133
+ healthCheckInterval?: number; // NOTE(review): unit is not specified in this declaration
134
+ }
135
+ interface CustomProviderConfig extends ProviderConfig { // ProviderConfig narrowed to type 'custom'; the caller supplies the embedding function(s) and the model descriptor.
136
+ type: 'custom';
137
+ embedFn: (text: string, options?: Record<string, unknown>) => Promise<EmbeddingResult>; // required; must resolve to a full EmbeddingResult
138
+ batchEmbedFn?: (texts: string[], options?: Record<string, unknown>) => Promise<BatchEmbeddingResult>; // optional; NOTE(review): fallback when omitted is not visible here
139
+ modelInfo: ModelInfo;
140
+ }
141
+
142
+ declare abstract class EmbeddingModel { // Abstract base: subclasses must supply `info`, `embed` and `embedBatch`; the remaining members are declared concrete (their bodies are not part of this declaration file).
143
+ abstract readonly info: EmbeddingModelInfo;
144
+ abstract embed(text: string, options?: EmbeddingOptions): Promise<EmbeddingResult>;
145
+ abstract embedBatch(texts: string[], options?: BatchEmbeddingOptions): Promise<BatchEmbeddingResult>;
146
+ get dimensions(): number; // the five getters share names with EmbeddingModelInfo fields — presumably they read `info`; confirm in the implementation
147
+ get maxTokens(): number;
148
+ get maxBatchSize(): number;
149
+ get name(): string;
150
+ get provider(): string;
151
+ countTokens(text: string): number; // NOTE(review): counting method is not visible here; several provider classes redeclare this method
152
+ exceedsMaxTokens(text: string): boolean;
153
+ truncateToMaxTokens(text: string): string;
154
+ static cosineSimilarity(a: EmbeddingVector, b: EmbeddingVector): number; // NOTE(review): handling of mismatched lengths or zero vectors is not visible here
155
+ static euclideanDistance(a: EmbeddingVector, b: EmbeddingVector): number;
156
+ static dotProduct(a: EmbeddingVector, b: EmbeddingVector): number;
157
+ static normalize(vector: EmbeddingVector): EmbeddingVector;
158
+ static average(vectors: EmbeddingVector[]): EmbeddingVector;
159
+ static weightedAverage(vectors: EmbeddingVector[], weights: number[]): EmbeddingVector; // presumably weights[i] pairs with vectors[i] — confirm
160
+ }
161
+ declare class ModelRegistry { // Registry of EmbeddingModel instances, addressable by (provider, name) or by a single key, with an optional default model.
162
+ private models;
163
+ private defaultModel;
164
+ register(model: EmbeddingModel, isDefault?: boolean): void;
165
+ get(provider: string, name: string): EmbeddingModel | undefined; // lookups return undefined rather than throwing, per the declared return type
166
+ getByKey(key: string): EmbeddingModel | undefined; // NOTE(review): the key format is not visible in this declaration
167
+ getDefault(): EmbeddingModel | undefined;
168
+ setDefault(provider: string, name: string): void; // NOTE(review): behavior for an unregistered model is not visible here
169
+ list(): EmbeddingModelInfo[];
170
+ has(provider: string, name: string): boolean;
171
+ remove(provider: string, name: string): boolean;
172
+ clear(): void;
173
+ }
174
+ declare const modelRegistry: ModelRegistry; // Module-level ModelRegistry instance exported by this file.
175
+
176
+ declare abstract class BaseProvider extends EmbeddingModel { // Abstract provider base: declares concrete embed/embedBatch plus metrics and health members, and leaves `doEmbed` for subclasses to implement.
177
+ protected config: ProviderConfig;
178
+ protected metrics: ProviderMetrics;
179
+ protected health: ProviderHealth;
180
+ protected latencies: number[];
181
+ private readonly maxLatencySamples;
182
+ constructor(config: ProviderConfig);
183
+ private createInitialMetrics;
184
+ protected abstract doEmbed(texts: string[], options?: EmbeddingOptions): Promise<{ // subclass hook: takes an array of texts and resolves to vectors plus one token count
185
+ vectors: number[][];
186
+ tokenCount: number;
187
+ }>;
188
+ embed(text: string, options?: EmbeddingOptions): Promise<EmbeddingResult>; // presumably delegates to doEmbed — implementation is not visible here
189
+ embedBatch(texts: string[], options?: BatchEmbeddingOptions): Promise<BatchEmbeddingResult>;
190
+ protected isRetryable(error: Error): boolean;
191
+ protected recordLatency(latencyMs: number): void;
192
+ protected calculatePercentile(p: number): number; // NOTE(review): expected scale of `p` (0-1 vs. 0-100) is not visible here
193
+ protected updateMetrics(): void;
194
+ getMetrics(): ProviderMetrics;
195
+ getHealth(): ProviderHealth;
196
+ checkHealth(): Promise<ProviderHealth>;
197
+ resetMetrics(): void;
198
+ }
199
+
200
+ declare class OpenAIProvider extends BaseProvider { // Concrete BaseProvider constructed from an OpenAIProviderConfig; supplies `info`, `doEmbed` and its own `countTokens`.
201
+ private modelInfo;
202
+ private apiKey;
203
+ private baseUrl;
204
+ private organization?;
205
+ constructor(config: OpenAIProviderConfig);
206
+ get info(): EmbeddingModelInfo;
207
+ protected doEmbed(texts: string[], options?: EmbeddingOptions): Promise<{
208
+ vectors: number[][];
209
+ tokenCount: number;
210
+ }>;
211
+ countTokens(text: string): number; // redeclares the method inherited from EmbeddingModel; counting method is not visible here
212
+ }
213
+ declare function createOpenAIProvider(config: OpenAIProviderConfig): OpenAIProvider; // Factory with the same parameter type as the OpenAIProvider constructor; presumably a thin wrapper around `new` — confirm.
214
+
215
+ declare class CohereProvider extends BaseProvider { // Concrete BaseProvider constructed from a CohereProviderConfig; supplies `info`, `doEmbed`, a chainable input-type setter and its own `countTokens`.
216
+ private modelInfo;
217
+ private apiKey;
218
+ private baseUrl;
219
+ private inputType;
220
+ private truncate;
221
+ constructor(config: CohereProviderConfig);
222
+ get info(): EmbeddingModelInfo;
223
+ protected doEmbed(texts: string[], options?: EmbeddingOptions): Promise<{
224
+ vectors: number[][];
225
+ tokenCount: number;
226
+ }>;
227
+ setInputType(inputType: CohereProviderConfig['inputType']): this; // returns `this`, so calls can be chained; the parameter type includes undefined because the config field is optional
228
+ countTokens(text: string): number; // redeclares the method inherited from EmbeddingModel; counting method is not visible here
229
+ }
230
+ declare function createCohereProvider(config: CohereProviderConfig): CohereProvider; // Factory with the same parameter type as the CohereProvider constructor; presumably a thin wrapper around `new` — confirm.
231
+
232
+ declare class VoyageProvider extends BaseProvider { // Concrete BaseProvider constructed from a VoyageProviderConfig; supplies `info`, `doEmbed`, a chainable input-type setter and its own `countTokens`.
233
+ private modelInfo;
234
+ private apiKey;
235
+ private baseUrl;
236
+ private inputType;
237
+ private truncation;
238
+ constructor(config: VoyageProviderConfig);
239
+ get info(): EmbeddingModelInfo;
240
+ protected doEmbed(texts: string[], options?: EmbeddingOptions): Promise<{
241
+ vectors: number[][];
242
+ tokenCount: number;
243
+ }>;
244
+ setInputType(inputType: VoyageProviderConfig['inputType']): this; // returns `this`, so calls can be chained; the parameter type includes undefined because the config field is optional
245
+ countTokens(text: string): number; // redeclares the method inherited from EmbeddingModel; counting method is not visible here
246
+ }
247
+ declare function createVoyageProvider(config: VoyageProviderConfig): VoyageProvider; // Factory with the same parameter type as the VoyageProvider constructor; presumably a thin wrapper around `new` — confirm.
248
+
249
+ type LocalEmbeddingFn = (texts: string[], options?: Record<string, unknown>) => Promise<number[][]>; // User-supplied async embedding function: takes an array of texts and resolves to an array of vectors (presumably one per text, in order — confirm).
250
+ interface LocalProviderOptions extends LocalProviderConfig { // LocalProviderConfig plus an optional embedding function and model descriptors; `dimensions` is the only added required field.
251
+ embedFn?: LocalEmbeddingFn; // optional here; LocalProvider.setEmbedFunction also accepts one after construction
252
+ dimensions: number;
253
+ name?: string;
254
+ maxTokens?: number;
255
+ maxBatchSize?: number;
256
+ }
257
+ declare class LocalProvider extends BaseProvider { // Concrete BaseProvider constructed from LocalProviderOptions; the embedding function can be replaced after construction.
258
+ private modelInfo;
259
+ private embedFn;
260
+ private normalize;
261
+ private batchSize;
262
+ constructor(config: LocalProviderOptions);
263
+ get info(): EmbeddingModelInfo;
264
+ protected doEmbed(texts: string[], options?: EmbeddingOptions): Promise<{ // NOTE(review): behavior when no embedFn has been supplied is not visible here
265
+ vectors: number[][];
266
+ tokenCount: number;
267
+ }>;
268
+ setEmbedFunction(fn: LocalEmbeddingFn): this; // returns `this`, so calls can be chained
269
+ countTokens(text: string): number; // redeclares the method inherited from EmbeddingModel; counting method is not visible here
270
+ }
271
+ declare function createLocalProvider(config: LocalProviderOptions): LocalProvider; // Factory with the same parameter type as the LocalProvider constructor; presumably a thin wrapper around `new` — confirm.
272
+ declare function createMockProvider(config: { // Returns a LocalProvider built from a minimal config; NOTE(review): how the mock vectors are produced is not visible here.
273
+ dimensions: number;
274
+ name?: string;
275
+ delay?: number; // presumably an artificial latency; unit is not specified here — confirm
276
+ }): LocalProvider;
277
+ declare function createRandomProvider(config: { // Returns a LocalProvider built from a minimal config; presumably yields random vectors (inferred from the name only) — confirm.
278
+ dimensions: number;
279
+ name?: string;
280
+ }): LocalProvider;
281
+
282
+ declare class HuggingFaceProvider extends BaseProvider { // Concrete BaseProvider constructed from a HuggingFaceProviderConfig; supplies `info`, `doEmbed` and its own `countTokens`.
283
+ private modelInfo;
284
+ private apiKey;
285
+ private baseUrl;
286
+ private waitForModel;
287
+ constructor(config: HuggingFaceProviderConfig);
288
+ get info(): EmbeddingModelInfo;
289
+ protected doEmbed(texts: string[], _options?: EmbeddingOptions): Promise<{ // the underscore-prefixed `_options` suggests the argument is unused by this provider — confirm
290
+ vectors: number[][];
291
+ tokenCount: number;
292
+ }>;
293
+ countTokens(text: string): number; // redeclares the method inherited from EmbeddingModel; counting method is not visible here
294
+ }
295
+ declare function createHuggingFaceProvider(config: HuggingFaceProviderConfig): HuggingFaceProvider; // Factory with the same parameter type as the HuggingFaceProvider constructor; presumably a thin wrapper around `new` — confirm.
296
+
297
+ export { BaseProvider as B, CohereProvider as C, type EmbeddingProviderType as E, HuggingFaceProvider as H, LocalProvider as L, type ModelInfo as M, OpenAIProvider as O, type ProviderConfig as P, type RateLimitInfo as R, type TokenUsage as T, VoyageProvider as V, EmbeddingModel as a, ModelRegistry as b, createOpenAIProvider as c, createCohereProvider as d, createVoyageProvider as e, createLocalProvider as f, createMockProvider as g, createRandomProvider as h, createHuggingFaceProvider as i, type LocalEmbeddingFn as j, type LocalProviderOptions as k, type OpenAIProviderConfig as l, modelRegistry as m, type CohereProviderConfig as n, type VoyageProviderConfig as o, type LocalProviderConfig as p, type HuggingFaceProviderConfig as q, type EmbeddingRequest as r, type BatchEmbeddingRequest as s, type ProviderResponse as t, type ProviderError as u, type ProviderHealth as v, type ProviderMetrics as w, type ProviderCapabilities as x, type ProviderFactoryOptions as y, type CustomProviderConfig as z }; // Exports the declarations above under short aliases; entries marked `type` are type-only.