@cosmocoder/mcp-web-docs 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +368 -0
- package/build/__mocks__/embeddings.d.ts +17 -0
- package/build/__mocks__/embeddings.js +66 -0
- package/build/__mocks__/embeddings.js.map +1 -0
- package/build/config.d.ts +44 -0
- package/build/config.js +158 -0
- package/build/config.js.map +1 -0
- package/build/config.test.d.ts +1 -0
- package/build/config.test.js +165 -0
- package/build/config.test.js.map +1 -0
- package/build/crawler/auth.d.ts +128 -0
- package/build/crawler/auth.js +546 -0
- package/build/crawler/auth.js.map +1 -0
- package/build/crawler/auth.test.d.ts +1 -0
- package/build/crawler/auth.test.js +174 -0
- package/build/crawler/auth.test.js.map +1 -0
- package/build/crawler/base.d.ts +24 -0
- package/build/crawler/base.js +149 -0
- package/build/crawler/base.js.map +1 -0
- package/build/crawler/base.test.d.ts +1 -0
- package/build/crawler/base.test.js +234 -0
- package/build/crawler/base.test.js.map +1 -0
- package/build/crawler/browser-config.d.ts +2 -0
- package/build/crawler/browser-config.js +29 -0
- package/build/crawler/browser-config.js.map +1 -0
- package/build/crawler/browser-config.test.d.ts +1 -0
- package/build/crawler/browser-config.test.js +56 -0
- package/build/crawler/browser-config.test.js.map +1 -0
- package/build/crawler/cheerio.d.ts +11 -0
- package/build/crawler/cheerio.js +134 -0
- package/build/crawler/cheerio.js.map +1 -0
- package/build/crawler/chromium.d.ts +21 -0
- package/build/crawler/chromium.js +596 -0
- package/build/crawler/chromium.js.map +1 -0
- package/build/crawler/content-extractor-types.d.ts +25 -0
- package/build/crawler/content-extractor-types.js +2 -0
- package/build/crawler/content-extractor-types.js.map +1 -0
- package/build/crawler/content-extractors.d.ts +9 -0
- package/build/crawler/content-extractors.js +9 -0
- package/build/crawler/content-extractors.js.map +1 -0
- package/build/crawler/content-utils.d.ts +2 -0
- package/build/crawler/content-utils.js +22 -0
- package/build/crawler/content-utils.js.map +1 -0
- package/build/crawler/content-utils.test.d.ts +1 -0
- package/build/crawler/content-utils.test.js +99 -0
- package/build/crawler/content-utils.test.js.map +1 -0
- package/build/crawler/crawlee-crawler.d.ts +63 -0
- package/build/crawler/crawlee-crawler.js +342 -0
- package/build/crawler/crawlee-crawler.js.map +1 -0
- package/build/crawler/crawlee-crawler.test.d.ts +1 -0
- package/build/crawler/crawlee-crawler.test.js +280 -0
- package/build/crawler/crawlee-crawler.test.js.map +1 -0
- package/build/crawler/default-extractor.d.ts +4 -0
- package/build/crawler/default-extractor.js +26 -0
- package/build/crawler/default-extractor.js.map +1 -0
- package/build/crawler/default-extractor.test.d.ts +1 -0
- package/build/crawler/default-extractor.test.js +200 -0
- package/build/crawler/default-extractor.test.js.map +1 -0
- package/build/crawler/default.d.ts +11 -0
- package/build/crawler/default.js +138 -0
- package/build/crawler/default.js.map +1 -0
- package/build/crawler/docs-crawler.d.ts +26 -0
- package/build/crawler/docs-crawler.js +97 -0
- package/build/crawler/docs-crawler.js.map +1 -0
- package/build/crawler/docs-crawler.test.d.ts +1 -0
- package/build/crawler/docs-crawler.test.js +185 -0
- package/build/crawler/docs-crawler.test.js.map +1 -0
- package/build/crawler/factory.d.ts +6 -0
- package/build/crawler/factory.js +83 -0
- package/build/crawler/factory.js.map +1 -0
- package/build/crawler/github-pages-extractor.d.ts +4 -0
- package/build/crawler/github-pages-extractor.js +33 -0
- package/build/crawler/github-pages-extractor.js.map +1 -0
- package/build/crawler/github-pages-extractor.test.d.ts +1 -0
- package/build/crawler/github-pages-extractor.test.js +184 -0
- package/build/crawler/github-pages-extractor.test.js.map +1 -0
- package/build/crawler/github.d.ts +20 -0
- package/build/crawler/github.js +181 -0
- package/build/crawler/github.js.map +1 -0
- package/build/crawler/github.test.d.ts +1 -0
- package/build/crawler/github.test.js +326 -0
- package/build/crawler/github.test.js.map +1 -0
- package/build/crawler/puppeteer.d.ts +16 -0
- package/build/crawler/puppeteer.js +191 -0
- package/build/crawler/puppeteer.js.map +1 -0
- package/build/crawler/queue-manager.d.ts +43 -0
- package/build/crawler/queue-manager.js +169 -0
- package/build/crawler/queue-manager.js.map +1 -0
- package/build/crawler/queue-manager.test.d.ts +1 -0
- package/build/crawler/queue-manager.test.js +509 -0
- package/build/crawler/queue-manager.test.js.map +1 -0
- package/build/crawler/site-rules.d.ts +11 -0
- package/build/crawler/site-rules.js +104 -0
- package/build/crawler/site-rules.js.map +1 -0
- package/build/crawler/site-rules.test.d.ts +1 -0
- package/build/crawler/site-rules.test.js +139 -0
- package/build/crawler/site-rules.test.js.map +1 -0
- package/build/crawler/storybook-extractor.d.ts +34 -0
- package/build/crawler/storybook-extractor.js +767 -0
- package/build/crawler/storybook-extractor.js.map +1 -0
- package/build/crawler/storybook-extractor.test.d.ts +1 -0
- package/build/crawler/storybook-extractor.test.js +491 -0
- package/build/crawler/storybook-extractor.test.js.map +1 -0
- package/build/embeddings/fastembed.d.ts +25 -0
- package/build/embeddings/fastembed.js +188 -0
- package/build/embeddings/fastembed.js.map +1 -0
- package/build/embeddings/fastembed.test.d.ts +1 -0
- package/build/embeddings/fastembed.test.js +307 -0
- package/build/embeddings/fastembed.test.js.map +1 -0
- package/build/embeddings/openai.d.ts +8 -0
- package/build/embeddings/openai.js +56 -0
- package/build/embeddings/openai.js.map +1 -0
- package/build/embeddings/types.d.ts +4 -0
- package/build/embeddings/types.js +2 -0
- package/build/embeddings/types.js.map +1 -0
- package/build/index.d.ts +2 -0
- package/build/index.js +1007 -0
- package/build/index.js.map +1 -0
- package/build/index.test.d.ts +1 -0
- package/build/index.test.js +364 -0
- package/build/index.test.js.map +1 -0
- package/build/indexing/queue-manager.d.ts +36 -0
- package/build/indexing/queue-manager.js +86 -0
- package/build/indexing/queue-manager.js.map +1 -0
- package/build/indexing/queue-manager.test.d.ts +1 -0
- package/build/indexing/queue-manager.test.js +257 -0
- package/build/indexing/queue-manager.test.js.map +1 -0
- package/build/indexing/status.d.ts +39 -0
- package/build/indexing/status.js +207 -0
- package/build/indexing/status.js.map +1 -0
- package/build/indexing/status.test.d.ts +1 -0
- package/build/indexing/status.test.js +246 -0
- package/build/indexing/status.test.js.map +1 -0
- package/build/processor/content.d.ts +16 -0
- package/build/processor/content.js +286 -0
- package/build/processor/content.js.map +1 -0
- package/build/processor/content.test.d.ts +1 -0
- package/build/processor/content.test.js +369 -0
- package/build/processor/content.test.js.map +1 -0
- package/build/processor/markdown.d.ts +11 -0
- package/build/processor/markdown.js +256 -0
- package/build/processor/markdown.js.map +1 -0
- package/build/processor/markdown.test.d.ts +1 -0
- package/build/processor/markdown.test.js +312 -0
- package/build/processor/markdown.test.js.map +1 -0
- package/build/processor/metadata-parser.d.ts +37 -0
- package/build/processor/metadata-parser.js +245 -0
- package/build/processor/metadata-parser.js.map +1 -0
- package/build/processor/metadata-parser.test.d.ts +1 -0
- package/build/processor/metadata-parser.test.js +357 -0
- package/build/processor/metadata-parser.test.js.map +1 -0
- package/build/processor/processor.d.ts +8 -0
- package/build/processor/processor.js +190 -0
- package/build/processor/processor.js.map +1 -0
- package/build/processor/processor.test.d.ts +1 -0
- package/build/processor/processor.test.js +357 -0
- package/build/processor/processor.test.js.map +1 -0
- package/build/rag/cache.d.ts +10 -0
- package/build/rag/cache.js +10 -0
- package/build/rag/cache.js.map +1 -0
- package/build/rag/code-generator.d.ts +11 -0
- package/build/rag/code-generator.js +30 -0
- package/build/rag/code-generator.js.map +1 -0
- package/build/rag/context-assembler.d.ts +23 -0
- package/build/rag/context-assembler.js +113 -0
- package/build/rag/context-assembler.js.map +1 -0
- package/build/rag/docs-search.d.ts +55 -0
- package/build/rag/docs-search.js +380 -0
- package/build/rag/docs-search.js.map +1 -0
- package/build/rag/pipeline.d.ts +26 -0
- package/build/rag/pipeline.js +91 -0
- package/build/rag/pipeline.js.map +1 -0
- package/build/rag/query-processor.d.ts +14 -0
- package/build/rag/query-processor.js +57 -0
- package/build/rag/query-processor.js.map +1 -0
- package/build/rag/reranker.d.ts +55 -0
- package/build/rag/reranker.js +210 -0
- package/build/rag/reranker.js.map +1 -0
- package/build/rag/response-generator.d.ts +20 -0
- package/build/rag/response-generator.js +101 -0
- package/build/rag/response-generator.js.map +1 -0
- package/build/rag/retriever.d.ts +19 -0
- package/build/rag/retriever.js +111 -0
- package/build/rag/retriever.js.map +1 -0
- package/build/rag/validator.d.ts +22 -0
- package/build/rag/validator.js +128 -0
- package/build/rag/validator.js.map +1 -0
- package/build/rag/version-manager.d.ts +23 -0
- package/build/rag/version-manager.js +98 -0
- package/build/rag/version-manager.js.map +1 -0
- package/build/setupTests.d.ts +4 -0
- package/build/setupTests.js +50 -0
- package/build/setupTests.js.map +1 -0
- package/build/storage/storage.d.ts +38 -0
- package/build/storage/storage.js +700 -0
- package/build/storage/storage.js.map +1 -0
- package/build/storage/storage.test.d.ts +1 -0
- package/build/storage/storage.test.js +338 -0
- package/build/storage/storage.test.js.map +1 -0
- package/build/types/rag.d.ts +27 -0
- package/build/types/rag.js +2 -0
- package/build/types/rag.js.map +1 -0
- package/build/types.d.ts +120 -0
- package/build/types.js +2 -0
- package/build/types.js.map +1 -0
- package/build/util/content-utils.d.ts +31 -0
- package/build/util/content-utils.js +120 -0
- package/build/util/content-utils.js.map +1 -0
- package/build/util/content.d.ts +1 -0
- package/build/util/content.js +16 -0
- package/build/util/content.js.map +1 -0
- package/build/util/docs.d.ts +1 -0
- package/build/util/docs.js +26 -0
- package/build/util/docs.js.map +1 -0
- package/build/util/docs.test.d.ts +1 -0
- package/build/util/docs.test.js +49 -0
- package/build/util/docs.test.js.map +1 -0
- package/build/util/favicon.d.ts +6 -0
- package/build/util/favicon.js +88 -0
- package/build/util/favicon.js.map +1 -0
- package/build/util/favicon.test.d.ts +1 -0
- package/build/util/favicon.test.js +140 -0
- package/build/util/favicon.test.js.map +1 -0
- package/build/util/logger.d.ts +17 -0
- package/build/util/logger.js +72 -0
- package/build/util/logger.js.map +1 -0
- package/build/util/logger.test.d.ts +1 -0
- package/build/util/logger.test.js +46 -0
- package/build/util/logger.test.js.map +1 -0
- package/build/util/security.d.ts +312 -0
- package/build/util/security.js +719 -0
- package/build/util/security.js.map +1 -0
- package/build/util/security.test.d.ts +1 -0
- package/build/util/security.test.js +524 -0
- package/build/util/security.test.js.map +1 -0
- package/build/util/site-detector.d.ts +22 -0
- package/build/util/site-detector.js +42 -0
- package/build/util/site-detector.js.map +1 -0
- package/package.json +112 -0
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
import { EmbeddingModel, FlagEmbedding } from 'fastembed';
|
|
2
|
+
import { logger } from '../util/logger.js';
|
|
3
|
+
import { mkdir } from 'node:fs/promises';
|
|
4
|
+
import { existsSync } from 'node:fs';
|
|
5
|
+
import { join } from 'node:path';
|
|
6
|
+
import { homedir } from 'node:os';
|
|
7
|
+
// FastEmbed settings shared by the FastEmbeddings class below.
// On-disk location handed to FastEmbed as its model cache directory.
const CACHE_DIR = join(homedir(), '.mcp-web-docs', 'fastembed-cache');
// Upper bound on model initialization attempts.
const MAX_RETRIES = 3;
// Model identifier and the vector length it produces (bge-small-en-v1.5 dimensions).
const EMBEDDING_MODEL = EmbeddingModel.BGESmallENV15;
const EMBEDDING_DIMENSIONS = 384;
|
|
12
|
+
/**
 * Embedding provider backed by a lazily-initialized FastEmbed model.
 *
 * Results of `embed` and `embedQuery` are kept in a bounded in-memory cache
 * keyed by the kind of embedding plus the full trimmed input text.
 */
export class FastEmbeddings {
    /** Loaded FastEmbed model, or null until initialization has succeeded. */
    model = null;
    /** Promise of an initialization that is currently in flight, or null. */
    modelInitPromise = null;
    /** Map from cache key ("passage:<text>" / "query:<text>") to embedding array. */
    cache;
    /** Expected length of every embedding vector. */
    dimensions = EMBEDDING_DIMENSIONS;
    /** Maximum number of cached embeddings; the oldest entries are evicted beyond it. */
    maxCacheSize = 1000;
    constructor() {
        this.cache = new Map();
        logger.info(`[FastEmbeddings] Using model: ${EMBEDDING_MODEL}, dimensions: ${EMBEDDING_DIMENSIONS}`);
    }
    /**
     * Initialize the FastEmbed model (lazy initialization).
     * Callers that arrive while an initialization is in flight share its promise.
     * @returns {Promise<object>} the initialized model
     * @throws rethrows the last initialization error once all attempts failed
     */
    async initialize() {
        // Return existing model if already initialized
        if (this.model) {
            return this.model;
        }
        // Wait for existing initialization if in progress
        if (this.modelInitPromise) {
            return this.modelInitPromise;
        }
        // Start initialization
        this.modelInitPromise = this.initializeModel();
        return this.modelInitPromise;
    }
    /**
     * Create the cache directory if needed and load the model, making up to
     * MAX_RETRIES attempts. Clears `modelInitPromise` on both success and failure
     * so that a later call can start a fresh initialization.
     * @returns {Promise<object>} the initialized model
     */
    async initializeModel() {
        try {
            // Ensure cache directory exists
            if (!existsSync(CACHE_DIR)) {
                logger.debug(`[FastEmbeddings] Creating cache directory: ${CACHE_DIR}`);
                await mkdir(CACHE_DIR, { recursive: true });
            }
            logger.info(`[FastEmbeddings] Initializing model (cache: ${CACHE_DIR})`);
            let retries = 0;
            while (retries < MAX_RETRIES) {
                try {
                    this.model = await FlagEmbedding.init({
                        model: EMBEDDING_MODEL,
                        cacheDir: CACHE_DIR,
                    });
                    logger.info('[FastEmbeddings] Model initialized successfully');
                    this.modelInitPromise = null;
                    return this.model;
                }
                catch (initError) {
                    retries++;
                    logger.warn(`[FastEmbeddings] Initialization attempt ${retries}/${MAX_RETRIES} failed: ${initError}`);
                    if (retries >= MAX_RETRIES) {
                        throw initError;
                    }
                    // Linear backoff: the wait grows by 2 seconds with every failed attempt
                    await new Promise((resolve) => setTimeout(resolve, retries * 2000));
                }
            }
            // Only reached when MAX_RETRIES is not positive, i.e. no attempt was made
            throw new Error('Failed to initialize model after max retries');
        }
        catch (error) {
            this.modelInitPromise = null;
            logger.error('[FastEmbeddings] Fatal: Failed to initialize model:', error);
            throw error;
        }
    }
    /**
     * Validate a single input text and strip surrounding whitespace.
     * @param {unknown} text - candidate input
     * @returns {string} the trimmed, non-empty text
     * @throws {Error} when the input is not a string or is empty after trimming
     */
    normalizeInput(text) {
        if (!text || typeof text !== 'string') {
            throw new Error('Input text must be a non-empty string');
        }
        const cleanText = text.trim();
        if (!cleanText) {
            throw new Error('Input text is empty after trimming');
        }
        return cleanText;
    }
    /**
     * Store an embedding and evict the oldest entries while the cache is over
     * `maxCacheSize` (a Map iterates its keys in insertion order).
     * @param {string} cacheKey - key built by `embed` or `embedQuery`
     * @param {number[]} embedding - vector to remember
     */
    cacheEmbedding(cacheKey, embedding) {
        this.cache.set(cacheKey, embedding);
        // The size > 0 guard keeps a negative limit from looping forever
        while (this.cache.size > this.maxCacheSize && this.cache.size > 0) {
            this.cache.delete(this.cache.keys().next().value);
        }
    }
    /**
     * Generate embedding for a single text (for documents/passages)
     * @param {string} text - passage text; surrounding whitespace is ignored
     * @returns {Promise<number[]>} embedding of length `dimensions`
     * @throws {Error} on invalid input, on a model failure, or when the model
     *   returns no vector or one of unexpected length
     */
    async embed(text) {
        const cleanText = this.normalizeInput(text);
        // Key on the full text: a truncated key would hand one text's embedding
        // to every other text that shares the same leading characters.
        const cacheKey = `passage:${cleanText}`;
        const cached = this.cache.get(cacheKey);
        if (cached) {
            return cached;
        }
        try {
            const model = await this.initialize();
            // Use passageEmbed for documents/content
            let embedding = null;
            for await (const batch of model.passageEmbed([cleanText])) {
                if (batch && batch.length > 0 && batch[0]) {
                    embedding = Array.from(batch[0]);
                    break;
                }
            }
            if (!embedding || embedding.length !== this.dimensions) {
                throw new Error(`Invalid embedding: got ${embedding?.length} dimensions, expected ${this.dimensions}`);
            }
            this.cacheEmbedding(cacheKey, embedding);
            return embedding;
        }
        catch (error) {
            logger.error('[FastEmbeddings] Error generating embedding:', error);
            throw error;
        }
    }
    /**
     * Generate embedding for a query (optimized for search)
     * @param {string} text - query text; surrounding whitespace is ignored
     * @returns {Promise<number[]>} embedding of length `dimensions`
     * @throws {Error} on invalid input, on a model failure, or when the model
     *   returns no vector or one of unexpected length
     */
    async embedQuery(text) {
        const cleanText = this.normalizeInput(text);
        // The "query:" / "passage:" prefixes keep the two kinds of embedding apart
        const cacheKey = `query:${cleanText}`;
        const cached = this.cache.get(cacheKey);
        if (cached) {
            return cached;
        }
        try {
            const model = await this.initialize();
            // Use queryEmbed for search queries
            const embeddingArray = await model.queryEmbed(cleanText);
            if (!embeddingArray || embeddingArray.length !== this.dimensions) {
                throw new Error(`Invalid query embedding: got ${embeddingArray?.length} dimensions, expected ${this.dimensions}`);
            }
            const embedding = Array.from(embeddingArray);
            // Same bounded cache as passages, so query entries are evicted as well
            this.cacheEmbedding(cacheKey, embedding);
            return embedding;
        }
        catch (error) {
            logger.error('[FastEmbeddings] Error generating query embedding:', error);
            throw error;
        }
    }
    /**
     * Generate embeddings for multiple texts in batch
     *
     * Results are not cached. Entries that are not non-empty strings are skipped
     * before embedding, so the result holds one vector per VALID input and can be
     * shorter than `texts`. When no entry is valid, one empty array per input is
     * returned instead. A vector of unexpected length is replaced by a zero vector.
     * NOTE(review): callers that pair results with `texts` by index will be
     * misaligned when only some inputs are invalid - confirm the intended contract.
     * @param {string[]} texts - passage texts
     * @returns {Promise<number[][]>} embeddings in the order of the valid inputs
     * @throws rethrows any error raised by the model
     */
    async embedBatch(texts) {
        if (!Array.isArray(texts) || texts.length === 0) {
            return [];
        }
        const validTexts = texts.filter((t) => typeof t === 'string' && t.trim().length > 0);
        if (validTexts.length === 0) {
            return texts.map(() => []);
        }
        try {
            const model = await this.initialize();
            const embeddings = [];
            for await (const batch of model.passageEmbed(validTexts)) {
                for (const vec of batch) {
                    if (vec && vec.length === this.dimensions) {
                        embeddings.push(Array.from(vec));
                    }
                    else {
                        logger.warn(`[FastEmbeddings] Invalid batch embedding dimension: ${vec?.length}`);
                        embeddings.push(new Array(this.dimensions).fill(0));
                    }
                }
            }
            return embeddings;
        }
        catch (error) {
            logger.error('[FastEmbeddings] Error generating batch embeddings:', error);
            throw error;
        }
    }
}
|
|
188
|
+
//# sourceMappingURL=fastembed.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fastembed.js","sourceRoot":"","sources":["../../src/embeddings/fastembed.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAE1D,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAC3C,OAAO,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AACzC,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACrC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAElC,0BAA0B;AAC1B,MAAM,eAAe,GAAG,cAAc,CAAC,aAAa,CAAC;AACrD,MAAM,oBAAoB,GAAG,GAAG,CAAC,CAAC,+BAA+B;AACjE,MAAM,WAAW,GAAG,CAAC,CAAC;AACtB,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,EAAE,EAAE,eAAe,EAAE,iBAAiB,CAAC,CAAC;AAEtE,MAAM,OAAO,cAAc;IACjB,KAAK,GAAyB,IAAI,CAAC;IACnC,gBAAgB,GAAkC,IAAI,CAAC;IACvD,KAAK,CAAwB;IAC5B,UAAU,GAAG,oBAAoB,CAAC;IAE3C;QACE,IAAI,CAAC,KAAK,GAAG,IAAI,GAAG,EAAE,CAAC;QACvB,MAAM,CAAC,IAAI,CAAC,iCAAiC,eAAe,iBAAiB,oBAAoB,EAAE,CAAC,CAAC;IACvG,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,UAAU;QACtB,+CAA+C;QAC/C,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACf,OAAO,IAAI,CAAC,KAAK,CAAC;QACpB,CAAC;QAED,kDAAkD;QAClD,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC1B,OAAO,IAAI,CAAC,gBAAgB,CAAC;QAC/B,CAAC;QAED,uBAAuB;QACvB,IAAI,CAAC,gBAAgB,GAAG,IAAI,CAAC,eAAe,EAAE,CAAC;QAC/C,OAAO,IAAI,CAAC,gBAAgB,CAAC;IAC/B,CAAC;IAEO,KAAK,CAAC,eAAe;QAC3B,IAAI,CAAC;YACH,gCAAgC;YAChC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;gBAC3B,MAAM,CAAC,KAAK,CAAC,8CAA8C,SAAS,EAAE,CAAC,CAAC;gBACxE,MAAM,KAAK,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YAC9C,CAAC;YAED,MAAM,CAAC,IAAI,CAAC,+CAA+C,SAAS,GAAG,CAAC,CAAC;YAEzE,IAAI,OAAO,GAAG,CAAC,CAAC;YAChB,OAAO,OAAO,GAAG,WAAW,EAAE,CAAC;gBAC7B,IAAI,CAAC;oBACH,IAAI,CAAC,KAAK,GAAG,MAAM,aAAa,CAAC,IAAI,CAAC;wBACpC,KAAK,EAAE,eAAe;wBACtB,QAAQ,EAAE,SAAS;qBACpB,CAAC,CAAC;oBAEH,MAAM,CAAC,IAAI,CAAC,iDAAiD,CAAC,CAAC;oBAC/D,IAAI,CAAC,gBAAgB,GAAG,IAAI,CAAC;oBAC7B,OAAO,IAAI,CAAC,KAAK,CAAC;gBACpB,CAAC;gBAAC,OAAO,SAAS,EAAE,CAAC;oBACnB,OAAO,EAAE,CAAC;oBACV,MAAM,CAAC,IAAI,CAAC,2CAA2C,OAAO,IAAI,WAAW,YAAY,SAAS,EAAE,CAAC,CAAC;oBAEtG,IAAI,OAAO,IAAI,WAAW,EAAE,CAAC;wBAC3B,MAAM,SAAS,CAAC;oBAClB,CAAC;oBAED,6CAA6C;oBAC7C,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO
,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC;gBACtE,CAAC;YACH,CAAC;YAED,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,CAAC,gBAAgB,GAAG,IAAI,CAAC;YAC7B,MAAM,CAAC,KAAK,CAAC,qDAAqD,EAAE,KAAK,CAAC,CAAC;YAC3E,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,KAAK,CAAC,IAAY;QACtB,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;YACtC,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;QAC3D,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QAC9B,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,oCAAoC,CAAC,CAAC;QACxD,CAAC;QAED,oBAAoB;QACpB,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QACzC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACxC,IAAI,MAAM,EAAE,CAAC;YACX,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,IAAI,CAAC;YACH,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;YAEtC,yCAAyC;YACzC,MAAM,kBAAkB,GAAG,KAAK,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;YAE3D,IAAI,SAAS,GAAoB,IAAI,CAAC;YACtC,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,kBAAkB,EAAE,CAAC;gBAC7C,IAAI,KAAK,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC1C,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;oBACjC,MAAM;gBACR,CAAC;YACH,CAAC;YAED,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,MAAM,KAAK,IAAI,CAAC,UAAU,EAAE,CAAC;gBACvD,MAAM,IAAI,KAAK,CAAC,0BAA0B,SAAS,EAAE,MAAM,yBAAyB,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC;YACzG,CAAC;YAED,mBAAmB;YACnB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;YAEpC,mBAAmB;YACnB,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,GAAG,IAAI,EAAE,CAAC;gBAC3B,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC;gBAChD,IAAI,QAAQ,EAAE,CAAC;oBACb,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;gBAC9B,CAAC;YACH,CAAC;YAED,OAAO,SAAS,CAAC;QACnB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,8CAA8C,EAAE,KAAK,CAAC,CAAC;YACpE,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU,CAAC,IAAY;QAC3B,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;YACtC,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;QAC3D,CAAC;QAED,MAAM,SAAS,GAA
G,IAAI,CAAC,IAAI,EAAE,CAAC;QAC9B,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,oCAAoC,CAAC,CAAC;QACxD,CAAC;QAED,gCAAgC;QAChC,MAAM,QAAQ,GAAG,SAAS,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;QACpD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACxC,IAAI,MAAM,EAAE,CAAC;YACX,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,IAAI,CAAC;YACH,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;YAEtC,oCAAoC;YACpC,MAAM,cAAc,GAAG,MAAM,KAAK,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;YAEzD,IAAI,CAAC,cAAc,IAAI,cAAc,CAAC,MAAM,KAAK,IAAI,CAAC,UAAU,EAAE,CAAC;gBACjE,MAAM,IAAI,KAAK,CAAC,gCAAgC,cAAc,EAAE,MAAM,yBAAyB,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC;YACpH,CAAC;YAED,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;YAE7C,mBAAmB;YACnB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;YAEpC,OAAO,SAAS,CAAC;QACnB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,oDAAoD,EAAE,KAAK,CAAC,CAAC;YAC1E,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU,CAAC,KAAe;QAC9B,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAChD,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,MAAM,UAAU,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACrF,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5B,OAAO,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;QAC7B,CAAC;QAED,IAAI,CAAC;YACH,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;YACtC,MAAM,UAAU,GAAe,EAAE,CAAC;YAElC,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,KAAK,CAAC,YAAY,CAAC,UAAU,CAAC,EAAE,CAAC;gBACzD,KAAK,MAAM,GAAG,IAAI,KAAK,EAAE,CAAC;oBACxB,IAAI,GAAG,IAAI,GAAG,CAAC,MAAM,KAAK,IAAI,CAAC,UAAU,EAAE,CAAC;wBAC1C,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;oBACnC,CAAC;yBAAM,CAAC;wBACN,MAAM,CAAC,IAAI,CAAC,uDAAuD,GAAG,EAAE,MAAM,EAAE,CAAC,CAAC;wBAClF,UAAU,CAAC,IAAI,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;oBACtD,CAAC;gBACH,CAAC;YACH,CAAC;YAED,OAAO,UAAU,CAAC;QACpB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,qDAAqD,EAAE,KAAK,CAAC,CAAC;YAC3
E,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// No declarations here: the empty export only marks this file as a module.
export {};
|
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
import { FastEmbeddings } from './fastembed.js';
|
|
2
|
+
/**
 * Test helper: expose the given items, in order, as an async generator so a
 * mock can stand in for an API that is consumed with `for await`.
 * @param {Iterable<unknown>} items - values to yield one by one
 */
async function* createAsyncGenerator(items) {
    yield* items;
}
|
|
8
|
+
// Use vi.hoisted to define mocks that will be available when vi.mock runs
// One vi.fn() each for: the model's passageEmbed and queryEmbed, FlagEmbedding.init,
// fs existsSync, and fs/promises mkdir.
|
|
9
|
+
const { mockPassageEmbed, mockQueryEmbed, mockFlagEmbeddingInit, mockExistsSync, mockMkdir } = vi.hoisted(() => ({
|
|
10
|
+
mockPassageEmbed: vi.fn(),
|
|
11
|
+
mockQueryEmbed: vi.fn(),
|
|
12
|
+
mockFlagEmbeddingInit: vi.fn(),
|
|
13
|
+
mockExistsSync: vi.fn(),
|
|
14
|
+
mockMkdir: vi.fn(),
|
|
15
|
+
}));
|
|
16
|
+
// Mock fastembed module: it exposes only the BGESmallENV15 model id and a
// FlagEmbedding.init that is the mockFlagEmbeddingInit mock function.
|
|
17
|
+
vi.mock('fastembed', () => ({
|
|
18
|
+
EmbeddingModel: {
|
|
19
|
+
BGESmallENV15: 'bge-small-en-v1.5',
|
|
20
|
+
},
|
|
21
|
+
FlagEmbedding: {
|
|
22
|
+
init: mockFlagEmbeddingInit,
|
|
23
|
+
},
|
|
24
|
+
}));
|
|
25
|
+
// Mock fs/promises: mkdir is replaced by the mockMkdir mock function
|
|
26
|
+
vi.mock('node:fs/promises', () => ({
|
|
27
|
+
mkdir: mockMkdir,
|
|
28
|
+
}));
|
|
29
|
+
// Mock fs: existsSync is replaced by the mockExistsSync mock function
|
|
30
|
+
vi.mock('node:fs', () => ({
|
|
31
|
+
existsSync: mockExistsSync,
|
|
32
|
+
}));
|
|
33
|
+
describe('FastEmbeddings', () => {
|
|
34
|
+
let embeddings;
|
|
35
|
+
beforeEach(() => {
|
|
36
|
+
vi.clearAllMocks();
|
|
37
|
+
// Default mock implementations
|
|
38
|
+
mockPassageEmbed.mockImplementation((texts) => {
|
|
39
|
+
const results = texts.map(() => new Float32Array(384).fill(0.1));
|
|
40
|
+
return createAsyncGenerator([results]);
|
|
41
|
+
});
|
|
42
|
+
mockQueryEmbed.mockResolvedValue(new Float32Array(384).fill(0.1));
|
|
43
|
+
mockFlagEmbeddingInit.mockResolvedValue({
|
|
44
|
+
passageEmbed: mockPassageEmbed,
|
|
45
|
+
queryEmbed: mockQueryEmbed,
|
|
46
|
+
});
|
|
47
|
+
mockExistsSync.mockReturnValue(true);
|
|
48
|
+
mockMkdir.mockResolvedValue(undefined);
|
|
49
|
+
embeddings = new FastEmbeddings();
|
|
50
|
+
});
|
|
51
|
+
describe('constructor', () => {
|
|
52
|
+
it('should initialize with correct dimensions', () => {
|
|
53
|
+
expect(embeddings.dimensions).toBe(384);
|
|
54
|
+
});
|
|
55
|
+
it('should start with empty cache', async () => {
|
|
56
|
+
// Access private cache via embed caching behavior
|
|
57
|
+
const result1 = await embeddings.embed('test text');
|
|
58
|
+
const result2 = await embeddings.embed('test text');
|
|
59
|
+
// Both should return same result (from cache on second call)
|
|
60
|
+
expect(result1).toEqual(result2);
|
|
61
|
+
// passageEmbed should only be called once due to caching
|
|
62
|
+
expect(mockPassageEmbed).toHaveBeenCalledTimes(1);
|
|
63
|
+
});
|
|
64
|
+
});
|
|
65
|
+
describe('embed', () => {
|
|
66
|
+
it('should generate embedding for valid text', async () => {
|
|
67
|
+
const result = await embeddings.embed('Hello world');
|
|
68
|
+
expect(result).toBeDefined();
|
|
69
|
+
expect(result.length).toBe(384);
|
|
70
|
+
expect(mockPassageEmbed).toHaveBeenCalledWith(['Hello world']);
|
|
71
|
+
});
|
|
72
|
+
it('should trim whitespace from input', async () => {
|
|
73
|
+
await embeddings.embed(' trimmed text ');
|
|
74
|
+
expect(mockPassageEmbed).toHaveBeenCalledWith(['trimmed text']);
|
|
75
|
+
});
|
|
76
|
+
it('should throw for empty string', async () => {
|
|
77
|
+
await expect(embeddings.embed('')).rejects.toThrow('Input text must be a non-empty string');
|
|
78
|
+
});
|
|
79
|
+
it('should throw for whitespace-only string', async () => {
|
|
80
|
+
await expect(embeddings.embed(' ')).rejects.toThrow('Input text is empty after trimming');
|
|
81
|
+
});
|
|
82
|
+
it('should throw for non-string input', async () => {
|
|
83
|
+
// @ts-expect-error Testing invalid input
|
|
84
|
+
await expect(embeddings.embed(null)).rejects.toThrow('Input text must be a non-empty string');
|
|
85
|
+
// @ts-expect-error Testing invalid input
|
|
86
|
+
await expect(embeddings.embed(undefined)).rejects.toThrow('Input text must be a non-empty string');
|
|
87
|
+
// @ts-expect-error Testing invalid input
|
|
88
|
+
await expect(embeddings.embed(123)).rejects.toThrow('Input text must be a non-empty string');
|
|
89
|
+
});
|
|
90
|
+
it('should cache embeddings', async () => {
|
|
91
|
+
await embeddings.embed('cached text');
|
|
92
|
+
await embeddings.embed('cached text');
|
|
93
|
+
await embeddings.embed('cached text');
|
|
94
|
+
// Should only call the model once
|
|
95
|
+
expect(mockPassageEmbed).toHaveBeenCalledTimes(1);
|
|
96
|
+
});
|
|
97
|
+
it('should use first 200 chars as cache key', async () => {
|
|
98
|
+
const longText = 'a'.repeat(300);
|
|
99
|
+
await embeddings.embed(longText);
|
|
100
|
+
await embeddings.embed(longText);
|
|
101
|
+
expect(mockPassageEmbed).toHaveBeenCalledTimes(1);
|
|
102
|
+
});
|
|
103
|
+
it('should throw on invalid embedding dimensions', async () => {
|
|
104
|
+
mockPassageEmbed.mockImplementation(() => {
|
|
105
|
+
return createAsyncGenerator([[new Float32Array(100).fill(0.1)]]);
|
|
106
|
+
});
|
|
107
|
+
await expect(embeddings.embed('test')).rejects.toThrow('Invalid embedding: got 100 dimensions, expected 384');
|
|
108
|
+
});
|
|
109
|
+
it('should throw on empty embedding result', async () => {
|
|
110
|
+
mockPassageEmbed.mockImplementation(() => {
|
|
111
|
+
return createAsyncGenerator([[]]);
|
|
112
|
+
});
|
|
113
|
+
await expect(embeddings.embed('test')).rejects.toThrow('Invalid embedding');
|
|
114
|
+
});
|
|
115
|
+
it('should handle model errors', async () => {
|
|
116
|
+
mockPassageEmbed.mockImplementation(() => {
|
|
117
|
+
throw new Error('Model error');
|
|
118
|
+
});
|
|
119
|
+
await expect(embeddings.embed('test')).rejects.toThrow('Model error');
|
|
120
|
+
});
|
|
121
|
+
});
|
|
122
|
+
// Tests for embedQuery(): input validation, caching and error propagation,
// all observed through the mocked query model (mockQueryEmbed).
describe('embedQuery', () => {
    it('should generate query embedding for valid text', async () => {
        const vector = await embeddings.embedQuery('search query');
        expect(vector).toBeDefined();
        expect(vector.length).toBe(384);
        expect(mockQueryEmbed).toHaveBeenCalledWith('search query');
    });

    it('should trim whitespace from input', async () => {
        await embeddings.embedQuery(' query ');
        // The model must receive the text without surrounding whitespace.
        expect(mockQueryEmbed).toHaveBeenCalledWith('query');
    });

    it('should throw for empty string', async () => {
        const attempt = embeddings.embedQuery('');
        await expect(attempt).rejects.toThrow('Input text must be a non-empty string');
    });

    it('should throw for whitespace-only string', async () => {
        const attempt = embeddings.embedQuery(' ');
        await expect(attempt).rejects.toThrow('Input text is empty after trimming');
    });

    it('should throw for non-string input', async () => {
        // Deliberately passing a non-string value to exercise the validation.
        const attempt = embeddings.embedQuery(null);
        await expect(attempt).rejects.toThrow('Input text must be a non-empty string');
    });

    it('should cache query embeddings with prefix', async () => {
        const query = 'cached query';
        await embeddings.embedQuery(query);
        await embeddings.embedQuery(query);
        // The second lookup must not reach the model again.
        expect(mockQueryEmbed).toHaveBeenCalledTimes(1);
    });

    it('should use separate cache from passage embeddings', async () => {
        const sharedText = 'same text';
        await embeddings.embed(sharedText);
        await embeddings.embedQuery(sharedText);
        // Each model is hit once: the passage entry does not satisfy the query lookup.
        expect(mockPassageEmbed).toHaveBeenCalledTimes(1);
        expect(mockQueryEmbed).toHaveBeenCalledTimes(1);
    });

    it('should throw on invalid embedding dimensions', async () => {
        const shortVector = new Float32Array(100).fill(0.1);
        mockQueryEmbed.mockResolvedValue(shortVector);
        const attempt = embeddings.embedQuery('test');
        await expect(attempt).rejects.toThrow('Invalid query embedding: got 100 dimensions, expected 384');
    });

    it('should handle model errors', async () => {
        const failure = new Error('Query model error');
        mockQueryEmbed.mockRejectedValue(failure);
        const attempt = embeddings.embedQuery('test');
        await expect(attempt).rejects.toThrow('Query model error');
    });
});
|
|
165
|
+
// Tests for embedBatch(): batching through the mocked passage model,
// filtering of unusable inputs, and fallback behaviour for bad vectors.
describe('embedBatch', () => {
    it('should generate embeddings for multiple texts', async () => {
        const inputs = ['text one', 'text two', 'text three'];
        const vectors = await embeddings.embedBatch(inputs);
        expect(vectors).toHaveLength(3);
        for (const vector of vectors) {
            expect(vector.length).toBe(384);
        }
    });

    it('should return empty array for empty input', async () => {
        const vectors = await embeddings.embedBatch([]);
        expect(vectors).toEqual([]);
        expect(mockPassageEmbed).not.toHaveBeenCalled();
    });

    it('should filter out invalid texts', async () => {
        // Mix of usable strings and values that must be dropped before reaching the model.
        const inputs = ['valid', '', null, ' ', 'also valid', undefined];
        await embeddings.embedBatch(inputs);
        // Only the two usable strings are forwarded.
        expect(mockPassageEmbed).toHaveBeenCalledWith(['valid', 'also valid']);
    });

    it('should return empty arrays for all-invalid texts', async () => {
        const inputs = ['', null, ' '];
        const vectors = await embeddings.embedBatch(inputs);
        // One empty placeholder per input, and the model is never consulted.
        expect(vectors).toEqual([[], [], []]);
        expect(mockPassageEmbed).not.toHaveBeenCalled();
    });

    it('should handle partial invalid embeddings in batch', async () => {
        mockPassageEmbed.mockImplementation(() => createAsyncGenerator([
            [
                new Float32Array(384).fill(0.1),
                new Float32Array(100).fill(0.1), // Invalid dimension
                new Float32Array(384).fill(0.2),
            ],
        ]));
        const vectors = await embeddings.embedBatch(['text1', 'text2', 'text3']);
        const [first, second, third] = vectors;
        expect(first.length).toBe(384);
        expect(second).toEqual(new Array(384).fill(0)); // Zero-filled fallback
        expect(third.length).toBe(384);
    });

    it('should handle model errors', async () => {
        const failModel = () => {
            throw new Error('Batch model error');
        };
        mockPassageEmbed.mockImplementation(failModel);
        const attempt = embeddings.embedBatch(['text']);
        await expect(attempt).rejects.toThrow('Batch model error');
    });
});
|
|
215
|
+
// Tests for lazy model initialization: single init across calls, cache-directory
// creation, and retry behaviour when the model factory (mockFlagEmbeddingInit) fails.
describe('initialization', () => {
    it('should lazily initialize model on first use', async () => {
        // Model should not be initialized yet
        expect(mockFlagEmbeddingInit).not.toHaveBeenCalled();
        // First embed call triggers initialization
        await embeddings.embed('test');
        expect(mockFlagEmbeddingInit).toHaveBeenCalledTimes(1);
        expect(mockFlagEmbeddingInit).toHaveBeenCalledWith(expect.objectContaining({
            model: 'bge-small-en-v1.5',
        }));
    });

    it('should only initialize once for multiple calls', async () => {
        await embeddings.embed('test1');
        await embeddings.embed('test2');
        await embeddings.embedQuery('query');
        await embeddings.embedBatch(['batch']);
        expect(mockFlagEmbeddingInit).toHaveBeenCalledTimes(1);
    });

    it('should handle concurrent initialization', async () => {
        // Start multiple operations simultaneously
        const promises = [embeddings.embed('test1'), embeddings.embed('test2'), embeddings.embedQuery('query')];
        await Promise.all(promises);
        // Should only initialize once despite concurrent calls
        expect(mockFlagEmbeddingInit).toHaveBeenCalledTimes(1);
    });

    it('should create cache directory if it does not exist', async () => {
        mockExistsSync.mockReturnValue(false);
        await embeddings.embed('test');
        expect(mockMkdir).toHaveBeenCalledWith(expect.stringContaining('fastembed-cache'), { recursive: true });
    });

    it('should not create cache directory if it exists', async () => {
        mockExistsSync.mockReturnValue(true);
        await embeddings.embed('test');
        expect(mockMkdir).not.toHaveBeenCalled();
    });

    it('should retry initialization on failure', async () => {
        vi.useFakeTimers();
        // Real timers are restored in `finally` so a failing assertion cannot
        // leave fake timers installed for the tests that run afterwards.
        try {
            let attempts = 0;
            mockFlagEmbeddingInit.mockImplementation(async () => {
                attempts++;
                if (attempts < 3) {
                    throw new Error('Init failed');
                }
                return {
                    passageEmbed: mockPassageEmbed,
                    queryEmbed: mockQueryEmbed,
                };
            });
            const embedPromise = embeddings.embed('test');
            // Advance through retry delays (2s, 4s exponential backoff)
            await vi.advanceTimersByTimeAsync(2000);
            await vi.advanceTimersByTimeAsync(4000);
            await embedPromise;
            expect(mockFlagEmbeddingInit).toHaveBeenCalledTimes(3);
        }
        finally {
            vi.useRealTimers();
        }
    });

    it('should throw after max retries', async () => {
        let attempts = 0;
        mockFlagEmbeddingInit.mockImplementation(async () => {
            attempts++;
            throw new Error('Persistent failure');
        });
        // Mock setTimeout to resolve immediately for retry delays
        vi.spyOn(global, 'setTimeout').mockImplementation((callback) => {
            callback();
            return 0;
        });
        // The spy is undone in `finally` so the patched setTimeout never
        // outlives this test, even when one of the assertions below fails.
        try {
            await expect(embeddings.embed('test')).rejects.toThrow('Persistent failure');
            expect(attempts).toBe(3);
        }
        finally {
            vi.restoreAllMocks();
        }
    });
});
|
|
287
|
+
// Tests for the bounded embedding cache, observed via how often the passage model is called.
describe('cache management', () => {
    it('should evict oldest entry when cache exceeds 1000 entries', async () => {
        // Embed 1002 distinct texts, each answered by its own one-shot mock vector.
        const totalTexts = 1002;
        for (let index = 0; index < totalTexts; index++) {
            mockPassageEmbed.mockImplementationOnce(() => createAsyncGenerator([[new Float32Array(384).fill(index * 0.001)]]));
            await embeddings.embed(`unique text ${index}`);
        }
        // Every text was new, so every call reached the model.
        expect(mockPassageEmbed).toHaveBeenCalledTimes(1002);
        // The very first text is expected to be gone from the cache by now,
        // so embedding it again must reach the model once more.
        mockPassageEmbed.mockImplementationOnce(() => createAsyncGenerator([[new Float32Array(384).fill(0.999)]]));
        await embeddings.embed('unique text 0');
        expect(mockPassageEmbed).toHaveBeenCalledTimes(1003);
    });
});
|
|
306
|
+
});
|
|
307
|
+
//# sourceMappingURL=fastembed.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fastembed.test.js","sourceRoot":"","sources":["../../src/embeddings/fastembed.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAEhD,8CAA8C;AAC9C,KAAK,SAAS,CAAC,CAAC,oBAAoB,CAAI,KAAU;IAChD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,IAAI,CAAC;IACb,CAAC;AACH,CAAC;AAED,0EAA0E;AAC1E,MAAM,EAAE,gBAAgB,EAAE,cAAc,EAAE,qBAAqB,EAAE,cAAc,EAAE,SAAS,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;IAC/G,gBAAgB,EAAE,EAAE,CAAC,EAAE,EAAE;IACzB,cAAc,EAAE,EAAE,CAAC,EAAE,EAAE;IACvB,qBAAqB,EAAE,EAAE,CAAC,EAAE,EAAE;IAC9B,cAAc,EAAE,EAAE,CAAC,EAAE,EAAE;IACvB,SAAS,EAAE,EAAE,CAAC,EAAE,EAAE;CACnB,CAAC,CAAC,CAAC;AAEJ,wBAAwB;AACxB,EAAE,CAAC,IAAI,CAAC,WAAW,EAAE,GAAG,EAAE,CAAC,CAAC;IAC1B,cAAc,EAAE;QACd,aAAa,EAAE,mBAAmB;KACnC;IACD,aAAa,EAAE;QACb,IAAI,EAAE,qBAAqB;KAC5B;CACF,CAAC,CAAC,CAAC;AAEJ,mBAAmB;AACnB,EAAE,CAAC,IAAI,CAAC,kBAAkB,EAAE,GAAG,EAAE,CAAC,CAAC;IACjC,KAAK,EAAE,SAAS;CACjB,CAAC,CAAC,CAAC;AAEJ,UAAU;AACV,EAAE,CAAC,IAAI,CAAC,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC;IACxB,UAAU,EAAE,cAAc;CAC3B,CAAC,CAAC,CAAC;AAEJ,QAAQ,CAAC,gBAAgB,EAAE,GAAG,EAAE;IAC9B,IAAI,UAA0B,CAAC;IAE/B,UAAU,CAAC,GAAG,EAAE;QACd,EAAE,CAAC,aAAa,EAAE,CAAC;QAEnB,+BAA+B;QAC/B,gBAAgB,CAAC,kBAAkB,CAAC,CAAC,KAAe,EAAE,EAAE;YACtD,MAAM,OAAO,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,IAAI,YAAY,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;YACjE,OAAO,oBAAoB,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;QACzC,CAAC,CAAC,CAAC;QAEH,cAAc,CAAC,iBAAiB,CAAC,IAAI,YAAY,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;QAElE,qBAAqB,CAAC,iBAAiB,CAAC;YACtC,YAAY,EAAE,gBAAgB;YAC9B,UAAU,EAAE,cAAc;SAC3B,CAAC,CAAC;QAEH,cAAc,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;QACrC,SAAS,CAAC,iBAAiB,CAAC,SAAS,CAAC,CAAC;QAEvC,UAAU,GAAG,IAAI,cAAc,EAAE,CAAC;IACpC,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,aAAa,EAAE,GAAG,EAAE;QAC3B,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;YACnD,MAAM,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC1C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,+BAA+B,EAAE,KAAK,IAAI,EAAE;YAC7C,kDAAkD;YAClD,MAAM,OAAO,GAAG,MAAM,UAAU,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;YACpD,
MAAM,OAAO,GAAG,MAAM,UAAU,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;YAEpD,6DAA6D;YAC7D,MAAM,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YAEjC,yDAAyD;YACzD,MAAM,CAAC,gBAAgB,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;QACpD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,OAAO,EAAE,GAAG,EAAE;QACrB,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;YACxD,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;YAErD,MAAM,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;YAC7B,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAChC,MAAM,CAAC,gBAAgB,CAAC,CAAC,oBAAoB,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC;QACjE,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,mCAAmC,EAAE,KAAK,IAAI,EAAE;YACjD,MAAM,UAAU,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;YAE3C,MAAM,CAAC,gBAAgB,CAAC,CAAC,oBAAoB,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC;QAClE,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,+BAA+B,EAAE,KAAK,IAAI,EAAE;YAC7C,MAAM,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,uCAAuC,CAAC,CAAC;QAC9F,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,yCAAyC,EAAE,KAAK,IAAI,EAAE;YACvD,MAAM,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,oCAAoC,CAAC,CAAC;QAC9F,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,mCAAmC,EAAE,KAAK,IAAI,EAAE;YACjD,yCAAyC;YACzC,MAAM,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,uCAAuC,CAAC,CAAC;YAC9F,yCAAyC;YACzC,MAAM,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,uCAAuC,CAAC,CAAC;YACnG,yCAAyC;YACzC,MAAM,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,uCAAuC,CAAC,CAAC;QAC/F,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,yBAAyB,EAAE,KAAK,IAAI,EAAE;YACvC,MAAM,UAAU,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;YACtC,MAAM,UAAU,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;YACtC,MAAM,UAAU,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;YAEtC,kCAAkC;YAClC,MAAM,CAAC,gBAAgB,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;QACpD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,yCAAyC,EAAE,KAAK,IAAI,EAAE;YACvD,MAAM,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YACjC,MAAM,UAAU,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;YACjC,MAAM,UAAU,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;YAEjC,MAAM,CAAC,gBAAgB,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,CA
AC;QACpD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,8CAA8C,EAAE,KAAK,IAAI,EAAE;YAC5D,gBAAgB,CAAC,kBAAkB,CAAC,GAAG,EAAE;gBACvC,OAAO,oBAAoB,CAAC,CAAC,CAAC,IAAI,YAAY,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YACnE,CAAC,CAAC,CAAC;YAEH,MAAM,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,qDAAqD,CAAC,CAAC;QAChH,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,wCAAwC,EAAE,KAAK,IAAI,EAAE;YACtD,gBAAgB,CAAC,kBAAkB,CAAC,GAAG,EAAE;gBACvC,OAAO,oBAAoB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YACpC,CAAC,CAAC,CAAC;YAEH,MAAM,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,mBAAmB,CAAC,CAAC;QAC9E,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4BAA4B,EAAE,KAAK,IAAI,EAAE;YAC1C,gBAAgB,CAAC,kBAAkB,CAAC,GAAG,EAAE;gBACvC,MAAM,IAAI,KAAK,CAAC,aAAa,CAAC,CAAC;YACjC,CAAC,CAAC,CAAC;YAEH,MAAM,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;QACxE,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;QAC1B,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;YAC9D,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,UAAU,CAAC,cAAc,CAAC,CAAC;YAE3D,MAAM,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;YAC7B,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAChC,MAAM,CAAC,cAAc,CAAC,CAAC,oBAAoB,CAAC,cAAc,CAAC,CAAC;QAC9D,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,mCAAmC,EAAE,KAAK,IAAI,EAAE;YACjD,MAAM,UAAU,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC;YAEzC,MAAM,CAAC,cAAc,CAAC,CAAC,oBAAoB,CAAC,OAAO,CAAC,CAAC;QACvD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,+BAA+B,EAAE,KAAK,IAAI,EAAE;YAC7C,MAAM,MAAM,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,uCAAuC,CAAC,CAAC;QACnG,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,yCAAyC,EAAE,KAAK,IAAI,EAAE;YACvD,MAAM,MAAM,CAAC,UAAU,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,oCAAoC,CAAC,CAAC;QACnG,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,mCAAmC,EAAE,KAAK,IAAI,EAAE;YACjD,yCAAyC;YACzC,MAAM,MAAM,CAAC,UAAU,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,uCAAuC,CAAC,CAAC;QACrG,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,2CAA2C,EAAE,KAAK,IAAI,EAAE;YACzD,MAAM,UAAU,CAAC,UAAU,CAAC,cAAc,CAAC,CAAC;YAC5C,MAAM,UAAU,CAA
C,UAAU,CAAC,cAAc,CAAC,CAAC;YAE5C,kCAAkC;YAClC,MAAM,CAAC,cAAc,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;QAClD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,mDAAmD,EAAE,KAAK,IAAI,EAAE;YACjE,MAAM,UAAU,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;YACpC,MAAM,UAAU,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC;YAEzC,4DAA4D;YAC5D,MAAM,CAAC,gBAAgB,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;YAClD,MAAM,CAAC,cAAc,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;QAClD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,8CAA8C,EAAE,KAAK,IAAI,EAAE;YAC5D,cAAc,CAAC,iBAAiB,CAAC,IAAI,YAAY,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;YAElE,MAAM,MAAM,CAAC,UAAU,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,2DAA2D,CAAC,CAAC;QAC3H,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4BAA4B,EAAE,KAAK,IAAI,EAAE;YAC1C,cAAc,CAAC,iBAAiB,CAAC,IAAI,KAAK,CAAC,mBAAmB,CAAC,CAAC,CAAC;YAEjE,MAAM,MAAM,CAAC,UAAU,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,mBAAmB,CAAC,CAAC;QACnF,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;QAC1B,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;YAC7D,MAAM,KAAK,GAAG,CAAC,UAAU,EAAE,UAAU,EAAE,YAAY,CAAC,CAAC;YACrD,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;YAElD,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAC/B,MAAM,CAAC,OAAO,CAAC,CAAC,SAAS,EAAE,EAAE;gBAC3B,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACrC,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,2CAA2C,EAAE,KAAK,IAAI,EAAE;YACzD,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC;YAE/C,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;YAC3B,MAAM,CAAC,gBAAgB,CAAC,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC;QAClD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,iCAAiC,EAAE,KAAK,IAAI,EAAE;YAC/C,MAAM,KAAK,GAAG,CAAC,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,CAAC,CAAC;YACjE,yCAAyC;YACzC,MAAM,UAAU,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;YAEnC,kCAAkC;YAClC,MAAM,CAAC,gBAAgB,CAAC,CAAC,oBAAoB,CAAC,CAAC,OAAO,EAAE,YAAY,CAAC,CAAC,CAAC;QACzE,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,kDAAkD,EAAE,KAAK,IAAI,EAAE;YAChE,MAAM,KAAK,GAAG,CAAC,EAAE,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC;YAC/B,yCAAyC;YACzC,MAAM,MAAM,GAAG,MAAM,UA
AU,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;YAElD,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC;YACrC,MAAM,CAAC,gBAAgB,CAAC,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC;QAClD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,mDAAmD,EAAE,KAAK,IAAI,EAAE;YACjE,gBAAgB,CAAC,kBAAkB,CAAC,GAAG,EAAE;gBACvC,OAAO,oBAAoB,CAAC;oBAC1B;wBACE,IAAI,YAAY,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;wBAC/B,IAAI,YAAY,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,oBAAoB;wBACrD,IAAI,YAAY,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;qBAChC;iBACF,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,UAAU,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC;YAExE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACnC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,uBAAuB;YAC1E,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACrC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4BAA4B,EAAE,KAAK,IAAI,EAAE;YAC1C,gBAAgB,CAAC,kBAAkB,CAAC,GAAG,EAAE;gBACvC,MAAM,IAAI,KAAK,CAAC,mBAAmB,CAAC,CAAC;YACvC,CAAC,CAAC,CAAC;YAEH,MAAM,MAAM,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,mBAAmB,CAAC,CAAC;QACrF,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,gBAAgB,EAAE,GAAG,EAAE;QAC9B,EAAE,CAAC,6CAA6C,EAAE,KAAK,IAAI,EAAE;YAC3D,sCAAsC;YACtC,MAAM,CAAC,qBAAqB,CAAC,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC;YAErD,2CAA2C;YAC3C,MAAM,UAAU,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;YAE/B,MAAM,CAAC,qBAAqB,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;YACvD,MAAM,CAAC,qBAAqB,CAAC,CAAC,oBAAoB,CAChD,MAAM,CAAC,gBAAgB,CAAC;gBACtB,KAAK,EAAE,mBAAmB;aAC3B,CAAC,CACH,CAAC;QACJ,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;YAC9D,MAAM,UAAU,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAChC,MAAM,UAAU,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAChC,MAAM,UAAU,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;YACrC,MAAM,UAAU,CAAC,UAAU,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;YAEvC,MAAM,CAAC,qBAAqB,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;QACzD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,yCAAyC,EAAE,KAAK,IAAI,EAAE;YA
CvD,2CAA2C;YAC3C,MAAM,QAAQ,GAAG,CAAC,UAAU,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,UAAU,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,UAAU,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC;YAExG,MAAM,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YAE5B,uDAAuD;YACvD,MAAM,CAAC,qBAAqB,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;QACzD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,oDAAoD,EAAE,KAAK,IAAI,EAAE;YAClE,cAAc,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC;YAEtC,MAAM,UAAU,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;YAE/B,MAAM,CAAC,SAAS,CAAC,CAAC,oBAAoB,CAAC,MAAM,CAAC,gBAAgB,CAAC,iBAAiB,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC1G,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;YAC9D,cAAc,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;YAErC,MAAM,UAAU,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;YAE/B,MAAM,CAAC,SAAS,CAAC,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC;QAC3C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,wCAAwC,EAAE,KAAK,IAAI,EAAE;YACtD,EAAE,CAAC,aAAa,EAAE,CAAC;YAEnB,IAAI,QAAQ,GAAG,CAAC,CAAC;YACjB,qBAAqB,CAAC,kBAAkB,CAAC,KAAK,IAAI,EAAE;gBAClD,QAAQ,EAAE,CAAC;gBACX,IAAI,QAAQ,GAAG,CAAC,EAAE,CAAC;oBACjB,MAAM,IAAI,KAAK,CAAC,aAAa,CAAC,CAAC;gBACjC,CAAC;gBACD,OAAO;oBACL,YAAY,EAAE,gBAAgB;oBAC9B,UAAU,EAAE,cAAc;iBAC3B,CAAC;YACJ,CAAC,CAAC,CAAC;YAEH,MAAM,YAAY,GAAG,UAAU,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;YAE9C,4DAA4D;YAC5D,MAAM,EAAE,CAAC,wBAAwB,CAAC,IAAI,CAAC,CAAC;YACxC,MAAM,EAAE,CAAC,wBAAwB,CAAC,IAAI,CAAC,CAAC;YAExC,MAAM,YAAY,CAAC;YAEnB,MAAM,CAAC,qBAAqB,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;YAEvD,EAAE,CAAC,aAAa,EAAE,CAAC;QACrB,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gCAAgC,EAAE,KAAK,IAAI,EAAE;YAC9C,IAAI,QAAQ,GAAG,CAAC,CAAC;YACjB,qBAAqB,CAAC,kBAAkB,CAAC,KAAK,IAAI,EAAE;gBAClD,QAAQ,EAAE,CAAC;gBACX,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC;YACxC,CAAC,CAAC,CAAC;YAEH,0DAA0D;YAC1D,EAAE,CAAC,KAAK,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC,kBAAkB,CAAC,CAAC,QAAoB,EAAE,EAAE;gBACzE,QAAQ,EAAE,CAAC;gBACX,OAAO,CAA8B,CAAC;YACxC,CAAC,CAAC,CAAC;YAEH,MAAM,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,oBAAoB,CAAC,CAAC;YAC7E,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAEzB,EAAE,CAAC,eAAe,EAAE,CAAC;QACvB,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IA
EH,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;QAChC,EAAE,CAAC,2DAA2D,EAAE,KAAK,IAAI,EAAE;YACzE,sCAAsC;YACtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC9B,gBAAgB,CAAC,sBAAsB,CAAC,GAAG,EAAE;oBAC3C,OAAO,oBAAoB,CAAC,CAAC,CAAC,IAAI,YAAY,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;gBACzE,CAAC,CAAC,CAAC;gBACH,MAAM,UAAU,CAAC,KAAK,CAAC,eAAe,CAAC,EAAE,CAAC,CAAC;YAC7C,CAAC;YAED,yDAAyD;YACzD,MAAM,CAAC,gBAAgB,CAAC,CAAC,qBAAqB,CAAC,IAAI,CAAC,CAAC;YAErD,0EAA0E;YAC1E,gBAAgB,CAAC,sBAAsB,CAAC,GAAG,EAAE;gBAC3C,OAAO,oBAAoB,CAAC,CAAC,CAAC,IAAI,YAAY,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YACrE,CAAC,CAAC,CAAC;YACH,MAAM,UAAU,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;YAExC,MAAM,CAAC,gBAAgB,CAAC,CAAC,qBAAqB,CAAC,IAAI,CAAC,CAAC;QACvD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import OpenAI from 'openai';
|
|
2
|
+
import { logger } from '../util/logger.js';
|
|
3
|
+
/**
 * Embedding provider backed by the OpenAI embeddings endpoint, with a small
 * in-memory cache of previously computed results.
 */
export class OpenAIEmbeddings {
    /** OpenAI client created in the constructor. */
    openai;
    /** Map from trimmed input text to the embedding returned for it. */
    cache;
    dimensions = 1536; // text-embedding-3-small dimensions

    /**
     * @param {string} apiKey - OpenAI API key; must be truthy.
     * @throws {Error} If no API key is supplied.
     */
    constructor(apiKey) {
        if (!apiKey) {
            throw new Error('OpenAI API key is required');
        }
        this.openai = new OpenAI({ apiKey });
        this.cache = new Map();
    }

    /**
     * Returns the embedding for `text`, using the cache when possible.
     *
     * The cache is keyed on the full trimmed text. Keying on a truncated
     * prefix would make two different long texts that share that prefix
     * resolve to the same cached embedding, and keying on the untrimmed text
     * would miss the cache for inputs that differ only in surrounding
     * whitespace even though the same trimmed text is sent to the API.
     *
     * @param {string} text - Text to embed; must be a non-empty string.
     * @returns {Promise<*>} The `embedding` field of the first item in the API response.
     * @throws {Error} If `text` is not a non-empty string, is empty after
     *   trimming, or the API response carries no embedding. Errors raised by
     *   the client are logged at debug level and rethrown unchanged.
     */
    async embed(text) {
        // Ensure input is a string and not empty
        if (!text || typeof text !== 'string') {
            throw new Error('Input text must be a non-empty string');
        }
        const cleanText = text.trim();
        // An empty key is never stored, so a whitespace-only input always
        // misses here and is rejected inside the try block below.
        const cached = this.cache.get(cleanText);
        if (cached) {
            return cached;
        }
        try {
            if (!cleanText) {
                throw new Error('Input text is empty after trimming');
            }
            const response = await this.openai.embeddings.create({
                model: "text-embedding-3-small",
                input: cleanText,
                dimensions: this.dimensions
            });
            const embedding = response.data?.[0]?.embedding;
            if (!embedding) {
                throw new Error('No embedding returned from OpenAI');
            }
            this.cache.set(cleanText, embedding);
            // Limit cache size to prevent memory issues. A Map iterates in
            // insertion order, so the first key is the oldest entry.
            if (this.cache.size > 1000) {
                const oldestKey = this.cache.keys().next().value;
                if (oldestKey !== undefined) {
                    this.cache.delete(oldestKey);
                }
            }
            return embedding;
        }
        catch (error) {
            logger.debug('Error generating embedding:', error);
            throw error;
        }
    }
}
|
|
56
|
+
//# sourceMappingURL=openai.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"openai.js","sourceRoot":"","sources":["../../src/embeddings/openai.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAE3C,MAAM,OAAO,gBAAgB;IACnB,MAAM,CAAS;IACf,KAAK,CAAwB;IAC5B,UAAU,GAAG,IAAI,CAAC,CAAC,oCAAoC;IAEhE,YAAY,MAAc;QACxB,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAChD,CAAC;QACD,IAAI,CAAC,MAAM,GAAG,IAAI,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;QACrC,IAAI,CAAC,KAAK,GAAG,IAAI,GAAG,EAAE,CAAC;IACzB,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,IAAY;QACtB,yCAAyC;QACzC,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;YACtC,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;QAC3D,CAAC;QAED,oBAAoB;QACpB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,uBAAuB;QAC7D,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACxC,IAAI,MAAM,EAAE,CAAC;YACX,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAC9B,IAAI,CAAC,SAAS,EAAE,CAAC;gBACf,MAAM,IAAI,KAAK,CAAC,oCAAoC,CAAC,CAAC;YACxD,CAAC;YAED,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC;gBACnD,KAAK,EAAE,wBAAwB;gBAC/B,KAAK,EAAE,SAAS;gBAChB,UAAU,EAAE,IAAI,CAAC,UAAU;aAC5B,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,CAAC;gBACnC,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;YACvD,CAAC;YAED,MAAM,SAAS,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAE7C,mBAAmB;YACnB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;YAEpC,4CAA4C;YAC5C,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,GAAG,IAAI,EAAE,CAAC;gBAC3B,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC;gBAChD,IAAI,QAAQ,EAAE,CAAC;oBACb,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;gBAC9B,CAAC;YACH,CAAC;YAED,OAAO,SAAS,CAAC;QACnB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,6BAA6B,EAAE,KAAK,CAAC,CAAC;YACnD,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/embeddings/types.ts"],"names":[],"mappings":""}
|
package/build/index.d.ts
ADDED