@wcs-colab/plugin-fuzzy-phrase 3.1.16-custom.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,164 @@
+ # @wcs-colab/plugin-fuzzy-phrase
+
+ Advanced fuzzy phrase matching plugin for Orama with semantic weighting and synonym expansion.
+
+ ## Features
+
+ - ✅ **Independent from QPS** - Direct radix tree access, no QPS dependency
+ - ✅ **Fuzzy matching** - Uses the `boundedLevenshtein` algorithm (the same as match-highlight)
+ - ✅ **Phrase-level scoring** - Multi-factor scoring algorithm
+ - ✅ **Synonym expansion** - Loads synonyms from Supabase
+ - ✅ **Adaptive tolerance** - Scales dynamically with query length
+ - ✅ **Semantic weighting** - TF-IDF scoring for relevance
+ - ✅ **Configurable** - All weights and thresholds are configurable
+
+ ## Installation
+
+ ```bash
+ npm install @wcs-colab/plugin-fuzzy-phrase
+ ```
+
+ ## Basic Usage
+
+ ```typescript
+ import { create, search } from '@wcs-colab/orama';
+ import { pluginFuzzyPhrase } from '@wcs-colab/plugin-fuzzy-phrase';
+
+ const db = await create({
+   schema: {
+     content: 'string',
+     title: 'string'
+   },
+   plugins: [
+     pluginFuzzyPhrase({
+       textProperty: 'content',
+       tolerance: 1,
+       adaptiveTolerance: true
+     })
+   ]
+ });
+
+ // Search with fuzzy phrase matching
+ const results = await search(db, {
+   term: 'fuzzy search example',
+   properties: ['content']
+ });
+ ```
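+
+ The package also exports a standalone `searchWithFuzzyPhrase(db, params)` helper (see `dist/index.cjs` below) that runs the phrase pipeline directly and returns `{ elapsed, hits, count }`. A minimal sketch, reusing the `db` from above:
+
+ ```typescript
+ import { searchWithFuzzyPhrase } from '@wcs-colab/plugin-fuzzy-phrase';
+
+ const results = await searchWithFuzzyPhrase(db, {
+   term: 'fuzzy search example',
+   properties: ['content']
+ });
+ // Each hit keeps its matched phrases on `_phrases` for highlighting.
+ console.log(results.count, results.elapsed.formatted);
+ ```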
+
+ ## Configuration
+
+ ```typescript
+ interface FuzzyPhraseConfig {
+   // Text property to search in
+   textProperty?: string; // default: 'content'
+
+   // Base fuzzy matching tolerance (edit distance)
+   tolerance?: number; // default: 1
+
+   // Enable adaptive tolerance (scales with query length)
+   adaptiveTolerance?: boolean; // default: true
+
+   // Enable synonym expansion
+   enableSynonyms?: boolean; // default: false
+
+   // Supabase configuration for loading synonyms
+   supabase?: {
+     url: string;
+     serviceKey: string;
+   };
+
+   // Scoring weight for synonym matches (0-1)
+   synonymMatchScore?: number; // default: 0.8
+
+   // Scoring weights for different components
+   weights?: {
+     exact?: number;     // default: 1.0
+     fuzzy?: number;     // default: 0.8
+     order?: number;     // default: 0.3
+     proximity?: number; // default: 0.2
+     density?: number;   // default: 0.2
+     semantic?: number;  // default: 0.15
+   };
+
+   // Maximum gap between words in a phrase
+   maxGap?: number; // default: 5
+
+   // Minimum phrase score to include in results
+   minScore?: number; // default: 0.1
+ }
+ ```
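+
+ For example, a configuration that favors exact matches and only keeps tight, high-scoring phrases (illustrative values, not tuned recommendations):
+
+ ```typescript
+ pluginFuzzyPhrase({
+   textProperty: 'content',
+   adaptiveTolerance: false, // fixed edit distance, regardless of query length
+   weights: {
+     exact: 1.0,
+     fuzzy: 0.5, // demote fuzzy matches
+     order: 0.4  // reward in-order phrases a little more
+   },
+   maxGap: 2,    // matched words must be near-adjacent
+   minScore: 0.3 // drop weak phrases
+ })
+ ```
+
+ Options are merged field by field, so any weight you omit keeps its default.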
+
+ ## With Synonyms (Supabase)
+
+ ```typescript
+ import { pluginFuzzyPhrase } from '@wcs-colab/plugin-fuzzy-phrase';
+
+ const db = await create({
+   schema: {
+     content: 'string'
+   },
+   plugins: [
+     pluginFuzzyPhrase({
+       textProperty: 'content',
+       enableSynonyms: true,
+       supabase: {
+         url: process.env.SUPABASE_URL!,
+         serviceKey: process.env.SUPABASE_SERVICE_ROLE_KEY!
+       }
+     })
+   ]
+ });
+
+ // Now searches will include synonym matches
+ // e.g., "humanité" (humanity) will also match "homme" (man) and "humain" (human)
+ ```
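+
+ Synonyms are loaded once, at plugin initialization, through a Supabase RPC named `get_synonym_map` that is expected to return a word-to-synonyms map. A sketch of that shape (the entries are illustrative):
+
+ ```typescript
+ type SynonymMap = Record<string, string[]>;
+
+ // What `get_synonym_map` might return:
+ const synonyms: SynonymMap = {
+   'humanité': ['homme', 'humain'],
+   'rapide': ['vite', 'véloce']
+ };
+ ```
+
+ A synonym only becomes a candidate if it actually occurs in the index vocabulary, and it is scored with `synonymMatchScore` (default 0.8).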
+
+ ## How It Works
+
+ ### 1. Candidate Expansion
+ For each query token, the plugin finds:
+ - **Exact matches** - Exact word match (score: 1.0)
+ - **Fuzzy matches** - Within edit distance tolerance (score: 0.6-0.95)
+ - **Synonym matches** - From the synonym dictionary (score: 0.8)
+
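+ Concretely, expanding the query token `search` with tolerance 1 might produce candidates like these (scores follow the implementation: prefix matches score 0.95, fuzzy matches score `1 - 0.2 × distance`):
+
+ ```typescript
+ // Hypothetical candidate list for the token "search":
+ const candidates = [
+   { word: 'search',    type: 'exact', queryToken: 'search', distance: 0, score: 1.0  },
+   { word: 'searching', type: 'fuzzy', queryToken: 'search', distance: 0, score: 0.95 }, // prefix match
+   { word: 'serch',     type: 'fuzzy', queryToken: 'search', distance: 1, score: 0.8  }  // one edit away
+ ];
+ ```
+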
+ ### 2. Phrase Finding
+ A sliding window finds phrases where:
+ - Words are within `maxGap` distance of each other
+ - Multiple query tokens are present
+ - Phrases don't overlap
+
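+ For instance, with the query `fuzzy search` and the default `maxGap: 5`, a document containing `fuzzy full-text search` still yields a phrase: `search` sits two positions after `fuzzy`, leaving a gap of one intervening word. The gap check, as in the implementation:
+
+ ```typescript
+ // Positions are indices in the document's token stream.
+ const prevPosition = 0; // "fuzzy"
+ const nextPosition = 2; // "search"
+ const gap = nextPosition - prevPosition - 1; // 1 intervening word ("full-text")
+ const withinPhrase = gap <= 5; // true, so the phrase extends
+ ```
+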
+ ### 3. Multi-Factor Scoring
+ Each phrase is scored using:
+ - **Base score** - Quality of word matches
+ - **Order bonus** - Words in correct order
+ - **Proximity bonus** - Words close together
+ - **Density bonus** - Percentage of query covered
+ - **Semantic bonus** - TF-IDF relevance weighting
+
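+ The bonuses are combined linearly and normalized by the maximum achievable score, so a phrase score always lands in [0, 1]:
+
+ ```typescript
+ // With the default weights (order 0.3, proximity 0.2, density 0.2, semantic 0.15):
+ const totalScore =
+   baseScore +
+   orderScore * weights.order +
+   proximityScore * weights.proximity +
+   densityScore * weights.density +
+   semanticScore * weights.semantic;
+ const maxPossibleScore = 1 + weights.order + weights.proximity + weights.density + weights.semantic; // 1.85
+ const phraseScore = Math.min(1, totalScore / maxPossibleScore);
+ ```
+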
+ ### 4. Result Ranking
+ Each document takes the score of its best phrase, and results are sorted by that score in descending order.
+
+ ## Architecture
+
+ The plugin is completely independent from QPS:
+ - Accesses Orama's radix tree directly
+ - Uses the same `boundedLevenshtein` as the match-highlight plugin
+ - Implements custom phrase-level scoring
+ - Loads synonyms from Supabase (optional)
+
+ ## Performance
+
+ - **Bounded Levenshtein** - terminates early once a whole row of the distance matrix exceeds the bound
+ - **Vocabulary extraction** - a single radix tree traversal per search
+ - **TF-IDF** - document frequencies are pre-calculated at index creation
+ - **Deduplication** - only non-overlapping phrases are kept
+
+ ## License
+
+ Apache-2.0
+
+ ## Version
+
+ 3.1.16-custom.1
+
+ Compatible with `@wcs-colab/orama@3.1.16-custom.9`
package/dist/index.cjs ADDED
@@ -0,0 +1,508 @@
+ 'use strict';
+
+ // src/fuzzy.ts
+ // Two-row dynamic-programming Levenshtein distance that bails out early
+ // once every cell in the current row exceeds the bound.
+ function boundedLevenshtein(a, b, bound) {
+   if (a === b) {
+     return { isBounded: true, distance: 0 };
+   }
+   const aLen = a.length;
+   const bLen = b.length;
+   if (Math.abs(aLen - bLen) > bound) {
+     return { isBounded: false, distance: bound + 1 };
+   }
+   if (aLen > bLen) {
+     [a, b] = [b, a];
+   }
+   const m = a.length;
+   const n = b.length;
+   let prevRow = new Array(n + 1);
+   let currRow = new Array(n + 1);
+   for (let j = 0; j <= n; j++) {
+     prevRow[j] = j;
+   }
+   for (let i = 1; i <= m; i++) {
+     currRow[0] = i;
+     let minInRow = i;
+     for (let j = 1; j <= n; j++) {
+       const cost = a[i - 1] === b[j - 1] ? 0 : 1;
+       currRow[j] = Math.min(
+         prevRow[j] + 1, // deletion
+         currRow[j - 1] + 1, // insertion
+         prevRow[j - 1] + cost // substitution
+       );
+       minInRow = Math.min(minInRow, currRow[j]);
+     }
+     if (minInRow > bound) {
+       return { isBounded: false, distance: bound + 1 };
+     }
+     [prevRow, currRow] = [currRow, prevRow];
+   }
+   const distance = prevRow[n];
+   return {
+     isBounded: distance <= bound,
+     distance
+   };
+ }
+ // Exact match scores 1, prefix match 0.95, otherwise 1 - 0.2 per edit
+ // (floored at 0.1) when within tolerance.
+ function fuzzyMatch(word, queryToken, tolerance) {
+   if (word === queryToken) {
+     return { matches: true, distance: 0, score: 1 };
+   }
+   if (word.startsWith(queryToken)) {
+     return { matches: true, distance: 0, score: 0.95 };
+   }
+   const result = boundedLevenshtein(word, queryToken, tolerance);
+   if (result.isBounded) {
+     const score = 1 - result.distance * 0.2;
+     return {
+       matches: true,
+       distance: result.distance,
+       score: Math.max(0.1, score) // minimum score of 0.1
+     };
+   }
+   return { matches: false, distance: tolerance + 1, score: 0 };
+ }
+ // Widen the edit-distance budget for longer queries.
+ function calculateAdaptiveTolerance(queryTokens, baseTolerance) {
+   const queryLength = queryTokens.length;
+   if (queryLength <= 2) {
+     return baseTolerance;
+   } else if (queryLength <= 4) {
+     return baseTolerance + 1;
+   } else if (queryLength <= 6) {
+     return baseTolerance + 2;
+   } else {
+     return baseTolerance + 3;
+   }
+ }
+
+ // src/candidates.ts
+ // Walk the radix tree, collecting node.w from every node that carries a word.
+ function extractVocabularyFromRadixTree(radixNode) {
+   const vocabulary = /* @__PURE__ */ new Set();
+   function traverse(node) {
+     if (node.w) {
+       vocabulary.add(node.w);
+     }
+     if (node.c) {
+       for (const child of Object.values(node.c)) {
+         traverse(child);
+       }
+     }
+   }
+   traverse(radixNode);
+   return vocabulary;
+ }
+ // Expand one query token into exact, fuzzy, and (optionally) synonym
+ // candidates drawn from the index vocabulary.
+ function findCandidatesForToken(queryToken, vocabulary, tolerance, synonyms, synonymScore = 0.8) {
+   const candidates = [];
+   const seen = /* @__PURE__ */ new Set();
+   if (vocabulary.has(queryToken)) {
+     candidates.push({
+       word: queryToken,
+       type: "exact",
+       queryToken,
+       distance: 0,
+       score: 1
+     });
+     seen.add(queryToken);
+   }
+   for (const word of vocabulary) {
+     if (seen.has(word)) continue;
+     const match = fuzzyMatch(word, queryToken, tolerance);
+     if (match.matches) {
+       candidates.push({
+         word,
+         type: "fuzzy",
+         queryToken,
+         distance: match.distance,
+         score: match.score
+       });
+       seen.add(word);
+     }
+   }
+   if (synonyms && synonyms[queryToken]) {
+     for (const synonym of synonyms[queryToken]) {
+       if (seen.has(synonym)) continue;
+       if (vocabulary.has(synonym)) {
+         candidates.push({
+           word: synonym,
+           type: "synonym",
+           queryToken,
+           distance: 0,
+           score: synonymScore
+         });
+         seen.add(synonym);
+       }
+     }
+   }
+   return candidates;
+ }
+ function findAllCandidates(queryTokens, vocabulary, tolerance, synonyms, synonymScore = 0.8) {
+   const candidatesMap = /* @__PURE__ */ new Map();
+   for (const token of queryTokens) {
+     const tokenCandidates = findCandidatesForToken(
+       token,
+       vocabulary,
+       tolerance,
+       synonyms,
+       synonymScore
+     );
+     candidatesMap.set(token, tokenCandidates);
+   }
+   return candidatesMap;
+ }
+ function filterCandidatesByScore(candidatesMap, minScore) {
+   const filtered = /* @__PURE__ */ new Map();
+   for (const [token, candidates] of candidatesMap.entries()) {
+     const filteredCandidates = candidates.filter((c) => c.score >= minScore);
+     if (filteredCandidates.length > 0) {
+       filtered.set(token, filteredCandidates);
+     }
+   }
+   return filtered;
+ }
+
+ // src/scoring.ts
+ // Scan the document's tokens for candidate matches, then grow phrases
+ // from every match position and keep the non-overlapping ones.
+ function findPhrasesInDocument(documentTokens, candidatesMap, config, documentFrequency, totalDocuments) {
+   const phrases = [];
+   const queryTokens = Array.from(candidatesMap.keys());
+   const wordMatches = [];
+   for (let i = 0; i < documentTokens.length; i++) {
+     const docWord = documentTokens[i];
+     for (const [queryToken, candidates] of candidatesMap.entries()) {
+       for (const candidate of candidates) {
+         if (candidate.word === docWord) {
+           wordMatches.push({
+             word: docWord,
+             queryToken,
+             position: i,
+             type: candidate.type,
+             distance: candidate.distance,
+             score: candidate.score
+           });
+         }
+       }
+     }
+   }
+   for (let i = 0; i < wordMatches.length; i++) {
+     const phrase = buildPhraseFromPosition(
+       wordMatches,
+       i,
+       queryTokens,
+       config,
+       documentFrequency,
+       totalDocuments
+     );
+     if (phrase && phrase.words.length > 0) {
+       phrases.push(phrase);
+     }
+   }
+   return deduplicatePhrases(phrases);
+ }
+ // Grow a phrase rightwards from startIndex, adding at most one match per
+ // query token and stopping once the gap to the next match exceeds maxGap.
+ function buildPhraseFromPosition(wordMatches, startIndex, queryTokens, config, documentFrequency, totalDocuments) {
+   const startMatch = wordMatches[startIndex];
+   const phraseWords = [startMatch];
+   const coveredTokens = /* @__PURE__ */ new Set([startMatch.queryToken]);
+   for (let i = startIndex + 1; i < wordMatches.length; i++) {
+     const match = wordMatches[i];
+     const gap = match.position - phraseWords[phraseWords.length - 1].position - 1;
+     if (gap > config.maxGap) {
+       break;
+     }
+     if (!coveredTokens.has(match.queryToken)) {
+       phraseWords.push(match);
+       coveredTokens.add(match.queryToken);
+     }
+     if (coveredTokens.size === queryTokens.length) {
+       break;
+     }
+   }
+   if (phraseWords.length > 0) {
+     const score = calculatePhraseScore(
+       phraseWords,
+       queryTokens,
+       config,
+       documentFrequency,
+       totalDocuments
+     );
+     return {
+       words: phraseWords,
+       startPosition: phraseWords[0].position,
+       endPosition: phraseWords[phraseWords.length - 1].position,
+       gap: phraseWords[phraseWords.length - 1].position - phraseWords[0].position,
+       inOrder: isInOrder(phraseWords, queryTokens),
+       score
+     };
+   }
+   return null;
+ }
+ // Combine match quality with order, proximity, density, and TF-IDF
+ // bonuses, normalized to [0, 1] by the maximum possible score.
+ function calculatePhraseScore(phraseWords, queryTokens, config, documentFrequency, totalDocuments) {
+   let baseScore = 0;
+   for (const word of phraseWords) {
+     const weight = word.type === "exact" ? config.weights.exact : word.type === "fuzzy" ? config.weights.fuzzy : config.weights.fuzzy * 0.8;
+     baseScore += word.score * weight;
+   }
+   baseScore /= phraseWords.length;
+   const inOrder = isInOrder(phraseWords, queryTokens);
+   const orderScore = inOrder ? 1 : 0.5;
+   const span = phraseWords[phraseWords.length - 1].position - phraseWords[0].position + 1;
+   const proximityScore = Math.max(0, 1 - span / (queryTokens.length * 5));
+   const densityScore = phraseWords.length / queryTokens.length;
+   const semanticScore = calculateSemanticScore(
+     phraseWords,
+     documentFrequency,
+     totalDocuments
+   );
+   const weights = config.weights;
+   const totalScore = baseScore + orderScore * weights.order + proximityScore * weights.proximity + densityScore * weights.density + semanticScore * weights.semantic;
+   const maxPossibleScore = 1 + weights.order + weights.proximity + weights.density + weights.semantic;
+   return Math.min(1, totalScore / maxPossibleScore);
+ }
+ function isInOrder(phraseWords, queryTokens) {
+   const tokenOrder = new Map(queryTokens.map((token, index) => [token, index]));
+   for (let i = 1; i < phraseWords.length; i++) {
+     const prevOrder = tokenOrder.get(phraseWords[i - 1].queryToken) ?? -1;
+     const currOrder = tokenOrder.get(phraseWords[i].queryToken) ?? -1;
+     if (currOrder < prevOrder) {
+       return false;
+     }
+   }
+   return true;
+ }
+ // Average IDF of the phrase's words, squashed into [0, 1].
+ function calculateSemanticScore(phraseWords, documentFrequency, totalDocuments) {
+   let tfidfSum = 0;
+   for (const word of phraseWords) {
+     const df = documentFrequency.get(word.word) || 1;
+     const idf = Math.log(totalDocuments / df);
+     tfidfSum += idf;
+   }
+   const avgTfidf = tfidfSum / phraseWords.length;
+   return Math.min(1, avgTfidf / 10);
+ }
+ // Greedy selection: keep the highest-scoring phrases whose position ranges
+ // don't overlap ones already kept.
+ function deduplicatePhrases(phrases) {
+   if (phrases.length === 0) return [];
+   const sorted = phrases.slice().sort((a, b) => b.score - a.score);
+   const result = [];
+   const covered = /* @__PURE__ */ new Set();
+   for (const phrase of sorted) {
+     let overlaps = false;
+     for (let pos = phrase.startPosition; pos <= phrase.endPosition; pos++) {
+       if (covered.has(pos)) {
+         overlaps = true;
+         break;
+       }
+     }
+     if (!overlaps) {
+       result.push(phrase);
+       for (let pos = phrase.startPosition; pos <= phrase.endPosition; pos++) {
+         covered.add(pos);
+       }
+     }
+   }
+   return result.sort((a, b) => b.score - a.score);
+ }
+
+ // src/index.ts
+ var DEFAULT_CONFIG = {
+   textProperty: "content",
+   tolerance: 1,
+   adaptiveTolerance: true,
+   enableSynonyms: false,
+   supabase: void 0,
+   synonymMatchScore: 0.8,
+   weights: {
+     exact: 1,
+     fuzzy: 0.8,
+     order: 0.3,
+     proximity: 0.2,
+     density: 0.2,
+     semantic: 0.15
+   },
+   maxGap: 5,
+   minScore: 0.1
+ };
+ // Per-instance state (synonyms, document frequencies) keyed by the Orama
+ // instance, so multiple databases can coexist.
+ var pluginStates = /* @__PURE__ */ new WeakMap();
+ function pluginFuzzyPhrase(userConfig = {}) {
+   const config = {
+     textProperty: userConfig.textProperty ?? DEFAULT_CONFIG.textProperty,
+     tolerance: userConfig.tolerance ?? DEFAULT_CONFIG.tolerance,
+     adaptiveTolerance: userConfig.adaptiveTolerance ?? DEFAULT_CONFIG.adaptiveTolerance,
+     enableSynonyms: userConfig.enableSynonyms ?? DEFAULT_CONFIG.enableSynonyms,
+     supabase: userConfig.supabase || DEFAULT_CONFIG.supabase,
+     synonymMatchScore: userConfig.synonymMatchScore ?? DEFAULT_CONFIG.synonymMatchScore,
+     weights: {
+       exact: userConfig.weights?.exact ?? DEFAULT_CONFIG.weights.exact,
+       fuzzy: userConfig.weights?.fuzzy ?? DEFAULT_CONFIG.weights.fuzzy,
+       order: userConfig.weights?.order ?? DEFAULT_CONFIG.weights.order,
+       proximity: userConfig.weights?.proximity ?? DEFAULT_CONFIG.weights.proximity,
+       density: userConfig.weights?.density ?? DEFAULT_CONFIG.weights.density,
+       semantic: userConfig.weights?.semantic ?? DEFAULT_CONFIG.weights.semantic
+     },
+     maxGap: userConfig.maxGap ?? DEFAULT_CONFIG.maxGap,
+     minScore: userConfig.minScore ?? DEFAULT_CONFIG.minScore
+   };
+   const plugin = {
+     name: "fuzzy-phrase",
+     /**
+      * Initialize plugin state after the index is created: load synonyms
+      * (optional) and pre-calculate document frequencies for TF-IDF.
+      */
+     afterCreate: async (orama) => {
+       console.log("🔮 Initializing Fuzzy Phrase Plugin...");
+       const state = {
+         synonymMap: {},
+         config,
+         documentFrequency: /* @__PURE__ */ new Map(),
+         totalDocuments: 0
+       };
+       if (config.enableSynonyms && config.supabase) {
+         try {
+           console.log("📖 Loading synonyms from Supabase...");
+           state.synonymMap = await loadSynonymsFromSupabase(config.supabase);
+           console.log(`✅ Loaded ${Object.keys(state.synonymMap).length} words with synonyms`);
+         } catch (error) {
+           console.error("⚠️ Failed to load synonyms:", error);
+         }
+       }
+       if (orama.data && typeof orama.data === "object") {
+         const docs = orama.data.docs || {};
+         state.totalDocuments = Object.keys(docs).length;
+         state.documentFrequency = calculateDocumentFrequencies(docs, config.textProperty);
+         console.log(`📊 Calculated document frequencies for ${state.totalDocuments} documents`);
+       }
+       pluginStates.set(orama, state);
+       console.log("✅ Fuzzy Phrase Plugin initialized");
+     }
+   };
+   return plugin;
+ }
+ // Standalone search entry point: expands the query into candidates,
+ // scores phrases in every document, and returns ranked hits.
+ async function searchWithFuzzyPhrase(orama, params, language) {
+   const startTime = performance.now();
+   const state = pluginStates.get(orama);
+   if (!state) {
+     console.error("❌ Plugin state not initialized");
+     throw new Error("Fuzzy Phrase Plugin not properly initialized");
+   }
+   const { term, properties } = params;
+   if (!term || typeof term !== "string") {
+     return { elapsed: { formatted: "0ms", raw: 0 }, hits: [], count: 0 };
+   }
+   const textProperty = properties && properties[0] || state.config.textProperty;
+   const queryTokens = tokenize(term);
+   if (queryTokens.length === 0) {
+     return { elapsed: { formatted: "0ms", raw: 0 }, hits: [], count: 0 };
+   }
+   const tolerance = state.config.adaptiveTolerance ? calculateAdaptiveTolerance(queryTokens, state.config.tolerance) : state.config.tolerance;
+   console.log(`🔍 Fuzzy phrase search: "${term}" (${queryTokens.length} tokens, tolerance: ${tolerance})`);
+   let vocabulary;
+   try {
+     const radixNode = orama.index?.indexes?.[textProperty]?.node;
+     if (!radixNode) {
+       console.error("❌ Radix tree not found for property:", textProperty);
+       return { elapsed: { formatted: "0ms", raw: 0 }, hits: [], count: 0 };
+     }
+     vocabulary = extractVocabularyFromRadixTree(radixNode);
+     console.log(`📚 Extracted ${vocabulary.size} unique words from index`);
+   } catch (error) {
+     console.error("❌ Failed to extract vocabulary:", error);
+     return { elapsed: { formatted: "0ms", raw: 0 }, hits: [], count: 0 };
+   }
+   const candidatesMap = findAllCandidates(
+     queryTokens,
+     vocabulary,
+     tolerance,
+     state.config.enableSynonyms ? state.synonymMap : void 0,
+     state.config.synonymMatchScore
+   );
+   const filteredCandidates = filterCandidatesByScore(
+     candidatesMap,
+     state.config.minScore
+   );
+   console.log(`🎯 Found candidates: ${Array.from(filteredCandidates.values()).reduce((sum, c) => sum + c.length, 0)} total`);
+   const documentMatches = [];
+   const docs = orama.data?.docs || {};
+   for (const [docId, doc] of Object.entries(docs)) {
+     const text = doc[textProperty];
+     if (!text || typeof text !== "string") {
+       continue;
+     }
+     const docTokens = tokenize(text);
+     const phrases = findPhrasesInDocument(
+       docTokens,
+       filteredCandidates,
+       {
+         weights: state.config.weights,
+         maxGap: state.config.maxGap
+       },
+       state.documentFrequency,
+       state.totalDocuments
+     );
+     if (phrases.length > 0) {
+       const docScore = Math.max(...phrases.map((p) => p.score));
+       documentMatches.push({
+         id: docId,
+         phrases,
+         score: docScore,
+         document: doc
+       });
+     }
+   }
+   documentMatches.sort((a, b) => b.score - a.score);
+   const hits = documentMatches.map((match) => ({
+     id: match.id,
+     score: match.score,
+     document: match.document,
+     _phrases: match.phrases // stored for highlighting
+   }));
+   const elapsed = performance.now() - startTime;
+   console.log(`✅ Found ${hits.length} results in ${elapsed.toFixed(2)}ms`);
+   return {
+     elapsed: {
+       formatted: `${elapsed.toFixed(2)}ms`,
+       raw: Math.floor(elapsed * 1e6) // nanoseconds
+     },
+     hits,
+     count: hits.length
+   };
+ }
+ // Fetch the synonym map from Supabase via the `get_synonym_map` RPC.
+ async function loadSynonymsFromSupabase(supabaseConfig) {
+   try {
+     const { createClient } = await import('@supabase/supabase-js');
+     const supabase = createClient(supabaseConfig.url, supabaseConfig.serviceKey);
+     const { data, error } = await supabase.rpc("get_synonym_map");
+     if (error) {
+       throw new Error(`Supabase error: ${error.message}`);
+     }
+     return data || {};
+   } catch (error) {
+     console.error("Failed to load synonyms from Supabase:", error);
+     throw error;
+   }
+ }
+ // Count, for each word, how many documents contain it (document frequency).
+ function calculateDocumentFrequencies(docs, textProperty) {
+   const df = /* @__PURE__ */ new Map();
+   for (const doc of Object.values(docs)) {
+     const text = doc[textProperty];
+     if (!text || typeof text !== "string") {
+       continue;
+     }
+     const words = new Set(tokenize(text));
+     for (const word of words) {
+       df.set(word, (df.get(word) || 0) + 1);
+     }
+   }
+   return df;
+ }
+ // Lowercase whitespace tokenizer used for both document frequencies and queries.
+ function tokenize(text) {
+   return text.toLowerCase().split(/\s+/).filter((token) => token.length > 0);
+ }
+
+ exports.pluginFuzzyPhrase = pluginFuzzyPhrase;
+ exports.searchWithFuzzyPhrase = searchWithFuzzyPhrase;
+ //# sourceMappingURL=index.cjs.map