@wcs-colab/plugin-fuzzy-phrase 3.1.16-custom.newbase.2 → 3.1.16-custom.newbase.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -211,8 +211,9 @@ function findPhrasesInDocument(documentTokens, candidatesMap, config, documentFr
211
211
  config,
212
212
  documentFrequency,
213
213
  totalDocuments,
214
- wordMatches
215
- // Pass all word matches for density calculation
214
+ wordMatches,
215
+ documentTokens
216
+ // Pass document tokens to extract gap words
216
217
  );
217
218
  if (phrase && phrase.words.length > 0) {
218
219
  phrases.push(phrase);
@@ -220,16 +221,27 @@ function findPhrasesInDocument(documentTokens, candidatesMap, config, documentFr
220
221
  }
221
222
  return deduplicatePhrases(phrases);
222
223
  }
223
- function buildPhraseFromPosition(wordMatches, startIndex, queryTokens, config, documentFrequency, totalDocuments, allWordMatches) {
224
+ function buildPhraseFromPosition(wordMatches, startIndex, queryTokens, config, documentFrequency, totalDocuments, allWordMatches, documentTokens) {
224
225
  const startMatch = wordMatches[startIndex];
225
226
  const phraseWords = [startMatch];
226
227
  const coveredTokens = /* @__PURE__ */ new Set([startMatch.queryToken]);
228
+ const gapWords = [];
229
+ let totalGapUsed = 0;
227
230
  for (let i = startIndex + 1; i < wordMatches.length; i++) {
228
231
  const match = wordMatches[i];
229
- const gap = match.position - phraseWords[phraseWords.length - 1].position - 1;
232
+ const lastPos = phraseWords[phraseWords.length - 1].position;
233
+ const gap = match.position - lastPos - 1;
230
234
  if (gap > config.maxGap) {
231
235
  break;
232
236
  }
237
+ for (let pos = lastPos + 1; pos < match.position; pos++) {
238
+ totalGapUsed++;
239
+ gapWords.push({
240
+ word: documentTokens[pos],
241
+ position: pos,
242
+ gapIndex: totalGapUsed
243
+ });
244
+ }
233
245
  if (!coveredTokens.has(match.queryToken)) {
234
246
  phraseWords.push(match);
235
247
  coveredTokens.add(match.queryToken);
@@ -239,19 +251,25 @@ function buildPhraseFromPosition(wordMatches, startIndex, queryTokens, config, d
239
251
  }
240
252
  }
241
253
  if (phraseWords.length > 0) {
254
+ const coverage = phraseWords.length / queryTokens.length;
255
+ const span = phraseWords[phraseWords.length - 1].position - phraseWords[0].position + 1;
242
256
  const { score, breakdown } = calculatePhraseScore(
243
257
  phraseWords,
244
258
  queryTokens,
245
259
  config,
246
260
  documentFrequency,
247
261
  totalDocuments,
248
- allWordMatches
262
+ allWordMatches,
263
+ coverage
249
264
  );
250
265
  return {
251
266
  words: phraseWords,
267
+ gapWords,
268
+ gapUsed: totalGapUsed,
269
+ coverage,
252
270
  startPosition: phraseWords[0].position,
253
271
  endPosition: phraseWords[phraseWords.length - 1].position,
254
- gap: phraseWords[phraseWords.length - 1].position - phraseWords[0].position,
272
+ span,
255
273
  inOrder: isInOrder(phraseWords, queryTokens),
256
274
  score,
257
275
  scoreBreakdown: breakdown
@@ -259,7 +277,7 @@ function buildPhraseFromPosition(wordMatches, startIndex, queryTokens, config, d
259
277
  }
260
278
  return null;
261
279
  }
262
- function calculatePhraseScore(phraseWords, queryTokens, config, documentFrequency, totalDocuments, allWordMatches) {
280
+ function calculatePhraseScore(phraseWords, queryTokens, config, documentFrequency, totalDocuments, allWordMatches, coverage) {
263
281
  let baseScore = 0;
264
282
  for (const word of phraseWords) {
265
283
  const weight = word.type === "exact" ? config.weights.exact : word.type === "fuzzy" ? config.weights.fuzzy : config.weights.fuzzy * 0.8;
@@ -269,13 +287,12 @@ function calculatePhraseScore(phraseWords, queryTokens, config, documentFrequenc
269
287
  const inOrder = isInOrder(phraseWords, queryTokens);
270
288
  const orderScore = inOrder ? 1 : 0.5;
271
289
  const span = phraseWords[phraseWords.length - 1].position - phraseWords[0].position + 1;
272
- const proximityScore = Math.max(0, 1 - span / (queryTokens.length * 5));
290
+ const proximityWindow = queryTokens.length * config.proximitySpanMultiplier;
291
+ const proximityScore = Math.max(0, 1 - span / proximityWindow);
273
292
  let densityScore = 0;
274
293
  if (queryTokens.length === 1) {
275
294
  const totalOccurrences = allWordMatches.length;
276
- densityScore = totalOccurrences / queryTokens.length;
277
- } else {
278
- densityScore = phraseWords.length / queryTokens.length;
295
+ densityScore = Math.min(1, totalOccurrences / 10);
279
296
  }
280
297
  const semanticScore = calculateSemanticScore(
281
298
  phraseWords,
@@ -289,8 +306,11 @@ function calculatePhraseScore(phraseWords, queryTokens, config, documentFrequenc
289
306
  const weightedDensity = densityScore * weights.density;
290
307
  const weightedSemantic = semanticScore * weights.semantic;
291
308
  const totalScore = weightedBase + weightedOrder + weightedProximity + weightedDensity + weightedSemantic;
292
- const maxPossibleScore = 1 + weights.order + weights.proximity + weights.density + weights.semantic;
293
- const score = totalScore / maxPossibleScore;
309
+ const maxBaseWeight = Math.max(weights.exact, weights.fuzzy);
310
+ const maxPossibleScore = maxBaseWeight + weights.order + weights.proximity + weights.density + weights.semantic;
311
+ const normalizedScore = totalScore / maxPossibleScore;
312
+ const coverageMultiplier = queryTokens.length > 1 ? coverage : 1;
313
+ const score = normalizedScore * coverageMultiplier;
294
314
  const base = weightedBase / maxPossibleScore;
295
315
  const order = weightedOrder / maxPossibleScore;
296
316
  const proximity = weightedProximity / maxPossibleScore;
@@ -303,7 +323,9 @@ function calculatePhraseScore(phraseWords, queryTokens, config, documentFrequenc
303
323
  order,
304
324
  proximity,
305
325
  density,
306
- semantic
326
+ semantic,
327
+ coverage: coverageMultiplier
328
+ // Show coverage multiplier in breakdown
307
329
  }
308
330
  };
309
331
  }
@@ -372,7 +394,8 @@ var DEFAULT_CONFIG = {
372
394
  semantic: 0.15
373
395
  },
374
396
  maxGap: 5,
375
- minScore: 0.1
397
+ minScore: 0.1,
398
+ proximitySpanMultiplier: 5
376
399
  };
377
400
  var pluginStates = /* @__PURE__ */ new WeakMap();
378
401
  function pluginFuzzyPhrase(userConfig = {}) {
@@ -392,7 +415,8 @@ function pluginFuzzyPhrase(userConfig = {}) {
392
415
  semantic: userConfig.weights?.semantic ?? DEFAULT_CONFIG.weights.semantic
393
416
  },
394
417
  maxGap: userConfig.maxGap ?? DEFAULT_CONFIG.maxGap,
395
- minScore: userConfig.minScore ?? DEFAULT_CONFIG.minScore
418
+ minScore: userConfig.minScore ?? DEFAULT_CONFIG.minScore,
419
+ proximitySpanMultiplier: userConfig.proximitySpanMultiplier ?? DEFAULT_CONFIG.proximitySpanMultiplier
396
420
  };
397
421
  const plugin = {
398
422
  name: "fuzzy-phrase",
@@ -530,7 +554,8 @@ async function searchWithFuzzyPhrase(orama, params, language) {
530
554
  filteredCandidates,
531
555
  {
532
556
  weights: state.config.weights,
533
- maxGap: state.config.maxGap
557
+ maxGap: state.config.maxGap,
558
+ proximitySpanMultiplier: state.config.proximitySpanMultiplier
534
559
  },
535
560
  state.documentFrequency,
536
561
  state.totalDocuments
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/fuzzy.ts","../src/candidates.ts","../src/scoring.ts","../src/index.ts"],"names":[],"mappings":";AA4BO,SAAS,mBACd,GACA,GACA,OAC0B;AAE1B,MAAI,MAAM,GAAG;AACX,WAAO,EAAE,WAAW,MAAM,UAAU,EAAE;AAAA,EACxC;AAEA,QAAM,OAAO,EAAE;AACf,QAAM,OAAO,EAAE;AAGf,MAAI,KAAK,IAAI,OAAO,IAAI,IAAI,OAAO;AACjC,WAAO,EAAE,WAAW,OAAO,UAAU,QAAQ,EAAE;AAAA,EACjD;AAGA,MAAI,OAAO,MAAM;AACf,KAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC;AAAA,EAChB;AAEA,QAAM,IAAI,EAAE;AACZ,QAAM,IAAI,EAAE;AAGZ,MAAI,UAAU,IAAI,MAAM,IAAI,CAAC;AAC7B,MAAI,UAAU,IAAI,MAAM,IAAI,CAAC;AAG7B,WAAS,IAAI,GAAG,KAAK,GAAG,KAAK;AAC3B,YAAQ,CAAC,IAAI;AAAA,EACf;AAEA,WAAS,IAAI,GAAG,KAAK,GAAG,KAAK;AAC3B,YAAQ,CAAC,IAAI;AACb,QAAI,WAAW;AAEf,aAAS,IAAI,GAAG,KAAK,GAAG,KAAK;AAC3B,YAAM,OAAO,EAAE,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,IAAI,IAAI;AAEzC,cAAQ,CAAC,IAAI,KAAK;AAAA,QAChB,QAAQ,CAAC,IAAI;AAAA;AAAA,QACb,QAAQ,IAAI,CAAC,IAAI;AAAA;AAAA,QACjB,QAAQ,IAAI,CAAC,IAAI;AAAA;AAAA,MACnB;AAEA,iBAAW,KAAK,IAAI,UAAU,QAAQ,CAAC,CAAC;AAAA,IAC1C;AAGA,QAAI,WAAW,OAAO;AACpB,aAAO,EAAE,WAAW,OAAO,UAAU,QAAQ,EAAE;AAAA,IACjD;AAGA,KAAC,SAAS,OAAO,IAAI,CAAC,SAAS,OAAO;AAAA,EACxC;AAEA,QAAM,WAAW,QAAQ,CAAC;AAC1B,SAAO;AAAA,IACL,WAAW,YAAY;AAAA,IACvB;AAAA,EACF;AACF;AAUO,SAAS,WACd,MACA,YACA,WACuD;AAEvD,MAAI,SAAS,YAAY;AACvB,WAAO,EAAE,SAAS,MAAM,UAAU,GAAG,OAAO,EAAI;AAAA,EAClD;AAGA,MAAI,KAAK,WAAW,UAAU,GAAG;AAC/B,WAAO,EAAE,SAAS,MAAM,UAAU,GAAG,OAAO,KAAK;AAAA,EACnD;AAGA,QAAM,SAAS,mBAAmB,MAAM,YAAY,SAAS;AAE7D,MAAI,OAAO,WAAW;AAGpB,UAAM,QAAQ,IAAO,OAAO,WAAW;AACvC,WAAO;AAAA,MACL,SAAS;AAAA,MACT,UAAU,OAAO;AAAA,MACjB,OAAO,KAAK,IAAI,KAAK,KAAK;AAAA;AAAA,IAC5B;AAAA,EACF;AAEA,SAAO,EAAE,SAAS,OAAO,UAAU,YAAY,GAAG,OAAO,EAAE;AAC7D;AAWO,SAAS,2BACd,aACA,eACQ;AACR,QAAM,cAAc,YAAY;AAEhC,MAAI,eAAe,GAAG;AACpB,WAAO;AAAA,EACT,WAAW,eAAe,GAAG;AAC3B,WAAO,gBAAgB;AAAA,EACzB,WAAW,eAAe,GAAG;AAC3B,WAAO,gBAAgB;AAAA,EACzB,OAAO;AACL,WAAO,gBAAgB;AAAA,EACzB;AACF;;;ACjJO,SAAS,+BAA+B,WAA6B;AAC1E,QAAM,aAAa,oBAAI,IAAY;AACnC,MAAI,eAAe;AACnB,MAAI,aAAa;AAEjB,WAAS,SAAS,MAAW,QAAgB,GAAG;AAC9C,QAAI,CAAC,MAAM;AACT;AAAA,IACF;AAEA;AAIA,QAAI,KAAK,KAAK,KAAK,KAAK,OAAO,KAAK,MAAM,YAAY,KAAK,EAAE,SAAS,GAAG;AACvE,iBAAW,IAAI,KAAK,CAAC;AACrB;AAAA,IACF;AAGA,QAAI,KAAK,GAAG;AACV,UAAI,KAAK,aAAa,KAAK;AAEzB,mBAAW,CAAC,MAAM,SAAS,KAAK,KAAK,GAAG;AACtC,mBAAS,WAAW,QAAQ,CAAC;AAAA,QAC/B;AAAA,MACF,WAAW,MAAM,QAAQ,KAAK,CAAC,GAAG;AAEhC,mBAAW,CAAC,MAAM,SAAS,KAAK,KAAK,GAAG;AACtC,mBAAS,WAAW,QAAQ,CAAC;AAAA,QAC/B;AAAA,MACF,WAAW,OAAO,KAAK,MAAM,UAAU;AAErC,mBAAW,aAAa,OAAO,OAAO,KAAK,CAAC,GAAG;AAC7C,mBAAS,WAAW,QAAQ,CAAC;AAAA,QAC/B;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,WAAS,SAAS;AAClB,UAAQ,IAAI,uBAAgB,WAAW,IAAI,eAAe,YAAY,gBAAgB;AACtF,SAAO;AACT;AAYO,SAAS,uBACd,YACA,YACA,WACA,UACA,eAAuB,KACV;AACb,QAAM,aAA0B,CAAC;AACjC,QAAM,OAAO,oBAAI,IAAY;AAG7B,MAAI,WAAW,IAAI,UAAU,GAAG;AAC9B,eAAW,KAAK;AAAA,MACd,MAAM;AAAA,MACN,MAAM;AAAA,MACN;AAAA,MACA,UAAU;AAAA,MACV,OAAO;AAAA,IACT,CAAC;AACD,SAAK,IAAI,UAAU;AAAA,EACrB;AAGA,aAAW,QAAQ,YAAY;AAC7B,QAAI,KAAK,IAAI,IAAI;AAAG;AAEpB,UAAM,QAAQ,WAAW,MAAM,YAAY,SAAS;AACpD,QAAI,MAAM,SAAS;AACjB,iBAAW,KAAK;AAAA,QACd;AAAA,QACA,MAAM;AAAA,QACN;AAAA,QACA,UAAU,MAAM;AAAA,QAChB,OAAO,MAAM;AAAA,MACf,CAAC;AACD,WAAK,IAAI,IAAI;AAAA,IACf;AAAA,EACF;AAGA,MAAI,YAAY,SAAS,UAAU,GAAG;AACpC,eAAW,WAAW,SAAS,UAAU,GAAG;AAC1C,UAAI,KAAK,IAAI,OAAO;AAAG;AACvB,UAAI,WAAW,IAAI,OAAO,GAAG;AAC3B,mBAAW,KAAK;AAAA,UACd,MAAM;AAAA,UACN,MAAM;AAAA,UACN;AAAA,UACA,UAAU;AAAA,UACV,OAAO;AAAA,QACT,CAAC;AACD,aAAK,IAAI,OAAO;AAAA,MAClB;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;AAYO,SAAS,kBACd,aACA,YACA,WACA,UACA,eAAuB,KACG;AAC1B,QAAM,gBAAgB,oBAAI,IAAyB;AAEnD,aAAW,SAAS,aAAa;AAC/B,UAAM,kBAAkB;AAAA,MACtB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AACA,kBAAc,IAAI,OAAO,eAAe;AAAA,EAC1C;AAEA,SAAO;AACT;AAyBO,SAAS,wBACd,eACA,UAC0B;AAC1B,QAAM,WAAW,oBAAI,IAAyB;AAE9C,aAAW,CAAC,OAAO,UAAU,KAAK,cAAc,QAAQ,GAAG;AACzD,UAAM,qBAAqB,WAAW,OAAO,OAAK,EAAE,SAAS,QAAQ;AACrE,QAAI,mBAAmB,SAAS,GAAG;AACjC,eAAS,IAAI,OAAO,kBAAkB;AAAA,IACxC;AAAA,EACF;AAEA,SAAO;AACT;;;ACvKO,SAAS,sBACd,gBACA,eACA,QACA,mBACA,gBACe;AACf,QAAM,UAAyB,CAAC;AAChC,QAAM,cAAc,MAAM,KAAK,cAAc,KAAK,CAAC;AAGnD,QAAM,cAA2B,CAAC;AAElC,WAAS,IAAI,GAAG,IAAI,eAAe,QAAQ,KAAK;AAC9C,UAAM,UAAU,eAAe,CAAC;AAGhC,eAAW,CAAC,YAAY,UAAU,KAAK,cAAc,QAAQ,GAAG;AAC9D,iBAAW,aAAa,YAAY;AAClC,YAAI,UAAU,SAAS,SAAS;AAC9B,sBAAY,KAAK;AAAA,YACf,MAAM;AAAA,YACN;AAAA,YACA,UAAU;AAAA,YACV,MAAM,UAAU;AAAA,YAChB,UAAU,UAAU;AAAA,YACpB,OAAO,UAAU;AAAA,UACnB,CAAC;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAGA,WAAS,IAAI,GAAG,IAAI,YAAY,QAAQ,KAAK;AAC3C,UAAM,SAAS;AAAA,MACb;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA;AAAA,IACF;AAEA,QAAI,UAAU,OAAO,MAAM,SAAS,GAAG;AACrC,cAAQ,KAAK,MAAM;AAAA,IACrB;AAAA,EACF;AAGA,SAAO,mBAAmB,OAAO;AACnC;AAcA,SAAS,wBACP,aACA,YACA,aACA,QACA,mBACA,gBACA,gBACoB;AACpB,QAAM,aAAa,YAAY,UAAU;AACzC,QAAM,cAA2B,CAAC,UAAU;AAC5C,QAAM,gBAAgB,oBAAI,IAAI,CAAC,WAAW,UAAU,CAAC;AAGrD,WAAS,IAAI,aAAa,GAAG,IAAI,YAAY,QAAQ,KAAK;AACxD,UAAM,QAAQ,YAAY,CAAC;AAC3B,UAAM,MAAM,MAAM,WAAW,YAAY,YAAY,SAAS,CAAC,EAAE,WAAW;AAG5E,QAAI,MAAM,OAAO,QAAQ;AACvB;AAAA,IACF;AAGA,QAAI,CAAC,cAAc,IAAI,MAAM,UAAU,GAAG;AACxC,kBAAY,KAAK,KAAK;AACtB,oBAAc,IAAI,MAAM,UAAU;AAAA,IACpC;AAGA,QAAI,cAAc,SAAS,YAAY,QAAQ;AAC7C;AAAA,IACF;AAAA,EACF;AAGA,MAAI,YAAY,SAAS,GAAG;AAC1B,UAAM,EAAE,OAAO,UAAU,IAAI;AAAA,MAC3B;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,WAAO;AAAA,MACL,OAAO;AAAA,MACP,eAAe,YAAY,CAAC,EAAE;AAAA,MAC9B,aAAa,YAAY,YAAY,SAAS,CAAC,EAAE;AAAA,MACjD,KAAK,YAAY,YAAY,SAAS,CAAC,EAAE,WAAW,YAAY,CAAC,EAAE;AAAA,MACnE,SAAS,UAAU,aAAa,WAAW;AAAA,MAC3C;AAAA,MACA,gBAAgB;AAAA,IAClB;AAAA,EACF;AAEA,SAAO;AACT;AAaA,SAAS,qBACP,aACA,aACA,QACA,mBACA,gBACA,gBACqH;AAErH,MAAI,YAAY;AAChB,aAAW,QAAQ,aAAa;AAC9B,UAAM,SAAS,KAAK,SAAS,UAAU,OAAO,QAAQ,QACvC,KAAK,SAAS,UAAU,OAAO,QAAQ,QACvC,OAAO,QAAQ,QAAQ;AACtC,iBAAa,KAAK,QAAQ;AAAA,EAC5B;AACA,eAAa,YAAY;AAGzB,QAAM,UAAU,UAAU,aAAa,WAAW;AAClD,QAAM,aAAa,UAAU,IAAM;AAGnC,QAAM,OAAO,YAAY,YAAY,SAAS,CAAC,EAAE,WAAW,YAAY,CAAC,EAAE,WAAW;AACtF,QAAM,iBAAiB,KAAK,IAAI,GAAG,IAAO,QAAQ,YAAY,SAAS,EAAG;AAI1E,MAAI,eAAe;AAEnB,MAAI,YAAY,WAAW,GAAG;AAE5B,UAAM,mBAAmB,eAAe;AAExC,mBAAe,mBAAmB,YAAY;AAAA,EAChD,OAAO;AAGL,mBAAe,YAAY,SAAS,YAAY;AAAA,EAClD;AAGA,QAAM,gBAAgB;AAAA,IACpB;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAGA,QAAM,UAAU,OAAO;AAGvB,QAAM,eAAe;AACrB,QAAM,gBAAgB,aAAa,QAAQ;AAC3C,QAAM,oBAAoB,iBAAiB,QAAQ;AACnD,QAAM,kBAAkB,eAAe,QAAQ;AAC/C,QAAM,mBAAmB,gBAAgB,QAAQ;AAEjD,QAAM,aAAa,eAAe,gBAAgB,oBAAoB,kBAAkB;AAIxF,QAAM,mBAAmB,IAAM,QAAQ,QAAQ,QAAQ,YAAY,QAAQ,UAAU,QAAQ;AAG7F,QAAM,QAAQ,aAAa;AAG3B,QAAM,OAAO,eAAe;AAC5B,QAAM,QAAQ,gBAAgB;AAC9B,QAAM,YAAY,oBAAoB;AACtC,QAAM,UAAU,kBAAkB;AAClC,QAAM,WAAW,mBAAmB;AAEpC,SAAO;AAAA,IACL;AAAA,IACA,WAAW;AAAA,MACT;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACF;AASA,SAAS,UAAU,aAA0B,aAAgC;AAC3E,QAAM,aAAa,IAAI,IAAI,YAAY,IAAI,CAAC,OAAO,UAAU,CAAC,OAAO,KAAK,CAAC,CAAC;AAE5E,WAAS,IAAI,GAAG,IAAI,YAAY,QAAQ,KAAK;AAC3C,UAAM,YAAY,WAAW,IAAI,YAAY,IAAI,CAAC,EAAE,UAAU,KAAK;AACnE,UAAM,YAAY,WAAW,IAAI,YAAY,CAAC,EAAE,UAAU,KAAK;AAE/D,QAAI,YAAY,WAAW;AACzB,aAAO;AAAA,IACT;AAAA,EACF;AAEA,SAAO;AACT;AAUA,SAAS,uBACP,aACA,mBACA,gBACQ;AAER,MAAI,mBAAmB,GAAG;AACxB,WAAO;AAAA,EACT;AAEA,MAAI,WAAW;AAEf,aAAW,QAAQ,aAAa;AAC9B,UAAM,KAAK,kBAAkB,IAAI,KAAK,IAAI,KAAK;AAC/C,UAAM,MAAM,KAAK,IAAI,iBAAiB,EAAE;AACxC,gBAAY;AAAA,EACd;AAGA,QAAM,WAAW,WAAW,YAAY;AAGxC,SAAO,KAAK,IAAI,GAAK,WAAW,EAAE;AACpC;AAQA,SAAS,mBAAmB,SAAuC;AACjE,MAAI,QAAQ,WAAW;AAAG,WAAO,CAAC;AAGlC,QAAM,SAAS,QAAQ,MAAM,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAC/D,QAAM,SAAwB,CAAC;AAC/B,QAAM,UAAU,oBAAI,IAAY;AAEhC,aAAW,UAAU,QAAQ;AAE3B,QAAI,WAAW;AACf,aAAS,MAAM,OAAO,eAAe,OAAO,OAAO,aAAa,OAAO;AACrE,UAAI,QAAQ,IAAI,GAAG,GAAG;AACpB,mBAAW;AACX;AAAA,MACF;AAAA,IACF;AAEA,QAAI,CAAC,UAAU;AACb,aAAO,KAAK,MAAM;AAElB,eAAS,MAAM,OAAO,eAAe,OAAO,OAAO,aAAa,OAAO;AACrE,gBAAQ,IAAI,GAAG;AAAA,MACjB;AAAA,IACF;AAAA,EACF;AAEA,SAAO,OAAO,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAChD;;;ACnUA,IAAM,iBAA8C;AAAA,EAClD,cAAc;AAAA,EACd,WAAW;AAAA,EACX,mBAAmB;AAAA,EACnB,gBAAgB;AAAA,EAChB,UAAU;AAAA,EACV,mBAAmB;AAAA,EACnB,SAAS;AAAA,IACP,OAAO;AAAA,IACP,OAAO;AAAA,IACP,OAAO;AAAA,IACP,WAAW;AAAA,IACX,SAAS;AAAA,IACT,UAAU;AAAA,EACZ;AAAA,EACA,QAAQ;AAAA,EACR,UAAU;AACZ;AAKA,IAAM,eAAe,oBAAI,QAA+B;AAQjD,SAAS,kBAAkB,aAAgC,CAAC,GAAgB;AAEjF,QAAM,SAAsC;AAAA,IAC1C,cAAc,WAAW,gBAAgB,eAAe;AAAA,IACxD,WAAW,WAAW,aAAa,eAAe;AAAA,IAClD,mBAAmB,WAAW,qBAAqB,eAAe;AAAA,IAClE,gBAAgB,WAAW,kBAAkB,eAAe;AAAA,IAC5D,UAAU,WAAW,YAAY,eAAe;AAAA,IAChD,mBAAmB,WAAW,qBAAqB,eAAe;AAAA,IAClE,SAAS;AAAA,MACP,OAAO,WAAW,SAAS,SAAS,eAAe,QAAQ;AAAA,MAC3D,OAAO,WAAW,SAAS,SAAS,eAAe,QAAQ;AAAA,MAC3D,OAAO,WAAW,SAAS,SAAS,eAAe,QAAQ;AAAA,MAC3D,WAAW,WAAW,SAAS,aAAa,eAAe,QAAQ;AAAA,MACnE,SAAS,WAAW,SAAS,WAAW,eAAe,QAAQ;AAAA,MAC/D,UAAU,WAAW,SAAS,YAAY,eAAe,QAAQ;AAAA,IACnE;AAAA,IACA,QAAQ,WAAW,UAAU,eAAe;AAAA,IAC5C,UAAU,WAAW,YAAY,eAAe;AAAA,EAClD;AAEA,QAAM,SAAsB;AAAA,IAC1B,MAAM;AAAA;AAAA;AAAA;AAAA,IAKN,aAAa,OAAO,UAAoB;AACtC,cAAQ,IAAI,+CAAwC;AAGpD,YAAM,QAAqB;AAAA,QACzB,YAAY,CAAC;AAAA,QACb;AAAA,QACA,mBAAmB,oBAAI,IAAI;AAAA,QAC3B,gBAAgB;AAAA,MAClB;AAGA,UAAI,OAAO,kBAAkB,OAAO,UAAU;AAC5C,YAAI;AACF,kBAAQ,IAAI,6CAAsC;AAClD,gBAAM,aAAa,MAAM,yBAAyB,OAAO,QAAQ;AACjE,kBAAQ,IAAI,iBAAY,OAAO,KAAK,MAAM,UAAU,EAAE,MAAM,sBAAsB;AAAA,QACpF,SAAS,OAAO;AACd,kBAAQ,MAAM,0CAAgC,KAAK;AAAA,QAErD;AAAA,MACF;AAGA,YAAM,OAAQ,MAAM,MAAc,MAAM;AACxC,UAAI,MAAM;AACR,cAAM,iBAAiB,OAAO,KAAK,IAAI,EAAE;AACzC,cAAM,oBAAoB,6BAA6B,MAAM,OAAO,YAAY;AAChF,gBAAQ,IAAI,iDAA0C,MAAM,cAAc,YAAY;AAAA,MACxF;AAGA,mBAAa,IAAI,OAAO,KAAK;AAC7B,cAAQ,IAAI,wCAAmC;AAI/C,mBAAa,MAAM;AACjB,YAAI,OAAQ,WAAmB,2BAA2B,YAAY;AACpE,kBAAQ,IAAI,qCAA8B;AAC1C,UAAC,WAAmB,uBAAuB;AAAA,QAC7C,OAAO;AACL,kBAAQ,KAAK,yDAA+C;AAAA,QAC9D;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AAEA,SAAO;AACT;AAQA,eAAsB,sBACpB,OACA,QACA,UACoC;AACpC,QAAM,YAAY,YAAY,IAAI;AAGlC,QAAM,QAAQ,aAAa,IAAI,KAAK;AAEpC,MAAI,CAAC,OAAO;AACV,YAAQ,MAAM,qCAAgC;AAC9C,UAAM,IAAI,MAAM,8CAA8C;AAAA,EAChE;AAEA,QAAM,EAAE,MAAM,WAAW,IAAI;AAE7B,MAAI,CAAC,QAAQ,OAAO,SAAS,UAAU;AACrC,WAAO,EAAE,SAAS,EAAE,WAAW,OAAO,KAAK,EAAE,GAAG,MAAM,CAAC,GAAG,OAAO,EAAE;AAAA,EACrE;AAGA,QAAM,eAAgB,cAAc,WAAW,CAAC,KAAM,MAAM,OAAO;AAGnE,QAAM,cAAc,SAAS,IAAI;AAEjC,MAAI,YAAY,WAAW,GAAG;AAC5B,WAAO,EAAE,SAAS,EAAE,WAAW,OAAO,KAAK,EAAE,GAAG,MAAM,CAAC,GAAG,OAAO,EAAE;AAAA,EACrE;AAGA,QAAM,YAAY,MAAM,OAAO,oBAC3B,2BAA2B,aAAa,MAAM,OAAO,SAAS,IAC9D,MAAM,OAAO;AAEjB,UAAQ,IAAI,mCAA4B,IAAI,MAAM,YAAY,MAAM,uBAAuB,SAAS,GAAG;AAGvG,MAAI;AAEJ,MAAI;AAGF,UAAM,YAAa,MAAc,MAAM;AAEvC,QAAI,CAAC,WAAW;AACd,cAAQ,MAAM,gDAA2C;AACzD,aAAO,EAAE,SAAS,EAAE,WAAW,OAAO,KAAK,EAAE,GAAG,MAAM,CAAC,GAAG,OAAO,EAAE;AAAA,IACrE;AAEA,YAAQ,IAAI,qCAA8B,OAAO,KAAK,aAAa,CAAC,CAAC,CAAC;AAGtE,QAAI,YAAY;AAGhB,QAAI,UAAU,UAAU,YAAY,GAAG,MAAM;AAC3C,kBAAY,UAAU,QAAQ,YAAY,EAAE;AAC5C,cAAQ,IAAI,4DAAuD;AAAA,IACrE,WAES,UAAU,YAAY,GAAG,MAAM;AACtC,kBAAY,UAAU,YAAY,EAAE;AACpC,cAAQ,IAAI,6DAAwD;AAAA,IACtE;AAEA,QAAI,CAAC,WAAW;AACd,cAAQ,MAAM,6CAAwC,YAAY;AAClE,cAAQ,MAAM,qCAAqC,OAAO,KAAK,SAAS,CAAC;AACzE,aAAO,EAAE,SAAS,EAAE,WAAW,OAAO,KAAK,EAAE,GAAG,MAAM,CAAC,GAAG,OAAO,EAAE;AAAA,IACrE;AAEA,iBAAa,+BAA+B,SAAS;AACrD,YAAQ,IAAI,uBAAgB,WAAW,IAAI,0BAA0B;AAAA,EACvE,SAAS,OAAO;AACd,YAAQ,MAAM,wCAAmC,KAAK;AACtD,WAAO,EAAE,SAAS,EAAE,WAAW,OAAO,KAAK,EAAE,GAAG,MAAM,CAAC,GAAG,OAAO,EAAE;AAAA,EACrE;AAGA,QAAM,gBAAgB;AAAA,IACpB;AAAA,IACA;AAAA,IACA;AAAA,IACA,MAAM,OAAO,iBAAiB,MAAM,aAAa;AAAA,IACjD,MAAM,OAAO;AAAA,EACf;AAGA,QAAM,qBAAqB;AAAA,IACzB;AAAA,IACA,MAAM,OAAO;AAAA,EACf;AAEA,UAAQ,IAAI,+BAAwB,MAAM,KAAK,mBAAmB,OAAO,CAAC,EAAE,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,QAAQ,CAAC,CAAC,QAAQ;AAGzH,QAAM,kBAAmC,CAAC;AAE1C,UAAQ,IAAI,yCAAkC;AAAA,IAC5C,UAAU,OAAO,KAAM,MAAc,QAAQ,CAAC,CAAC;AAAA,IAC/C,SAAS,CAAC,CAAG,MAAc,MAAM;AAAA,IACjC,UAAW,MAAc,MAAM,OAAO,OAAQ,MAAc,KAAK,OAAO;AAAA,EAC1E,CAAC;AAGD,MAAI,OAA4B,CAAC;AAGjC,MAAK,MAAc,MAAM,MAAM,MAAM;AACnC,WAAQ,MAAc,KAAK,KAAK;AAChC,YAAQ,IAAI,2CAAsC;AAAA,EACpD,WAEU,MAAc,MAAM,QAAQ,OAAQ,MAAc,KAAK,SAAS,UAAU;AAElF,UAAM,WAAW,OAAO,KAAM,MAAc,KAAK,IAAI,EAAE,CAAC;AACxD,QAAI,YAAY,aAAa,iCAAiC,aAAa,SAAS;AAClF,aAAQ,MAAc,KAAK;AAC3B,cAAQ,IAAI,+CAA0C;AAAA,IACxD;AAAA,EACF;AAEA,MAAI,OAAO,KAAK,IAAI,EAAE,WAAW,GAAG;AAClC,YAAQ,IAAI,0DAAqD;AAAA,MAC/D,aAAa,CAAC,CAAG,MAAc,MAAM;AAAA,MACrC,cAAe,MAAc,MAAM,OAAO,OAAO,KAAM,MAAc,KAAK,IAAI,IAAI;AAAA,MAClF,iBAAiB,CAAC,CAAG,MAAc,MAAM,MAAM;AAAA,MAC/C,mBAAoB,MAAc,MAAM,MAAM,OAAO,OAAO,KAAM,MAAc,KAAK,KAAK,IAAI,EAAE,SAAS;AAAA,IAC3G,CAAC;AAAA,EACH;AAEA,UAAQ,IAAI,+BAAwB,OAAO,KAAK,IAAI,EAAE,MAAM,YAAY;AAExE,aAAW,CAAC,OAAO,GAAG,KAAK,OAAO,QAAQ,IAAI,GAAG;AAC/C,UAAM,OAAO,IAAI,YAAY;AAE7B,QAAI,CAAC,QAAQ,OAAO,SAAS,UAAU;AACrC;AAAA,IACF;AAGA,UAAM,YAAY,SAAS,IAAI;AAG/B,UAAM,UAAU;AAAA,MACd;AAAA,MACA;AAAA,MACA;AAAA,QACE,SAAS,MAAM,OAAO;AAAA,QACtB,QAAQ,MAAM,OAAO;AAAA,MACvB;AAAA,MACA,MAAM;AAAA,MACN,MAAM;AAAA,IACR;AAEA,QAAI,QAAQ,SAAS,GAAG;AAEtB,YAAM,WAAW,KAAK,IAAI,GAAG,QAAQ,IAAI,OAAK,EAAE,KAAK,CAAC;AAEtD,sBAAgB,KAAK;AAAA,QACnB,IAAI;AAAA,QACJ;AAAA,QACA,OAAO;AAAA,QACP,UAAU;AAAA,MACZ,CAAC;AAAA,IACH;AAAA,EACF;AAGA,kBAAgB,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAGhD,QAAM,QAAQ,OAAO,SAAS,gBAAgB;AAC9C,QAAM,iBAAiB,gBAAgB,MAAM,GAAG,KAAK;AAGrD,QAAM,OAAO,eAAe,IAAI,YAAU;AAAA,IACxC,IAAI,MAAM;AAAA,IACV,OAAO,MAAM;AAAA,IACb,UAAU,MAAM;AAAA;AAAA,IAEhB,UAAU,MAAM;AAAA,EAClB,EAAE;AAEF,QAAM,UAAU,YAAY,IAAI,IAAI;AAEpC,UAAQ,IAAI,gBAAW,KAAK,MAAM,eAAe,QAAQ,QAAQ,CAAC,CAAC,cAAc,KAAK,GAAG;AAEzF,SAAO;AAAA,IACL,SAAS;AAAA,MACP,WAAW,GAAG,QAAQ,QAAQ,CAAC,CAAC;AAAA,MAChC,KAAK,KAAK,MAAM,UAAU,GAAO;AAAA;AAAA,IACnC;AAAA,IACA;AAAA,IACA,OAAO,KAAK;AAAA,EACd;AACF;AAKA,eAAe,yBACb,gBACqB;AACrB,MAAI;AACF,YAAQ,IAAI,0DAAmD;AAG/D,UAAM,EAAE,aAAa,IAAI,MAAM,OAAO,uBAAuB;AAE7D,UAAM,WAAW,aAAa,eAAe,KAAK,eAAe,UAAU;AAG3E,UAAM,EAAE,MAAM,MAAM,IAAI,MAAM,SAAS,IAAI,iBAAiB;AAE5D,YAAQ,IAAI,2CAAoC;AAAA,MAC9C,UAAU,CAAC,CAAC;AAAA,MACZ,cAAc,OAAO;AAAA,MACrB,SAAS,CAAC,CAAC;AAAA,MACX,UAAU,OAAO;AAAA,MACjB,UAAU,OAAO,OAAO,KAAK,IAAI,EAAE,SAAS;AAAA,IAC9C,CAAC;AAED,QAAI,OAAO;AACT,YAAM,IAAI,MAAM,mBAAmB,MAAM,OAAO,EAAE;AAAA,IACpD;AAEA,UAAM,aAAa,QAAQ,CAAC;AAC5B,YAAQ,IAAI,oBAAa,OAAO,KAAK,UAAU,EAAE,MAAM,gCAAgC;AAEvF,WAAO;AAAA,EACT,SAAS,OAAO;AACd,YAAQ,MAAM,iDAA4C,KAAK;AAC/D,UAAM;AAAA,EACR;AACF;AAKA,SAAS,6BACP,MACA,cACqB;AACrB,QAAM,KAAK,oBAAI,IAAoB;AAEnC,aAAW,OAAO,OAAO,OAAO,IAAI,GAAG;AACrC,UAAM,OAAO,IAAI,YAAY;AAE7B,QAAI,CAAC,QAAQ,OAAO,SAAS,UAAU;AACrC;AAAA,IACF;AAGA,UAAM,QAAQ,IAAI,IAAI,SAAS,IAAI,CAAC;AAGpC,eAAW,QAAQ,OAAO;AACxB,SAAG,IAAI,OAAO,GAAG,IAAI,IAAI,KAAK,KAAK,CAAC;AAAA,IACtC;AAAA,EACF;AAEA,SAAO;AACT;AAQA,SAAS,cAAc,MAAsB;AAC3C,SAAO,KACJ,YAAY,EACZ,UAAU,KAAK,EACf,QAAQ,oBAAoB,EAAE,EAE9B,QAAQ,gFAAgF,GAAG,EAC3F,QAAQ,6DAA6D,EAAE,EACvE,QAAQ,mBAAmB,GAAG,EAC9B,QAAQ,4BAA4B,GAAG,EACvC,QAAQ,QAAQ,GAAG,EACnB,KAAK;AACV;AAQA,SAAS,SAAS,MAAwB;AAExC,SAAO,cAAc,IAAI,EACtB,MAAM,KAAK,EACX,OAAO,WAAS,MAAM,SAAS,CAAC;AACrC","sourcesContent":["/**\n * Fuzzy matching utilities using bounded Levenshtein distance\n * \n * This is the same algorithm used by Orama's match-highlight plugin\n * for consistent fuzzy matching behavior.\n */\n\n/**\n * Result of bounded Levenshtein distance calculation\n */\nexport interface BoundedLevenshteinResult {\n /** Whether the distance is within bounds */\n isBounded: boolean;\n /** The actual distance (only valid if isBounded is true) */\n distance: number;\n}\n\n/**\n * Calculate bounded Levenshtein distance between two strings\n * \n * Stops early if distance exceeds the bound for better performance.\n * This is the same algorithm as Orama's internal boundedLevenshtein.\n * \n * @param a - First string\n * @param b - Second string\n * @param bound - Maximum allowed distance\n * @returns Result indicating if strings are within bound and the distance\n */\nexport function boundedLevenshtein(\n a: string,\n b: string,\n bound: number\n): BoundedLevenshteinResult {\n // Quick checks\n if (a === b) {\n return { isBounded: true, distance: 0 };\n }\n\n const aLen = a.length;\n const bLen = b.length;\n\n // If length difference exceeds bound, no need to calculate\n if (Math.abs(aLen - bLen) > bound) {\n return { isBounded: false, distance: bound + 1 };\n }\n\n // Swap to ensure a is shorter (optimization)\n if (aLen > bLen) {\n [a, b] = [b, a];\n }\n\n const m = a.length;\n const n = b.length;\n\n // Use single array instead of matrix (memory optimization)\n let prevRow = new Array(n + 1);\n let currRow = new Array(n + 1);\n\n // Initialize first row\n for (let j = 0; j <= n; j++) {\n prevRow[j] = j;\n }\n\n for (let i = 1; i <= m; i++) {\n currRow[0] = i;\n let minInRow = i;\n\n for (let j = 1; j <= n; j++) {\n const cost = a[i - 1] === b[j - 1] ? 0 : 1;\n\n currRow[j] = Math.min(\n prevRow[j] + 1, // deletion\n currRow[j - 1] + 1, // insertion\n prevRow[j - 1] + cost // substitution\n );\n\n minInRow = Math.min(minInRow, currRow[j]);\n }\n\n // Early termination: if all values in row exceed bound, we're done\n if (minInRow > bound) {\n return { isBounded: false, distance: bound + 1 };\n }\n\n // Swap rows for next iteration\n [prevRow, currRow] = [currRow, prevRow];\n }\n\n const distance = prevRow[n];\n return {\n isBounded: distance <= bound,\n distance\n };\n}\n\n/**\n * Check if a word matches a query token with fuzzy matching\n * \n * @param word - Word from document\n * @param queryToken - Token from search query\n * @param tolerance - Maximum edit distance allowed\n * @returns Match result with score\n */\nexport function fuzzyMatch(\n word: string,\n queryToken: string,\n tolerance: number\n): { matches: boolean; distance: number; score: number } {\n // Exact match\n if (word === queryToken) {\n return { matches: true, distance: 0, score: 1.0 };\n }\n\n // Prefix match (high score, no distance)\n if (word.startsWith(queryToken)) {\n return { matches: true, distance: 0, score: 0.95 };\n }\n\n // Fuzzy match with tolerance\n const result = boundedLevenshtein(word, queryToken, tolerance);\n \n if (result.isBounded) {\n // Score decreases with distance\n // distance 1 = 0.8, distance 2 = 0.6, etc.\n const score = 1.0 - (result.distance * 0.2);\n return {\n matches: true,\n distance: result.distance,\n score: Math.max(0.1, score) // Minimum score of 0.1\n };\n }\n\n return { matches: false, distance: tolerance + 1, score: 0 };\n}\n\n/**\n * Calculate adaptive tolerance based on query length\n * \n * Longer queries get higher tolerance for better fuzzy matching.\n * \n * @param queryTokens - Array of query tokens\n * @param baseTolerance - Base tolerance value\n * @returns Calculated tolerance (always an integer)\n */\nexport function calculateAdaptiveTolerance(\n queryTokens: string[],\n baseTolerance: number\n): number {\n const queryLength = queryTokens.length;\n \n if (queryLength <= 2) {\n return baseTolerance;\n } else if (queryLength <= 4) {\n return baseTolerance + 1;\n } else if (queryLength <= 6) {\n return baseTolerance + 2;\n } else {\n return baseTolerance + 3;\n }\n}\n","/**\n * Candidate expansion: Find all possible matches for query tokens\n * including exact matches, fuzzy matches, and synonyms\n */\n\nimport { fuzzyMatch } from './fuzzy.js';\nimport type { Candidate, SynonymMap } from './types.js';\n\n/**\n * Extract all unique words from the radix tree index\n * \n * @param radixNode - Root node of the radix tree\n * @returns Set of all unique words in the index\n */\nexport function extractVocabularyFromRadixTree(radixNode: any): Set<string> {\n const vocabulary = new Set<string>();\n let nodesVisited = 0;\n let wordsFound = 0;\n \n function traverse(node: any, depth: number = 0) {\n if (!node) {\n return;\n }\n \n nodesVisited++;\n \n // Check if this node represents a complete word\n // e = true means it's an end of a word\n if (node.e && node.w && typeof node.w === 'string' && node.w.length > 0) {\n vocabulary.add(node.w);\n wordsFound++;\n }\n \n // Children can be Map, Array, or Object\n if (node.c) {\n if (node.c instanceof Map) {\n // Map format\n for (const [_key, childNode] of node.c) {\n traverse(childNode, depth + 1);\n }\n } else if (Array.isArray(node.c)) {\n // Array format: [[key, childNode], ...]\n for (const [_key, childNode] of node.c) {\n traverse(childNode, depth + 1);\n }\n } else if (typeof node.c === 'object') {\n // Object format: {key: childNode, ...}\n for (const childNode of Object.values(node.c)) {\n traverse(childNode, depth + 1);\n }\n }\n }\n }\n \n traverse(radixNode);\n console.log(`📚 Extracted ${vocabulary.size} words from ${nodesVisited} nodes visited`);\n return vocabulary;\n}\n\n/**\n * Find all candidate matches for a single query token\n * \n * @param queryToken - Token from search query\n * @param vocabulary - Set of all words in the index\n * @param tolerance - Fuzzy matching tolerance\n * @param synonyms - Synonym map (optional)\n * @param synonymScore - Score multiplier for synonym matches\n * @returns Array of candidate matches\n */\nexport function findCandidatesForToken(\n queryToken: string,\n vocabulary: Set<string>,\n tolerance: number,\n synonyms?: SynonymMap,\n synonymScore: number = 0.8\n): Candidate[] {\n const candidates: Candidate[] = [];\n const seen = new Set<string>();\n\n // 1. Check for exact match\n if (vocabulary.has(queryToken)) {\n candidates.push({\n word: queryToken,\n type: 'exact',\n queryToken,\n distance: 0,\n score: 1.0\n });\n seen.add(queryToken);\n }\n\n // 2. Check for fuzzy matches\n for (const word of vocabulary) {\n if (seen.has(word)) continue;\n\n const match = fuzzyMatch(word, queryToken, tolerance);\n if (match.matches) {\n candidates.push({\n word,\n type: 'fuzzy',\n queryToken,\n distance: match.distance,\n score: match.score\n });\n seen.add(word);\n }\n }\n\n // 3. Check for synonym matches\n if (synonyms && synonyms[queryToken]) {\n for (const synonym of synonyms[queryToken]) {\n if (seen.has(synonym)) continue;\n if (vocabulary.has(synonym)) {\n candidates.push({\n word: synonym,\n type: 'synonym',\n queryToken,\n distance: 0,\n score: synonymScore\n });\n seen.add(synonym);\n }\n }\n }\n\n return candidates;\n}\n\n/**\n * Find candidates for all query tokens\n * \n * @param queryTokens - Array of tokens from search query\n * @param vocabulary - Set of all words in the index\n * @param tolerance - Fuzzy matching tolerance\n * @param synonyms - Synonym map (optional)\n * @param synonymScore - Score multiplier for synonym matches\n * @returns Map of query tokens to their candidate matches\n */\nexport function findAllCandidates(\n queryTokens: string[],\n vocabulary: Set<string>,\n tolerance: number,\n synonyms?: SynonymMap,\n synonymScore: number = 0.8\n): Map<string, Candidate[]> {\n const candidatesMap = new Map<string, Candidate[]>();\n\n for (const token of queryTokens) {\n const tokenCandidates = findCandidatesForToken(\n token,\n vocabulary,\n tolerance,\n synonyms,\n synonymScore\n );\n candidatesMap.set(token, tokenCandidates);\n }\n\n return candidatesMap;\n}\n\n/**\n * Get total number of candidates across all tokens\n * \n * @param candidatesMap - Map of token to candidates\n * @returns Total count of all candidates\n */\nexport function getTotalCandidateCount(\n candidatesMap: Map<string, Candidate[]>\n): number {\n let total = 0;\n for (const candidates of candidatesMap.values()) {\n total += candidates.length;\n }\n return total;\n}\n\n/**\n * Filter candidates by minimum score threshold\n * \n * @param candidatesMap - Map of token to candidates\n * @param minScore - Minimum score threshold\n * @returns Filtered candidates map\n */\nexport function filterCandidatesByScore(\n candidatesMap: Map<string, Candidate[]>,\n minScore: number\n): Map<string, Candidate[]> {\n const filtered = new Map<string, Candidate[]>();\n\n for (const [token, candidates] of candidatesMap.entries()) {\n const filteredCandidates = candidates.filter(c => c.score >= minScore);\n if (filteredCandidates.length > 0) {\n filtered.set(token, filteredCandidates);\n }\n }\n\n return filtered;\n}\n","/**\n * Phrase scoring algorithm with semantic weighting\n */\n\nimport type { WordMatch, PhraseMatch, Candidate } from './types.js';\n\n/**\n * Configuration for phrase scoring\n */\nexport interface ScoringConfig {\n weights: {\n exact: number;\n fuzzy: number;\n order: number;\n proximity: number;\n density: number;\n semantic: number;\n };\n maxGap: number;\n}\n\n/**\n * Find all phrase matches in a document\n * \n * @param documentTokens - Tokenized document content\n * @param candidatesMap - Map of query tokens to their candidates\n * @param config - Scoring configuration\n * @param documentFrequency - Document frequency map for TF-IDF\n * @param totalDocuments - Total number of documents\n * @returns Array of phrase matches\n */\nexport function findPhrasesInDocument(\n documentTokens: string[],\n candidatesMap: Map<string, Candidate[]>,\n config: ScoringConfig,\n documentFrequency: Map<string, number>,\n totalDocuments: number\n): PhraseMatch[] {\n const phrases: PhraseMatch[] = [];\n const queryTokens = Array.from(candidatesMap.keys());\n\n // Find all word matches in document\n const wordMatches: WordMatch[] = [];\n \n for (let i = 0; i < documentTokens.length; i++) {\n const docWord = documentTokens[i];\n \n // Check if this word matches any query token\n for (const [queryToken, candidates] of candidatesMap.entries()) {\n for (const candidate of candidates) {\n if (candidate.word === docWord) {\n wordMatches.push({\n word: docWord,\n queryToken,\n position: i,\n type: candidate.type,\n distance: candidate.distance,\n score: candidate.score\n });\n }\n }\n }\n }\n\n // Build phrases from word matches using sliding window\n for (let i = 0; i < wordMatches.length; i++) {\n const phrase = buildPhraseFromPosition(\n wordMatches,\n i,\n queryTokens,\n config,\n documentFrequency,\n totalDocuments,\n wordMatches // Pass all word matches for density calculation\n );\n \n if (phrase && phrase.words.length > 0) {\n phrases.push(phrase);\n }\n }\n\n // Deduplicate and sort by score\n return deduplicatePhrases(phrases);\n}\n\n/**\n * Build a phrase starting from a specific word match position\n * \n * @param wordMatches - All word matches in document\n * @param startIndex - Starting index in wordMatches array\n * @param queryTokens - Original query tokens\n * @param config - Scoring configuration\n * @param documentFrequency - Document frequency map\n * @param totalDocuments - Total document count\n * @param allWordMatches - All word matches in document (for density calculation)\n * @returns Phrase match or null\n */\nfunction buildPhraseFromPosition(\n wordMatches: WordMatch[],\n startIndex: number,\n queryTokens: string[],\n config: ScoringConfig,\n documentFrequency: Map<string, number>,\n totalDocuments: number,\n allWordMatches: WordMatch[]\n): PhraseMatch | null {\n const startMatch = wordMatches[startIndex];\n const phraseWords: WordMatch[] = [startMatch];\n const coveredTokens = new Set([startMatch.queryToken]);\n\n // Look for nearby matches to complete the phrase\n for (let i = startIndex + 1; i < wordMatches.length; i++) {\n const match = wordMatches[i];\n const gap = match.position - phraseWords[phraseWords.length - 1].position - 1;\n\n // Stop if gap exceeds maximum\n if (gap > config.maxGap) {\n break;\n }\n\n // Add if it's a different query token\n if (!coveredTokens.has(match.queryToken)) {\n phraseWords.push(match);\n coveredTokens.add(match.queryToken);\n }\n\n // Stop if we have all query tokens\n if (coveredTokens.size === queryTokens.length) {\n break;\n }\n }\n\n // Calculate phrase score\n if (phraseWords.length > 0) {\n const { score, breakdown } = calculatePhraseScore(\n phraseWords,\n queryTokens,\n config,\n documentFrequency,\n totalDocuments,\n allWordMatches\n );\n\n return {\n words: phraseWords,\n startPosition: phraseWords[0].position,\n endPosition: phraseWords[phraseWords.length - 1].position,\n gap: phraseWords[phraseWords.length - 1].position - phraseWords[0].position,\n inOrder: isInOrder(phraseWords, queryTokens),\n score,\n scoreBreakdown: breakdown\n };\n }\n\n return null;\n}\n\n/**\n * Calculate overall phrase score\n * \n * @param phraseWords - Words in the phrase\n * @param queryTokens - Original query tokens\n * @param config - Scoring configuration\n * @param documentFrequency - Document frequency map\n * @param totalDocuments - Total document count\n * @param allWordMatches - All word matches in document (for density calculation)\n * @returns Phrase score (0-1) and detailed component breakdown\n */\nfunction calculatePhraseScore(\n phraseWords: WordMatch[],\n queryTokens: string[],\n config: ScoringConfig,\n documentFrequency: Map<string, number>,\n totalDocuments: number,\n allWordMatches: WordMatch[]\n): { score: number; breakdown: { base: number; order: number; proximity: number; density: number; semantic: number } } {\n // Base score from word matches\n let baseScore = 0;\n for (const word of phraseWords) {\n const weight = word.type === 'exact' ? config.weights.exact :\n word.type === 'fuzzy' ? config.weights.fuzzy : \n config.weights.fuzzy * 0.8; // synonym\n baseScore += word.score * weight;\n }\n baseScore /= phraseWords.length;\n\n // Order bonus\n const inOrder = isInOrder(phraseWords, queryTokens);\n const orderScore = inOrder ? 1.0 : 0.5;\n\n // Proximity bonus (closer words score higher)\n const span = phraseWords[phraseWords.length - 1].position - phraseWords[0].position + 1;\n const proximityScore = Math.max(0, 1.0 - (span / (queryTokens.length * 5)));\n\n // Density bonus (how many times query terms appear in the document)\n // Only applies to single-word queries - for phrase queries, use phrase-specific metrics\n let densityScore = 0;\n \n if (queryTokens.length === 1) {\n // Single-word query: reward repetition without capping\n const totalOccurrences = allWordMatches.length;\n // Normalize by query length but don't cap - more occurrences = higher score\n densityScore = totalOccurrences / queryTokens.length;\n } else {\n // Multi-word phrase query: density doesn't apply\n // Use phrase coverage instead (what percentage of query is in this phrase)\n densityScore = phraseWords.length / queryTokens.length;\n }\n\n // Semantic score (TF-IDF)\n const semanticScore = calculateSemanticScore(\n phraseWords,\n documentFrequency,\n totalDocuments\n );\n\n // Weighted combination\n const weights = config.weights;\n \n // Calculate weighted components\n const weightedBase = baseScore;\n const weightedOrder = orderScore * weights.order;\n const weightedProximity = proximityScore * weights.proximity;\n const weightedDensity = densityScore * weights.density;\n const weightedSemantic = semanticScore * weights.semantic;\n \n const totalScore = weightedBase + weightedOrder + weightedProximity + weightedDensity + weightedSemantic;\n\n // Calculate max possible score (all components at maximum)\n // baseScore max is 1.0 (from exact matches), other components are already 0-1\n const maxPossibleScore = 1.0 + weights.order + weights.proximity + weights.density + weights.semantic;\n \n // Normalize to 0-1 range without clamping\n const score = totalScore / maxPossibleScore;\n\n // Component contributions to the final normalized score\n const base = weightedBase / maxPossibleScore;\n const order = weightedOrder / maxPossibleScore;\n const proximity = weightedProximity / maxPossibleScore;\n const density = weightedDensity / maxPossibleScore;\n const semantic = weightedSemantic / maxPossibleScore;\n\n return {\n score,\n breakdown: {\n base,\n order,\n proximity,\n density,\n semantic\n }\n };\n}\n\n/**\n * Check if words are in the same order as query tokens\n * \n * @param phraseWords - Words in the phrase\n * @param queryTokens - Original query tokens\n * @returns True if in order\n */\nfunction isInOrder(phraseWords: WordMatch[], queryTokens: string[]): boolean {\n const tokenOrder = new Map(queryTokens.map((token, index) => [token, index]));\n \n for (let i = 1; i < phraseWords.length; i++) {\n const prevOrder = tokenOrder.get(phraseWords[i - 1].queryToken) ?? -1;\n const currOrder = tokenOrder.get(phraseWords[i].queryToken) ?? -1;\n \n if (currOrder < prevOrder) {\n return false;\n }\n }\n \n return true;\n}\n\n/**\n * Calculate semantic score using TF-IDF\n * \n * @param phraseWords - Words in the phrase\n * @param documentFrequency - Document frequency map\n * @param totalDocuments - Total document count\n * @returns Semantic score (0-1)\n */\nfunction calculateSemanticScore(\n phraseWords: WordMatch[],\n documentFrequency: Map<string, number>,\n totalDocuments: number\n): number {\n // Handle edge case: no documents\n if (totalDocuments === 0) {\n return 0;\n }\n \n let tfidfSum = 0;\n \n for (const word of phraseWords) {\n const df = documentFrequency.get(word.word) || 1;\n const idf = Math.log(totalDocuments / df);\n tfidfSum += idf;\n }\n \n // Normalize by phrase length\n const avgTfidf = tfidfSum / phraseWords.length;\n \n // Normalize to 0-1 range (assuming max IDF of ~10)\n return Math.min(1.0, avgTfidf / 10);\n}\n\n/**\n * Deduplicate overlapping phrases, keeping highest scoring ones\n * \n * @param phrases - Array of phrase matches\n * @returns Deduplicated phrases sorted by score\n */\nfunction deduplicatePhrases(phrases: PhraseMatch[]): PhraseMatch[] {\n if (phrases.length === 0) return [];\n\n // Sort by score descending\n const sorted = phrases.slice().sort((a, b) => b.score - a.score);\n const result: PhraseMatch[] = [];\n const covered = new Set<number>();\n\n for (const phrase of sorted) {\n // Check if this phrase overlaps with already selected phrases\n let overlaps = false;\n for (let pos = phrase.startPosition; pos <= phrase.endPosition; pos++) {\n if (covered.has(pos)) {\n overlaps = true;\n break;\n }\n }\n\n if (!overlaps) {\n result.push(phrase);\n // Mark positions as covered\n for (let pos = phrase.startPosition; pos <= phrase.endPosition; pos++) {\n covered.add(pos);\n }\n }\n }\n\n return result.sort((a, b) => b.score - a.score);\n}\n","/**\n * Fuzzy Phrase Plugin for Orama\n * \n * Advanced fuzzy phrase matching with semantic weighting and synonym expansion.\n * Completely independent from QPS - accesses Orama's radix tree directly.\n */\n\nimport type { AnyOrama, OramaPlugin, Results, TypedDocument } from '@wcs-colab/orama';\nimport type { FuzzyPhraseConfig, PluginState, SynonymMap, DocumentMatch } from './types.js';\nimport { calculateAdaptiveTolerance } from './fuzzy.js';\nimport { \n extractVocabularyFromRadixTree, \n findAllCandidates,\n filterCandidatesByScore \n} from './candidates.js';\nimport { findPhrasesInDocument } from './scoring.js';\n\n/**\n * Default configuration\n */\nconst DEFAULT_CONFIG: Required<FuzzyPhraseConfig> = {\n textProperty: 'content',\n tolerance: 1,\n adaptiveTolerance: true,\n enableSynonyms: false,\n supabase: undefined as any,\n synonymMatchScore: 0.8,\n weights: {\n exact: 1.0,\n fuzzy: 0.8,\n order: 0.3,\n proximity: 0.2,\n density: 0.2,\n semantic: 0.15\n },\n maxGap: 5,\n minScore: 0.1\n};\n\n/**\n * Plugin state storage (keyed by Orama instance)\n */\nconst pluginStates = new WeakMap<AnyOrama, PluginState>();\n\n/**\n * Create the Fuzzy Phrase Plugin\n * \n * @param userConfig - User configuration options\n * @returns Orama plugin instance\n */\nexport function pluginFuzzyPhrase(userConfig: FuzzyPhraseConfig = {}): OramaPlugin {\n // Merge user config with defaults\n const config: Required<FuzzyPhraseConfig> = {\n textProperty: userConfig.textProperty ?? DEFAULT_CONFIG.textProperty,\n tolerance: userConfig.tolerance ?? DEFAULT_CONFIG.tolerance,\n adaptiveTolerance: userConfig.adaptiveTolerance ?? DEFAULT_CONFIG.adaptiveTolerance,\n enableSynonyms: userConfig.enableSynonyms ?? DEFAULT_CONFIG.enableSynonyms,\n supabase: userConfig.supabase || DEFAULT_CONFIG.supabase,\n synonymMatchScore: userConfig.synonymMatchScore ?? DEFAULT_CONFIG.synonymMatchScore,\n weights: {\n exact: userConfig.weights?.exact ?? DEFAULT_CONFIG.weights.exact,\n fuzzy: userConfig.weights?.fuzzy ?? DEFAULT_CONFIG.weights.fuzzy,\n order: userConfig.weights?.order ?? DEFAULT_CONFIG.weights.order,\n proximity: userConfig.weights?.proximity ?? DEFAULT_CONFIG.weights.proximity,\n density: userConfig.weights?.density ?? DEFAULT_CONFIG.weights.density,\n semantic: userConfig.weights?.semantic ?? DEFAULT_CONFIG.weights.semantic\n },\n maxGap: userConfig.maxGap ?? DEFAULT_CONFIG.maxGap,\n minScore: userConfig.minScore ?? DEFAULT_CONFIG.minScore\n };\n\n const plugin: OramaPlugin = {\n name: 'fuzzy-phrase',\n\n /**\n * Initialize plugin after index is created\n */\n afterCreate: async (orama: AnyOrama) => {\n console.log('🔮 Initializing Fuzzy Phrase Plugin...');\n\n // Initialize state\n const state: PluginState = {\n synonymMap: {},\n config,\n documentFrequency: new Map(),\n totalDocuments: 0\n };\n\n // Load synonyms from Supabase if enabled\n if (config.enableSynonyms && config.supabase) {\n try {\n console.log('📖 Loading synonyms from Supabase...');\n state.synonymMap = await loadSynonymsFromSupabase(config.supabase);\n console.log(`✅ Loaded ${Object.keys(state.synonymMap).length} words with synonyms`);\n } catch (error) {\n console.error('⚠️ Failed to load synonyms:', error);\n // Continue without synonyms\n }\n }\n\n // Calculate document frequencies for TF-IDF from document store\n const docs = (orama.data as any)?.docs?.docs;\n if (docs) {\n state.totalDocuments = Object.keys(docs).length;\n state.documentFrequency = calculateDocumentFrequencies(docs, config.textProperty);\n console.log(`📊 Calculated document frequencies for ${state.totalDocuments} documents`);\n }\n\n // Store state\n pluginStates.set(orama, state);\n console.log('✅ Fuzzy Phrase Plugin initialized');\n \n // Signal ready - emit a custom event that can be listened to\n // Use setImmediate to ensure this runs after the afterCreate hook completes\n setImmediate(() => {\n if (typeof (globalThis as any).fuzzyPhrasePluginReady === 'function') {\n console.log('📡 Signaling plugin ready...');\n (globalThis as any).fuzzyPhrasePluginReady();\n } else {\n console.warn('⚠️ fuzzyPhrasePluginReady callback not found');\n }\n });\n }\n };\n\n return plugin;\n}\n\n/**\n * Search with fuzzy phrase matching\n * \n * This function should be called instead of the regular search() function\n * to enable fuzzy phrase matching.\n */\nexport async function searchWithFuzzyPhrase<T extends AnyOrama>(\n orama: T, \n params: { term?: string; properties?: string[]; limit?: number },\n language?: string\n): Promise<Results<TypedDocument<T>>> {\n const startTime = performance.now();\n \n // Get plugin state\n const state = pluginStates.get(orama);\n \n if (!state) {\n console.error('❌ Plugin state not initialized');\n throw new Error('Fuzzy Phrase Plugin not properly initialized');\n }\n\n const { term, properties } = params;\n \n if (!term || typeof term !== 'string') {\n return { elapsed: { formatted: '0ms', raw: 0 }, hits: [], count: 0 };\n }\n\n // Use specified property or default\n const textProperty = (properties && properties[0]) || state.config.textProperty;\n\n // Tokenize query\n const queryTokens = tokenize(term);\n \n if (queryTokens.length === 0) {\n return { elapsed: { formatted: '0ms', raw: 0 }, hits: [], count: 0 };\n }\n\n // Calculate tolerance (adaptive or fixed)\n const tolerance = state.config.adaptiveTolerance\n ? calculateAdaptiveTolerance(queryTokens, state.config.tolerance)\n : state.config.tolerance;\n\n console.log(`🔍 Fuzzy phrase search: \"${term}\" (${queryTokens.length} tokens, tolerance: ${tolerance})`);\n\n // Extract vocabulary from radix tree\n let vocabulary: Set<string>;\n \n try {\n // Access radix tree - the actual index data is in orama.data.index, not orama.index\n // orama.index is just the component interface (methods)\n const indexData = (orama as any).data?.index;\n \n if (!indexData) {\n console.error('❌ No index data found in orama.data.index');\n return { elapsed: { formatted: '0ms', raw: 0 }, hits: [], count: 0 };\n }\n \n console.log('🔍 DEBUG: Index data keys:', Object.keys(indexData || {}));\n \n // Try different paths to find the radix tree\n let radixNode = null;\n \n // Path 1: QPS-style (orama.data.index.indexes[property].node)\n if (indexData.indexes?.[textProperty]?.node) {\n radixNode = indexData.indexes[textProperty].node;\n console.log('✅ Found radix via QPS-style path (data.index.indexes)');\n }\n // Path 2: Standard Orama (orama.data.index[property].node)\n else if (indexData[textProperty]?.node) {\n radixNode = indexData[textProperty].node;\n console.log('✅ Found radix via standard path (data.index[property])');\n }\n \n if (!radixNode) {\n console.error('❌ Radix tree not found for property:', textProperty);\n console.error(' Available properties in index:', Object.keys(indexData));\n return { elapsed: { formatted: '0ms', raw: 0 }, hits: [], count: 0 };\n }\n\n vocabulary = extractVocabularyFromRadixTree(radixNode);\n console.log(`📚 Extracted ${vocabulary.size} unique words from index`);\n } catch (error) {\n console.error('❌ Failed to extract vocabulary:', error);\n return { elapsed: { formatted: '0ms', raw: 0 }, hits: [], count: 0 };\n }\n\n // Find candidates for all query tokens\n const candidatesMap = findAllCandidates(\n queryTokens,\n vocabulary,\n tolerance,\n state.config.enableSynonyms ? state.synonymMap : undefined,\n state.config.synonymMatchScore\n );\n\n // Filter by minimum score\n const filteredCandidates = filterCandidatesByScore(\n candidatesMap,\n state.config.minScore\n );\n\n console.log(`🎯 Found candidates: ${Array.from(filteredCandidates.values()).reduce((sum, c) => sum + c.length, 0)} total`);\n\n // Search through all documents\n const documentMatches: DocumentMatch[] = [];\n \n console.log('🔍 DEBUG orama.data structure:', {\n dataKeys: Object.keys((orama as any).data || {}),\n hasDocs: !!((orama as any).data?.docs),\n docsType: (orama as any).data?.docs ? typeof (orama as any).data.docs : 'undefined'\n });\n \n // Try multiple possible document storage locations\n let docs: Record<string, any> = {};\n \n // Access the actual documents - they're nested in orama.data.docs.docs\n if ((orama as any).data?.docs?.docs) {\n docs = (orama as any).data.docs.docs;\n console.log('✅ Found docs at orama.data.docs.docs');\n }\n // Fallback: orama.data.docs (might be the correct structure in some cases)\n else if ((orama as any).data?.docs && typeof (orama as any).data.docs === 'object') {\n // Check if it has document-like properties (not sharedInternalDocumentStore, etc.)\n const firstKey = Object.keys((orama as any).data.docs)[0];\n if (firstKey && firstKey !== 'sharedInternalDocumentStore' && firstKey !== 'count') {\n docs = (orama as any).data.docs;\n console.log('✅ Found docs at orama.data.docs (direct)');\n }\n }\n \n if (Object.keys(docs).length === 0) {\n console.log('❌ Could not find documents - available structure:', {\n hasDataDocs: !!((orama as any).data?.docs),\n dataDocsKeys: (orama as any).data?.docs ? Object.keys((orama as any).data.docs) : 'none',\n hasDataDocsDocs: !!((orama as any).data?.docs?.docs),\n dataDocsDocsCount: (orama as any).data?.docs?.docs ? Object.keys((orama as any).data.docs.docs).length : 0\n });\n }\n \n console.log(`📄 Searching through ${Object.keys(docs).length} documents`);\n\n for (const [docId, doc] of Object.entries(docs)) {\n const text = doc[textProperty];\n \n if (!text || typeof text !== 'string') {\n continue;\n }\n\n // Tokenize document\n const docTokens = tokenize(text);\n\n // Find phrases in this document\n const phrases = findPhrasesInDocument(\n docTokens,\n filteredCandidates,\n {\n weights: state.config.weights as Required<FuzzyPhraseConfig['weights']>,\n maxGap: state.config.maxGap\n } as any,\n state.documentFrequency,\n state.totalDocuments\n );\n\n if (phrases.length > 0) {\n // Calculate overall document score (highest phrase score)\n const docScore = Math.max(...phrases.map(p => p.score));\n\n documentMatches.push({\n id: docId,\n phrases,\n score: docScore,\n document: doc\n });\n }\n }\n\n // Sort by score descending\n documentMatches.sort((a, b) => b.score - a.score);\n\n // Apply limit if specified\n const limit = params.limit ?? documentMatches.length;\n const limitedMatches = documentMatches.slice(0, limit);\n\n // Convert to Orama results format\n const hits = limitedMatches.map(match => ({\n id: match.id,\n score: match.score,\n document: match.document,\n // Store phrases for highlighting\n _phrases: match.phrases\n })) as any[];\n\n const elapsed = performance.now() - startTime;\n\n console.log(`✅ Found ${hits.length} results in ${elapsed.toFixed(2)}ms (limit: ${limit})`);\n\n return {\n elapsed: {\n formatted: `${elapsed.toFixed(2)}ms`,\n raw: Math.floor(elapsed * 1000000) // nanoseconds\n },\n hits,\n count: hits.length\n } as any;\n}\n\n/**\n * Load synonyms from Supabase\n */\nasync function loadSynonymsFromSupabase(\n supabaseConfig: { url: string; serviceKey: string }\n): Promise<SynonymMap> {\n try {\n console.log('🔍 DEBUG: Calling Supabase RPC get_synonym_map...');\n \n // Dynamic import to avoid bundling Supabase client if not needed\n const { createClient } = await import('@supabase/supabase-js');\n \n const supabase = createClient(supabaseConfig.url, supabaseConfig.serviceKey);\n \n // Call the get_synonym_map function\n const { data, error } = await supabase.rpc('get_synonym_map');\n \n console.log('🔍 DEBUG: Supabase RPC response:', {\n hasError: !!error,\n errorMessage: error?.message,\n hasData: !!data,\n dataType: typeof data,\n dataKeys: data ? Object.keys(data).length : 0\n });\n \n if (error) {\n throw new Error(`Supabase error: ${error.message}`);\n }\n \n const synonymMap = data || {};\n console.log(`📚 Loaded ${Object.keys(synonymMap).length} synonym entries from Supabase`);\n \n return synonymMap;\n } catch (error) {\n console.error('❌ Failed to load synonyms from Supabase:', error);\n throw error;\n }\n}\n\n/**\n * Calculate document frequencies for TF-IDF\n */\nfunction calculateDocumentFrequencies(\n docs: Record<string, any>,\n textProperty: string\n): Map<string, number> {\n const df = new Map<string, number>();\n\n for (const doc of Object.values(docs)) {\n const text = doc[textProperty];\n \n if (!text || typeof text !== 'string') {\n continue;\n }\n\n // Get unique words in this document\n const words = new Set(tokenize(text));\n\n // Increment document frequency for each unique word\n for (const word of words) {\n df.set(word, (df.get(word) || 0) + 1);\n }\n }\n\n return df;\n}\n\n/**\n * Normalize text using the same rules as server-side\n * \n * CRITICAL: This must match the normalizeText() function in server/index.js exactly\n * PLUS we remove all punctuation to match Orama's French tokenizer behavior\n */\nfunction normalizeText(text: string): string {\n return text\n .toLowerCase()\n .normalize('NFD')\n .replace(/[\\u0300-\\u036f]/g, '') // Remove diacritics\n // Replace French elisions (l', d', etc.) with space to preserve word boundaries\n .replace(/\\b[ldcjmnst][\\u2018\\u2019\\u201A\\u201B\\u2032\\u2035\\u0027\\u0060\\u00B4](?=\\w)/gi, ' ')\n .replace(/[\\u2018\\u2019\\u201A\\u201B\\u2032\\u2035\\u0027\\u0060\\u00B4]/g, '') // Remove remaining apostrophes\n .replace(/[\\u201c\\u201d]/g, '\"') // Normalize curly quotes to straight quotes\n .replace(/[.,;:!?()[\\]{}\\-—–«»\"\"]/g, ' ') // Remove punctuation (replace with space to preserve word boundaries)\n .replace(/\\s+/g, ' ') // Normalize multiple spaces to single space\n .trim();\n}\n\n/**\n * Tokenization matching normalized text behavior\n * \n * Note: Text should already be normalized before indexing, so we normalize again\n * to ensure plugin tokenization matches index tokenization\n */\nfunction tokenize(text: string): string[] {\n // Normalize first (same as indexing), then split by whitespace\n return normalizeText(text)\n .split(/\\s+/)\n .filter(token => token.length > 0);\n}\n\n/**\n * Export types for external use\n */\nexport type {\n FuzzyPhraseConfig,\n WordMatch,\n PhraseMatch,\n DocumentMatch,\n SynonymMap,\n Candidate\n} from './types.js';\n"]}
1
+ {"version":3,"sources":["../src/fuzzy.ts","../src/candidates.ts","../src/scoring.ts","../src/index.ts"],"names":[],"mappings":";AA4BO,SAAS,mBACd,GACA,GACA,OAC0B;AAE1B,MAAI,MAAM,GAAG;AACX,WAAO,EAAE,WAAW,MAAM,UAAU,EAAE;AAAA,EACxC;AAEA,QAAM,OAAO,EAAE;AACf,QAAM,OAAO,EAAE;AAGf,MAAI,KAAK,IAAI,OAAO,IAAI,IAAI,OAAO;AACjC,WAAO,EAAE,WAAW,OAAO,UAAU,QAAQ,EAAE;AAAA,EACjD;AAGA,MAAI,OAAO,MAAM;AACf,KAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC;AAAA,EAChB;AAEA,QAAM,IAAI,EAAE;AACZ,QAAM,IAAI,EAAE;AAGZ,MAAI,UAAU,IAAI,MAAM,IAAI,CAAC;AAC7B,MAAI,UAAU,IAAI,MAAM,IAAI,CAAC;AAG7B,WAAS,IAAI,GAAG,KAAK,GAAG,KAAK;AAC3B,YAAQ,CAAC,IAAI;AAAA,EACf;AAEA,WAAS,IAAI,GAAG,KAAK,GAAG,KAAK;AAC3B,YAAQ,CAAC,IAAI;AACb,QAAI,WAAW;AAEf,aAAS,IAAI,GAAG,KAAK,GAAG,KAAK;AAC3B,YAAM,OAAO,EAAE,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,IAAI,IAAI;AAEzC,cAAQ,CAAC,IAAI,KAAK;AAAA,QAChB,QAAQ,CAAC,IAAI;AAAA;AAAA,QACb,QAAQ,IAAI,CAAC,IAAI;AAAA;AAAA,QACjB,QAAQ,IAAI,CAAC,IAAI;AAAA;AAAA,MACnB;AAEA,iBAAW,KAAK,IAAI,UAAU,QAAQ,CAAC,CAAC;AAAA,IAC1C;AAGA,QAAI,WAAW,OAAO;AACpB,aAAO,EAAE,WAAW,OAAO,UAAU,QAAQ,EAAE;AAAA,IACjD;AAGA,KAAC,SAAS,OAAO,IAAI,CAAC,SAAS,OAAO;AAAA,EACxC;AAEA,QAAM,WAAW,QAAQ,CAAC;AAC1B,SAAO;AAAA,IACL,WAAW,YAAY;AAAA,IACvB;AAAA,EACF;AACF;AAUO,SAAS,WACd,MACA,YACA,WACuD;AAEvD,MAAI,SAAS,YAAY;AACvB,WAAO,EAAE,SAAS,MAAM,UAAU,GAAG,OAAO,EAAI;AAAA,EAClD;AAGA,MAAI,KAAK,WAAW,UAAU,GAAG;AAC/B,WAAO,EAAE,SAAS,MAAM,UAAU,GAAG,OAAO,KAAK;AAAA,EACnD;AAGA,QAAM,SAAS,mBAAmB,MAAM,YAAY,SAAS;AAE7D,MAAI,OAAO,WAAW;AAGpB,UAAM,QAAQ,IAAO,OAAO,WAAW;AACvC,WAAO;AAAA,MACL,SAAS;AAAA,MACT,UAAU,OAAO;AAAA,MACjB,OAAO,KAAK,IAAI,KAAK,KAAK;AAAA;AAAA,IAC5B;AAAA,EACF;AAEA,SAAO,EAAE,SAAS,OAAO,UAAU,YAAY,GAAG,OAAO,EAAE;AAC7D;AAWO,SAAS,2BACd,aACA,eACQ;AACR,QAAM,cAAc,YAAY;AAEhC,MAAI,eAAe,GAAG;AACpB,WAAO;AAAA,EACT,WAAW,eAAe,GAAG;AAC3B,WAAO,gBAAgB;AAAA,EACzB,WAAW,eAAe,GAAG;AAC3B,WAAO,gBAAgB;AAAA,EACzB,OAAO;AACL,WAAO,gBAAgB;AAAA,EACzB;AACF;;;ACjJO,SAAS,+BAA+B,WAA6B;AAC1E,QAAM,aAAa,oBAAI,IAAY;AACnC,MAAI,eAAe;AACnB,MAAI,aAAa;AAEjB,WAAS,SAAS,MAAW,QAAgB,GAAG;AAC9C,QAAI,CAAC,MAAM;AACT;AAAA,IACF;AAEA;AAIA,QAAI,KAAK,KAAK,KAAK,KAAK,OAAO,KAAK,MAAM,YAAY,KAAK,EAAE,SAAS,GAAG;AACvE,iBAAW,IAAI,KAAK,CAAC;AACrB;AAAA,IACF;AAGA,QAAI,KAAK,GAAG;AACV,UAAI,KAAK,aAAa,KAAK;AAEzB,mBAAW,CAAC,MAAM,SAAS,KAAK,KAAK,GAAG;AACtC,mBAAS,WAAW,QAAQ,CAAC;AAAA,QAC/B;AAAA,MACF,WAAW,MAAM,QAAQ,KAAK,CAAC,GAAG;AAEhC,mBAAW,CAAC,MAAM,SAAS,KAAK,KAAK,GAAG;AACtC,mBAAS,WAAW,QAAQ,CAAC;AAAA,QAC/B;AAAA,MACF,WAAW,OAAO,KAAK,MAAM,UAAU;AAErC,mBAAW,aAAa,OAAO,OAAO,KAAK,CAAC,GAAG;AAC7C,mBAAS,WAAW,QAAQ,CAAC;AAAA,QAC/B;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,WAAS,SAAS;AAClB,UAAQ,IAAI,uBAAgB,WAAW,IAAI,eAAe,YAAY,gBAAgB;AACtF,SAAO;AACT;AAYO,SAAS,uBACd,YACA,YACA,WACA,UACA,eAAuB,KACV;AACb,QAAM,aAA0B,CAAC;AACjC,QAAM,OAAO,oBAAI,IAAY;AAG7B,MAAI,WAAW,IAAI,UAAU,GAAG;AAC9B,eAAW,KAAK;AAAA,MACd,MAAM;AAAA,MACN,MAAM;AAAA,MACN;AAAA,MACA,UAAU;AAAA,MACV,OAAO;AAAA,IACT,CAAC;AACD,SAAK,IAAI,UAAU;AAAA,EACrB;AAGA,aAAW,QAAQ,YAAY;AAC7B,QAAI,KAAK,IAAI,IAAI;AAAG;AAEpB,UAAM,QAAQ,WAAW,MAAM,YAAY,SAAS;AACpD,QAAI,MAAM,SAAS;AACjB,iBAAW,KAAK;AAAA,QACd;AAAA,QACA,MAAM;AAAA,QACN;AAAA,QACA,UAAU,MAAM;AAAA,QAChB,OAAO,MAAM;AAAA,MACf,CAAC;AACD,WAAK,IAAI,IAAI;AAAA,IACf;AAAA,EACF;AAGA,MAAI,YAAY,SAAS,UAAU,GAAG;AACpC,eAAW,WAAW,SAAS,UAAU,GAAG;AAC1C,UAAI,KAAK,IAAI,OAAO;AAAG;AACvB,UAAI,WAAW,IAAI,OAAO,GAAG;AAC3B,mBAAW,KAAK;AAAA,UACd,MAAM;AAAA,UACN,MAAM;AAAA,UACN;AAAA,UACA,UAAU;AAAA,UACV,OAAO;AAAA,QACT,CAAC;AACD,aAAK,IAAI,OAAO;AAAA,MAClB;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;AAYO,SAAS,kBACd,aACA,YACA,WACA,UACA,eAAuB,KACG;AAC1B,QAAM,gBAAgB,oBAAI,IAAyB;AAEnD,aAAW,SAAS,aAAa;AAC/B,UAAM,kBAAkB;AAAA,MACtB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AACA,kBAAc,IAAI,OAAO,eAAe;AAAA,EAC1C;AAEA,SAAO;AACT;AAyBO,SAAS,wBACd,eACA,UAC0B;AAC1B,QAAM,WAAW,oBAAI,IAAyB;AAE9C,aAAW,CAAC,OAAO,UAAU,KAAK,cAAc,QAAQ,GAAG;AACzD,UAAM,qBAAqB,WAAW,OAAO,OAAK,EAAE,SAAS,QAAQ;AACrE,QAAI,mBAAmB,SAAS,GAAG;AACjC,eAAS,IAAI,OAAO,kBAAkB;AAAA,IACxC;AAAA,EACF;AAEA,SAAO;AACT;;;AClKO,SAAS,sBACd,gBACA,eACA,QACA,mBACA,gBACe;AACf,QAAM,UAAyB,CAAC;AAChC,QAAM,cAAc,MAAM,KAAK,cAAc,KAAK,CAAC;AAGnD,QAAM,cAA2B,CAAC;AAElC,WAAS,IAAI,GAAG,IAAI,eAAe,QAAQ,KAAK;AAC9C,UAAM,UAAU,eAAe,CAAC;AAGhC,eAAW,CAAC,YAAY,UAAU,KAAK,cAAc,QAAQ,GAAG;AAC9D,iBAAW,aAAa,YAAY;AAClC,YAAI,UAAU,SAAS,SAAS;AAC9B,sBAAY,KAAK;AAAA,YACf,MAAM;AAAA,YACN;AAAA,YACA,UAAU;AAAA,YACV,MAAM,UAAU;AAAA,YAChB,UAAU,UAAU;AAAA,YACpB,OAAO,UAAU;AAAA,UACnB,CAAC;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAGA,WAAS,IAAI,GAAG,IAAI,YAAY,QAAQ,KAAK;AAC3C,UAAM,SAAS;AAAA,MACb;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA;AAAA,IACF;AAEA,QAAI,UAAU,OAAO,MAAM,SAAS,GAAG;AACrC,cAAQ,KAAK,MAAM;AAAA,IACrB;AAAA,EACF;AAGA,SAAO,mBAAmB,OAAO;AACnC;AAeA,SAAS,wBACP,aACA,YACA,aACA,QACA,mBACA,gBACA,gBACA,gBACoB;AACpB,QAAM,aAAa,YAAY,UAAU;AACzC,QAAM,cAA2B,CAAC,UAAU;AAC5C,QAAM,gBAAgB,oBAAI,IAAI,CAAC,WAAW,UAAU,CAAC;AACrD,QAAM,WAAsB,CAAC;AAC7B,MAAI,eAAe;AAGnB,WAAS,IAAI,aAAa,GAAG,IAAI,YAAY,QAAQ,KAAK;AACxD,UAAM,QAAQ,YAAY,CAAC;AAC3B,UAAM,UAAU,YAAY,YAAY,SAAS,CAAC,EAAE;AACpD,UAAM,MAAM,MAAM,WAAW,UAAU;AAGvC,QAAI,MAAM,OAAO,QAAQ;AACvB;AAAA,IACF;AAGA,aAAS,MAAM,UAAU,GAAG,MAAM,MAAM,UAAU,OAAO;AACvD;AACA,eAAS,KAAK;AAAA,QACZ,MAAM,eAAe,GAAG;AAAA,QACxB,UAAU;AAAA,QACV,UAAU;AAAA,MACZ,CAAC;AAAA,IACH;AAGA,QAAI,CAAC,cAAc,IAAI,MAAM,UAAU,GAAG;AACxC,kBAAY,KAAK,KAAK;AACtB,oBAAc,IAAI,MAAM,UAAU;AAAA,IACpC;AAGA,QAAI,cAAc,SAAS,YAAY,QAAQ;AAC7C;AAAA,IACF;AAAA,EACF;AAGA,MAAI,YAAY,SAAS,GAAG;AAC1B,UAAM,WAAW,YAAY,SAAS,YAAY;AAClD,UAAM,OAAO,YAAY,YAAY,SAAS,CAAC,EAAE,WAAW,YAAY,CAAC,EAAE,WAAW;AAEtF,UAAM,EAAE,OAAO,UAAU,IAAI;AAAA,MAC3B;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,WAAO;AAAA,MACL,OAAO;AAAA,MACP;AAAA,MACA,SAAS;AAAA,MACT;AAAA,MACA,eAAe,YAAY,CAAC,EAAE;AAAA,MAC9B,aAAa,YAAY,YAAY,SAAS,CAAC,EAAE;AAAA,MACjD;AAAA,MACA,SAAS,UAAU,aAAa,WAAW;AAAA,MAC3C;AAAA,MACA,gBAAgB;AAAA,IAClB;AAAA,EACF;AAEA,SAAO;AACT;AAcA,SAAS,qBACP,aACA,aACA,QACA,mBACA,gBACA,gBACA,UACuI;AAGvI,MAAI,YAAY;AAChB,aAAW,QAAQ,aAAa;AAC9B,UAAM,SAAS,KAAK,SAAS,UAAU,OAAO,QAAQ,QACvC,KAAK,SAAS,UAAU,OAAO,QAAQ,QACvC,OAAO,QAAQ,QAAQ;AACtC,iBAAa,KAAK,QAAQ;AAAA,EAC5B;AACA,eAAa,YAAY;AAGzB,QAAM,UAAU,UAAU,aAAa,WAAW;AAClD,QAAM,aAAa,UAAU,IAAM;AAInC,QAAM,OAAO,YAAY,YAAY,SAAS,CAAC,EAAE,WAAW,YAAY,CAAC,EAAE,WAAW;AACtF,QAAM,kBAAkB,YAAY,SAAS,OAAO;AACpD,QAAM,iBAAiB,KAAK,IAAI,GAAG,IAAO,OAAO,eAAgB;AAIjE,MAAI,eAAe;AAEnB,MAAI,YAAY,WAAW,GAAG;AAE5B,UAAM,mBAAmB,eAAe;AAExC,mBAAe,KAAK,IAAI,GAAK,mBAAmB,EAAE;AAAA,EACpD;AAKA,QAAM,gBAAgB;AAAA,IACpB;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAGA,QAAM,UAAU,OAAO;AAGvB,QAAM,eAAe;AACrB,QAAM,gBAAgB,aAAa,QAAQ;AAC3C,QAAM,oBAAoB,iBAAiB,QAAQ;AACnD,QAAM,kBAAkB,eAAe,QAAQ;AAC/C,QAAM,mBAAmB,gBAAgB,QAAQ;AAEjD,QAAM,aAAa,eAAe,gBAAgB,oBAAoB,kBAAkB;AAIxF,QAAM,gBAAgB,KAAK,IAAI,QAAQ,OAAO,QAAQ,KAAK;AAC3D,QAAM,mBAAmB,gBAAgB,QAAQ,QAAQ,QAAQ,YAAY,QAAQ,UAAU,QAAQ;AAGvG,QAAM,kBAAkB,aAAa;AAIrC,QAAM,qBAAqB,YAAY,SAAS,IAAI,WAAW;AAC/D,QAAM,QAAQ,kBAAkB;AAGhC,QAAM,OAAO,eAAe;AAC5B,QAAM,QAAQ,gBAAgB;AAC9B,QAAM,YAAY,oBAAoB;AACtC,QAAM,UAAU,kBAAkB;AAClC,QAAM,WAAW,mBAAmB;AAEpC,SAAO;AAAA,IACL;AAAA,IACA,WAAW;AAAA,MACT;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,UAAU;AAAA;AAAA,IACZ;AAAA,EACF;AACF;AASA,SAAS,UAAU,aAA0B,aAAgC;AAC3E,QAAM,aAAa,IAAI,IAAI,YAAY,IAAI,CAAC,OAAO,UAAU,CAAC,OAAO,KAAK,CAAC,CAAC;AAE5E,WAAS,IAAI,GAAG,IAAI,YAAY,QAAQ,KAAK;AAC3C,UAAM,YAAY,WAAW,IAAI,YAAY,IAAI,CAAC,EAAE,UAAU,KAAK;AACnE,UAAM,YAAY,WAAW,IAAI,YAAY,CAAC,EAAE,UAAU,KAAK;AAE/D,QAAI,YAAY,WAAW;AACzB,aAAO;AAAA,IACT;AAAA,EACF;AAEA,SAAO;AACT;AAUA,SAAS,uBACP,aACA,mBACA,gBACQ;AAER,MAAI,mBAAmB,GAAG;AACxB,WAAO;AAAA,EACT;AAEA,MAAI,WAAW;AAEf,aAAW,QAAQ,aAAa;AAC9B,UAAM,KAAK,kBAAkB,IAAI,KAAK,IAAI,KAAK;AAC/C,UAAM,MAAM,KAAK,IAAI,iBAAiB,EAAE;AACxC,gBAAY;AAAA,EACd;AAGA,QAAM,WAAW,WAAW,YAAY;AAGxC,SAAO,KAAK,IAAI,GAAK,WAAW,EAAE;AACpC;AAQA,SAAS,mBAAmB,SAAuC;AACjE,MAAI,QAAQ,WAAW;AAAG,WAAO,CAAC;AAGlC,QAAM,SAAS,QAAQ,MAAM,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAC/D,QAAM,SAAwB,CAAC;AAC/B,QAAM,UAAU,oBAAI,IAAY;AAEhC,aAAW,UAAU,QAAQ;AAE3B,QAAI,WAAW;AACf,aAAS,MAAM,OAAO,eAAe,OAAO,OAAO,aAAa,OAAO;AACrE,UAAI,QAAQ,IAAI,GAAG,GAAG;AACpB,mBAAW;AACX;AAAA,MACF;AAAA,IACF;AAEA,QAAI,CAAC,UAAU;AACb,aAAO,KAAK,MAAM;AAElB,eAAS,MAAM,OAAO,eAAe,OAAO,OAAO,aAAa,OAAO;AACrE,gBAAQ,IAAI,GAAG;AAAA,MACjB;AAAA,IACF;AAAA,EACF;AAEA,SAAO,OAAO,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAChD;;;ACzWA,IAAM,iBAA8C;AAAA,EAClD,cAAc;AAAA,EACd,WAAW;AAAA,EACX,mBAAmB;AAAA,EACnB,gBAAgB;AAAA,EAChB,UAAU;AAAA,EACV,mBAAmB;AAAA,EACnB,SAAS;AAAA,IACP,OAAO;AAAA,IACP,OAAO;AAAA,IACP,OAAO;AAAA,IACP,WAAW;AAAA,IACX,SAAS;AAAA,IACT,UAAU;AAAA,EACZ;AAAA,EACA,QAAQ;AAAA,EACR,UAAU;AAAA,EACV,yBAAyB;AAC3B;AAKA,IAAM,eAAe,oBAAI,QAA+B;AAQjD,SAAS,kBAAkB,aAAgC,CAAC,GAAgB;AAEjF,QAAM,SAAsC;AAAA,IAC1C,cAAc,WAAW,gBAAgB,eAAe;AAAA,IACxD,WAAW,WAAW,aAAa,eAAe;AAAA,IAClD,mBAAmB,WAAW,qBAAqB,eAAe;AAAA,IAClE,gBAAgB,WAAW,kBAAkB,eAAe;AAAA,IAC5D,UAAU,WAAW,YAAY,eAAe;AAAA,IAChD,mBAAmB,WAAW,qBAAqB,eAAe;AAAA,IAClE,SAAS;AAAA,MACP,OAAO,WAAW,SAAS,SAAS,eAAe,QAAQ;AAAA,MAC3D,OAAO,WAAW,SAAS,SAAS,eAAe,QAAQ;AAAA,MAC3D,OAAO,WAAW,SAAS,SAAS,eAAe,QAAQ;AAAA,MAC3D,WAAW,WAAW,SAAS,aAAa,eAAe,QAAQ;AAAA,MACnE,SAAS,WAAW,SAAS,WAAW,eAAe,QAAQ;AAAA,MAC/D,UAAU,WAAW,SAAS,YAAY,eAAe,QAAQ;AAAA,IACnE;AAAA,IACA,QAAQ,WAAW,UAAU,eAAe;AAAA,IAC5C,UAAU,WAAW,YAAY,eAAe;AAAA,IAChD,yBAAyB,WAAW,2BAA2B,eAAe;AAAA,EAChF;AAEA,QAAM,SAAsB;AAAA,IAC1B,MAAM;AAAA;AAAA;AAAA;AAAA,IAKN,aAAa,OAAO,UAAoB;AACtC,cAAQ,IAAI,+CAAwC;AAGpD,YAAM,QAAqB;AAAA,QACzB,YAAY,CAAC;AAAA,QACb;AAAA,QACA,mBAAmB,oBAAI,IAAI;AAAA,QAC3B,gBAAgB;AAAA,MAClB;AAGA,UAAI,OAAO,kBAAkB,OAAO,UAAU;AAC5C,YAAI;AACF,kBAAQ,IAAI,6CAAsC;AAClD,gBAAM,aAAa,MAAM,yBAAyB,OAAO,QAAQ;AACjE,kBAAQ,IAAI,iBAAY,OAAO,KAAK,MAAM,UAAU,EAAE,MAAM,sBAAsB;AAAA,QACpF,SAAS,OAAO;AACd,kBAAQ,MAAM,0CAAgC,KAAK;AAAA,QAErD;AAAA,MACF;AAGA,YAAM,OAAQ,MAAM,MAAc,MAAM;AACxC,UAAI,MAAM;AACR,cAAM,iBAAiB,OAAO,KAAK,IAAI,EAAE;AACzC,cAAM,oBAAoB,6BAA6B,MAAM,OAAO,YAAY;AAChF,gBAAQ,IAAI,iDAA0C,MAAM,cAAc,YAAY;AAAA,MACxF;AAGA,mBAAa,IAAI,OAAO,KAAK;AAC7B,cAAQ,IAAI,wCAAmC;AAI/C,mBAAa,MAAM;AACjB,YAAI,OAAQ,WAAmB,2BAA2B,YAAY;AACpE,kBAAQ,IAAI,qCAA8B;AAC1C,UAAC,WAAmB,uBAAuB;AAAA,QAC7C,OAAO;AACL,kBAAQ,KAAK,yDAA+C;AAAA,QAC9D;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AAEA,SAAO;AACT;AAQA,eAAsB,sBACpB,OACA,QACA,UACoC;AACpC,QAAM,YAAY,YAAY,IAAI;AAGlC,QAAM,QAAQ,aAAa,IAAI,KAAK;AAEpC,MAAI,CAAC,OAAO;AACV,YAAQ,MAAM,qCAAgC;AAC9C,UAAM,IAAI,MAAM,8CAA8C;AAAA,EAChE;AAEA,QAAM,EAAE,MAAM,WAAW,IAAI;AAE7B,MAAI,CAAC,QAAQ,OAAO,SAAS,UAAU;AACrC,WAAO,EAAE,SAAS,EAAE,WAAW,OAAO,KAAK,EAAE,GAAG,MAAM,CAAC,GAAG,OAAO,EAAE;AAAA,EACrE;AAGA,QAAM,eAAgB,cAAc,WAAW,CAAC,KAAM,MAAM,OAAO;AAGnE,QAAM,cAAc,SAAS,IAAI;AAEjC,MAAI,YAAY,WAAW,GAAG;AAC5B,WAAO,EAAE,SAAS,EAAE,WAAW,OAAO,KAAK,EAAE,GAAG,MAAM,CAAC,GAAG,OAAO,EAAE;AAAA,EACrE;AAGA,QAAM,YAAY,MAAM,OAAO,oBAC3B,2BAA2B,aAAa,MAAM,OAAO,SAAS,IAC9D,MAAM,OAAO;AAEjB,UAAQ,IAAI,mCAA4B,IAAI,MAAM,YAAY,MAAM,uBAAuB,SAAS,GAAG;AAGvG,MAAI;AAEJ,MAAI;AAGF,UAAM,YAAa,MAAc,MAAM;AAEvC,QAAI,CAAC,WAAW;AACd,cAAQ,MAAM,gDAA2C;AACzD,aAAO,EAAE,SAAS,EAAE,WAAW,OAAO,KAAK,EAAE,GAAG,MAAM,CAAC,GAAG,OAAO,EAAE;AAAA,IACrE;AAEA,YAAQ,IAAI,qCAA8B,OAAO,KAAK,aAAa,CAAC,CAAC,CAAC;AAGtE,QAAI,YAAY;AAGhB,QAAI,UAAU,UAAU,YAAY,GAAG,MAAM;AAC3C,kBAAY,UAAU,QAAQ,YAAY,EAAE;AAC5C,cAAQ,IAAI,4DAAuD;AAAA,IACrE,WAES,UAAU,YAAY,GAAG,MAAM;AACtC,kBAAY,UAAU,YAAY,EAAE;AACpC,cAAQ,IAAI,6DAAwD;AAAA,IACtE;AAEA,QAAI,CAAC,WAAW;AACd,cAAQ,MAAM,6CAAwC,YAAY;AAClE,cAAQ,MAAM,qCAAqC,OAAO,KAAK,SAAS,CAAC;AACzE,aAAO,EAAE,SAAS,EAAE,WAAW,OAAO,KAAK,EAAE,GAAG,MAAM,CAAC,GAAG,OAAO,EAAE;AAAA,IACrE;AAEA,iBAAa,+BAA+B,SAAS;AACrD,YAAQ,IAAI,uBAAgB,WAAW,IAAI,0BAA0B;AAAA,EACvE,SAAS,OAAO;AACd,YAAQ,MAAM,wCAAmC,KAAK;AACtD,WAAO,EAAE,SAAS,EAAE,WAAW,OAAO,KAAK,EAAE,GAAG,MAAM,CAAC,GAAG,OAAO,EAAE;AAAA,EACrE;AAGA,QAAM,gBAAgB;AAAA,IACpB;AAAA,IACA;AAAA,IACA;AAAA,IACA,MAAM,OAAO,iBAAiB,MAAM,aAAa;AAAA,IACjD,MAAM,OAAO;AAAA,EACf;AAGA,QAAM,qBAAqB;AAAA,IACzB;AAAA,IACA,MAAM,OAAO;AAAA,EACf;AAEA,UAAQ,IAAI,+BAAwB,MAAM,KAAK,mBAAmB,OAAO,CAAC,EAAE,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,QAAQ,CAAC,CAAC,QAAQ;AAGzH,QAAM,kBAAmC,CAAC;AAE1C,UAAQ,IAAI,yCAAkC;AAAA,IAC5C,UAAU,OAAO,KAAM,MAAc,QAAQ,CAAC,CAAC;AAAA,IAC/C,SAAS,CAAC,CAAG,MAAc,MAAM;AAAA,IACjC,UAAW,MAAc,MAAM,OAAO,OAAQ,MAAc,KAAK,OAAO;AAAA,EAC1E,CAAC;AAGD,MAAI,OAA4B,CAAC;AAGjC,MAAK,MAAc,MAAM,MAAM,MAAM;AACnC,WAAQ,MAAc,KAAK,KAAK;AAChC,YAAQ,IAAI,2CAAsC;AAAA,EACpD,WAEU,MAAc,MAAM,QAAQ,OAAQ,MAAc,KAAK,SAAS,UAAU;AAElF,UAAM,WAAW,OAAO,KAAM,MAAc,KAAK,IAAI,EAAE,CAAC;AACxD,QAAI,YAAY,aAAa,iCAAiC,aAAa,SAAS;AAClF,aAAQ,MAAc,KAAK;AAC3B,cAAQ,IAAI,+CAA0C;AAAA,IACxD;AAAA,EACF;AAEA,MAAI,OAAO,KAAK,IAAI,EAAE,WAAW,GAAG;AAClC,YAAQ,IAAI,0DAAqD;AAAA,MAC/D,aAAa,CAAC,CAAG,MAAc,MAAM;AAAA,MACrC,cAAe,MAAc,MAAM,OAAO,OAAO,KAAM,MAAc,KAAK,IAAI,IAAI;AAAA,MAClF,iBAAiB,CAAC,CAAG,MAAc,MAAM,MAAM;AAAA,MAC/C,mBAAoB,MAAc,MAAM,MAAM,OAAO,OAAO,KAAM,MAAc,KAAK,KAAK,IAAI,EAAE,SAAS;AAAA,IAC3G,CAAC;AAAA,EACH;AAEA,UAAQ,IAAI,+BAAwB,OAAO,KAAK,IAAI,EAAE,MAAM,YAAY;AAExE,aAAW,CAAC,OAAO,GAAG,KAAK,OAAO,QAAQ,IAAI,GAAG;AAC/C,UAAM,OAAO,IAAI,YAAY;AAE7B,QAAI,CAAC,QAAQ,OAAO,SAAS,UAAU;AACrC;AAAA,IACF;AAGA,UAAM,YAAY,SAAS,IAAI;AAI/B,UAAM,UAAU;AAAA,MACd;AAAA,MACA;AAAA,MACA;AAAA,QACE,SAAS,MAAM,OAAO;AAAA,QACtB,QAAQ,MAAM,OAAO;AAAA,QACrB,yBAAyB,MAAM,OAAO;AAAA,MACxC;AAAA,MACA,MAAM;AAAA,MACN,MAAM;AAAA,IACR;AAEA,QAAI,QAAQ,SAAS,GAAG;AAEtB,YAAM,WAAW,KAAK,IAAI,GAAG,QAAQ,IAAI,OAAK,EAAE,KAAK,CAAC;AAEtD,sBAAgB,KAAK;AAAA,QACnB,IAAI;AAAA,QACJ;AAAA,QACA,OAAO;AAAA,QACP,UAAU;AAAA,MACZ,CAAC;AAAA,IACH;AAAA,EACF;AAGA,kBAAgB,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAGhD,QAAM,QAAQ,OAAO,SAAS,gBAAgB;AAC9C,QAAM,iBAAiB,gBAAgB,MAAM,GAAG,KAAK;AAGrD,QAAM,OAAO,eAAe,IAAI,YAAU;AAAA,IACxC,IAAI,MAAM;AAAA,IACV,OAAO,MAAM;AAAA,IACb,UAAU,MAAM;AAAA;AAAA,IAEhB,UAAU,MAAM;AAAA,EAClB,EAAE;AAEF,QAAM,UAAU,YAAY,IAAI,IAAI;AAEpC,UAAQ,IAAI,gBAAW,KAAK,MAAM,eAAe,QAAQ,QAAQ,CAAC,CAAC,cAAc,KAAK,GAAG;AAEzF,SAAO;AAAA,IACL,SAAS;AAAA,MACP,WAAW,GAAG,QAAQ,QAAQ,CAAC,CAAC;AAAA,MAChC,KAAK,KAAK,MAAM,UAAU,GAAO;AAAA;AAAA,IACnC;AAAA,IACA;AAAA,IACA,OAAO,KAAK;AAAA,EACd;AACF;AAKA,eAAe,yBACb,gBACqB;AACrB,MAAI;AACF,YAAQ,IAAI,0DAAmD;AAG/D,UAAM,EAAE,aAAa,IAAI,MAAM,OAAO,uBAAuB;AAE7D,UAAM,WAAW,aAAa,eAAe,KAAK,eAAe,UAAU;AAG3E,UAAM,EAAE,MAAM,MAAM,IAAI,MAAM,SAAS,IAAI,iBAAiB;AAE5D,YAAQ,IAAI,2CAAoC;AAAA,MAC9C,UAAU,CAAC,CAAC;AAAA,MACZ,cAAc,OAAO;AAAA,MACrB,SAAS,CAAC,CAAC;AAAA,MACX,UAAU,OAAO;AAAA,MACjB,UAAU,OAAO,OAAO,KAAK,IAAI,EAAE,SAAS;AAAA,IAC9C,CAAC;AAED,QAAI,OAAO;AACT,YAAM,IAAI,MAAM,mBAAmB,MAAM,OAAO,EAAE;AAAA,IACpD;AAEA,UAAM,aAAa,QAAQ,CAAC;AAC5B,YAAQ,IAAI,oBAAa,OAAO,KAAK,UAAU,EAAE,MAAM,gCAAgC;AAEvF,WAAO;AAAA,EACT,SAAS,OAAO;AACd,YAAQ,MAAM,iDAA4C,KAAK;AAC/D,UAAM;AAAA,EACR;AACF;AAKA,SAAS,6BACP,MACA,cACqB;AACrB,QAAM,KAAK,oBAAI,IAAoB;AAEnC,aAAW,OAAO,OAAO,OAAO,IAAI,GAAG;AACrC,UAAM,OAAO,IAAI,YAAY;AAE7B,QAAI,CAAC,QAAQ,OAAO,SAAS,UAAU;AACrC;AAAA,IACF;AAGA,UAAM,QAAQ,IAAI,IAAI,SAAS,IAAI,CAAC;AAGpC,eAAW,QAAQ,OAAO;AACxB,SAAG,IAAI,OAAO,GAAG,IAAI,IAAI,KAAK,KAAK,CAAC;AAAA,IACtC;AAAA,EACF;AAEA,SAAO;AACT;AAQA,SAAS,cAAc,MAAsB;AAC3C,SAAO,KACJ,YAAY,EACZ,UAAU,KAAK,EACf,QAAQ,oBAAoB,EAAE,EAE9B,QAAQ,gFAAgF,GAAG,EAC3F,QAAQ,6DAA6D,EAAE,EACvE,QAAQ,mBAAmB,GAAG,EAC9B,QAAQ,4BAA4B,GAAG,EACvC,QAAQ,QAAQ,GAAG,EACnB,KAAK;AACV;AAQA,SAAS,SAAS,MAAwB;AAExC,SAAO,cAAc,IAAI,EACtB,MAAM,KAAK,EACX,OAAO,WAAS,MAAM,SAAS,CAAC;AACrC","sourcesContent":["/**\n * Fuzzy matching utilities using bounded Levenshtein distance\n * \n * This is the same algorithm used by Orama's match-highlight plugin\n * for consistent fuzzy matching behavior.\n */\n\n/**\n * Result of bounded Levenshtein distance calculation\n */\nexport interface BoundedLevenshteinResult {\n /** Whether the distance is within bounds */\n isBounded: boolean;\n /** The actual distance (only valid if isBounded is true) */\n distance: number;\n}\n\n/**\n * Calculate bounded Levenshtein distance between two strings\n * \n * Stops early if distance exceeds the bound for better performance.\n * This is the same algorithm as Orama's internal boundedLevenshtein.\n * \n * @param a - First string\n * @param b - Second string\n * @param bound - Maximum allowed distance\n * @returns Result indicating if strings are within bound and the distance\n */\nexport function boundedLevenshtein(\n a: string,\n b: string,\n bound: number\n): BoundedLevenshteinResult {\n // Quick checks\n if (a === b) {\n return { isBounded: true, distance: 0 };\n }\n\n const aLen = a.length;\n const bLen = b.length;\n\n // If length difference exceeds bound, no need to calculate\n if (Math.abs(aLen - bLen) > bound) {\n return { isBounded: false, distance: bound + 1 };\n }\n\n // Swap to ensure a is shorter (optimization)\n if (aLen > bLen) {\n [a, b] = [b, a];\n }\n\n const m = a.length;\n const n = b.length;\n\n // Use single array instead of matrix (memory optimization)\n let prevRow = new Array(n + 1);\n let currRow = new Array(n + 1);\n\n // Initialize first row\n for (let j = 0; j <= n; j++) {\n prevRow[j] = j;\n }\n\n for (let i = 1; i <= m; i++) {\n currRow[0] = i;\n let minInRow = i;\n\n for (let j = 1; j <= n; j++) {\n const cost = a[i - 1] === b[j - 1] ? 0 : 1;\n\n currRow[j] = Math.min(\n prevRow[j] + 1, // deletion\n currRow[j - 1] + 1, // insertion\n prevRow[j - 1] + cost // substitution\n );\n\n minInRow = Math.min(minInRow, currRow[j]);\n }\n\n // Early termination: if all values in row exceed bound, we're done\n if (minInRow > bound) {\n return { isBounded: false, distance: bound + 1 };\n }\n\n // Swap rows for next iteration\n [prevRow, currRow] = [currRow, prevRow];\n }\n\n const distance = prevRow[n];\n return {\n isBounded: distance <= bound,\n distance\n };\n}\n\n/**\n * Check if a word matches a query token with fuzzy matching\n * \n * @param word - Word from document\n * @param queryToken - Token from search query\n * @param tolerance - Maximum edit distance allowed\n * @returns Match result with score\n */\nexport function fuzzyMatch(\n word: string,\n queryToken: string,\n tolerance: number\n): { matches: boolean; distance: number; score: number } {\n // Exact match\n if (word === queryToken) {\n return { matches: true, distance: 0, score: 1.0 };\n }\n\n // Prefix match (high score, no distance)\n if (word.startsWith(queryToken)) {\n return { matches: true, distance: 0, score: 0.95 };\n }\n\n // Fuzzy match with tolerance\n const result = boundedLevenshtein(word, queryToken, tolerance);\n \n if (result.isBounded) {\n // Score decreases with distance\n // distance 1 = 0.8, distance 2 = 0.6, etc.\n const score = 1.0 - (result.distance * 0.2);\n return {\n matches: true,\n distance: result.distance,\n score: Math.max(0.1, score) // Minimum score of 0.1\n };\n }\n\n return { matches: false, distance: tolerance + 1, score: 0 };\n}\n\n/**\n * Calculate adaptive tolerance based on query length\n * \n * Longer queries get higher tolerance for better fuzzy matching.\n * \n * @param queryTokens - Array of query tokens\n * @param baseTolerance - Base tolerance value\n * @returns Calculated tolerance (always an integer)\n */\nexport function calculateAdaptiveTolerance(\n queryTokens: string[],\n baseTolerance: number\n): number {\n const queryLength = queryTokens.length;\n \n if (queryLength <= 2) {\n return baseTolerance;\n } else if (queryLength <= 4) {\n return baseTolerance + 1;\n } else if (queryLength <= 6) {\n return baseTolerance + 2;\n } else {\n return baseTolerance + 3;\n }\n}\n","/**\n * Candidate expansion: Find all possible matches for query tokens\n * including exact matches, fuzzy matches, and synonyms\n */\n\nimport { fuzzyMatch } from './fuzzy.js';\nimport type { Candidate, SynonymMap } from './types.js';\n\n/**\n * Extract all unique words from the radix tree index\n * \n * @param radixNode - Root node of the radix tree\n * @returns Set of all unique words in the index\n */\nexport function extractVocabularyFromRadixTree(radixNode: any): Set<string> {\n const vocabulary = new Set<string>();\n let nodesVisited = 0;\n let wordsFound = 0;\n \n function traverse(node: any, depth: number = 0) {\n if (!node) {\n return;\n }\n \n nodesVisited++;\n \n // Check if this node represents a complete word\n // e = true means it's an end of a word\n if (node.e && node.w && typeof node.w === 'string' && node.w.length > 0) {\n vocabulary.add(node.w);\n wordsFound++;\n }\n \n // Children can be Map, Array, or Object\n if (node.c) {\n if (node.c instanceof Map) {\n // Map format\n for (const [_key, childNode] of node.c) {\n traverse(childNode, depth + 1);\n }\n } else if (Array.isArray(node.c)) {\n // Array format: [[key, childNode], ...]\n for (const [_key, childNode] of node.c) {\n traverse(childNode, depth + 1);\n }\n } else if (typeof node.c === 'object') {\n // Object format: {key: childNode, ...}\n for (const childNode of Object.values(node.c)) {\n traverse(childNode, depth + 1);\n }\n }\n }\n }\n \n traverse(radixNode);\n console.log(`📚 Extracted ${vocabulary.size} words from ${nodesVisited} nodes visited`);\n return vocabulary;\n}\n\n/**\n * Find all candidate matches for a single query token\n * \n * @param queryToken - Token from search query\n * @param vocabulary - Set of all words in the index\n * @param tolerance - Fuzzy matching tolerance\n * @param synonyms - Synonym map (optional)\n * @param synonymScore - Score multiplier for synonym matches\n * @returns Array of candidate matches\n */\nexport function findCandidatesForToken(\n queryToken: string,\n vocabulary: Set<string>,\n tolerance: number,\n synonyms?: SynonymMap,\n synonymScore: number = 0.8\n): Candidate[] {\n const candidates: Candidate[] = [];\n const seen = new Set<string>();\n\n // 1. Check for exact match\n if (vocabulary.has(queryToken)) {\n candidates.push({\n word: queryToken,\n type: 'exact',\n queryToken,\n distance: 0,\n score: 1.0\n });\n seen.add(queryToken);\n }\n\n // 2. Check for fuzzy matches\n for (const word of vocabulary) {\n if (seen.has(word)) continue;\n\n const match = fuzzyMatch(word, queryToken, tolerance);\n if (match.matches) {\n candidates.push({\n word,\n type: 'fuzzy',\n queryToken,\n distance: match.distance,\n score: match.score\n });\n seen.add(word);\n }\n }\n\n // 3. Check for synonym matches\n if (synonyms && synonyms[queryToken]) {\n for (const synonym of synonyms[queryToken]) {\n if (seen.has(synonym)) continue;\n if (vocabulary.has(synonym)) {\n candidates.push({\n word: synonym,\n type: 'synonym',\n queryToken,\n distance: 0,\n score: synonymScore\n });\n seen.add(synonym);\n }\n }\n }\n\n return candidates;\n}\n\n/**\n * Find candidates for all query tokens\n * \n * @param queryTokens - Array of tokens from search query\n * @param vocabulary - Set of all words in the index\n * @param tolerance - Fuzzy matching tolerance\n * @param synonyms - Synonym map (optional)\n * @param synonymScore - Score multiplier for synonym matches\n * @returns Map of query tokens to their candidate matches\n */\nexport function findAllCandidates(\n queryTokens: string[],\n vocabulary: Set<string>,\n tolerance: number,\n synonyms?: SynonymMap,\n synonymScore: number = 0.8\n): Map<string, Candidate[]> {\n const candidatesMap = new Map<string, Candidate[]>();\n\n for (const token of queryTokens) {\n const tokenCandidates = findCandidatesForToken(\n token,\n vocabulary,\n tolerance,\n synonyms,\n synonymScore\n );\n candidatesMap.set(token, tokenCandidates);\n }\n\n return candidatesMap;\n}\n\n/**\n * Get total number of candidates across all tokens\n * \n * @param candidatesMap - Map of token to candidates\n * @returns Total count of all candidates\n */\nexport function getTotalCandidateCount(\n candidatesMap: Map<string, Candidate[]>\n): number {\n let total = 0;\n for (const candidates of candidatesMap.values()) {\n total += candidates.length;\n }\n return total;\n}\n\n/**\n * Filter candidates by minimum score threshold\n * \n * @param candidatesMap - Map of token to candidates\n * @param minScore - Minimum score threshold\n * @returns Filtered candidates map\n */\nexport function filterCandidatesByScore(\n candidatesMap: Map<string, Candidate[]>,\n minScore: number\n): Map<string, Candidate[]> {\n const filtered = new Map<string, Candidate[]>();\n\n for (const [token, candidates] of candidatesMap.entries()) {\n const filteredCandidates = candidates.filter(c => c.score >= minScore);\n if (filteredCandidates.length > 0) {\n filtered.set(token, filteredCandidates);\n }\n }\n\n return filtered;\n}\n","/**\n * Phrase scoring algorithm with semantic weighting\n */\n\nimport type { WordMatch, PhraseMatch, Candidate, GapWord } from './types.js';\n\n/**\n * Configuration for phrase scoring\n */\nexport interface ScoringConfig {\n weights: {\n exact: number;\n fuzzy: number;\n order: number;\n proximity: number;\n density: number;\n semantic: number;\n };\n maxGap: number;\n /** \n * Multiplier for proximity window calculation.\n * proximityWindow = queryTokens.length × proximitySpanMultiplier\n */\n proximitySpanMultiplier: number;\n}\n\n/**\n * Find all phrase matches in a document\n * \n * @param documentTokens - Tokenized document content (needed to extract gap words)\n * @param candidatesMap - Map of query tokens to their candidates\n * @param config - Scoring configuration\n * @param documentFrequency - Document frequency map for TF-IDF\n * @param totalDocuments - Total number of documents\n * @returns Array of phrase matches\n */\nexport function findPhrasesInDocument(\n documentTokens: string[],\n candidatesMap: Map<string, Candidate[]>,\n config: ScoringConfig,\n documentFrequency: Map<string, number>,\n totalDocuments: number\n): PhraseMatch[] {\n const phrases: PhraseMatch[] = [];\n const queryTokens = Array.from(candidatesMap.keys());\n\n // Find all word matches in document\n const wordMatches: WordMatch[] = [];\n \n for (let i = 0; i < documentTokens.length; i++) {\n const docWord = documentTokens[i];\n \n // Check if this word matches any query token\n for (const [queryToken, candidates] of candidatesMap.entries()) {\n for (const candidate of candidates) {\n if (candidate.word === docWord) {\n wordMatches.push({\n word: docWord,\n queryToken,\n position: i,\n type: candidate.type,\n distance: candidate.distance,\n score: candidate.score\n });\n }\n }\n }\n }\n\n // Build phrases from word matches using sliding window\n for (let i = 0; i < wordMatches.length; i++) {\n const phrase = buildPhraseFromPosition(\n wordMatches,\n i,\n queryTokens,\n config,\n documentFrequency,\n totalDocuments,\n wordMatches,\n documentTokens // Pass document tokens to extract gap words\n );\n \n if (phrase && phrase.words.length > 0) {\n phrases.push(phrase);\n }\n }\n\n // Deduplicate and sort by score\n return deduplicatePhrases(phrases);\n}\n\n/**\n * Build a phrase starting from a specific word match position\n * \n * @param wordMatches - All word matches in document\n * @param startIndex - Starting index in wordMatches array\n * @param queryTokens - Original query tokens\n * @param config - Scoring configuration\n * @param documentFrequency - Document frequency map\n * @param totalDocuments - Total document count\n * @param allWordMatches - All word matches in document (for density calculation)\n * @param documentTokens - Original document tokens (for gap word extraction)\n * @returns Phrase match or null\n */\nfunction buildPhraseFromPosition(\n wordMatches: WordMatch[],\n startIndex: number,\n queryTokens: string[],\n config: ScoringConfig,\n documentFrequency: Map<string, number>,\n totalDocuments: number,\n allWordMatches: WordMatch[],\n documentTokens: string[]\n): PhraseMatch | null {\n const startMatch = wordMatches[startIndex];\n const phraseWords: WordMatch[] = [startMatch];\n const coveredTokens = new Set([startMatch.queryToken]);\n const gapWords: GapWord[] = [];\n let totalGapUsed = 0;\n\n // Look for nearby matches to complete the phrase\n for (let i = startIndex + 1; i < wordMatches.length; i++) {\n const match = wordMatches[i];\n const lastPos = phraseWords[phraseWords.length - 1].position;\n const gap = match.position - lastPos - 1;\n\n // Stop if gap exceeds maximum\n if (gap > config.maxGap) {\n break;\n }\n\n // Track gap words between last match and current match\n for (let pos = lastPos + 1; pos < match.position; pos++) {\n totalGapUsed++;\n gapWords.push({\n word: documentTokens[pos],\n position: pos,\n gapIndex: totalGapUsed\n });\n }\n\n // Add if it's a different query token\n if (!coveredTokens.has(match.queryToken)) {\n phraseWords.push(match);\n coveredTokens.add(match.queryToken);\n }\n\n // Stop if we have all query tokens\n if (coveredTokens.size === queryTokens.length) {\n break;\n }\n }\n\n // Calculate phrase score\n if (phraseWords.length > 0) {\n const coverage = phraseWords.length / queryTokens.length;\n const span = phraseWords[phraseWords.length - 1].position - phraseWords[0].position + 1;\n \n const { score, breakdown } = calculatePhraseScore(\n phraseWords,\n queryTokens,\n config,\n documentFrequency,\n totalDocuments,\n allWordMatches,\n coverage\n );\n\n return {\n words: phraseWords,\n gapWords,\n gapUsed: totalGapUsed,\n coverage,\n startPosition: phraseWords[0].position,\n endPosition: phraseWords[phraseWords.length - 1].position,\n span,\n inOrder: isInOrder(phraseWords, queryTokens),\n score,\n scoreBreakdown: breakdown\n };\n }\n\n return null;\n}\n\n/**\n * Calculate overall phrase score\n * \n * @param phraseWords - Words in the phrase\n * @param queryTokens - Original query tokens\n * @param config - Scoring configuration\n * @param documentFrequency - Document frequency map\n * @param totalDocuments - Total document count\n * @param allWordMatches - All word matches in document (for density calculation)\n * @param coverage - Pre-calculated coverage ratio (phraseWords.length / queryTokens.length)\n * @returns Phrase score (0-1) and detailed component breakdown\n */\nfunction calculatePhraseScore(\n phraseWords: WordMatch[],\n queryTokens: string[],\n config: ScoringConfig,\n documentFrequency: Map<string, number>,\n totalDocuments: number,\n allWordMatches: WordMatch[],\n coverage: number\n): { score: number; breakdown: { base: number; order: number; proximity: number; density: number; semantic: number; coverage: number } } {\n // Base score from word matches\n // Each word contributes: matchScore × typeWeight\n let baseScore = 0;\n for (const word of phraseWords) {\n const weight = word.type === 'exact' ? config.weights.exact :\n word.type === 'fuzzy' ? config.weights.fuzzy : \n config.weights.fuzzy * 0.8; // synonym gets 80% of fuzzy weight\n baseScore += word.score * weight;\n }\n baseScore /= phraseWords.length;\n\n // Order bonus: 1.0 if words appear in query order, 0.5 otherwise\n const inOrder = isInOrder(phraseWords, queryTokens);\n const orderScore = inOrder ? 1.0 : 0.5;\n\n // Proximity bonus (closer words score higher)\n // Uses proximitySpanMultiplier from config instead of hardcoded 5\n const span = phraseWords[phraseWords.length - 1].position - phraseWords[0].position + 1;\n const proximityWindow = queryTokens.length * config.proximitySpanMultiplier;\n const proximityScore = Math.max(0, 1.0 - (span / proximityWindow));\n\n // Density: Only applies to single-word queries (measures word repetition in document)\n // For multi-word phrase queries, density is 0 (coverage handles completeness separately)\n let densityScore = 0;\n \n if (queryTokens.length === 1) {\n // Single-word query: reward repetition\n const totalOccurrences = allWordMatches.length;\n // Cap at reasonable maximum to avoid runaway scores\n densityScore = Math.min(1.0, totalOccurrences / 10);\n }\n // For multi-word queries: densityScore stays 0\n // Coverage is applied as a multiplier at the end instead\n\n // Semantic score (TF-IDF based)\n const semanticScore = calculateSemanticScore(\n phraseWords,\n documentFrequency,\n totalDocuments\n );\n\n // Weighted combination\n const weights = config.weights;\n \n // Calculate weighted components\n const weightedBase = baseScore;\n const weightedOrder = orderScore * weights.order;\n const weightedProximity = proximityScore * weights.proximity;\n const weightedDensity = densityScore * weights.density;\n const weightedSemantic = semanticScore * weights.semantic;\n \n const totalScore = weightedBase + weightedOrder + weightedProximity + weightedDensity + weightedSemantic;\n\n // Calculate max possible score\n // FIX: Use actual max base weight (highest of exact/fuzzy) instead of hardcoded 1.0\n const maxBaseWeight = Math.max(weights.exact, weights.fuzzy);\n const maxPossibleScore = maxBaseWeight + weights.order + weights.proximity + weights.density + weights.semantic;\n \n // Normalize to 0-1 range\n const normalizedScore = totalScore / maxPossibleScore;\n \n // FIX: Apply coverage as a MULTIPLIER for multi-word queries\n // This ensures incomplete matches (2/3) can never outscore complete matches (3/3)\n const coverageMultiplier = queryTokens.length > 1 ? coverage : 1.0;\n const score = normalizedScore * coverageMultiplier;\n\n // Component contributions to the final normalized score (before coverage multiplier)\n const base = weightedBase / maxPossibleScore;\n const order = weightedOrder / maxPossibleScore;\n const proximity = weightedProximity / maxPossibleScore;\n const density = weightedDensity / maxPossibleScore;\n const semantic = weightedSemantic / maxPossibleScore;\n\n return {\n score,\n breakdown: {\n base,\n order,\n proximity,\n density,\n semantic,\n coverage: coverageMultiplier // Show coverage multiplier in breakdown\n }\n };\n}\n\n/**\n * Check if words are in the same order as query tokens\n * \n * @param phraseWords - Words in the phrase\n * @param queryTokens - Original query tokens\n * @returns True if in order\n */\nfunction isInOrder(phraseWords: WordMatch[], queryTokens: string[]): boolean {\n const tokenOrder = new Map(queryTokens.map((token, index) => [token, index]));\n \n for (let i = 1; i < phraseWords.length; i++) {\n const prevOrder = tokenOrder.get(phraseWords[i - 1].queryToken) ?? -1;\n const currOrder = tokenOrder.get(phraseWords[i].queryToken) ?? -1;\n \n if (currOrder < prevOrder) {\n return false;\n }\n }\n \n return true;\n}\n\n/**\n * Calculate semantic score using TF-IDF\n * \n * @param phraseWords - Words in the phrase\n * @param documentFrequency - Document frequency map\n * @param totalDocuments - Total document count\n * @returns Semantic score (0-1)\n */\nfunction calculateSemanticScore(\n phraseWords: WordMatch[],\n documentFrequency: Map<string, number>,\n totalDocuments: number\n): number {\n // Handle edge case: no documents\n if (totalDocuments === 0) {\n return 0;\n }\n \n let tfidfSum = 0;\n \n for (const word of phraseWords) {\n const df = documentFrequency.get(word.word) || 1;\n const idf = Math.log(totalDocuments / df);\n tfidfSum += idf;\n }\n \n // Normalize by phrase length\n const avgTfidf = tfidfSum / phraseWords.length;\n \n // Normalize to 0-1 range (assuming max IDF of ~10)\n return Math.min(1.0, avgTfidf / 10);\n}\n\n/**\n * Deduplicate overlapping phrases, keeping highest scoring ones\n * \n * @param phrases - Array of phrase matches\n * @returns Deduplicated phrases sorted by score\n */\nfunction deduplicatePhrases(phrases: PhraseMatch[]): PhraseMatch[] {\n if (phrases.length === 0) return [];\n\n // Sort by score descending\n const sorted = phrases.slice().sort((a, b) => b.score - a.score);\n const result: PhraseMatch[] = [];\n const covered = new Set<number>();\n\n for (const phrase of sorted) {\n // Check if this phrase overlaps with already selected phrases\n let overlaps = false;\n for (let pos = phrase.startPosition; pos <= phrase.endPosition; pos++) {\n if (covered.has(pos)) {\n overlaps = true;\n break;\n }\n }\n\n if (!overlaps) {\n result.push(phrase);\n // Mark positions as covered\n for (let pos = phrase.startPosition; pos <= phrase.endPosition; pos++) {\n covered.add(pos);\n }\n }\n }\n\n return result.sort((a, b) => b.score - a.score);\n}\n","/**\n * Fuzzy Phrase Plugin for Orama\n * \n * Advanced fuzzy phrase matching with semantic weighting and synonym expansion.\n * Completely independent from QPS - accesses Orama's radix tree directly.\n */\n\nimport type { AnyOrama, OramaPlugin, Results, TypedDocument } from '@wcs-colab/orama';\nimport type { FuzzyPhraseConfig, PluginState, SynonymMap, DocumentMatch } from './types.js';\nimport { calculateAdaptiveTolerance } from './fuzzy.js';\nimport { \n extractVocabularyFromRadixTree, \n findAllCandidates,\n filterCandidatesByScore \n} from './candidates.js';\nimport { findPhrasesInDocument } from './scoring.js';\n\n/**\n * Default configuration\n */\nconst DEFAULT_CONFIG: Required<FuzzyPhraseConfig> = {\n textProperty: 'content',\n tolerance: 1,\n adaptiveTolerance: true,\n enableSynonyms: false,\n supabase: undefined as any,\n synonymMatchScore: 0.8,\n weights: {\n exact: 1.0,\n fuzzy: 0.8,\n order: 0.3,\n proximity: 0.2,\n density: 0.2,\n semantic: 0.15\n },\n maxGap: 5,\n minScore: 0.1,\n proximitySpanMultiplier: 5\n};\n\n/**\n * Plugin state storage (keyed by Orama instance)\n */\nconst pluginStates = new WeakMap<AnyOrama, PluginState>();\n\n/**\n * Create the Fuzzy Phrase Plugin\n * \n * @param userConfig - User configuration options\n * @returns Orama plugin instance\n */\nexport function pluginFuzzyPhrase(userConfig: FuzzyPhraseConfig = {}): OramaPlugin {\n // Merge user config with defaults\n const config: Required<FuzzyPhraseConfig> = {\n textProperty: userConfig.textProperty ?? DEFAULT_CONFIG.textProperty,\n tolerance: userConfig.tolerance ?? DEFAULT_CONFIG.tolerance,\n adaptiveTolerance: userConfig.adaptiveTolerance ?? DEFAULT_CONFIG.adaptiveTolerance,\n enableSynonyms: userConfig.enableSynonyms ?? DEFAULT_CONFIG.enableSynonyms,\n supabase: userConfig.supabase || DEFAULT_CONFIG.supabase,\n synonymMatchScore: userConfig.synonymMatchScore ?? DEFAULT_CONFIG.synonymMatchScore,\n weights: {\n exact: userConfig.weights?.exact ?? DEFAULT_CONFIG.weights.exact,\n fuzzy: userConfig.weights?.fuzzy ?? DEFAULT_CONFIG.weights.fuzzy,\n order: userConfig.weights?.order ?? DEFAULT_CONFIG.weights.order,\n proximity: userConfig.weights?.proximity ?? DEFAULT_CONFIG.weights.proximity,\n density: userConfig.weights?.density ?? DEFAULT_CONFIG.weights.density,\n semantic: userConfig.weights?.semantic ?? DEFAULT_CONFIG.weights.semantic\n },\n maxGap: userConfig.maxGap ?? DEFAULT_CONFIG.maxGap,\n minScore: userConfig.minScore ?? DEFAULT_CONFIG.minScore,\n proximitySpanMultiplier: userConfig.proximitySpanMultiplier ?? DEFAULT_CONFIG.proximitySpanMultiplier\n };\n\n const plugin: OramaPlugin = {\n name: 'fuzzy-phrase',\n\n /**\n * Initialize plugin after index is created\n */\n afterCreate: async (orama: AnyOrama) => {\n console.log('🔮 Initializing Fuzzy Phrase Plugin...');\n\n // Initialize state\n const state: PluginState = {\n synonymMap: {},\n config,\n documentFrequency: new Map(),\n totalDocuments: 0\n };\n\n // Load synonyms from Supabase if enabled\n if (config.enableSynonyms && config.supabase) {\n try {\n console.log('📖 Loading synonyms from Supabase...');\n state.synonymMap = await loadSynonymsFromSupabase(config.supabase);\n console.log(`✅ Loaded ${Object.keys(state.synonymMap).length} words with synonyms`);\n } catch (error) {\n console.error('⚠️ Failed to load synonyms:', error);\n // Continue without synonyms\n }\n }\n\n // Calculate document frequencies for TF-IDF from document store\n const docs = (orama.data as any)?.docs?.docs;\n if (docs) {\n state.totalDocuments = Object.keys(docs).length;\n state.documentFrequency = calculateDocumentFrequencies(docs, config.textProperty);\n console.log(`📊 Calculated document frequencies for ${state.totalDocuments} documents`);\n }\n\n // Store state\n pluginStates.set(orama, state);\n console.log('✅ Fuzzy Phrase Plugin initialized');\n \n // Signal ready - emit a custom event that can be listened to\n // Use setImmediate to ensure this runs after the afterCreate hook completes\n setImmediate(() => {\n if (typeof (globalThis as any).fuzzyPhrasePluginReady === 'function') {\n console.log('📡 Signaling plugin ready...');\n (globalThis as any).fuzzyPhrasePluginReady();\n } else {\n console.warn('⚠️ fuzzyPhrasePluginReady callback not found');\n }\n });\n }\n };\n\n return plugin;\n}\n\n/**\n * Search with fuzzy phrase matching\n * \n * This function should be called instead of the regular search() function\n * to enable fuzzy phrase matching.\n */\nexport async function searchWithFuzzyPhrase<T extends AnyOrama>(\n orama: T, \n params: { term?: string; properties?: string[]; limit?: number },\n language?: string\n): Promise<Results<TypedDocument<T>>> {\n const startTime = performance.now();\n \n // Get plugin state\n const state = pluginStates.get(orama);\n \n if (!state) {\n console.error('❌ Plugin state not initialized');\n throw new Error('Fuzzy Phrase Plugin not properly initialized');\n }\n\n const { term, properties } = params;\n \n if (!term || typeof term !== 'string') {\n return { elapsed: { formatted: '0ms', raw: 0 }, hits: [], count: 0 };\n }\n\n // Use specified property or default\n const textProperty = (properties && properties[0]) || state.config.textProperty;\n\n // Tokenize query\n const queryTokens = tokenize(term);\n \n if (queryTokens.length === 0) {\n return { elapsed: { formatted: '0ms', raw: 0 }, hits: [], count: 0 };\n }\n\n // Calculate tolerance (adaptive or fixed)\n const tolerance = state.config.adaptiveTolerance\n ? calculateAdaptiveTolerance(queryTokens, state.config.tolerance)\n : state.config.tolerance;\n\n console.log(`🔍 Fuzzy phrase search: \"${term}\" (${queryTokens.length} tokens, tolerance: ${tolerance})`);\n\n // Extract vocabulary from radix tree\n let vocabulary: Set<string>;\n \n try {\n // Access radix tree - the actual index data is in orama.data.index, not orama.index\n // orama.index is just the component interface (methods)\n const indexData = (orama as any).data?.index;\n \n if (!indexData) {\n console.error('❌ No index data found in orama.data.index');\n return { elapsed: { formatted: '0ms', raw: 0 }, hits: [], count: 0 };\n }\n \n console.log('🔍 DEBUG: Index data keys:', Object.keys(indexData || {}));\n \n // Try different paths to find the radix tree\n let radixNode = null;\n \n // Path 1: QPS-style (orama.data.index.indexes[property].node)\n if (indexData.indexes?.[textProperty]?.node) {\n radixNode = indexData.indexes[textProperty].node;\n console.log('✅ Found radix via QPS-style path (data.index.indexes)');\n }\n // Path 2: Standard Orama (orama.data.index[property].node)\n else if (indexData[textProperty]?.node) {\n radixNode = indexData[textProperty].node;\n console.log('✅ Found radix via standard path (data.index[property])');\n }\n \n if (!radixNode) {\n console.error('❌ Radix tree not found for property:', textProperty);\n console.error(' Available properties in index:', Object.keys(indexData));\n return { elapsed: { formatted: '0ms', raw: 0 }, hits: [], count: 0 };\n }\n\n vocabulary = extractVocabularyFromRadixTree(radixNode);\n console.log(`📚 Extracted ${vocabulary.size} unique words from index`);\n } catch (error) {\n console.error('❌ Failed to extract vocabulary:', error);\n return { elapsed: { formatted: '0ms', raw: 0 }, hits: [], count: 0 };\n }\n\n // Find candidates for all query tokens\n const candidatesMap = findAllCandidates(\n queryTokens,\n vocabulary,\n tolerance,\n state.config.enableSynonyms ? state.synonymMap : undefined,\n state.config.synonymMatchScore\n );\n\n // Filter by minimum score\n const filteredCandidates = filterCandidatesByScore(\n candidatesMap,\n state.config.minScore\n );\n\n console.log(`🎯 Found candidates: ${Array.from(filteredCandidates.values()).reduce((sum, c) => sum + c.length, 0)} total`);\n\n // Search through all documents\n const documentMatches: DocumentMatch[] = [];\n \n console.log('🔍 DEBUG orama.data structure:', {\n dataKeys: Object.keys((orama as any).data || {}),\n hasDocs: !!((orama as any).data?.docs),\n docsType: (orama as any).data?.docs ? typeof (orama as any).data.docs : 'undefined'\n });\n \n // Try multiple possible document storage locations\n let docs: Record<string, any> = {};\n \n // Access the actual documents - they're nested in orama.data.docs.docs\n if ((orama as any).data?.docs?.docs) {\n docs = (orama as any).data.docs.docs;\n console.log('✅ Found docs at orama.data.docs.docs');\n }\n // Fallback: orama.data.docs (might be the correct structure in some cases)\n else if ((orama as any).data?.docs && typeof (orama as any).data.docs === 'object') {\n // Check if it has document-like properties (not sharedInternalDocumentStore, etc.)\n const firstKey = Object.keys((orama as any).data.docs)[0];\n if (firstKey && firstKey !== 'sharedInternalDocumentStore' && firstKey !== 'count') {\n docs = (orama as any).data.docs;\n console.log('✅ Found docs at orama.data.docs (direct)');\n }\n }\n \n if (Object.keys(docs).length === 0) {\n console.log('❌ Could not find documents - available structure:', {\n hasDataDocs: !!((orama as any).data?.docs),\n dataDocsKeys: (orama as any).data?.docs ? Object.keys((orama as any).data.docs) : 'none',\n hasDataDocsDocs: !!((orama as any).data?.docs?.docs),\n dataDocsDocsCount: (orama as any).data?.docs?.docs ? Object.keys((orama as any).data.docs.docs).length : 0\n });\n }\n \n console.log(`📄 Searching through ${Object.keys(docs).length} documents`);\n\n for (const [docId, doc] of Object.entries(docs)) {\n const text = doc[textProperty];\n \n if (!text || typeof text !== 'string') {\n continue;\n }\n\n // Tokenize document\n const docTokens = tokenize(text);\n\n // Find phrases in this document\n // Note: state.config.weights is guaranteed to have all properties from default merge\n const phrases = findPhrasesInDocument(\n docTokens,\n filteredCandidates,\n {\n weights: state.config.weights as { exact: number; fuzzy: number; order: number; proximity: number; density: number; semantic: number },\n maxGap: state.config.maxGap,\n proximitySpanMultiplier: state.config.proximitySpanMultiplier\n },\n state.documentFrequency,\n state.totalDocuments\n );\n\n if (phrases.length > 0) {\n // Calculate overall document score (highest phrase score)\n const docScore = Math.max(...phrases.map(p => p.score));\n\n documentMatches.push({\n id: docId,\n phrases,\n score: docScore,\n document: doc\n });\n }\n }\n\n // Sort by score descending\n documentMatches.sort((a, b) => b.score - a.score);\n\n // Apply limit if specified\n const limit = params.limit ?? documentMatches.length;\n const limitedMatches = documentMatches.slice(0, limit);\n\n // Convert to Orama results format\n const hits = limitedMatches.map(match => ({\n id: match.id,\n score: match.score,\n document: match.document,\n // Store phrases for highlighting\n _phrases: match.phrases\n })) as any[];\n\n const elapsed = performance.now() - startTime;\n\n console.log(`✅ Found ${hits.length} results in ${elapsed.toFixed(2)}ms (limit: ${limit})`);\n\n return {\n elapsed: {\n formatted: `${elapsed.toFixed(2)}ms`,\n raw: Math.floor(elapsed * 1000000) // nanoseconds\n },\n hits,\n count: hits.length\n } as any;\n}\n\n/**\n * Load synonyms from Supabase\n */\nasync function loadSynonymsFromSupabase(\n supabaseConfig: { url: string; serviceKey: string }\n): Promise<SynonymMap> {\n try {\n console.log('🔍 DEBUG: Calling Supabase RPC get_synonym_map...');\n \n // Dynamic import to avoid bundling Supabase client if not needed\n const { createClient } = await import('@supabase/supabase-js');\n \n const supabase = createClient(supabaseConfig.url, supabaseConfig.serviceKey);\n \n // Call the get_synonym_map function\n const { data, error } = await supabase.rpc('get_synonym_map');\n \n console.log('🔍 DEBUG: Supabase RPC response:', {\n hasError: !!error,\n errorMessage: error?.message,\n hasData: !!data,\n dataType: typeof data,\n dataKeys: data ? Object.keys(data).length : 0\n });\n \n if (error) {\n throw new Error(`Supabase error: ${error.message}`);\n }\n \n const synonymMap = data || {};\n console.log(`📚 Loaded ${Object.keys(synonymMap).length} synonym entries from Supabase`);\n \n return synonymMap;\n } catch (error) {\n console.error('❌ Failed to load synonyms from Supabase:', error);\n throw error;\n }\n}\n\n/**\n * Calculate document frequencies for TF-IDF\n */\nfunction calculateDocumentFrequencies(\n docs: Record<string, any>,\n textProperty: string\n): Map<string, number> {\n const df = new Map<string, number>();\n\n for (const doc of Object.values(docs)) {\n const text = doc[textProperty];\n \n if (!text || typeof text !== 'string') {\n continue;\n }\n\n // Get unique words in this document\n const words = new Set(tokenize(text));\n\n // Increment document frequency for each unique word\n for (const word of words) {\n df.set(word, (df.get(word) || 0) + 1);\n }\n }\n\n return df;\n}\n\n/**\n * Normalize text using the same rules as server-side\n * \n * CRITICAL: This must match the normalizeText() function in server/index.js exactly\n * PLUS we remove all punctuation to match Orama's French tokenizer behavior\n */\nfunction normalizeText(text: string): string {\n return text\n .toLowerCase()\n .normalize('NFD')\n .replace(/[\\u0300-\\u036f]/g, '') // Remove diacritics\n // Replace French elisions (l', d', etc.) with space to preserve word boundaries\n .replace(/\\b[ldcjmnst][\\u2018\\u2019\\u201A\\u201B\\u2032\\u2035\\u0027\\u0060\\u00B4](?=\\w)/gi, ' ')\n .replace(/[\\u2018\\u2019\\u201A\\u201B\\u2032\\u2035\\u0027\\u0060\\u00B4]/g, '') // Remove remaining apostrophes\n .replace(/[\\u201c\\u201d]/g, '\"') // Normalize curly quotes to straight quotes\n .replace(/[.,;:!?()[\\]{}\\-—–«»\"\"]/g, ' ') // Remove punctuation (replace with space to preserve word boundaries)\n .replace(/\\s+/g, ' ') // Normalize multiple spaces to single space\n .trim();\n}\n\n/**\n * Tokenization matching normalized text behavior\n * \n * Note: Text should already be normalized before indexing, so we normalize again\n * to ensure plugin tokenization matches index tokenization\n */\nfunction tokenize(text: string): string[] {\n // Normalize first (same as indexing), then split by whitespace\n return normalizeText(text)\n .split(/\\s+/)\n .filter(token => token.length > 0);\n}\n\n/**\n * Export types for external use\n */\nexport type {\n FuzzyPhraseConfig,\n WordMatch,\n PhraseMatch,\n DocumentMatch,\n SynonymMap,\n Candidate\n} from './types.js';\n"]}
package/dist/index.d.cts CHANGED
@@ -32,6 +32,14 @@ interface FuzzyPhraseConfig {
32
32
  maxGap?: number;
33
33
  /** Minimum phrase score to include in results */
34
34
  minScore?: number;
35
+ /**
36
+ * Multiplier for proximity window calculation.
37
+ * proximityWindow = queryTokens.length × proximitySpanMultiplier
38
+ * Lower values = stricter proximity requirements (words must be closer)
39
+ * Higher values = more lenient (words can be further apart)
40
+ * @default 5
41
+ */
42
+ proximitySpanMultiplier?: number;
35
43
  }
36
44
  type SynonymMap = Record<string, string[]>;
37
45
  interface Candidate {
@@ -49,19 +57,41 @@ interface WordMatch {
49
57
  distance: number;
50
58
  score: number;
51
59
  }
60
+ /**
61
+ * Represents a word in a gap between matched query words
62
+ */
63
+ interface GapWord {
64
+ /** The actual word in the document */
65
+ word: string;
66
+ /** Position in document tokens */
67
+ position: number;
68
+ /** Which gap slot this uses (1-based, e.g., 1 of maxGap) */
69
+ gapIndex: number;
70
+ }
52
71
  interface PhraseMatch {
72
+ /** Matched query words */
53
73
  words: WordMatch[];
74
+ /** Non-query words between matched words */
75
+ gapWords: GapWord[];
76
+ /** Number of gap positions used (for display as X/maxGap) */
77
+ gapUsed: number;
78
+ /** Coverage ratio: matched words / query tokens (0-1) */
79
+ coverage: number;
54
80
  startPosition: number;
55
81
  endPosition: number;
56
- gap: number;
82
+ /** Total span from first to last matched word */
83
+ span: number;
57
84
  inOrder: boolean;
58
85
  score: number;
59
86
  scoreBreakdown: {
60
87
  base: number;
61
88
  order: number;
62
89
  proximity: number;
90
+ /** For single-word queries: repetition count. For phrases: 0 */
63
91
  density: number;
64
92
  semantic: number;
93
+ /** Coverage multiplier applied to final score (for phrases) */
94
+ coverage: number;
65
95
  };
66
96
  }
67
97
  interface DocumentMatch {
package/dist/index.d.ts CHANGED
@@ -32,6 +32,14 @@ interface FuzzyPhraseConfig {
32
32
  maxGap?: number;
33
33
  /** Minimum phrase score to include in results */
34
34
  minScore?: number;
35
+ /**
36
+ * Multiplier for proximity window calculation.
37
+ * proximityWindow = queryTokens.length × proximitySpanMultiplier
38
+ * Lower values = stricter proximity requirements (words must be closer)
39
+ * Higher values = more lenient (words can be further apart)
40
+ * @default 5
41
+ */
42
+ proximitySpanMultiplier?: number;
35
43
  }
36
44
  type SynonymMap = Record<string, string[]>;
37
45
  interface Candidate {
@@ -49,19 +57,41 @@ interface WordMatch {
49
57
  distance: number;
50
58
  score: number;
51
59
  }
60
+ /**
61
+ * Represents a word in a gap between matched query words
62
+ */
63
+ interface GapWord {
64
+ /** The actual word in the document */
65
+ word: string;
66
+ /** Position in document tokens */
67
+ position: number;
68
+ /** Which gap slot this uses (1-based, e.g., 1 of maxGap) */
69
+ gapIndex: number;
70
+ }
52
71
  interface PhraseMatch {
72
+ /** Matched query words */
53
73
  words: WordMatch[];
74
+ /** Non-query words between matched words */
75
+ gapWords: GapWord[];
76
+ /** Number of gap positions used (for display as X/maxGap) */
77
+ gapUsed: number;
78
+ /** Coverage ratio: matched words / query tokens (0-1) */
79
+ coverage: number;
54
80
  startPosition: number;
55
81
  endPosition: number;
56
- gap: number;
82
+ /** Total span from first to last matched word */
83
+ span: number;
57
84
  inOrder: boolean;
58
85
  score: number;
59
86
  scoreBreakdown: {
60
87
  base: number;
61
88
  order: number;
62
89
  proximity: number;
90
+ /** For single-word queries: repetition count. For phrases: 0 */
63
91
  density: number;
64
92
  semantic: number;
93
+ /** Coverage multiplier applied to final score (for phrases) */
94
+ coverage: number;
65
95
  };
66
96
  }
67
97
  interface DocumentMatch {
package/dist/index.js CHANGED
@@ -209,8 +209,9 @@ function findPhrasesInDocument(documentTokens, candidatesMap, config, documentFr
209
209
  config,
210
210
  documentFrequency,
211
211
  totalDocuments,
212
- wordMatches
213
- // Pass all word matches for density calculation
212
+ wordMatches,
213
+ documentTokens
214
+ // Pass document tokens to extract gap words
214
215
  );
215
216
  if (phrase && phrase.words.length > 0) {
216
217
  phrases.push(phrase);
@@ -218,16 +219,27 @@ function findPhrasesInDocument(documentTokens, candidatesMap, config, documentFr
218
219
  }
219
220
  return deduplicatePhrases(phrases);
220
221
  }
221
- function buildPhraseFromPosition(wordMatches, startIndex, queryTokens, config, documentFrequency, totalDocuments, allWordMatches) {
222
+ function buildPhraseFromPosition(wordMatches, startIndex, queryTokens, config, documentFrequency, totalDocuments, allWordMatches, documentTokens) {
222
223
  const startMatch = wordMatches[startIndex];
223
224
  const phraseWords = [startMatch];
224
225
  const coveredTokens = /* @__PURE__ */ new Set([startMatch.queryToken]);
226
+ const gapWords = [];
227
+ let totalGapUsed = 0;
225
228
  for (let i = startIndex + 1; i < wordMatches.length; i++) {
226
229
  const match = wordMatches[i];
227
- const gap = match.position - phraseWords[phraseWords.length - 1].position - 1;
230
+ const lastPos = phraseWords[phraseWords.length - 1].position;
231
+ const gap = match.position - lastPos - 1;
228
232
  if (gap > config.maxGap) {
229
233
  break;
230
234
  }
235
+ for (let pos = lastPos + 1; pos < match.position; pos++) {
236
+ totalGapUsed++;
237
+ gapWords.push({
238
+ word: documentTokens[pos],
239
+ position: pos,
240
+ gapIndex: totalGapUsed
241
+ });
242
+ }
231
243
  if (!coveredTokens.has(match.queryToken)) {
232
244
  phraseWords.push(match);
233
245
  coveredTokens.add(match.queryToken);
@@ -237,19 +249,25 @@ function buildPhraseFromPosition(wordMatches, startIndex, queryTokens, config, d
237
249
  }
238
250
  }
239
251
  if (phraseWords.length > 0) {
252
+ const coverage = phraseWords.length / queryTokens.length;
253
+ const span = phraseWords[phraseWords.length - 1].position - phraseWords[0].position + 1;
240
254
  const { score, breakdown } = calculatePhraseScore(
241
255
  phraseWords,
242
256
  queryTokens,
243
257
  config,
244
258
  documentFrequency,
245
259
  totalDocuments,
246
- allWordMatches
260
+ allWordMatches,
261
+ coverage
247
262
  );
248
263
  return {
249
264
  words: phraseWords,
265
+ gapWords,
266
+ gapUsed: totalGapUsed,
267
+ coverage,
250
268
  startPosition: phraseWords[0].position,
251
269
  endPosition: phraseWords[phraseWords.length - 1].position,
252
- gap: phraseWords[phraseWords.length - 1].position - phraseWords[0].position,
270
+ span,
253
271
  inOrder: isInOrder(phraseWords, queryTokens),
254
272
  score,
255
273
  scoreBreakdown: breakdown
@@ -257,7 +275,7 @@ function buildPhraseFromPosition(wordMatches, startIndex, queryTokens, config, d
257
275
  }
258
276
  return null;
259
277
  }
260
- function calculatePhraseScore(phraseWords, queryTokens, config, documentFrequency, totalDocuments, allWordMatches) {
278
+ function calculatePhraseScore(phraseWords, queryTokens, config, documentFrequency, totalDocuments, allWordMatches, coverage) {
261
279
  let baseScore = 0;
262
280
  for (const word of phraseWords) {
263
281
  const weight = word.type === "exact" ? config.weights.exact : word.type === "fuzzy" ? config.weights.fuzzy : config.weights.fuzzy * 0.8;
@@ -267,13 +285,12 @@ function calculatePhraseScore(phraseWords, queryTokens, config, documentFrequenc
267
285
  const inOrder = isInOrder(phraseWords, queryTokens);
268
286
  const orderScore = inOrder ? 1 : 0.5;
269
287
  const span = phraseWords[phraseWords.length - 1].position - phraseWords[0].position + 1;
270
- const proximityScore = Math.max(0, 1 - span / (queryTokens.length * 5));
288
+ const proximityWindow = queryTokens.length * config.proximitySpanMultiplier;
289
+ const proximityScore = Math.max(0, 1 - span / proximityWindow);
271
290
  let densityScore = 0;
272
291
  if (queryTokens.length === 1) {
273
292
  const totalOccurrences = allWordMatches.length;
274
- densityScore = totalOccurrences / queryTokens.length;
275
- } else {
276
- densityScore = phraseWords.length / queryTokens.length;
293
+ densityScore = Math.min(1, totalOccurrences / 10);
277
294
  }
278
295
  const semanticScore = calculateSemanticScore(
279
296
  phraseWords,
@@ -287,8 +304,11 @@ function calculatePhraseScore(phraseWords, queryTokens, config, documentFrequenc
287
304
  const weightedDensity = densityScore * weights.density;
288
305
  const weightedSemantic = semanticScore * weights.semantic;
289
306
  const totalScore = weightedBase + weightedOrder + weightedProximity + weightedDensity + weightedSemantic;
290
- const maxPossibleScore = 1 + weights.order + weights.proximity + weights.density + weights.semantic;
291
- const score = totalScore / maxPossibleScore;
307
+ const maxBaseWeight = Math.max(weights.exact, weights.fuzzy);
308
+ const maxPossibleScore = maxBaseWeight + weights.order + weights.proximity + weights.density + weights.semantic;
309
+ const normalizedScore = totalScore / maxPossibleScore;
310
+ const coverageMultiplier = queryTokens.length > 1 ? coverage : 1;
311
+ const score = normalizedScore * coverageMultiplier;
292
312
  const base = weightedBase / maxPossibleScore;
293
313
  const order = weightedOrder / maxPossibleScore;
294
314
  const proximity = weightedProximity / maxPossibleScore;
@@ -301,7 +321,9 @@ function calculatePhraseScore(phraseWords, queryTokens, config, documentFrequenc
301
321
  order,
302
322
  proximity,
303
323
  density,
304
- semantic
324
+ semantic,
325
+ coverage: coverageMultiplier
326
+ // Show coverage multiplier in breakdown
305
327
  }
306
328
  };
307
329
  }
@@ -370,7 +392,8 @@ var DEFAULT_CONFIG = {
370
392
  semantic: 0.15
371
393
  },
372
394
  maxGap: 5,
373
- minScore: 0.1
395
+ minScore: 0.1,
396
+ proximitySpanMultiplier: 5
374
397
  };
375
398
  var pluginStates = /* @__PURE__ */ new WeakMap();
376
399
  function pluginFuzzyPhrase(userConfig = {}) {
@@ -390,7 +413,8 @@ function pluginFuzzyPhrase(userConfig = {}) {
390
413
  semantic: userConfig.weights?.semantic ?? DEFAULT_CONFIG.weights.semantic
391
414
  },
392
415
  maxGap: userConfig.maxGap ?? DEFAULT_CONFIG.maxGap,
393
- minScore: userConfig.minScore ?? DEFAULT_CONFIG.minScore
416
+ minScore: userConfig.minScore ?? DEFAULT_CONFIG.minScore,
417
+ proximitySpanMultiplier: userConfig.proximitySpanMultiplier ?? DEFAULT_CONFIG.proximitySpanMultiplier
394
418
  };
395
419
  const plugin = {
396
420
  name: "fuzzy-phrase",
@@ -528,7 +552,8 @@ async function searchWithFuzzyPhrase(orama, params, language) {
528
552
  filteredCandidates,
529
553
  {
530
554
  weights: state.config.weights,
531
- maxGap: state.config.maxGap
555
+ maxGap: state.config.maxGap,
556
+ proximitySpanMultiplier: state.config.proximitySpanMultiplier
532
557
  },
533
558
  state.documentFrequency,
534
559
  state.totalDocuments
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/fuzzy.ts","../src/candidates.ts","../src/scoring.ts","../src/index.ts"],"names":[],"mappings":";AA4BO,SAAS,mBACd,GACA,GACA,OAC0B;AAE1B,MAAI,MAAM,GAAG;AACX,WAAO,EAAE,WAAW,MAAM,UAAU,EAAE;AAAA,EACxC;AAEA,QAAM,OAAO,EAAE;AACf,QAAM,OAAO,EAAE;AAGf,MAAI,KAAK,IAAI,OAAO,IAAI,IAAI,OAAO;AACjC,WAAO,EAAE,WAAW,OAAO,UAAU,QAAQ,EAAE;AAAA,EACjD;AAGA,MAAI,OAAO,MAAM;AACf,KAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC;AAAA,EAChB;AAEA,QAAM,IAAI,EAAE;AACZ,QAAM,IAAI,EAAE;AAGZ,MAAI,UAAU,IAAI,MAAM,IAAI,CAAC;AAC7B,MAAI,UAAU,IAAI,MAAM,IAAI,CAAC;AAG7B,WAAS,IAAI,GAAG,KAAK,GAAG,KAAK;AAC3B,YAAQ,CAAC,IAAI;AAAA,EACf;AAEA,WAAS,IAAI,GAAG,KAAK,GAAG,KAAK;AAC3B,YAAQ,CAAC,IAAI;AACb,QAAI,WAAW;AAEf,aAAS,IAAI,GAAG,KAAK,GAAG,KAAK;AAC3B,YAAM,OAAO,EAAE,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,IAAI,IAAI;AAEzC,cAAQ,CAAC,IAAI,KAAK;AAAA,QAChB,QAAQ,CAAC,IAAI;AAAA;AAAA,QACb,QAAQ,IAAI,CAAC,IAAI;AAAA;AAAA,QACjB,QAAQ,IAAI,CAAC,IAAI;AAAA;AAAA,MACnB;AAEA,iBAAW,KAAK,IAAI,UAAU,QAAQ,CAAC,CAAC;AAAA,IAC1C;AAGA,QAAI,WAAW,OAAO;AACpB,aAAO,EAAE,WAAW,OAAO,UAAU,QAAQ,EAAE;AAAA,IACjD;AAGA,KAAC,SAAS,OAAO,IAAI,CAAC,SAAS,OAAO;AAAA,EACxC;AAEA,QAAM,WAAW,QAAQ,CAAC;AAC1B,SAAO;AAAA,IACL,WAAW,YAAY;AAAA,IACvB;AAAA,EACF;AACF;AAUO,SAAS,WACd,MACA,YACA,WACuD;AAEvD,MAAI,SAAS,YAAY;AACvB,WAAO,EAAE,SAAS,MAAM,UAAU,GAAG,OAAO,EAAI;AAAA,EAClD;AAGA,MAAI,KAAK,WAAW,UAAU,GAAG;AAC/B,WAAO,EAAE,SAAS,MAAM,UAAU,GAAG,OAAO,KAAK;AAAA,EACnD;AAGA,QAAM,SAAS,mBAAmB,MAAM,YAAY,SAAS;AAE7D,MAAI,OAAO,WAAW;AAGpB,UAAM,QAAQ,IAAO,OAAO,WAAW;AACvC,WAAO;AAAA,MACL,SAAS;AAAA,MACT,UAAU,OAAO;AAAA,MACjB,OAAO,KAAK,IAAI,KAAK,KAAK;AAAA;AAAA,IAC5B;AAAA,EACF;AAEA,SAAO,EAAE,SAAS,OAAO,UAAU,YAAY,GAAG,OAAO,EAAE;AAC7D;AAWO,SAAS,2BACd,aACA,eACQ;AACR,QAAM,cAAc,YAAY;AAEhC,MAAI,eAAe,GAAG;AACpB,WAAO;AAAA,EACT,WAAW,eAAe,GAAG;AAC3B,WAAO,gBAAgB;AAAA,EACzB,WAAW,eAAe,GAAG;AAC3B,WAAO,gBAAgB;AAAA,EACzB,OAAO;AACL,WAAO,gBAAgB;AAAA,EACzB;AACF;;;ACjJO,SAAS,+BAA+B,WAA6B;AAC1E,QAAM,aAAa,oBAAI,IAAY;AACnC,MAAI,eAAe;AACnB,MAAI,aAAa;AAEjB,WAAS,SAAS,MAAW,QAAgB,GAAG;AAC9C,QAAI,CAAC,MAAM;AACT;AAAA,IACF;AAEA;AAIA,QAAI,KAAK,KAAK,KAAK,KAAK,OAAO,KAAK,MAAM,YAAY,KAAK,EAAE,SAAS,GAAG;AACvE,iBAAW,IAAI,KAAK,CAAC;AACrB;AAAA,IACF;AAGA,QAAI,KAAK,GAAG;AACV,UAAI,KAAK,aAAa,KAAK;AAEzB,mBAAW,CAAC,MAAM,SAAS,KAAK,KAAK,GAAG;AACtC,mBAAS,WAAW,QAAQ,CAAC;AAAA,QAC/B;AAAA,MACF,WAAW,MAAM,QAAQ,KAAK,CAAC,GAAG;AAEhC,mBAAW,CAAC,MAAM,SAAS,KAAK,KAAK,GAAG;AACtC,mBAAS,WAAW,QAAQ,CAAC;AAAA,QAC/B;AAAA,MACF,WAAW,OAAO,KAAK,MAAM,UAAU;AAErC,mBAAW,aAAa,OAAO,OAAO,KAAK,CAAC,GAAG;AAC7C,mBAAS,WAAW,QAAQ,CAAC;AAAA,QAC/B;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,WAAS,SAAS;AAClB,UAAQ,IAAI,uBAAgB,WAAW,IAAI,eAAe,YAAY,gBAAgB;AACtF,SAAO;AACT;AAYO,SAAS,uBACd,YACA,YACA,WACA,UACA,eAAuB,KACV;AACb,QAAM,aAA0B,CAAC;AACjC,QAAM,OAAO,oBAAI,IAAY;AAG7B,MAAI,WAAW,IAAI,UAAU,GAAG;AAC9B,eAAW,KAAK;AAAA,MACd,MAAM;AAAA,MACN,MAAM;AAAA,MACN;AAAA,MACA,UAAU;AAAA,MACV,OAAO;AAAA,IACT,CAAC;AACD,SAAK,IAAI,UAAU;AAAA,EACrB;AAGA,aAAW,QAAQ,YAAY;AAC7B,QAAI,KAAK,IAAI,IAAI;AAAG;AAEpB,UAAM,QAAQ,WAAW,MAAM,YAAY,SAAS;AACpD,QAAI,MAAM,SAAS;AACjB,iBAAW,KAAK;AAAA,QACd;AAAA,QACA,MAAM;AAAA,QACN;AAAA,QACA,UAAU,MAAM;AAAA,QAChB,OAAO,MAAM;AAAA,MACf,CAAC;AACD,WAAK,IAAI,IAAI;AAAA,IACf;AAAA,EACF;AAGA,MAAI,YAAY,SAAS,UAAU,GAAG;AACpC,eAAW,WAAW,SAAS,UAAU,GAAG;AAC1C,UAAI,KAAK,IAAI,OAAO;AAAG;AACvB,UAAI,WAAW,IAAI,OAAO,GAAG;AAC3B,mBAAW,KAAK;AAAA,UACd,MAAM;AAAA,UACN,MAAM;AAAA,UACN;AAAA,UACA,UAAU;AAAA,UACV,OAAO;AAAA,QACT,CAAC;AACD,aAAK,IAAI,OAAO;AAAA,MAClB;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;AAYO,SAAS,kBACd,aACA,YACA,WACA,UACA,eAAuB,KACG;AAC1B,QAAM,gBAAgB,oBAAI,IAAyB;AAEnD,aAAW,SAAS,aAAa;AAC/B,UAAM,kBAAkB;AAAA,MACtB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AACA,kBAAc,IAAI,OAAO,eAAe;AAAA,EAC1C;AAEA,SAAO;AACT;AAyBO,SAAS,wBACd,eACA,UAC0B;AAC1B,QAAM,WAAW,oBAAI,IAAyB;AAE9C,aAAW,CAAC,OAAO,UAAU,KAAK,cAAc,QAAQ,GAAG;AACzD,UAAM,qBAAqB,WAAW,OAAO,OAAK,EAAE,SAAS,QAAQ;AACrE,QAAI,mBAAmB,SAAS,GAAG;AACjC,eAAS,IAAI,OAAO,kBAAkB;AAAA,IACxC;AAAA,EACF;AAEA,SAAO;AACT;;;ACvKO,SAAS,sBACd,gBACA,eACA,QACA,mBACA,gBACe;AACf,QAAM,UAAyB,CAAC;AAChC,QAAM,cAAc,MAAM,KAAK,cAAc,KAAK,CAAC;AAGnD,QAAM,cAA2B,CAAC;AAElC,WAAS,IAAI,GAAG,IAAI,eAAe,QAAQ,KAAK;AAC9C,UAAM,UAAU,eAAe,CAAC;AAGhC,eAAW,CAAC,YAAY,UAAU,KAAK,cAAc,QAAQ,GAAG;AAC9D,iBAAW,aAAa,YAAY;AAClC,YAAI,UAAU,SAAS,SAAS;AAC9B,sBAAY,KAAK;AAAA,YACf,MAAM;AAAA,YACN;AAAA,YACA,UAAU;AAAA,YACV,MAAM,UAAU;AAAA,YAChB,UAAU,UAAU;AAAA,YACpB,OAAO,UAAU;AAAA,UACnB,CAAC;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAGA,WAAS,IAAI,GAAG,IAAI,YAAY,QAAQ,KAAK;AAC3C,UAAM,SAAS;AAAA,MACb;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA;AAAA,IACF;AAEA,QAAI,UAAU,OAAO,MAAM,SAAS,GAAG;AACrC,cAAQ,KAAK,MAAM;AAAA,IACrB;AAAA,EACF;AAGA,SAAO,mBAAmB,OAAO;AACnC;AAcA,SAAS,wBACP,aACA,YACA,aACA,QACA,mBACA,gBACA,gBACoB;AACpB,QAAM,aAAa,YAAY,UAAU;AACzC,QAAM,cAA2B,CAAC,UAAU;AAC5C,QAAM,gBAAgB,oBAAI,IAAI,CAAC,WAAW,UAAU,CAAC;AAGrD,WAAS,IAAI,aAAa,GAAG,IAAI,YAAY,QAAQ,KAAK;AACxD,UAAM,QAAQ,YAAY,CAAC;AAC3B,UAAM,MAAM,MAAM,WAAW,YAAY,YAAY,SAAS,CAAC,EAAE,WAAW;AAG5E,QAAI,MAAM,OAAO,QAAQ;AACvB;AAAA,IACF;AAGA,QAAI,CAAC,cAAc,IAAI,MAAM,UAAU,GAAG;AACxC,kBAAY,KAAK,KAAK;AACtB,oBAAc,IAAI,MAAM,UAAU;AAAA,IACpC;AAGA,QAAI,cAAc,SAAS,YAAY,QAAQ;AAC7C;AAAA,IACF;AAAA,EACF;AAGA,MAAI,YAAY,SAAS,GAAG;AAC1B,UAAM,EAAE,OAAO,UAAU,IAAI;AAAA,MAC3B;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,WAAO;AAAA,MACL,OAAO;AAAA,MACP,eAAe,YAAY,CAAC,EAAE;AAAA,MAC9B,aAAa,YAAY,YAAY,SAAS,CAAC,EAAE;AAAA,MACjD,KAAK,YAAY,YAAY,SAAS,CAAC,EAAE,WAAW,YAAY,CAAC,EAAE;AAAA,MACnE,SAAS,UAAU,aAAa,WAAW;AAAA,MAC3C;AAAA,MACA,gBAAgB;AAAA,IAClB;AAAA,EACF;AAEA,SAAO;AACT;AAaA,SAAS,qBACP,aACA,aACA,QACA,mBACA,gBACA,gBACqH;AAErH,MAAI,YAAY;AAChB,aAAW,QAAQ,aAAa;AAC9B,UAAM,SAAS,KAAK,SAAS,UAAU,OAAO,QAAQ,QACvC,KAAK,SAAS,UAAU,OAAO,QAAQ,QACvC,OAAO,QAAQ,QAAQ;AACtC,iBAAa,KAAK,QAAQ;AAAA,EAC5B;AACA,eAAa,YAAY;AAGzB,QAAM,UAAU,UAAU,aAAa,WAAW;AAClD,QAAM,aAAa,UAAU,IAAM;AAGnC,QAAM,OAAO,YAAY,YAAY,SAAS,CAAC,EAAE,WAAW,YAAY,CAAC,EAAE,WAAW;AACtF,QAAM,iBAAiB,KAAK,IAAI,GAAG,IAAO,QAAQ,YAAY,SAAS,EAAG;AAI1E,MAAI,eAAe;AAEnB,MAAI,YAAY,WAAW,GAAG;AAE5B,UAAM,mBAAmB,eAAe;AAExC,mBAAe,mBAAmB,YAAY;AAAA,EAChD,OAAO;AAGL,mBAAe,YAAY,SAAS,YAAY;AAAA,EAClD;AAGA,QAAM,gBAAgB;AAAA,IACpB;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAGA,QAAM,UAAU,OAAO;AAGvB,QAAM,eAAe;AACrB,QAAM,gBAAgB,aAAa,QAAQ;AAC3C,QAAM,oBAAoB,iBAAiB,QAAQ;AACnD,QAAM,kBAAkB,eAAe,QAAQ;AAC/C,QAAM,mBAAmB,gBAAgB,QAAQ;AAEjD,QAAM,aAAa,eAAe,gBAAgB,oBAAoB,kBAAkB;AAIxF,QAAM,mBAAmB,IAAM,QAAQ,QAAQ,QAAQ,YAAY,QAAQ,UAAU,QAAQ;AAG7F,QAAM,QAAQ,aAAa;AAG3B,QAAM,OAAO,eAAe;AAC5B,QAAM,QAAQ,gBAAgB;AAC9B,QAAM,YAAY,oBAAoB;AACtC,QAAM,UAAU,kBAAkB;AAClC,QAAM,WAAW,mBAAmB;AAEpC,SAAO;AAAA,IACL;AAAA,IACA,WAAW;AAAA,MACT;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACF;AASA,SAAS,UAAU,aAA0B,aAAgC;AAC3E,QAAM,aAAa,IAAI,IAAI,YAAY,IAAI,CAAC,OAAO,UAAU,CAAC,OAAO,KAAK,CAAC,CAAC;AAE5E,WAAS,IAAI,GAAG,IAAI,YAAY,QAAQ,KAAK;AAC3C,UAAM,YAAY,WAAW,IAAI,YAAY,IAAI,CAAC,EAAE,UAAU,KAAK;AACnE,UAAM,YAAY,WAAW,IAAI,YAAY,CAAC,EAAE,UAAU,KAAK;AAE/D,QAAI,YAAY,WAAW;AACzB,aAAO;AAAA,IACT;AAAA,EACF;AAEA,SAAO;AACT;AAUA,SAAS,uBACP,aACA,mBACA,gBACQ;AAER,MAAI,mBAAmB,GAAG;AACxB,WAAO;AAAA,EACT;AAEA,MAAI,WAAW;AAEf,aAAW,QAAQ,aAAa;AAC9B,UAAM,KAAK,kBAAkB,IAAI,KAAK,IAAI,KAAK;AAC/C,UAAM,MAAM,KAAK,IAAI,iBAAiB,EAAE;AACxC,gBAAY;AAAA,EACd;AAGA,QAAM,WAAW,WAAW,YAAY;AAGxC,SAAO,KAAK,IAAI,GAAK,WAAW,EAAE;AACpC;AAQA,SAAS,mBAAmB,SAAuC;AACjE,MAAI,QAAQ,WAAW;AAAG,WAAO,CAAC;AAGlC,QAAM,SAAS,QAAQ,MAAM,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAC/D,QAAM,SAAwB,CAAC;AAC/B,QAAM,UAAU,oBAAI,IAAY;AAEhC,aAAW,UAAU,QAAQ;AAE3B,QAAI,WAAW;AACf,aAAS,MAAM,OAAO,eAAe,OAAO,OAAO,aAAa,OAAO;AACrE,UAAI,QAAQ,IAAI,GAAG,GAAG;AACpB,mBAAW;AACX;AAAA,MACF;AAAA,IACF;AAEA,QAAI,CAAC,UAAU;AACb,aAAO,KAAK,MAAM;AAElB,eAAS,MAAM,OAAO,eAAe,OAAO,OAAO,aAAa,OAAO;AACrE,gBAAQ,IAAI,GAAG;AAAA,MACjB;AAAA,IACF;AAAA,EACF;AAEA,SAAO,OAAO,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAChD;;;ACnUA,IAAM,iBAA8C;AAAA,EAClD,cAAc;AAAA,EACd,WAAW;AAAA,EACX,mBAAmB;AAAA,EACnB,gBAAgB;AAAA,EAChB,UAAU;AAAA,EACV,mBAAmB;AAAA,EACnB,SAAS;AAAA,IACP,OAAO;AAAA,IACP,OAAO;AAAA,IACP,OAAO;AAAA,IACP,WAAW;AAAA,IACX,SAAS;AAAA,IACT,UAAU;AAAA,EACZ;AAAA,EACA,QAAQ;AAAA,EACR,UAAU;AACZ;AAKA,IAAM,eAAe,oBAAI,QAA+B;AAQjD,SAAS,kBAAkB,aAAgC,CAAC,GAAgB;AAEjF,QAAM,SAAsC;AAAA,IAC1C,cAAc,WAAW,gBAAgB,eAAe;AAAA,IACxD,WAAW,WAAW,aAAa,eAAe;AAAA,IAClD,mBAAmB,WAAW,qBAAqB,eAAe;AAAA,IAClE,gBAAgB,WAAW,kBAAkB,eAAe;AAAA,IAC5D,UAAU,WAAW,YAAY,eAAe;AAAA,IAChD,mBAAmB,WAAW,qBAAqB,eAAe;AAAA,IAClE,SAAS;AAAA,MACP,OAAO,WAAW,SAAS,SAAS,eAAe,QAAQ;AAAA,MAC3D,OAAO,WAAW,SAAS,SAAS,eAAe,QAAQ;AAAA,MAC3D,OAAO,WAAW,SAAS,SAAS,eAAe,QAAQ;AAAA,MAC3D,WAAW,WAAW,SAAS,aAAa,eAAe,QAAQ;AAAA,MACnE,SAAS,WAAW,SAAS,WAAW,eAAe,QAAQ;AAAA,MAC/D,UAAU,WAAW,SAAS,YAAY,eAAe,QAAQ;AAAA,IACnE;AAAA,IACA,QAAQ,WAAW,UAAU,eAAe;AAAA,IAC5C,UAAU,WAAW,YAAY,eAAe;AAAA,EAClD;AAEA,QAAM,SAAsB;AAAA,IAC1B,MAAM;AAAA;AAAA;AAAA;AAAA,IAKN,aAAa,OAAO,UAAoB;AACtC,cAAQ,IAAI,+CAAwC;AAGpD,YAAM,QAAqB;AAAA,QACzB,YAAY,CAAC;AAAA,QACb;AAAA,QACA,mBAAmB,oBAAI,IAAI;AAAA,QAC3B,gBAAgB;AAAA,MAClB;AAGA,UAAI,OAAO,kBAAkB,OAAO,UAAU;AAC5C,YAAI;AACF,kBAAQ,IAAI,6CAAsC;AAClD,gBAAM,aAAa,MAAM,yBAAyB,OAAO,QAAQ;AACjE,kBAAQ,IAAI,iBAAY,OAAO,KAAK,MAAM,UAAU,EAAE,MAAM,sBAAsB;AAAA,QACpF,SAAS,OAAO;AACd,kBAAQ,MAAM,0CAAgC,KAAK;AAAA,QAErD;AAAA,MACF;AAGA,YAAM,OAAQ,MAAM,MAAc,MAAM;AACxC,UAAI,MAAM;AACR,cAAM,iBAAiB,OAAO,KAAK,IAAI,EAAE;AACzC,cAAM,oBAAoB,6BAA6B,MAAM,OAAO,YAAY;AAChF,gBAAQ,IAAI,iDAA0C,MAAM,cAAc,YAAY;AAAA,MACxF;AAGA,mBAAa,IAAI,OAAO,KAAK;AAC7B,cAAQ,IAAI,wCAAmC;AAI/C,mBAAa,MAAM;AACjB,YAAI,OAAQ,WAAmB,2BAA2B,YAAY;AACpE,kBAAQ,IAAI,qCAA8B;AAC1C,UAAC,WAAmB,uBAAuB;AAAA,QAC7C,OAAO;AACL,kBAAQ,KAAK,yDAA+C;AAAA,QAC9D;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AAEA,SAAO;AACT;AAQA,eAAsB,sBACpB,OACA,QACA,UACoC;AACpC,QAAM,YAAY,YAAY,IAAI;AAGlC,QAAM,QAAQ,aAAa,IAAI,KAAK;AAEpC,MAAI,CAAC,OAAO;AACV,YAAQ,MAAM,qCAAgC;AAC9C,UAAM,IAAI,MAAM,8CAA8C;AAAA,EAChE;AAEA,QAAM,EAAE,MAAM,WAAW,IAAI;AAE7B,MAAI,CAAC,QAAQ,OAAO,SAAS,UAAU;AACrC,WAAO,EAAE,SAAS,EAAE,WAAW,OAAO,KAAK,EAAE,GAAG,MAAM,CAAC,GAAG,OAAO,EAAE;AAAA,EACrE;AAGA,QAAM,eAAgB,cAAc,WAAW,CAAC,KAAM,MAAM,OAAO;AAGnE,QAAM,cAAc,SAAS,IAAI;AAEjC,MAAI,YAAY,WAAW,GAAG;AAC5B,WAAO,EAAE,SAAS,EAAE,WAAW,OAAO,KAAK,EAAE,GAAG,MAAM,CAAC,GAAG,OAAO,EAAE;AAAA,EACrE;AAGA,QAAM,YAAY,MAAM,OAAO,oBAC3B,2BAA2B,aAAa,MAAM,OAAO,SAAS,IAC9D,MAAM,OAAO;AAEjB,UAAQ,IAAI,mCAA4B,IAAI,MAAM,YAAY,MAAM,uBAAuB,SAAS,GAAG;AAGvG,MAAI;AAEJ,MAAI;AAGF,UAAM,YAAa,MAAc,MAAM;AAEvC,QAAI,CAAC,WAAW;AACd,cAAQ,MAAM,gDAA2C;AACzD,aAAO,EAAE,SAAS,EAAE,WAAW,OAAO,KAAK,EAAE,GAAG,MAAM,CAAC,GAAG,OAAO,EAAE;AAAA,IACrE;AAEA,YAAQ,IAAI,qCAA8B,OAAO,KAAK,aAAa,CAAC,CAAC,CAAC;AAGtE,QAAI,YAAY;AAGhB,QAAI,UAAU,UAAU,YAAY,GAAG,MAAM;AAC3C,kBAAY,UAAU,QAAQ,YAAY,EAAE;AAC5C,cAAQ,IAAI,4DAAuD;AAAA,IACrE,WAES,UAAU,YAAY,GAAG,MAAM;AACtC,kBAAY,UAAU,YAAY,EAAE;AACpC,cAAQ,IAAI,6DAAwD;AAAA,IACtE;AAEA,QAAI,CAAC,WAAW;AACd,cAAQ,MAAM,6CAAwC,YAAY;AAClE,cAAQ,MAAM,qCAAqC,OAAO,KAAK,SAAS,CAAC;AACzE,aAAO,EAAE,SAAS,EAAE,WAAW,OAAO,KAAK,EAAE,GAAG,MAAM,CAAC,GAAG,OAAO,EAAE;AAAA,IACrE;AAEA,iBAAa,+BAA+B,SAAS;AACrD,YAAQ,IAAI,uBAAgB,WAAW,IAAI,0BAA0B;AAAA,EACvE,SAAS,OAAO;AACd,YAAQ,MAAM,wCAAmC,KAAK;AACtD,WAAO,EAAE,SAAS,EAAE,WAAW,OAAO,KAAK,EAAE,GAAG,MAAM,CAAC,GAAG,OAAO,EAAE;AAAA,EACrE;AAGA,QAAM,gBAAgB;AAAA,IACpB;AAAA,IACA;AAAA,IACA;AAAA,IACA,MAAM,OAAO,iBAAiB,MAAM,aAAa;AAAA,IACjD,MAAM,OAAO;AAAA,EACf;AAGA,QAAM,qBAAqB;AAAA,IACzB;AAAA,IACA,MAAM,OAAO;AAAA,EACf;AAEA,UAAQ,IAAI,+BAAwB,MAAM,KAAK,mBAAmB,OAAO,CAAC,EAAE,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,QAAQ,CAAC,CAAC,QAAQ;AAGzH,QAAM,kBAAmC,CAAC;AAE1C,UAAQ,IAAI,yCAAkC;AAAA,IAC5C,UAAU,OAAO,KAAM,MAAc,QAAQ,CAAC,CAAC;AAAA,IAC/C,SAAS,CAAC,CAAG,MAAc,MAAM;AAAA,IACjC,UAAW,MAAc,MAAM,OAAO,OAAQ,MAAc,KAAK,OAAO;AAAA,EAC1E,CAAC;AAGD,MAAI,OAA4B,CAAC;AAGjC,MAAK,MAAc,MAAM,MAAM,MAAM;AACnC,WAAQ,MAAc,KAAK,KAAK;AAChC,YAAQ,IAAI,2CAAsC;AAAA,EACpD,WAEU,MAAc,MAAM,QAAQ,OAAQ,MAAc,KAAK,SAAS,UAAU;AAElF,UAAM,WAAW,OAAO,KAAM,MAAc,KAAK,IAAI,EAAE,CAAC;AACxD,QAAI,YAAY,aAAa,iCAAiC,aAAa,SAAS;AAClF,aAAQ,MAAc,KAAK;AAC3B,cAAQ,IAAI,+CAA0C;AAAA,IACxD;AAAA,EACF;AAEA,MAAI,OAAO,KAAK,IAAI,EAAE,WAAW,GAAG;AAClC,YAAQ,IAAI,0DAAqD;AAAA,MAC/D,aAAa,CAAC,CAAG,MAAc,MAAM;AAAA,MACrC,cAAe,MAAc,MAAM,OAAO,OAAO,KAAM,MAAc,KAAK,IAAI,IAAI;AAAA,MAClF,iBAAiB,CAAC,CAAG,MAAc,MAAM,MAAM;AAAA,MAC/C,mBAAoB,MAAc,MAAM,MAAM,OAAO,OAAO,KAAM,MAAc,KAAK,KAAK,IAAI,EAAE,SAAS;AAAA,IAC3G,CAAC;AAAA,EACH;AAEA,UAAQ,IAAI,+BAAwB,OAAO,KAAK,IAAI,EAAE,MAAM,YAAY;AAExE,aAAW,CAAC,OAAO,GAAG,KAAK,OAAO,QAAQ,IAAI,GAAG;AAC/C,UAAM,OAAO,IAAI,YAAY;AAE7B,QAAI,CAAC,QAAQ,OAAO,SAAS,UAAU;AACrC;AAAA,IACF;AAGA,UAAM,YAAY,SAAS,IAAI;AAG/B,UAAM,UAAU;AAAA,MACd;AAAA,MACA;AAAA,MACA;AAAA,QACE,SAAS,MAAM,OAAO;AAAA,QACtB,QAAQ,MAAM,OAAO;AAAA,MACvB;AAAA,MACA,MAAM;AAAA,MACN,MAAM;AAAA,IACR;AAEA,QAAI,QAAQ,SAAS,GAAG;AAEtB,YAAM,WAAW,KAAK,IAAI,GAAG,QAAQ,IAAI,OAAK,EAAE,KAAK,CAAC;AAEtD,sBAAgB,KAAK;AAAA,QACnB,IAAI;AAAA,QACJ;AAAA,QACA,OAAO;AAAA,QACP,UAAU;AAAA,MACZ,CAAC;AAAA,IACH;AAAA,EACF;AAGA,kBAAgB,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAGhD,QAAM,QAAQ,OAAO,SAAS,gBAAgB;AAC9C,QAAM,iBAAiB,gBAAgB,MAAM,GAAG,KAAK;AAGrD,QAAM,OAAO,eAAe,IAAI,YAAU;AAAA,IACxC,IAAI,MAAM;AAAA,IACV,OAAO,MAAM;AAAA,IACb,UAAU,MAAM;AAAA;AAAA,IAEhB,UAAU,MAAM;AAAA,EAClB,EAAE;AAEF,QAAM,UAAU,YAAY,IAAI,IAAI;AAEpC,UAAQ,IAAI,gBAAW,KAAK,MAAM,eAAe,QAAQ,QAAQ,CAAC,CAAC,cAAc,KAAK,GAAG;AAEzF,SAAO;AAAA,IACL,SAAS;AAAA,MACP,WAAW,GAAG,QAAQ,QAAQ,CAAC,CAAC;AAAA,MAChC,KAAK,KAAK,MAAM,UAAU,GAAO;AAAA;AAAA,IACnC;AAAA,IACA;AAAA,IACA,OAAO,KAAK;AAAA,EACd;AACF;AAKA,eAAe,yBACb,gBACqB;AACrB,MAAI;AACF,YAAQ,IAAI,0DAAmD;AAG/D,UAAM,EAAE,aAAa,IAAI,MAAM,OAAO,uBAAuB;AAE7D,UAAM,WAAW,aAAa,eAAe,KAAK,eAAe,UAAU;AAG3E,UAAM,EAAE,MAAM,MAAM,IAAI,MAAM,SAAS,IAAI,iBAAiB;AAE5D,YAAQ,IAAI,2CAAoC;AAAA,MAC9C,UAAU,CAAC,CAAC;AAAA,MACZ,cAAc,OAAO;AAAA,MACrB,SAAS,CAAC,CAAC;AAAA,MACX,UAAU,OAAO;AAAA,MACjB,UAAU,OAAO,OAAO,KAAK,IAAI,EAAE,SAAS;AAAA,IAC9C,CAAC;AAED,QAAI,OAAO;AACT,YAAM,IAAI,MAAM,mBAAmB,MAAM,OAAO,EAAE;AAAA,IACpD;AAEA,UAAM,aAAa,QAAQ,CAAC;AAC5B,YAAQ,IAAI,oBAAa,OAAO,KAAK,UAAU,EAAE,MAAM,gCAAgC;AAEvF,WAAO;AAAA,EACT,SAAS,OAAO;AACd,YAAQ,MAAM,iDAA4C,KAAK;AAC/D,UAAM;AAAA,EACR;AACF;AAKA,SAAS,6BACP,MACA,cACqB;AACrB,QAAM,KAAK,oBAAI,IAAoB;AAEnC,aAAW,OAAO,OAAO,OAAO,IAAI,GAAG;AACrC,UAAM,OAAO,IAAI,YAAY;AAE7B,QAAI,CAAC,QAAQ,OAAO,SAAS,UAAU;AACrC;AAAA,IACF;AAGA,UAAM,QAAQ,IAAI,IAAI,SAAS,IAAI,CAAC;AAGpC,eAAW,QAAQ,OAAO;AACxB,SAAG,IAAI,OAAO,GAAG,IAAI,IAAI,KAAK,KAAK,CAAC;AAAA,IACtC;AAAA,EACF;AAEA,SAAO;AACT;AAQA,SAAS,cAAc,MAAsB;AAC3C,SAAO,KACJ,YAAY,EACZ,UAAU,KAAK,EACf,QAAQ,oBAAoB,EAAE,EAE9B,QAAQ,gFAAgF,GAAG,EAC3F,QAAQ,6DAA6D,EAAE,EACvE,QAAQ,mBAAmB,GAAG,EAC9B,QAAQ,4BAA4B,GAAG,EACvC,QAAQ,QAAQ,GAAG,EACnB,KAAK;AACV;AAQA,SAAS,SAAS,MAAwB;AAExC,SAAO,cAAc,IAAI,EACtB,MAAM,KAAK,EACX,OAAO,WAAS,MAAM,SAAS,CAAC;AACrC","sourcesContent":["/**\n * Fuzzy matching utilities using bounded Levenshtein distance\n * \n * This is the same algorithm used by Orama's match-highlight plugin\n * for consistent fuzzy matching behavior.\n */\n\n/**\n * Result of bounded Levenshtein distance calculation\n */\nexport interface BoundedLevenshteinResult {\n /** Whether the distance is within bounds */\n isBounded: boolean;\n /** The actual distance (only valid if isBounded is true) */\n distance: number;\n}\n\n/**\n * Calculate bounded Levenshtein distance between two strings\n * \n * Stops early if distance exceeds the bound for better performance.\n * This is the same algorithm as Orama's internal boundedLevenshtein.\n * \n * @param a - First string\n * @param b - Second string\n * @param bound - Maximum allowed distance\n * @returns Result indicating if strings are within bound and the distance\n */\nexport function boundedLevenshtein(\n a: string,\n b: string,\n bound: number\n): BoundedLevenshteinResult {\n // Quick checks\n if (a === b) {\n return { isBounded: true, distance: 0 };\n }\n\n const aLen = a.length;\n const bLen = b.length;\n\n // If length difference exceeds bound, no need to calculate\n if (Math.abs(aLen - bLen) > bound) {\n return { isBounded: false, distance: bound + 1 };\n }\n\n // Swap to ensure a is shorter (optimization)\n if (aLen > bLen) {\n [a, b] = [b, a];\n }\n\n const m = a.length;\n const n = b.length;\n\n // Use single array instead of matrix (memory optimization)\n let prevRow = new Array(n + 1);\n let currRow = new Array(n + 1);\n\n // Initialize first row\n for (let j = 0; j <= n; j++) {\n prevRow[j] = j;\n }\n\n for (let i = 1; i <= m; i++) {\n currRow[0] = i;\n let minInRow = i;\n\n for (let j = 1; j <= n; j++) {\n const cost = a[i - 1] === b[j - 1] ? 0 : 1;\n\n currRow[j] = Math.min(\n prevRow[j] + 1, // deletion\n currRow[j - 1] + 1, // insertion\n prevRow[j - 1] + cost // substitution\n );\n\n minInRow = Math.min(minInRow, currRow[j]);\n }\n\n // Early termination: if all values in row exceed bound, we're done\n if (minInRow > bound) {\n return { isBounded: false, distance: bound + 1 };\n }\n\n // Swap rows for next iteration\n [prevRow, currRow] = [currRow, prevRow];\n }\n\n const distance = prevRow[n];\n return {\n isBounded: distance <= bound,\n distance\n };\n}\n\n/**\n * Check if a word matches a query token with fuzzy matching\n * \n * @param word - Word from document\n * @param queryToken - Token from search query\n * @param tolerance - Maximum edit distance allowed\n * @returns Match result with score\n */\nexport function fuzzyMatch(\n word: string,\n queryToken: string,\n tolerance: number\n): { matches: boolean; distance: number; score: number } {\n // Exact match\n if (word === queryToken) {\n return { matches: true, distance: 0, score: 1.0 };\n }\n\n // Prefix match (high score, no distance)\n if (word.startsWith(queryToken)) {\n return { matches: true, distance: 0, score: 0.95 };\n }\n\n // Fuzzy match with tolerance\n const result = boundedLevenshtein(word, queryToken, tolerance);\n \n if (result.isBounded) {\n // Score decreases with distance\n // distance 1 = 0.8, distance 2 = 0.6, etc.\n const score = 1.0 - (result.distance * 0.2);\n return {\n matches: true,\n distance: result.distance,\n score: Math.max(0.1, score) // Minimum score of 0.1\n };\n }\n\n return { matches: false, distance: tolerance + 1, score: 0 };\n}\n\n/**\n * Calculate adaptive tolerance based on query length\n * \n * Longer queries get higher tolerance for better fuzzy matching.\n * \n * @param queryTokens - Array of query tokens\n * @param baseTolerance - Base tolerance value\n * @returns Calculated tolerance (always an integer)\n */\nexport function calculateAdaptiveTolerance(\n queryTokens: string[],\n baseTolerance: number\n): number {\n const queryLength = queryTokens.length;\n \n if (queryLength <= 2) {\n return baseTolerance;\n } else if (queryLength <= 4) {\n return baseTolerance + 1;\n } else if (queryLength <= 6) {\n return baseTolerance + 2;\n } else {\n return baseTolerance + 3;\n }\n}\n","/**\n * Candidate expansion: Find all possible matches for query tokens\n * including exact matches, fuzzy matches, and synonyms\n */\n\nimport { fuzzyMatch } from './fuzzy.js';\nimport type { Candidate, SynonymMap } from './types.js';\n\n/**\n * Extract all unique words from the radix tree index\n * \n * @param radixNode - Root node of the radix tree\n * @returns Set of all unique words in the index\n */\nexport function extractVocabularyFromRadixTree(radixNode: any): Set<string> {\n const vocabulary = new Set<string>();\n let nodesVisited = 0;\n let wordsFound = 0;\n \n function traverse(node: any, depth: number = 0) {\n if (!node) {\n return;\n }\n \n nodesVisited++;\n \n // Check if this node represents a complete word\n // e = true means it's an end of a word\n if (node.e && node.w && typeof node.w === 'string' && node.w.length > 0) {\n vocabulary.add(node.w);\n wordsFound++;\n }\n \n // Children can be Map, Array, or Object\n if (node.c) {\n if (node.c instanceof Map) {\n // Map format\n for (const [_key, childNode] of node.c) {\n traverse(childNode, depth + 1);\n }\n } else if (Array.isArray(node.c)) {\n // Array format: [[key, childNode], ...]\n for (const [_key, childNode] of node.c) {\n traverse(childNode, depth + 1);\n }\n } else if (typeof node.c === 'object') {\n // Object format: {key: childNode, ...}\n for (const childNode of Object.values(node.c)) {\n traverse(childNode, depth + 1);\n }\n }\n }\n }\n \n traverse(radixNode);\n console.log(`📚 Extracted ${vocabulary.size} words from ${nodesVisited} nodes visited`);\n return vocabulary;\n}\n\n/**\n * Find all candidate matches for a single query token\n * \n * @param queryToken - Token from search query\n * @param vocabulary - Set of all words in the index\n * @param tolerance - Fuzzy matching tolerance\n * @param synonyms - Synonym map (optional)\n * @param synonymScore - Score multiplier for synonym matches\n * @returns Array of candidate matches\n */\nexport function findCandidatesForToken(\n queryToken: string,\n vocabulary: Set<string>,\n tolerance: number,\n synonyms?: SynonymMap,\n synonymScore: number = 0.8\n): Candidate[] {\n const candidates: Candidate[] = [];\n const seen = new Set<string>();\n\n // 1. Check for exact match\n if (vocabulary.has(queryToken)) {\n candidates.push({\n word: queryToken,\n type: 'exact',\n queryToken,\n distance: 0,\n score: 1.0\n });\n seen.add(queryToken);\n }\n\n // 2. Check for fuzzy matches\n for (const word of vocabulary) {\n if (seen.has(word)) continue;\n\n const match = fuzzyMatch(word, queryToken, tolerance);\n if (match.matches) {\n candidates.push({\n word,\n type: 'fuzzy',\n queryToken,\n distance: match.distance,\n score: match.score\n });\n seen.add(word);\n }\n }\n\n // 3. Check for synonym matches\n if (synonyms && synonyms[queryToken]) {\n for (const synonym of synonyms[queryToken]) {\n if (seen.has(synonym)) continue;\n if (vocabulary.has(synonym)) {\n candidates.push({\n word: synonym,\n type: 'synonym',\n queryToken,\n distance: 0,\n score: synonymScore\n });\n seen.add(synonym);\n }\n }\n }\n\n return candidates;\n}\n\n/**\n * Find candidates for all query tokens\n * \n * @param queryTokens - Array of tokens from search query\n * @param vocabulary - Set of all words in the index\n * @param tolerance - Fuzzy matching tolerance\n * @param synonyms - Synonym map (optional)\n * @param synonymScore - Score multiplier for synonym matches\n * @returns Map of query tokens to their candidate matches\n */\nexport function findAllCandidates(\n queryTokens: string[],\n vocabulary: Set<string>,\n tolerance: number,\n synonyms?: SynonymMap,\n synonymScore: number = 0.8\n): Map<string, Candidate[]> {\n const candidatesMap = new Map<string, Candidate[]>();\n\n for (const token of queryTokens) {\n const tokenCandidates = findCandidatesForToken(\n token,\n vocabulary,\n tolerance,\n synonyms,\n synonymScore\n );\n candidatesMap.set(token, tokenCandidates);\n }\n\n return candidatesMap;\n}\n\n/**\n * Get total number of candidates across all tokens\n * \n * @param candidatesMap - Map of token to candidates\n * @returns Total count of all candidates\n */\nexport function getTotalCandidateCount(\n candidatesMap: Map<string, Candidate[]>\n): number {\n let total = 0;\n for (const candidates of candidatesMap.values()) {\n total += candidates.length;\n }\n return total;\n}\n\n/**\n * Filter candidates by minimum score threshold\n * \n * @param candidatesMap - Map of token to candidates\n * @param minScore - Minimum score threshold\n * @returns Filtered candidates map\n */\nexport function filterCandidatesByScore(\n candidatesMap: Map<string, Candidate[]>,\n minScore: number\n): Map<string, Candidate[]> {\n const filtered = new Map<string, Candidate[]>();\n\n for (const [token, candidates] of candidatesMap.entries()) {\n const filteredCandidates = candidates.filter(c => c.score >= minScore);\n if (filteredCandidates.length > 0) {\n filtered.set(token, filteredCandidates);\n }\n }\n\n return filtered;\n}\n","/**\n * Phrase scoring algorithm with semantic weighting\n */\n\nimport type { WordMatch, PhraseMatch, Candidate } from './types.js';\n\n/**\n * Configuration for phrase scoring\n */\nexport interface ScoringConfig {\n weights: {\n exact: number;\n fuzzy: number;\n order: number;\n proximity: number;\n density: number;\n semantic: number;\n };\n maxGap: number;\n}\n\n/**\n * Find all phrase matches in a document\n * \n * @param documentTokens - Tokenized document content\n * @param candidatesMap - Map of query tokens to their candidates\n * @param config - Scoring configuration\n * @param documentFrequency - Document frequency map for TF-IDF\n * @param totalDocuments - Total number of documents\n * @returns Array of phrase matches\n */\nexport function findPhrasesInDocument(\n documentTokens: string[],\n candidatesMap: Map<string, Candidate[]>,\n config: ScoringConfig,\n documentFrequency: Map<string, number>,\n totalDocuments: number\n): PhraseMatch[] {\n const phrases: PhraseMatch[] = [];\n const queryTokens = Array.from(candidatesMap.keys());\n\n // Find all word matches in document\n const wordMatches: WordMatch[] = [];\n \n for (let i = 0; i < documentTokens.length; i++) {\n const docWord = documentTokens[i];\n \n // Check if this word matches any query token\n for (const [queryToken, candidates] of candidatesMap.entries()) {\n for (const candidate of candidates) {\n if (candidate.word === docWord) {\n wordMatches.push({\n word: docWord,\n queryToken,\n position: i,\n type: candidate.type,\n distance: candidate.distance,\n score: candidate.score\n });\n }\n }\n }\n }\n\n // Build phrases from word matches using sliding window\n for (let i = 0; i < wordMatches.length; i++) {\n const phrase = buildPhraseFromPosition(\n wordMatches,\n i,\n queryTokens,\n config,\n documentFrequency,\n totalDocuments,\n wordMatches // Pass all word matches for density calculation\n );\n \n if (phrase && phrase.words.length > 0) {\n phrases.push(phrase);\n }\n }\n\n // Deduplicate and sort by score\n return deduplicatePhrases(phrases);\n}\n\n/**\n * Build a phrase starting from a specific word match position\n * \n * @param wordMatches - All word matches in document\n * @param startIndex - Starting index in wordMatches array\n * @param queryTokens - Original query tokens\n * @param config - Scoring configuration\n * @param documentFrequency - Document frequency map\n * @param totalDocuments - Total document count\n * @param allWordMatches - All word matches in document (for density calculation)\n * @returns Phrase match or null\n */\nfunction buildPhraseFromPosition(\n wordMatches: WordMatch[],\n startIndex: number,\n queryTokens: string[],\n config: ScoringConfig,\n documentFrequency: Map<string, number>,\n totalDocuments: number,\n allWordMatches: WordMatch[]\n): PhraseMatch | null {\n const startMatch = wordMatches[startIndex];\n const phraseWords: WordMatch[] = [startMatch];\n const coveredTokens = new Set([startMatch.queryToken]);\n\n // Look for nearby matches to complete the phrase\n for (let i = startIndex + 1; i < wordMatches.length; i++) {\n const match = wordMatches[i];\n const gap = match.position - phraseWords[phraseWords.length - 1].position - 1;\n\n // Stop if gap exceeds maximum\n if (gap > config.maxGap) {\n break;\n }\n\n // Add if it's a different query token\n if (!coveredTokens.has(match.queryToken)) {\n phraseWords.push(match);\n coveredTokens.add(match.queryToken);\n }\n\n // Stop if we have all query tokens\n if (coveredTokens.size === queryTokens.length) {\n break;\n }\n }\n\n // Calculate phrase score\n if (phraseWords.length > 0) {\n const { score, breakdown } = calculatePhraseScore(\n phraseWords,\n queryTokens,\n config,\n documentFrequency,\n totalDocuments,\n allWordMatches\n );\n\n return {\n words: phraseWords,\n startPosition: phraseWords[0].position,\n endPosition: phraseWords[phraseWords.length - 1].position,\n gap: phraseWords[phraseWords.length - 1].position - phraseWords[0].position,\n inOrder: isInOrder(phraseWords, queryTokens),\n score,\n scoreBreakdown: breakdown\n };\n }\n\n return null;\n}\n\n/**\n * Calculate overall phrase score\n * \n * @param phraseWords - Words in the phrase\n * @param queryTokens - Original query tokens\n * @param config - Scoring configuration\n * @param documentFrequency - Document frequency map\n * @param totalDocuments - Total document count\n * @param allWordMatches - All word matches in document (for density calculation)\n * @returns Phrase score (0-1) and detailed component breakdown\n */\nfunction calculatePhraseScore(\n phraseWords: WordMatch[],\n queryTokens: string[],\n config: ScoringConfig,\n documentFrequency: Map<string, number>,\n totalDocuments: number,\n allWordMatches: WordMatch[]\n): { score: number; breakdown: { base: number; order: number; proximity: number; density: number; semantic: number } } {\n // Base score from word matches\n let baseScore = 0;\n for (const word of phraseWords) {\n const weight = word.type === 'exact' ? config.weights.exact :\n word.type === 'fuzzy' ? config.weights.fuzzy : \n config.weights.fuzzy * 0.8; // synonym\n baseScore += word.score * weight;\n }\n baseScore /= phraseWords.length;\n\n // Order bonus\n const inOrder = isInOrder(phraseWords, queryTokens);\n const orderScore = inOrder ? 1.0 : 0.5;\n\n // Proximity bonus (closer words score higher)\n const span = phraseWords[phraseWords.length - 1].position - phraseWords[0].position + 1;\n const proximityScore = Math.max(0, 1.0 - (span / (queryTokens.length * 5)));\n\n // Density bonus (how many times query terms appear in the document)\n // Only applies to single-word queries - for phrase queries, use phrase-specific metrics\n let densityScore = 0;\n \n if (queryTokens.length === 1) {\n // Single-word query: reward repetition without capping\n const totalOccurrences = allWordMatches.length;\n // Normalize by query length but don't cap - more occurrences = higher score\n densityScore = totalOccurrences / queryTokens.length;\n } else {\n // Multi-word phrase query: density doesn't apply\n // Use phrase coverage instead (what percentage of query is in this phrase)\n densityScore = phraseWords.length / queryTokens.length;\n }\n\n // Semantic score (TF-IDF)\n const semanticScore = calculateSemanticScore(\n phraseWords,\n documentFrequency,\n totalDocuments\n );\n\n // Weighted combination\n const weights = config.weights;\n \n // Calculate weighted components\n const weightedBase = baseScore;\n const weightedOrder = orderScore * weights.order;\n const weightedProximity = proximityScore * weights.proximity;\n const weightedDensity = densityScore * weights.density;\n const weightedSemantic = semanticScore * weights.semantic;\n \n const totalScore = weightedBase + weightedOrder + weightedProximity + weightedDensity + weightedSemantic;\n\n // Calculate max possible score (all components at maximum)\n // baseScore max is 1.0 (from exact matches), other components are already 0-1\n const maxPossibleScore = 1.0 + weights.order + weights.proximity + weights.density + weights.semantic;\n \n // Normalize to 0-1 range without clamping\n const score = totalScore / maxPossibleScore;\n\n // Component contributions to the final normalized score\n const base = weightedBase / maxPossibleScore;\n const order = weightedOrder / maxPossibleScore;\n const proximity = weightedProximity / maxPossibleScore;\n const density = weightedDensity / maxPossibleScore;\n const semantic = weightedSemantic / maxPossibleScore;\n\n return {\n score,\n breakdown: {\n base,\n order,\n proximity,\n density,\n semantic\n }\n };\n}\n\n/**\n * Check if words are in the same order as query tokens\n * \n * @param phraseWords - Words in the phrase\n * @param queryTokens - Original query tokens\n * @returns True if in order\n */\nfunction isInOrder(phraseWords: WordMatch[], queryTokens: string[]): boolean {\n const tokenOrder = new Map(queryTokens.map((token, index) => [token, index]));\n \n for (let i = 1; i < phraseWords.length; i++) {\n const prevOrder = tokenOrder.get(phraseWords[i - 1].queryToken) ?? -1;\n const currOrder = tokenOrder.get(phraseWords[i].queryToken) ?? -1;\n \n if (currOrder < prevOrder) {\n return false;\n }\n }\n \n return true;\n}\n\n/**\n * Calculate semantic score using TF-IDF\n * \n * @param phraseWords - Words in the phrase\n * @param documentFrequency - Document frequency map\n * @param totalDocuments - Total document count\n * @returns Semantic score (0-1)\n */\nfunction calculateSemanticScore(\n phraseWords: WordMatch[],\n documentFrequency: Map<string, number>,\n totalDocuments: number\n): number {\n // Handle edge case: no documents\n if (totalDocuments === 0) {\n return 0;\n }\n \n let tfidfSum = 0;\n \n for (const word of phraseWords) {\n const df = documentFrequency.get(word.word) || 1;\n const idf = Math.log(totalDocuments / df);\n tfidfSum += idf;\n }\n \n // Normalize by phrase length\n const avgTfidf = tfidfSum / phraseWords.length;\n \n // Normalize to 0-1 range (assuming max IDF of ~10)\n return Math.min(1.0, avgTfidf / 10);\n}\n\n/**\n * Deduplicate overlapping phrases, keeping highest scoring ones\n * \n * @param phrases - Array of phrase matches\n * @returns Deduplicated phrases sorted by score\n */\nfunction deduplicatePhrases(phrases: PhraseMatch[]): PhraseMatch[] {\n if (phrases.length === 0) return [];\n\n // Sort by score descending\n const sorted = phrases.slice().sort((a, b) => b.score - a.score);\n const result: PhraseMatch[] = [];\n const covered = new Set<number>();\n\n for (const phrase of sorted) {\n // Check if this phrase overlaps with already selected phrases\n let overlaps = false;\n for (let pos = phrase.startPosition; pos <= phrase.endPosition; pos++) {\n if (covered.has(pos)) {\n overlaps = true;\n break;\n }\n }\n\n if (!overlaps) {\n result.push(phrase);\n // Mark positions as covered\n for (let pos = phrase.startPosition; pos <= phrase.endPosition; pos++) {\n covered.add(pos);\n }\n }\n }\n\n return result.sort((a, b) => b.score - a.score);\n}\n","/**\n * Fuzzy Phrase Plugin for Orama\n * \n * Advanced fuzzy phrase matching with semantic weighting and synonym expansion.\n * Completely independent from QPS - accesses Orama's radix tree directly.\n */\n\nimport type { AnyOrama, OramaPlugin, Results, TypedDocument } from '@wcs-colab/orama';\nimport type { FuzzyPhraseConfig, PluginState, SynonymMap, DocumentMatch } from './types.js';\nimport { calculateAdaptiveTolerance } from './fuzzy.js';\nimport { \n extractVocabularyFromRadixTree, \n findAllCandidates,\n filterCandidatesByScore \n} from './candidates.js';\nimport { findPhrasesInDocument } from './scoring.js';\n\n/**\n * Default configuration\n */\nconst DEFAULT_CONFIG: Required<FuzzyPhraseConfig> = {\n textProperty: 'content',\n tolerance: 1,\n adaptiveTolerance: true,\n enableSynonyms: false,\n supabase: undefined as any,\n synonymMatchScore: 0.8,\n weights: {\n exact: 1.0,\n fuzzy: 0.8,\n order: 0.3,\n proximity: 0.2,\n density: 0.2,\n semantic: 0.15\n },\n maxGap: 5,\n minScore: 0.1\n};\n\n/**\n * Plugin state storage (keyed by Orama instance)\n */\nconst pluginStates = new WeakMap<AnyOrama, PluginState>();\n\n/**\n * Create the Fuzzy Phrase Plugin\n * \n * @param userConfig - User configuration options\n * @returns Orama plugin instance\n */\nexport function pluginFuzzyPhrase(userConfig: FuzzyPhraseConfig = {}): OramaPlugin {\n // Merge user config with defaults\n const config: Required<FuzzyPhraseConfig> = {\n textProperty: userConfig.textProperty ?? DEFAULT_CONFIG.textProperty,\n tolerance: userConfig.tolerance ?? DEFAULT_CONFIG.tolerance,\n adaptiveTolerance: userConfig.adaptiveTolerance ?? DEFAULT_CONFIG.adaptiveTolerance,\n enableSynonyms: userConfig.enableSynonyms ?? DEFAULT_CONFIG.enableSynonyms,\n supabase: userConfig.supabase || DEFAULT_CONFIG.supabase,\n synonymMatchScore: userConfig.synonymMatchScore ?? DEFAULT_CONFIG.synonymMatchScore,\n weights: {\n exact: userConfig.weights?.exact ?? DEFAULT_CONFIG.weights.exact,\n fuzzy: userConfig.weights?.fuzzy ?? DEFAULT_CONFIG.weights.fuzzy,\n order: userConfig.weights?.order ?? DEFAULT_CONFIG.weights.order,\n proximity: userConfig.weights?.proximity ?? DEFAULT_CONFIG.weights.proximity,\n density: userConfig.weights?.density ?? DEFAULT_CONFIG.weights.density,\n semantic: userConfig.weights?.semantic ?? DEFAULT_CONFIG.weights.semantic\n },\n maxGap: userConfig.maxGap ?? DEFAULT_CONFIG.maxGap,\n minScore: userConfig.minScore ?? DEFAULT_CONFIG.minScore\n };\n\n const plugin: OramaPlugin = {\n name: 'fuzzy-phrase',\n\n /**\n * Initialize plugin after index is created\n */\n afterCreate: async (orama: AnyOrama) => {\n console.log('🔮 Initializing Fuzzy Phrase Plugin...');\n\n // Initialize state\n const state: PluginState = {\n synonymMap: {},\n config,\n documentFrequency: new Map(),\n totalDocuments: 0\n };\n\n // Load synonyms from Supabase if enabled\n if (config.enableSynonyms && config.supabase) {\n try {\n console.log('📖 Loading synonyms from Supabase...');\n state.synonymMap = await loadSynonymsFromSupabase(config.supabase);\n console.log(`✅ Loaded ${Object.keys(state.synonymMap).length} words with synonyms`);\n } catch (error) {\n console.error('⚠️ Failed to load synonyms:', error);\n // Continue without synonyms\n }\n }\n\n // Calculate document frequencies for TF-IDF from document store\n const docs = (orama.data as any)?.docs?.docs;\n if (docs) {\n state.totalDocuments = Object.keys(docs).length;\n state.documentFrequency = calculateDocumentFrequencies(docs, config.textProperty);\n console.log(`📊 Calculated document frequencies for ${state.totalDocuments} documents`);\n }\n\n // Store state\n pluginStates.set(orama, state);\n console.log('✅ Fuzzy Phrase Plugin initialized');\n \n // Signal ready - emit a custom event that can be listened to\n // Use setImmediate to ensure this runs after the afterCreate hook completes\n setImmediate(() => {\n if (typeof (globalThis as any).fuzzyPhrasePluginReady === 'function') {\n console.log('📡 Signaling plugin ready...');\n (globalThis as any).fuzzyPhrasePluginReady();\n } else {\n console.warn('⚠️ fuzzyPhrasePluginReady callback not found');\n }\n });\n }\n };\n\n return plugin;\n}\n\n/**\n * Search with fuzzy phrase matching\n * \n * This function should be called instead of the regular search() function\n * to enable fuzzy phrase matching.\n */\nexport async function searchWithFuzzyPhrase<T extends AnyOrama>(\n orama: T, \n params: { term?: string; properties?: string[]; limit?: number },\n language?: string\n): Promise<Results<TypedDocument<T>>> {\n const startTime = performance.now();\n \n // Get plugin state\n const state = pluginStates.get(orama);\n \n if (!state) {\n console.error('❌ Plugin state not initialized');\n throw new Error('Fuzzy Phrase Plugin not properly initialized');\n }\n\n const { term, properties } = params;\n \n if (!term || typeof term !== 'string') {\n return { elapsed: { formatted: '0ms', raw: 0 }, hits: [], count: 0 };\n }\n\n // Use specified property or default\n const textProperty = (properties && properties[0]) || state.config.textProperty;\n\n // Tokenize query\n const queryTokens = tokenize(term);\n \n if (queryTokens.length === 0) {\n return { elapsed: { formatted: '0ms', raw: 0 }, hits: [], count: 0 };\n }\n\n // Calculate tolerance (adaptive or fixed)\n const tolerance = state.config.adaptiveTolerance\n ? calculateAdaptiveTolerance(queryTokens, state.config.tolerance)\n : state.config.tolerance;\n\n console.log(`🔍 Fuzzy phrase search: \"${term}\" (${queryTokens.length} tokens, tolerance: ${tolerance})`);\n\n // Extract vocabulary from radix tree\n let vocabulary: Set<string>;\n \n try {\n // Access radix tree - the actual index data is in orama.data.index, not orama.index\n // orama.index is just the component interface (methods)\n const indexData = (orama as any).data?.index;\n \n if (!indexData) {\n console.error('❌ No index data found in orama.data.index');\n return { elapsed: { formatted: '0ms', raw: 0 }, hits: [], count: 0 };\n }\n \n console.log('🔍 DEBUG: Index data keys:', Object.keys(indexData || {}));\n \n // Try different paths to find the radix tree\n let radixNode = null;\n \n // Path 1: QPS-style (orama.data.index.indexes[property].node)\n if (indexData.indexes?.[textProperty]?.node) {\n radixNode = indexData.indexes[textProperty].node;\n console.log('✅ Found radix via QPS-style path (data.index.indexes)');\n }\n // Path 2: Standard Orama (orama.data.index[property].node)\n else if (indexData[textProperty]?.node) {\n radixNode = indexData[textProperty].node;\n console.log('✅ Found radix via standard path (data.index[property])');\n }\n \n if (!radixNode) {\n console.error('❌ Radix tree not found for property:', textProperty);\n console.error(' Available properties in index:', Object.keys(indexData));\n return { elapsed: { formatted: '0ms', raw: 0 }, hits: [], count: 0 };\n }\n\n vocabulary = extractVocabularyFromRadixTree(radixNode);\n console.log(`📚 Extracted ${vocabulary.size} unique words from index`);\n } catch (error) {\n console.error('❌ Failed to extract vocabulary:', error);\n return { elapsed: { formatted: '0ms', raw: 0 }, hits: [], count: 0 };\n }\n\n // Find candidates for all query tokens\n const candidatesMap = findAllCandidates(\n queryTokens,\n vocabulary,\n tolerance,\n state.config.enableSynonyms ? state.synonymMap : undefined,\n state.config.synonymMatchScore\n );\n\n // Filter by minimum score\n const filteredCandidates = filterCandidatesByScore(\n candidatesMap,\n state.config.minScore\n );\n\n console.log(`🎯 Found candidates: ${Array.from(filteredCandidates.values()).reduce((sum, c) => sum + c.length, 0)} total`);\n\n // Search through all documents\n const documentMatches: DocumentMatch[] = [];\n \n console.log('🔍 DEBUG orama.data structure:', {\n dataKeys: Object.keys((orama as any).data || {}),\n hasDocs: !!((orama as any).data?.docs),\n docsType: (orama as any).data?.docs ? typeof (orama as any).data.docs : 'undefined'\n });\n \n // Try multiple possible document storage locations\n let docs: Record<string, any> = {};\n \n // Access the actual documents - they're nested in orama.data.docs.docs\n if ((orama as any).data?.docs?.docs) {\n docs = (orama as any).data.docs.docs;\n console.log('✅ Found docs at orama.data.docs.docs');\n }\n // Fallback: orama.data.docs (might be the correct structure in some cases)\n else if ((orama as any).data?.docs && typeof (orama as any).data.docs === 'object') {\n // Check if it has document-like properties (not sharedInternalDocumentStore, etc.)\n const firstKey = Object.keys((orama as any).data.docs)[0];\n if (firstKey && firstKey !== 'sharedInternalDocumentStore' && firstKey !== 'count') {\n docs = (orama as any).data.docs;\n console.log('✅ Found docs at orama.data.docs (direct)');\n }\n }\n \n if (Object.keys(docs).length === 0) {\n console.log('❌ Could not find documents - available structure:', {\n hasDataDocs: !!((orama as any).data?.docs),\n dataDocsKeys: (orama as any).data?.docs ? Object.keys((orama as any).data.docs) : 'none',\n hasDataDocsDocs: !!((orama as any).data?.docs?.docs),\n dataDocsDocsCount: (orama as any).data?.docs?.docs ? Object.keys((orama as any).data.docs.docs).length : 0\n });\n }\n \n console.log(`📄 Searching through ${Object.keys(docs).length} documents`);\n\n for (const [docId, doc] of Object.entries(docs)) {\n const text = doc[textProperty];\n \n if (!text || typeof text !== 'string') {\n continue;\n }\n\n // Tokenize document\n const docTokens = tokenize(text);\n\n // Find phrases in this document\n const phrases = findPhrasesInDocument(\n docTokens,\n filteredCandidates,\n {\n weights: state.config.weights as Required<FuzzyPhraseConfig['weights']>,\n maxGap: state.config.maxGap\n } as any,\n state.documentFrequency,\n state.totalDocuments\n );\n\n if (phrases.length > 0) {\n // Calculate overall document score (highest phrase score)\n const docScore = Math.max(...phrases.map(p => p.score));\n\n documentMatches.push({\n id: docId,\n phrases,\n score: docScore,\n document: doc\n });\n }\n }\n\n // Sort by score descending\n documentMatches.sort((a, b) => b.score - a.score);\n\n // Apply limit if specified\n const limit = params.limit ?? documentMatches.length;\n const limitedMatches = documentMatches.slice(0, limit);\n\n // Convert to Orama results format\n const hits = limitedMatches.map(match => ({\n id: match.id,\n score: match.score,\n document: match.document,\n // Store phrases for highlighting\n _phrases: match.phrases\n })) as any[];\n\n const elapsed = performance.now() - startTime;\n\n console.log(`✅ Found ${hits.length} results in ${elapsed.toFixed(2)}ms (limit: ${limit})`);\n\n return {\n elapsed: {\n formatted: `${elapsed.toFixed(2)}ms`,\n raw: Math.floor(elapsed * 1000000) // nanoseconds\n },\n hits,\n count: hits.length\n } as any;\n}\n\n/**\n * Load synonyms from Supabase\n */\nasync function loadSynonymsFromSupabase(\n supabaseConfig: { url: string; serviceKey: string }\n): Promise<SynonymMap> {\n try {\n console.log('🔍 DEBUG: Calling Supabase RPC get_synonym_map...');\n \n // Dynamic import to avoid bundling Supabase client if not needed\n const { createClient } = await import('@supabase/supabase-js');\n \n const supabase = createClient(supabaseConfig.url, supabaseConfig.serviceKey);\n \n // Call the get_synonym_map function\n const { data, error } = await supabase.rpc('get_synonym_map');\n \n console.log('🔍 DEBUG: Supabase RPC response:', {\n hasError: !!error,\n errorMessage: error?.message,\n hasData: !!data,\n dataType: typeof data,\n dataKeys: data ? Object.keys(data).length : 0\n });\n \n if (error) {\n throw new Error(`Supabase error: ${error.message}`);\n }\n \n const synonymMap = data || {};\n console.log(`📚 Loaded ${Object.keys(synonymMap).length} synonym entries from Supabase`);\n \n return synonymMap;\n } catch (error) {\n console.error('❌ Failed to load synonyms from Supabase:', error);\n throw error;\n }\n}\n\n/**\n * Calculate document frequencies for TF-IDF\n */\nfunction calculateDocumentFrequencies(\n docs: Record<string, any>,\n textProperty: string\n): Map<string, number> {\n const df = new Map<string, number>();\n\n for (const doc of Object.values(docs)) {\n const text = doc[textProperty];\n \n if (!text || typeof text !== 'string') {\n continue;\n }\n\n // Get unique words in this document\n const words = new Set(tokenize(text));\n\n // Increment document frequency for each unique word\n for (const word of words) {\n df.set(word, (df.get(word) || 0) + 1);\n }\n }\n\n return df;\n}\n\n/**\n * Normalize text using the same rules as server-side\n * \n * CRITICAL: This must match the normalizeText() function in server/index.js exactly\n * PLUS we remove all punctuation to match Orama's French tokenizer behavior\n */\nfunction normalizeText(text: string): string {\n return text\n .toLowerCase()\n .normalize('NFD')\n .replace(/[\\u0300-\\u036f]/g, '') // Remove diacritics\n // Replace French elisions (l', d', etc.) with space to preserve word boundaries\n .replace(/\\b[ldcjmnst][\\u2018\\u2019\\u201A\\u201B\\u2032\\u2035\\u0027\\u0060\\u00B4](?=\\w)/gi, ' ')\n .replace(/[\\u2018\\u2019\\u201A\\u201B\\u2032\\u2035\\u0027\\u0060\\u00B4]/g, '') // Remove remaining apostrophes\n .replace(/[\\u201c\\u201d]/g, '\"') // Normalize curly quotes to straight quotes\n .replace(/[.,;:!?()[\\]{}\\-—–«»\"\"]/g, ' ') // Remove punctuation (replace with space to preserve word boundaries)\n .replace(/\\s+/g, ' ') // Normalize multiple spaces to single space\n .trim();\n}\n\n/**\n * Tokenization matching normalized text behavior\n * \n * Note: Text should already be normalized before indexing, so we normalize again\n * to ensure plugin tokenization matches index tokenization\n */\nfunction tokenize(text: string): string[] {\n // Normalize first (same as indexing), then split by whitespace\n return normalizeText(text)\n .split(/\\s+/)\n .filter(token => token.length > 0);\n}\n\n/**\n * Export types for external use\n */\nexport type {\n FuzzyPhraseConfig,\n WordMatch,\n PhraseMatch,\n DocumentMatch,\n SynonymMap,\n Candidate\n} from './types.js';\n"]}
1
+ {"version":3,"sources":["../src/fuzzy.ts","../src/candidates.ts","../src/scoring.ts","../src/index.ts"],"names":[],"mappings":";AA4BO,SAAS,mBACd,GACA,GACA,OAC0B;AAE1B,MAAI,MAAM,GAAG;AACX,WAAO,EAAE,WAAW,MAAM,UAAU,EAAE;AAAA,EACxC;AAEA,QAAM,OAAO,EAAE;AACf,QAAM,OAAO,EAAE;AAGf,MAAI,KAAK,IAAI,OAAO,IAAI,IAAI,OAAO;AACjC,WAAO,EAAE,WAAW,OAAO,UAAU,QAAQ,EAAE;AAAA,EACjD;AAGA,MAAI,OAAO,MAAM;AACf,KAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC;AAAA,EAChB;AAEA,QAAM,IAAI,EAAE;AACZ,QAAM,IAAI,EAAE;AAGZ,MAAI,UAAU,IAAI,MAAM,IAAI,CAAC;AAC7B,MAAI,UAAU,IAAI,MAAM,IAAI,CAAC;AAG7B,WAAS,IAAI,GAAG,KAAK,GAAG,KAAK;AAC3B,YAAQ,CAAC,IAAI;AAAA,EACf;AAEA,WAAS,IAAI,GAAG,KAAK,GAAG,KAAK;AAC3B,YAAQ,CAAC,IAAI;AACb,QAAI,WAAW;AAEf,aAAS,IAAI,GAAG,KAAK,GAAG,KAAK;AAC3B,YAAM,OAAO,EAAE,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,IAAI,IAAI;AAEzC,cAAQ,CAAC,IAAI,KAAK;AAAA,QAChB,QAAQ,CAAC,IAAI;AAAA;AAAA,QACb,QAAQ,IAAI,CAAC,IAAI;AAAA;AAAA,QACjB,QAAQ,IAAI,CAAC,IAAI;AAAA;AAAA,MACnB;AAEA,iBAAW,KAAK,IAAI,UAAU,QAAQ,CAAC,CAAC;AAAA,IAC1C;AAGA,QAAI,WAAW,OAAO;AACpB,aAAO,EAAE,WAAW,OAAO,UAAU,QAAQ,EAAE;AAAA,IACjD;AAGA,KAAC,SAAS,OAAO,IAAI,CAAC,SAAS,OAAO;AAAA,EACxC;AAEA,QAAM,WAAW,QAAQ,CAAC;AAC1B,SAAO;AAAA,IACL,WAAW,YAAY;AAAA,IACvB;AAAA,EACF;AACF;AAUO,SAAS,WACd,MACA,YACA,WACuD;AAEvD,MAAI,SAAS,YAAY;AACvB,WAAO,EAAE,SAAS,MAAM,UAAU,GAAG,OAAO,EAAI;AAAA,EAClD;AAGA,MAAI,KAAK,WAAW,UAAU,GAAG;AAC/B,WAAO,EAAE,SAAS,MAAM,UAAU,GAAG,OAAO,KAAK;AAAA,EACnD;AAGA,QAAM,SAAS,mBAAmB,MAAM,YAAY,SAAS;AAE7D,MAAI,OAAO,WAAW;AAGpB,UAAM,QAAQ,IAAO,OAAO,WAAW;AACvC,WAAO;AAAA,MACL,SAAS;AAAA,MACT,UAAU,OAAO;AAAA,MACjB,OAAO,KAAK,IAAI,KAAK,KAAK;AAAA;AAAA,IAC5B;AAAA,EACF;AAEA,SAAO,EAAE,SAAS,OAAO,UAAU,YAAY,GAAG,OAAO,EAAE;AAC7D;AAWO,SAAS,2BACd,aACA,eACQ;AACR,QAAM,cAAc,YAAY;AAEhC,MAAI,eAAe,GAAG;AACpB,WAAO;AAAA,EACT,WAAW,eAAe,GAAG;AAC3B,WAAO,gBAAgB;AAAA,EACzB,WAAW,eAAe,GAAG;AAC3B,WAAO,gBAAgB;AAAA,EACzB,OAAO;AACL,WAAO,gBAAgB;AAAA,EACzB;AACF;;;ACjJO,SAAS,+BAA+B,WAA6B;AAC1E,QAAM,aAAa,oBAAI,IAAY;AACnC,MAAI,eAAe;AACnB,MAAI,aAAa;AAEjB,WAAS,SAAS,MAAW,QAAgB,GAAG;AAC9C,QAAI,CAAC,MAAM;AACT;AAAA,IACF;AAEA;AAIA,QAAI,KAAK,KAAK,KAAK,KAAK,OAAO,KAAK,MAAM,YAAY,KAAK,EAAE,SAAS,GAAG;AACvE,iBAAW,IAAI,KAAK,CAAC;AACrB;AAAA,IACF;AAGA,QAAI,KAAK,GAAG;AACV,UAAI,KAAK,aAAa,KAAK;AAEzB,mBAAW,CAAC,MAAM,SAAS,KAAK,KAAK,GAAG;AACtC,mBAAS,WAAW,QAAQ,CAAC;AAAA,QAC/B;AAAA,MACF,WAAW,MAAM,QAAQ,KAAK,CAAC,GAAG;AAEhC,mBAAW,CAAC,MAAM,SAAS,KAAK,KAAK,GAAG;AACtC,mBAAS,WAAW,QAAQ,CAAC;AAAA,QAC/B;AAAA,MACF,WAAW,OAAO,KAAK,MAAM,UAAU;AAErC,mBAAW,aAAa,OAAO,OAAO,KAAK,CAAC,GAAG;AAC7C,mBAAS,WAAW,QAAQ,CAAC;AAAA,QAC/B;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,WAAS,SAAS;AAClB,UAAQ,IAAI,uBAAgB,WAAW,IAAI,eAAe,YAAY,gBAAgB;AACtF,SAAO;AACT;AAYO,SAAS,uBACd,YACA,YACA,WACA,UACA,eAAuB,KACV;AACb,QAAM,aAA0B,CAAC;AACjC,QAAM,OAAO,oBAAI,IAAY;AAG7B,MAAI,WAAW,IAAI,UAAU,GAAG;AAC9B,eAAW,KAAK;AAAA,MACd,MAAM;AAAA,MACN,MAAM;AAAA,MACN;AAAA,MACA,UAAU;AAAA,MACV,OAAO;AAAA,IACT,CAAC;AACD,SAAK,IAAI,UAAU;AAAA,EACrB;AAGA,aAAW,QAAQ,YAAY;AAC7B,QAAI,KAAK,IAAI,IAAI;AAAG;AAEpB,UAAM,QAAQ,WAAW,MAAM,YAAY,SAAS;AACpD,QAAI,MAAM,SAAS;AACjB,iBAAW,KAAK;AAAA,QACd;AAAA,QACA,MAAM;AAAA,QACN;AAAA,QACA,UAAU,MAAM;AAAA,QAChB,OAAO,MAAM;AAAA,MACf,CAAC;AACD,WAAK,IAAI,IAAI;AAAA,IACf;AAAA,EACF;AAGA,MAAI,YAAY,SAAS,UAAU,GAAG;AACpC,eAAW,WAAW,SAAS,UAAU,GAAG;AAC1C,UAAI,KAAK,IAAI,OAAO;AAAG;AACvB,UAAI,WAAW,IAAI,OAAO,GAAG;AAC3B,mBAAW,KAAK;AAAA,UACd,MAAM;AAAA,UACN,MAAM;AAAA,UACN;AAAA,UACA,UAAU;AAAA,UACV,OAAO;AAAA,QACT,CAAC;AACD,aAAK,IAAI,OAAO;AAAA,MAClB;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;AAYO,SAAS,kBACd,aACA,YACA,WACA,UACA,eAAuB,KACG;AAC1B,QAAM,gBAAgB,oBAAI,IAAyB;AAEnD,aAAW,SAAS,aAAa;AAC/B,UAAM,kBAAkB;AAAA,MACtB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AACA,kBAAc,IAAI,OAAO,eAAe;AAAA,EAC1C;AAEA,SAAO;AACT;AAyBO,SAAS,wBACd,eACA,UAC0B;AAC1B,QAAM,WAAW,oBAAI,IAAyB;AAE9C,aAAW,CAAC,OAAO,UAAU,KAAK,cAAc,QAAQ,GAAG;AACzD,UAAM,qBAAqB,WAAW,OAAO,OAAK,EAAE,SAAS,QAAQ;AACrE,QAAI,mBAAmB,SAAS,GAAG;AACjC,eAAS,IAAI,OAAO,kBAAkB;AAAA,IACxC;AAAA,EACF;AAEA,SAAO;AACT;;;AClKO,SAAS,sBACd,gBACA,eACA,QACA,mBACA,gBACe;AACf,QAAM,UAAyB,CAAC;AAChC,QAAM,cAAc,MAAM,KAAK,cAAc,KAAK,CAAC;AAGnD,QAAM,cAA2B,CAAC;AAElC,WAAS,IAAI,GAAG,IAAI,eAAe,QAAQ,KAAK;AAC9C,UAAM,UAAU,eAAe,CAAC;AAGhC,eAAW,CAAC,YAAY,UAAU,KAAK,cAAc,QAAQ,GAAG;AAC9D,iBAAW,aAAa,YAAY;AAClC,YAAI,UAAU,SAAS,SAAS;AAC9B,sBAAY,KAAK;AAAA,YACf,MAAM;AAAA,YACN;AAAA,YACA,UAAU;AAAA,YACV,MAAM,UAAU;AAAA,YAChB,UAAU,UAAU;AAAA,YACpB,OAAO,UAAU;AAAA,UACnB,CAAC;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAGA,WAAS,IAAI,GAAG,IAAI,YAAY,QAAQ,KAAK;AAC3C,UAAM,SAAS;AAAA,MACb;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA;AAAA,IACF;AAEA,QAAI,UAAU,OAAO,MAAM,SAAS,GAAG;AACrC,cAAQ,KAAK,MAAM;AAAA,IACrB;AAAA,EACF;AAGA,SAAO,mBAAmB,OAAO;AACnC;AAeA,SAAS,wBACP,aACA,YACA,aACA,QACA,mBACA,gBACA,gBACA,gBACoB;AACpB,QAAM,aAAa,YAAY,UAAU;AACzC,QAAM,cAA2B,CAAC,UAAU;AAC5C,QAAM,gBAAgB,oBAAI,IAAI,CAAC,WAAW,UAAU,CAAC;AACrD,QAAM,WAAsB,CAAC;AAC7B,MAAI,eAAe;AAGnB,WAAS,IAAI,aAAa,GAAG,IAAI,YAAY,QAAQ,KAAK;AACxD,UAAM,QAAQ,YAAY,CAAC;AAC3B,UAAM,UAAU,YAAY,YAAY,SAAS,CAAC,EAAE;AACpD,UAAM,MAAM,MAAM,WAAW,UAAU;AAGvC,QAAI,MAAM,OAAO,QAAQ;AACvB;AAAA,IACF;AAGA,aAAS,MAAM,UAAU,GAAG,MAAM,MAAM,UAAU,OAAO;AACvD;AACA,eAAS,KAAK;AAAA,QACZ,MAAM,eAAe,GAAG;AAAA,QACxB,UAAU;AAAA,QACV,UAAU;AAAA,MACZ,CAAC;AAAA,IACH;AAGA,QAAI,CAAC,cAAc,IAAI,MAAM,UAAU,GAAG;AACxC,kBAAY,KAAK,KAAK;AACtB,oBAAc,IAAI,MAAM,UAAU;AAAA,IACpC;AAGA,QAAI,cAAc,SAAS,YAAY,QAAQ;AAC7C;AAAA,IACF;AAAA,EACF;AAGA,MAAI,YAAY,SAAS,GAAG;AAC1B,UAAM,WAAW,YAAY,SAAS,YAAY;AAClD,UAAM,OAAO,YAAY,YAAY,SAAS,CAAC,EAAE,WAAW,YAAY,CAAC,EAAE,WAAW;AAEtF,UAAM,EAAE,OAAO,UAAU,IAAI;AAAA,MAC3B;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,WAAO;AAAA,MACL,OAAO;AAAA,MACP;AAAA,MACA,SAAS;AAAA,MACT;AAAA,MACA,eAAe,YAAY,CAAC,EAAE;AAAA,MAC9B,aAAa,YAAY,YAAY,SAAS,CAAC,EAAE;AAAA,MACjD;AAAA,MACA,SAAS,UAAU,aAAa,WAAW;AAAA,MAC3C;AAAA,MACA,gBAAgB;AAAA,IAClB;AAAA,EACF;AAEA,SAAO;AACT;AAcA,SAAS,qBACP,aACA,aACA,QACA,mBACA,gBACA,gBACA,UACuI;AAGvI,MAAI,YAAY;AAChB,aAAW,QAAQ,aAAa;AAC9B,UAAM,SAAS,KAAK,SAAS,UAAU,OAAO,QAAQ,QACvC,KAAK,SAAS,UAAU,OAAO,QAAQ,QACvC,OAAO,QAAQ,QAAQ;AACtC,iBAAa,KAAK,QAAQ;AAAA,EAC5B;AACA,eAAa,YAAY;AAGzB,QAAM,UAAU,UAAU,aAAa,WAAW;AAClD,QAAM,aAAa,UAAU,IAAM;AAInC,QAAM,OAAO,YAAY,YAAY,SAAS,CAAC,EAAE,WAAW,YAAY,CAAC,EAAE,WAAW;AACtF,QAAM,kBAAkB,YAAY,SAAS,OAAO;AACpD,QAAM,iBAAiB,KAAK,IAAI,GAAG,IAAO,OAAO,eAAgB;AAIjE,MAAI,eAAe;AAEnB,MAAI,YAAY,WAAW,GAAG;AAE5B,UAAM,mBAAmB,eAAe;AAExC,mBAAe,KAAK,IAAI,GAAK,mBAAmB,EAAE;AAAA,EACpD;AAKA,QAAM,gBAAgB;AAAA,IACpB;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAGA,QAAM,UAAU,OAAO;AAGvB,QAAM,eAAe;AACrB,QAAM,gBAAgB,aAAa,QAAQ;AAC3C,QAAM,oBAAoB,iBAAiB,QAAQ;AACnD,QAAM,kBAAkB,eAAe,QAAQ;AAC/C,QAAM,mBAAmB,gBAAgB,QAAQ;AAEjD,QAAM,aAAa,eAAe,gBAAgB,oBAAoB,kBAAkB;AAIxF,QAAM,gBAAgB,KAAK,IAAI,QAAQ,OAAO,QAAQ,KAAK;AAC3D,QAAM,mBAAmB,gBAAgB,QAAQ,QAAQ,QAAQ,YAAY,QAAQ,UAAU,QAAQ;AAGvG,QAAM,kBAAkB,aAAa;AAIrC,QAAM,qBAAqB,YAAY,SAAS,IAAI,WAAW;AAC/D,QAAM,QAAQ,kBAAkB;AAGhC,QAAM,OAAO,eAAe;AAC5B,QAAM,QAAQ,gBAAgB;AAC9B,QAAM,YAAY,oBAAoB;AACtC,QAAM,UAAU,kBAAkB;AAClC,QAAM,WAAW,mBAAmB;AAEpC,SAAO;AAAA,IACL;AAAA,IACA,WAAW;AAAA,MACT;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,UAAU;AAAA;AAAA,IACZ;AAAA,EACF;AACF;AASA,SAAS,UAAU,aAA0B,aAAgC;AAC3E,QAAM,aAAa,IAAI,IAAI,YAAY,IAAI,CAAC,OAAO,UAAU,CAAC,OAAO,KAAK,CAAC,CAAC;AAE5E,WAAS,IAAI,GAAG,IAAI,YAAY,QAAQ,KAAK;AAC3C,UAAM,YAAY,WAAW,IAAI,YAAY,IAAI,CAAC,EAAE,UAAU,KAAK;AACnE,UAAM,YAAY,WAAW,IAAI,YAAY,CAAC,EAAE,UAAU,KAAK;AAE/D,QAAI,YAAY,WAAW;AACzB,aAAO;AAAA,IACT;AAAA,EACF;AAEA,SAAO;AACT;AAUA,SAAS,uBACP,aACA,mBACA,gBACQ;AAER,MAAI,mBAAmB,GAAG;AACxB,WAAO;AAAA,EACT;AAEA,MAAI,WAAW;AAEf,aAAW,QAAQ,aAAa;AAC9B,UAAM,KAAK,kBAAkB,IAAI,KAAK,IAAI,KAAK;AAC/C,UAAM,MAAM,KAAK,IAAI,iBAAiB,EAAE;AACxC,gBAAY;AAAA,EACd;AAGA,QAAM,WAAW,WAAW,YAAY;AAGxC,SAAO,KAAK,IAAI,GAAK,WAAW,EAAE;AACpC;AAQA,SAAS,mBAAmB,SAAuC;AACjE,MAAI,QAAQ,WAAW;AAAG,WAAO,CAAC;AAGlC,QAAM,SAAS,QAAQ,MAAM,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAC/D,QAAM,SAAwB,CAAC;AAC/B,QAAM,UAAU,oBAAI,IAAY;AAEhC,aAAW,UAAU,QAAQ;AAE3B,QAAI,WAAW;AACf,aAAS,MAAM,OAAO,eAAe,OAAO,OAAO,aAAa,OAAO;AACrE,UAAI,QAAQ,IAAI,GAAG,GAAG;AACpB,mBAAW;AACX;AAAA,MACF;AAAA,IACF;AAEA,QAAI,CAAC,UAAU;AACb,aAAO,KAAK,MAAM;AAElB,eAAS,MAAM,OAAO,eAAe,OAAO,OAAO,aAAa,OAAO;AACrE,gBAAQ,IAAI,GAAG;AAAA,MACjB;AAAA,IACF;AAAA,EACF;AAEA,SAAO,OAAO,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAChD;;;ACzWA,IAAM,iBAA8C;AAAA,EAClD,cAAc;AAAA,EACd,WAAW;AAAA,EACX,mBAAmB;AAAA,EACnB,gBAAgB;AAAA,EAChB,UAAU;AAAA,EACV,mBAAmB;AAAA,EACnB,SAAS;AAAA,IACP,OAAO;AAAA,IACP,OAAO;AAAA,IACP,OAAO;AAAA,IACP,WAAW;AAAA,IACX,SAAS;AAAA,IACT,UAAU;AAAA,EACZ;AAAA,EACA,QAAQ;AAAA,EACR,UAAU;AAAA,EACV,yBAAyB;AAC3B;AAKA,IAAM,eAAe,oBAAI,QAA+B;AAQjD,SAAS,kBAAkB,aAAgC,CAAC,GAAgB;AAEjF,QAAM,SAAsC;AAAA,IAC1C,cAAc,WAAW,gBAAgB,eAAe;AAAA,IACxD,WAAW,WAAW,aAAa,eAAe;AAAA,IAClD,mBAAmB,WAAW,qBAAqB,eAAe;AAAA,IAClE,gBAAgB,WAAW,kBAAkB,eAAe;AAAA,IAC5D,UAAU,WAAW,YAAY,eAAe;AAAA,IAChD,mBAAmB,WAAW,qBAAqB,eAAe;AAAA,IAClE,SAAS;AAAA,MACP,OAAO,WAAW,SAAS,SAAS,eAAe,QAAQ;AAAA,MAC3D,OAAO,WAAW,SAAS,SAAS,eAAe,QAAQ;AAAA,MAC3D,OAAO,WAAW,SAAS,SAAS,eAAe,QAAQ;AAAA,MAC3D,WAAW,WAAW,SAAS,aAAa,eAAe,QAAQ;AAAA,MACnE,SAAS,WAAW,SAAS,WAAW,eAAe,QAAQ;AAAA,MAC/D,UAAU,WAAW,SAAS,YAAY,eAAe,QAAQ;AAAA,IACnE;AAAA,IACA,QAAQ,WAAW,UAAU,eAAe;AAAA,IAC5C,UAAU,WAAW,YAAY,eAAe;AAAA,IAChD,yBAAyB,WAAW,2BAA2B,eAAe;AAAA,EAChF;AAEA,QAAM,SAAsB;AAAA,IAC1B,MAAM;AAAA;AAAA;AAAA;AAAA,IAKN,aAAa,OAAO,UAAoB;AACtC,cAAQ,IAAI,+CAAwC;AAGpD,YAAM,QAAqB;AAAA,QACzB,YAAY,CAAC;AAAA,QACb;AAAA,QACA,mBAAmB,oBAAI,IAAI;AAAA,QAC3B,gBAAgB;AAAA,MAClB;AAGA,UAAI,OAAO,kBAAkB,OAAO,UAAU;AAC5C,YAAI;AACF,kBAAQ,IAAI,6CAAsC;AAClD,gBAAM,aAAa,MAAM,yBAAyB,OAAO,QAAQ;AACjE,kBAAQ,IAAI,iBAAY,OAAO,KAAK,MAAM,UAAU,EAAE,MAAM,sBAAsB;AAAA,QACpF,SAAS,OAAO;AACd,kBAAQ,MAAM,0CAAgC,KAAK;AAAA,QAErD;AAAA,MACF;AAGA,YAAM,OAAQ,MAAM,MAAc,MAAM;AACxC,UAAI,MAAM;AACR,cAAM,iBAAiB,OAAO,KAAK,IAAI,EAAE;AACzC,cAAM,oBAAoB,6BAA6B,MAAM,OAAO,YAAY;AAChF,gBAAQ,IAAI,iDAA0C,MAAM,cAAc,YAAY;AAAA,MACxF;AAGA,mBAAa,IAAI,OAAO,KAAK;AAC7B,cAAQ,IAAI,wCAAmC;AAI/C,mBAAa,MAAM;AACjB,YAAI,OAAQ,WAAmB,2BAA2B,YAAY;AACpE,kBAAQ,IAAI,qCAA8B;AAC1C,UAAC,WAAmB,uBAAuB;AAAA,QAC7C,OAAO;AACL,kBAAQ,KAAK,yDAA+C;AAAA,QAC9D;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AAEA,SAAO;AACT;AAQA,eAAsB,sBACpB,OACA,QACA,UACoC;AACpC,QAAM,YAAY,YAAY,IAAI;AAGlC,QAAM,QAAQ,aAAa,IAAI,KAAK;AAEpC,MAAI,CAAC,OAAO;AACV,YAAQ,MAAM,qCAAgC;AAC9C,UAAM,IAAI,MAAM,8CAA8C;AAAA,EAChE;AAEA,QAAM,EAAE,MAAM,WAAW,IAAI;AAE7B,MAAI,CAAC,QAAQ,OAAO,SAAS,UAAU;AACrC,WAAO,EAAE,SAAS,EAAE,WAAW,OAAO,KAAK,EAAE,GAAG,MAAM,CAAC,GAAG,OAAO,EAAE;AAAA,EACrE;AAGA,QAAM,eAAgB,cAAc,WAAW,CAAC,KAAM,MAAM,OAAO;AAGnE,QAAM,cAAc,SAAS,IAAI;AAEjC,MAAI,YAAY,WAAW,GAAG;AAC5B,WAAO,EAAE,SAAS,EAAE,WAAW,OAAO,KAAK,EAAE,GAAG,MAAM,CAAC,GAAG,OAAO,EAAE;AAAA,EACrE;AAGA,QAAM,YAAY,MAAM,OAAO,oBAC3B,2BAA2B,aAAa,MAAM,OAAO,SAAS,IAC9D,MAAM,OAAO;AAEjB,UAAQ,IAAI,mCAA4B,IAAI,MAAM,YAAY,MAAM,uBAAuB,SAAS,GAAG;AAGvG,MAAI;AAEJ,MAAI;AAGF,UAAM,YAAa,MAAc,MAAM;AAEvC,QAAI,CAAC,WAAW;AACd,cAAQ,MAAM,gDAA2C;AACzD,aAAO,EAAE,SAAS,EAAE,WAAW,OAAO,KAAK,EAAE,GAAG,MAAM,CAAC,GAAG,OAAO,EAAE;AAAA,IACrE;AAEA,YAAQ,IAAI,qCAA8B,OAAO,KAAK,aAAa,CAAC,CAAC,CAAC;AAGtE,QAAI,YAAY;AAGhB,QAAI,UAAU,UAAU,YAAY,GAAG,MAAM;AAC3C,kBAAY,UAAU,QAAQ,YAAY,EAAE;AAC5C,cAAQ,IAAI,4DAAuD;AAAA,IACrE,WAES,UAAU,YAAY,GAAG,MAAM;AACtC,kBAAY,UAAU,YAAY,EAAE;AACpC,cAAQ,IAAI,6DAAwD;AAAA,IACtE;AAEA,QAAI,CAAC,WAAW;AACd,cAAQ,MAAM,6CAAwC,YAAY;AAClE,cAAQ,MAAM,qCAAqC,OAAO,KAAK,SAAS,CAAC;AACzE,aAAO,EAAE,SAAS,EAAE,WAAW,OAAO,KAAK,EAAE,GAAG,MAAM,CAAC,GAAG,OAAO,EAAE;AAAA,IACrE;AAEA,iBAAa,+BAA+B,SAAS;AACrD,YAAQ,IAAI,uBAAgB,WAAW,IAAI,0BAA0B;AAAA,EACvE,SAAS,OAAO;AACd,YAAQ,MAAM,wCAAmC,KAAK;AACtD,WAAO,EAAE,SAAS,EAAE,WAAW,OAAO,KAAK,EAAE,GAAG,MAAM,CAAC,GAAG,OAAO,EAAE;AAAA,EACrE;AAGA,QAAM,gBAAgB;AAAA,IACpB;AAAA,IACA;AAAA,IACA;AAAA,IACA,MAAM,OAAO,iBAAiB,MAAM,aAAa;AAAA,IACjD,MAAM,OAAO;AAAA,EACf;AAGA,QAAM,qBAAqB;AAAA,IACzB;AAAA,IACA,MAAM,OAAO;AAAA,EACf;AAEA,UAAQ,IAAI,+BAAwB,MAAM,KAAK,mBAAmB,OAAO,CAAC,EAAE,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,QAAQ,CAAC,CAAC,QAAQ;AAGzH,QAAM,kBAAmC,CAAC;AAE1C,UAAQ,IAAI,yCAAkC;AAAA,IAC5C,UAAU,OAAO,KAAM,MAAc,QAAQ,CAAC,CAAC;AAAA,IAC/C,SAAS,CAAC,CAAG,MAAc,MAAM;AAAA,IACjC,UAAW,MAAc,MAAM,OAAO,OAAQ,MAAc,KAAK,OAAO;AAAA,EAC1E,CAAC;AAGD,MAAI,OAA4B,CAAC;AAGjC,MAAK,MAAc,MAAM,MAAM,MAAM;AACnC,WAAQ,MAAc,KAAK,KAAK;AAChC,YAAQ,IAAI,2CAAsC;AAAA,EACpD,WAEU,MAAc,MAAM,QAAQ,OAAQ,MAAc,KAAK,SAAS,UAAU;AAElF,UAAM,WAAW,OAAO,KAAM,MAAc,KAAK,IAAI,EAAE,CAAC;AACxD,QAAI,YAAY,aAAa,iCAAiC,aAAa,SAAS;AAClF,aAAQ,MAAc,KAAK;AAC3B,cAAQ,IAAI,+CAA0C;AAAA,IACxD;AAAA,EACF;AAEA,MAAI,OAAO,KAAK,IAAI,EAAE,WAAW,GAAG;AAClC,YAAQ,IAAI,0DAAqD;AAAA,MAC/D,aAAa,CAAC,CAAG,MAAc,MAAM;AAAA,MACrC,cAAe,MAAc,MAAM,OAAO,OAAO,KAAM,MAAc,KAAK,IAAI,IAAI;AAAA,MAClF,iBAAiB,CAAC,CAAG,MAAc,MAAM,MAAM;AAAA,MAC/C,mBAAoB,MAAc,MAAM,MAAM,OAAO,OAAO,KAAM,MAAc,KAAK,KAAK,IAAI,EAAE,SAAS;AAAA,IAC3G,CAAC;AAAA,EACH;AAEA,UAAQ,IAAI,+BAAwB,OAAO,KAAK,IAAI,EAAE,MAAM,YAAY;AAExE,aAAW,CAAC,OAAO,GAAG,KAAK,OAAO,QAAQ,IAAI,GAAG;AAC/C,UAAM,OAAO,IAAI,YAAY;AAE7B,QAAI,CAAC,QAAQ,OAAO,SAAS,UAAU;AACrC;AAAA,IACF;AAGA,UAAM,YAAY,SAAS,IAAI;AAI/B,UAAM,UAAU;AAAA,MACd;AAAA,MACA;AAAA,MACA;AAAA,QACE,SAAS,MAAM,OAAO;AAAA,QACtB,QAAQ,MAAM,OAAO;AAAA,QACrB,yBAAyB,MAAM,OAAO;AAAA,MACxC;AAAA,MACA,MAAM;AAAA,MACN,MAAM;AAAA,IACR;AAEA,QAAI,QAAQ,SAAS,GAAG;AAEtB,YAAM,WAAW,KAAK,IAAI,GAAG,QAAQ,IAAI,OAAK,EAAE,KAAK,CAAC;AAEtD,sBAAgB,KAAK;AAAA,QACnB,IAAI;AAAA,QACJ;AAAA,QACA,OAAO;AAAA,QACP,UAAU;AAAA,MACZ,CAAC;AAAA,IACH;AAAA,EACF;AAGA,kBAAgB,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAGhD,QAAM,QAAQ,OAAO,SAAS,gBAAgB;AAC9C,QAAM,iBAAiB,gBAAgB,MAAM,GAAG,KAAK;AAGrD,QAAM,OAAO,eAAe,IAAI,YAAU;AAAA,IACxC,IAAI,MAAM;AAAA,IACV,OAAO,MAAM;AAAA,IACb,UAAU,MAAM;AAAA;AAAA,IAEhB,UAAU,MAAM;AAAA,EAClB,EAAE;AAEF,QAAM,UAAU,YAAY,IAAI,IAAI;AAEpC,UAAQ,IAAI,gBAAW,KAAK,MAAM,eAAe,QAAQ,QAAQ,CAAC,CAAC,cAAc,KAAK,GAAG;AAEzF,SAAO;AAAA,IACL,SAAS;AAAA,MACP,WAAW,GAAG,QAAQ,QAAQ,CAAC,CAAC;AAAA,MAChC,KAAK,KAAK,MAAM,UAAU,GAAO;AAAA;AAAA,IACnC;AAAA,IACA;AAAA,IACA,OAAO,KAAK;AAAA,EACd;AACF;AAKA,eAAe,yBACb,gBACqB;AACrB,MAAI;AACF,YAAQ,IAAI,0DAAmD;AAG/D,UAAM,EAAE,aAAa,IAAI,MAAM,OAAO,uBAAuB;AAE7D,UAAM,WAAW,aAAa,eAAe,KAAK,eAAe,UAAU;AAG3E,UAAM,EAAE,MAAM,MAAM,IAAI,MAAM,SAAS,IAAI,iBAAiB;AAE5D,YAAQ,IAAI,2CAAoC;AAAA,MAC9C,UAAU,CAAC,CAAC;AAAA,MACZ,cAAc,OAAO;AAAA,MACrB,SAAS,CAAC,CAAC;AAAA,MACX,UAAU,OAAO;AAAA,MACjB,UAAU,OAAO,OAAO,KAAK,IAAI,EAAE,SAAS;AAAA,IAC9C,CAAC;AAED,QAAI,OAAO;AACT,YAAM,IAAI,MAAM,mBAAmB,MAAM,OAAO,EAAE;AAAA,IACpD;AAEA,UAAM,aAAa,QAAQ,CAAC;AAC5B,YAAQ,IAAI,oBAAa,OAAO,KAAK,UAAU,EAAE,MAAM,gCAAgC;AAEvF,WAAO;AAAA,EACT,SAAS,OAAO;AACd,YAAQ,MAAM,iDAA4C,KAAK;AAC/D,UAAM;AAAA,EACR;AACF;AAKA,SAAS,6BACP,MACA,cACqB;AACrB,QAAM,KAAK,oBAAI,IAAoB;AAEnC,aAAW,OAAO,OAAO,OAAO,IAAI,GAAG;AACrC,UAAM,OAAO,IAAI,YAAY;AAE7B,QAAI,CAAC,QAAQ,OAAO,SAAS,UAAU;AACrC;AAAA,IACF;AAGA,UAAM,QAAQ,IAAI,IAAI,SAAS,IAAI,CAAC;AAGpC,eAAW,QAAQ,OAAO;AACxB,SAAG,IAAI,OAAO,GAAG,IAAI,IAAI,KAAK,KAAK,CAAC;AAAA,IACtC;AAAA,EACF;AAEA,SAAO;AACT;AAQA,SAAS,cAAc,MAAsB;AAC3C,SAAO,KACJ,YAAY,EACZ,UAAU,KAAK,EACf,QAAQ,oBAAoB,EAAE,EAE9B,QAAQ,gFAAgF,GAAG,EAC3F,QAAQ,6DAA6D,EAAE,EACvE,QAAQ,mBAAmB,GAAG,EAC9B,QAAQ,4BAA4B,GAAG,EACvC,QAAQ,QAAQ,GAAG,EACnB,KAAK;AACV;AAQA,SAAS,SAAS,MAAwB;AAExC,SAAO,cAAc,IAAI,EACtB,MAAM,KAAK,EACX,OAAO,WAAS,MAAM,SAAS,CAAC;AACrC","sourcesContent":["/**\n * Fuzzy matching utilities using bounded Levenshtein distance\n * \n * This is the same algorithm used by Orama's match-highlight plugin\n * for consistent fuzzy matching behavior.\n */\n\n/**\n * Result of bounded Levenshtein distance calculation\n */\nexport interface BoundedLevenshteinResult {\n /** Whether the distance is within bounds */\n isBounded: boolean;\n /** The actual distance (only valid if isBounded is true) */\n distance: number;\n}\n\n/**\n * Calculate bounded Levenshtein distance between two strings\n * \n * Stops early if distance exceeds the bound for better performance.\n * This is the same algorithm as Orama's internal boundedLevenshtein.\n * \n * @param a - First string\n * @param b - Second string\n * @param bound - Maximum allowed distance\n * @returns Result indicating if strings are within bound and the distance\n */\nexport function boundedLevenshtein(\n a: string,\n b: string,\n bound: number\n): BoundedLevenshteinResult {\n // Quick checks\n if (a === b) {\n return { isBounded: true, distance: 0 };\n }\n\n const aLen = a.length;\n const bLen = b.length;\n\n // If length difference exceeds bound, no need to calculate\n if (Math.abs(aLen - bLen) > bound) {\n return { isBounded: false, distance: bound + 1 };\n }\n\n // Swap to ensure a is shorter (optimization)\n if (aLen > bLen) {\n [a, b] = [b, a];\n }\n\n const m = a.length;\n const n = b.length;\n\n // Use single array instead of matrix (memory optimization)\n let prevRow = new Array(n + 1);\n let currRow = new Array(n + 1);\n\n // Initialize first row\n for (let j = 0; j <= n; j++) {\n prevRow[j] = j;\n }\n\n for (let i = 1; i <= m; i++) {\n currRow[0] = i;\n let minInRow = i;\n\n for (let j = 1; j <= n; j++) {\n const cost = a[i - 1] === b[j - 1] ? 0 : 1;\n\n currRow[j] = Math.min(\n prevRow[j] + 1, // deletion\n currRow[j - 1] + 1, // insertion\n prevRow[j - 1] + cost // substitution\n );\n\n minInRow = Math.min(minInRow, currRow[j]);\n }\n\n // Early termination: if all values in row exceed bound, we're done\n if (minInRow > bound) {\n return { isBounded: false, distance: bound + 1 };\n }\n\n // Swap rows for next iteration\n [prevRow, currRow] = [currRow, prevRow];\n }\n\n const distance = prevRow[n];\n return {\n isBounded: distance <= bound,\n distance\n };\n}\n\n/**\n * Check if a word matches a query token with fuzzy matching\n * \n * @param word - Word from document\n * @param queryToken - Token from search query\n * @param tolerance - Maximum edit distance allowed\n * @returns Match result with score\n */\nexport function fuzzyMatch(\n word: string,\n queryToken: string,\n tolerance: number\n): { matches: boolean; distance: number; score: number } {\n // Exact match\n if (word === queryToken) {\n return { matches: true, distance: 0, score: 1.0 };\n }\n\n // Prefix match (high score, no distance)\n if (word.startsWith(queryToken)) {\n return { matches: true, distance: 0, score: 0.95 };\n }\n\n // Fuzzy match with tolerance\n const result = boundedLevenshtein(word, queryToken, tolerance);\n \n if (result.isBounded) {\n // Score decreases with distance\n // distance 1 = 0.8, distance 2 = 0.6, etc.\n const score = 1.0 - (result.distance * 0.2);\n return {\n matches: true,\n distance: result.distance,\n score: Math.max(0.1, score) // Minimum score of 0.1\n };\n }\n\n return { matches: false, distance: tolerance + 1, score: 0 };\n}\n\n/**\n * Calculate adaptive tolerance based on query length\n * \n * Longer queries get higher tolerance for better fuzzy matching.\n * \n * @param queryTokens - Array of query tokens\n * @param baseTolerance - Base tolerance value\n * @returns Calculated tolerance (always an integer)\n */\nexport function calculateAdaptiveTolerance(\n queryTokens: string[],\n baseTolerance: number\n): number {\n const queryLength = queryTokens.length;\n \n if (queryLength <= 2) {\n return baseTolerance;\n } else if (queryLength <= 4) {\n return baseTolerance + 1;\n } else if (queryLength <= 6) {\n return baseTolerance + 2;\n } else {\n return baseTolerance + 3;\n }\n}\n","/**\n * Candidate expansion: Find all possible matches for query tokens\n * including exact matches, fuzzy matches, and synonyms\n */\n\nimport { fuzzyMatch } from './fuzzy.js';\nimport type { Candidate, SynonymMap } from './types.js';\n\n/**\n * Extract all unique words from the radix tree index\n * \n * @param radixNode - Root node of the radix tree\n * @returns Set of all unique words in the index\n */\nexport function extractVocabularyFromRadixTree(radixNode: any): Set<string> {\n const vocabulary = new Set<string>();\n let nodesVisited = 0;\n let wordsFound = 0;\n \n function traverse(node: any, depth: number = 0) {\n if (!node) {\n return;\n }\n \n nodesVisited++;\n \n // Check if this node represents a complete word\n // e = true means it's an end of a word\n if (node.e && node.w && typeof node.w === 'string' && node.w.length > 0) {\n vocabulary.add(node.w);\n wordsFound++;\n }\n \n // Children can be Map, Array, or Object\n if (node.c) {\n if (node.c instanceof Map) {\n // Map format\n for (const [_key, childNode] of node.c) {\n traverse(childNode, depth + 1);\n }\n } else if (Array.isArray(node.c)) {\n // Array format: [[key, childNode], ...]\n for (const [_key, childNode] of node.c) {\n traverse(childNode, depth + 1);\n }\n } else if (typeof node.c === 'object') {\n // Object format: {key: childNode, ...}\n for (const childNode of Object.values(node.c)) {\n traverse(childNode, depth + 1);\n }\n }\n }\n }\n \n traverse(radixNode);\n console.log(`📚 Extracted ${vocabulary.size} words from ${nodesVisited} nodes visited`);\n return vocabulary;\n}\n\n/**\n * Find all candidate matches for a single query token\n * \n * @param queryToken - Token from search query\n * @param vocabulary - Set of all words in the index\n * @param tolerance - Fuzzy matching tolerance\n * @param synonyms - Synonym map (optional)\n * @param synonymScore - Score multiplier for synonym matches\n * @returns Array of candidate matches\n */\nexport function findCandidatesForToken(\n queryToken: string,\n vocabulary: Set<string>,\n tolerance: number,\n synonyms?: SynonymMap,\n synonymScore: number = 0.8\n): Candidate[] {\n const candidates: Candidate[] = [];\n const seen = new Set<string>();\n\n // 1. Check for exact match\n if (vocabulary.has(queryToken)) {\n candidates.push({\n word: queryToken,\n type: 'exact',\n queryToken,\n distance: 0,\n score: 1.0\n });\n seen.add(queryToken);\n }\n\n // 2. Check for fuzzy matches\n for (const word of vocabulary) {\n if (seen.has(word)) continue;\n\n const match = fuzzyMatch(word, queryToken, tolerance);\n if (match.matches) {\n candidates.push({\n word,\n type: 'fuzzy',\n queryToken,\n distance: match.distance,\n score: match.score\n });\n seen.add(word);\n }\n }\n\n // 3. Check for synonym matches\n if (synonyms && synonyms[queryToken]) {\n for (const synonym of synonyms[queryToken]) {\n if (seen.has(synonym)) continue;\n if (vocabulary.has(synonym)) {\n candidates.push({\n word: synonym,\n type: 'synonym',\n queryToken,\n distance: 0,\n score: synonymScore\n });\n seen.add(synonym);\n }\n }\n }\n\n return candidates;\n}\n\n/**\n * Find candidates for all query tokens\n * \n * @param queryTokens - Array of tokens from search query\n * @param vocabulary - Set of all words in the index\n * @param tolerance - Fuzzy matching tolerance\n * @param synonyms - Synonym map (optional)\n * @param synonymScore - Score multiplier for synonym matches\n * @returns Map of query tokens to their candidate matches\n */\nexport function findAllCandidates(\n queryTokens: string[],\n vocabulary: Set<string>,\n tolerance: number,\n synonyms?: SynonymMap,\n synonymScore: number = 0.8\n): Map<string, Candidate[]> {\n const candidatesMap = new Map<string, Candidate[]>();\n\n for (const token of queryTokens) {\n const tokenCandidates = findCandidatesForToken(\n token,\n vocabulary,\n tolerance,\n synonyms,\n synonymScore\n );\n candidatesMap.set(token, tokenCandidates);\n }\n\n return candidatesMap;\n}\n\n/**\n * Get total number of candidates across all tokens\n * \n * @param candidatesMap - Map of token to candidates\n * @returns Total count of all candidates\n */\nexport function getTotalCandidateCount(\n candidatesMap: Map<string, Candidate[]>\n): number {\n let total = 0;\n for (const candidates of candidatesMap.values()) {\n total += candidates.length;\n }\n return total;\n}\n\n/**\n * Filter candidates by minimum score threshold\n * \n * @param candidatesMap - Map of token to candidates\n * @param minScore - Minimum score threshold\n * @returns Filtered candidates map\n */\nexport function filterCandidatesByScore(\n candidatesMap: Map<string, Candidate[]>,\n minScore: number\n): Map<string, Candidate[]> {\n const filtered = new Map<string, Candidate[]>();\n\n for (const [token, candidates] of candidatesMap.entries()) {\n const filteredCandidates = candidates.filter(c => c.score >= minScore);\n if (filteredCandidates.length > 0) {\n filtered.set(token, filteredCandidates);\n }\n }\n\n return filtered;\n}\n","/**\n * Phrase scoring algorithm with semantic weighting\n */\n\nimport type { WordMatch, PhraseMatch, Candidate, GapWord } from './types.js';\n\n/**\n * Configuration for phrase scoring\n */\nexport interface ScoringConfig {\n weights: {\n exact: number;\n fuzzy: number;\n order: number;\n proximity: number;\n density: number;\n semantic: number;\n };\n maxGap: number;\n /** \n * Multiplier for proximity window calculation.\n * proximityWindow = queryTokens.length × proximitySpanMultiplier\n */\n proximitySpanMultiplier: number;\n}\n\n/**\n * Find all phrase matches in a document\n * \n * @param documentTokens - Tokenized document content (needed to extract gap words)\n * @param candidatesMap - Map of query tokens to their candidates\n * @param config - Scoring configuration\n * @param documentFrequency - Document frequency map for TF-IDF\n * @param totalDocuments - Total number of documents\n * @returns Array of phrase matches\n */\nexport function findPhrasesInDocument(\n documentTokens: string[],\n candidatesMap: Map<string, Candidate[]>,\n config: ScoringConfig,\n documentFrequency: Map<string, number>,\n totalDocuments: number\n): PhraseMatch[] {\n const phrases: PhraseMatch[] = [];\n const queryTokens = Array.from(candidatesMap.keys());\n\n // Find all word matches in document\n const wordMatches: WordMatch[] = [];\n \n for (let i = 0; i < documentTokens.length; i++) {\n const docWord = documentTokens[i];\n \n // Check if this word matches any query token\n for (const [queryToken, candidates] of candidatesMap.entries()) {\n for (const candidate of candidates) {\n if (candidate.word === docWord) {\n wordMatches.push({\n word: docWord,\n queryToken,\n position: i,\n type: candidate.type,\n distance: candidate.distance,\n score: candidate.score\n });\n }\n }\n }\n }\n\n // Build phrases from word matches using sliding window\n for (let i = 0; i < wordMatches.length; i++) {\n const phrase = buildPhraseFromPosition(\n wordMatches,\n i,\n queryTokens,\n config,\n documentFrequency,\n totalDocuments,\n wordMatches,\n documentTokens // Pass document tokens to extract gap words\n );\n \n if (phrase && phrase.words.length > 0) {\n phrases.push(phrase);\n }\n }\n\n // Deduplicate and sort by score\n return deduplicatePhrases(phrases);\n}\n\n/**\n * Build a phrase starting from a specific word match position\n * \n * @param wordMatches - All word matches in document\n * @param startIndex - Starting index in wordMatches array\n * @param queryTokens - Original query tokens\n * @param config - Scoring configuration\n * @param documentFrequency - Document frequency map\n * @param totalDocuments - Total document count\n * @param allWordMatches - All word matches in document (for density calculation)\n * @param documentTokens - Original document tokens (for gap word extraction)\n * @returns Phrase match or null\n */\nfunction buildPhraseFromPosition(\n wordMatches: WordMatch[],\n startIndex: number,\n queryTokens: string[],\n config: ScoringConfig,\n documentFrequency: Map<string, number>,\n totalDocuments: number,\n allWordMatches: WordMatch[],\n documentTokens: string[]\n): PhraseMatch | null {\n const startMatch = wordMatches[startIndex];\n const phraseWords: WordMatch[] = [startMatch];\n const coveredTokens = new Set([startMatch.queryToken]);\n const gapWords: GapWord[] = [];\n let totalGapUsed = 0;\n\n // Look for nearby matches to complete the phrase\n for (let i = startIndex + 1; i < wordMatches.length; i++) {\n const match = wordMatches[i];\n const lastPos = phraseWords[phraseWords.length - 1].position;\n const gap = match.position - lastPos - 1;\n\n // Stop if gap exceeds maximum\n if (gap > config.maxGap) {\n break;\n }\n\n // Track gap words between last match and current match\n for (let pos = lastPos + 1; pos < match.position; pos++) {\n totalGapUsed++;\n gapWords.push({\n word: documentTokens[pos],\n position: pos,\n gapIndex: totalGapUsed\n });\n }\n\n // Add if it's a different query token\n if (!coveredTokens.has(match.queryToken)) {\n phraseWords.push(match);\n coveredTokens.add(match.queryToken);\n }\n\n // Stop if we have all query tokens\n if (coveredTokens.size === queryTokens.length) {\n break;\n }\n }\n\n // Calculate phrase score\n if (phraseWords.length > 0) {\n const coverage = phraseWords.length / queryTokens.length;\n const span = phraseWords[phraseWords.length - 1].position - phraseWords[0].position + 1;\n \n const { score, breakdown } = calculatePhraseScore(\n phraseWords,\n queryTokens,\n config,\n documentFrequency,\n totalDocuments,\n allWordMatches,\n coverage\n );\n\n return {\n words: phraseWords,\n gapWords,\n gapUsed: totalGapUsed,\n coverage,\n startPosition: phraseWords[0].position,\n endPosition: phraseWords[phraseWords.length - 1].position,\n span,\n inOrder: isInOrder(phraseWords, queryTokens),\n score,\n scoreBreakdown: breakdown\n };\n }\n\n return null;\n}\n\n/**\n * Calculate overall phrase score\n * \n * @param phraseWords - Words in the phrase\n * @param queryTokens - Original query tokens\n * @param config - Scoring configuration\n * @param documentFrequency - Document frequency map\n * @param totalDocuments - Total document count\n * @param allWordMatches - All word matches in document (for density calculation)\n * @param coverage - Pre-calculated coverage ratio (phraseWords.length / queryTokens.length)\n * @returns Phrase score (0-1) and detailed component breakdown\n */\nfunction calculatePhraseScore(\n phraseWords: WordMatch[],\n queryTokens: string[],\n config: ScoringConfig,\n documentFrequency: Map<string, number>,\n totalDocuments: number,\n allWordMatches: WordMatch[],\n coverage: number\n): { score: number; breakdown: { base: number; order: number; proximity: number; density: number; semantic: number; coverage: number } } {\n // Base score from word matches\n // Each word contributes: matchScore × typeWeight\n let baseScore = 0;\n for (const word of phraseWords) {\n const weight = word.type === 'exact' ? config.weights.exact :\n word.type === 'fuzzy' ? config.weights.fuzzy : \n config.weights.fuzzy * 0.8; // synonym gets 80% of fuzzy weight\n baseScore += word.score * weight;\n }\n baseScore /= phraseWords.length;\n\n // Order bonus: 1.0 if words appear in query order, 0.5 otherwise\n const inOrder = isInOrder(phraseWords, queryTokens);\n const orderScore = inOrder ? 1.0 : 0.5;\n\n // Proximity bonus (closer words score higher)\n // Uses proximitySpanMultiplier from config instead of hardcoded 5\n const span = phraseWords[phraseWords.length - 1].position - phraseWords[0].position + 1;\n const proximityWindow = queryTokens.length * config.proximitySpanMultiplier;\n const proximityScore = Math.max(0, 1.0 - (span / proximityWindow));\n\n // Density: Only applies to single-word queries (measures word repetition in document)\n // For multi-word phrase queries, density is 0 (coverage handles completeness separately)\n let densityScore = 0;\n \n if (queryTokens.length === 1) {\n // Single-word query: reward repetition\n const totalOccurrences = allWordMatches.length;\n // Cap at reasonable maximum to avoid runaway scores\n densityScore = Math.min(1.0, totalOccurrences / 10);\n }\n // For multi-word queries: densityScore stays 0\n // Coverage is applied as a multiplier at the end instead\n\n // Semantic score (TF-IDF based)\n const semanticScore = calculateSemanticScore(\n phraseWords,\n documentFrequency,\n totalDocuments\n );\n\n // Weighted combination\n const weights = config.weights;\n \n // Calculate weighted components\n const weightedBase = baseScore;\n const weightedOrder = orderScore * weights.order;\n const weightedProximity = proximityScore * weights.proximity;\n const weightedDensity = densityScore * weights.density;\n const weightedSemantic = semanticScore * weights.semantic;\n \n const totalScore = weightedBase + weightedOrder + weightedProximity + weightedDensity + weightedSemantic;\n\n // Calculate max possible score\n // FIX: Use actual max base weight (highest of exact/fuzzy) instead of hardcoded 1.0\n const maxBaseWeight = Math.max(weights.exact, weights.fuzzy);\n const maxPossibleScore = maxBaseWeight + weights.order + weights.proximity + weights.density + weights.semantic;\n \n // Normalize to 0-1 range\n const normalizedScore = totalScore / maxPossibleScore;\n \n // FIX: Apply coverage as a MULTIPLIER for multi-word queries\n // This ensures incomplete matches (2/3) can never outscore complete matches (3/3)\n const coverageMultiplier = queryTokens.length > 1 ? coverage : 1.0;\n const score = normalizedScore * coverageMultiplier;\n\n // Component contributions to the final normalized score (before coverage multiplier)\n const base = weightedBase / maxPossibleScore;\n const order = weightedOrder / maxPossibleScore;\n const proximity = weightedProximity / maxPossibleScore;\n const density = weightedDensity / maxPossibleScore;\n const semantic = weightedSemantic / maxPossibleScore;\n\n return {\n score,\n breakdown: {\n base,\n order,\n proximity,\n density,\n semantic,\n coverage: coverageMultiplier // Show coverage multiplier in breakdown\n }\n };\n}\n\n/**\n * Check if words are in the same order as query tokens\n * \n * @param phraseWords - Words in the phrase\n * @param queryTokens - Original query tokens\n * @returns True if in order\n */\nfunction isInOrder(phraseWords: WordMatch[], queryTokens: string[]): boolean {\n const tokenOrder = new Map(queryTokens.map((token, index) => [token, index]));\n \n for (let i = 1; i < phraseWords.length; i++) {\n const prevOrder = tokenOrder.get(phraseWords[i - 1].queryToken) ?? -1;\n const currOrder = tokenOrder.get(phraseWords[i].queryToken) ?? -1;\n \n if (currOrder < prevOrder) {\n return false;\n }\n }\n \n return true;\n}\n\n/**\n * Calculate semantic score using TF-IDF\n * \n * @param phraseWords - Words in the phrase\n * @param documentFrequency - Document frequency map\n * @param totalDocuments - Total document count\n * @returns Semantic score (0-1)\n */\nfunction calculateSemanticScore(\n phraseWords: WordMatch[],\n documentFrequency: Map<string, number>,\n totalDocuments: number\n): number {\n // Handle edge case: no documents\n if (totalDocuments === 0) {\n return 0;\n }\n \n let tfidfSum = 0;\n \n for (const word of phraseWords) {\n const df = documentFrequency.get(word.word) || 1;\n const idf = Math.log(totalDocuments / df);\n tfidfSum += idf;\n }\n \n // Normalize by phrase length\n const avgTfidf = tfidfSum / phraseWords.length;\n \n // Normalize to 0-1 range (assuming max IDF of ~10)\n return Math.min(1.0, avgTfidf / 10);\n}\n\n/**\n * Deduplicate overlapping phrases, keeping highest scoring ones\n * \n * @param phrases - Array of phrase matches\n * @returns Deduplicated phrases sorted by score\n */\nfunction deduplicatePhrases(phrases: PhraseMatch[]): PhraseMatch[] {\n if (phrases.length === 0) return [];\n\n // Sort by score descending\n const sorted = phrases.slice().sort((a, b) => b.score - a.score);\n const result: PhraseMatch[] = [];\n const covered = new Set<number>();\n\n for (const phrase of sorted) {\n // Check if this phrase overlaps with already selected phrases\n let overlaps = false;\n for (let pos = phrase.startPosition; pos <= phrase.endPosition; pos++) {\n if (covered.has(pos)) {\n overlaps = true;\n break;\n }\n }\n\n if (!overlaps) {\n result.push(phrase);\n // Mark positions as covered\n for (let pos = phrase.startPosition; pos <= phrase.endPosition; pos++) {\n covered.add(pos);\n }\n }\n }\n\n return result.sort((a, b) => b.score - a.score);\n}\n","/**\n * Fuzzy Phrase Plugin for Orama\n * \n * Advanced fuzzy phrase matching with semantic weighting and synonym expansion.\n * Completely independent from QPS - accesses Orama's radix tree directly.\n */\n\nimport type { AnyOrama, OramaPlugin, Results, TypedDocument } from '@wcs-colab/orama';\nimport type { FuzzyPhraseConfig, PluginState, SynonymMap, DocumentMatch } from './types.js';\nimport { calculateAdaptiveTolerance } from './fuzzy.js';\nimport { \n extractVocabularyFromRadixTree, \n findAllCandidates,\n filterCandidatesByScore \n} from './candidates.js';\nimport { findPhrasesInDocument } from './scoring.js';\n\n/**\n * Default configuration\n */\nconst DEFAULT_CONFIG: Required<FuzzyPhraseConfig> = {\n textProperty: 'content',\n tolerance: 1,\n adaptiveTolerance: true,\n enableSynonyms: false,\n supabase: undefined as any,\n synonymMatchScore: 0.8,\n weights: {\n exact: 1.0,\n fuzzy: 0.8,\n order: 0.3,\n proximity: 0.2,\n density: 0.2,\n semantic: 0.15\n },\n maxGap: 5,\n minScore: 0.1,\n proximitySpanMultiplier: 5\n};\n\n/**\n * Plugin state storage (keyed by Orama instance)\n */\nconst pluginStates = new WeakMap<AnyOrama, PluginState>();\n\n/**\n * Create the Fuzzy Phrase Plugin\n * \n * @param userConfig - User configuration options\n * @returns Orama plugin instance\n */\nexport function pluginFuzzyPhrase(userConfig: FuzzyPhraseConfig = {}): OramaPlugin {\n // Merge user config with defaults\n const config: Required<FuzzyPhraseConfig> = {\n textProperty: userConfig.textProperty ?? DEFAULT_CONFIG.textProperty,\n tolerance: userConfig.tolerance ?? DEFAULT_CONFIG.tolerance,\n adaptiveTolerance: userConfig.adaptiveTolerance ?? DEFAULT_CONFIG.adaptiveTolerance,\n enableSynonyms: userConfig.enableSynonyms ?? DEFAULT_CONFIG.enableSynonyms,\n supabase: userConfig.supabase || DEFAULT_CONFIG.supabase,\n synonymMatchScore: userConfig.synonymMatchScore ?? DEFAULT_CONFIG.synonymMatchScore,\n weights: {\n exact: userConfig.weights?.exact ?? DEFAULT_CONFIG.weights.exact,\n fuzzy: userConfig.weights?.fuzzy ?? DEFAULT_CONFIG.weights.fuzzy,\n order: userConfig.weights?.order ?? DEFAULT_CONFIG.weights.order,\n proximity: userConfig.weights?.proximity ?? DEFAULT_CONFIG.weights.proximity,\n density: userConfig.weights?.density ?? DEFAULT_CONFIG.weights.density,\n semantic: userConfig.weights?.semantic ?? DEFAULT_CONFIG.weights.semantic\n },\n maxGap: userConfig.maxGap ?? DEFAULT_CONFIG.maxGap,\n minScore: userConfig.minScore ?? DEFAULT_CONFIG.minScore,\n proximitySpanMultiplier: userConfig.proximitySpanMultiplier ?? DEFAULT_CONFIG.proximitySpanMultiplier\n };\n\n const plugin: OramaPlugin = {\n name: 'fuzzy-phrase',\n\n /**\n * Initialize plugin after index is created\n */\n afterCreate: async (orama: AnyOrama) => {\n console.log('🔮 Initializing Fuzzy Phrase Plugin...');\n\n // Initialize state\n const state: PluginState = {\n synonymMap: {},\n config,\n documentFrequency: new Map(),\n totalDocuments: 0\n };\n\n // Load synonyms from Supabase if enabled\n if (config.enableSynonyms && config.supabase) {\n try {\n console.log('📖 Loading synonyms from Supabase...');\n state.synonymMap = await loadSynonymsFromSupabase(config.supabase);\n console.log(`✅ Loaded ${Object.keys(state.synonymMap).length} words with synonyms`);\n } catch (error) {\n console.error('⚠️ Failed to load synonyms:', error);\n // Continue without synonyms\n }\n }\n\n // Calculate document frequencies for TF-IDF from document store\n const docs = (orama.data as any)?.docs?.docs;\n if (docs) {\n state.totalDocuments = Object.keys(docs).length;\n state.documentFrequency = calculateDocumentFrequencies(docs, config.textProperty);\n console.log(`📊 Calculated document frequencies for ${state.totalDocuments} documents`);\n }\n\n // Store state\n pluginStates.set(orama, state);\n console.log('✅ Fuzzy Phrase Plugin initialized');\n \n // Signal ready - emit a custom event that can be listened to\n // Use setImmediate to ensure this runs after the afterCreate hook completes\n setImmediate(() => {\n if (typeof (globalThis as any).fuzzyPhrasePluginReady === 'function') {\n console.log('📡 Signaling plugin ready...');\n (globalThis as any).fuzzyPhrasePluginReady();\n } else {\n console.warn('⚠️ fuzzyPhrasePluginReady callback not found');\n }\n });\n }\n };\n\n return plugin;\n}\n\n/**\n * Search with fuzzy phrase matching\n * \n * This function should be called instead of the regular search() function\n * to enable fuzzy phrase matching.\n */\nexport async function searchWithFuzzyPhrase<T extends AnyOrama>(\n orama: T, \n params: { term?: string; properties?: string[]; limit?: number },\n language?: string\n): Promise<Results<TypedDocument<T>>> {\n const startTime = performance.now();\n \n // Get plugin state\n const state = pluginStates.get(orama);\n \n if (!state) {\n console.error('❌ Plugin state not initialized');\n throw new Error('Fuzzy Phrase Plugin not properly initialized');\n }\n\n const { term, properties } = params;\n \n if (!term || typeof term !== 'string') {\n return { elapsed: { formatted: '0ms', raw: 0 }, hits: [], count: 0 };\n }\n\n // Use specified property or default\n const textProperty = (properties && properties[0]) || state.config.textProperty;\n\n // Tokenize query\n const queryTokens = tokenize(term);\n \n if (queryTokens.length === 0) {\n return { elapsed: { formatted: '0ms', raw: 0 }, hits: [], count: 0 };\n }\n\n // Calculate tolerance (adaptive or fixed)\n const tolerance = state.config.adaptiveTolerance\n ? calculateAdaptiveTolerance(queryTokens, state.config.tolerance)\n : state.config.tolerance;\n\n console.log(`🔍 Fuzzy phrase search: \"${term}\" (${queryTokens.length} tokens, tolerance: ${tolerance})`);\n\n // Extract vocabulary from radix tree\n let vocabulary: Set<string>;\n \n try {\n // Access radix tree - the actual index data is in orama.data.index, not orama.index\n // orama.index is just the component interface (methods)\n const indexData = (orama as any).data?.index;\n \n if (!indexData) {\n console.error('❌ No index data found in orama.data.index');\n return { elapsed: { formatted: '0ms', raw: 0 }, hits: [], count: 0 };\n }\n \n console.log('🔍 DEBUG: Index data keys:', Object.keys(indexData || {}));\n \n // Try different paths to find the radix tree\n let radixNode = null;\n \n // Path 1: QPS-style (orama.data.index.indexes[property].node)\n if (indexData.indexes?.[textProperty]?.node) {\n radixNode = indexData.indexes[textProperty].node;\n console.log('✅ Found radix via QPS-style path (data.index.indexes)');\n }\n // Path 2: Standard Orama (orama.data.index[property].node)\n else if (indexData[textProperty]?.node) {\n radixNode = indexData[textProperty].node;\n console.log('✅ Found radix via standard path (data.index[property])');\n }\n \n if (!radixNode) {\n console.error('❌ Radix tree not found for property:', textProperty);\n console.error(' Available properties in index:', Object.keys(indexData));\n return { elapsed: { formatted: '0ms', raw: 0 }, hits: [], count: 0 };\n }\n\n vocabulary = extractVocabularyFromRadixTree(radixNode);\n console.log(`📚 Extracted ${vocabulary.size} unique words from index`);\n } catch (error) {\n console.error('❌ Failed to extract vocabulary:', error);\n return { elapsed: { formatted: '0ms', raw: 0 }, hits: [], count: 0 };\n }\n\n // Find candidates for all query tokens\n const candidatesMap = findAllCandidates(\n queryTokens,\n vocabulary,\n tolerance,\n state.config.enableSynonyms ? state.synonymMap : undefined,\n state.config.synonymMatchScore\n );\n\n // Filter by minimum score\n const filteredCandidates = filterCandidatesByScore(\n candidatesMap,\n state.config.minScore\n );\n\n console.log(`🎯 Found candidates: ${Array.from(filteredCandidates.values()).reduce((sum, c) => sum + c.length, 0)} total`);\n\n // Search through all documents\n const documentMatches: DocumentMatch[] = [];\n \n console.log('🔍 DEBUG orama.data structure:', {\n dataKeys: Object.keys((orama as any).data || {}),\n hasDocs: !!((orama as any).data?.docs),\n docsType: (orama as any).data?.docs ? typeof (orama as any).data.docs : 'undefined'\n });\n \n // Try multiple possible document storage locations\n let docs: Record<string, any> = {};\n \n // Access the actual documents - they're nested in orama.data.docs.docs\n if ((orama as any).data?.docs?.docs) {\n docs = (orama as any).data.docs.docs;\n console.log('✅ Found docs at orama.data.docs.docs');\n }\n // Fallback: orama.data.docs (might be the correct structure in some cases)\n else if ((orama as any).data?.docs && typeof (orama as any).data.docs === 'object') {\n // Check if it has document-like properties (not sharedInternalDocumentStore, etc.)\n const firstKey = Object.keys((orama as any).data.docs)[0];\n if (firstKey && firstKey !== 'sharedInternalDocumentStore' && firstKey !== 'count') {\n docs = (orama as any).data.docs;\n console.log('✅ Found docs at orama.data.docs (direct)');\n }\n }\n \n if (Object.keys(docs).length === 0) {\n console.log('❌ Could not find documents - available structure:', {\n hasDataDocs: !!((orama as any).data?.docs),\n dataDocsKeys: (orama as any).data?.docs ? Object.keys((orama as any).data.docs) : 'none',\n hasDataDocsDocs: !!((orama as any).data?.docs?.docs),\n dataDocsDocsCount: (orama as any).data?.docs?.docs ? Object.keys((orama as any).data.docs.docs).length : 0\n });\n }\n \n console.log(`📄 Searching through ${Object.keys(docs).length} documents`);\n\n for (const [docId, doc] of Object.entries(docs)) {\n const text = doc[textProperty];\n \n if (!text || typeof text !== 'string') {\n continue;\n }\n\n // Tokenize document\n const docTokens = tokenize(text);\n\n // Find phrases in this document\n // Note: state.config.weights is guaranteed to have all properties from default merge\n const phrases = findPhrasesInDocument(\n docTokens,\n filteredCandidates,\n {\n weights: state.config.weights as { exact: number; fuzzy: number; order: number; proximity: number; density: number; semantic: number },\n maxGap: state.config.maxGap,\n proximitySpanMultiplier: state.config.proximitySpanMultiplier\n },\n state.documentFrequency,\n state.totalDocuments\n );\n\n if (phrases.length > 0) {\n // Calculate overall document score (highest phrase score)\n const docScore = Math.max(...phrases.map(p => p.score));\n\n documentMatches.push({\n id: docId,\n phrases,\n score: docScore,\n document: doc\n });\n }\n }\n\n // Sort by score descending\n documentMatches.sort((a, b) => b.score - a.score);\n\n // Apply limit if specified\n const limit = params.limit ?? documentMatches.length;\n const limitedMatches = documentMatches.slice(0, limit);\n\n // Convert to Orama results format\n const hits = limitedMatches.map(match => ({\n id: match.id,\n score: match.score,\n document: match.document,\n // Store phrases for highlighting\n _phrases: match.phrases\n })) as any[];\n\n const elapsed = performance.now() - startTime;\n\n console.log(`✅ Found ${hits.length} results in ${elapsed.toFixed(2)}ms (limit: ${limit})`);\n\n return {\n elapsed: {\n formatted: `${elapsed.toFixed(2)}ms`,\n raw: Math.floor(elapsed * 1000000) // nanoseconds\n },\n hits,\n count: hits.length\n } as any;\n}\n\n/**\n * Load synonyms from Supabase\n */\nasync function loadSynonymsFromSupabase(\n supabaseConfig: { url: string; serviceKey: string }\n): Promise<SynonymMap> {\n try {\n console.log('🔍 DEBUG: Calling Supabase RPC get_synonym_map...');\n \n // Dynamic import to avoid bundling Supabase client if not needed\n const { createClient } = await import('@supabase/supabase-js');\n \n const supabase = createClient(supabaseConfig.url, supabaseConfig.serviceKey);\n \n // Call the get_synonym_map function\n const { data, error } = await supabase.rpc('get_synonym_map');\n \n console.log('🔍 DEBUG: Supabase RPC response:', {\n hasError: !!error,\n errorMessage: error?.message,\n hasData: !!data,\n dataType: typeof data,\n dataKeys: data ? Object.keys(data).length : 0\n });\n \n if (error) {\n throw new Error(`Supabase error: ${error.message}`);\n }\n \n const synonymMap = data || {};\n console.log(`📚 Loaded ${Object.keys(synonymMap).length} synonym entries from Supabase`);\n \n return synonymMap;\n } catch (error) {\n console.error('❌ Failed to load synonyms from Supabase:', error);\n throw error;\n }\n}\n\n/**\n * Calculate document frequencies for TF-IDF\n */\nfunction calculateDocumentFrequencies(\n docs: Record<string, any>,\n textProperty: string\n): Map<string, number> {\n const df = new Map<string, number>();\n\n for (const doc of Object.values(docs)) {\n const text = doc[textProperty];\n \n if (!text || typeof text !== 'string') {\n continue;\n }\n\n // Get unique words in this document\n const words = new Set(tokenize(text));\n\n // Increment document frequency for each unique word\n for (const word of words) {\n df.set(word, (df.get(word) || 0) + 1);\n }\n }\n\n return df;\n}\n\n/**\n * Normalize text using the same rules as server-side\n * \n * CRITICAL: This must match the normalizeText() function in server/index.js exactly\n * PLUS we remove all punctuation to match Orama's French tokenizer behavior\n */\nfunction normalizeText(text: string): string {\n return text\n .toLowerCase()\n .normalize('NFD')\n .replace(/[\\u0300-\\u036f]/g, '') // Remove diacritics\n // Replace French elisions (l', d', etc.) with space to preserve word boundaries\n .replace(/\\b[ldcjmnst][\\u2018\\u2019\\u201A\\u201B\\u2032\\u2035\\u0027\\u0060\\u00B4](?=\\w)/gi, ' ')\n .replace(/[\\u2018\\u2019\\u201A\\u201B\\u2032\\u2035\\u0027\\u0060\\u00B4]/g, '') // Remove remaining apostrophes\n .replace(/[\\u201c\\u201d]/g, '\"') // Normalize curly quotes to straight quotes\n .replace(/[.,;:!?()[\\]{}\\-—–«»\"\"]/g, ' ') // Remove punctuation (replace with space to preserve word boundaries)\n .replace(/\\s+/g, ' ') // Normalize multiple spaces to single space\n .trim();\n}\n\n/**\n * Tokenization matching normalized text behavior\n * \n * Note: Text should already be normalized before indexing, so we normalize again\n * to ensure plugin tokenization matches index tokenization\n */\nfunction tokenize(text: string): string[] {\n // Normalize first (same as indexing), then split by whitespace\n return normalizeText(text)\n .split(/\\s+/)\n .filter(token => token.length > 0);\n}\n\n/**\n * Export types for external use\n */\nexport type {\n FuzzyPhraseConfig,\n WordMatch,\n PhraseMatch,\n DocumentMatch,\n SynonymMap,\n Candidate\n} from './types.js';\n"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@wcs-colab/plugin-fuzzy-phrase",
3
- "version": "3.1.16-custom.newbase.2",
3
+ "version": "3.1.16-custom.newbase.4",
4
4
  "description": "Advanced fuzzy phrase matching plugin for Orama with semantic weighting and synonym expansion",
5
5
  "keywords": [
6
6
  "orama",
@@ -59,4 +59,4 @@
59
59
  "lint": "exit 0",
60
60
  "test": "node --test --import tsx test/*.test.ts"
61
61
  }
62
- }
62
+ }