@wcs-colab/plugin-fuzzy-phrase 3.1.16-custom.8 → 3.1.16-custom.newbase.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +13 -0
- package/README.md +1 -1
- package/dist/index.cjs +111 -19
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +22 -91
- package/dist/index.d.ts +22 -91
- package/dist/index.js +111 -19
- package/dist/index.js.map +1 -1
- package/package.json +62 -54
package/dist/index.d.cts
CHANGED
|
@@ -1,108 +1,62 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { OramaPlugin, AnyOrama, Results, TypedDocument } from '@wcs-colab/orama';
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
|
-
*
|
|
5
|
-
*/
|
|
6
|
-
|
|
7
|
-
/**
|
|
8
|
-
* Configuration for the Fuzzy Phrase Plugin
|
|
4
|
+
* Shared types for the fuzzy phrase plugin.
|
|
9
5
|
*/
|
|
10
6
|
interface FuzzyPhraseConfig {
|
|
11
|
-
/**
|
|
12
|
-
* Text property to search in
|
|
13
|
-
* @default 'content'
|
|
14
|
-
*/
|
|
7
|
+
/** Text property to search in (defaults to `content`) */
|
|
15
8
|
textProperty?: string;
|
|
16
|
-
/**
|
|
17
|
-
* Base fuzzy matching tolerance (edit distance)
|
|
18
|
-
* @default 1
|
|
19
|
-
*/
|
|
9
|
+
/** Base fuzzy matching tolerance (edit distance) */
|
|
20
10
|
tolerance?: number;
|
|
21
|
-
/**
|
|
22
|
-
* Enable adaptive tolerance (scales with query length)
|
|
23
|
-
* @default true
|
|
24
|
-
*/
|
|
11
|
+
/** Enable adaptive tolerance that scales with query length */
|
|
25
12
|
adaptiveTolerance?: boolean;
|
|
26
|
-
/**
|
|
27
|
-
* Enable synonym expansion
|
|
28
|
-
* @default false
|
|
29
|
-
*/
|
|
13
|
+
/** Enable synonym expansion using Supabase-backed synonym map */
|
|
30
14
|
enableSynonyms?: boolean;
|
|
31
|
-
/**
|
|
32
|
-
* Supabase configuration for loading synonyms
|
|
33
|
-
*/
|
|
15
|
+
/** Supabase configuration for loading synonyms */
|
|
34
16
|
supabase?: {
|
|
35
17
|
url: string;
|
|
36
18
|
serviceKey: string;
|
|
37
19
|
};
|
|
38
|
-
/**
|
|
39
|
-
* Scoring weight for synonym matches (0-1)
|
|
40
|
-
* @default 0.8
|
|
41
|
-
*/
|
|
20
|
+
/** Scoring weight for synonym matches (0-1, default ~0.8) */
|
|
42
21
|
synonymMatchScore?: number;
|
|
43
|
-
/**
|
|
44
|
-
* Scoring weights for different components
|
|
45
|
-
*/
|
|
22
|
+
/** Scoring weights for different components */
|
|
46
23
|
weights?: {
|
|
47
|
-
/** Weight for exact matches */
|
|
48
24
|
exact?: number;
|
|
49
|
-
/** Weight for fuzzy matches */
|
|
50
25
|
fuzzy?: number;
|
|
51
|
-
/** Weight for phrase order */
|
|
52
26
|
order?: number;
|
|
53
|
-
/** Weight for proximity bonus */
|
|
54
27
|
proximity?: number;
|
|
55
|
-
/** Weight for density bonus */
|
|
56
28
|
density?: number;
|
|
57
|
-
/** Weight for TF-IDF semantic score */
|
|
58
29
|
semantic?: number;
|
|
59
30
|
};
|
|
60
|
-
/**
|
|
61
|
-
* Maximum gap between words in a phrase
|
|
62
|
-
* @default 5
|
|
63
|
-
*/
|
|
31
|
+
/** Maximum gap between words in a phrase */
|
|
64
32
|
maxGap?: number;
|
|
65
|
-
/**
|
|
66
|
-
* Minimum phrase score to include in results
|
|
67
|
-
* @default 0.1
|
|
68
|
-
*/
|
|
33
|
+
/** Minimum phrase score to include in results */
|
|
69
34
|
minScore?: number;
|
|
70
35
|
}
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
36
|
+
type SynonymMap = Record<string, string[]>;
|
|
37
|
+
interface Candidate {
|
|
38
|
+
word: string;
|
|
39
|
+
type: 'exact' | 'fuzzy' | 'synonym';
|
|
40
|
+
queryToken: string;
|
|
41
|
+
distance: number;
|
|
42
|
+
score: number;
|
|
43
|
+
}
|
|
74
44
|
interface WordMatch {
|
|
75
|
-
/** The matched word from the document */
|
|
76
45
|
word: string;
|
|
77
|
-
/** The query token that matched */
|
|
78
46
|
queryToken: string;
|
|
79
|
-
/** Position of the word in the document */
|
|
80
47
|
position: number;
|
|
81
|
-
/** Type of match */
|
|
82
48
|
type: 'exact' | 'fuzzy' | 'synonym';
|
|
83
|
-
|
|
84
|
-
distance?: number;
|
|
85
|
-
/** Match score (0-1) */
|
|
49
|
+
distance: number;
|
|
86
50
|
score: number;
|
|
87
51
|
}
|
|
88
|
-
/**
|
|
89
|
-
* Phrase match information
|
|
90
|
-
*/
|
|
91
52
|
interface PhraseMatch {
|
|
92
|
-
/** All word matches in this phrase */
|
|
93
53
|
words: WordMatch[];
|
|
94
|
-
/** Start position in document */
|
|
95
54
|
startPosition: number;
|
|
96
|
-
/** End position in document */
|
|
97
55
|
endPosition: number;
|
|
98
|
-
/** Gap between words */
|
|
99
56
|
gap: number;
|
|
100
|
-
/** Whether words are in correct order */
|
|
101
57
|
inOrder: boolean;
|
|
102
|
-
/** Overall phrase score */
|
|
103
58
|
score: number;
|
|
104
|
-
|
|
105
|
-
scoreBreakdown?: {
|
|
59
|
+
scoreBreakdown: {
|
|
106
60
|
base: number;
|
|
107
61
|
order: number;
|
|
108
62
|
proximity: number;
|
|
@@ -110,34 +64,11 @@ interface PhraseMatch {
|
|
|
110
64
|
semantic: number;
|
|
111
65
|
};
|
|
112
66
|
}
|
|
113
|
-
/**
|
|
114
|
-
* Document match with all phrase matches
|
|
115
|
-
*/
|
|
116
67
|
interface DocumentMatch {
|
|
117
|
-
/** Document ID */
|
|
118
68
|
id: string;
|
|
119
|
-
/** All phrase matches found in this document */
|
|
120
69
|
phrases: PhraseMatch[];
|
|
121
|
-
/** Overall document score */
|
|
122
|
-
score: number;
|
|
123
|
-
/** Document data */
|
|
124
|
-
document: Record<string, SearchableValue>;
|
|
125
|
-
}
|
|
126
|
-
/**
|
|
127
|
-
* Synonym map structure
|
|
128
|
-
*/
|
|
129
|
-
interface SynonymMap {
|
|
130
|
-
[word: string]: string[];
|
|
131
|
-
}
|
|
132
|
-
/**
|
|
133
|
-
* Candidate word for matching
|
|
134
|
-
*/
|
|
135
|
-
interface Candidate {
|
|
136
|
-
word: string;
|
|
137
|
-
type: 'exact' | 'fuzzy' | 'synonym';
|
|
138
|
-
queryToken: string;
|
|
139
|
-
distance?: number;
|
|
140
70
|
score: number;
|
|
71
|
+
document: any;
|
|
141
72
|
}
|
|
142
73
|
|
|
143
74
|
/**
|
package/dist/index.d.ts
CHANGED
|
@@ -1,108 +1,62 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { OramaPlugin, AnyOrama, Results, TypedDocument } from '@wcs-colab/orama';
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
|
-
*
|
|
5
|
-
*/
|
|
6
|
-
|
|
7
|
-
/**
|
|
8
|
-
* Configuration for the Fuzzy Phrase Plugin
|
|
4
|
+
* Shared types for the fuzzy phrase plugin.
|
|
9
5
|
*/
|
|
10
6
|
interface FuzzyPhraseConfig {
|
|
11
|
-
/**
|
|
12
|
-
* Text property to search in
|
|
13
|
-
* @default 'content'
|
|
14
|
-
*/
|
|
7
|
+
/** Text property to search in (defaults to `content`) */
|
|
15
8
|
textProperty?: string;
|
|
16
|
-
/**
|
|
17
|
-
* Base fuzzy matching tolerance (edit distance)
|
|
18
|
-
* @default 1
|
|
19
|
-
*/
|
|
9
|
+
/** Base fuzzy matching tolerance (edit distance) */
|
|
20
10
|
tolerance?: number;
|
|
21
|
-
/**
|
|
22
|
-
* Enable adaptive tolerance (scales with query length)
|
|
23
|
-
* @default true
|
|
24
|
-
*/
|
|
11
|
+
/** Enable adaptive tolerance that scales with query length */
|
|
25
12
|
adaptiveTolerance?: boolean;
|
|
26
|
-
/**
|
|
27
|
-
* Enable synonym expansion
|
|
28
|
-
* @default false
|
|
29
|
-
*/
|
|
13
|
+
/** Enable synonym expansion using Supabase-backed synonym map */
|
|
30
14
|
enableSynonyms?: boolean;
|
|
31
|
-
/**
|
|
32
|
-
* Supabase configuration for loading synonyms
|
|
33
|
-
*/
|
|
15
|
+
/** Supabase configuration for loading synonyms */
|
|
34
16
|
supabase?: {
|
|
35
17
|
url: string;
|
|
36
18
|
serviceKey: string;
|
|
37
19
|
};
|
|
38
|
-
/**
|
|
39
|
-
* Scoring weight for synonym matches (0-1)
|
|
40
|
-
* @default 0.8
|
|
41
|
-
*/
|
|
20
|
+
/** Scoring weight for synonym matches (0-1, default ~0.8) */
|
|
42
21
|
synonymMatchScore?: number;
|
|
43
|
-
/**
|
|
44
|
-
* Scoring weights for different components
|
|
45
|
-
*/
|
|
22
|
+
/** Scoring weights for different components */
|
|
46
23
|
weights?: {
|
|
47
|
-
/** Weight for exact matches */
|
|
48
24
|
exact?: number;
|
|
49
|
-
/** Weight for fuzzy matches */
|
|
50
25
|
fuzzy?: number;
|
|
51
|
-
/** Weight for phrase order */
|
|
52
26
|
order?: number;
|
|
53
|
-
/** Weight for proximity bonus */
|
|
54
27
|
proximity?: number;
|
|
55
|
-
/** Weight for density bonus */
|
|
56
28
|
density?: number;
|
|
57
|
-
/** Weight for TF-IDF semantic score */
|
|
58
29
|
semantic?: number;
|
|
59
30
|
};
|
|
60
|
-
/**
|
|
61
|
-
* Maximum gap between words in a phrase
|
|
62
|
-
* @default 5
|
|
63
|
-
*/
|
|
31
|
+
/** Maximum gap between words in a phrase */
|
|
64
32
|
maxGap?: number;
|
|
65
|
-
/**
|
|
66
|
-
* Minimum phrase score to include in results
|
|
67
|
-
* @default 0.1
|
|
68
|
-
*/
|
|
33
|
+
/** Minimum phrase score to include in results */
|
|
69
34
|
minScore?: number;
|
|
70
35
|
}
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
36
|
+
type SynonymMap = Record<string, string[]>;
|
|
37
|
+
interface Candidate {
|
|
38
|
+
word: string;
|
|
39
|
+
type: 'exact' | 'fuzzy' | 'synonym';
|
|
40
|
+
queryToken: string;
|
|
41
|
+
distance: number;
|
|
42
|
+
score: number;
|
|
43
|
+
}
|
|
74
44
|
interface WordMatch {
|
|
75
|
-
/** The matched word from the document */
|
|
76
45
|
word: string;
|
|
77
|
-
/** The query token that matched */
|
|
78
46
|
queryToken: string;
|
|
79
|
-
/** Position of the word in the document */
|
|
80
47
|
position: number;
|
|
81
|
-
/** Type of match */
|
|
82
48
|
type: 'exact' | 'fuzzy' | 'synonym';
|
|
83
|
-
|
|
84
|
-
distance?: number;
|
|
85
|
-
/** Match score (0-1) */
|
|
49
|
+
distance: number;
|
|
86
50
|
score: number;
|
|
87
51
|
}
|
|
88
|
-
/**
|
|
89
|
-
* Phrase match information
|
|
90
|
-
*/
|
|
91
52
|
interface PhraseMatch {
|
|
92
|
-
/** All word matches in this phrase */
|
|
93
53
|
words: WordMatch[];
|
|
94
|
-
/** Start position in document */
|
|
95
54
|
startPosition: number;
|
|
96
|
-
/** End position in document */
|
|
97
55
|
endPosition: number;
|
|
98
|
-
/** Gap between words */
|
|
99
56
|
gap: number;
|
|
100
|
-
/** Whether words are in correct order */
|
|
101
57
|
inOrder: boolean;
|
|
102
|
-
/** Overall phrase score */
|
|
103
58
|
score: number;
|
|
104
|
-
|
|
105
|
-
scoreBreakdown?: {
|
|
59
|
+
scoreBreakdown: {
|
|
106
60
|
base: number;
|
|
107
61
|
order: number;
|
|
108
62
|
proximity: number;
|
|
@@ -110,34 +64,11 @@ interface PhraseMatch {
|
|
|
110
64
|
semantic: number;
|
|
111
65
|
};
|
|
112
66
|
}
|
|
113
|
-
/**
|
|
114
|
-
* Document match with all phrase matches
|
|
115
|
-
*/
|
|
116
67
|
interface DocumentMatch {
|
|
117
|
-
/** Document ID */
|
|
118
68
|
id: string;
|
|
119
|
-
/** All phrase matches found in this document */
|
|
120
69
|
phrases: PhraseMatch[];
|
|
121
|
-
/** Overall document score */
|
|
122
|
-
score: number;
|
|
123
|
-
/** Document data */
|
|
124
|
-
document: Record<string, SearchableValue>;
|
|
125
|
-
}
|
|
126
|
-
/**
|
|
127
|
-
* Synonym map structure
|
|
128
|
-
*/
|
|
129
|
-
interface SynonymMap {
|
|
130
|
-
[word: string]: string[];
|
|
131
|
-
}
|
|
132
|
-
/**
|
|
133
|
-
* Candidate word for matching
|
|
134
|
-
*/
|
|
135
|
-
interface Candidate {
|
|
136
|
-
word: string;
|
|
137
|
-
type: 'exact' | 'fuzzy' | 'synonym';
|
|
138
|
-
queryToken: string;
|
|
139
|
-
distance?: number;
|
|
140
70
|
score: number;
|
|
71
|
+
document: any;
|
|
141
72
|
}
|
|
142
73
|
|
|
143
74
|
/**
|
package/dist/index.js
CHANGED
|
@@ -88,7 +88,15 @@ function extractVocabularyFromRadixTree(radixNode) {
|
|
|
88
88
|
}
|
|
89
89
|
nodesVisited++;
|
|
90
90
|
if (nodesVisited <= 3) {
|
|
91
|
-
|
|
91
|
+
const cInfo = node.c ? {
|
|
92
|
+
isArray: Array.isArray(node.c),
|
|
93
|
+
isMap: node.c instanceof Map,
|
|
94
|
+
type: typeof node.c,
|
|
95
|
+
constructor: node.c.constructor?.name,
|
|
96
|
+
keys: node.c instanceof Map ? Array.from(node.c.keys()).slice(0, 3) : Object.keys(node.c).slice(0, 3),
|
|
97
|
+
valuesCount: node.c instanceof Map ? node.c.size : Array.isArray(node.c) ? node.c.length : Object.keys(node.c).length
|
|
98
|
+
} : "null";
|
|
99
|
+
console.log(`\u{1F50D} Node ${nodesVisited}:`, { w: node.w, e: node.e, has_c: !!node.c, c_info: cInfo });
|
|
92
100
|
}
|
|
93
101
|
if (node.e && node.w && typeof node.w === "string" && node.w.length > 0) {
|
|
94
102
|
vocabulary.add(node.w);
|
|
@@ -98,7 +106,11 @@ function extractVocabularyFromRadixTree(radixNode) {
|
|
|
98
106
|
}
|
|
99
107
|
}
|
|
100
108
|
if (node.c) {
|
|
101
|
-
if (
|
|
109
|
+
if (node.c instanceof Map) {
|
|
110
|
+
for (const [_key, childNode] of node.c) {
|
|
111
|
+
traverse(childNode, depth + 1);
|
|
112
|
+
}
|
|
113
|
+
} else if (Array.isArray(node.c)) {
|
|
102
114
|
for (const [_key, childNode] of node.c) {
|
|
103
115
|
traverse(childNode, depth + 1);
|
|
104
116
|
}
|
|
@@ -213,7 +225,9 @@ function findPhrasesInDocument(documentTokens, candidatesMap, config, documentFr
|
|
|
213
225
|
queryTokens,
|
|
214
226
|
config,
|
|
215
227
|
documentFrequency,
|
|
216
|
-
totalDocuments
|
|
228
|
+
totalDocuments,
|
|
229
|
+
wordMatches
|
|
230
|
+
// Pass all word matches for density calculation
|
|
217
231
|
);
|
|
218
232
|
if (phrase && phrase.words.length > 0) {
|
|
219
233
|
phrases.push(phrase);
|
|
@@ -221,7 +235,7 @@ function findPhrasesInDocument(documentTokens, candidatesMap, config, documentFr
|
|
|
221
235
|
}
|
|
222
236
|
return deduplicatePhrases(phrases);
|
|
223
237
|
}
|
|
224
|
-
function buildPhraseFromPosition(wordMatches, startIndex, queryTokens, config, documentFrequency, totalDocuments) {
|
|
238
|
+
function buildPhraseFromPosition(wordMatches, startIndex, queryTokens, config, documentFrequency, totalDocuments, allWordMatches) {
|
|
225
239
|
const startMatch = wordMatches[startIndex];
|
|
226
240
|
const phraseWords = [startMatch];
|
|
227
241
|
const coveredTokens = /* @__PURE__ */ new Set([startMatch.queryToken]);
|
|
@@ -240,12 +254,13 @@ function buildPhraseFromPosition(wordMatches, startIndex, queryTokens, config, d
|
|
|
240
254
|
}
|
|
241
255
|
}
|
|
242
256
|
if (phraseWords.length > 0) {
|
|
243
|
-
const score = calculatePhraseScore(
|
|
257
|
+
const { score, breakdown } = calculatePhraseScore(
|
|
244
258
|
phraseWords,
|
|
245
259
|
queryTokens,
|
|
246
260
|
config,
|
|
247
261
|
documentFrequency,
|
|
248
|
-
totalDocuments
|
|
262
|
+
totalDocuments,
|
|
263
|
+
allWordMatches
|
|
249
264
|
);
|
|
250
265
|
return {
|
|
251
266
|
words: phraseWords,
|
|
@@ -253,12 +268,13 @@ function buildPhraseFromPosition(wordMatches, startIndex, queryTokens, config, d
|
|
|
253
268
|
endPosition: phraseWords[phraseWords.length - 1].position,
|
|
254
269
|
gap: phraseWords[phraseWords.length - 1].position - phraseWords[0].position,
|
|
255
270
|
inOrder: isInOrder(phraseWords, queryTokens),
|
|
256
|
-
score
|
|
271
|
+
score,
|
|
272
|
+
scoreBreakdown: breakdown
|
|
257
273
|
};
|
|
258
274
|
}
|
|
259
275
|
return null;
|
|
260
276
|
}
|
|
261
|
-
function calculatePhraseScore(phraseWords, queryTokens, config, documentFrequency, totalDocuments) {
|
|
277
|
+
function calculatePhraseScore(phraseWords, queryTokens, config, documentFrequency, totalDocuments, allWordMatches) {
|
|
262
278
|
let baseScore = 0;
|
|
263
279
|
for (const word of phraseWords) {
|
|
264
280
|
const weight = word.type === "exact" ? config.weights.exact : word.type === "fuzzy" ? config.weights.fuzzy : config.weights.fuzzy * 0.8;
|
|
@@ -269,16 +285,42 @@ function calculatePhraseScore(phraseWords, queryTokens, config, documentFrequenc
|
|
|
269
285
|
const orderScore = inOrder ? 1 : 0.5;
|
|
270
286
|
const span = phraseWords[phraseWords.length - 1].position - phraseWords[0].position + 1;
|
|
271
287
|
const proximityScore = Math.max(0, 1 - span / (queryTokens.length * 5));
|
|
272
|
-
|
|
288
|
+
let densityScore = 0;
|
|
289
|
+
if (queryTokens.length === 1) {
|
|
290
|
+
const totalOccurrences = allWordMatches.length;
|
|
291
|
+
densityScore = totalOccurrences / queryTokens.length;
|
|
292
|
+
} else {
|
|
293
|
+
densityScore = phraseWords.length / queryTokens.length;
|
|
294
|
+
}
|
|
273
295
|
const semanticScore = calculateSemanticScore(
|
|
274
296
|
phraseWords,
|
|
275
297
|
documentFrequency,
|
|
276
298
|
totalDocuments
|
|
277
299
|
);
|
|
278
300
|
const weights = config.weights;
|
|
279
|
-
const
|
|
301
|
+
const weightedBase = baseScore;
|
|
302
|
+
const weightedOrder = orderScore * weights.order;
|
|
303
|
+
const weightedProximity = proximityScore * weights.proximity;
|
|
304
|
+
const weightedDensity = densityScore * weights.density;
|
|
305
|
+
const weightedSemantic = semanticScore * weights.semantic;
|
|
306
|
+
const totalScore = weightedBase + weightedOrder + weightedProximity + weightedDensity + weightedSemantic;
|
|
280
307
|
const maxPossibleScore = 1 + weights.order + weights.proximity + weights.density + weights.semantic;
|
|
281
|
-
|
|
308
|
+
const score = totalScore / maxPossibleScore;
|
|
309
|
+
const base = weightedBase / maxPossibleScore;
|
|
310
|
+
const order = weightedOrder / maxPossibleScore;
|
|
311
|
+
const proximity = weightedProximity / maxPossibleScore;
|
|
312
|
+
const density = weightedDensity / maxPossibleScore;
|
|
313
|
+
const semantic = weightedSemantic / maxPossibleScore;
|
|
314
|
+
return {
|
|
315
|
+
score,
|
|
316
|
+
breakdown: {
|
|
317
|
+
base,
|
|
318
|
+
order,
|
|
319
|
+
proximity,
|
|
320
|
+
density,
|
|
321
|
+
semantic
|
|
322
|
+
}
|
|
323
|
+
};
|
|
282
324
|
}
|
|
283
325
|
function isInOrder(phraseWords, queryTokens) {
|
|
284
326
|
const tokenOrder = new Map(queryTokens.map((token, index) => [token, index]));
|
|
@@ -292,6 +334,9 @@ function isInOrder(phraseWords, queryTokens) {
|
|
|
292
334
|
return true;
|
|
293
335
|
}
|
|
294
336
|
function calculateSemanticScore(phraseWords, documentFrequency, totalDocuments) {
|
|
337
|
+
if (totalDocuments === 0) {
|
|
338
|
+
return 0;
|
|
339
|
+
}
|
|
295
340
|
let tfidfSum = 0;
|
|
296
341
|
for (const word of phraseWords) {
|
|
297
342
|
const df = documentFrequency.get(word.word) || 1;
|
|
@@ -386,14 +431,22 @@ function pluginFuzzyPhrase(userConfig = {}) {
|
|
|
386
431
|
console.error("\u26A0\uFE0F Failed to load synonyms:", error);
|
|
387
432
|
}
|
|
388
433
|
}
|
|
389
|
-
|
|
390
|
-
|
|
434
|
+
const docs = orama.data?.docs?.docs;
|
|
435
|
+
if (docs) {
|
|
391
436
|
state.totalDocuments = Object.keys(docs).length;
|
|
392
437
|
state.documentFrequency = calculateDocumentFrequencies(docs, config.textProperty);
|
|
393
438
|
console.log(`\u{1F4CA} Calculated document frequencies for ${state.totalDocuments} documents`);
|
|
394
439
|
}
|
|
395
440
|
pluginStates.set(orama, state);
|
|
396
441
|
console.log("\u2705 Fuzzy Phrase Plugin initialized");
|
|
442
|
+
setImmediate(() => {
|
|
443
|
+
if (typeof globalThis.fuzzyPhrasePluginReady === "function") {
|
|
444
|
+
console.log("\u{1F4E1} Signaling plugin ready...");
|
|
445
|
+
globalThis.fuzzyPhrasePluginReady();
|
|
446
|
+
} else {
|
|
447
|
+
console.warn("\u26A0\uFE0F fuzzyPhrasePluginReady callback not found");
|
|
448
|
+
}
|
|
449
|
+
});
|
|
397
450
|
}
|
|
398
451
|
};
|
|
399
452
|
return plugin;
|
|
@@ -456,7 +509,31 @@ async function searchWithFuzzyPhrase(orama, params, language) {
|
|
|
456
509
|
);
|
|
457
510
|
console.log(`\u{1F3AF} Found candidates: ${Array.from(filteredCandidates.values()).reduce((sum, c) => sum + c.length, 0)} total`);
|
|
458
511
|
const documentMatches = [];
|
|
459
|
-
|
|
512
|
+
console.log("\u{1F50D} DEBUG orama.data structure:", {
|
|
513
|
+
dataKeys: Object.keys(orama.data || {}),
|
|
514
|
+
hasDocs: !!orama.data?.docs,
|
|
515
|
+
docsType: orama.data?.docs ? typeof orama.data.docs : "undefined"
|
|
516
|
+
});
|
|
517
|
+
let docs = {};
|
|
518
|
+
if (orama.data?.docs?.docs) {
|
|
519
|
+
docs = orama.data.docs.docs;
|
|
520
|
+
console.log("\u2705 Found docs at orama.data.docs.docs");
|
|
521
|
+
} else if (orama.data?.docs && typeof orama.data.docs === "object") {
|
|
522
|
+
const firstKey = Object.keys(orama.data.docs)[0];
|
|
523
|
+
if (firstKey && firstKey !== "sharedInternalDocumentStore" && firstKey !== "count") {
|
|
524
|
+
docs = orama.data.docs;
|
|
525
|
+
console.log("\u2705 Found docs at orama.data.docs (direct)");
|
|
526
|
+
}
|
|
527
|
+
}
|
|
528
|
+
if (Object.keys(docs).length === 0) {
|
|
529
|
+
console.log("\u274C Could not find documents - available structure:", {
|
|
530
|
+
hasDataDocs: !!orama.data?.docs,
|
|
531
|
+
dataDocsKeys: orama.data?.docs ? Object.keys(orama.data.docs) : "none",
|
|
532
|
+
hasDataDocsDocs: !!orama.data?.docs?.docs,
|
|
533
|
+
dataDocsDocsCount: orama.data?.docs?.docs ? Object.keys(orama.data.docs.docs).length : 0
|
|
534
|
+
});
|
|
535
|
+
}
|
|
536
|
+
console.log(`\u{1F4C4} Searching through ${Object.keys(docs).length} documents`);
|
|
460
537
|
for (const [docId, doc] of Object.entries(docs)) {
|
|
461
538
|
const text = doc[textProperty];
|
|
462
539
|
if (!text || typeof text !== "string") {
|
|
@@ -484,7 +561,9 @@ async function searchWithFuzzyPhrase(orama, params, language) {
|
|
|
484
561
|
}
|
|
485
562
|
}
|
|
486
563
|
documentMatches.sort((a, b) => b.score - a.score);
|
|
487
|
-
const
|
|
564
|
+
const limit = params.limit ?? documentMatches.length;
|
|
565
|
+
const limitedMatches = documentMatches.slice(0, limit);
|
|
566
|
+
const hits = limitedMatches.map((match) => ({
|
|
488
567
|
id: match.id,
|
|
489
568
|
score: match.score,
|
|
490
569
|
document: match.document,
|
|
@@ -492,7 +571,7 @@ async function searchWithFuzzyPhrase(orama, params, language) {
|
|
|
492
571
|
_phrases: match.phrases
|
|
493
572
|
}));
|
|
494
573
|
const elapsed = performance.now() - startTime;
|
|
495
|
-
console.log(`\u2705 Found ${hits.length} results in ${elapsed.toFixed(2)}ms`);
|
|
574
|
+
console.log(`\u2705 Found ${hits.length} results in ${elapsed.toFixed(2)}ms (limit: ${limit})`);
|
|
496
575
|
return {
|
|
497
576
|
elapsed: {
|
|
498
577
|
formatted: `${elapsed.toFixed(2)}ms`,
|
|
@@ -505,15 +584,25 @@ async function searchWithFuzzyPhrase(orama, params, language) {
|
|
|
505
584
|
}
|
|
506
585
|
async function loadSynonymsFromSupabase(supabaseConfig) {
|
|
507
586
|
try {
|
|
587
|
+
console.log("\u{1F50D} DEBUG: Calling Supabase RPC get_synonym_map...");
|
|
508
588
|
const { createClient } = await import('@supabase/supabase-js');
|
|
509
589
|
const supabase = createClient(supabaseConfig.url, supabaseConfig.serviceKey);
|
|
510
590
|
const { data, error } = await supabase.rpc("get_synonym_map");
|
|
591
|
+
console.log("\u{1F50D} DEBUG: Supabase RPC response:", {
|
|
592
|
+
hasError: !!error,
|
|
593
|
+
errorMessage: error?.message,
|
|
594
|
+
hasData: !!data,
|
|
595
|
+
dataType: typeof data,
|
|
596
|
+
dataKeys: data ? Object.keys(data).length : 0
|
|
597
|
+
});
|
|
511
598
|
if (error) {
|
|
512
599
|
throw new Error(`Supabase error: ${error.message}`);
|
|
513
600
|
}
|
|
514
|
-
|
|
601
|
+
const synonymMap = data || {};
|
|
602
|
+
console.log(`\u{1F4DA} Loaded ${Object.keys(synonymMap).length} synonym entries from Supabase`);
|
|
603
|
+
return synonymMap;
|
|
515
604
|
} catch (error) {
|
|
516
|
-
console.error("Failed to load synonyms from Supabase:", error);
|
|
605
|
+
console.error("\u274C Failed to load synonyms from Supabase:", error);
|
|
517
606
|
throw error;
|
|
518
607
|
}
|
|
519
608
|
}
|
|
@@ -531,8 +620,11 @@ function calculateDocumentFrequencies(docs, textProperty) {
|
|
|
531
620
|
}
|
|
532
621
|
return df;
|
|
533
622
|
}
|
|
623
|
+
function normalizeText(text) {
|
|
624
|
+
return text.toLowerCase().normalize("NFD").replace(/[\u0300-\u036f]/g, "").replace(/\b[ldcjmnst][\u2018\u2019\u201A\u201B\u2032\u2035\u0027\u0060\u00B4](?=\w)/gi, " ").replace(/[\u2018\u2019\u201A\u201B\u2032\u2035\u0027\u0060\u00B4]/g, "").replace(/[\u201c\u201d]/g, '"').replace(/[.,;:!?()[\]{}\-—–«»""]/g, " ").replace(/\s+/g, " ").trim();
|
|
625
|
+
}
|
|
534
626
|
function tokenize(text) {
|
|
535
|
-
return text
|
|
627
|
+
return normalizeText(text).split(/\s+/).filter((token) => token.length > 0);
|
|
536
628
|
}
|
|
537
629
|
|
|
538
630
|
export { pluginFuzzyPhrase, searchWithFuzzyPhrase };
|