@wcs-colab/plugin-fuzzy-phrase 3.1.16-custom.9 → 3.1.16-custom.newbase.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +13 -0
- package/README.md +1 -1
- package/dist/index.cjs +97 -34
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +22 -91
- package/dist/index.d.ts +22 -91
- package/dist/index.js +97 -34
- package/dist/index.js.map +1 -1
- package/package.json +62 -54
package/dist/index.d.cts
CHANGED
@@ -1,108 +1,62 @@
-import {
+import { OramaPlugin, AnyOrama, Results, TypedDocument } from '@wcs-colab/orama';
 
 /**
- *
- */
-
-/**
- * Configuration for the Fuzzy Phrase Plugin
+ * Shared types for the fuzzy phrase plugin.
  */
 interface FuzzyPhraseConfig {
-    /**
-     * Text property to search in
-     * @default 'content'
-     */
+    /** Text property to search in (defaults to `content`) */
     textProperty?: string;
-    /**
-     * Base fuzzy matching tolerance (edit distance)
-     * @default 1
-     */
+    /** Base fuzzy matching tolerance (edit distance) */
     tolerance?: number;
-    /**
-     * Enable adaptive tolerance (scales with query length)
-     * @default true
-     */
+    /** Enable adaptive tolerance that scales with query length */
     adaptiveTolerance?: boolean;
-    /**
-     * Enable synonym expansion
-     * @default false
-     */
+    /** Enable synonym expansion using Supabase-backed synonym map */
     enableSynonyms?: boolean;
-    /**
-     * Supabase configuration for loading synonyms
-     */
+    /** Supabase configuration for loading synonyms */
     supabase?: {
         url: string;
         serviceKey: string;
     };
-    /**
-     * Scoring weight for synonym matches (0-1)
-     * @default 0.8
-     */
+    /** Scoring weight for synonym matches (0-1, default ~0.8) */
     synonymMatchScore?: number;
-    /**
-     * Scoring weights for different components
-     */
+    /** Scoring weights for different components */
     weights?: {
-        /** Weight for exact matches */
        exact?: number;
-        /** Weight for fuzzy matches */
        fuzzy?: number;
-        /** Weight for phrase order */
        order?: number;
-        /** Weight for proximity bonus */
        proximity?: number;
-        /** Weight for density bonus */
        density?: number;
-        /** Weight for TF-IDF semantic score */
        semantic?: number;
    };
-    /**
-     * Maximum gap between words in a phrase
-     * @default 5
-     */
+    /** Maximum gap between words in a phrase */
    maxGap?: number;
-    /**
-     * Minimum phrase score to include in results
-     * @default 0.1
-     */
+    /** Minimum phrase score to include in results */
    minScore?: number;
 }
-
-
-
+type SynonymMap = Record<string, string[]>;
+interface Candidate {
+    word: string;
+    type: 'exact' | 'fuzzy' | 'synonym';
+    queryToken: string;
+    distance: number;
+    score: number;
+}
 interface WordMatch {
-    /** The matched word from the document */
    word: string;
-    /** The query token that matched */
    queryToken: string;
-    /** Position of the word in the document */
    position: number;
-    /** Type of match */
    type: 'exact' | 'fuzzy' | 'synonym';
-
-    distance?: number;
-    /** Match score (0-1) */
+    distance: number;
    score: number;
 }
-/**
- * Phrase match information
- */
 interface PhraseMatch {
-    /** All word matches in this phrase */
    words: WordMatch[];
-    /** Start position in document */
    startPosition: number;
-    /** End position in document */
    endPosition: number;
-    /** Gap between words */
    gap: number;
-    /** Whether words are in correct order */
    inOrder: boolean;
-    /** Overall phrase score */
    score: number;
-
-    scoreBreakdown?: {
+    scoreBreakdown: {
        base: number;
        order: number;
        proximity: number;
@@ -110,34 +64,11 @@ interface PhraseMatch {
        semantic: number;
    };
 }
-/**
- * Document match with all phrase matches
- */
 interface DocumentMatch {
-    /** Document ID */
    id: string;
-    /** All phrase matches found in this document */
    phrases: PhraseMatch[];
-    /** Overall document score */
-    score: number;
-    /** Document data */
-    document: Record<string, SearchableValue>;
-}
-/**
- * Synonym map structure
- */
-interface SynonymMap {
-    [word: string]: string[];
-}
-/**
- * Candidate word for matching
- */
-interface Candidate {
-    word: string;
-    type: 'exact' | 'fuzzy' | 'synonym';
-    queryToken: string;
-    distance?: number;
    score: number;
+    document: any;
 }
 
 /**
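For orientation, a minimal usage sketch based on the declarations above. The `pluginFuzzyPhrase` factory, `searchWithFuzzyPhrase` helper, and the `FuzzyPhraseConfig` field names come from this package's dist output; the `create({ schema, plugins })` registration and the `term` search parameter follow the upstream Orama convention and are assumptions here, as are all example values.

```ts
import { create } from '@wcs-colab/orama';
import { pluginFuzzyPhrase, searchWithFuzzyPhrase } from '@wcs-colab/plugin-fuzzy-phrase';

// Sketch only: field names mirror FuzzyPhraseConfig; the values are illustrative.
const db = await create({
  schema: { content: 'string' },
  plugins: [
    pluginFuzzyPhrase({
      textProperty: 'content',   // property the plugin scans
      tolerance: 1,              // base edit distance
      adaptiveTolerance: true,   // scale tolerance with query length
      enableSynonyms: false,     // Supabase-backed synonym expansion stays opt-in
      weights: { exact: 1, fuzzy: 0.7, order: 0.5, proximity: 0.5, density: 0.5, semantic: 0.5 },
      maxGap: 5,
      minScore: 0.1
    })
  ]
});

// `limit` is read by the compiled search path further below; `term` is assumed.
const results = await searchWithFuzzyPhrase(db, { term: 'fuzzy phrase', limit: 10 });
```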
package/dist/index.d.ts
CHANGED
Same diff as package/dist/index.d.cts above; the .d.ts and .d.cts declaration outputs receive identical changes.
package/dist/index.js
CHANGED
@@ -80,30 +80,13 @@ function calculateAdaptiveTolerance(queryTokens, baseTolerance) {
 function extractVocabularyFromRadixTree(radixNode) {
   const vocabulary = /* @__PURE__ */ new Set();
   let nodesVisited = 0;
-  let wordsFound = 0;
   function traverse(node, depth = 0) {
     if (!node) {
-      console.log(`\u26A0\uFE0F Null node at depth ${depth}`);
       return;
     }
     nodesVisited++;
-    if (nodesVisited <= 3) {
-      const cInfo = node.c ? {
-        isArray: Array.isArray(node.c),
-        isMap: node.c instanceof Map,
-        type: typeof node.c,
-        constructor: node.c.constructor?.name,
-        keys: node.c instanceof Map ? Array.from(node.c.keys()).slice(0, 3) : Object.keys(node.c).slice(0, 3),
-        valuesCount: node.c instanceof Map ? node.c.size : Array.isArray(node.c) ? node.c.length : Object.keys(node.c).length
-      } : "null";
-      console.log(`\u{1F50D} Node ${nodesVisited}:`, { w: node.w, e: node.e, has_c: !!node.c, c_info: cInfo });
-    }
     if (node.e && node.w && typeof node.w === "string" && node.w.length > 0) {
       vocabulary.add(node.w);
-      wordsFound++;
-      if (wordsFound <= 5) {
-        console.log(`\u2705 Found word ${wordsFound}: "${node.w}"`);
-      }
     }
     if (node.c) {
       if (node.c instanceof Map) {
@@ -225,7 +208,9 @@ function findPhrasesInDocument(documentTokens, candidatesMap, config, documentFr
       queryTokens,
       config,
       documentFrequency,
-      totalDocuments
+      totalDocuments,
+      wordMatches
+      // Pass all word matches for density calculation
     );
     if (phrase && phrase.words.length > 0) {
       phrases.push(phrase);
@@ -233,7 +218,7 @@ function findPhrasesInDocument(documentTokens, candidatesMap, config, documentFr
   }
   return deduplicatePhrases(phrases);
 }
-function buildPhraseFromPosition(wordMatches, startIndex, queryTokens, config, documentFrequency, totalDocuments) {
+function buildPhraseFromPosition(wordMatches, startIndex, queryTokens, config, documentFrequency, totalDocuments, allWordMatches) {
   const startMatch = wordMatches[startIndex];
   const phraseWords = [startMatch];
   const coveredTokens = /* @__PURE__ */ new Set([startMatch.queryToken]);
@@ -252,12 +237,13 @@ function buildPhraseFromPosition(wordMatches, startIndex, queryTokens, config, d
     }
   }
   if (phraseWords.length > 0) {
-    const score = calculatePhraseScore(
+    const { score, breakdown } = calculatePhraseScore(
       phraseWords,
       queryTokens,
       config,
       documentFrequency,
-      totalDocuments
+      totalDocuments,
+      allWordMatches
     );
     return {
       words: phraseWords,
@@ -265,12 +251,13 @@ function buildPhraseFromPosition(wordMatches, startIndex, queryTokens, config, d
       endPosition: phraseWords[phraseWords.length - 1].position,
       gap: phraseWords[phraseWords.length - 1].position - phraseWords[0].position,
       inOrder: isInOrder(phraseWords, queryTokens),
-      score
+      score,
+      scoreBreakdown: breakdown
     };
   }
   return null;
 }
-function calculatePhraseScore(phraseWords, queryTokens, config, documentFrequency, totalDocuments) {
+function calculatePhraseScore(phraseWords, queryTokens, config, documentFrequency, totalDocuments, allWordMatches) {
   let baseScore = 0;
   for (const word of phraseWords) {
     const weight = word.type === "exact" ? config.weights.exact : word.type === "fuzzy" ? config.weights.fuzzy : config.weights.fuzzy * 0.8;
@@ -281,16 +268,42 @@ function calculatePhraseScore(phraseWords, queryTokens, config, documentFrequenc
   const orderScore = inOrder ? 1 : 0.5;
   const span = phraseWords[phraseWords.length - 1].position - phraseWords[0].position + 1;
   const proximityScore = Math.max(0, 1 - span / (queryTokens.length * 5));
-
+  let densityScore = 0;
+  if (queryTokens.length === 1) {
+    const totalOccurrences = allWordMatches.length;
+    densityScore = totalOccurrences / queryTokens.length;
+  } else {
+    densityScore = phraseWords.length / queryTokens.length;
+  }
   const semanticScore = calculateSemanticScore(
     phraseWords,
     documentFrequency,
     totalDocuments
   );
   const weights = config.weights;
-  const
+  const weightedBase = baseScore;
+  const weightedOrder = orderScore * weights.order;
+  const weightedProximity = proximityScore * weights.proximity;
+  const weightedDensity = densityScore * weights.density;
+  const weightedSemantic = semanticScore * weights.semantic;
+  const totalScore = weightedBase + weightedOrder + weightedProximity + weightedDensity + weightedSemantic;
   const maxPossibleScore = 1 + weights.order + weights.proximity + weights.density + weights.semantic;
-
+  const score = totalScore / maxPossibleScore;
+  const base = weightedBase / maxPossibleScore;
+  const order = weightedOrder / maxPossibleScore;
+  const proximity = weightedProximity / maxPossibleScore;
+  const density = weightedDensity / maxPossibleScore;
+  const semantic = weightedSemantic / maxPossibleScore;
+  return {
+    score,
+    breakdown: {
+      base,
+      order,
+      proximity,
+      density,
+      semantic
+    }
+  };
 }
 function isInOrder(phraseWords, queryTokens) {
   const tokenOrder = new Map(queryTokens.map((token, index) => [token, index]));
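The hunk above rewrites `calculatePhraseScore` so that every weighted component is divided by the same `maxPossibleScore`, which makes the new `scoreBreakdown` entries sum to the final `score`. A standalone sketch of that arithmetic (parameter names and the example weights are illustrative, not the plugin's internal API):

```ts
interface ScoreWeights { order: number; proximity: number; density: number; semantic: number }

// Mirrors the normalization in the compiled calculatePhraseScore above.
function normalizePhraseScore(
  baseScore: number,      // already weighted by exact/fuzzy match weights
  orderScore: number,     // 1 when words appear in query order, else 0.5
  proximityScore: number, // max(0, 1 - span / (queryTokens.length * 5))
  densityScore: number,   // occurrences per query token
  semanticScore: number,  // TF-IDF component
  w: ScoreWeights
) {
  const weighted = {
    base: baseScore,
    order: orderScore * w.order,
    proximity: proximityScore * w.proximity,
    density: densityScore * w.density,
    semantic: semanticScore * w.semantic
  };
  const total = weighted.base + weighted.order + weighted.proximity + weighted.density + weighted.semantic;
  const max = 1 + w.order + w.proximity + w.density + w.semantic;
  return {
    score: total / max,
    breakdown: {
      base: weighted.base / max,
      order: weighted.order / max,
      proximity: weighted.proximity / max,
      density: weighted.density / max,
      semantic: weighted.semantic / max
    }
  };
}

// With all four weights at 0.5 and every component equal to 1:
// total = 1 + 4 * 0.5 = 3, max = 1 + 4 * 0.5 = 3, so score = 1 and the
// breakdown entries also sum to 1.
```

As read from the diff, `densityScore` (and the exact/fuzzy base sum) can exceed 1, so the normalized score is not strictly capped at 1; the shared divisor only keeps the breakdown proportional to the total.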
@@ -304,6 +317,9 @@ function isInOrder(phraseWords, queryTokens) {
   return true;
 }
 function calculateSemanticScore(phraseWords, documentFrequency, totalDocuments) {
+  if (totalDocuments === 0) {
+    return 0;
+  }
   let tfidfSum = 0;
   for (const word of phraseWords) {
     const df = documentFrequency.get(word.word) || 1;
@@ -398,14 +414,22 @@ function pluginFuzzyPhrase(userConfig = {}) {
          console.error("\u26A0\uFE0F Failed to load synonyms:", error);
        }
      }
-
-
+      const docs = orama.data?.docs?.docs;
+      if (docs) {
        state.totalDocuments = Object.keys(docs).length;
        state.documentFrequency = calculateDocumentFrequencies(docs, config.textProperty);
        console.log(`\u{1F4CA} Calculated document frequencies for ${state.totalDocuments} documents`);
      }
      pluginStates.set(orama, state);
      console.log("\u2705 Fuzzy Phrase Plugin initialized");
+      setImmediate(() => {
+        if (typeof globalThis.fuzzyPhrasePluginReady === "function") {
+          console.log("\u{1F4E1} Signaling plugin ready...");
+          globalThis.fuzzyPhrasePluginReady();
+        } else {
+          console.warn("\u26A0\uFE0F fuzzyPhrasePluginReady callback not found");
+        }
+      });
    }
  };
  return plugin;
@@ -468,7 +492,31 @@ async function searchWithFuzzyPhrase(orama, params, language) {
   );
   console.log(`\u{1F3AF} Found candidates: ${Array.from(filteredCandidates.values()).reduce((sum, c) => sum + c.length, 0)} total`);
   const documentMatches = [];
-
+  console.log("\u{1F50D} DEBUG orama.data structure:", {
+    dataKeys: Object.keys(orama.data || {}),
+    hasDocs: !!orama.data?.docs,
+    docsType: orama.data?.docs ? typeof orama.data.docs : "undefined"
+  });
+  let docs = {};
+  if (orama.data?.docs?.docs) {
+    docs = orama.data.docs.docs;
+    console.log("\u2705 Found docs at orama.data.docs.docs");
+  } else if (orama.data?.docs && typeof orama.data.docs === "object") {
+    const firstKey = Object.keys(orama.data.docs)[0];
+    if (firstKey && firstKey !== "sharedInternalDocumentStore" && firstKey !== "count") {
+      docs = orama.data.docs;
+      console.log("\u2705 Found docs at orama.data.docs (direct)");
+    }
+  }
+  if (Object.keys(docs).length === 0) {
+    console.log("\u274C Could not find documents - available structure:", {
+      hasDataDocs: !!orama.data?.docs,
+      dataDocsKeys: orama.data?.docs ? Object.keys(orama.data.docs) : "none",
+      hasDataDocsDocs: !!orama.data?.docs?.docs,
+      dataDocsDocsCount: orama.data?.docs?.docs ? Object.keys(orama.data.docs.docs).length : 0
+    });
+  }
+  console.log(`\u{1F4C4} Searching through ${Object.keys(docs).length} documents`);
   for (const [docId, doc] of Object.entries(docs)) {
     const text = doc[textProperty];
     if (!text || typeof text !== "string") {
@@ -496,7 +544,9 @@ async function searchWithFuzzyPhrase(orama, params, language) {
     }
   }
   documentMatches.sort((a, b) => b.score - a.score);
-  const
+  const limit = params.limit ?? documentMatches.length;
+  const limitedMatches = documentMatches.slice(0, limit);
+  const hits = limitedMatches.map((match) => ({
     id: match.id,
     score: match.score,
     document: match.document,
@@ -504,7 +554,7 @@ async function searchWithFuzzyPhrase(orama, params, language) {
     _phrases: match.phrases
   }));
   const elapsed = performance.now() - startTime;
-  console.log(`\u2705 Found ${hits.length} results in ${elapsed.toFixed(2)}ms`);
+  console.log(`\u2705 Found ${hits.length} results in ${elapsed.toFixed(2)}ms (limit: ${limit})`);
   return {
     elapsed: {
       formatted: `${elapsed.toFixed(2)}ms`,
@@ -517,15 +567,25 @@ async function searchWithFuzzyPhrase(orama, params, language) {
 }
 async function loadSynonymsFromSupabase(supabaseConfig) {
   try {
+    console.log("\u{1F50D} DEBUG: Calling Supabase RPC get_synonym_map...");
     const { createClient } = await import('@supabase/supabase-js');
     const supabase = createClient(supabaseConfig.url, supabaseConfig.serviceKey);
     const { data, error } = await supabase.rpc("get_synonym_map");
+    console.log("\u{1F50D} DEBUG: Supabase RPC response:", {
+      hasError: !!error,
+      errorMessage: error?.message,
+      hasData: !!data,
+      dataType: typeof data,
+      dataKeys: data ? Object.keys(data).length : 0
+    });
     if (error) {
       throw new Error(`Supabase error: ${error.message}`);
     }
-
+    const synonymMap = data || {};
+    console.log(`\u{1F4DA} Loaded ${Object.keys(synonymMap).length} synonym entries from Supabase`);
+    return synonymMap;
   } catch (error) {
-    console.error("Failed to load synonyms from Supabase:", error);
+    console.error("\u274C Failed to load synonyms from Supabase:", error);
     throw error;
   }
 }
@@ -543,8 +603,11 @@ function calculateDocumentFrequencies(docs, textProperty) {
   }
   return df;
 }
+function normalizeText(text) {
+  return text.toLowerCase().normalize("NFD").replace(/[\u0300-\u036f]/g, "").replace(/\b[ldcjmnst][\u2018\u2019\u201A\u201B\u2032\u2035\u0027\u0060\u00B4](?=\w)/gi, " ").replace(/[\u2018\u2019\u201A\u201B\u2032\u2035\u0027\u0060\u00B4]/g, "").replace(/[\u201c\u201d]/g, '"').replace(/[.,;:!?()[\]{}\-—–«»""]/g, " ").replace(/\s+/g, " ").trim();
+}
 function tokenize(text) {
-  return text
+  return normalizeText(text).split(/\s+/).filter((token) => token.length > 0);
 }
 
 export { pluginFuzzyPhrase, searchWithFuzzyPhrase };
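The new `normalizeText` helper lowercases, strips diacritics via NFD, drops elided articles (l', d', c', j', m', n', s', t'), normalizes quotes, and turns punctuation into spaces before `tokenize` splits on whitespace. A trace sketch of that pipeline follows; the regexes are copied from the diff, while the sample input and expected tokens are illustrative, not a test fixture from the package.

```ts
// Copy of the normalization chain from dist/index.js, for illustration only.
const normalizeText = (text: string): string =>
  text
    .toLowerCase()
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')                      // strip combining diacritics
    .replace(/\b[ldcjmnst][\u2018\u2019\u201A\u201B\u2032\u2035\u0027\u0060\u00B4](?=\w)/gi, ' ') // elisions: l', d', c'…
    .replace(/[\u2018\u2019\u201A\u201B\u2032\u2035\u0027\u0060\u00B4]/g, '')                     // leftover apostrophes
    .replace(/[\u201c\u201d]/g, '"')                      // curly double quotes → straight
    .replace(/[.,;:!?()[\]{}\-—–«»""]/g, ' ')             // punctuation → spaces
    .replace(/\s+/g, ' ')
    .trim();

const tokenize = (text: string): string[] =>
  normalizeText(text).split(/\s+/).filter((token) => token.length > 0);

// Expected behaviour on a French-style input:
// tokenize("L'éléphant, c'est l'idée !") → ["elephant", "est", "idee"]
```

The elision class targets French-style contractions so that, for example, a query for "elephant" can match "l'éléphant" in the indexed text.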