@wcs-colab/plugin-fuzzy-phrase 3.1.16-custom.newbase.3 → 3.1.16-custom.newbase.32
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/dist/index.cjs +441 -86
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +116 -3
- package/dist/index.d.ts +116 -3
- package/dist/index.js +437 -87
- package/dist/index.js.map +1 -1
- package/package.json +5 -3
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { AnyOrama, Results, TypedDocument, OramaPlugin } from '@wcs-colab/orama';
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
4
|
* Shared types for the fuzzy phrase plugin.
|
|
@@ -30,8 +30,12 @@ interface FuzzyPhraseConfig {
|
|
|
30
30
|
};
|
|
31
31
|
/** Maximum gap between words in a phrase */
|
|
32
32
|
maxGap?: number;
|
|
33
|
-
/** Minimum
|
|
33
|
+
/** Minimum candidate score to include before phrase building */
|
|
34
34
|
minScore?: number;
|
|
35
|
+
/** Enable final score filtering after phrase/document scoring */
|
|
36
|
+
enableFinalScoreMinimum?: boolean;
|
|
37
|
+
/** Minimum final document score threshold (only applies if enableFinalScoreMinimum is true) */
|
|
38
|
+
finalScoreMinimum?: number;
|
|
35
39
|
/**
|
|
36
40
|
* Multiplier for proximity window calculation.
|
|
37
41
|
* proximityWindow = queryTokens.length × proximitySpanMultiplier
|
|
@@ -100,6 +104,108 @@ interface DocumentMatch {
|
|
|
100
104
|
score: number;
|
|
101
105
|
document: any;
|
|
102
106
|
}
|
|
107
|
+
interface PluginState {
|
|
108
|
+
synonymMap: SynonymMap;
|
|
109
|
+
config: Required<FuzzyPhraseConfig>;
|
|
110
|
+
documentFrequency: Map<string, number>;
|
|
111
|
+
totalDocuments: number;
|
|
112
|
+
/** Cached vocabulary extracted from radix tree (avoids O(V) extraction per query) */
|
|
113
|
+
vocabulary: Set<string>;
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
/**
|
|
117
|
+
* Optimized Search: QPS Candidate Pruning + Fuzzy Phrase Scoring
|
|
118
|
+
*
|
|
119
|
+
* This module provides an algorithmic pruning path where:
|
|
120
|
+
* 1. QPS (Quantum Proximity Scoring) finds candidate documents quickly (token-level, typo-tolerant)
|
|
121
|
+
* 2. Fuzzy Phrase scoring runs only on those candidates (expensive phrase scoring on smaller set)
|
|
122
|
+
*
|
|
123
|
+
* This dramatically reduces the search space for phrase scoring while maintaining
|
|
124
|
+
* the same result quality as full fuzzy phrase search.
|
|
125
|
+
*/
|
|
126
|
+
|
|
127
|
+
/**
|
|
128
|
+
* QPS Index data structure (matches plugin-qps internal format)
|
|
129
|
+
*/
|
|
130
|
+
interface QPSIndexData {
|
|
131
|
+
indexes: Record<string, {
|
|
132
|
+
type: string;
|
|
133
|
+
node: any;
|
|
134
|
+
isArray: boolean;
|
|
135
|
+
}>;
|
|
136
|
+
stats: Record<string, {
|
|
137
|
+
tokenQuantums: Record<string, Record<string, number>>;
|
|
138
|
+
tokensLength: Map<string, number>;
|
|
139
|
+
}>;
|
|
140
|
+
searchableProperties: string[];
|
|
141
|
+
searchablePropertiesWithTypes: Record<string, string>;
|
|
142
|
+
vectorIndexes: Record<string, any>;
|
|
143
|
+
}
|
|
144
|
+
/**
|
|
145
|
+
* Configuration for optimized search
|
|
146
|
+
*/
|
|
147
|
+
interface OptimizedSearchConfig extends FuzzyPhraseConfig {
|
|
148
|
+
/** Maximum number of QPS candidates to consider for phrase scoring */
|
|
149
|
+
maxQPSCandidates?: number;
|
|
150
|
+
/** Minimum QPS score to include a candidate (0-1, relative to best score) */
|
|
151
|
+
minQPSScore?: number;
|
|
152
|
+
/** Whether to use exact matching for QPS (faster but no typo tolerance) */
|
|
153
|
+
qpsExact?: boolean;
|
|
154
|
+
/** QPS tolerance (edit distance for fuzzy matching) */
|
|
155
|
+
qpsTolerance?: number;
|
|
156
|
+
/** Boost factors per property for QPS scoring */
|
|
157
|
+
qpsBoostPerProp?: Record<string, number>;
|
|
158
|
+
}
|
|
159
|
+
/**
|
|
160
|
+
* Normalize text using the same rules as Orama's French tokenizer
|
|
161
|
+
*
|
|
162
|
+
* CRITICAL: This must match normalizeFrenchText() in Orama's tokenizer exactly
|
|
163
|
+
*/
|
|
164
|
+
declare function normalizeText(text: string): string;
|
|
165
|
+
/**
|
|
166
|
+
* Tokenize text using normalization matching Orama's French tokenizer
|
|
167
|
+
*/
|
|
168
|
+
declare function tokenize(text: string): string[];
|
|
169
|
+
/**
|
|
170
|
+
* Optimized search combining QPS candidate pruning with fuzzy phrase scoring
|
|
171
|
+
*
|
|
172
|
+
* This is the main entry point for optimized search. It:
|
|
173
|
+
* 1. Uses QPS to quickly find candidate documents (token-level matching)
|
|
174
|
+
* 2. Filters to top candidates by QPS score
|
|
175
|
+
* 3. Runs full fuzzy phrase scoring only on those candidates
|
|
176
|
+
*
|
|
177
|
+
* @param orama - Orama instance
|
|
178
|
+
* @param qpsIndex - QPS index data (from pluginQPS.getComponents().index)
|
|
179
|
+
* @param pluginState - Fuzzy phrase plugin state
|
|
180
|
+
* @param params - Search parameters
|
|
181
|
+
* @param config - Optimized search configuration
|
|
182
|
+
* @param language - Language for tokenization (default: french)
|
|
183
|
+
* @returns Search results with fuzzy phrase scoring
|
|
184
|
+
*/
|
|
185
|
+
declare function searchWithQPSPruning<T extends AnyOrama>(orama: T, qpsIndex: QPSIndexData, pluginState: PluginState, params: {
|
|
186
|
+
term?: string;
|
|
187
|
+
properties?: string[];
|
|
188
|
+
limit?: number;
|
|
189
|
+
tokenCache?: Map<string, string[]>;
|
|
190
|
+
}, config?: OptimizedSearchConfig, language?: string): Promise<Results<TypedDocument<T>>>;
|
|
191
|
+
/**
|
|
192
|
+
* Create an optimized search function bound to a specific Orama + QPS index
|
|
193
|
+
*
|
|
194
|
+
* This is a convenience wrapper that captures the Orama instance and QPS index,
|
|
195
|
+
* returning a simpler search function.
|
|
196
|
+
*
|
|
197
|
+
* @param orama - Orama instance
|
|
198
|
+
* @param qpsIndex - QPS index data
|
|
199
|
+
* @param pluginState - Fuzzy phrase plugin state
|
|
200
|
+
* @param config - Optimized search configuration
|
|
201
|
+
* @returns Bound search function
|
|
202
|
+
*/
|
|
203
|
+
declare function createOptimizedSearch<T extends AnyOrama>(orama: T, qpsIndex: QPSIndexData, pluginState: PluginState, config?: OptimizedSearchConfig): (params: {
|
|
204
|
+
term?: string;
|
|
205
|
+
properties?: string[];
|
|
206
|
+
limit?: number;
|
|
207
|
+
tokenCache?: Map<string, string[]>;
|
|
208
|
+
}, language?: string) => Promise<Results<TypedDocument<T>>>;
|
|
103
209
|
|
|
104
210
|
/**
|
|
105
211
|
* Fuzzy Phrase Plugin for Orama
|
|
@@ -125,6 +231,13 @@ declare function searchWithFuzzyPhrase<T extends AnyOrama>(orama: T, params: {
|
|
|
125
231
|
term?: string;
|
|
126
232
|
properties?: string[];
|
|
127
233
|
limit?: number;
|
|
234
|
+
tokenCache?: Map<string, string[]>;
|
|
235
|
+
candidateIds?: Set<string> | string[];
|
|
128
236
|
}, language?: string): Promise<Results<TypedDocument<T>>>;
|
|
129
237
|
|
|
130
|
-
|
|
238
|
+
/**
|
|
239
|
+
* Re-export plugin state accessor for optimized search integration
|
|
240
|
+
*/
|
|
241
|
+
declare function getPluginState(orama: any): PluginState | undefined;
|
|
242
|
+
|
|
243
|
+
export { Candidate, DocumentMatch, FuzzyPhraseConfig, OptimizedSearchConfig, PhraseMatch, PluginState, SynonymMap, WordMatch, createOptimizedSearch, getPluginState, normalizeText as normalizeTextOptimized, pluginFuzzyPhrase, searchWithFuzzyPhrase, searchWithQPSPruning, tokenize as tokenizeOptimized };
|