@wcs-colab/plugin-fuzzy-phrase 3.1.16-custom.newbase.3 → 3.1.16-custom.newbase.32

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { OramaPlugin, AnyOrama, Results, TypedDocument } from '@wcs-colab/orama';
1
+ import { AnyOrama, Results, TypedDocument, OramaPlugin } from '@wcs-colab/orama';
2
2
 
3
3
  /**
4
4
  * Shared types for the fuzzy phrase plugin.
@@ -30,8 +30,12 @@ interface FuzzyPhraseConfig {
30
30
  };
31
31
  /** Maximum gap between words in a phrase */
32
32
  maxGap?: number;
33
- /** Minimum phrase score to include in results */
33
+ /** Minimum candidate score to include before phrase building */
34
34
  minScore?: number;
35
+ /** Enable final score filtering after phrase/document scoring */
36
+ enableFinalScoreMinimum?: boolean;
37
+ /** Minimum final document score threshold (only applies if enableFinalScoreMinimum is true) */
38
+ finalScoreMinimum?: number;
35
39
  /**
36
40
  * Multiplier for proximity window calculation.
37
41
  * proximityWindow = queryTokens.length × proximitySpanMultiplier
@@ -100,6 +104,108 @@ interface DocumentMatch {
100
104
  score: number;
101
105
  document: any;
102
106
  }
107
+ interface PluginState {
108
+ synonymMap: SynonymMap;
109
+ config: Required<FuzzyPhraseConfig>;
110
+ documentFrequency: Map<string, number>;
111
+ totalDocuments: number;
112
+ /** Cached vocabulary extracted from radix tree (avoids O(V) extraction per query) */
113
+ vocabulary: Set<string>;
114
+ }
115
+
116
+ /**
117
+ * Optimized Search: QPS Candidate Pruning + Fuzzy Phrase Scoring
118
+ *
119
+ * This module provides an algorithmic pruning path where:
120
+ * 1. QPS (Quantum Proximity Scoring) finds candidate documents quickly (token-level, typo-tolerant)
121
+ * 2. Fuzzy Phrase scoring runs only on those candidates (expensive phrase scoring on smaller set)
122
+ *
123
+ * This dramatically reduces the search space for phrase scoring while maintaining
124
+ * the same result quality as full fuzzy phrase search.
125
+ */
126
+
127
+ /**
128
+ * QPS Index data structure (matches plugin-qps internal format)
129
+ */
130
+ interface QPSIndexData {
131
+ indexes: Record<string, {
132
+ type: string;
133
+ node: any;
134
+ isArray: boolean;
135
+ }>;
136
+ stats: Record<string, {
137
+ tokenQuantums: Record<string, Record<string, number>>;
138
+ tokensLength: Map<string, number>;
139
+ }>;
140
+ searchableProperties: string[];
141
+ searchablePropertiesWithTypes: Record<string, string>;
142
+ vectorIndexes: Record<string, any>;
143
+ }
144
+ /**
145
+ * Configuration for optimized search
146
+ */
147
+ interface OptimizedSearchConfig extends FuzzyPhraseConfig {
148
+ /** Maximum number of QPS candidates to consider for phrase scoring */
149
+ maxQPSCandidates?: number;
150
+ /** Minimum QPS score to include a candidate (0-1, relative to best score) */
151
+ minQPSScore?: number;
152
+ /** Whether to use exact matching for QPS (faster but no typo tolerance) */
153
+ qpsExact?: boolean;
154
+ /** QPS tolerance (edit distance for fuzzy matching) */
155
+ qpsTolerance?: number;
156
+ /** Boost factors per property for QPS scoring */
157
+ qpsBoostPerProp?: Record<string, number>;
158
+ }
159
+ /**
160
+ * Normalize text using the same rules as Orama's French tokenizer
161
+ *
162
+ * CRITICAL: This must match normalizeFrenchText() in Orama's tokenizer exactly
163
+ */
164
+ declare function normalizeText(text: string): string;
165
+ /**
166
+ * Tokenize text using normalization matching Orama's French tokenizer
167
+ */
168
+ declare function tokenize(text: string): string[];
169
+ /**
170
+ * Optimized search combining QPS candidate pruning with fuzzy phrase scoring
171
+ *
172
+ * This is the main entry point for optimized search. It:
173
+ * 1. Uses QPS to quickly find candidate documents (token-level matching)
174
+ * 2. Filters to top candidates by QPS score
175
+ * 3. Runs full fuzzy phrase scoring only on those candidates
176
+ *
177
+ * @param orama - Orama instance
178
+ * @param qpsIndex - QPS index data (from pluginQPS.getComponents().index)
179
+ * @param pluginState - Fuzzy phrase plugin state
180
+ * @param params - Search parameters
181
+ * @param config - Optimized search configuration
182
+ * @param language - Language for tokenization (default: french)
183
+ * @returns Search results with fuzzy phrase scoring
184
+ */
185
+ declare function searchWithQPSPruning<T extends AnyOrama>(orama: T, qpsIndex: QPSIndexData, pluginState: PluginState, params: {
186
+ term?: string;
187
+ properties?: string[];
188
+ limit?: number;
189
+ tokenCache?: Map<string, string[]>;
190
+ }, config?: OptimizedSearchConfig, language?: string): Promise<Results<TypedDocument<T>>>;
191
+ /**
192
+ * Create an optimized search function bound to a specific Orama + QPS index
193
+ *
194
+ * This is a convenience wrapper that captures the Orama instance and QPS index,
195
+ * returning a simpler search function.
196
+ *
197
+ * @param orama - Orama instance
198
+ * @param qpsIndex - QPS index data
199
+ * @param pluginState - Fuzzy phrase plugin state
200
+ * @param config - Optimized search configuration
201
+ * @returns Bound search function
202
+ */
203
+ declare function createOptimizedSearch<T extends AnyOrama>(orama: T, qpsIndex: QPSIndexData, pluginState: PluginState, config?: OptimizedSearchConfig): (params: {
204
+ term?: string;
205
+ properties?: string[];
206
+ limit?: number;
207
+ tokenCache?: Map<string, string[]>;
208
+ }, language?: string) => Promise<Results<TypedDocument<T>>>;
103
209
 
104
210
  /**
105
211
  * Fuzzy Phrase Plugin for Orama
@@ -125,6 +231,13 @@ declare function searchWithFuzzyPhrase<T extends AnyOrama>(orama: T, params: {
125
231
  term?: string;
126
232
  properties?: string[];
127
233
  limit?: number;
234
+ tokenCache?: Map<string, string[]>;
235
+ candidateIds?: Set<string> | string[];
128
236
  }, language?: string): Promise<Results<TypedDocument<T>>>;
129
237
 
130
- export { Candidate, DocumentMatch, FuzzyPhraseConfig, PhraseMatch, SynonymMap, WordMatch, pluginFuzzyPhrase, searchWithFuzzyPhrase };
238
+ /**
239
+ * Re-export plugin state accessor for optimized search integration
240
+ */
241
+ declare function getPluginState(orama: any): PluginState | undefined;
242
+
243
+ export { Candidate, DocumentMatch, FuzzyPhraseConfig, OptimizedSearchConfig, PhraseMatch, PluginState, SynonymMap, WordMatch, createOptimizedSearch, getPluginState, normalizeText as normalizeTextOptimized, pluginFuzzyPhrase, searchWithFuzzyPhrase, searchWithQPSPruning, tokenize as tokenizeOptimized };