@topgunbuild/core 0.6.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +878 -1
- package/dist/index.d.ts +878 -1
- package/dist/index.js +1283 -1
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1265 -1
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -1897,6 +1897,147 @@ declare const JournalReadResponseSchema: z.ZodObject<{
|
|
|
1897
1897
|
}, z.core.$strip>>;
|
|
1898
1898
|
hasMore: z.ZodBoolean;
|
|
1899
1899
|
}, z.core.$strip>;
|
|
1900
|
+
/**
|
|
1901
|
+
* Search options schema for FTS queries.
|
|
1902
|
+
*/
|
|
1903
|
+
declare const SearchOptionsSchema: z.ZodObject<{
|
|
1904
|
+
limit: z.ZodOptional<z.ZodNumber>;
|
|
1905
|
+
minScore: z.ZodOptional<z.ZodNumber>;
|
|
1906
|
+
boost: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
|
|
1907
|
+
}, z.core.$strip>;
|
|
1908
|
+
/**
|
|
1909
|
+
* SEARCH: Client requests one-shot BM25 search.
|
|
1910
|
+
*/
|
|
1911
|
+
declare const SearchPayloadSchema: z.ZodObject<{
|
|
1912
|
+
requestId: z.ZodString;
|
|
1913
|
+
mapName: z.ZodString;
|
|
1914
|
+
query: z.ZodString;
|
|
1915
|
+
options: z.ZodOptional<z.ZodObject<{
|
|
1916
|
+
limit: z.ZodOptional<z.ZodNumber>;
|
|
1917
|
+
minScore: z.ZodOptional<z.ZodNumber>;
|
|
1918
|
+
boost: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
|
|
1919
|
+
}, z.core.$strip>>;
|
|
1920
|
+
}, z.core.$strip>;
|
|
1921
|
+
declare const SearchMessageSchema: z.ZodObject<{
|
|
1922
|
+
type: z.ZodLiteral<"SEARCH">;
|
|
1923
|
+
payload: z.ZodObject<{
|
|
1924
|
+
requestId: z.ZodString;
|
|
1925
|
+
mapName: z.ZodString;
|
|
1926
|
+
query: z.ZodString;
|
|
1927
|
+
options: z.ZodOptional<z.ZodObject<{
|
|
1928
|
+
limit: z.ZodOptional<z.ZodNumber>;
|
|
1929
|
+
minScore: z.ZodOptional<z.ZodNumber>;
|
|
1930
|
+
boost: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
|
|
1931
|
+
}, z.core.$strip>>;
|
|
1932
|
+
}, z.core.$strip>;
|
|
1933
|
+
}, z.core.$strip>;
|
|
1934
|
+
/**
|
|
1935
|
+
* SEARCH_RESP: Server responds with search results.
|
|
1936
|
+
*/
|
|
1937
|
+
declare const SearchRespPayloadSchema: z.ZodObject<{
|
|
1938
|
+
requestId: z.ZodString;
|
|
1939
|
+
results: z.ZodArray<z.ZodObject<{
|
|
1940
|
+
key: z.ZodString;
|
|
1941
|
+
value: z.ZodUnknown;
|
|
1942
|
+
score: z.ZodNumber;
|
|
1943
|
+
matchedTerms: z.ZodArray<z.ZodString>;
|
|
1944
|
+
}, z.core.$strip>>;
|
|
1945
|
+
totalCount: z.ZodNumber;
|
|
1946
|
+
error: z.ZodOptional<z.ZodString>;
|
|
1947
|
+
}, z.core.$strip>;
|
|
1948
|
+
declare const SearchRespMessageSchema: z.ZodObject<{
|
|
1949
|
+
type: z.ZodLiteral<"SEARCH_RESP">;
|
|
1950
|
+
payload: z.ZodObject<{
|
|
1951
|
+
requestId: z.ZodString;
|
|
1952
|
+
results: z.ZodArray<z.ZodObject<{
|
|
1953
|
+
key: z.ZodString;
|
|
1954
|
+
value: z.ZodUnknown;
|
|
1955
|
+
score: z.ZodNumber;
|
|
1956
|
+
matchedTerms: z.ZodArray<z.ZodString>;
|
|
1957
|
+
}, z.core.$strip>>;
|
|
1958
|
+
totalCount: z.ZodNumber;
|
|
1959
|
+
error: z.ZodOptional<z.ZodString>;
|
|
1960
|
+
}, z.core.$strip>;
|
|
1961
|
+
}, z.core.$strip>;
|
|
1962
|
+
/**
|
|
1963
|
+
* Search delta update type.
|
|
1964
|
+
* - ENTER: Document entered the result set (new or score exceeded minScore)
|
|
1965
|
+
* - UPDATE: Document score changed while remaining in result set
|
|
1966
|
+
* - LEAVE: Document left the result set (removed or score dropped below minScore)
|
|
1967
|
+
*/
|
|
1968
|
+
declare const SearchUpdateTypeSchema: z.ZodEnum<{
|
|
1969
|
+
UPDATE: "UPDATE";
|
|
1970
|
+
ENTER: "ENTER";
|
|
1971
|
+
LEAVE: "LEAVE";
|
|
1972
|
+
}>;
|
|
1973
|
+
/**
|
|
1974
|
+
* SEARCH_SUB: Client subscribes to live search results.
|
|
1975
|
+
*/
|
|
1976
|
+
declare const SearchSubPayloadSchema: z.ZodObject<{
|
|
1977
|
+
subscriptionId: z.ZodString;
|
|
1978
|
+
mapName: z.ZodString;
|
|
1979
|
+
query: z.ZodString;
|
|
1980
|
+
options: z.ZodOptional<z.ZodObject<{
|
|
1981
|
+
limit: z.ZodOptional<z.ZodNumber>;
|
|
1982
|
+
minScore: z.ZodOptional<z.ZodNumber>;
|
|
1983
|
+
boost: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
|
|
1984
|
+
}, z.core.$strip>>;
|
|
1985
|
+
}, z.core.$strip>;
|
|
1986
|
+
declare const SearchSubMessageSchema: z.ZodObject<{
|
|
1987
|
+
type: z.ZodLiteral<"SEARCH_SUB">;
|
|
1988
|
+
payload: z.ZodObject<{
|
|
1989
|
+
subscriptionId: z.ZodString;
|
|
1990
|
+
mapName: z.ZodString;
|
|
1991
|
+
query: z.ZodString;
|
|
1992
|
+
options: z.ZodOptional<z.ZodObject<{
|
|
1993
|
+
limit: z.ZodOptional<z.ZodNumber>;
|
|
1994
|
+
minScore: z.ZodOptional<z.ZodNumber>;
|
|
1995
|
+
boost: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
|
|
1996
|
+
}, z.core.$strip>>;
|
|
1997
|
+
}, z.core.$strip>;
|
|
1998
|
+
}, z.core.$strip>;
|
|
1999
|
+
/**
|
|
2000
|
+
* SEARCH_UPDATE: Server sends delta update for a subscribed search.
|
|
2001
|
+
*/
|
|
2002
|
+
declare const SearchUpdatePayloadSchema: z.ZodObject<{
|
|
2003
|
+
subscriptionId: z.ZodString;
|
|
2004
|
+
key: z.ZodString;
|
|
2005
|
+
value: z.ZodUnknown;
|
|
2006
|
+
score: z.ZodNumber;
|
|
2007
|
+
matchedTerms: z.ZodArray<z.ZodString>;
|
|
2008
|
+
type: z.ZodEnum<{
|
|
2009
|
+
UPDATE: "UPDATE";
|
|
2010
|
+
ENTER: "ENTER";
|
|
2011
|
+
LEAVE: "LEAVE";
|
|
2012
|
+
}>;
|
|
2013
|
+
}, z.core.$strip>;
|
|
2014
|
+
declare const SearchUpdateMessageSchema: z.ZodObject<{
|
|
2015
|
+
type: z.ZodLiteral<"SEARCH_UPDATE">;
|
|
2016
|
+
payload: z.ZodObject<{
|
|
2017
|
+
subscriptionId: z.ZodString;
|
|
2018
|
+
key: z.ZodString;
|
|
2019
|
+
value: z.ZodUnknown;
|
|
2020
|
+
score: z.ZodNumber;
|
|
2021
|
+
matchedTerms: z.ZodArray<z.ZodString>;
|
|
2022
|
+
type: z.ZodEnum<{
|
|
2023
|
+
UPDATE: "UPDATE";
|
|
2024
|
+
ENTER: "ENTER";
|
|
2025
|
+
LEAVE: "LEAVE";
|
|
2026
|
+
}>;
|
|
2027
|
+
}, z.core.$strip>;
|
|
2028
|
+
}, z.core.$strip>;
|
|
2029
|
+
/**
|
|
2030
|
+
* SEARCH_UNSUB: Client unsubscribes from live search.
|
|
2031
|
+
*/
|
|
2032
|
+
declare const SearchUnsubPayloadSchema: z.ZodObject<{
|
|
2033
|
+
subscriptionId: z.ZodString;
|
|
2034
|
+
}, z.core.$strip>;
|
|
2035
|
+
declare const SearchUnsubMessageSchema: z.ZodObject<{
|
|
2036
|
+
type: z.ZodLiteral<"SEARCH_UNSUB">;
|
|
2037
|
+
payload: z.ZodObject<{
|
|
2038
|
+
subscriptionId: z.ZodString;
|
|
2039
|
+
}, z.core.$strip>;
|
|
2040
|
+
}, z.core.$strip>;
|
|
1900
2041
|
/**
|
|
1901
2042
|
* Conflict resolver definition schema (wire format).
|
|
1902
2043
|
*/
|
|
@@ -2488,6 +2629,62 @@ declare const MessageSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
2488
2629
|
priority: z.ZodOptional<z.ZodNumber>;
|
|
2489
2630
|
keyPattern: z.ZodOptional<z.ZodString>;
|
|
2490
2631
|
}, z.core.$strip>>;
|
|
2632
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
2633
|
+
type: z.ZodLiteral<"SEARCH">;
|
|
2634
|
+
payload: z.ZodObject<{
|
|
2635
|
+
requestId: z.ZodString;
|
|
2636
|
+
mapName: z.ZodString;
|
|
2637
|
+
query: z.ZodString;
|
|
2638
|
+
options: z.ZodOptional<z.ZodObject<{
|
|
2639
|
+
limit: z.ZodOptional<z.ZodNumber>;
|
|
2640
|
+
minScore: z.ZodOptional<z.ZodNumber>;
|
|
2641
|
+
boost: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
|
|
2642
|
+
}, z.core.$strip>>;
|
|
2643
|
+
}, z.core.$strip>;
|
|
2644
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
2645
|
+
type: z.ZodLiteral<"SEARCH_RESP">;
|
|
2646
|
+
payload: z.ZodObject<{
|
|
2647
|
+
requestId: z.ZodString;
|
|
2648
|
+
results: z.ZodArray<z.ZodObject<{
|
|
2649
|
+
key: z.ZodString;
|
|
2650
|
+
value: z.ZodUnknown;
|
|
2651
|
+
score: z.ZodNumber;
|
|
2652
|
+
matchedTerms: z.ZodArray<z.ZodString>;
|
|
2653
|
+
}, z.core.$strip>>;
|
|
2654
|
+
totalCount: z.ZodNumber;
|
|
2655
|
+
error: z.ZodOptional<z.ZodString>;
|
|
2656
|
+
}, z.core.$strip>;
|
|
2657
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
2658
|
+
type: z.ZodLiteral<"SEARCH_SUB">;
|
|
2659
|
+
payload: z.ZodObject<{
|
|
2660
|
+
subscriptionId: z.ZodString;
|
|
2661
|
+
mapName: z.ZodString;
|
|
2662
|
+
query: z.ZodString;
|
|
2663
|
+
options: z.ZodOptional<z.ZodObject<{
|
|
2664
|
+
limit: z.ZodOptional<z.ZodNumber>;
|
|
2665
|
+
minScore: z.ZodOptional<z.ZodNumber>;
|
|
2666
|
+
boost: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
|
|
2667
|
+
}, z.core.$strip>>;
|
|
2668
|
+
}, z.core.$strip>;
|
|
2669
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
2670
|
+
type: z.ZodLiteral<"SEARCH_UPDATE">;
|
|
2671
|
+
payload: z.ZodObject<{
|
|
2672
|
+
subscriptionId: z.ZodString;
|
|
2673
|
+
key: z.ZodString;
|
|
2674
|
+
value: z.ZodUnknown;
|
|
2675
|
+
score: z.ZodNumber;
|
|
2676
|
+
matchedTerms: z.ZodArray<z.ZodString>;
|
|
2677
|
+
type: z.ZodEnum<{
|
|
2678
|
+
UPDATE: "UPDATE";
|
|
2679
|
+
ENTER: "ENTER";
|
|
2680
|
+
LEAVE: "LEAVE";
|
|
2681
|
+
}>;
|
|
2682
|
+
}, z.core.$strip>;
|
|
2683
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
2684
|
+
type: z.ZodLiteral<"SEARCH_UNSUB">;
|
|
2685
|
+
payload: z.ZodObject<{
|
|
2686
|
+
subscriptionId: z.ZodString;
|
|
2687
|
+
}, z.core.$strip>;
|
|
2491
2688
|
}, z.core.$strip>], "type">;
|
|
2492
2689
|
type Query$1 = z.infer<typeof QuerySchema>;
|
|
2493
2690
|
type ClientOp = z.infer<typeof ClientOpSchema>;
|
|
@@ -2517,6 +2714,18 @@ type UnregisterResolverResponse = z.infer<typeof UnregisterResolverResponseSchem
|
|
|
2517
2714
|
type MergeRejectedMessage = z.infer<typeof MergeRejectedMessageSchema>;
|
|
2518
2715
|
type ListResolversRequest = z.infer<typeof ListResolversRequestSchema>;
|
|
2519
2716
|
type ListResolversResponse = z.infer<typeof ListResolversResponseSchema>;
|
|
2717
|
+
type SearchOptions$1 = z.infer<typeof SearchOptionsSchema>;
|
|
2718
|
+
type SearchPayload = z.infer<typeof SearchPayloadSchema>;
|
|
2719
|
+
type SearchMessage = z.infer<typeof SearchMessageSchema>;
|
|
2720
|
+
type SearchRespPayload = z.infer<typeof SearchRespPayloadSchema>;
|
|
2721
|
+
type SearchRespMessage = z.infer<typeof SearchRespMessageSchema>;
|
|
2722
|
+
type SearchUpdateType = z.infer<typeof SearchUpdateTypeSchema>;
|
|
2723
|
+
type SearchSubPayload = z.infer<typeof SearchSubPayloadSchema>;
|
|
2724
|
+
type SearchSubMessage = z.infer<typeof SearchSubMessageSchema>;
|
|
2725
|
+
type SearchUpdatePayload = z.infer<typeof SearchUpdatePayloadSchema>;
|
|
2726
|
+
type SearchUpdateMessage = z.infer<typeof SearchUpdateMessageSchema>;
|
|
2727
|
+
type SearchUnsubPayload = z.infer<typeof SearchUnsubPayloadSchema>;
|
|
2728
|
+
type SearchUnsubMessage = z.infer<typeof SearchUnsubMessageSchema>;
|
|
2520
2729
|
|
|
2521
2730
|
/**
|
|
2522
2731
|
* Write Concern - Configurable Acknowledgment Levels
|
|
@@ -6613,6 +6822,326 @@ declare class IndexedLWWMap<K extends string, V> extends LWWMap<K, V> {
|
|
|
6613
6822
|
private extractQueryType;
|
|
6614
6823
|
}
|
|
6615
6824
|
|
|
6825
|
+
/**
|
|
6826
|
+
* Full-Text Search Types
|
|
6827
|
+
*
|
|
6828
|
+
* Type definitions for the FTS (Full-Text Search) module.
|
|
6829
|
+
* This module provides BM25-based keyword search capabilities.
|
|
6830
|
+
*
|
|
6831
|
+
* @module fts/types
|
|
6832
|
+
*/
|
|
6833
|
+
/**
|
|
6834
|
+
* Options for configuring the FTS Tokenizer.
|
|
6835
|
+
*/
|
|
6836
|
+
interface TokenizerOptions {
|
|
6837
|
+
/**
|
|
6838
|
+
* Convert all text to lowercase before tokenization.
|
|
6839
|
+
* @default true
|
|
6840
|
+
*/
|
|
6841
|
+
lowercase?: boolean;
|
|
6842
|
+
/**
|
|
6843
|
+
* Set of words to exclude from tokenization (e.g., "the", "and", "is").
|
|
6844
|
+
* @default ENGLISH_STOPWORDS (174 words)
|
|
6845
|
+
*/
|
|
6846
|
+
stopwords?: Set<string>;
|
|
6847
|
+
/**
|
|
6848
|
+
* Function to reduce words to their root form.
|
|
6849
|
+
* @default Porter stemmer
|
|
6850
|
+
*/
|
|
6851
|
+
stemmer?: (word: string) => string;
|
|
6852
|
+
/**
|
|
6853
|
+
* Minimum token length to include in results.
|
|
6854
|
+
* @default 2
|
|
6855
|
+
*/
|
|
6856
|
+
minLength?: number;
|
|
6857
|
+
/**
|
|
6858
|
+
* Maximum token length to include in results.
|
|
6859
|
+
* @default 40
|
|
6860
|
+
*/
|
|
6861
|
+
maxLength?: number;
|
|
6862
|
+
}
|
|
6863
|
+
/**
|
|
6864
|
+
* Information about a term's occurrence in a document.
|
|
6865
|
+
*/
|
|
6866
|
+
interface TermInfo {
|
|
6867
|
+
/** Document ID where the term appears */
|
|
6868
|
+
docId: string;
|
|
6869
|
+
/** Number of times the term appears in the document */
|
|
6870
|
+
termFrequency: number;
|
|
6871
|
+
/** Optional: positions of the term for phrase search (future) */
|
|
6872
|
+
fieldPositions?: number[];
|
|
6873
|
+
}
|
|
6874
|
+
/**
|
|
6875
|
+
* Posting list entry for the inverted index.
|
|
6876
|
+
*/
|
|
6877
|
+
interface Posting {
|
|
6878
|
+
/** Document ID */
|
|
6879
|
+
docId: string;
|
|
6880
|
+
/** Term frequency in this document */
|
|
6881
|
+
termFrequency: number;
|
|
6882
|
+
/** Optional: positions for phrase search */
|
|
6883
|
+
positions?: number[];
|
|
6884
|
+
}
|
|
6885
|
+
/**
|
|
6886
|
+
* BM25 algorithm configuration options.
|
|
6887
|
+
*/
|
|
6888
|
+
interface BM25Options {
|
|
6889
|
+
/**
|
|
6890
|
+
* Term frequency saturation parameter.
|
|
6891
|
+
* Higher values give more weight to repeated terms.
|
|
6892
|
+
* @default 1.2
|
|
6893
|
+
*/
|
|
6894
|
+
k1?: number;
|
|
6895
|
+
/**
|
|
6896
|
+
* Document length normalization parameter.
|
|
6897
|
+
* 0 = no length normalization, 1 = full normalization.
|
|
6898
|
+
* @default 0.75
|
|
6899
|
+
*/
|
|
6900
|
+
b?: number;
|
|
6901
|
+
}
|
|
6902
|
+
/**
|
|
6903
|
+
* A document with its BM25 relevance score.
|
|
6904
|
+
*/
|
|
6905
|
+
interface ScoredDocument {
|
|
6906
|
+
/** Document ID */
|
|
6907
|
+
docId: string;
|
|
6908
|
+
/** BM25 relevance score */
|
|
6909
|
+
score: number;
|
|
6910
|
+
/** Terms from the query that matched this document */
|
|
6911
|
+
matchedTerms: string[];
|
|
6912
|
+
}
|
|
6913
|
+
/**
|
|
6914
|
+
* Configuration for a FullTextIndex.
|
|
6915
|
+
*/
|
|
6916
|
+
interface FullTextIndexConfig {
|
|
6917
|
+
/** Fields to index for full-text search (e.g., ['title', 'body']) */
|
|
6918
|
+
fields: string[];
|
|
6919
|
+
/** Tokenizer configuration */
|
|
6920
|
+
tokenizer?: TokenizerOptions;
|
|
6921
|
+
/** BM25 scoring parameters */
|
|
6922
|
+
bm25?: BM25Options;
|
|
6923
|
+
}
|
|
6924
|
+
/**
|
|
6925
|
+
* Options for search queries.
|
|
6926
|
+
*/
|
|
6927
|
+
interface SearchOptions {
|
|
6928
|
+
/** Maximum number of results to return */
|
|
6929
|
+
limit?: number;
|
|
6930
|
+
/** Minimum BM25 score threshold */
|
|
6931
|
+
minScore?: number;
|
|
6932
|
+
/** Restrict search to specific fields */
|
|
6933
|
+
fields?: string[];
|
|
6934
|
+
/** Field boost weights (e.g., { title: 2.0, body: 1.0 }) */
|
|
6935
|
+
boost?: Record<string, number>;
|
|
6936
|
+
}
|
|
6937
|
+
/**
|
|
6938
|
+
* Search result with full details.
|
|
6939
|
+
*/
|
|
6940
|
+
interface SearchResult {
|
|
6941
|
+
/** Document ID */
|
|
6942
|
+
docId: string;
|
|
6943
|
+
/** BM25 relevance score */
|
|
6944
|
+
score: number;
|
|
6945
|
+
/** Source of the match (for hybrid search) */
|
|
6946
|
+
source?: 'exact' | 'fulltext' | 'vector' | 'bfs';
|
|
6947
|
+
/** Original document data (if requested) */
|
|
6948
|
+
data?: unknown;
|
|
6949
|
+
/** Highlighted text snippet (if requested) */
|
|
6950
|
+
highlightedText?: string;
|
|
6951
|
+
/** Terms that matched */
|
|
6952
|
+
matchedTerms?: string[];
|
|
6953
|
+
/** Document embedding (if available) */
|
|
6954
|
+
embedding?: number[];
|
|
6955
|
+
/** Cosine similarity to query (0-1) */
|
|
6956
|
+
embeddingSimilarity?: number;
|
|
6957
|
+
/** Debug information */
|
|
6958
|
+
debug?: {
|
|
6959
|
+
exactScore?: number;
|
|
6960
|
+
fulltextScore?: number;
|
|
6961
|
+
vectorScore?: number;
|
|
6962
|
+
rangeScore?: number;
|
|
6963
|
+
};
|
|
6964
|
+
}
|
|
6965
|
+
/**
|
|
6966
|
+
* Serialized format for index persistence.
|
|
6967
|
+
*/
|
|
6968
|
+
interface SerializedIndex {
|
|
6969
|
+
/** Schema version for backwards compatibility */
|
|
6970
|
+
version: number;
|
|
6971
|
+
/** Index metadata */
|
|
6972
|
+
metadata: {
|
|
6973
|
+
totalDocs: number;
|
|
6974
|
+
avgDocLength: number;
|
|
6975
|
+
createdAt: number;
|
|
6976
|
+
lastModified: number;
|
|
6977
|
+
};
|
|
6978
|
+
/** Serialized term data */
|
|
6979
|
+
terms: Array<{
|
|
6980
|
+
term: string;
|
|
6981
|
+
idf: number;
|
|
6982
|
+
postings: Array<{
|
|
6983
|
+
docId: string;
|
|
6984
|
+
termFrequency: number;
|
|
6985
|
+
positions?: number[];
|
|
6986
|
+
}>;
|
|
6987
|
+
}>;
|
|
6988
|
+
/** Document lengths for BM25 normalization */
|
|
6989
|
+
docLengths: Record<string, number>;
|
|
6990
|
+
}
|
|
6991
|
+
|
|
6992
|
+
/**
|
|
6993
|
+
* Full-Text Index
|
|
6994
|
+
*
|
|
6995
|
+
* High-level integration class that combines Tokenizer, InvertedIndex,
|
|
6996
|
+
* and BM25Scorer for complete full-text search functionality.
|
|
6997
|
+
* Designed to integrate with TopGun's CRDT maps.
|
|
6998
|
+
*
|
|
6999
|
+
* @module fts/FullTextIndex
|
|
7000
|
+
*/
|
|
7001
|
+
|
|
7002
|
+
/**
|
|
7003
|
+
* Full-Text Index for TopGun
|
|
7004
|
+
*
|
|
7005
|
+
* Provides BM25-based full-text search across document fields.
|
|
7006
|
+
* Supports incremental updates (add/update/remove) for real-time sync.
|
|
7007
|
+
*
|
|
7008
|
+
* @example
|
|
7009
|
+
* ```typescript
|
|
7010
|
+
* const index = new FullTextIndex({
|
|
7011
|
+
* fields: ['title', 'body'],
|
|
7012
|
+
* tokenizer: { minLength: 2 },
|
|
7013
|
+
* bm25: { k1: 1.2, b: 0.75 }
|
|
7014
|
+
* });
|
|
7015
|
+
*
|
|
7016
|
+
* index.onSet('doc1', { title: 'Hello World', body: 'Test content' });
|
|
7017
|
+
* const results = index.search('hello');
|
|
7018
|
+
* // [{ docId: 'doc1', score: 0.5, matchedTerms: ['hello'] }]
|
|
7019
|
+
* ```
|
|
7020
|
+
*/
|
|
7021
|
+
declare class FullTextIndex {
|
|
7022
|
+
/** Fields to index from documents */
|
|
7023
|
+
private readonly fields;
|
|
7024
|
+
/** Tokenizer for text processing */
|
|
7025
|
+
private readonly tokenizer;
|
|
7026
|
+
/** BM25 scorer for relevance ranking */
|
|
7027
|
+
private readonly scorer;
|
|
7028
|
+
/** Per-field inverted indexes for field boosting */
|
|
7029
|
+
private readonly fieldIndexes;
|
|
7030
|
+
/** Combined index for all fields */
|
|
7031
|
+
private combinedIndex;
|
|
7032
|
+
/** Track indexed documents */
|
|
7033
|
+
private readonly indexedDocs;
|
|
7034
|
+
/** Serializer for persistence */
|
|
7035
|
+
private readonly serializer;
|
|
7036
|
+
/**
|
|
7037
|
+
* Cache of document tokens for fast single-document scoring.
|
|
7038
|
+
* Maps docId → tokenized terms from all indexed fields.
|
|
7039
|
+
*/
|
|
7040
|
+
private readonly documentTokensCache;
|
|
7041
|
+
/**
|
|
7042
|
+
* Create a new FullTextIndex.
|
|
7043
|
+
*
|
|
7044
|
+
* @param config - Index configuration
|
|
7045
|
+
*/
|
|
7046
|
+
constructor(config: FullTextIndexConfig);
|
|
7047
|
+
/**
|
|
7048
|
+
* Index a document (add or update).
|
|
7049
|
+
* Called when a document is set in the CRDT map.
|
|
7050
|
+
*
|
|
7051
|
+
* @param docId - Document identifier
|
|
7052
|
+
* @param document - Document data containing fields to index
|
|
7053
|
+
*/
|
|
7054
|
+
onSet(docId: string, document: Record<string, unknown> | null | undefined): void;
|
|
7055
|
+
/**
|
|
7056
|
+
* Remove a document from the index.
|
|
7057
|
+
* Called when a document is deleted from the CRDT map.
|
|
7058
|
+
*
|
|
7059
|
+
* @param docId - Document identifier to remove
|
|
7060
|
+
*/
|
|
7061
|
+
onRemove(docId: string): void;
|
|
7062
|
+
/**
|
|
7063
|
+
* Search the index with a query.
|
|
7064
|
+
*
|
|
7065
|
+
* @param query - Search query text
|
|
7066
|
+
* @param options - Search options (limit, minScore, boost)
|
|
7067
|
+
* @returns Array of search results, sorted by relevance
|
|
7068
|
+
*/
|
|
7069
|
+
search(query: string, options?: SearchOptions): SearchResult[];
|
|
7070
|
+
/**
|
|
7071
|
+
* Serialize the index state.
|
|
7072
|
+
*
|
|
7073
|
+
* @returns Serialized index data
|
|
7074
|
+
*/
|
|
7075
|
+
serialize(): SerializedIndex;
|
|
7076
|
+
/**
|
|
7077
|
+
* Load index from serialized state.
|
|
7078
|
+
*
|
|
7079
|
+
* @param data - Serialized index data
|
|
7080
|
+
*/
|
|
7081
|
+
load(data: SerializedIndex): void;
|
|
7082
|
+
/**
|
|
7083
|
+
* Build the index from an array of entries.
|
|
7084
|
+
* Useful for initial bulk loading.
|
|
7085
|
+
*
|
|
7086
|
+
* @param entries - Array of [docId, document] tuples
|
|
7087
|
+
*/
|
|
7088
|
+
buildFromEntries(entries: Array<[string, Record<string, unknown> | null]>): void;
|
|
7089
|
+
/**
|
|
7090
|
+
* Clear all data from the index.
|
|
7091
|
+
*/
|
|
7092
|
+
clear(): void;
|
|
7093
|
+
/**
|
|
7094
|
+
* Get the number of indexed documents.
|
|
7095
|
+
*
|
|
7096
|
+
* @returns Number of documents in the index
|
|
7097
|
+
*/
|
|
7098
|
+
getSize(): number;
|
|
7099
|
+
/**
|
|
7100
|
+
* Tokenize a query string using the index's tokenizer.
|
|
7101
|
+
* Public method for external use (e.g., SearchCoordinator).
|
|
7102
|
+
*
|
|
7103
|
+
* @param query - Query text to tokenize
|
|
7104
|
+
* @returns Array of tokenized terms
|
|
7105
|
+
*/
|
|
7106
|
+
tokenizeQuery(query: string): string[];
|
|
7107
|
+
/**
|
|
7108
|
+
* Score a single document against query terms.
|
|
7109
|
+
* O(Q × D) complexity where Q = query terms, D = document tokens.
|
|
7110
|
+
*
|
|
7111
|
+
* This method is optimized for checking if a single document
|
|
7112
|
+
* matches a query, avoiding full index scan.
|
|
7113
|
+
*
|
|
7114
|
+
* @param docId - Document ID to score
|
|
7115
|
+
* @param queryTerms - Pre-tokenized query terms
|
|
7116
|
+
* @param document - Optional document data (used if not in cache)
|
|
7117
|
+
* @returns SearchResult with score and matched terms, or null if no match
|
|
7118
|
+
*/
|
|
7119
|
+
scoreSingleDocument(docId: string, queryTerms: string[], document?: Record<string, unknown>): SearchResult | null;
|
|
7120
|
+
/**
|
|
7121
|
+
* Tokenize all indexed fields of a document.
|
|
7122
|
+
* Internal helper for scoreSingleDocument when document not in cache.
|
|
7123
|
+
*
|
|
7124
|
+
* @param document - Document data
|
|
7125
|
+
* @returns Array of all tokens from indexed fields
|
|
7126
|
+
*/
|
|
7127
|
+
private tokenizeDocument;
|
|
7128
|
+
/**
|
|
7129
|
+
* Get the index name (for debugging/display).
|
|
7130
|
+
*
|
|
7131
|
+
* @returns Descriptive name including indexed fields
|
|
7132
|
+
*/
|
|
7133
|
+
get name(): string;
|
|
7134
|
+
/**
|
|
7135
|
+
* Remove document from all indexes (internal).
|
|
7136
|
+
*/
|
|
7137
|
+
private removeFromIndexes;
|
|
7138
|
+
/**
|
|
7139
|
+
* Search with field boosting.
|
|
7140
|
+
* Scores are computed per-field and combined with boost weights.
|
|
7141
|
+
*/
|
|
7142
|
+
private searchWithBoost;
|
|
7143
|
+
}
|
|
7144
|
+
|
|
6616
7145
|
/**
|
|
6617
7146
|
* IndexedORMap Implementation
|
|
6618
7147
|
*
|
|
@@ -6640,6 +7169,16 @@ interface ORMapQueryResult<K, V> {
|
|
|
6640
7169
|
tag: string;
|
|
6641
7170
|
value: V;
|
|
6642
7171
|
}
|
|
7172
|
+
/**
|
|
7173
|
+
* Result of a full-text search on IndexedORMap.
|
|
7174
|
+
* Includes BM25 relevance score for ranking.
|
|
7175
|
+
*/
|
|
7176
|
+
interface ORMapSearchResult<K, V> extends ORMapQueryResult<K, V> {
|
|
7177
|
+
/** BM25 relevance score */
|
|
7178
|
+
score: number;
|
|
7179
|
+
/** Terms from the query that matched */
|
|
7180
|
+
matchedTerms: string[];
|
|
7181
|
+
}
|
|
6643
7182
|
/**
|
|
6644
7183
|
* ORMap with index support.
|
|
6645
7184
|
*
|
|
@@ -6657,6 +7196,7 @@ declare class IndexedORMap<K extends string, V> extends ORMap<K, V> {
|
|
|
6657
7196
|
private readonly autoIndexManager;
|
|
6658
7197
|
private readonly defaultIndexingStrategy;
|
|
6659
7198
|
private readonly options;
|
|
7199
|
+
private fullTextIndex;
|
|
6660
7200
|
constructor(hlc: HLC, options?: IndexedMapOptions);
|
|
6661
7201
|
/**
|
|
6662
7202
|
* Add a hash index on an attribute.
|
|
@@ -6689,6 +7229,59 @@ declare class IndexedORMap<K extends string, V> extends ORMap<K, V> {
|
|
|
6689
7229
|
* @param index - Index to add
|
|
6690
7230
|
*/
|
|
6691
7231
|
addIndex<A>(index: Index<string, V, A>): void;
|
|
7232
|
+
/**
|
|
7233
|
+
* Enable BM25-based full-text search on specified fields.
|
|
7234
|
+
* This creates a FullTextIndex for relevance-ranked search.
|
|
7235
|
+
*
|
|
7236
|
+
* Note: This is different from addInvertedIndex which provides
|
|
7237
|
+
* boolean matching (contains/containsAll/containsAny). This method
|
|
7238
|
+
* provides BM25 relevance scoring for true full-text search.
|
|
7239
|
+
*
|
|
7240
|
+
* @param config - Full-text index configuration
|
|
7241
|
+
* @returns The created FullTextIndex
|
|
7242
|
+
*
|
|
7243
|
+
* @example
|
|
7244
|
+
* ```typescript
|
|
7245
|
+
* const map = new IndexedORMap(hlc);
|
|
7246
|
+
* map.enableFullTextSearch({
|
|
7247
|
+
* fields: ['title', 'body'],
|
|
7248
|
+
* tokenizer: { minLength: 2 },
|
|
7249
|
+
* bm25: { k1: 1.2, b: 0.75 }
|
|
7250
|
+
* });
|
|
7251
|
+
*
|
|
7252
|
+
* map.add('doc1', { title: 'Hello World', body: 'Test content' });
|
|
7253
|
+
* const results = map.search('hello');
|
|
7254
|
+
* // [{ key: 'doc1', tag: '...', value: {...}, score: 0.5, matchedTerms: ['hello'] }]
|
|
7255
|
+
* ```
|
|
7256
|
+
*/
|
|
7257
|
+
enableFullTextSearch(config: FullTextIndexConfig): FullTextIndex;
|
|
7258
|
+
/**
|
|
7259
|
+
* Check if full-text search is enabled.
|
|
7260
|
+
*
|
|
7261
|
+
* @returns true if full-text search is enabled
|
|
7262
|
+
*/
|
|
7263
|
+
isFullTextSearchEnabled(): boolean;
|
|
7264
|
+
/**
|
|
7265
|
+
* Get the full-text index (if enabled).
|
|
7266
|
+
*
|
|
7267
|
+
* @returns The FullTextIndex or null
|
|
7268
|
+
*/
|
|
7269
|
+
getFullTextIndex(): FullTextIndex | null;
|
|
7270
|
+
/**
|
|
7271
|
+
* Perform a BM25-ranked full-text search.
|
|
7272
|
+
* Results are sorted by relevance score (highest first).
|
|
7273
|
+
*
|
|
7274
|
+
* @param query - Search query text
|
|
7275
|
+
* @param options - Search options (limit, minScore, boost)
|
|
7276
|
+
* @returns Array of search results with scores, sorted by relevance
|
|
7277
|
+
*
|
|
7278
|
+
* @throws Error if full-text search is not enabled
|
|
7279
|
+
*/
|
|
7280
|
+
search(query: string, options?: SearchOptions): ORMapSearchResult<K, V>[];
|
|
7281
|
+
/**
|
|
7282
|
+
* Disable full-text search and release the index.
|
|
7283
|
+
*/
|
|
7284
|
+
disableFullTextSearch(): void;
|
|
6692
7285
|
/**
|
|
6693
7286
|
* Remove an index.
|
|
6694
7287
|
*
|
|
@@ -6879,4 +7472,288 @@ declare class IndexedORMap<K extends string, V> extends ORMap<K, V> {
|
|
|
6879
7472
|
private extractQueryType;
|
|
6880
7473
|
}
|
|
6881
7474
|
|
|
6882
|
-
|
|
7475
|
+
/**
|
|
7476
|
+
* English stopwords list (174 common words).
|
|
7477
|
+
* These words are filtered out during tokenization as they
|
|
7478
|
+
* don't contribute to search relevance.
|
|
7479
|
+
*/
|
|
7480
|
+
declare const ENGLISH_STOPWORDS: Set<string>;
|
|
7481
|
+
|
|
7482
|
+
/**
|
|
7483
|
+
* Porter Stemming Algorithm
|
|
7484
|
+
*
|
|
7485
|
+
* Reduces English words to their stem (root form).
|
|
7486
|
+
* Based on the algorithm by Martin Porter (1980).
|
|
7487
|
+
*
|
|
7488
|
+
* @see https://tartarus.org/martin/PorterStemmer/
|
|
7489
|
+
*
|
|
7490
|
+
* @param word - Word to stem (should be lowercase)
|
|
7491
|
+
* @returns Stemmed word
|
|
7492
|
+
*/
|
|
7493
|
+
declare function porterStem(word: string): string;
|
|
7494
|
+
|
|
7495
|
+
/**
|
|
7496
|
+
* FTS Tokenizer with Porter Stemming and Stopwords
|
|
7497
|
+
*
|
|
7498
|
+
* Provides text tokenization for BM25 full-text search.
|
|
7499
|
+
* Features:
|
|
7500
|
+
* - Unicode-aware word boundary detection
|
|
7501
|
+
* - English stopwords filtering (174 words)
|
|
7502
|
+
* - Porter stemming algorithm for word normalization
|
|
7503
|
+
* - Configurable min/max token length
|
|
7504
|
+
*
|
|
7505
|
+
* @module fts/Tokenizer
|
|
7506
|
+
*/
|
|
7507
|
+
|
|
7508
|
+
/**
|
|
7509
|
+
* FTS Tokenizer
|
|
7510
|
+
*
|
|
7511
|
+
* Splits text into searchable tokens with normalization.
|
|
7512
|
+
*
|
|
7513
|
+
* @example
|
|
7514
|
+
* ```typescript
|
|
7515
|
+
* const tokenizer = new BM25Tokenizer();
|
|
7516
|
+
* const tokens = tokenizer.tokenize('The quick brown foxes');
|
|
7517
|
+
* // ['quick', 'brown', 'fox']
|
|
7518
|
+
* ```
|
|
7519
|
+
*/
|
|
7520
|
+
declare class BM25Tokenizer implements Tokenizer {
|
|
7521
|
+
private readonly options;
|
|
7522
|
+
/**
|
|
7523
|
+
* Create a new BM25Tokenizer.
|
|
7524
|
+
*
|
|
7525
|
+
* @param options - Configuration options
|
|
7526
|
+
*/
|
|
7527
|
+
constructor(options?: TokenizerOptions);
|
|
7528
|
+
/**
|
|
7529
|
+
* Tokenize text into an array of normalized tokens.
|
|
7530
|
+
*
|
|
7531
|
+
* @param text - Text to tokenize
|
|
7532
|
+
* @returns Array of tokens
|
|
7533
|
+
*/
|
|
7534
|
+
tokenize(text: string): string[];
|
|
7535
|
+
}
|
|
7536
|
+
|
|
7537
|
+
/**
|
|
7538
|
+
* FTS Inverted Index
|
|
7539
|
+
*
|
|
7540
|
+
* Data structure for full-text search that maps terms to documents.
|
|
7541
|
+
* Supports efficient term lookup, document frequency calculation,
|
|
7542
|
+
* and IDF (Inverse Document Frequency) for BM25 scoring.
|
|
7543
|
+
*
|
|
7544
|
+
* @module fts/BM25InvertedIndex
|
|
7545
|
+
*/
|
|
7546
|
+
|
|
7547
|
+
/**
|
|
7548
|
+
* Inverted Index for Full-Text Search (BM25)
|
|
7549
|
+
*
|
|
7550
|
+
* Maps terms to the documents containing them, along with term frequency
|
|
7551
|
+
* information needed for BM25 scoring.
|
|
7552
|
+
*
|
|
7553
|
+
* @example
|
|
7554
|
+
* ```typescript
|
|
7555
|
+
* const index = new BM25InvertedIndex();
|
|
7556
|
+
* index.addDocument('doc1', ['hello', 'world']);
|
|
7557
|
+
* index.addDocument('doc2', ['hello', 'there']);
|
|
7558
|
+
*
|
|
7559
|
+
* const docs = index.getDocumentsForTerm('hello');
|
|
7560
|
+
* // [{ docId: 'doc1', termFrequency: 1 }, { docId: 'doc2', termFrequency: 1 }]
|
|
7561
|
+
* ```
|
|
7562
|
+
*/
|
|
7563
|
+
declare class BM25InvertedIndex {
|
|
7564
|
+
/** term → list of documents containing term */
|
|
7565
|
+
private index;
|
|
7566
|
+
/** document → total term count (for length normalization) */
|
|
7567
|
+
private docLengths;
|
|
7568
|
+
/** document → set of terms (for efficient removal) */
|
|
7569
|
+
private docTerms;
|
|
7570
|
+
/** Inverse Document Frequency cache */
|
|
7571
|
+
private idfCache;
|
|
7572
|
+
/** Total number of documents */
|
|
7573
|
+
private totalDocs;
|
|
7574
|
+
/** Average document length */
|
|
7575
|
+
private avgDocLength;
|
|
7576
|
+
constructor();
|
|
7577
|
+
/**
|
|
7578
|
+
* Add a document to the index.
|
|
7579
|
+
*
|
|
7580
|
+
* @param docId - Unique document identifier
|
|
7581
|
+
* @param tokens - Array of tokens (already tokenized/stemmed)
|
|
7582
|
+
*/
|
|
7583
|
+
addDocument(docId: string, tokens: string[]): void;
|
|
7584
|
+
/**
|
|
7585
|
+
* Remove a document from the index.
|
|
7586
|
+
*
|
|
7587
|
+
* @param docId - Document identifier to remove
|
|
7588
|
+
*/
|
|
7589
|
+
removeDocument(docId: string): void;
|
|
7590
|
+
/**
|
|
7591
|
+
* Get all documents containing a term.
|
|
7592
|
+
*
|
|
7593
|
+
* @param term - Term to look up
|
|
7594
|
+
* @returns Array of TermInfo objects
|
|
7595
|
+
*/
|
|
7596
|
+
getDocumentsForTerm(term: string): TermInfo[];
|
|
7597
|
+
/**
|
|
7598
|
+
* Calculate IDF (Inverse Document Frequency) for a term.
|
|
7599
|
+
*
|
|
7600
|
+
* Uses BM25 IDF formula:
|
|
7601
|
+
* IDF = log((N - df + 0.5) / (df + 0.5) + 1)
|
|
7602
|
+
*
|
|
7603
|
+
* Where:
|
|
7604
|
+
* - N = total documents
|
|
7605
|
+
* - df = document frequency (docs containing term)
|
|
7606
|
+
*
|
|
7607
|
+
* @param term - Term to calculate IDF for
|
|
7608
|
+
* @returns IDF value (0 if term doesn't exist)
|
|
7609
|
+
*/
|
|
7610
|
+
getIDF(term: string): number;
|
|
7611
|
+
/**
|
|
7612
|
+
* Get the length of a document (number of tokens).
|
|
7613
|
+
*
|
|
7614
|
+
* @param docId - Document identifier
|
|
7615
|
+
* @returns Document length (0 if not found)
|
|
7616
|
+
*/
|
|
7617
|
+
getDocLength(docId: string): number;
|
|
7618
|
+
/**
|
|
7619
|
+
* Get the average document length.
|
|
7620
|
+
*
|
|
7621
|
+
* @returns Average length across all documents
|
|
7622
|
+
*/
|
|
7623
|
+
getAvgDocLength(): number;
|
|
7624
|
+
/**
|
|
7625
|
+
* Get the total number of documents in the index.
|
|
7626
|
+
*
|
|
7627
|
+
* @returns Total document count
|
|
7628
|
+
*/
|
|
7629
|
+
getTotalDocs(): number;
|
|
7630
|
+
/**
|
|
7631
|
+
* Get an iterator over document lengths (useful for serialization).
|
|
7632
|
+
*
|
|
7633
|
+
* @returns Iterator of [docId, length] pairs
|
|
7634
|
+
*/
|
|
7635
|
+
getDocLengths(): IterableIterator<[string, number]>;
|
|
7636
|
+
/**
|
|
7637
|
+
* Get the number of documents in the index (alias for getTotalDocs).
|
|
7638
|
+
*
|
|
7639
|
+
* @returns Number of indexed documents
|
|
7640
|
+
*/
|
|
7641
|
+
getSize(): number;
|
|
7642
|
+
/**
|
|
7643
|
+
* Clear all data from the index.
|
|
7644
|
+
*/
|
|
7645
|
+
clear(): void;
|
|
7646
|
+
/**
|
|
7647
|
+
* Check if a document exists in the index.
|
|
7648
|
+
*
|
|
7649
|
+
* @param docId - Document identifier
|
|
7650
|
+
* @returns True if document exists
|
|
7651
|
+
*/
|
|
7652
|
+
hasDocument(docId: string): boolean;
|
|
7653
|
+
/**
|
|
7654
|
+
* Get all unique terms in the index.
|
|
7655
|
+
*
|
|
7656
|
+
* @returns Iterator of all terms
|
|
7657
|
+
*/
|
|
7658
|
+
getTerms(): IterableIterator<string>;
|
|
7659
|
+
/**
|
|
7660
|
+
* Get the number of unique terms in the index.
|
|
7661
|
+
*
|
|
7662
|
+
* @returns Number of unique terms
|
|
7663
|
+
*/
|
|
7664
|
+
getTermCount(): number;
|
|
7665
|
+
/**
|
|
7666
|
+
* Update the average document length after add/remove.
|
|
7667
|
+
*/
|
|
7668
|
+
private updateAvgDocLength;
|
|
7669
|
+
}
|
|
7670
|
+
|
|
7671
|
+
/**
|
|
7672
|
+
* BM25 Scorer
|
|
7673
|
+
*
|
|
7674
|
+
* Implements the Okapi BM25 ranking algorithm for full-text search.
|
|
7675
|
+
* BM25 is a probabilistic relevance ranking function used to estimate
|
|
7676
|
+
* the relevance of documents to a given search query.
|
|
7677
|
+
*
|
|
7678
|
+
* @see https://en.wikipedia.org/wiki/Okapi_BM25
|
|
7679
|
+
* @module fts/BM25Scorer
|
|
7680
|
+
*/
|
|
7681
|
+
|
|
7682
|
+
/**
|
|
7683
|
+
* BM25 Scorer for relevance ranking
|
|
7684
|
+
*
|
|
7685
|
+
* The BM25 formula:
|
|
7686
|
+
* score(D,Q) = Σ IDF(qi) × (f(qi,D) × (k1 + 1)) / (f(qi,D) + k1 × (1 - b + b × |D| / avgdl))
|
|
7687
|
+
*
|
|
7688
|
+
* Where:
|
|
7689
|
+
* - D = document
|
|
7690
|
+
* - Q = query
|
|
7691
|
+
* - qi = query term i
|
|
7692
|
+
* - f(qi,D) = term frequency of qi in D
|
|
7693
|
+
* - |D| = length of D (number of terms)
|
|
7694
|
+
* - avgdl = average document length
|
|
7695
|
+
* - k1 = term frequency saturation parameter (default: 1.2)
|
|
7696
|
+
* - b = document length normalization parameter (default: 0.75)
|
|
7697
|
+
*
|
|
7698
|
+
* @example
|
|
7699
|
+
* ```typescript
|
|
7700
|
+
* const index = new BM25InvertedIndex();
|
|
7701
|
+
* index.addDocument('doc1', ['hello', 'world']);
|
|
7702
|
+
* index.addDocument('doc2', ['hello', 'there']);
|
|
7703
|
+
*
|
|
7704
|
+
* const scorer = new BM25Scorer();
|
|
7705
|
+
* const results = scorer.score(['hello'], index);
|
|
7706
|
+
* // [{ docId: 'doc1', score: 0.18, matchedTerms: ['hello'] }, ...]
|
|
7707
|
+
* ```
|
|
7708
|
+
*/
|
|
7709
|
+
declare class BM25Scorer {
|
|
7710
|
+
/**
|
|
7711
|
+
* Term frequency saturation parameter.
|
|
7712
|
+
* Higher values give more weight to repeated terms.
|
|
7713
|
+
* Typical range: 1.2 - 2.0
|
|
7714
|
+
*/
|
|
7715
|
+
private readonly k1;
|
|
7716
|
+
/**
|
|
7717
|
+
* Document length normalization parameter.
|
|
7718
|
+
* 0 = no length normalization
|
|
7719
|
+
* 1 = full length normalization
|
|
7720
|
+
* Typical value: 0.75
|
|
7721
|
+
*/
|
|
7722
|
+
private readonly b;
|
|
7723
|
+
/**
|
|
7724
|
+
* Create a new BM25 scorer.
|
|
7725
|
+
*
|
|
7726
|
+
* @param options - BM25 configuration options
|
|
7727
|
+
*/
|
|
7728
|
+
constructor(options?: BM25Options);
|
|
7729
|
+
/**
|
|
7730
|
+
* Score documents against a query.
|
|
7731
|
+
*
|
|
7732
|
+
* @param queryTerms - Array of query terms (already tokenized/stemmed)
|
|
7733
|
+
* @param index - The inverted index to search
|
|
7734
|
+
* @returns Array of scored documents, sorted by relevance (descending)
|
|
7735
|
+
*/
|
|
7736
|
+
score(queryTerms: string[], index: BM25InvertedIndex): ScoredDocument[];
|
|
7737
|
+
/**
|
|
7738
|
+
* Score a single document against query terms.
|
|
7739
|
+
* Uses pre-computed IDF from index but calculates TF locally.
|
|
7740
|
+
*
|
|
7741
|
+
* Complexity: O(Q × D) where Q = query terms, D = document tokens
|
|
7742
|
+
*
|
|
7743
|
+
* @param queryTerms - Tokenized query terms
|
|
7744
|
+
* @param docTokens - Tokenized document terms
|
|
7745
|
+
* @param index - Inverted index for IDF and avgDocLength
|
|
7746
|
+
* @returns BM25 score (0 if no matching terms)
|
|
7747
|
+
*/
|
|
7748
|
+
scoreSingleDocument(queryTerms: string[], docTokens: string[], index: BM25InvertedIndex): number;
|
|
7749
|
+
/**
|
|
7750
|
+
* Get the k1 parameter value.
|
|
7751
|
+
*/
|
|
7752
|
+
getK1(): number;
|
|
7753
|
+
/**
|
|
7754
|
+
* Get the b parameter value.
|
|
7755
|
+
*/
|
|
7756
|
+
getB(): number;
|
|
7757
|
+
}
|
|
7758
|
+
|
|
7759
|
+
export { type Attribute, AuthMessageSchema, type BM25Options, BM25Scorer, type BatchMessage, BatchMessageSchema, BuiltInProcessors, BuiltInResolvers, type CircuitBreakerConfig, type ClientOp, ClientOpMessageSchema, ClientOpSchema, type ClusterClientConfig, type ClusterEvents, type ReadOptions as ClusterReadOptions, type WriteOptions as ClusterWriteOptions, type CompareFn, type ConflictResolver, type ConflictResolverDef, ConflictResolverDefSchema, type ConflictResolverFn, ConflictResolverSchema, type ConnectionPoolConfig, type ConnectionState, ConsistencyLevel, CounterRequestSchema, CounterResponseSchema, CounterSyncSchema, CounterUpdateSchema, DEFAULT_BACKUP_COUNT, DEFAULT_CIRCUIT_BREAKER_CONFIG, DEFAULT_CONNECTION_POOL_CONFIG, DEFAULT_EVENT_JOURNAL_CONFIG, DEFAULT_MIGRATION_CONFIG, DEFAULT_PARTITION_ROUTER_CONFIG, DEFAULT_PROCESSOR_RATE_LIMITS, DEFAULT_REPLICATION_CONFIG, DEFAULT_RESOLVER_RATE_LIMITS, DEFAULT_STOP_WORDS, DEFAULT_WRITE_CONCERN_TIMEOUT, ENGLISH_STOPWORDS, type EntryProcessBatchRequest, EntryProcessBatchRequestSchema, type EntryProcessBatchResponse, EntryProcessBatchResponseSchema, type EntryProcessKeyResult, EntryProcessKeyResultSchema, type EntryProcessRequest, EntryProcessRequestSchema, type EntryProcessResponse, EntryProcessResponseSchema, type EntryProcessorDef, EntryProcessorDefSchema, type EntryProcessorFn, type EntryProcessorResult, EntryProcessorSchema, type EventJournal, type EventJournalConfig, EventJournalImpl, FORBIDDEN_PATTERNS, BM25InvertedIndex as FTSInvertedIndex, type SearchOptions as FTSSearchOptions, type SearchResult as FTSSearchResult, BM25Tokenizer as FTSTokenizer, type TokenizerOptions as FTSTokenizerOptions, FallbackIndex, type FilterStep, FilteringResultSet, type FullScanStep, FullTextIndex, type FullTextIndexConfig, HLC, HashIndex, type Index, type IndexQuery, IndexRegistry, type IndexRegistryStats, type IndexScanStep, type IndexStats, IndexedLWWMap, IndexedORMap, IntersectionResultSet, type IntersectionStep, InvertedIndex, 
type InvertedIndexStats, type IteratorFactory, type JournalEvent, type JournalEventData, JournalEventDataSchema, type JournalEventInput, type JournalEventListener, type JournalEventMessage, JournalEventMessageSchema, type JournalEventType, JournalEventTypeSchema, type JournalReadRequest, JournalReadRequestSchema, type JournalReadResponse, JournalReadResponseSchema, type JournalSubscribeRequest, JournalSubscribeRequestSchema, type JournalUnsubscribeRequest, JournalUnsubscribeRequestSchema, LWWMap, type LWWRecord, LWWRecordSchema, LazyResultSet, LimitResultSet, type ListResolversRequest, ListResolversRequestSchema, type ListResolversResponse, ListResolversResponseSchema, type LiveQueryCallback, type LiveQueryDeltaEvent, type LiveQueryEvent, type LiveQueryInitialEvent, LiveQueryManager, type LiveQueryManagerOptions, type LiveQueryManagerStats, LockReleaseSchema, LockRequestSchema, type LogicalQueryNode, LowercaseFilter, MaxLengthFilter, type MergeContext, type MergeKeyResult, type MergeRejectedMessage, MergeRejectedMessageSchema, type MergeRejection, type MergeResult, MerkleReqBucketMessageSchema, MerkleTree, type Message, MessageSchema, type MigrationChunkAckMessage, type MigrationChunkMessage, type MigrationCompleteMessage, type MigrationConfig, type MigrationMessage, type MigrationMetrics, type MigrationStartMessage, type MigrationStatus, type MigrationVerifyMessage, MinLengthFilter, MultiValueAttribute, NGramTokenizer, NavigableIndex, type NodeHealth, type NodeInfo, type NodeStatus, type NotOwnerError, type NotStep, ORMap, ORMapDiffRequestSchema, ORMapDiffResponseSchema, type ORMapMerkleNode, ORMapMerkleReqBucketSchema, ORMapMerkleTree, ORMapPushDiffSchema, type ORMapQueryResult, type ORMapRecord, ORMapRecordSchema, type ORMapSearchResult, type ORMapSnapshot, ORMapSyncInitSchema, ORMapSyncRespBucketsSchema, ORMapSyncRespLeafSchema, ORMapSyncRespRootSchema, type OpAckMessage, OpAckMessageSchema, OpBatchMessageSchema, type OpRejectedMessage, OpRejectedMessageSchema, 
type OpResult, OpResultSchema, PARTITION_COUNT, type PNCounter, type PNCounterConfig, PNCounterImpl, type PNCounterState, type PNCounterStateObject, PNCounterStateObjectSchema, type PartitionChange, type PartitionInfo, type PartitionMap, type PartitionMapDeltaMessage, type PartitionMapMessage, type PartitionMapRequestMessage, PartitionMapRequestSchema, type PartitionMigration, type PartitionRouterConfig, PartitionState, type PendingWrite, type PermissionPolicy, type PermissionType, type PingMessage, PingMessageSchema, type PlanStep, type PongMessage, PongMessageSchema, type Posting, type PredicateFn, type PredicateNode, PredicateNodeSchema, type PredicateOp, PredicateOpSchema, Predicates, type Principal, type ProcessorRateLimitConfig, type Query$1 as Query, type Query as QueryExpression, type QueryNode, QueryOptimizer, type QueryOptimizerOptions, type QueryOptions, type QueryPlan, QuerySchema, QuerySubMessageSchema, QueryUnsubMessageSchema, RESOLVER_FORBIDDEN_PATTERNS, type RegisterResolverRequest, RegisterResolverRequestSchema, type RegisterResolverResponse, RegisterResolverResponseSchema, type ReplicationAckMessage, type ReplicationBatchAckMessage, type ReplicationBatchMessage, type ReplicationConfig, type ReplicationHealth, type ReplicationLag, type ReplicationMessage, type ReplicationProtocolMessage, type ReplicationResult, type ReplicationTask, type ResolverRateLimitConfig, type ResultSet, Ringbuffer, type RoutingError, type ScoredDocument, type SearchMessage, SearchMessageSchema, type SearchOptions$1 as SearchOptions, SearchOptionsSchema, type SearchPayload, SearchPayloadSchema, type SearchRespMessage, SearchRespMessageSchema, type SearchRespPayload, SearchRespPayloadSchema, type SearchSubMessage, SearchSubMessageSchema, type SearchSubPayload, SearchSubPayloadSchema, type SearchUnsubMessage, SearchUnsubMessageSchema, type SearchUnsubPayload, SearchUnsubPayloadSchema, type SearchUpdateMessage, SearchUpdateMessageSchema, type SearchUpdatePayload, 
SearchUpdatePayloadSchema, type SearchUpdateType, SearchUpdateTypeSchema, type SerializedIndex, SetResultSet, SimpleAttribute, type SimpleQueryNode, SortedMap, SortedResultSet, type StaleMapError, type StandingQueryChange, StandingQueryIndex, type StandingQueryIndexOptions, StandingQueryRegistry, type StandingQueryRegistryOptions, type StandingQueryRegistryStats, StopWordFilter, SyncInitMessageSchema, SyncRespBucketsMessageSchema, SyncRespLeafMessageSchema, SyncRespRootMessageSchema, type TermInfo, type Timestamp, TimestampSchema, type TokenFilter, TokenizationPipeline, type TokenizationPipelineOptions, type Tokenizer, TopicMessageEventSchema, TopicPubSchema, TopicSubSchema, TopicUnsubSchema, TrimFilter, UnionResultSet, type UnionStep, UniqueFilter, type UnregisterResolverRequest, UnregisterResolverRequestSchema, type UnregisterResolverResponse, UnregisterResolverResponseSchema, WRITE_CONCERN_ORDER, WhitespaceTokenizer, WordBoundaryTokenizer, WriteConcern, WriteConcernSchema, type WriteConcernValue, type WriteOptions$1 as WriteOptions, type WriteResult, combineHashes, compareHLCTimestamps, compareTimestamps, createFieldComparator, createPredicateMatcher, deepMerge, deserialize, disableNativeHash, evaluatePredicate, getHighestWriteConcernLevel, hashORMapEntry, hashORMapRecord, hashString, isLogicalQuery, isSimpleQuery, isUsingNativeHash, isWriteConcernAchieved, multiAttribute, porterStem, resetNativeHash, serialize, simpleAttribute, timestampToString, validateProcessorCode, validateResolverCode };
|