npm - @superatomai/sdk-node - Versions diffs - 0.0.24 → 0.0.25 - Mend

@superatomai/sdk-node 0.0.24 → 0.0.25

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (7)

package/dist/index.d.mts — CHANGED

@@ -1483,6 +1483,134 @@ declare const CONTEXT_CONFIG: {
     MAX_CONVERSATION_CONTEXT_BLOCKS: number;
 };
+/**
+ * BM25L Reranker for hybrid semantic search
+ *
+ * BM25L is an improved variant of BM25 that provides better handling of
+ * long documents and term frequency saturation. This implementation is
+ * designed to rerank semantic search results from ChromaDB.
+ *
+ * The hybrid approach combines:
+ * 1. Semantic similarity from ChromaDB embeddings (dense vectors)
+ * 2. Lexical matching from BM25L (sparse, keyword-based)
+ *
+ * This addresses the weakness of pure semantic search which may miss
+ * exact keyword matches that are important for user intent.
+ */
+interface BM25LOptions {
+    /** Term frequency saturation parameter (default: 1.5) */
+    k1?: number;
+    /** Length normalization parameter (default: 0.75) */
+    b?: number;
+    /** Lower-bound adjustment from BM25L paper (default: 0.5) */
+    delta?: number;
+}
+interface RerankedResult<T> {
+    item: T;
+    originalIndex: number;
+    semanticScore: number;
+    bm25Score: number;
+    hybridScore: number;
+}
+interface HybridSearchOptions extends BM25LOptions {
+    /** Weight for semantic score (0-1, default: 0.7) */
+    semanticWeight?: number;
+    /** Weight for BM25 score (0-1, default: 0.3) */
+    bm25Weight?: number;
+    /** Minimum hybrid score threshold (0-1, default: 0) */
+    minScore?: number;
+}
+/**
+ * BM25L implementation for lexical scoring
+ */
+declare class BM25L {
+    private k1;
+    private b;
+    private delta;
+    private documents;
+    private docLengths;
+    private avgDocLength;
+    private termDocFreq;
+    /**
+     * @param documents - Array of raw documents (strings)
+     * @param opts - Optional BM25L parameters
+     */
+    constructor(documents?: string[], opts?: BM25LOptions);
+    /**
+     * Tokenize text into lowercase alphanumeric tokens
+     */
+    tokenize(text: string): string[];
+    /**
+     * Compute IDF (Inverse Document Frequency) with smoothing
+     */
+    private idf;
+    /**
+     * Compute BM25L score for a single document
+     */
+    score(query: string, docIndex: number): number;
+    /**
+     * Search and rank all documents
+     */
+    search(query: string): Array<{
+        index: number;
+        score: number;
+    }>;
+}
+/**
+ * Hybrid reranker that combines semantic and BM25L scores
+ *
+ * @param query - The search query
+ * @param items - Array of items to rerank
+ * @param getDocument - Function to extract document text from an item
+ * @param getSemanticScore - Function to extract semantic similarity score from an item
+ * @param options - Hybrid search options
+ * @returns Reranked items with hybrid scores
+ */
+declare function hybridRerank<T>(query: string, items: T[], getDocument: (item: T) => string, getSemanticScore: (item: T) => number, options?: HybridSearchOptions): RerankedResult<T>[];
+/**
+ * Simple reranking function for ChromaDB results
+ *
+ * This is a convenience wrapper for reranking ChromaDB query results
+ * that follow the standard { ids, documents, metadatas, distances } format.
+ *
+ * @param query - The search query
+ * @param chromaResults - ChromaDB query results
+ * @param options - Hybrid search options
+ * @returns Reranked results with hybrid scores
+ */
+declare function rerankChromaResults(query: string, chromaResults: {
+    ids: string[][];
+    documents: (string | null)[][];
+    metadatas: Record<string, any>[][];
+    distances: number[][];
+}, options?: HybridSearchOptions): Array<{
+    id: string;
+    document: string | null;
+    metadata: Record<string, any>;
+    distance: number;
+    semanticScore: number;
+    bm25Score: number;
+    hybridScore: number;
+}>;
+/**
+ * Rerank conversation search results specifically
+ *
+ * This function is designed to work with the conversation-history.search collection
+ * where we need to fetch more results initially and then rerank them.
+ *
+ * @param query - The user's search query
+ * @param results - Array of conversation search results from ChromaDB
+ * @param options - Hybrid search options
+ * @returns Reranked results sorted by hybrid score
+ */
+declare function rerankConversationResults<T extends {
+    userPrompt?: string;
+    similarity?: number;
+}>(query: string, results: T[], options?: HybridSearchOptions): Array<T & {
+    hybridScore: number;
+    bm25Score: number;
+}>;
 declare const SDK_VERSION = "0.0.8";
 type MessageTypeHandler = (message: IncomingMessage) => void | Promise<void>;
 declare class SuperatomSDK {
@@ -1587,4 +1715,4 @@ declare class SuperatomSDK {
     getTools(): Tool$1[];
 }
-export { type Action, CONTEXT_CONFIG, type CapturedLog, CleanupService, type CollectionHandler, type CollectionOperation, type DBUIBlock, type IncomingMessage, type KbNodesQueryFilters, type KbNodesRequestPayload, LLM, type LogLevel, type Message, SDK_VERSION, STORAGE_CONFIG, SuperatomSDK, type SuperatomSDKConfig, Thread, ThreadManager, type Tool$1 as Tool, UIBlock, UILogCollector, type User, UserManager, type UsersData, logger };
+export { type Action, BM25L, type BM25LOptions, CONTEXT_CONFIG, type CapturedLog, CleanupService, type CollectionHandler, type CollectionOperation, type DBUIBlock, type HybridSearchOptions, type IncomingMessage, type KbNodesQueryFilters, type KbNodesRequestPayload, LLM, type LogLevel, type Message, type RerankedResult, SDK_VERSION, STORAGE_CONFIG, SuperatomSDK, type SuperatomSDKConfig, Thread, ThreadManager, type Tool$1 as Tool, UIBlock, UILogCollector, type User, UserManager, type UsersData, hybridRerank, logger, rerankChromaResults, rerankConversationResults };

package/dist/index.d.ts — CHANGED

@@ -1483,6 +1483,134 @@ declare const CONTEXT_CONFIG: {
     MAX_CONVERSATION_CONTEXT_BLOCKS: number;
 };
+/**
+ * BM25L Reranker for hybrid semantic search
+ *
+ * BM25L is an improved variant of BM25 that provides better handling of
+ * long documents and term frequency saturation. This implementation is
+ * designed to rerank semantic search results from ChromaDB.
+ *
+ * The hybrid approach combines:
+ * 1. Semantic similarity from ChromaDB embeddings (dense vectors)
+ * 2. Lexical matching from BM25L (sparse, keyword-based)
+ *
+ * This addresses the weakness of pure semantic search which may miss
+ * exact keyword matches that are important for user intent.
+ */
+interface BM25LOptions {
+    /** Term frequency saturation parameter (default: 1.5) */
+    k1?: number;
+    /** Length normalization parameter (default: 0.75) */
+    b?: number;
+    /** Lower-bound adjustment from BM25L paper (default: 0.5) */
+    delta?: number;
+}
+interface RerankedResult<T> {
+    item: T;
+    originalIndex: number;
+    semanticScore: number;
+    bm25Score: number;
+    hybridScore: number;
+}
+interface HybridSearchOptions extends BM25LOptions {
+    /** Weight for semantic score (0-1, default: 0.7) */
+    semanticWeight?: number;
+    /** Weight for BM25 score (0-1, default: 0.3) */
+    bm25Weight?: number;
+    /** Minimum hybrid score threshold (0-1, default: 0) */
+    minScore?: number;
+}
+/**
+ * BM25L implementation for lexical scoring
+ */
+declare class BM25L {
+    private k1;
+    private b;
+    private delta;
+    private documents;
+    private docLengths;
+    private avgDocLength;
+    private termDocFreq;
+    /**
+     * @param documents - Array of raw documents (strings)
+     * @param opts - Optional BM25L parameters
+     */
+    constructor(documents?: string[], opts?: BM25LOptions);
+    /**
+     * Tokenize text into lowercase alphanumeric tokens
+     */
+    tokenize(text: string): string[];
+    /**
+     * Compute IDF (Inverse Document Frequency) with smoothing
+     */
+    private idf;
+    /**
+     * Compute BM25L score for a single document
+     */
+    score(query: string, docIndex: number): number;
+    /**
+     * Search and rank all documents
+     */
+    search(query: string): Array<{
+        index: number;
+        score: number;
+    }>;
+}
+/**
+ * Hybrid reranker that combines semantic and BM25L scores
+ *
+ * @param query - The search query
+ * @param items - Array of items to rerank
+ * @param getDocument - Function to extract document text from an item
+ * @param getSemanticScore - Function to extract semantic similarity score from an item
+ * @param options - Hybrid search options
+ * @returns Reranked items with hybrid scores
+ */
+declare function hybridRerank<T>(query: string, items: T[], getDocument: (item: T) => string, getSemanticScore: (item: T) => number, options?: HybridSearchOptions): RerankedResult<T>[];
+/**
+ * Simple reranking function for ChromaDB results
+ *
+ * This is a convenience wrapper for reranking ChromaDB query results
+ * that follow the standard { ids, documents, metadatas, distances } format.
+ *
+ * @param query - The search query
+ * @param chromaResults - ChromaDB query results
+ * @param options - Hybrid search options
+ * @returns Reranked results with hybrid scores
+ */
+declare function rerankChromaResults(query: string, chromaResults: {
+    ids: string[][];
+    documents: (string | null)[][];
+    metadatas: Record<string, any>[][];
+    distances: number[][];
+}, options?: HybridSearchOptions): Array<{
+    id: string;
+    document: string | null;
+    metadata: Record<string, any>;
+    distance: number;
+    semanticScore: number;
+    bm25Score: number;
+    hybridScore: number;
+}>;
+/**
+ * Rerank conversation search results specifically
+ *
+ * This function is designed to work with the conversation-history.search collection
+ * where we need to fetch more results initially and then rerank them.
+ *
+ * @param query - The user's search query
+ * @param results - Array of conversation search results from ChromaDB
+ * @param options - Hybrid search options
+ * @returns Reranked results sorted by hybrid score
+ */
+declare function rerankConversationResults<T extends {
+    userPrompt?: string;
+    similarity?: number;
+}>(query: string, results: T[], options?: HybridSearchOptions): Array<T & {
+    hybridScore: number;
+    bm25Score: number;
+}>;
 declare const SDK_VERSION = "0.0.8";
 type MessageTypeHandler = (message: IncomingMessage) => void | Promise<void>;
 declare class SuperatomSDK {
@@ -1587,4 +1715,4 @@ declare class SuperatomSDK {
     getTools(): Tool$1[];
 }
-export { type Action, CONTEXT_CONFIG, type CapturedLog, CleanupService, type CollectionHandler, type CollectionOperation, type DBUIBlock, type IncomingMessage, type KbNodesQueryFilters, type KbNodesRequestPayload, LLM, type LogLevel, type Message, SDK_VERSION, STORAGE_CONFIG, SuperatomSDK, type SuperatomSDKConfig, Thread, ThreadManager, type Tool$1 as Tool, UIBlock, UILogCollector, type User, UserManager, type UsersData, logger };
+export { type Action, BM25L, type BM25LOptions, CONTEXT_CONFIG, type CapturedLog, CleanupService, type CollectionHandler, type CollectionOperation, type DBUIBlock, type HybridSearchOptions, type IncomingMessage, type KbNodesQueryFilters, type KbNodesRequestPayload, LLM, type LogLevel, type Message, type RerankedResult, SDK_VERSION, STORAGE_CONFIG, SuperatomSDK, type SuperatomSDKConfig, Thread, ThreadManager, type Tool$1 as Tool, UIBlock, UILogCollector, type User, UserManager, type UsersData, hybridRerank, logger, rerankChromaResults, rerankConversationResults };