@superatomai/sdk-node 0.0.24 → 0.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -1483,6 +1483,134 @@ declare const CONTEXT_CONFIG: {
1483
1483
  MAX_CONVERSATION_CONTEXT_BLOCKS: number;
1484
1484
  };
1485
1485
 
1486
+ /**
1487
+ * BM25L Reranker for hybrid semantic search
1488
+ *
1489
+ * BM25L is an improved variant of BM25 that provides better handling of
1490
+ * long documents and term frequency saturation. This implementation is
1491
+ * designed to rerank semantic search results from ChromaDB.
1492
+ *
1493
+ * The hybrid approach combines:
1494
+ * 1. Semantic similarity from ChromaDB embeddings (dense vectors)
1495
+ * 2. Lexical matching from BM25L (sparse, keyword-based)
1496
+ *
1497
+ * This addresses a weakness of pure semantic search, which may miss
1498
+ * exact keyword matches that are important for user intent.
1499
+ */
1500
+ interface BM25LOptions {
1501
+ /** Term frequency saturation parameter (default: 1.5) */
1502
+ k1?: number;
1503
+ /** Length normalization parameter (default: 0.75) */
1504
+ b?: number;
1505
+ /** Lower-bound adjustment from the BM25L paper (default: 0.5) */
1506
+ delta?: number;
1507
+ }
1508
+ interface RerankedResult<T> {
1509
+ item: T;
1510
+ originalIndex: number;
1511
+ semanticScore: number;
1512
+ bm25Score: number;
1513
+ hybridScore: number;
1514
+ }
1515
+ interface HybridSearchOptions extends BM25LOptions {
1516
+ /** Weight for semantic score (0-1, default: 0.7) */
1517
+ semanticWeight?: number;
1518
+ /** Weight for BM25 score (0-1, default: 0.3) */
1519
+ bm25Weight?: number;
1520
+ /** Minimum hybrid score threshold (0-1, default: 0) */
1521
+ minScore?: number;
1522
+ }
1523
+ /**
1524
+ * BM25L implementation for lexical scoring
1525
+ */
1526
+ declare class BM25L {
1527
+ private k1;
1528
+ private b;
1529
+ private delta;
1530
+ private documents;
1531
+ private docLengths;
1532
+ private avgDocLength;
1533
+ private termDocFreq;
1534
+ /**
1535
+ * @param documents - Array of raw documents (strings)
1536
+ * @param opts - Optional BM25L parameters
1537
+ */
1538
+ constructor(documents?: string[], opts?: BM25LOptions);
1539
+ /**
1540
+ * Tokenize text into lowercase alphanumeric tokens
1541
+ */
1542
+ tokenize(text: string): string[];
1543
+ /**
1544
+ * Compute IDF (Inverse Document Frequency) with smoothing
1545
+ */
1546
+ private idf;
1547
+ /**
1548
+ * Compute BM25L score for a single document
1549
+ */
1550
+ score(query: string, docIndex: number): number;
1551
+ /**
1552
+ * Search and rank all documents
1553
+ */
1554
+ search(query: string): Array<{
1555
+ index: number;
1556
+ score: number;
1557
+ }>;
1558
+ }
1559
+ /**
1560
+ * Hybrid reranker that combines semantic and BM25L scores
1561
+ *
1562
+ * @param query - The search query
1563
+ * @param items - Array of items to rerank
1564
+ * @param getDocument - Function to extract document text from an item
1565
+ * @param getSemanticScore - Function to extract semantic similarity score from an item
1566
+ * @param options - Hybrid search options
1567
+ * @returns Reranked items with hybrid scores
1568
+ */
1569
+ declare function hybridRerank<T>(query: string, items: T[], getDocument: (item: T) => string, getSemanticScore: (item: T) => number, options?: HybridSearchOptions): RerankedResult<T>[];
1570
+ /**
1571
+ * Simple reranking function for ChromaDB results
1572
+ *
1573
+ * This is a convenience wrapper for reranking ChromaDB query results
1574
+ * that follow the standard { ids, documents, metadatas, distances } format.
1575
+ *
1576
+ * @param query - The search query
1577
+ * @param chromaResults - ChromaDB query results
1578
+ * @param options - Hybrid search options
1579
+ * @returns Reranked results with hybrid scores
1580
+ */
1581
+ declare function rerankChromaResults(query: string, chromaResults: {
1582
+ ids: string[][];
1583
+ documents: (string | null)[][];
1584
+ metadatas: Record<string, any>[][];
1585
+ distances: number[][];
1586
+ }, options?: HybridSearchOptions): Array<{
1587
+ id: string;
1588
+ document: string | null;
1589
+ metadata: Record<string, any>;
1590
+ distance: number;
1591
+ semanticScore: number;
1592
+ bm25Score: number;
1593
+ hybridScore: number;
1594
+ }>;
1595
+ /**
1596
+ * Rerank conversation search results
1597
+ *
1598
+ * This function is designed to work with the conversation-history.search collection,
1599
+ * where a larger set of results is fetched first and then reranked.
1600
+ *
1601
+ * @param query - The user's search query
1602
+ * @param results - Array of conversation search results from ChromaDB
1603
+ * @param options - Hybrid search options
1604
+ * @returns Reranked results sorted by hybrid score
1605
+ */
1606
+ declare function rerankConversationResults<T extends {
1607
+ userPrompt?: string;
1608
+ similarity?: number;
1609
+ }>(query: string, results: T[], options?: HybridSearchOptions): Array<T & {
1610
+ hybridScore: number;
1611
+ bm25Score: number;
1612
+ }>;
1613
+
1486
1614
  declare const SDK_VERSION = "0.0.8";
1487
1615
  type MessageTypeHandler = (message: IncomingMessage) => void | Promise<void>;
1488
1616
  declare class SuperatomSDK {
@@ -1587,4 +1715,4 @@ declare class SuperatomSDK {
1587
1715
  getTools(): Tool$1[];
1588
1716
  }
1589
1717
 
1590
- export { type Action, CONTEXT_CONFIG, type CapturedLog, CleanupService, type CollectionHandler, type CollectionOperation, type DBUIBlock, type IncomingMessage, type KbNodesQueryFilters, type KbNodesRequestPayload, LLM, type LogLevel, type Message, SDK_VERSION, STORAGE_CONFIG, SuperatomSDK, type SuperatomSDKConfig, Thread, ThreadManager, type Tool$1 as Tool, UIBlock, UILogCollector, type User, UserManager, type UsersData, logger };
1718
+ export { type Action, BM25L, type BM25LOptions, CONTEXT_CONFIG, type CapturedLog, CleanupService, type CollectionHandler, type CollectionOperation, type DBUIBlock, type HybridSearchOptions, type IncomingMessage, type KbNodesQueryFilters, type KbNodesRequestPayload, LLM, type LogLevel, type Message, type RerankedResult, SDK_VERSION, STORAGE_CONFIG, SuperatomSDK, type SuperatomSDKConfig, Thread, ThreadManager, type Tool$1 as Tool, UIBlock, UILogCollector, type User, UserManager, type UsersData, hybridRerank, logger, rerankChromaResults, rerankConversationResults };
package/dist/index.d.ts CHANGED
@@ -1483,6 +1483,134 @@ declare const CONTEXT_CONFIG: {
1483
1483
  MAX_CONVERSATION_CONTEXT_BLOCKS: number;
1484
1484
  };
1485
1485
 
1486
+ /**
1487
+ * BM25L Reranker for hybrid semantic search
1488
+ *
1489
+ * BM25L is an improved variant of BM25 that provides better handling of
1490
+ * long documents and term frequency saturation. This implementation is
1491
+ * designed to rerank semantic search results from ChromaDB.
1492
+ *
1493
+ * The hybrid approach combines:
1494
+ * 1. Semantic similarity from ChromaDB embeddings (dense vectors)
1495
+ * 2. Lexical matching from BM25L (sparse, keyword-based)
1496
+ *
1497
+ * This addresses a weakness of pure semantic search, which may miss
1498
+ * exact keyword matches that are important for user intent.
1499
+ */
1500
+ interface BM25LOptions {
1501
+ /** Term frequency saturation parameter (default: 1.5) */
1502
+ k1?: number;
1503
+ /** Length normalization parameter (default: 0.75) */
1504
+ b?: number;
1505
+ /** Lower-bound adjustment from the BM25L paper (default: 0.5) */
1506
+ delta?: number;
1507
+ }
1508
+ interface RerankedResult<T> {
1509
+ item: T;
1510
+ originalIndex: number;
1511
+ semanticScore: number;
1512
+ bm25Score: number;
1513
+ hybridScore: number;
1514
+ }
1515
+ interface HybridSearchOptions extends BM25LOptions {
1516
+ /** Weight for semantic score (0-1, default: 0.7) */
1517
+ semanticWeight?: number;
1518
+ /** Weight for BM25 score (0-1, default: 0.3) */
1519
+ bm25Weight?: number;
1520
+ /** Minimum hybrid score threshold (0-1, default: 0) */
1521
+ minScore?: number;
1522
+ }
1523
+ /**
1524
+ * BM25L implementation for lexical scoring
1525
+ */
1526
+ declare class BM25L {
1527
+ private k1;
1528
+ private b;
1529
+ private delta;
1530
+ private documents;
1531
+ private docLengths;
1532
+ private avgDocLength;
1533
+ private termDocFreq;
1534
+ /**
1535
+ * @param documents - Array of raw documents (strings)
1536
+ * @param opts - Optional BM25L parameters
1537
+ */
1538
+ constructor(documents?: string[], opts?: BM25LOptions);
1539
+ /**
1540
+ * Tokenize text into lowercase alphanumeric tokens
1541
+ */
1542
+ tokenize(text: string): string[];
1543
+ /**
1544
+ * Compute IDF (Inverse Document Frequency) with smoothing
1545
+ */
1546
+ private idf;
1547
+ /**
1548
+ * Compute BM25L score for a single document
1549
+ */
1550
+ score(query: string, docIndex: number): number;
1551
+ /**
1552
+ * Search and rank all documents
1553
+ */
1554
+ search(query: string): Array<{
1555
+ index: number;
1556
+ score: number;
1557
+ }>;
1558
+ }
1559
+ /**
1560
+ * Hybrid reranker that combines semantic and BM25L scores
1561
+ *
1562
+ * @param query - The search query
1563
+ * @param items - Array of items to rerank
1564
+ * @param getDocument - Function to extract document text from an item
1565
+ * @param getSemanticScore - Function to extract semantic similarity score from an item
1566
+ * @param options - Hybrid search options
1567
+ * @returns Reranked items with hybrid scores
1568
+ */
1569
+ declare function hybridRerank<T>(query: string, items: T[], getDocument: (item: T) => string, getSemanticScore: (item: T) => number, options?: HybridSearchOptions): RerankedResult<T>[];
1570
+ /**
1571
+ * Simple reranking function for ChromaDB results
1572
+ *
1573
+ * This is a convenience wrapper for reranking ChromaDB query results
1574
+ * that follow the standard { ids, documents, metadatas, distances } format.
1575
+ *
1576
+ * @param query - The search query
1577
+ * @param chromaResults - ChromaDB query results
1578
+ * @param options - Hybrid search options
1579
+ * @returns Reranked results with hybrid scores
1580
+ */
1581
+ declare function rerankChromaResults(query: string, chromaResults: {
1582
+ ids: string[][];
1583
+ documents: (string | null)[][];
1584
+ metadatas: Record<string, any>[][];
1585
+ distances: number[][];
1586
+ }, options?: HybridSearchOptions): Array<{
1587
+ id: string;
1588
+ document: string | null;
1589
+ metadata: Record<string, any>;
1590
+ distance: number;
1591
+ semanticScore: number;
1592
+ bm25Score: number;
1593
+ hybridScore: number;
1594
+ }>;
1595
+ /**
1596
+ * Rerank conversation search results
1597
+ *
1598
+ * This function is designed to work with the conversation-history.search collection,
1599
+ * where a larger set of results is fetched first and then reranked.
1600
+ *
1601
+ * @param query - The user's search query
1602
+ * @param results - Array of conversation search results from ChromaDB
1603
+ * @param options - Hybrid search options
1604
+ * @returns Reranked results sorted by hybrid score
1605
+ */
1606
+ declare function rerankConversationResults<T extends {
1607
+ userPrompt?: string;
1608
+ similarity?: number;
1609
+ }>(query: string, results: T[], options?: HybridSearchOptions): Array<T & {
1610
+ hybridScore: number;
1611
+ bm25Score: number;
1612
+ }>;
1613
+
1486
1614
  declare const SDK_VERSION = "0.0.8";
1487
1615
  type MessageTypeHandler = (message: IncomingMessage) => void | Promise<void>;
1488
1616
  declare class SuperatomSDK {
@@ -1587,4 +1715,4 @@ declare class SuperatomSDK {
1587
1715
  getTools(): Tool$1[];
1588
1716
  }
1589
1717
 
1590
- export { type Action, CONTEXT_CONFIG, type CapturedLog, CleanupService, type CollectionHandler, type CollectionOperation, type DBUIBlock, type IncomingMessage, type KbNodesQueryFilters, type KbNodesRequestPayload, LLM, type LogLevel, type Message, SDK_VERSION, STORAGE_CONFIG, SuperatomSDK, type SuperatomSDKConfig, Thread, ThreadManager, type Tool$1 as Tool, UIBlock, UILogCollector, type User, UserManager, type UsersData, logger };
1718
+ export { type Action, BM25L, type BM25LOptions, CONTEXT_CONFIG, type CapturedLog, CleanupService, type CollectionHandler, type CollectionOperation, type DBUIBlock, type HybridSearchOptions, type IncomingMessage, type KbNodesQueryFilters, type KbNodesRequestPayload, LLM, type LogLevel, type Message, type RerankedResult, SDK_VERSION, STORAGE_CONFIG, SuperatomSDK, type SuperatomSDKConfig, Thread, ThreadManager, type Tool$1 as Tool, UIBlock, UILogCollector, type User, UserManager, type UsersData, hybridRerank, logger, rerankChromaResults, rerankConversationResults };