npm - @zokizuan/satori-core - Versions diffs - 1.1.1 → 1.3.0 - Mend

@zokizuan/satori-core 1.1.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/README.md CHANGED Viewed

@@ -36,7 +36,13 @@ const context = new Context({
 });
 await context.indexCodebase('/absolute/path/to/repo');
-const results = await context.semanticSearch('/absolute/path/to/repo', 'authentication logic', 5);
+const results = await context.semanticSearch({
+  codebasePath: '/absolute/path/to/repo',
+  query: 'authentication logic',
+  topK: 5,
+  retrievalMode: 'hybrid',
+  scorePolicy: { kind: 'topk_only' }
+});
 ```
 ## Development

package/dist/core/context.d.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 import { Splitter } from '../splitter';
 import { Embedding } from '../embedding';
 import { VectorDatabase, IndexCompletionMarkerDocument } from '../vectordb';
-import { SemanticSearchResult } from '../types';
+import { SemanticSearchRequest, SemanticSearchResult } from '../types';
 import { FileSynchronizer } from '../sync/synchronizer';
 export interface ContextConfig {
     embedding?: Embedding;
@@ -125,7 +125,10 @@ export declare class Context {
      * @param topK Number of results to return
      * @param threshold Similarity threshold
      */
+    semanticSearch(request: SemanticSearchRequest): Promise<SemanticSearchResult[]>;
     semanticSearch(codebasePath: string, query: string, topK?: number, threshold?: number, filterExpr?: string): Promise<SemanticSearchResult[]>;
+    private normalizeSemanticSearchRequest;
+    private resolveSemanticSearchRequest;
     private buildSemanticSearchFilterExpr;
     private queryCompletionMarkerRows;
     clearIndexCompletionMarker(codebasePath: string): Promise<void>;

package/dist/core/context.js CHANGED Viewed

@@ -353,18 +353,14 @@ class Context {
             }
         }
     }
-    /**
-     * Semantic search with unified implementation
-     * @param codebasePath Codebase path to search in
-     * @param query Search query
-     * @param topK Number of results to return
-     * @param threshold Similarity threshold
-     */
-    async semanticSearch(codebasePath, query, topK = 5, threshold = 0.5, filterExpr) {
-        const isHybrid = this.getIsHybrid();
+    async semanticSearch(requestOrCodebasePath, query, topK = 5, threshold = 0.5, filterExpr) {
+        const request = this.normalizeSemanticSearchRequest(requestOrCodebasePath, query, topK, threshold, filterExpr);
+        const resolvedRequest = this.resolveSemanticSearchRequest(request);
+        const codebasePath = resolvedRequest.codebasePath;
+        const isHybrid = resolvedRequest.retrievalMode !== 'dense' && this.getIsHybrid() === true;
         const searchType = isHybrid === true ? 'hybrid search' : 'semantic search';
-        console.log(`[Context] 🔍 Executing ${searchType}: "${query}" in ${codebasePath}`);
-        const effectiveFilterExpr = this.buildSemanticSearchFilterExpr(filterExpr);
+        console.log(`[Context] 🔍 Executing ${searchType}: "${resolvedRequest.query}" in ${codebasePath}`);
+        const effectiveFilterExpr = this.buildSemanticSearchFilterExpr(resolvedRequest.filterExpr);
         const normalizeBreadcrumbs = (value) => {
             if (!Array.isArray(value)) {
                 return undefined;
@@ -394,8 +390,8 @@ class Context {
                 console.log(`[Context] ⚠️  Collection '${collectionName}' exists but may be empty or not properly indexed:`, error);
             }
             // 1. Generate query vector
-            console.log(`[Context] 🔍 Generating embeddings for query: "${query}"`);
-            const queryEmbedding = await this.embedding.embed(query);
+            console.log(`[Context] 🔍 Generating embeddings for query: "${resolvedRequest.query}"`);
+            const queryEmbedding = await this.embedding.embed(resolvedRequest.query);
             console.log(`[Context] ✅ Generated embedding vector with dimension: ${queryEmbedding.vector.length}`);
             console.log(`[Context] 🔍 First 5 embedding values: [${queryEmbedding.vector.slice(0, 5).join(', ')}]`);
             // 2. Prepare hybrid search requests
@@ -404,17 +400,17 @@ class Context {
                     data: queryEmbedding.vector,
                     anns_field: "vector",
                     param: { "nprobe": 10 },
-                    limit: topK
+                    limit: resolvedRequest.topK
                 },
                 {
-                    data: query,
+                    data: resolvedRequest.query,
                     anns_field: "sparse_vector",
                     param: { "drop_ratio_search": 0.2 },
-                    limit: topK
+                    limit: resolvedRequest.topK
                 }
             ];
             console.log(`[Context] 🔍 Search request 1 (dense): anns_field="${searchRequests[0].anns_field}", vector_dim=${queryEmbedding.vector.length}, limit=${searchRequests[0].limit}`);
-            console.log(`[Context] 🔍 Search request 2 (sparse): anns_field="${searchRequests[1].anns_field}", query_text="${query}", limit=${searchRequests[1].limit}`);
+            console.log(`[Context] 🔍 Search request 2 (sparse): anns_field="${searchRequests[1].anns_field}", query_text="${resolvedRequest.query}", limit=${searchRequests[1].limit}`);
             // 3. Execute hybrid search
             console.log(`[Context] 🔍 Executing hybrid search with RRF reranking...`);
             const searchResults = await this.vectorDatabase.hybridSearch(collectionName, searchRequests, {
@@ -422,8 +418,9 @@ class Context {
                     strategy: 'rrf',
                     params: { k: 100 }
                 },
-                limit: topK,
-                threshold,
+                limit: resolvedRequest.topK,
+                // Hybrid RRF scores are backend/rerank relative, so dense similarity
+                // thresholds can erase valid sparse lexical matches before MCP ranking.
                 filterExpr: effectiveFilterExpr
             });
             console.log(`[Context] 🔍 Raw search results count: ${searchResults.length}`);
@@ -438,7 +435,9 @@ class Context {
                 breadcrumbs: normalizeBreadcrumbs(result.document.metadata.breadcrumbs),
                 indexedAt: typeof result.document.metadata.indexedAt === 'string' ? result.document.metadata.indexedAt : undefined,
                 symbolId: typeof result.document.metadata.symbolId === 'string' ? result.document.metadata.symbolId : undefined,
-                symbolLabel: typeof result.document.metadata.symbolLabel === 'string' ? result.document.metadata.symbolLabel : undefined
+                symbolLabel: typeof result.document.metadata.symbolLabel === 'string' ? result.document.metadata.symbolLabel : undefined,
+                backendScore: result.score,
+                backendScoreKind: 'rrf_fusion'
             }));
             console.log(`[Context] ✅ Found ${results.length} relevant hybrid results`);
             if (results.length > 0) {
@@ -449,9 +448,12 @@ class Context {
         else {
             // Regular semantic search
             // 1. Generate query vector
-            const queryEmbedding = await this.embedding.embed(query);
+            const queryEmbedding = await this.embedding.embed(resolvedRequest.query);
+            const denseThreshold = resolvedRequest.scorePolicy.kind === 'dense_similarity_min'
+                ? resolvedRequest.scorePolicy.min
+                : undefined;
             // 2. Search in vector database
-            const searchResults = await this.vectorDatabase.search(collectionName, queryEmbedding.vector, { topK, threshold, filterExpr: effectiveFilterExpr });
+            const searchResults = await this.vectorDatabase.search(collectionName, queryEmbedding.vector, { topK: resolvedRequest.topK, threshold: denseThreshold, filterExpr: effectiveFilterExpr });
             // 3. Convert to semantic search result format
             const results = searchResults.map(result => ({
                 content: result.document.content,
@@ -463,12 +465,54 @@ class Context {
                 breadcrumbs: normalizeBreadcrumbs(result.document.metadata.breadcrumbs),
                 indexedAt: typeof result.document.metadata.indexedAt === 'string' ? result.document.metadata.indexedAt : undefined,
                 symbolId: typeof result.document.metadata.symbolId === 'string' ? result.document.metadata.symbolId : undefined,
-                symbolLabel: typeof result.document.metadata.symbolLabel === 'string' ? result.document.metadata.symbolLabel : undefined
+                symbolLabel: typeof result.document.metadata.symbolLabel === 'string' ? result.document.metadata.symbolLabel : undefined,
+                backendScore: result.score,
+                backendScoreKind: 'dense_similarity'
             }));
             console.log(`[Context] ✅ Found ${results.length} relevant results`);
             return results;
         }
     }
+    normalizeSemanticSearchRequest(requestOrCodebasePath, query, topK = 5, threshold = 0.5, filterExpr) {
+        if (typeof requestOrCodebasePath === 'string') {
+            return {
+                codebasePath: requestOrCodebasePath,
+                query: query ?? '',
+                topK,
+                filterExpr,
+                ...(threshold > 0
+                    ? {
+                        retrievalMode: 'dense',
+                        scorePolicy: { kind: 'dense_similarity_min', min: threshold }
+                    }
+                    : {
+                        scorePolicy: { kind: 'topk_only' }
+                    })
+            };
+        }
+        return requestOrCodebasePath;
+    }
+    resolveSemanticSearchRequest(request) {
+        const hybridEnabled = this.getIsHybrid() === true;
+        const retrievalMode = request.retrievalMode ?? (hybridEnabled ? 'hybrid' : 'dense');
+        const scorePolicy = request.scorePolicy ?? (retrievalMode === 'dense'
+            ? { kind: 'dense_similarity_min', min: 0.5 }
+            : { kind: 'topk_only' });
+        if (request.retrievalMode !== undefined && retrievalMode !== 'dense' && hybridEnabled !== true) {
+            throw new Error(`${retrievalMode} retrieval requires hybrid search support, but HYBRID_MODE is disabled.`);
+        }
+        if (retrievalMode !== 'dense' && scorePolicy.kind === 'dense_similarity_min') {
+            throw new Error(`Dense similarity threshold score policy is invalid for ${retrievalMode} retrieval.`);
+        }
+        return {
+            codebasePath: request.codebasePath,
+            query: request.query,
+            topK: request.topK ?? 5,
+            retrievalMode,
+            filterExpr: request.filterExpr ?? '',
+            scorePolicy
+        };
+    }
     buildSemanticSearchFilterExpr(filterExpr) {
         const markerExclusion = `fileExtension != "${vectordb_1.INDEX_COMPLETION_MARKER_FILE_EXTENSION}"`;
         if (!filterExpr || filterExpr.trim().length === 0) {

package/dist/types.d.ts CHANGED Viewed

@@ -1,8 +1,17 @@
+import type { BackendScoreKind, RetrievalMode, ScorePolicy } from './vectordb/types';
 export interface SearchQuery {
     term: string;
     includeContent?: boolean;
     limit?: number;
 }
+export interface SemanticSearchRequest {
+    codebasePath: string;
+    query: string;
+    topK?: number;
+    retrievalMode?: RetrievalMode;
+    filterExpr?: string;
+    scorePolicy?: ScorePolicy;
+}
 export interface SemanticSearchResult {
     content: string;
     relativePath: string;
@@ -14,5 +23,7 @@ export interface SemanticSearchResult {
     indexedAt?: string;
     symbolId?: string;
     symbolLabel?: string;
+    backendScore?: number;
+    backendScoreKind?: BackendScoreKind;
 }
 //# sourceMappingURL=types.d.ts.map

package/dist/vectordb/index.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-export { VectorDocument, SearchOptions, VectorSearchResult, VectorDatabase, CollectionDetails, VectorStoreBackendInfo, HybridSearchRequest, HybridSearchOptions, HybridSearchResult, RerankStrategy, IndexCompletionFingerprint, IndexCompletionMarkerDocument, INDEX_COMPLETION_MARKER_DOC_ID, INDEX_COMPLETION_MARKER_FILE_EXTENSION, INDEX_COMPLETION_MARKER_RELATIVE_PATH, COLLECTION_LIMIT_MESSAGE } from './types';
+export { VectorDocument, SearchOptions, VectorSearchResult, VectorDatabase, CollectionDetails, VectorStoreBackendInfo, HybridSearchRequest, HybridSearchOptions, HybridSearchResult, RerankStrategy, RetrievalMode, ScorePolicy, BackendScoreKind, IndexCompletionFingerprint, IndexCompletionMarkerDocument, INDEX_COMPLETION_MARKER_DOC_ID, INDEX_COMPLETION_MARKER_FILE_EXTENSION, INDEX_COMPLETION_MARKER_RELATIVE_PATH, COLLECTION_LIMIT_MESSAGE } from './types';
 export { MilvusRestfulVectorDatabase, MilvusRestfulConfig } from './milvus-restful-vectordb';
 export { MilvusVectorDatabase, MilvusConfig } from './milvus-vectordb';
 export { ClusterManager, ZillizConfig, Project, Cluster, CreateFreeClusterRequest, CreateFreeClusterResponse, CreateFreeClusterWithDetailsResponse, DescribeClusterResponse } from './zilliz-utils';

package/dist/vectordb/types.d.ts CHANGED Viewed

@@ -8,6 +8,14 @@ export interface VectorDocument {
     fileExtension: string;
     metadata: Record<string, any>;
 }
+export type RetrievalMode = 'dense' | 'lexical' | 'hybrid';
+export type ScorePolicy = {
+    kind: 'dense_similarity_min';
+    min: number;
+} | {
+    kind: 'topk_only';
+};
+export type BackendScoreKind = 'dense_similarity' | 'lexical_rank' | 'rrf_fusion';
 export interface SearchOptions {
     topK?: number;
     filter?: Record<string, any>;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@zokizuan/satori-core",
-  "version": "1.1.1",
+  "version": "1.3.0",
   "description": "Core semantic indexing engine for Satori's insight-first retrieval",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",