@zokizuan/satori-core 1.1.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -36,7 +36,13 @@ const context = new Context({
36
36
  });
37
37
 
38
38
  await context.indexCodebase('/absolute/path/to/repo');
39
- const results = await context.semanticSearch('/absolute/path/to/repo', 'authentication logic', 5);
39
+ const results = await context.semanticSearch({
40
+ codebasePath: '/absolute/path/to/repo',
41
+ query: 'authentication logic',
42
+ topK: 5,
43
+ retrievalMode: 'hybrid',
44
+ scorePolicy: { kind: 'topk_only' }
45
+ });
40
46
  ```
41
47
 
42
48
  ## Development
@@ -1,7 +1,7 @@
1
1
  import { Splitter } from '../splitter';
2
2
  import { Embedding } from '../embedding';
3
3
  import { VectorDatabase, IndexCompletionMarkerDocument } from '../vectordb';
4
- import { SemanticSearchResult } from '../types';
4
+ import { SemanticSearchRequest, SemanticSearchResult } from '../types';
5
5
  import { FileSynchronizer } from '../sync/synchronizer';
6
6
  export interface ContextConfig {
7
7
  embedding?: Embedding;
@@ -125,7 +125,10 @@ export declare class Context {
125
125
  * @param topK Number of results to return
126
126
  * @param threshold Similarity threshold
127
127
  */
128
+ semanticSearch(request: SemanticSearchRequest): Promise<SemanticSearchResult[]>;
128
129
  semanticSearch(codebasePath: string, query: string, topK?: number, threshold?: number, filterExpr?: string): Promise<SemanticSearchResult[]>;
130
+ private normalizeSemanticSearchRequest;
131
+ private resolveSemanticSearchRequest;
129
132
  private buildSemanticSearchFilterExpr;
130
133
  private queryCompletionMarkerRows;
131
134
  clearIndexCompletionMarker(codebasePath: string): Promise<void>;
@@ -353,18 +353,14 @@ class Context {
353
353
  }
354
354
  }
355
355
  }
356
- /**
357
- * Semantic search with unified implementation
358
- * @param codebasePath Codebase path to search in
359
- * @param query Search query
360
- * @param topK Number of results to return
361
- * @param threshold Similarity threshold
362
- */
363
- async semanticSearch(codebasePath, query, topK = 5, threshold = 0.5, filterExpr) {
364
- const isHybrid = this.getIsHybrid();
356
+ async semanticSearch(requestOrCodebasePath, query, topK = 5, threshold = 0.5, filterExpr) {
357
+ const request = this.normalizeSemanticSearchRequest(requestOrCodebasePath, query, topK, threshold, filterExpr);
358
+ const resolvedRequest = this.resolveSemanticSearchRequest(request);
359
+ const codebasePath = resolvedRequest.codebasePath;
360
+ const isHybrid = resolvedRequest.retrievalMode !== 'dense' && this.getIsHybrid() === true;
365
361
  const searchType = isHybrid === true ? 'hybrid search' : 'semantic search';
366
- console.log(`[Context] 🔍 Executing ${searchType}: "${query}" in ${codebasePath}`);
367
- const effectiveFilterExpr = this.buildSemanticSearchFilterExpr(filterExpr);
362
+ console.log(`[Context] 🔍 Executing ${searchType}: "${resolvedRequest.query}" in ${codebasePath}`);
363
+ const effectiveFilterExpr = this.buildSemanticSearchFilterExpr(resolvedRequest.filterExpr);
368
364
  const normalizeBreadcrumbs = (value) => {
369
365
  if (!Array.isArray(value)) {
370
366
  return undefined;
@@ -394,8 +390,8 @@ class Context {
394
390
  console.log(`[Context] ⚠️ Collection '${collectionName}' exists but may be empty or not properly indexed:`, error);
395
391
  }
396
392
  // 1. Generate query vector
397
- console.log(`[Context] 🔍 Generating embeddings for query: "${query}"`);
398
- const queryEmbedding = await this.embedding.embed(query);
393
+ console.log(`[Context] 🔍 Generating embeddings for query: "${resolvedRequest.query}"`);
394
+ const queryEmbedding = await this.embedding.embed(resolvedRequest.query);
399
395
  console.log(`[Context] ✅ Generated embedding vector with dimension: ${queryEmbedding.vector.length}`);
400
396
  console.log(`[Context] 🔍 First 5 embedding values: [${queryEmbedding.vector.slice(0, 5).join(', ')}]`);
401
397
  // 2. Prepare hybrid search requests
@@ -404,17 +400,17 @@ class Context {
404
400
  data: queryEmbedding.vector,
405
401
  anns_field: "vector",
406
402
  param: { "nprobe": 10 },
407
- limit: topK
403
+ limit: resolvedRequest.topK
408
404
  },
409
405
  {
410
- data: query,
406
+ data: resolvedRequest.query,
411
407
  anns_field: "sparse_vector",
412
408
  param: { "drop_ratio_search": 0.2 },
413
- limit: topK
409
+ limit: resolvedRequest.topK
414
410
  }
415
411
  ];
416
412
  console.log(`[Context] 🔍 Search request 1 (dense): anns_field="${searchRequests[0].anns_field}", vector_dim=${queryEmbedding.vector.length}, limit=${searchRequests[0].limit}`);
417
- console.log(`[Context] 🔍 Search request 2 (sparse): anns_field="${searchRequests[1].anns_field}", query_text="${query}", limit=${searchRequests[1].limit}`);
413
+ console.log(`[Context] 🔍 Search request 2 (sparse): anns_field="${searchRequests[1].anns_field}", query_text="${resolvedRequest.query}", limit=${searchRequests[1].limit}`);
418
414
  // 3. Execute hybrid search
419
415
  console.log(`[Context] 🔍 Executing hybrid search with RRF reranking...`);
420
416
  const searchResults = await this.vectorDatabase.hybridSearch(collectionName, searchRequests, {
@@ -422,8 +418,9 @@ class Context {
422
418
  strategy: 'rrf',
423
419
  params: { k: 100 }
424
420
  },
425
- limit: topK,
426
- threshold,
421
+ limit: resolvedRequest.topK,
422
+ // Hybrid RRF scores are backend/rerank relative, so dense similarity
423
+ // thresholds can erase valid sparse lexical matches before MCP ranking.
427
424
  filterExpr: effectiveFilterExpr
428
425
  });
429
426
  console.log(`[Context] 🔍 Raw search results count: ${searchResults.length}`);
@@ -438,7 +435,9 @@ class Context {
438
435
  breadcrumbs: normalizeBreadcrumbs(result.document.metadata.breadcrumbs),
439
436
  indexedAt: typeof result.document.metadata.indexedAt === 'string' ? result.document.metadata.indexedAt : undefined,
440
437
  symbolId: typeof result.document.metadata.symbolId === 'string' ? result.document.metadata.symbolId : undefined,
441
- symbolLabel: typeof result.document.metadata.symbolLabel === 'string' ? result.document.metadata.symbolLabel : undefined
438
+ symbolLabel: typeof result.document.metadata.symbolLabel === 'string' ? result.document.metadata.symbolLabel : undefined,
439
+ backendScore: result.score,
440
+ backendScoreKind: 'rrf_fusion'
442
441
  }));
443
442
  console.log(`[Context] ✅ Found ${results.length} relevant hybrid results`);
444
443
  if (results.length > 0) {
@@ -449,9 +448,12 @@ class Context {
449
448
  else {
450
449
  // Regular semantic search
451
450
  // 1. Generate query vector
452
- const queryEmbedding = await this.embedding.embed(query);
451
+ const queryEmbedding = await this.embedding.embed(resolvedRequest.query);
452
+ const denseThreshold = resolvedRequest.scorePolicy.kind === 'dense_similarity_min'
453
+ ? resolvedRequest.scorePolicy.min
454
+ : undefined;
453
455
  // 2. Search in vector database
454
- const searchResults = await this.vectorDatabase.search(collectionName, queryEmbedding.vector, { topK, threshold, filterExpr: effectiveFilterExpr });
456
+ const searchResults = await this.vectorDatabase.search(collectionName, queryEmbedding.vector, { topK: resolvedRequest.topK, threshold: denseThreshold, filterExpr: effectiveFilterExpr });
455
457
  // 3. Convert to semantic search result format
456
458
  const results = searchResults.map(result => ({
457
459
  content: result.document.content,
@@ -463,12 +465,54 @@ class Context {
463
465
  breadcrumbs: normalizeBreadcrumbs(result.document.metadata.breadcrumbs),
464
466
  indexedAt: typeof result.document.metadata.indexedAt === 'string' ? result.document.metadata.indexedAt : undefined,
465
467
  symbolId: typeof result.document.metadata.symbolId === 'string' ? result.document.metadata.symbolId : undefined,
466
- symbolLabel: typeof result.document.metadata.symbolLabel === 'string' ? result.document.metadata.symbolLabel : undefined
468
+ symbolLabel: typeof result.document.metadata.symbolLabel === 'string' ? result.document.metadata.symbolLabel : undefined,
469
+ backendScore: result.score,
470
+ backendScoreKind: 'dense_similarity'
467
471
  }));
468
472
  console.log(`[Context] ✅ Found ${results.length} relevant results`);
469
473
  return results;
470
474
  }
471
475
  }
476
+ normalizeSemanticSearchRequest(requestOrCodebasePath, query, topK = 5, threshold = 0.5, filterExpr) {
477
+ if (typeof requestOrCodebasePath === 'string') {
478
+ return {
479
+ codebasePath: requestOrCodebasePath,
480
+ query: query ?? '',
481
+ topK,
482
+ filterExpr,
483
+ ...(threshold > 0
484
+ ? {
485
+ retrievalMode: 'dense',
486
+ scorePolicy: { kind: 'dense_similarity_min', min: threshold }
487
+ }
488
+ : {
489
+ scorePolicy: { kind: 'topk_only' }
490
+ })
491
+ };
492
+ }
493
+ return requestOrCodebasePath;
494
+ }
495
+ resolveSemanticSearchRequest(request) {
496
+ const hybridEnabled = this.getIsHybrid() === true;
497
+ const retrievalMode = request.retrievalMode ?? (hybridEnabled ? 'hybrid' : 'dense');
498
+ const scorePolicy = request.scorePolicy ?? (retrievalMode === 'dense'
499
+ ? { kind: 'dense_similarity_min', min: 0.5 }
500
+ : { kind: 'topk_only' });
501
+ if (request.retrievalMode !== undefined && retrievalMode !== 'dense' && hybridEnabled !== true) {
502
+ throw new Error(`${retrievalMode} retrieval requires hybrid search support, but HYBRID_MODE is disabled.`);
503
+ }
504
+ if (retrievalMode !== 'dense' && scorePolicy.kind === 'dense_similarity_min') {
505
+ throw new Error(`Dense similarity threshold score policy is invalid for ${retrievalMode} retrieval.`);
506
+ }
507
+ return {
508
+ codebasePath: request.codebasePath,
509
+ query: request.query,
510
+ topK: request.topK ?? 5,
511
+ retrievalMode,
512
+ filterExpr: request.filterExpr ?? '',
513
+ scorePolicy
514
+ };
515
+ }
472
516
  buildSemanticSearchFilterExpr(filterExpr) {
473
517
  const markerExclusion = `fileExtension != "${vectordb_1.INDEX_COMPLETION_MARKER_FILE_EXTENSION}"`;
474
518
  if (!filterExpr || filterExpr.trim().length === 0) {
package/dist/types.d.ts CHANGED
@@ -1,8 +1,17 @@
1
+ import type { BackendScoreKind, RetrievalMode, ScorePolicy } from './vectordb/types';
1
2
  export interface SearchQuery {
2
3
  term: string;
3
4
  includeContent?: boolean;
4
5
  limit?: number;
5
6
  }
7
+ export interface SemanticSearchRequest {
8
+ codebasePath: string;
9
+ query: string;
10
+ topK?: number;
11
+ retrievalMode?: RetrievalMode;
12
+ filterExpr?: string;
13
+ scorePolicy?: ScorePolicy;
14
+ }
6
15
  export interface SemanticSearchResult {
7
16
  content: string;
8
17
  relativePath: string;
@@ -14,5 +23,7 @@ export interface SemanticSearchResult {
14
23
  indexedAt?: string;
15
24
  symbolId?: string;
16
25
  symbolLabel?: string;
26
+ backendScore?: number;
27
+ backendScoreKind?: BackendScoreKind;
17
28
  }
18
29
  //# sourceMappingURL=types.d.ts.map
@@ -1,4 +1,4 @@
1
- export { VectorDocument, SearchOptions, VectorSearchResult, VectorDatabase, CollectionDetails, VectorStoreBackendInfo, HybridSearchRequest, HybridSearchOptions, HybridSearchResult, RerankStrategy, IndexCompletionFingerprint, IndexCompletionMarkerDocument, INDEX_COMPLETION_MARKER_DOC_ID, INDEX_COMPLETION_MARKER_FILE_EXTENSION, INDEX_COMPLETION_MARKER_RELATIVE_PATH, COLLECTION_LIMIT_MESSAGE } from './types';
1
+ export { VectorDocument, SearchOptions, VectorSearchResult, VectorDatabase, CollectionDetails, VectorStoreBackendInfo, HybridSearchRequest, HybridSearchOptions, HybridSearchResult, RerankStrategy, RetrievalMode, ScorePolicy, BackendScoreKind, IndexCompletionFingerprint, IndexCompletionMarkerDocument, INDEX_COMPLETION_MARKER_DOC_ID, INDEX_COMPLETION_MARKER_FILE_EXTENSION, INDEX_COMPLETION_MARKER_RELATIVE_PATH, COLLECTION_LIMIT_MESSAGE } from './types';
2
2
  export { MilvusRestfulVectorDatabase, MilvusRestfulConfig } from './milvus-restful-vectordb';
3
3
  export { MilvusVectorDatabase, MilvusConfig } from './milvus-vectordb';
4
4
  export { ClusterManager, ZillizConfig, Project, Cluster, CreateFreeClusterRequest, CreateFreeClusterResponse, CreateFreeClusterWithDetailsResponse, DescribeClusterResponse } from './zilliz-utils';
@@ -8,6 +8,14 @@ export interface VectorDocument {
8
8
  fileExtension: string;
9
9
  metadata: Record<string, any>;
10
10
  }
11
+ export type RetrievalMode = 'dense' | 'lexical' | 'hybrid';
12
+ export type ScorePolicy = {
13
+ kind: 'dense_similarity_min';
14
+ min: number;
15
+ } | {
16
+ kind: 'topk_only';
17
+ };
18
+ export type BackendScoreKind = 'dense_similarity' | 'lexical_rank' | 'rrf_fusion';
11
19
  export interface SearchOptions {
12
20
  topK?: number;
13
21
  filter?: Record<string, any>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@zokizuan/satori-core",
3
- "version": "1.1.1",
3
+ "version": "1.3.0",
4
4
  "description": "Core semantic indexing engine for Satori's insight-first retrieval",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",