@zokizuan/satori-core 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -36,7 +36,13 @@ const context = new Context({
36
36
  });
37
37
 
38
38
  await context.indexCodebase('/absolute/path/to/repo');
39
- const results = await context.semanticSearch('/absolute/path/to/repo', 'authentication logic', 5);
39
+ const results = await context.semanticSearch({
40
+ codebasePath: '/absolute/path/to/repo',
41
+ query: 'authentication logic',
42
+ topK: 5,
43
+ retrievalMode: 'hybrid',
44
+ scorePolicy: { kind: 'topk_only' }
45
+ });
40
46
  ```
41
47
 
42
48
  ## Development
@@ -1,7 +1,7 @@
1
1
  import { Splitter } from '../splitter';
2
2
  import { Embedding } from '../embedding';
3
3
  import { VectorDatabase, IndexCompletionMarkerDocument } from '../vectordb';
4
- import { SemanticSearchResult } from '../types';
4
+ import { SemanticSearchRequest, SemanticSearchResult } from '../types';
5
5
  import { FileSynchronizer } from '../sync/synchronizer';
6
6
  export interface ContextConfig {
7
7
  embedding?: Embedding;
@@ -125,7 +125,10 @@ export declare class Context {
125
125
  * @param topK Number of results to return
126
126
  * @param threshold Similarity threshold
127
127
  */
128
+ semanticSearch(request: SemanticSearchRequest): Promise<SemanticSearchResult[]>;
128
129
  semanticSearch(codebasePath: string, query: string, topK?: number, threshold?: number, filterExpr?: string): Promise<SemanticSearchResult[]>;
130
+ private normalizeSemanticSearchRequest;
131
+ private resolveSemanticSearchRequest;
129
132
  private buildSemanticSearchFilterExpr;
130
133
  private queryCompletionMarkerRows;
131
134
  clearIndexCompletionMarker(codebasePath: string): Promise<void>;
@@ -353,18 +353,14 @@ class Context {
353
353
  }
354
354
  }
355
355
  }
356
- /**
357
- * Semantic search with unified implementation
358
- * @param codebasePath Codebase path to search in
359
- * @param query Search query
360
- * @param topK Number of results to return
361
- * @param threshold Similarity threshold
362
- */
363
- async semanticSearch(codebasePath, query, topK = 5, threshold = 0.5, filterExpr) {
364
- const isHybrid = this.getIsHybrid();
356
+ async semanticSearch(requestOrCodebasePath, query, topK = 5, threshold = 0.5, filterExpr) {
357
+ const request = this.normalizeSemanticSearchRequest(requestOrCodebasePath, query, topK, threshold, filterExpr);
358
+ const resolvedRequest = this.resolveSemanticSearchRequest(request);
359
+ const codebasePath = resolvedRequest.codebasePath;
360
+ const isHybrid = resolvedRequest.retrievalMode !== 'dense' && this.getIsHybrid() === true;
365
361
  const searchType = isHybrid === true ? 'hybrid search' : 'semantic search';
366
- console.log(`[Context] 🔍 Executing ${searchType}: "${query}" in ${codebasePath}`);
367
- const effectiveFilterExpr = this.buildSemanticSearchFilterExpr(filterExpr);
362
+ console.log(`[Context] 🔍 Executing ${searchType}: "${resolvedRequest.query}" in ${codebasePath}`);
363
+ const effectiveFilterExpr = this.buildSemanticSearchFilterExpr(resolvedRequest.filterExpr);
368
364
  const normalizeBreadcrumbs = (value) => {
369
365
  if (!Array.isArray(value)) {
370
366
  return undefined;
@@ -394,8 +390,8 @@ class Context {
394
390
  console.log(`[Context] ⚠️ Collection '${collectionName}' exists but may be empty or not properly indexed:`, error);
395
391
  }
396
392
  // 1. Generate query vector
397
- console.log(`[Context] 🔍 Generating embeddings for query: "${query}"`);
398
- const queryEmbedding = await this.embedding.embed(query);
393
+ console.log(`[Context] 🔍 Generating embeddings for query: "${resolvedRequest.query}"`);
394
+ const queryEmbedding = await this.embedding.embed(resolvedRequest.query);
399
395
  console.log(`[Context] ✅ Generated embedding vector with dimension: ${queryEmbedding.vector.length}`);
400
396
  console.log(`[Context] 🔍 First 5 embedding values: [${queryEmbedding.vector.slice(0, 5).join(', ')}]`);
401
397
  // 2. Prepare hybrid search requests
@@ -404,17 +400,17 @@ class Context {
404
400
  data: queryEmbedding.vector,
405
401
  anns_field: "vector",
406
402
  param: { "nprobe": 10 },
407
- limit: topK
403
+ limit: resolvedRequest.topK
408
404
  },
409
405
  {
410
- data: query,
406
+ data: resolvedRequest.query,
411
407
  anns_field: "sparse_vector",
412
408
  param: { "drop_ratio_search": 0.2 },
413
- limit: topK
409
+ limit: resolvedRequest.topK
414
410
  }
415
411
  ];
416
412
  console.log(`[Context] 🔍 Search request 1 (dense): anns_field="${searchRequests[0].anns_field}", vector_dim=${queryEmbedding.vector.length}, limit=${searchRequests[0].limit}`);
417
- console.log(`[Context] 🔍 Search request 2 (sparse): anns_field="${searchRequests[1].anns_field}", query_text="${query}", limit=${searchRequests[1].limit}`);
413
+ console.log(`[Context] 🔍 Search request 2 (sparse): anns_field="${searchRequests[1].anns_field}", query_text="${resolvedRequest.query}", limit=${searchRequests[1].limit}`);
418
414
  // 3. Execute hybrid search
419
415
  console.log(`[Context] 🔍 Executing hybrid search with RRF reranking...`);
420
416
  const searchResults = await this.vectorDatabase.hybridSearch(collectionName, searchRequests, {
@@ -422,7 +418,7 @@ class Context {
422
418
  strategy: 'rrf',
423
419
  params: { k: 100 }
424
420
  },
425
- limit: topK,
421
+ limit: resolvedRequest.topK,
426
422
  // Hybrid RRF scores are backend/rerank relative, so dense similarity
427
423
  // thresholds can erase valid sparse lexical matches before MCP ranking.
428
424
  filterExpr: effectiveFilterExpr
@@ -439,7 +435,9 @@ class Context {
439
435
  breadcrumbs: normalizeBreadcrumbs(result.document.metadata.breadcrumbs),
440
436
  indexedAt: typeof result.document.metadata.indexedAt === 'string' ? result.document.metadata.indexedAt : undefined,
441
437
  symbolId: typeof result.document.metadata.symbolId === 'string' ? result.document.metadata.symbolId : undefined,
442
- symbolLabel: typeof result.document.metadata.symbolLabel === 'string' ? result.document.metadata.symbolLabel : undefined
438
+ symbolLabel: typeof result.document.metadata.symbolLabel === 'string' ? result.document.metadata.symbolLabel : undefined,
439
+ backendScore: result.score,
440
+ backendScoreKind: 'rrf_fusion'
443
441
  }));
444
442
  console.log(`[Context] ✅ Found ${results.length} relevant hybrid results`);
445
443
  if (results.length > 0) {
@@ -450,9 +448,12 @@ class Context {
450
448
  else {
451
449
  // Regular semantic search
452
450
  // 1. Generate query vector
453
- const queryEmbedding = await this.embedding.embed(query);
451
+ const queryEmbedding = await this.embedding.embed(resolvedRequest.query);
452
+ const denseThreshold = resolvedRequest.scorePolicy.kind === 'dense_similarity_min'
453
+ ? resolvedRequest.scorePolicy.min
454
+ : undefined;
454
455
  // 2. Search in vector database
455
- const searchResults = await this.vectorDatabase.search(collectionName, queryEmbedding.vector, { topK, threshold, filterExpr: effectiveFilterExpr });
456
+ const searchResults = await this.vectorDatabase.search(collectionName, queryEmbedding.vector, { topK: resolvedRequest.topK, threshold: denseThreshold, filterExpr: effectiveFilterExpr });
456
457
  // 3. Convert to semantic search result format
457
458
  const results = searchResults.map(result => ({
458
459
  content: result.document.content,
@@ -464,12 +465,54 @@ class Context {
464
465
  breadcrumbs: normalizeBreadcrumbs(result.document.metadata.breadcrumbs),
465
466
  indexedAt: typeof result.document.metadata.indexedAt === 'string' ? result.document.metadata.indexedAt : undefined,
466
467
  symbolId: typeof result.document.metadata.symbolId === 'string' ? result.document.metadata.symbolId : undefined,
467
- symbolLabel: typeof result.document.metadata.symbolLabel === 'string' ? result.document.metadata.symbolLabel : undefined
468
+ symbolLabel: typeof result.document.metadata.symbolLabel === 'string' ? result.document.metadata.symbolLabel : undefined,
469
+ backendScore: result.score,
470
+ backendScoreKind: 'dense_similarity'
468
471
  }));
469
472
  console.log(`[Context] ✅ Found ${results.length} relevant results`);
470
473
  return results;
471
474
  }
472
475
  }
476
+ normalizeSemanticSearchRequest(requestOrCodebasePath, query, topK = 5, threshold = 0.5, filterExpr) {
477
+ if (typeof requestOrCodebasePath === 'string') {
478
+ return {
479
+ codebasePath: requestOrCodebasePath,
480
+ query: query ?? '',
481
+ topK,
482
+ filterExpr,
483
+ ...(threshold > 0
484
+ ? {
485
+ retrievalMode: 'dense',
486
+ scorePolicy: { kind: 'dense_similarity_min', min: threshold }
487
+ }
488
+ : {
489
+ scorePolicy: { kind: 'topk_only' }
490
+ })
491
+ };
492
+ }
493
+ return requestOrCodebasePath;
494
+ }
495
+ resolveSemanticSearchRequest(request) {
496
+ const hybridEnabled = this.getIsHybrid() === true;
497
+ const retrievalMode = request.retrievalMode ?? (hybridEnabled ? 'hybrid' : 'dense');
498
+ const scorePolicy = request.scorePolicy ?? (retrievalMode === 'dense'
499
+ ? { kind: 'dense_similarity_min', min: 0.5 }
500
+ : { kind: 'topk_only' });
501
+ if (request.retrievalMode !== undefined && retrievalMode !== 'dense' && hybridEnabled !== true) {
502
+ throw new Error(`${retrievalMode} retrieval requires hybrid search support, but HYBRID_MODE is disabled.`);
503
+ }
504
+ if (retrievalMode !== 'dense' && scorePolicy.kind === 'dense_similarity_min') {
505
+ throw new Error(`Dense similarity threshold score policy is invalid for ${retrievalMode} retrieval.`);
506
+ }
507
+ return {
508
+ codebasePath: request.codebasePath,
509
+ query: request.query,
510
+ topK: request.topK ?? 5,
511
+ retrievalMode,
512
+ filterExpr: request.filterExpr ?? '',
513
+ scorePolicy
514
+ };
515
+ }
473
516
  buildSemanticSearchFilterExpr(filterExpr) {
474
517
  const markerExclusion = `fileExtension != "${vectordb_1.INDEX_COMPLETION_MARKER_FILE_EXTENSION}"`;
475
518
  if (!filterExpr || filterExpr.trim().length === 0) {
package/dist/types.d.ts CHANGED
@@ -1,8 +1,17 @@
1
+ import type { BackendScoreKind, RetrievalMode, ScorePolicy } from './vectordb/types';
1
2
  export interface SearchQuery {
2
3
  term: string;
3
4
  includeContent?: boolean;
4
5
  limit?: number;
5
6
  }
7
+ export interface SemanticSearchRequest {
8
+ codebasePath: string;
9
+ query: string;
10
+ topK?: number;
11
+ retrievalMode?: RetrievalMode;
12
+ filterExpr?: string;
13
+ scorePolicy?: ScorePolicy;
14
+ }
6
15
  export interface SemanticSearchResult {
7
16
  content: string;
8
17
  relativePath: string;
@@ -14,5 +23,7 @@ export interface SemanticSearchResult {
14
23
  indexedAt?: string;
15
24
  symbolId?: string;
16
25
  symbolLabel?: string;
26
+ backendScore?: number;
27
+ backendScoreKind?: BackendScoreKind;
17
28
  }
18
29
  //# sourceMappingURL=types.d.ts.map
@@ -1,4 +1,4 @@
1
- export { VectorDocument, SearchOptions, VectorSearchResult, VectorDatabase, CollectionDetails, VectorStoreBackendInfo, HybridSearchRequest, HybridSearchOptions, HybridSearchResult, RerankStrategy, IndexCompletionFingerprint, IndexCompletionMarkerDocument, INDEX_COMPLETION_MARKER_DOC_ID, INDEX_COMPLETION_MARKER_FILE_EXTENSION, INDEX_COMPLETION_MARKER_RELATIVE_PATH, COLLECTION_LIMIT_MESSAGE } from './types';
1
+ export { VectorDocument, SearchOptions, VectorSearchResult, VectorDatabase, CollectionDetails, VectorStoreBackendInfo, HybridSearchRequest, HybridSearchOptions, HybridSearchResult, RerankStrategy, RetrievalMode, ScorePolicy, BackendScoreKind, IndexCompletionFingerprint, IndexCompletionMarkerDocument, INDEX_COMPLETION_MARKER_DOC_ID, INDEX_COMPLETION_MARKER_FILE_EXTENSION, INDEX_COMPLETION_MARKER_RELATIVE_PATH, COLLECTION_LIMIT_MESSAGE } from './types';
2
2
  export { MilvusRestfulVectorDatabase, MilvusRestfulConfig } from './milvus-restful-vectordb';
3
3
  export { MilvusVectorDatabase, MilvusConfig } from './milvus-vectordb';
4
4
  export { ClusterManager, ZillizConfig, Project, Cluster, CreateFreeClusterRequest, CreateFreeClusterResponse, CreateFreeClusterWithDetailsResponse, DescribeClusterResponse } from './zilliz-utils';
@@ -8,6 +8,14 @@ export interface VectorDocument {
8
8
  fileExtension: string;
9
9
  metadata: Record<string, any>;
10
10
  }
11
+ export type RetrievalMode = 'dense' | 'lexical' | 'hybrid';
12
+ export type ScorePolicy = {
13
+ kind: 'dense_similarity_min';
14
+ min: number;
15
+ } | {
16
+ kind: 'topk_only';
17
+ };
18
+ export type BackendScoreKind = 'dense_similarity' | 'lexical_rank' | 'rrf_fusion';
11
19
  export interface SearchOptions {
12
20
  topK?: number;
13
21
  filter?: Record<string, any>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@zokizuan/satori-core",
3
- "version": "1.2.0",
3
+ "version": "1.3.0",
4
4
  "description": "Core semantic indexing engine for Satori's insight-first retrieval",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",