@zokizuan/satori-core 1.1.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -1
- package/dist/core/context.d.ts +4 -1
- package/dist/core/context.js +67 -23
- package/dist/types.d.ts +11 -0
- package/dist/vectordb/index.d.ts +1 -1
- package/dist/vectordb/types.d.ts +8 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -36,7 +36,13 @@ const context = new Context({
|
|
|
36
36
|
});
|
|
37
37
|
|
|
38
38
|
await context.indexCodebase('/absolute/path/to/repo');
|
|
39
|
-
const results = await context.semanticSearch(
|
|
39
|
+
const results = await context.semanticSearch({
|
|
40
|
+
codebasePath: '/absolute/path/to/repo',
|
|
41
|
+
query: 'authentication logic',
|
|
42
|
+
topK: 5,
|
|
43
|
+
retrievalMode: 'hybrid',
|
|
44
|
+
scorePolicy: { kind: 'topk_only' }
|
|
45
|
+
});
|
|
40
46
|
```
|
|
41
47
|
|
|
42
48
|
## Development
|
package/dist/core/context.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { Splitter } from '../splitter';
|
|
2
2
|
import { Embedding } from '../embedding';
|
|
3
3
|
import { VectorDatabase, IndexCompletionMarkerDocument } from '../vectordb';
|
|
4
|
-
import { SemanticSearchResult } from '../types';
|
|
4
|
+
import { SemanticSearchRequest, SemanticSearchResult } from '../types';
|
|
5
5
|
import { FileSynchronizer } from '../sync/synchronizer';
|
|
6
6
|
export interface ContextConfig {
|
|
7
7
|
embedding?: Embedding;
|
|
@@ -125,7 +125,10 @@ export declare class Context {
|
|
|
125
125
|
* @param topK Number of results to return
|
|
126
126
|
* @param threshold Similarity threshold
|
|
127
127
|
*/
|
|
128
|
+
semanticSearch(request: SemanticSearchRequest): Promise<SemanticSearchResult[]>;
|
|
128
129
|
semanticSearch(codebasePath: string, query: string, topK?: number, threshold?: number, filterExpr?: string): Promise<SemanticSearchResult[]>;
|
|
130
|
+
private normalizeSemanticSearchRequest;
|
|
131
|
+
private resolveSemanticSearchRequest;
|
|
129
132
|
private buildSemanticSearchFilterExpr;
|
|
130
133
|
private queryCompletionMarkerRows;
|
|
131
134
|
clearIndexCompletionMarker(codebasePath: string): Promise<void>;
|
package/dist/core/context.js
CHANGED
|
@@ -353,18 +353,14 @@ class Context {
|
|
|
353
353
|
}
|
|
354
354
|
}
|
|
355
355
|
}
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
* @param threshold Similarity threshold
|
|
362
|
-
*/
|
|
363
|
-
async semanticSearch(codebasePath, query, topK = 5, threshold = 0.5, filterExpr) {
|
|
364
|
-
const isHybrid = this.getIsHybrid();
|
|
356
|
+
async semanticSearch(requestOrCodebasePath, query, topK = 5, threshold = 0.5, filterExpr) {
|
|
357
|
+
const request = this.normalizeSemanticSearchRequest(requestOrCodebasePath, query, topK, threshold, filterExpr);
|
|
358
|
+
const resolvedRequest = this.resolveSemanticSearchRequest(request);
|
|
359
|
+
const codebasePath = resolvedRequest.codebasePath;
|
|
360
|
+
const isHybrid = resolvedRequest.retrievalMode !== 'dense' && this.getIsHybrid() === true;
|
|
365
361
|
const searchType = isHybrid === true ? 'hybrid search' : 'semantic search';
|
|
366
|
-
console.log(`[Context] 🔍 Executing ${searchType}: "${query}" in ${codebasePath}`);
|
|
367
|
-
const effectiveFilterExpr = this.buildSemanticSearchFilterExpr(filterExpr);
|
|
362
|
+
console.log(`[Context] 🔍 Executing ${searchType}: "${resolvedRequest.query}" in ${codebasePath}`);
|
|
363
|
+
const effectiveFilterExpr = this.buildSemanticSearchFilterExpr(resolvedRequest.filterExpr);
|
|
368
364
|
const normalizeBreadcrumbs = (value) => {
|
|
369
365
|
if (!Array.isArray(value)) {
|
|
370
366
|
return undefined;
|
|
@@ -394,8 +390,8 @@ class Context {
|
|
|
394
390
|
console.log(`[Context] ⚠️ Collection '${collectionName}' exists but may be empty or not properly indexed:`, error);
|
|
395
391
|
}
|
|
396
392
|
// 1. Generate query vector
|
|
397
|
-
console.log(`[Context] 🔍 Generating embeddings for query: "${query}"`);
|
|
398
|
-
const queryEmbedding = await this.embedding.embed(query);
|
|
393
|
+
console.log(`[Context] 🔍 Generating embeddings for query: "${resolvedRequest.query}"`);
|
|
394
|
+
const queryEmbedding = await this.embedding.embed(resolvedRequest.query);
|
|
399
395
|
console.log(`[Context] ✅ Generated embedding vector with dimension: ${queryEmbedding.vector.length}`);
|
|
400
396
|
console.log(`[Context] 🔍 First 5 embedding values: [${queryEmbedding.vector.slice(0, 5).join(', ')}]`);
|
|
401
397
|
// 2. Prepare hybrid search requests
|
|
@@ -404,17 +400,17 @@ class Context {
|
|
|
404
400
|
data: queryEmbedding.vector,
|
|
405
401
|
anns_field: "vector",
|
|
406
402
|
param: { "nprobe": 10 },
|
|
407
|
-
limit: topK
|
|
403
|
+
limit: resolvedRequest.topK
|
|
408
404
|
},
|
|
409
405
|
{
|
|
410
|
-
data: query,
|
|
406
|
+
data: resolvedRequest.query,
|
|
411
407
|
anns_field: "sparse_vector",
|
|
412
408
|
param: { "drop_ratio_search": 0.2 },
|
|
413
|
-
limit: topK
|
|
409
|
+
limit: resolvedRequest.topK
|
|
414
410
|
}
|
|
415
411
|
];
|
|
416
412
|
console.log(`[Context] 🔍 Search request 1 (dense): anns_field="${searchRequests[0].anns_field}", vector_dim=${queryEmbedding.vector.length}, limit=${searchRequests[0].limit}`);
|
|
417
|
-
console.log(`[Context] 🔍 Search request 2 (sparse): anns_field="${searchRequests[1].anns_field}", query_text="${query}", limit=${searchRequests[1].limit}`);
|
|
413
|
+
console.log(`[Context] 🔍 Search request 2 (sparse): anns_field="${searchRequests[1].anns_field}", query_text="${resolvedRequest.query}", limit=${searchRequests[1].limit}`);
|
|
418
414
|
// 3. Execute hybrid search
|
|
419
415
|
console.log(`[Context] 🔍 Executing hybrid search with RRF reranking...`);
|
|
420
416
|
const searchResults = await this.vectorDatabase.hybridSearch(collectionName, searchRequests, {
|
|
@@ -422,8 +418,9 @@ class Context {
|
|
|
422
418
|
strategy: 'rrf',
|
|
423
419
|
params: { k: 100 }
|
|
424
420
|
},
|
|
425
|
-
limit: topK,
|
|
426
|
-
|
|
421
|
+
limit: resolvedRequest.topK,
|
|
422
|
+
// Hybrid RRF scores are backend/rerank relative, so dense similarity
|
|
423
|
+
// thresholds can erase valid sparse lexical matches before MCP ranking.
|
|
427
424
|
filterExpr: effectiveFilterExpr
|
|
428
425
|
});
|
|
429
426
|
console.log(`[Context] 🔍 Raw search results count: ${searchResults.length}`);
|
|
@@ -438,7 +435,9 @@ class Context {
|
|
|
438
435
|
breadcrumbs: normalizeBreadcrumbs(result.document.metadata.breadcrumbs),
|
|
439
436
|
indexedAt: typeof result.document.metadata.indexedAt === 'string' ? result.document.metadata.indexedAt : undefined,
|
|
440
437
|
symbolId: typeof result.document.metadata.symbolId === 'string' ? result.document.metadata.symbolId : undefined,
|
|
441
|
-
symbolLabel: typeof result.document.metadata.symbolLabel === 'string' ? result.document.metadata.symbolLabel : undefined
|
|
438
|
+
symbolLabel: typeof result.document.metadata.symbolLabel === 'string' ? result.document.metadata.symbolLabel : undefined,
|
|
439
|
+
backendScore: result.score,
|
|
440
|
+
backendScoreKind: 'rrf_fusion'
|
|
442
441
|
}));
|
|
443
442
|
console.log(`[Context] ✅ Found ${results.length} relevant hybrid results`);
|
|
444
443
|
if (results.length > 0) {
|
|
@@ -449,9 +448,12 @@ class Context {
|
|
|
449
448
|
else {
|
|
450
449
|
// Regular semantic search
|
|
451
450
|
// 1. Generate query vector
|
|
452
|
-
const queryEmbedding = await this.embedding.embed(query);
|
|
451
|
+
const queryEmbedding = await this.embedding.embed(resolvedRequest.query);
|
|
452
|
+
const denseThreshold = resolvedRequest.scorePolicy.kind === 'dense_similarity_min'
|
|
453
|
+
? resolvedRequest.scorePolicy.min
|
|
454
|
+
: undefined;
|
|
453
455
|
// 2. Search in vector database
|
|
454
|
-
const searchResults = await this.vectorDatabase.search(collectionName, queryEmbedding.vector, { topK, threshold, filterExpr: effectiveFilterExpr });
|
|
456
|
+
const searchResults = await this.vectorDatabase.search(collectionName, queryEmbedding.vector, { topK: resolvedRequest.topK, threshold: denseThreshold, filterExpr: effectiveFilterExpr });
|
|
455
457
|
// 3. Convert to semantic search result format
|
|
456
458
|
const results = searchResults.map(result => ({
|
|
457
459
|
content: result.document.content,
|
|
@@ -463,12 +465,54 @@ class Context {
|
|
|
463
465
|
breadcrumbs: normalizeBreadcrumbs(result.document.metadata.breadcrumbs),
|
|
464
466
|
indexedAt: typeof result.document.metadata.indexedAt === 'string' ? result.document.metadata.indexedAt : undefined,
|
|
465
467
|
symbolId: typeof result.document.metadata.symbolId === 'string' ? result.document.metadata.symbolId : undefined,
|
|
466
|
-
symbolLabel: typeof result.document.metadata.symbolLabel === 'string' ? result.document.metadata.symbolLabel : undefined
|
|
468
|
+
symbolLabel: typeof result.document.metadata.symbolLabel === 'string' ? result.document.metadata.symbolLabel : undefined,
|
|
469
|
+
backendScore: result.score,
|
|
470
|
+
backendScoreKind: 'dense_similarity'
|
|
467
471
|
}));
|
|
468
472
|
console.log(`[Context] ✅ Found ${results.length} relevant results`);
|
|
469
473
|
return results;
|
|
470
474
|
}
|
|
471
475
|
}
|
|
476
|
+
normalizeSemanticSearchRequest(requestOrCodebasePath, query, topK = 5, threshold = 0.5, filterExpr) {
|
|
477
|
+
if (typeof requestOrCodebasePath === 'string') {
|
|
478
|
+
return {
|
|
479
|
+
codebasePath: requestOrCodebasePath,
|
|
480
|
+
query: query ?? '',
|
|
481
|
+
topK,
|
|
482
|
+
filterExpr,
|
|
483
|
+
...(threshold > 0
|
|
484
|
+
? {
|
|
485
|
+
retrievalMode: 'dense',
|
|
486
|
+
scorePolicy: { kind: 'dense_similarity_min', min: threshold }
|
|
487
|
+
}
|
|
488
|
+
: {
|
|
489
|
+
scorePolicy: { kind: 'topk_only' }
|
|
490
|
+
})
|
|
491
|
+
};
|
|
492
|
+
}
|
|
493
|
+
return requestOrCodebasePath;
|
|
494
|
+
}
|
|
495
|
+
resolveSemanticSearchRequest(request) {
|
|
496
|
+
const hybridEnabled = this.getIsHybrid() === true;
|
|
497
|
+
const retrievalMode = request.retrievalMode ?? (hybridEnabled ? 'hybrid' : 'dense');
|
|
498
|
+
const scorePolicy = request.scorePolicy ?? (retrievalMode === 'dense'
|
|
499
|
+
? { kind: 'dense_similarity_min', min: 0.5 }
|
|
500
|
+
: { kind: 'topk_only' });
|
|
501
|
+
if (request.retrievalMode !== undefined && retrievalMode !== 'dense' && hybridEnabled !== true) {
|
|
502
|
+
throw new Error(`${retrievalMode} retrieval requires hybrid search support, but HYBRID_MODE is disabled.`);
|
|
503
|
+
}
|
|
504
|
+
if (retrievalMode !== 'dense' && scorePolicy.kind === 'dense_similarity_min') {
|
|
505
|
+
throw new Error(`Dense similarity threshold score policy is invalid for ${retrievalMode} retrieval.`);
|
|
506
|
+
}
|
|
507
|
+
return {
|
|
508
|
+
codebasePath: request.codebasePath,
|
|
509
|
+
query: request.query,
|
|
510
|
+
topK: request.topK ?? 5,
|
|
511
|
+
retrievalMode,
|
|
512
|
+
filterExpr: request.filterExpr ?? '',
|
|
513
|
+
scorePolicy
|
|
514
|
+
};
|
|
515
|
+
}
|
|
472
516
|
buildSemanticSearchFilterExpr(filterExpr) {
|
|
473
517
|
const markerExclusion = `fileExtension != "${vectordb_1.INDEX_COMPLETION_MARKER_FILE_EXTENSION}"`;
|
|
474
518
|
if (!filterExpr || filterExpr.trim().length === 0) {
|
package/dist/types.d.ts
CHANGED
|
@@ -1,8 +1,17 @@
|
|
|
1
|
+
import type { BackendScoreKind, RetrievalMode, ScorePolicy } from './vectordb/types';
|
|
1
2
|
export interface SearchQuery {
|
|
2
3
|
term: string;
|
|
3
4
|
includeContent?: boolean;
|
|
4
5
|
limit?: number;
|
|
5
6
|
}
|
|
7
|
+
export interface SemanticSearchRequest {
|
|
8
|
+
codebasePath: string;
|
|
9
|
+
query: string;
|
|
10
|
+
topK?: number;
|
|
11
|
+
retrievalMode?: RetrievalMode;
|
|
12
|
+
filterExpr?: string;
|
|
13
|
+
scorePolicy?: ScorePolicy;
|
|
14
|
+
}
|
|
6
15
|
export interface SemanticSearchResult {
|
|
7
16
|
content: string;
|
|
8
17
|
relativePath: string;
|
|
@@ -14,5 +23,7 @@ export interface SemanticSearchResult {
|
|
|
14
23
|
indexedAt?: string;
|
|
15
24
|
symbolId?: string;
|
|
16
25
|
symbolLabel?: string;
|
|
26
|
+
backendScore?: number;
|
|
27
|
+
backendScoreKind?: BackendScoreKind;
|
|
17
28
|
}
|
|
18
29
|
//# sourceMappingURL=types.d.ts.map
|
package/dist/vectordb/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export { VectorDocument, SearchOptions, VectorSearchResult, VectorDatabase, CollectionDetails, VectorStoreBackendInfo, HybridSearchRequest, HybridSearchOptions, HybridSearchResult, RerankStrategy, IndexCompletionFingerprint, IndexCompletionMarkerDocument, INDEX_COMPLETION_MARKER_DOC_ID, INDEX_COMPLETION_MARKER_FILE_EXTENSION, INDEX_COMPLETION_MARKER_RELATIVE_PATH, COLLECTION_LIMIT_MESSAGE } from './types';
|
|
1
|
+
export { VectorDocument, SearchOptions, VectorSearchResult, VectorDatabase, CollectionDetails, VectorStoreBackendInfo, HybridSearchRequest, HybridSearchOptions, HybridSearchResult, RerankStrategy, RetrievalMode, ScorePolicy, BackendScoreKind, IndexCompletionFingerprint, IndexCompletionMarkerDocument, INDEX_COMPLETION_MARKER_DOC_ID, INDEX_COMPLETION_MARKER_FILE_EXTENSION, INDEX_COMPLETION_MARKER_RELATIVE_PATH, COLLECTION_LIMIT_MESSAGE } from './types';
|
|
2
2
|
export { MilvusRestfulVectorDatabase, MilvusRestfulConfig } from './milvus-restful-vectordb';
|
|
3
3
|
export { MilvusVectorDatabase, MilvusConfig } from './milvus-vectordb';
|
|
4
4
|
export { ClusterManager, ZillizConfig, Project, Cluster, CreateFreeClusterRequest, CreateFreeClusterResponse, CreateFreeClusterWithDetailsResponse, DescribeClusterResponse } from './zilliz-utils';
|
package/dist/vectordb/types.d.ts
CHANGED
|
@@ -8,6 +8,14 @@ export interface VectorDocument {
|
|
|
8
8
|
fileExtension: string;
|
|
9
9
|
metadata: Record<string, any>;
|
|
10
10
|
}
|
|
11
|
+
export type RetrievalMode = 'dense' | 'lexical' | 'hybrid';
|
|
12
|
+
export type ScorePolicy = {
|
|
13
|
+
kind: 'dense_similarity_min';
|
|
14
|
+
min: number;
|
|
15
|
+
} | {
|
|
16
|
+
kind: 'topk_only';
|
|
17
|
+
};
|
|
18
|
+
export type BackendScoreKind = 'dense_similarity' | 'lexical_rank' | 'rrf_fusion';
|
|
11
19
|
export interface SearchOptions {
|
|
12
20
|
topK?: number;
|
|
13
21
|
filter?: Record<string, any>;
|