@zokizuan/satori-core 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -1
- package/dist/core/context.d.ts +4 -1
- package/dist/core/context.js +65 -22
- package/dist/types.d.ts +11 -0
- package/dist/vectordb/index.d.ts +1 -1
- package/dist/vectordb/types.d.ts +8 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -36,7 +36,13 @@ const context = new Context({
|
|
|
36
36
|
});
|
|
37
37
|
|
|
38
38
|
await context.indexCodebase('/absolute/path/to/repo');
|
|
39
|
-
const results = await context.semanticSearch(
|
|
39
|
+
const results = await context.semanticSearch({
|
|
40
|
+
codebasePath: '/absolute/path/to/repo',
|
|
41
|
+
query: 'authentication logic',
|
|
42
|
+
topK: 5,
|
|
43
|
+
retrievalMode: 'hybrid',
|
|
44
|
+
scorePolicy: { kind: 'topk_only' }
|
|
45
|
+
});
|
|
40
46
|
```
|
|
41
47
|
|
|
42
48
|
## Development
|
package/dist/core/context.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { Splitter } from '../splitter';
|
|
2
2
|
import { Embedding } from '../embedding';
|
|
3
3
|
import { VectorDatabase, IndexCompletionMarkerDocument } from '../vectordb';
|
|
4
|
-
import { SemanticSearchResult } from '../types';
|
|
4
|
+
import { SemanticSearchRequest, SemanticSearchResult } from '../types';
|
|
5
5
|
import { FileSynchronizer } from '../sync/synchronizer';
|
|
6
6
|
export interface ContextConfig {
|
|
7
7
|
embedding?: Embedding;
|
|
@@ -125,7 +125,10 @@ export declare class Context {
|
|
|
125
125
|
* @param topK Number of results to return
|
|
126
126
|
* @param threshold Similarity threshold
|
|
127
127
|
*/
|
|
128
|
+
semanticSearch(request: SemanticSearchRequest): Promise<SemanticSearchResult[]>;
|
|
128
129
|
semanticSearch(codebasePath: string, query: string, topK?: number, threshold?: number, filterExpr?: string): Promise<SemanticSearchResult[]>;
|
|
130
|
+
private normalizeSemanticSearchRequest;
|
|
131
|
+
private resolveSemanticSearchRequest;
|
|
129
132
|
private buildSemanticSearchFilterExpr;
|
|
130
133
|
private queryCompletionMarkerRows;
|
|
131
134
|
clearIndexCompletionMarker(codebasePath: string): Promise<void>;
|
package/dist/core/context.js
CHANGED
|
@@ -353,18 +353,14 @@ class Context {
|
|
|
353
353
|
}
|
|
354
354
|
}
|
|
355
355
|
}
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
* @param threshold Similarity threshold
|
|
362
|
-
*/
|
|
363
|
-
async semanticSearch(codebasePath, query, topK = 5, threshold = 0.5, filterExpr) {
|
|
364
|
-
const isHybrid = this.getIsHybrid();
|
|
356
|
+
async semanticSearch(requestOrCodebasePath, query, topK = 5, threshold = 0.5, filterExpr) {
|
|
357
|
+
const request = this.normalizeSemanticSearchRequest(requestOrCodebasePath, query, topK, threshold, filterExpr);
|
|
358
|
+
const resolvedRequest = this.resolveSemanticSearchRequest(request);
|
|
359
|
+
const codebasePath = resolvedRequest.codebasePath;
|
|
360
|
+
const isHybrid = resolvedRequest.retrievalMode !== 'dense' && this.getIsHybrid() === true;
|
|
365
361
|
const searchType = isHybrid === true ? 'hybrid search' : 'semantic search';
|
|
366
|
-
console.log(`[Context] 🔍 Executing ${searchType}: "${query}" in ${codebasePath}`);
|
|
367
|
-
const effectiveFilterExpr = this.buildSemanticSearchFilterExpr(filterExpr);
|
|
362
|
+
console.log(`[Context] 🔍 Executing ${searchType}: "${resolvedRequest.query}" in ${codebasePath}`);
|
|
363
|
+
const effectiveFilterExpr = this.buildSemanticSearchFilterExpr(resolvedRequest.filterExpr);
|
|
368
364
|
const normalizeBreadcrumbs = (value) => {
|
|
369
365
|
if (!Array.isArray(value)) {
|
|
370
366
|
return undefined;
|
|
@@ -394,8 +390,8 @@ class Context {
|
|
|
394
390
|
console.log(`[Context] ⚠️ Collection '${collectionName}' exists but may be empty or not properly indexed:`, error);
|
|
395
391
|
}
|
|
396
392
|
// 1. Generate query vector
|
|
397
|
-
console.log(`[Context] 🔍 Generating embeddings for query: "${query}"`);
|
|
398
|
-
const queryEmbedding = await this.embedding.embed(query);
|
|
393
|
+
console.log(`[Context] 🔍 Generating embeddings for query: "${resolvedRequest.query}"`);
|
|
394
|
+
const queryEmbedding = await this.embedding.embed(resolvedRequest.query);
|
|
399
395
|
console.log(`[Context] ✅ Generated embedding vector with dimension: ${queryEmbedding.vector.length}`);
|
|
400
396
|
console.log(`[Context] 🔍 First 5 embedding values: [${queryEmbedding.vector.slice(0, 5).join(', ')}]`);
|
|
401
397
|
// 2. Prepare hybrid search requests
|
|
@@ -404,17 +400,17 @@ class Context {
|
|
|
404
400
|
data: queryEmbedding.vector,
|
|
405
401
|
anns_field: "vector",
|
|
406
402
|
param: { "nprobe": 10 },
|
|
407
|
-
limit: topK
|
|
403
|
+
limit: resolvedRequest.topK
|
|
408
404
|
},
|
|
409
405
|
{
|
|
410
|
-
data: query,
|
|
406
|
+
data: resolvedRequest.query,
|
|
411
407
|
anns_field: "sparse_vector",
|
|
412
408
|
param: { "drop_ratio_search": 0.2 },
|
|
413
|
-
limit: topK
|
|
409
|
+
limit: resolvedRequest.topK
|
|
414
410
|
}
|
|
415
411
|
];
|
|
416
412
|
console.log(`[Context] 🔍 Search request 1 (dense): anns_field="${searchRequests[0].anns_field}", vector_dim=${queryEmbedding.vector.length}, limit=${searchRequests[0].limit}`);
|
|
417
|
-
console.log(`[Context] 🔍 Search request 2 (sparse): anns_field="${searchRequests[1].anns_field}", query_text="${query}", limit=${searchRequests[1].limit}`);
|
|
413
|
+
console.log(`[Context] 🔍 Search request 2 (sparse): anns_field="${searchRequests[1].anns_field}", query_text="${resolvedRequest.query}", limit=${searchRequests[1].limit}`);
|
|
418
414
|
// 3. Execute hybrid search
|
|
419
415
|
console.log(`[Context] 🔍 Executing hybrid search with RRF reranking...`);
|
|
420
416
|
const searchResults = await this.vectorDatabase.hybridSearch(collectionName, searchRequests, {
|
|
@@ -422,7 +418,7 @@ class Context {
|
|
|
422
418
|
strategy: 'rrf',
|
|
423
419
|
params: { k: 100 }
|
|
424
420
|
},
|
|
425
|
-
limit: topK,
|
|
421
|
+
limit: resolvedRequest.topK,
|
|
426
422
|
// Hybrid RRF scores are backend/rerank relative, so dense similarity
|
|
427
423
|
// thresholds can erase valid sparse lexical matches before MCP ranking.
|
|
428
424
|
filterExpr: effectiveFilterExpr
|
|
@@ -439,7 +435,9 @@ class Context {
|
|
|
439
435
|
breadcrumbs: normalizeBreadcrumbs(result.document.metadata.breadcrumbs),
|
|
440
436
|
indexedAt: typeof result.document.metadata.indexedAt === 'string' ? result.document.metadata.indexedAt : undefined,
|
|
441
437
|
symbolId: typeof result.document.metadata.symbolId === 'string' ? result.document.metadata.symbolId : undefined,
|
|
442
|
-
symbolLabel: typeof result.document.metadata.symbolLabel === 'string' ? result.document.metadata.symbolLabel : undefined
|
|
438
|
+
symbolLabel: typeof result.document.metadata.symbolLabel === 'string' ? result.document.metadata.symbolLabel : undefined,
|
|
439
|
+
backendScore: result.score,
|
|
440
|
+
backendScoreKind: 'rrf_fusion'
|
|
443
441
|
}));
|
|
444
442
|
console.log(`[Context] ✅ Found ${results.length} relevant hybrid results`);
|
|
445
443
|
if (results.length > 0) {
|
|
@@ -450,9 +448,12 @@ class Context {
|
|
|
450
448
|
else {
|
|
451
449
|
// Regular semantic search
|
|
452
450
|
// 1. Generate query vector
|
|
453
|
-
const queryEmbedding = await this.embedding.embed(query);
|
|
451
|
+
const queryEmbedding = await this.embedding.embed(resolvedRequest.query);
|
|
452
|
+
const denseThreshold = resolvedRequest.scorePolicy.kind === 'dense_similarity_min'
|
|
453
|
+
? resolvedRequest.scorePolicy.min
|
|
454
|
+
: undefined;
|
|
454
455
|
// 2. Search in vector database
|
|
455
|
-
const searchResults = await this.vectorDatabase.search(collectionName, queryEmbedding.vector, { topK, threshold, filterExpr: effectiveFilterExpr });
|
|
456
|
+
const searchResults = await this.vectorDatabase.search(collectionName, queryEmbedding.vector, { topK: resolvedRequest.topK, threshold: denseThreshold, filterExpr: effectiveFilterExpr });
|
|
456
457
|
// 3. Convert to semantic search result format
|
|
457
458
|
const results = searchResults.map(result => ({
|
|
458
459
|
content: result.document.content,
|
|
@@ -464,12 +465,54 @@ class Context {
|
|
|
464
465
|
breadcrumbs: normalizeBreadcrumbs(result.document.metadata.breadcrumbs),
|
|
465
466
|
indexedAt: typeof result.document.metadata.indexedAt === 'string' ? result.document.metadata.indexedAt : undefined,
|
|
466
467
|
symbolId: typeof result.document.metadata.symbolId === 'string' ? result.document.metadata.symbolId : undefined,
|
|
467
|
-
symbolLabel: typeof result.document.metadata.symbolLabel === 'string' ? result.document.metadata.symbolLabel : undefined
|
|
468
|
+
symbolLabel: typeof result.document.metadata.symbolLabel === 'string' ? result.document.metadata.symbolLabel : undefined,
|
|
469
|
+
backendScore: result.score,
|
|
470
|
+
backendScoreKind: 'dense_similarity'
|
|
468
471
|
}));
|
|
469
472
|
console.log(`[Context] ✅ Found ${results.length} relevant results`);
|
|
470
473
|
return results;
|
|
471
474
|
}
|
|
472
475
|
}
|
|
476
|
+
normalizeSemanticSearchRequest(requestOrCodebasePath, query, topK = 5, threshold = 0.5, filterExpr) {
|
|
477
|
+
if (typeof requestOrCodebasePath === 'string') {
|
|
478
|
+
return {
|
|
479
|
+
codebasePath: requestOrCodebasePath,
|
|
480
|
+
query: query ?? '',
|
|
481
|
+
topK,
|
|
482
|
+
filterExpr,
|
|
483
|
+
...(threshold > 0
|
|
484
|
+
? {
|
|
485
|
+
retrievalMode: 'dense',
|
|
486
|
+
scorePolicy: { kind: 'dense_similarity_min', min: threshold }
|
|
487
|
+
}
|
|
488
|
+
: {
|
|
489
|
+
scorePolicy: { kind: 'topk_only' }
|
|
490
|
+
})
|
|
491
|
+
};
|
|
492
|
+
}
|
|
493
|
+
return requestOrCodebasePath;
|
|
494
|
+
}
|
|
495
|
+
resolveSemanticSearchRequest(request) {
|
|
496
|
+
const hybridEnabled = this.getIsHybrid() === true;
|
|
497
|
+
const retrievalMode = request.retrievalMode ?? (hybridEnabled ? 'hybrid' : 'dense');
|
|
498
|
+
const scorePolicy = request.scorePolicy ?? (retrievalMode === 'dense'
|
|
499
|
+
? { kind: 'dense_similarity_min', min: 0.5 }
|
|
500
|
+
: { kind: 'topk_only' });
|
|
501
|
+
if (request.retrievalMode !== undefined && retrievalMode !== 'dense' && hybridEnabled !== true) {
|
|
502
|
+
throw new Error(`${retrievalMode} retrieval requires hybrid search support, but HYBRID_MODE is disabled.`);
|
|
503
|
+
}
|
|
504
|
+
if (retrievalMode !== 'dense' && scorePolicy.kind === 'dense_similarity_min') {
|
|
505
|
+
throw new Error(`Dense similarity threshold score policy is invalid for ${retrievalMode} retrieval.`);
|
|
506
|
+
}
|
|
507
|
+
return {
|
|
508
|
+
codebasePath: request.codebasePath,
|
|
509
|
+
query: request.query,
|
|
510
|
+
topK: request.topK ?? 5,
|
|
511
|
+
retrievalMode,
|
|
512
|
+
filterExpr: request.filterExpr ?? '',
|
|
513
|
+
scorePolicy
|
|
514
|
+
};
|
|
515
|
+
}
|
|
473
516
|
buildSemanticSearchFilterExpr(filterExpr) {
|
|
474
517
|
const markerExclusion = `fileExtension != "${vectordb_1.INDEX_COMPLETION_MARKER_FILE_EXTENSION}"`;
|
|
475
518
|
if (!filterExpr || filterExpr.trim().length === 0) {
|
package/dist/types.d.ts
CHANGED
|
@@ -1,8 +1,17 @@
|
|
|
1
|
+
import type { BackendScoreKind, RetrievalMode, ScorePolicy } from './vectordb/types';
|
|
1
2
|
export interface SearchQuery {
|
|
2
3
|
term: string;
|
|
3
4
|
includeContent?: boolean;
|
|
4
5
|
limit?: number;
|
|
5
6
|
}
|
|
7
|
+
export interface SemanticSearchRequest {
|
|
8
|
+
codebasePath: string;
|
|
9
|
+
query: string;
|
|
10
|
+
topK?: number;
|
|
11
|
+
retrievalMode?: RetrievalMode;
|
|
12
|
+
filterExpr?: string;
|
|
13
|
+
scorePolicy?: ScorePolicy;
|
|
14
|
+
}
|
|
6
15
|
export interface SemanticSearchResult {
|
|
7
16
|
content: string;
|
|
8
17
|
relativePath: string;
|
|
@@ -14,5 +23,7 @@ export interface SemanticSearchResult {
|
|
|
14
23
|
indexedAt?: string;
|
|
15
24
|
symbolId?: string;
|
|
16
25
|
symbolLabel?: string;
|
|
26
|
+
backendScore?: number;
|
|
27
|
+
backendScoreKind?: BackendScoreKind;
|
|
17
28
|
}
|
|
18
29
|
//# sourceMappingURL=types.d.ts.map
|
package/dist/vectordb/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export { VectorDocument, SearchOptions, VectorSearchResult, VectorDatabase, CollectionDetails, VectorStoreBackendInfo, HybridSearchRequest, HybridSearchOptions, HybridSearchResult, RerankStrategy, IndexCompletionFingerprint, IndexCompletionMarkerDocument, INDEX_COMPLETION_MARKER_DOC_ID, INDEX_COMPLETION_MARKER_FILE_EXTENSION, INDEX_COMPLETION_MARKER_RELATIVE_PATH, COLLECTION_LIMIT_MESSAGE } from './types';
|
|
1
|
+
export { VectorDocument, SearchOptions, VectorSearchResult, VectorDatabase, CollectionDetails, VectorStoreBackendInfo, HybridSearchRequest, HybridSearchOptions, HybridSearchResult, RerankStrategy, RetrievalMode, ScorePolicy, BackendScoreKind, IndexCompletionFingerprint, IndexCompletionMarkerDocument, INDEX_COMPLETION_MARKER_DOC_ID, INDEX_COMPLETION_MARKER_FILE_EXTENSION, INDEX_COMPLETION_MARKER_RELATIVE_PATH, COLLECTION_LIMIT_MESSAGE } from './types';
|
|
2
2
|
export { MilvusRestfulVectorDatabase, MilvusRestfulConfig } from './milvus-restful-vectordb';
|
|
3
3
|
export { MilvusVectorDatabase, MilvusConfig } from './milvus-vectordb';
|
|
4
4
|
export { ClusterManager, ZillizConfig, Project, Cluster, CreateFreeClusterRequest, CreateFreeClusterResponse, CreateFreeClusterWithDetailsResponse, DescribeClusterResponse } from './zilliz-utils';
|
package/dist/vectordb/types.d.ts
CHANGED
|
@@ -8,6 +8,14 @@ export interface VectorDocument {
|
|
|
8
8
|
fileExtension: string;
|
|
9
9
|
metadata: Record<string, any>;
|
|
10
10
|
}
|
|
11
|
+
export type RetrievalMode = 'dense' | 'lexical' | 'hybrid';
|
|
12
|
+
export type ScorePolicy = {
|
|
13
|
+
kind: 'dense_similarity_min';
|
|
14
|
+
min: number;
|
|
15
|
+
} | {
|
|
16
|
+
kind: 'topk_only';
|
|
17
|
+
};
|
|
18
|
+
export type BackendScoreKind = 'dense_similarity' | 'lexical_rank' | 'rrf_fusion';
|
|
11
19
|
export interface SearchOptions {
|
|
12
20
|
topK?: number;
|
|
13
21
|
filter?: Record<string, any>;
|