codebase-context 1.5.1 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +128 -158
- package/dist/core/indexer.d.ts.map +1 -1
- package/dist/core/indexer.js +26 -16
- package/dist/core/indexer.js.map +1 -1
- package/dist/core/reranker.d.ts +23 -0
- package/dist/core/reranker.d.ts.map +1 -0
- package/dist/core/reranker.js +120 -0
- package/dist/core/reranker.js.map +1 -0
- package/dist/core/search.d.ts +10 -2
- package/dist/core/search.d.ts.map +1 -1
- package/dist/core/search.js +312 -68
- package/dist/core/search.js.map +1 -1
- package/dist/embeddings/transformers.d.ts.map +1 -1
- package/dist/embeddings/transformers.js +17 -7
- package/dist/embeddings/transformers.js.map +1 -1
- package/dist/embeddings/types.d.ts.map +1 -1
- package/dist/embeddings/types.js +3 -0
- package/dist/embeddings/types.js.map +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +109 -49
- package/dist/index.js.map +1 -1
- package/dist/preflight/evidence-lock.js +1 -1
- package/dist/types/index.d.ts +0 -3
- package/dist/types/index.d.ts.map +1 -1
- package/dist/utils/chunking.js +2 -2
- package/dist/utils/chunking.js.map +1 -1
- package/dist/utils/usage-tracker.d.ts.map +1 -1
- package/dist/utils/usage-tracker.js +2 -4
- package/dist/utils/usage-tracker.js.map +1 -1
- package/docs/capabilities.md +75 -0
- package/package.json +31 -9
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"reranker.d.ts","sourceRoot":"","sources":["../../src/core/reranker.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAIH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAsFtD;;;;GAIG;AACH,wBAAgB,WAAW,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,OAAO,CAQ5D;AAED;;;;GAIG;AACH,wBAAsB,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,YAAY,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CA8B5F"}
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
/**
 * Stage-2 cross-encoder reranker for search results.
 *
 * Triggered by score ambiguity (clustered top scores), not by intent.
 * Uses a lightweight cross-encoder to re-score (query, passage) pairs,
 * converting high top-3 recall into better top-1 accuracy.
 *
 * Default model: Xenova/ms-marco-MiniLM-L-6-v2 (~22M params, ~80MB, CPU-safe).
 */
const DEFAULT_RERANKER_MODEL = 'Xenova/ms-marco-MiniLM-L-6-v2';
/** How many top results to rerank (keeps latency bounded) */
const RERANK_TOP_K = 10;
/** Trigger reranking when the score gap between #1 and #3 is below this threshold */
const AMBIGUITY_THRESHOLD = 0.08;
/** Maximum number of snippet characters included in a passage */
const MAX_SNIPPET_CHARS = 500;
/** Token limit passed to the tokenizer (inputs are truncated to this length) */
const MAX_SEQUENCE_TOKENS = 512;

// Lazily-initialised module state, shared by every rerank() call.
let cachedTokenizer = null;
let cachedModel = null;
// In-flight load; non-null only while loading or after a successful load.
let initPromise = null;

/**
 * Load the tokenizer and cross-encoder model once and cache them.
 *
 * Concurrent callers share a single in-flight load. If the load fails
 * (missing package, failed download, ...) the in-flight promise is cleared
 * so that a later call retries instead of receiving the same cached
 * rejection forever.
 *
 * @returns {Promise<void>} Resolves once both tokenizer and model are cached.
 * @throws Re-throws whatever the dynamic import or `from_pretrained` rejected with.
 */
async function ensureModelLoaded() {
    if (cachedModel && cachedTokenizer) {
        return;
    }
    if (initPromise) {
        return initPromise;
    }
    const loading = (async () => {
        const { AutoTokenizer, AutoModelForSequenceClassification } = await import('@huggingface/transformers');
        console.error(`[reranker] Loading cross-encoder: ${DEFAULT_RERANKER_MODEL}`);
        console.error('[reranker] (First run will download the model - this may take a moment)');
        const tokenizer = await AutoTokenizer.from_pretrained(DEFAULT_RERANKER_MODEL);
        const model = await AutoModelForSequenceClassification.from_pretrained(DEFAULT_RERANKER_MODEL, {
            dtype: 'q8'
        });
        // Publish both together so a failed model load never leaves a
        // tokenizer-only, half-initialised cache behind.
        cachedTokenizer = tokenizer;
        cachedModel = model;
        console.error('[reranker] Cross-encoder loaded successfully');
    })();
    initPromise = loading.catch((error) => {
        // Forget the failed attempt so the next caller can try again.
        initPromise = null;
        throw error;
    });
    return initPromise;
}

/**
 * Build a compact passage from a search result for cross-encoder scoring.
 * Keeps it short - cross-encoders are slow on long inputs.
 *
 * Parts are emitted in a fixed order, one per line: normalised file path,
 * component type, layer, summary, then a truncated snippet. Type and layer
 * are skipped when missing or equal to 'unknown'.
 *
 * @param {object} result - Search result; reads `filePath`, `componentType`,
 *   `layer`, `summary` and `snippet`.
 * @returns {string} Newline-joined passage text.
 */
function buildPassage(result) {
    // File path is critical signal; normalise Windows separators.
    const parts = [`path: ${result.filePath.replace(/\\/g, '/')}`];
    if (result.componentType && result.componentType !== 'unknown') {
        parts.push(`type: ${result.componentType}`);
    }
    if (result.layer && result.layer !== 'unknown') {
        parts.push(`layer: ${result.layer}`);
    }
    if (result.summary) {
        parts.push(result.summary);
    }
    if (result.snippet) {
        // Only the head of the snippet: the tokenizer truncates at
        // MAX_SEQUENCE_TOKENS anyway, so longer text is wasted work.
        parts.push(result.snippet.slice(0, MAX_SNIPPET_CHARS));
    }
    return parts.join('\n');
}

/**
 * Score a single (query, passage) pair using the cross-encoder.
 * Must only be called after ensureModelLoaded() has resolved.
 *
 * @param {string} query - User query.
 * @param {string} passage - Passage text built by buildPassage().
 * @returns {Promise<number>} Raw relevance logit (higher = more relevant).
 */
async function scorePair(query, passage) {
    const inputs = cachedTokenizer(query, passage, {
        padding: true,
        truncation: true,
        max_length: MAX_SEQUENCE_TOKENS
    });
    const output = await cachedModel(inputs);
    // The first logit is used as the relevance score.
    return output.logits.data[0];
}

/**
 * Map a raw logit onto (0, 1).
 *
 * @param {number} x - Raw logit.
 * @returns {number} Logistic sigmoid of `x`.
 */
function sigmoid(x) {
    return 1 / (1 + Math.exp(-x));
}

/**
 * Detect whether the result set has ambiguous ordering.
 * Returns true when the top scores are clustered, meaning
 * the embedding model isn't confident about the ranking.
 *
 * @param {Array<{score: number}>} results - Results, best first.
 * @returns {boolean} True when there are at least three results and the
 *   gap between the first and third score is below AMBIGUITY_THRESHOLD.
 */
export function isAmbiguous(results) {
    if (results.length < 3) {
        return false;
    }
    // Length is >= 3 here, so the third entry always exists.
    const gap = results[0].score - results[2].score;
    return gap < AMBIGUITY_THRESHOLD;
}

/**
 * Rerank the top-K results using a cross-encoder.
 * Only reranks when scores are ambiguous (clustered).
 * Returns the full result array with the top-K portion re-ordered.
 *
 * When no reranking happens the input array itself is returned. Otherwise a
 * new array is returned: the first RERANK_TOP_K entries are shallow copies
 * ordered by cross-encoder score, with `score` replaced by the sigmoid of the
 * logit; the remaining entries are passed through untouched and therefore
 * keep their original score.
 *
 * @param {string} query - User query.
 * @param {Array<object>} results - Search results, best first.
 * @returns {Promise<Array<object>>} Results with the head re-ordered.
 * @throws Propagates model loading or inference failures.
 */
export async function rerank(query, results) {
    if (results.length <= 1 || !isAmbiguous(results)) {
        return results;
    }
    await ensureModelLoaded();
    // slice() clamps to the array length, so no explicit Math.min is needed.
    const head = results.slice(0, RERANK_TOP_K);
    const tail = results.slice(head.length);
    // Score each result against the query, one pair at a time.
    const scored = [];
    for (const result of head) {
        const crossScore = await scorePair(query, buildPassage(result));
        scored.push({ result, crossScore });
    }
    // Sort by cross-encoder score (descending); ties keep their original order.
    scored.sort((a, b) => b.crossScore - a.crossScore);
    // Sigmoid normalizes raw logits to [0,1] so downstream quality gating works
    const reranked = scored.map(({ result, crossScore }) => ({
        ...result,
        score: sigmoid(crossScore)
    }));
    return [...reranked, ...tail];
}
//# sourceMappingURL=reranker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"reranker.js","sourceRoot":"","sources":["../../src/core/reranker.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAMH,MAAM,sBAAsB,GAAG,+BAA+B,CAAC;AAE/D,6DAA6D;AAC7D,MAAM,YAAY,GAAG,EAAE,CAAC;AAExB,qFAAqF;AACrF,MAAM,mBAAmB,GAAG,IAAI,CAAC;AAEjC,IAAI,eAAe,GAAQ,IAAI,CAAC;AAChC,IAAI,WAAW,GAAQ,IAAI,CAAC;AAC5B,IAAI,WAAW,GAAyB,IAAI,CAAC;AAE7C,KAAK,UAAU,iBAAiB;IAC9B,IAAI,WAAW,IAAI,eAAe;QAAE,OAAO;IAC3C,IAAI,WAAW;QAAE,OAAO,WAAW,CAAC;IAEpC,WAAW,GAAG,CAAC,KAAK,IAAI,EAAE;QACxB,MAAM,EAAE,aAAa,EAAE,kCAAkC,EAAE,GACzD,MAAM,MAAM,CAAC,2BAA2B,CAAC,CAAC;QAE5C,OAAO,CAAC,KAAK,CAAC,qCAAqC,sBAAsB,EAAE,CAAC,CAAC;QAC7E,OAAO,CAAC,KAAK,CAAC,yEAAyE,CAAC,CAAC;QAEzF,eAAe,GAAG,MAAM,aAAa,CAAC,eAAe,CAAC,sBAAsB,CAAC,CAAC;QAC9E,WAAW,GAAG,MAAM,kCAAkC,CAAC,eAAe,CAAC,sBAAsB,EAAE;YAC7F,KAAK,EAAE,IAAI;SACZ,CAAC,CAAC;QAEH,OAAO,CAAC,KAAK,CAAC,8CAA8C,CAAC,CAAC;IAChE,CAAC,CAAC,EAAE,CAAC;IAEL,OAAO,WAAW,CAAC;AACrB,CAAC;AAED;;;GAGG;AACH,SAAS,YAAY,CAAC,MAAoB;IACxC,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,+BAA+B;IAC/B,KAAK,CAAC,IAAI,CAAC,SAAS,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;IAE3D,sCAAsC;IACtC,IAAI,MAAM,CAAC,aAAa,IAAI,MAAM,CAAC,aAAa,KAAK,SAAS,EAAE,CAAC;QAC/D,KAAK,CAAC,IAAI,CAAC,SAAS,MAAM,CAAC,aAAa,EAAE,CAAC,CAAC;IAC9C,CAAC;IACD,IAAI,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;QAC/C,KAAK,CAAC,IAAI,CAAC,UAAU,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;IACvC,CAAC;IAED,8CAA8C;IAC9C,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;QACnB,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAC7B,CAAC;IAED,kEAAkE;IAClE,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;QACnB,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QAC7C,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACtB,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED;;;GAGG;AACH,KAAK,UAAU,SAAS,CAAC,KAAa,EAAE,OAAe;IACrD,MAAM,MAAM,GAAG,eAAe,CAAC,KAAK,EAAE,OAAO,EAAE;QAC7C,OAAO,EAAE,IAAI;QACb,UAAU,EAAE,IAAI;QAChB,UAAU,EAAE,GAAG;KAChB,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,MAAM,CAAC,CAAC;IAEzC,qDAAqD;IACrD,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,
CAAC,CAAC,CAAC,CAAC;IACpC,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,WAAW,CAAC,OAAuB;IACjD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,KAAK,CAAC;IAErC,MAAM,QAAQ,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;IAClC,MAAM,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;IAClE,MAAM,GAAG,GAAG,QAAQ,GAAG,UAAU,CAAC;IAElC,OAAO,GAAG,GAAG,mBAAmB,CAAC;AACnC,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,MAAM,CAAC,KAAa,EAAE,OAAuB;IACjE,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,OAAO,CAAC;IACxC,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC;QAAE,OAAO,OAAO,CAAC;IAE1C,MAAM,iBAAiB,EAAE,CAAC;IAE1B,MAAM,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,YAAY,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC;IAC1E,MAAM,IAAI,GAAG,OAAO,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;IAE5C,8DAA8D;IAC9D,MAAM,MAAM,GAAwD,EAAE,CAAC;IAEvE,KAAK,MAAM,MAAM,IAAI,QAAQ,EAAE,CAAC;QAC9B,MAAM,OAAO,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC;QACrC,MAAM,UAAU,GAAG,MAAM,SAAS,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QACnD,MAAM,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC,CAAC;IACtC,CAAC;IAED,2CAA2C;IAC3C,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC,CAAC;IAEnD,4DAA4D;IAC5D,4EAA4E;IAC5E,MAAM,OAAO,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACtD,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC,CAAC;QACvD,GAAG,MAAM;QACT,KAAK,EAAE,OAAO,CAAC,UAAU,CAAC;KAC3B,CAAC,CAAC,CAAC;IAEJ,OAAO,CAAC,GAAG,QAAQ,EAAE,GAAG,IAAI,CAAC,CAAC;AAChC,CAAC"}
|
package/dist/core/search.d.ts
CHANGED
|
@@ -11,6 +11,8 @@ export interface SearchOptions {
|
|
|
11
11
|
enableQueryExpansion?: boolean;
|
|
12
12
|
enableLowConfidenceRescue?: boolean;
|
|
13
13
|
candidateFloor?: number;
|
|
14
|
+
/** Enable stage-2 cross-encoder reranking when top scores are ambiguous. Default: true. */
|
|
15
|
+
enableReranker?: boolean;
|
|
14
16
|
}
|
|
15
17
|
export type SearchIntentProfile = 'explore' | 'edit' | 'refactor' | 'migrate';
|
|
16
18
|
export declare class CodebaseSearcher {
|
|
@@ -22,20 +24,26 @@ export declare class CodebaseSearcher {
|
|
|
22
24
|
private storageProvider;
|
|
23
25
|
private initialized;
|
|
24
26
|
private patternIntelligence;
|
|
27
|
+
private importCentrality;
|
|
25
28
|
constructor(rootPath: string);
|
|
26
29
|
initialize(): Promise<void>;
|
|
27
30
|
private loadKeywordIndex;
|
|
28
31
|
/**
|
|
29
|
-
*
|
|
32
|
+
* Load pattern intelligence for trend detection and warnings
|
|
30
33
|
*/
|
|
31
34
|
private loadPatternIntelligence;
|
|
32
35
|
/**
|
|
33
|
-
*
|
|
36
|
+
* Detect pattern trend from chunk content
|
|
34
37
|
*/
|
|
35
38
|
private detectChunkTrend;
|
|
36
39
|
private isTestFile;
|
|
37
40
|
private normalizeQueryTerms;
|
|
41
|
+
/**
|
|
42
|
+
* Classify query intent based on heuristic patterns
|
|
43
|
+
*/
|
|
44
|
+
private classifyQueryIntent;
|
|
38
45
|
private buildQueryVariants;
|
|
46
|
+
private isTemplateOrStyleFile;
|
|
39
47
|
private isCompositionRootFile;
|
|
40
48
|
private queryPathTokenOverlap;
|
|
41
49
|
private isLikelyWiringOrFlowQuery;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"search.d.ts","sourceRoot":"","sources":["../../src/core/search.ts"],"names":[],"mappings":"AAAA;;GAEG;AAMH,OAAO,EAAa,YAAY,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"search.d.ts","sourceRoot":"","sources":["../../src/core/search.ts"],"names":[],"mappings":"AAAA;;GAEG;AAMH,OAAO,EAAa,YAAY,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAe3E,MAAM,WAAW,aAAa;IAC5B,iBAAiB,CAAC,EAAE,OAAO,CAAC;IAC5B,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,OAAO,CAAC,EAAE,mBAAmB,CAAC;IAC9B,oBAAoB,CAAC,EAAE,OAAO,CAAC;IAC/B,yBAAyB,CAAC,EAAE,OAAO,CAAC;IACpC,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,2FAA2F;IAC3F,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED,MAAM,MAAM,mBAAmB,GAAG,SAAS,GAAG,MAAM,GAAG,UAAU,GAAG,SAAS,CAAC;AA0E9E,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,WAAW,CAAS;IAE5B,OAAO,CAAC,SAAS,CAAgC;IACjD,OAAO,CAAC,MAAM,CAAmB;IAEjC,OAAO,CAAC,iBAAiB,CAAkC;IAC3D,OAAO,CAAC,eAAe,CAAsC;IAE7D,OAAO,CAAC,WAAW,CAAS;IAG5B,OAAO,CAAC,mBAAmB,CAIX;IAEhB,OAAO,CAAC,gBAAgB,CAAoC;gBAEhD,QAAQ,EAAE,MAAM;IAKtB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;YAsBnB,gBAAgB;IA4B9B;;OAEG;YACW,uBAAuB;IA8ErC;;OAEG;IACH,OAAO,CAAC,gBAAgB;IA+BxB,OAAO,CAAC,UAAU;IAUlB,OAAO,CAAC,mBAAmB;IAO3B;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAqE3B,OAAO,CAAC,kBAAkB;IAkC1B,OAAO,CAAC,qBAAqB;IAK7B,OAAO,CAAC,qBAAqB;IAe7B,OAAO,CAAC,qBAAqB;IAQ7B,OAAO,CAAC,yBAAyB;IAMjC,OAAO,CAAC,kBAAkB;IAM1B,OAAO,CAAC,uBAAuB;IAc/B,OAAO,CAAC,mBAAmB;IAwP3B,OAAO,CAAC,mBAAmB;YAuBb,oBAAoB;IAkF5B,MAAM,CACV,KAAK,EAAE,MAAM,EACb,KAAK,GAAE,MAAU,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,OAAO,GAAE,aAAsC,GAC9C,OAAO,CAAC,YAAY,EAAE,CAAC;IAoG1B,OAAO,CAAC,eAAe;IAiDvB,OAAO,CAAC,eAAe;YAWT,cAAc;YAmBd,aAAa;IA4E3B,OAAO,CAAC,uBAAuB;IAoBzB,aAAa,IAAI,OAAO,CAAC,MAAM,CAAC;IAOtC,OAAO,IAAI,OAAO;CAGnB"}
|