@codeledger/selector 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -0
- package/dist/index.js.map +1 -1
- package/dist/knowledge-bundle.d.ts +41 -0
- package/dist/knowledge-bundle.d.ts.map +1 -0
- package/dist/knowledge-bundle.js +200 -0
- package/dist/knowledge-bundle.js.map +1 -0
- package/dist/knowledge-candidates.d.ts +27 -0
- package/dist/knowledge-candidates.d.ts.map +1 -0
- package/dist/knowledge-candidates.js +123 -0
- package/dist/knowledge-candidates.js.map +1 -0
- package/dist/knowledge-excerpt.d.ts +26 -0
- package/dist/knowledge-excerpt.d.ts.map +1 -0
- package/dist/knowledge-excerpt.js +179 -0
- package/dist/knowledge-excerpt.js.map +1 -0
- package/dist/knowledge-scorer.d.ts +33 -0
- package/dist/knowledge-scorer.d.ts.map +1 -0
- package/dist/knowledge-scorer.js +234 -0
- package/dist/knowledge-scorer.js.map +1 -0
- package/dist/mode-detect.d.ts +23 -0
- package/dist/mode-detect.d.ts.map +1 -0
- package/dist/mode-detect.js +76 -0
- package/dist/mode-detect.js.map +1 -0
- package/package.json +2 -2
package/dist/index.d.ts
CHANGED
|
@@ -19,4 +19,12 @@ export { scanTodos, todoRelevanceScore } from './todo-scan.js';
|
|
|
19
19
|
export { estimateTokensCalibrated, estimateTokensByExtension } from './token-calibration.js';
|
|
20
20
|
export { getFileLayer, sortByLayer } from './layer-ordering.js';
|
|
21
21
|
export type { LayerRule } from './layer-ordering.js';
|
|
22
|
+
export { computeKnowledgeFeatures, scoreKnowledgeFile, deriveKnowledgeReasons, scoreAllKnowledgeCandidates, DEFAULT_KNOWLEDGE_WEIGHTS, } from './knowledge-scorer.js';
|
|
23
|
+
export { tokenizeIntent, computeKnowledgeTokenWeights, inferAnchorFolders, generateKnowledgeCandidates, } from './knowledge-candidates.js';
|
|
24
|
+
export { extractKnowledgeExcerpt } from './knowledge-excerpt.js';
|
|
25
|
+
export type { KnowledgeExcerptResult } from './knowledge-excerpt.js';
|
|
26
|
+
export { buildKnowledgeBundle, buildKnowledgeTrace } from './knowledge-bundle.js';
|
|
27
|
+
export type { KnowledgeBundleOptions } from './knowledge-bundle.js';
|
|
28
|
+
export { detectSelectionMode } from './mode-detect.js';
|
|
29
|
+
export type { ModeDetectionInput } from './mode-detect.js';
|
|
22
30
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,YAAY,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AACvG,YAAY,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AACvD,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,kBAAkB,EAAE,aAAa,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAClH,YAAY,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAC/C,OAAO,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAC5D,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAC1C,YAAY,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AACtD,OAAO,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AACnD,OAAO,EAAE,sBAAsB,EAAE,MAAM,YAAY,CAAC;AACpD,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AACpD,OAAO,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AACnD,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AACnD,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,oBAAoB,EAAE,MAAM,gBAAgB,CAAC;AACrE,YAAY,EAAE,QAAQ,EAAE,MAAM,gBAAgB,CAAC;AAC/C,OAAO,EAAE,SAAS,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AAC/D,OAAO,EAAE,wBAAwB,EAAE,yBAAyB,EAAE,MAAM,wBAAwB,CAAC;AAC7F,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAChE,YAAY,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,YAAY,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AACvG,YAAY,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AACvD,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,kBAAkB,EAAE,aAAa,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAClH,YAAY,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAC/C,OAAO,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAC5D,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAC1C,YAAY,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AACtD,OAAO,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AACnD,OAAO,EAAE,sBAAsB,EAAE,MAAM,YAAY,CAAC;AACpD,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AACpD,OAAO,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AACnD,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AACnD,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,oBAAoB,EAAE,MAAM,gBAAgB,CAAC;AACrE,YAAY,EAAE,QAAQ,EAAE,MAAM,gBAAgB,CAAC;AAC/C,OAAO,EAAE,SAAS,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AAC/D,OAAO,EAAE,wBAAwB,EAAE,yBAAyB,EAAE,MAAM,wBAAwB,CAAC;AAC7F,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAChE,YAAY,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAGrD,OAAO,EACL,wBAAwB,EACxB,kBAAkB,EAClB,sBAAsB,EACtB,2BAA2B,EAC3B,yBAAyB,GAC1B,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EACL,cAAc,EACd,4BAA4B,EAC5B,kBAAkB,EAClB,2BAA2B,GAC5B,MAAM,2BAA2B,CAAC;AACnC,OAAO,EAAE,uBAAuB,EAAE,MAAM,wBAAwB,CAAC;AACjE,YAAY,EAAE,sBAAsB,EAAE,MAAM,wBAAwB,CAAC;AACrE,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAClF,YAAY,EAAE,sBAAsB,EAAE,MAAM,uBAAuB,CAAC;AACpE,OAAO,EAAE,mBAAmB,EAAE,MAAM,kBAAkB,CAAC;AACvD,YAAY,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -14,4 +14,10 @@ export { inferTaskType, applyTaskTypeWeights } from './task-type.js';
|
|
|
14
14
|
export { scanTodos, todoRelevanceScore } from './todo-scan.js';
|
|
15
15
|
export { estimateTokensCalibrated, estimateTokensByExtension } from './token-calibration.js';
|
|
16
16
|
export { getFileLayer, sortByLayer } from './layer-ordering.js';
|
|
17
|
+
// ─── Knowledge Mode ─────────────────────────────────────────────────────────
|
|
18
|
+
export { computeKnowledgeFeatures, scoreKnowledgeFile, deriveKnowledgeReasons, scoreAllKnowledgeCandidates, DEFAULT_KNOWLEDGE_WEIGHTS, } from './knowledge-scorer.js';
|
|
19
|
+
export { tokenizeIntent, computeKnowledgeTokenWeights, inferAnchorFolders, generateKnowledgeCandidates, } from './knowledge-candidates.js';
|
|
20
|
+
export { extractKnowledgeExcerpt } from './knowledge-excerpt.js';
|
|
21
|
+
export { buildKnowledgeBundle, buildKnowledgeTrace } from './knowledge-bundle.js';
|
|
22
|
+
export { detectSelectionMode } from './mode-detect.js';
|
|
17
23
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,YAAY,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AAEvG,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,kBAAkB,EAAE,aAAa,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAElH,OAAO,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAC5D,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAE1C,OAAO,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AACnD,OAAO,EAAE,sBAAsB,EAAE,MAAM,YAAY,CAAC;AACpD,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AACpD,OAAO,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AACnD,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AACnD,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,oBAAoB,EAAE,MAAM,gBAAgB,CAAC;AAErE,OAAO,EAAE,SAAS,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AAC/D,OAAO,EAAE,wBAAwB,EAAE,yBAAyB,EAAE,MAAM,wBAAwB,CAAC;AAC7F,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,YAAY,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AAEvG,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,kBAAkB,EAAE,aAAa,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAElH,OAAO,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAC5D,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAE1C,OAAO,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AACnD,OAAO,EAAE,sBAAsB,EAAE,MAAM,YAAY,CAAC;AACpD,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AACpD,OAAO,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AACnD,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AACnD,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,oBAAoB,EAAE,MAAM,gBAAgB,CAAC;AAErE,OAAO,EAAE,SAAS,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AAC/D,OAAO,EAAE,wBAAwB,EAAE,yBAAyB,EAAE,MAAM,wBAAwB,CAAC;AAC7F,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAGhE,+EAA+E;AAC/E,OAAO,EACL,wBAAwB,EACxB,kBAAkB,EAClB,sBAAsB,EACtB,2BAA2B,EAC3B,yBAAyB,GAC1B,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EACL,cAAc,EACd,4BAA4B,EAC5B,kBAAkB,EAClB,2BAA2B,GAC5B,MAAM,2BAA2B,CAAC;AACnC,OAAO,EAAE,uBAAuB,EAAE,MAAM,wBAAwB,CAAC;AAEjE,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAElF,OAAO,EAAE,mBAAmB,EAAE,MAAM,kBAAkB,CAAC"}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Knowledge-mode bundle builder.
|
|
3
|
+
*
|
|
4
|
+
* Produces a CoworkBundle (context-bundle.json schema) using:
|
|
5
|
+
* - Knowledge-mode candidate generation
|
|
6
|
+
* - Knowledge-mode scoring (8 signals; listed in the bundle's `selection.signals`)
|
|
7
|
+
* - Existing stop-rule logic (reused)
|
|
8
|
+
* - Knowledge-mode excerpt extraction (head+headers)
|
|
9
|
+
*/
|
|
10
|
+
import type { CoworkBundle, KnowledgeScoredFile, KnowledgeWeights, WorkspaceIndex } from '@codeledger/types';
|
|
11
|
+
/**
 * Options accepted by buildKnowledgeBundle.
 * Optional fields fall back to the defaults noted on each field.
 */
export interface KnowledgeBundleOptions {
|
|
12
|
+
/** Free-text intent; tokenized for scoring and echoed in the bundle's `intent` section. */
intentText: string;
|
|
13
|
+
/** Workspace index; its `files`, `root`, `fingerprint` and `file_count` are read. */
workspaceIndex: WorkspaceIndex;
|
|
14
|
+
/** Signal weights handed to the scorer. Default: DEFAULT_KNOWLEDGE_WEIGHTS. */
weights?: KnowledgeWeights;
|
|
15
|
+
/** Maximum number of selected files. Default: 20. */
maxFiles?: number;
|
|
16
|
+
/** Estimated-token budget; a single file may overshoot it by up to 10%. Default: 12000. */
maxTokensEst?: number;
|
|
17
|
+
/** Estimated-byte budget summed over selected excerpts. Default: 350000. */
maxBytesEst?: number;
|
|
18
|
+
/** Share of the total positive score at which selection stops. Default: 0.78. */
sufficiencyThreshold?: number;
|
|
19
|
+
/** Line limit forwarded to extractKnowledgeExcerpt. Default: 450. */
excerptMaxLines?: number;
|
|
20
|
+
/** Byte limit forwarded to extractKnowledgeExcerpt. Default: 24000. */
excerptMaxBytes?: number;
|
|
21
|
+
}
|
|
22
|
+
/**
 * Build a knowledge-mode CoworkBundle for the given intent and workspace index.
 *
 * Scored candidates are walked in order and selected until a file, token or
 * byte budget is hit, or the sufficiency threshold is reached.
 */
export declare function buildKnowledgeBundle(opts: KnowledgeBundleOptions): CoworkBundle;
|
|
23
|
+
/**
|
|
24
|
+
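* Build the trace artifact from scored results: the first 30 scored files
* (path, score rounded to 3 decimals, evidence) plus the weights and
* stop-rule settings used for the run.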
|
|
25
|
+
*/
|
|
26
|
+
export declare function buildKnowledgeTrace(scored: KnowledgeScoredFile[], weights: KnowledgeWeights, sufficiencyThreshold: number, terminatedAtRank: number, runId: string): {
|
|
27
|
+
run_id: string;
|
|
28
|
+
timestamp: string;
|
|
29
|
+
scoring_profile: 'knowledge';
|
|
30
|
+
signals: KnowledgeWeights;
|
|
31
|
+
stop_rule: {
|
|
32
|
+
sufficiency_threshold: number;
|
|
33
|
+
reached_at_rank: number;
|
|
34
|
+
};
|
|
35
|
+
ranked_top: Array<{
|
|
36
|
+
path: string;
|
|
37
|
+
score: number;
|
|
38
|
+
evidence: string[];
|
|
39
|
+
}>;
|
|
40
|
+
};
|
|
41
|
+
//# sourceMappingURL=knowledge-bundle.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"knowledge-bundle.d.ts","sourceRoot":"","sources":["../src/knowledge-bundle.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAGH,OAAO,KAAK,EACV,YAAY,EAMZ,mBAAmB,EACnB,gBAAgB,EAChB,cAAc,EACf,MAAM,mBAAmB,CAAC;AAa3B,MAAM,WAAW,sBAAsB;IACrC,UAAU,EAAE,MAAM,CAAC;IACnB,cAAc,EAAE,cAAc,CAAC;IAC/B,OAAO,CAAC,EAAE,gBAAgB,CAAC;IAC3B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED,wBAAgB,oBAAoB,CAAC,IAAI,EAAE,sBAAsB,GAAG,YAAY,CAyN/E;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CACjC,MAAM,EAAE,mBAAmB,EAAE,EAC7B,OAAO,EAAE,gBAAgB,EACzB,oBAAoB,EAAE,MAAM,EAC5B,gBAAgB,EAAE,MAAM,EACxB,KAAK,EAAE,MAAM,GACZ;IACD,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe,EAAE,WAAW,CAAC;IAC7B,OAAO,EAAE,gBAAgB,CAAC;IAC1B,SAAS,EAAE;QAAE,qBAAqB,EAAE,MAAM,CAAC;QAAC,eAAe,EAAE,MAAM,CAAA;KAAE,CAAC;IACtE,UAAU,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC,CAAC;CACxE,CAgBA"}
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Knowledge-mode bundle builder.
|
|
3
|
+
*
|
|
4
|
+
* Produces a CoworkBundle (context-bundle.json schema) using:
|
|
5
|
+
* - Knowledge-mode candidate generation
|
|
6
|
+
* - Knowledge-mode scoring (8 signals; listed in the bundle's `selection.signals`)
|
|
7
|
+
* - Existing stop-rule logic (reused)
|
|
8
|
+
* - Knowledge-mode excerpt extraction (head+headers)
|
|
9
|
+
*/
|
|
10
|
+
import { createHash, randomUUID } from 'node:crypto';
|
|
11
|
+
import { tokenizeIntent, computeKnowledgeTokenWeights, inferAnchorFolders, generateKnowledgeCandidates, } from './knowledge-candidates.js';
|
|
12
|
+
import { scoreAllKnowledgeCandidates, DEFAULT_KNOWLEDGE_WEIGHTS, } from './knowledge-scorer.js';
|
|
13
|
+
import { extractKnowledgeExcerpt } from './knowledge-excerpt.js';
|
|
14
|
+
/**
 * Build a knowledge-mode context bundle for a free-text intent.
 *
 * Pipeline: tokenize the intent, weight the tokens against the workspace,
 * infer anchor folders, generate and score candidates, then walk the scored
 * list in the order the scorer returned it, extracting an excerpt for every
 * accepted file until a stop condition fires.
 *
 * @param {object} opts
 * @param {string} opts.intentText - Free-text intent.
 * @param {object} opts.workspaceIndex - Index whose `files`, `root`,
 *   `fingerprint` and `file_count` are read.
 * @param {object} [opts.weights=DEFAULT_KNOWLEDGE_WEIGHTS] - Signal weights.
 * @param {number} [opts.maxFiles=20] - Maximum number of selected files.
 * @param {number} [opts.maxTokensEst=12000] - Estimated-token budget.
 * @param {number} [opts.maxBytesEst=350000] - Estimated-byte budget.
 * @param {number} [opts.sufficiencyThreshold=0.78] - Share of the total
 *   positive score at which selection stops.
 * @param {number} [opts.excerptMaxLines=450] - Line limit per excerpt.
 * @param {number} [opts.excerptMaxBytes=24000] - Byte limit per excerpt.
 * @returns {object} The bundle: run metadata, intent, budgets, stop rule,
 *   selection policy, selected files and an exclusion summary.
 */
export function buildKnowledgeBundle(opts) {
  const {
    intentText,
    workspaceIndex,
    weights = DEFAULT_KNOWLEDGE_WEIGHTS,
    maxFiles = 20,
    maxTokensEst = 12000,
    maxBytesEst = 350000,
    sufficiencyThreshold = 0.78,
    excerptMaxLines = 450,
    excerptMaxBytes = 24000,
  } = opts;

  // A single clock reading feeds the run id, the scorer and `created_at`,
  // so every timestamp in one bundle agrees.
  const nowMs = Date.now();
  const createdAt = new Date(nowMs).toISOString();
  const runId = `run_${createdAt.replace(/[:.]/g, '-')}_${randomUUID().slice(0, 8)}`;

  // Steps 1-5: tokenize, weight tokens, infer anchors, generate and score.
  const intentTokens = tokenizeIntent(intentText);
  const normalizedIntent = intentText.toLowerCase().trim();
  const tokenWeights = computeKnowledgeTokenWeights(intentTokens, workspaceIndex.files);
  const anchorFolders = inferAnchorFolders(intentTokens, workspaceIndex.files);
  const candidates = generateKnowledgeCandidates(workspaceIndex.files, intentTokens, tokenWeights);
  const scored = scoreAllKnowledgeCandidates(
    candidates,
    intentTokens,
    tokenWeights,
    anchorFolders,
    weights,
    nowMs,
  );

  // Step 6: select with the stop rule. Only positive scores count towards
  // the total that the sufficiency ratio is measured against.
  const maxCumulative = scored.reduce((sum, f) => sum + Math.max(0, f.score), 0);
  const fileMap = new Map(workspaceIndex.files.map((f) => [f.path, f]));
  const selected = [];
  let totalTokens = 0;
  let totalBytes = 0;
  let cumulativeScore = 0;
  // NOTE(review): this default is also what gets reported when the scored
  // list is simply exhausted without any stop condition firing — confirm
  // that is intended.
  let terminationReason = 'threshold_reached';
  let terminatedAtRank = 0;
  let excludedDueToBudget = 0;

  for (let rank = 0; rank < scored.length; rank++) {
    const scoredFile = scored[rank];
    const fileInfo = fileMap.get(scoredFile.path);
    if (!fileInfo) {
      continue;
    }

    // Stop conditions, checked in priority order before touching the file.
    let stopReason = null;
    if (selected.length >= maxFiles) {
      stopReason = 'max_files';
    } else if (totalTokens >= maxTokensEst) {
      stopReason = 'max_tokens';
    } else if (totalBytes >= maxBytesEst) {
      stopReason = 'max_bytes';
    } else if (
      maxCumulative > 0 &&
      selected.length > 0 &&
      cumulativeScore / maxCumulative >= sufficiencyThreshold
    ) {
      stopReason = 'threshold_reached';
    }
    if (stopReason !== null) {
      terminationReason = stopReason;
      terminatedAtRank = rank;
      // Everything from this rank onwards is counted as excluded.
      excludedDueToBudget += scored.length - rank;
      break;
    }

    const excerpt = extractKnowledgeExcerpt(
      workspaceIndex.root,
      scoredFile.path,
      intentTokens,
      excerptMaxLines,
      excerptMaxBytes,
    );

    // Estimate tokens (5 tokens/line for natural language).
    const tokenEst = Math.ceil(excerpt.lines_est * 5);

    // Skip a file that would blow the token budget by more than 10%, but
    // never skip the very first one so the bundle is not left empty.
    if (totalTokens + tokenEst > maxTokensEst * 1.1 && selected.length > 0) {
      excludedDueToBudget++;
      continue;
    }

    // Flatten the path into a single file name: "/" and "." both become "__".
    const excerptRef = `.codeledger/excerpts/${scoredFile.path.replace(/[/.]/g, '__')}.txt`;
    // Short content fingerprint of the excerpt text.
    const contentHash = createHash('md5')
      .update(excerpt.content)
      .digest('hex')
      .slice(0, 12);

    selected.push({
      path: scoredFile.path,
      type: fileInfo.type,
      score: Math.round(scoredFile.score * 1000) / 1000,
      reasons: scoredFile.reasons,
      signals: scoredFile.features,
      size_bytes: fileInfo.size_bytes,
      mtime: fileInfo.mtime,
      hash: `h_${contentHash}`,
      excerpt: {
        ref: excerptRef,
        bytes_est: excerpt.bytes_est,
        lines_est: excerpt.lines_est,
      },
    });
    totalTokens += tokenEst;
    totalBytes += excerpt.bytes_est;
    cumulativeScore += Math.max(0, scoredFile.score);
    terminatedAtRank = rank + 1;
  }

  // Count low-score files (score <= 0) over the whole scored list.
  const excludedLowScore = scored.filter((f) => f.score <= 0).length;

  // Report the excerpt strategy from what was actually selected.
  const hasMarkdown = selected.some((f) => f.type === 'markdown');
  let excerptStrategy = 'head';
  if (hasMarkdown) {
    excerptStrategy = 'head+headers';
  } else if (intentTokens.length > 0) {
    excerptStrategy = 'head+keyword_windows';
  }

  const SIGNALS = [
    'keyword_match',
    'filename_match',
    'path_match',
    'recency',
    'size_penalty',
    'doc_type_prior',
    'folder_proximity',
    'markdown_header_boost',
  ];

  return {
    bundle_version: '1.0',
    run_id: runId,
    created_at: createdAt,
    mode: 'knowledge',
    workspace: {
      root: workspaceIndex.root,
      fingerprint: workspaceIndex.fingerprint,
      file_count: workspaceIndex.file_count,
    },
    intent: {
      text: intentText,
      normalized: normalizedIntent,
      tokens: intentTokens,
    },
    budgets: {
      max_files: maxFiles,
      max_tokens_est: maxTokensEst,
      max_bytes_est: maxBytesEst,
    },
    stop_rule: {
      sufficiency_threshold: sufficiencyThreshold,
      termination_reason: terminationReason,
      terminated_at_rank: terminatedAtRank,
      cumulative_score: Math.round(cumulativeScore * 1000) / 1000,
    },
    selection: {
      signals: [...SIGNALS],
      signal_weights: weights,
      excerpt_policy: {
        max_excerpt_bytes: excerptMaxBytes,
        max_lines: excerptMaxLines,
        strategy: excerptStrategy,
      },
    },
    selected,
    excluded_summary: {
      total_seen: workspaceIndex.file_count,
      excluded_low_score: excludedLowScore,
      excluded_due_to_budget: excludedDueToBudget,
    },
    links: {
      trace_path: '.codeledger/trace.json',
      excerpts_dir: '.codeledger/excerpts/',
      progress_snapshot_path: '.codeledger/progress-snapshot.json',
    },
  };
}
|
|
180
|
+
/**
 * Assemble the trace artifact for a knowledge-mode run.
 *
 * @param {Array<object>} scored - Scored files (`path`, `score`, `reasons`).
 * @param {object} weights - Signal weights, reported under `signals`.
 * @param {number} sufficiencyThreshold - Threshold the stop rule used.
 * @param {number} terminatedAtRank - Rank at which selection ended.
 * @param {string} runId - Identifier of the run being traced.
 * @returns {object} Trace holding run metadata, the stop rule and at most
 *   the first 30 scored entries with scores rounded to three decimals.
 */
export function buildKnowledgeTrace(scored, weights, sufficiencyThreshold, terminatedAtRank, runId) {
  const timestamp = new Date().toISOString();
  const stopRule = {
    sufficiency_threshold: sufficiencyThreshold,
    reached_at_rank: terminatedAtRank,
  };

  const rankedTop = [];
  for (const entry of scored.slice(0, 30)) {
    rankedTop.push({
      path: entry.path,
      score: Math.round(entry.score * 1000) / 1000,
      evidence: entry.reasons,
    });
  }

  return {
    run_id: runId,
    timestamp,
    scoring_profile: 'knowledge',
    signals: weights,
    stop_rule: stopRule,
    ranked_top: rankedTop,
  };
}
|
|
200
|
+
//# sourceMappingURL=knowledge-bundle.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"knowledge-bundle.js","sourceRoot":"","sources":["../src/knowledge-bundle.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAYrD,OAAO,EACL,cAAc,EACd,4BAA4B,EAC5B,kBAAkB,EAClB,2BAA2B,GAC5B,MAAM,2BAA2B,CAAC;AACnC,OAAO,EACL,2BAA2B,EAC3B,yBAAyB,GAC1B,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EAAE,uBAAuB,EAAE,MAAM,wBAAwB,CAAC;AAcjE,MAAM,UAAU,oBAAoB,CAAC,IAA4B;IAC/D,MAAM,EACJ,UAAU,EACV,cAAc,EACd,OAAO,GAAG,yBAAyB,EACnC,QAAQ,GAAG,EAAE,EACb,YAAY,GAAG,KAAK,EACpB,WAAW,GAAG,MAAM,EACpB,oBAAoB,GAAG,IAAI,EAC3B,eAAe,GAAG,GAAG,EACrB,eAAe,GAAG,KAAK,GACxB,GAAG,IAAI,CAAC;IAET,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,MAAM,KAAK,GAAG,OAAO,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,IAAI,UAAU,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;IAElG,0BAA0B;IAC1B,MAAM,YAAY,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;IAChD,MAAM,gBAAgB,GAAG,UAAU,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;IAEzD,gCAAgC;IAChC,MAAM,YAAY,GAAG,4BAA4B,CAAC,YAAY,EAAE,cAAc,CAAC,KAAK,CAAC,CAAC;IAEtF,+BAA+B;IAC/B,MAAM,aAAa,GAAG,kBAAkB,CAAC,YAAY,EAAE,cAAc,CAAC,KAAK,CAAC,CAAC;IAE7E,8BAA8B;IAC9B,MAAM,UAAU,GAAG,2BAA2B,CAC5C,cAAc,CAAC,KAAK,EACpB,YAAY,EACZ,YAAY,CACb,CAAC;IAEF,+BAA+B;IAC/B,MAAM,MAAM,GAAG,2BAA2B,CACxC,UAAU,EACV,YAAY,EACZ,YAAY,EACZ,aAAa,EACb,OAAO,EACP,KAAK,CACN,CAAC;IAEF,gCAAgC;IAChC,MAAM,aAAa,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC;IAC/E,MAAM,QAAQ,GAAyB,EAAE,CAAC;IAC1C,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,eAAe,GAAG,CAAC,CAAC;IACxB,IAAI,iBAAiB,GAA4B,mBAAmB,CAAC;IACrE,IAAI,gBAAgB,GAAG,CAAC,CAAC;IACzB,IAAI,mBAAmB,GAAG,CAAC,CAAC;IAC5B,IAAI,gBAAgB,GAAG,CAAC,CAAC;IAEzB,uCAAuC;IACvC,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,cAAc,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAEtE,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,MAAM,CAAC,MAAM,EAAE,IAAI,EAAE,EAAE,CAAC;QAChD,MAAM,UAAU,GAAG,MAAM,CAAC,IAAI,CA
AE,CAAC;QACjC,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QAC9C,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,wBAAwB;QACxB,IAAI,QAAQ,CAAC,MAAM,IAAI,QAAQ,EAAE,CAAC;YAChC,iBAAiB,GAAG,WAAW,CAAC;YAChC,gBAAgB,GAAG,IAAI,CAAC;YACxB,mBAAmB,IAAI,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC;YAC5C,MAAM;QACR,CAAC;QAED,IAAI,WAAW,IAAI,YAAY,EAAE,CAAC;YAChC,iBAAiB,GAAG,YAAY,CAAC;YACjC,gBAAgB,GAAG,IAAI,CAAC;YACxB,mBAAmB,IAAI,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC;YAC5C,MAAM;QACR,CAAC;QAED,IAAI,UAAU,IAAI,WAAW,EAAE,CAAC;YAC9B,iBAAiB,GAAG,WAAW,CAAC;YAChC,gBAAgB,GAAG,IAAI,CAAC;YACxB,mBAAmB,IAAI,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC;YAC5C,MAAM;QACR,CAAC;QAED,8BAA8B;QAC9B,IAAI,aAAa,GAAG,CAAC,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7C,MAAM,KAAK,GAAG,eAAe,GAAG,aAAa,CAAC;YAC9C,IAAI,KAAK,IAAI,oBAAoB,EAAE,CAAC;gBAClC,iBAAiB,GAAG,mBAAmB,CAAC;gBACxC,gBAAgB,GAAG,IAAI,CAAC;gBACxB,mBAAmB,IAAI,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC;gBAC5C,MAAM;YACR,CAAC;QACH,CAAC;QAED,kBAAkB;QAClB,MAAM,OAAO,GAAG,uBAAuB,CACrC,cAAc,CAAC,IAAI,EACnB,UAAU,CAAC,IAAI,EACf,YAAY,EACZ,eAAe,EACf,eAAe,CAChB,CAAC;QAEF,uDAAuD;QACvD,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;QAElD,mEAAmE;QACnE,IAAI,WAAW,GAAG,QAAQ,GAAG,YAAY,GAAG,GAAG,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvE,mBAAmB,EAAE,CAAC;YACtB,SAAS;QACX,CAAC;QAED,yBAAyB;QACzB,MAAM,UAAU,GAAG,wBAAwB,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC;QAE3G,MAAM,WAAW,GAAG,UAAU,CAAC,KAAK,CAAC;aAClC,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC;aACvB,MAAM,CAAC,KAAK,CAAC;aACb,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAEhB,MAAM,YAAY,GAAuB;YACvC,IAAI,EAAE,UAAU,CAAC,IAAI;YACrB,IAAI,EAAE,QAAQ,CAAC,IAAI;YACnB,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,IAAI;YACjD,OAAO,EAAE,UAAU,CAAC,OAAO;YAC3B,OAAO,EAAE,UAAU,CAAC,QAAQ;YAC5B,UAAU,EAAE,QAAQ,CAAC,UAAU;YAC/B,KAAK,EAAE,QAAQ,CAAC,KAAK;YACrB,IAAI,EAAE,KAAK,WAAW,EAAE;YACxB,OAAO,EAAE;gBACP,GAAG,EAAE,UAAU;gBACf,SAAS,EAAE,OAAO,CAAC,SAAS;gBAC5B,SAAS,EAAE,OAAO,CAAC,SAAS;aAC7B;SACF,CAAC;QAEF,QAAQ,CAAC,IAAI,CAAC,YAAY,
CAAC,CAAC;QAC5B,WAAW,IAAI,QAAQ,CAAC;QACxB,UAAU,IAAI,OAAO,CAAC,SAAS,CAAC;QAChC,eAAe,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,CAAC,KAAK,CAAC,CAAC;QACjD,gBAAgB,GAAG,IAAI,GAAG,CAAC,CAAC;IAC9B,CAAC;IAED,8CAA8C;IAC9C,gBAAgB,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC;IAE7D,kCAAkC;IAClC,MAAM,WAAW,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,UAAU,CAAC,CAAC;IAChE,MAAM,eAAe,GAA0B,WAAW;QACxD,CAAC,CAAC,cAAc;QAChB,CAAC,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC;YACvB,CAAC,CAAC,sBAAsB;YACxB,CAAC,CAAC,MAAM,CAAC;IAEb,MAAM,OAAO,GAAwB;QACnC,SAAS,EAAE,QAAQ;QACnB,cAAc,EAAE,YAAY;QAC5B,aAAa,EAAE,WAAW;KAC3B,CAAC;IAEF,MAAM,QAAQ,GAAyB;QACrC,qBAAqB,EAAE,oBAAoB;QAC3C,kBAAkB,EAAE,iBAAiB;QACrC,kBAAkB,EAAE,gBAAgB;QACpC,gBAAgB,EAAE,IAAI,CAAC,KAAK,CAAC,eAAe,GAAG,IAAI,CAAC,GAAG,IAAI;KAC5D,CAAC;IAEF,MAAM,OAAO,GAAG;QACd,eAAe;QACf,gBAAgB;QAChB,YAAY;QACZ,SAAS;QACT,cAAc;QACd,gBAAgB;QAChB,kBAAkB;QAClB,uBAAuB;KACf,CAAC;IAEX,OAAO;QACL,cAAc,EAAE,KAAK;QACrB,MAAM,EAAE,KAAK;QACb,UAAU,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACpC,IAAI,EAAE,WAAW;QACjB,SAAS,EAAE;YACT,IAAI,EAAE,cAAc,CAAC,IAAI;YACzB,WAAW,EAAE,cAAc,CAAC,WAAW;YACvC,UAAU,EAAE,cAAc,CAAC,UAAU;SACtC;QACD,MAAM,EAAE;YACN,IAAI,EAAE,UAAU;YAChB,UAAU,EAAE,gBAAgB;YAC5B,MAAM,EAAE,YAAY;SACrB;QACD,OAAO;QACP,SAAS,EAAE,QAAQ;QACnB,SAAS,EAAE;YACT,OAAO,EAAE,CAAC,GAAG,OAAO,CAAC;YACrB,cAAc,EAAE,OAAO;YACvB,cAAc,EAAE;gBACd,iBAAiB,EAAE,eAAe;gBAClC,SAAS,EAAE,eAAe;gBAC1B,QAAQ,EAAE,eAAe;aAC1B;SACF;QACD,QAAQ;QACR,gBAAgB,EAAE;YAChB,UAAU,EAAE,cAAc,CAAC,UAAU;YACrC,kBAAkB,EAAE,gBAAgB;YACpC,sBAAsB,EAAE,mBAAmB;SAC5C;QACD,KAAK,EAAE;YACL,UAAU,EAAE,wBAAwB;YACpC,YAAY,EAAE,uBAAuB;YACrC,sBAAsB,EAAE,oCAAoC;SAC7D;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,CACjC,MAA6B,EAC7B,OAAyB,EACzB,oBAA4B,EAC5B,gBAAwB,EACxB,KAAa;IASb,OAAO;QACL,MAAM,EAAE,KAAK;QACb,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,eAAe,EAAE,WAAW;QAC5B,OAAO,EAAE,OAAO;QAChB,SAAS,EAAE;YACT,qBAAqB,EAAE,oBAAoB;YAC3C,eAAe,EAAE,gBAAgB;SAClC;QACD,UAAU,EAAE,MAAM,CAAC,KAA
K,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAC1C,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,IAAI;YACxC,QAAQ,EAAE,CAAC,CAAC,OAAO;SACpB,CAAC,CAAC;KACJ,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Knowledge-mode candidate generation.
|
|
3
|
+
*
|
|
4
|
+
* Simpler than code-mode: no dependency graph, no test mapping, no fan-out.
|
|
5
|
+
* Instead: keyword IDF, folder proximity, recency, type filters.
|
|
6
|
+
*/
|
|
7
|
+
import type { WorkspaceFileInfo } from '@codeledger/types';
|
|
8
|
+
/**
|
|
9
|
+
* Tokenize intent text into keywords (stop-word filtered).
|
|
10
|
+
*/
|
|
11
|
+
export declare function tokenizeIntent(text: string): string[];
|
|
12
|
+
/**
|
|
13
|
+
* Compute IDF-like token weights based on file match frequency.
|
|
14
|
+
*/
|
|
15
|
+
export declare function computeKnowledgeTokenWeights(tokens: string[], files: WorkspaceFileInfo[]): Map<string, number>;
|
|
16
|
+
/**
|
|
17
|
+
* Infer "anchor" folders from intent tokens.
|
|
18
|
+
* A folder at any depth is an anchor when its name equals a token, allowing
* for a trailing "s" on either the folder name or the token.
|
|
19
|
+
*/
|
|
20
|
+
export declare function inferAnchorFolders(tokens: string[], files: WorkspaceFileInfo[]): string[];
|
|
21
|
+
/**
|
|
22
|
+
* Generate knowledge-mode candidates.
|
|
23
|
+
* Supported file types (.md, .txt, .json, .yaml, .yml) are always candidates;
|
|
24
|
+
* any other file is kept only when its path contains a non-zero-weight intent token.
|
|
25
|
+
*/
|
|
26
|
+
export declare function generateKnowledgeCandidates(files: WorkspaceFileInfo[], intentTokens: string[], tokenWeights: Map<string, number>): WorkspaceFileInfo[];
|
|
27
|
+
//# sourceMappingURL=knowledge-candidates.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"knowledge-candidates.d.ts","sourceRoot":"","sources":["../src/knowledge-candidates.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAkB3D;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAMrD;AAED;;GAEG;AACH,wBAAgB,4BAA4B,CAC1C,MAAM,EAAE,MAAM,EAAE,EAChB,KAAK,EAAE,iBAAiB,EAAE,GACzB,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CA6BrB;AAED;;;GAGG;AACH,wBAAgB,kBAAkB,CAChC,MAAM,EAAE,MAAM,EAAE,EAChB,KAAK,EAAE,iBAAiB,EAAE,GACzB,MAAM,EAAE,CAuBV;AAED;;;;GAIG;AACH,wBAAgB,2BAA2B,CACzC,KAAK,EAAE,iBAAiB,EAAE,EAC1B,YAAY,EAAE,MAAM,EAAE,EACtB,YAAY,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,GAChC,iBAAiB,EAAE,CAgCrB"}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Knowledge-mode candidate generation.
|
|
3
|
+
*
|
|
4
|
+
* Simpler than code-mode: no dependency graph, no test mapping, no fan-out.
|
|
5
|
+
* Instead: keyword IDF, folder proximity, recency, type filters.
|
|
6
|
+
*/
|
|
7
|
+
// Words dropped from intent text before scoring, grouped by role.
// Insertion order of the resulting Set matches the flat list it replaces.
const FUNCTION_WORDS = [
  'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
  'of', 'with', 'by', 'from', 'is', 'it', 'that', 'this', 'be', 'as',
];
const AUXILIARY_VERBS = [
  'are', 'was', 'were', 'been', 'being', 'have', 'has', 'had', 'do',
  'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might',
  'shall', 'can', 'need', 'must', 'not', 'no', 'if', 'then', 'else',
];
const ADVERBS_AND_QUANTIFIERS = [
  'when', 'up', 'out', 'so', 'than', 'too', 'very', 'just', 'about',
  'into', 'over', 'after', 'before', 'between', 'under', 'above',
  'all', 'each', 'every', 'both', 'few', 'more', 'most', 'some', 'any',
];
// Request phrasing ("help me draft ...") and question words.
const REQUEST_WORDS = [
  'help', 'me', 'draft', 'write', 'create', 'make', 'give', 'tell',
  'what', 'how', 'why', 'where', 'which',
];
const STOP_WORDS = new Set([
  ...FUNCTION_WORDS,
  ...AUXILIARY_VERBS,
  ...ADVERBS_AND_QUANTIFIERS,
  ...REQUEST_WORDS,
]);

/** File extensions that are always treated as knowledge candidates. */
const SUPPORTED_EXTENSIONS = new Set(['.md', '.txt', '.json', '.yaml', '.yml']);
|
|
21
|
+
/**
 * Split intent text into unique lower-cased keywords.
 *
 * The text is cut on every run of characters that are neither Unicode
 * letters nor digits; single-character pieces and stop words are dropped.
 *
 * @param {string} text - Raw intent text.
 * @returns {string[]} Keywords in order of first appearance, no duplicates.
 */
export function tokenizeIntent(text) {
  const keywords = new Set();
  for (const piece of text.toLowerCase().split(/[^\p{L}\p{N}]+/u)) {
    if (piece.length <= 1) {
      continue;
    }
    if (STOP_WORDS.has(piece)) {
      continue;
    }
    keywords.add(piece);
  }
  return Array.from(keywords);
}
|
|
31
|
+
/**
 * Compute IDF-like token weights based on file match frequency.
 *
 * A file matches a token when its lower-cased path contains the token, or
 * when one of its `content_keywords` equals the token, allowing for a
 * trailing "s" on either side.
 *
 * Weight per token:
 *   - 0 when no file matches;
 *   - 0.35 for tokens of one or two characters;
 *   - otherwise 1 / sqrt(matchCount), never below 0.35.
 *
 * @param {string[]} tokens - Intent tokens.
 * @param {Array<{path: string, content_keywords?: string[]}>} files
 * @returns {Map<string, number>} Weight for every token.
 */
export function computeKnowledgeTokenWeights(tokens, files) {
  const MIN_WEIGHT = 0.35;
  const weights = new Map();
  // Lower-cased paths do not depend on the token, so they are computed once
  // (lazily, on the first token) instead of once per token/file pair.
  let indexed = null;

  for (const token of tokens) {
    if (indexed === null) {
      indexed = [];
      for (const file of files) {
        indexed.push({ pathLower: file.path.toLowerCase(), file });
      }
    }

    const pluralToken = `${token}s`;
    let matchCount = 0;
    for (const { pathLower, file } of indexed) {
      if (pathLower.includes(token)) {
        matchCount++;
        continue;
      }
      const hasKeyword = file.content_keywords?.some(
        (ck) => ck === token || ck === pluralToken || token === `${ck}s`,
      );
      if (hasKeyword) {
        matchCount++;
      }
    }

    if (matchCount === 0) {
      weights.set(token, 0);
      continue;
    }

    // The 0.35 floor already covers very common tokens: 1/sqrt(n) drops
    // below it from n = 9 upwards, so no separate high-frequency cap is needed.
    const idf = token.length <= 2
      ? MIN_WEIGHT
      : Math.max(MIN_WEIGHT, 1 / Math.sqrt(matchCount));
    weights.set(token, idf);
  }
  return weights;
}
|
|
62
|
+
/**
 * Infer "anchor" folders from intent tokens.
 *
 * Every ancestor folder of every file path is considered, at any depth. A
 * folder is an anchor when the lower-cased last segment of its path equals a
 * token, allowing for a trailing "s" on either the folder name or the token.
 *
 * @param {string[]} tokens - Intent tokens, compared against lower-cased names.
 * @param {Array<{path: string}>} files - Files with "/"-separated paths.
 * @returns {string[]} Anchor folder paths in their original casing, in order
 *   of first appearance.
 */
export function inferAnchorFolders(tokens, files) {
  // Collect all unique ancestor folder paths.
  const folders = new Set();
  for (const file of files) {
    const parts = file.path.split('/');
    for (let depth = 1; depth < parts.length; depth++) {
      folders.add(parts.slice(0, depth).join('/'));
    }
  }

  // Set lookups replace a scan over every token for every folder.
  const tokenSet = new Set(tokens);
  const anchors = [];
  for (const folder of folders) {
    const folderName = folder.toLowerCase().split('/').pop() ?? '';
    const isAnchor =
      tokenSet.has(folderName) ||
      // folder "docs" vs token "doc"
      (folderName.endsWith('s') && tokenSet.has(folderName.slice(0, -1))) ||
      // folder "report" vs token "reports"
      tokenSet.has(`${folderName}s`);
    if (isAnchor) {
      anchors.push(folder);
    }
  }
  return anchors;
}
|
|
88
|
+
/**
 * Select the files that are worth handing to the knowledge scorer.
 *
 * Files whose extension is in SUPPORTED_EXTENSIONS are always kept. Any other
 * file is kept only when its lowercased path contains at least one intent
 * token whose weight is non-zero; everything else is skipped to save scoring
 * time.
 */
export function generateKnowledgeCandidates(files, intentTokens, tokenWeights) {
    return files.filter((file) => {
        const lowerPath = file.path.toLowerCase();
        if (SUPPORTED_EXTENSIONS.has(file.extension.toLowerCase())) {
            return true;
        }
        // Unsupported type: require a path hit on a token that carries weight.
        return intentTokens.some((token) => {
            const weight = tokenWeights.get(token) ?? 0;
            return weight !== 0 && lowerPath.includes(token);
        });
    });
}
|
|
123
|
+
//# sourceMappingURL=knowledge-candidates.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"knowledge-candidates.js","sourceRoot":"","sources":["../src/knowledge-candidates.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC;IACzB,KAAK,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK;IACnE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI;IAClE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI;IACjE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,OAAO;IACjE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM;IACjE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO;IACjE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,OAAO,EAAE,OAAO;IAC9D,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK;IACpE,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM;IAChE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,OAAO;CACvC,CAAC,CAAC;AAEH,4DAA4D;AAC5D,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC;AAEhF;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;IACjC,MAAM,KAAK,GAAG,KAAK;SAChB,KAAK,CAAC,iBAAiB,CAAC;SACxB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACrD,OAAO,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC;AAC7B,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,4BAA4B,CAC1C,MAAgB,EAChB,KAA0B;IAE1B,MAAM,OAAO,GAAG,IAAI,GAAG,EAAkB,CAAC;IAE1C,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;YAC1C,IAAI,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC9B,UAAU,EAAE,CAAC;gBACb,SAAS;YACX,CAAC;YACD,IAAI,IAAI,CAAC,gBAAgB,EAAE,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,KAAK,KAAK,IAAI,EAAE,KAAK,KAAK,GAAG,GAAG,IAAI,KAAK,KAAK,EAAE,GAAG,GAAG,C
AAC,EAAE,CAAC;gBAClG,UAAU,EAAE,CAAC;YACf,CAAC;QACH,CAAC;QAED,IAAI,UAAU,KAAK,CAAC,EAAE,CAAC;YACrB,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;YACtB,SAAS;QACX,CAAC;QAED,IAAI,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC,CAAC;QACjD,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC;YAAE,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;QACjD,IAAI,UAAU,GAAG,EAAE;YAAE,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;QAC/C,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;QACzC,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IAC1B,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAChC,MAAgB,EAChB,KAA0B;IAE1B,kCAAkC;IAClC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QACnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;QAC3C,CAAC;IACH,CAAC;IAED,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,WAAW,GAAG,MAAM,CAAC,WAAW,EAAE,CAAC;QACzC,MAAM,UAAU,GAAG,WAAW,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC;QACtD,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,IAAI,UAAU,KAAK,KAAK,IAAI,UAAU,KAAK,KAAK,GAAG,GAAG,IAAI,KAAK,KAAK,UAAU,GAAG,GAAG,EAAE,CAAC;gBACrF,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBACrB,MAAM;YACR,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,2BAA2B,CACzC,KAA0B,EAC1B,YAAsB,EACtB,YAAiC;IAEjC,yDAAyD;IACzD,sEAAsE;IACtE,wEAAwE;IACxE,MAAM,UAAU,GAAwB,EAAE,CAAC;IAE3C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;QAC1C,MAAM,WAAW,GAAG,oBAAoB,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC,CAAC;QAE3E,sCAAsC;QACtC,IAAI,WAAW,EAAE,CAAC;YAChB,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACtB,SAAS;QACX,CAAC;QAED,yEAAyE;QACzE,IAAI,YAAY,GAAG,KAAK,CAAC;QACzB,KAAK,MAAM,KAAK,IA
AI,YAAY,EAAE,CAAC;YACjC,MAAM,CAAC,GAAG,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YACvC,IAAI,CAAC,KAAK,CAAC;gBAAE,SAAS;YACtB,IAAI,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC9B,YAAY,GAAG,IAAI,CAAC;gBACpB,MAAM;YACR,CAAC;QACH,CAAC;QACD,IAAI,YAAY,EAAE,CAAC;YACjB,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxB,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Knowledge-mode excerpt extraction.
|
|
3
|
+
*
|
|
4
|
+
* Strategies:
|
|
5
|
+
* - head: First N lines (for text, JSON, YAML)
|
|
6
|
+
* - head+headers: First N lines + all markdown headers (for .md files)
|
|
7
|
+
* - head+keyword_windows: First N lines + keyword context windows
|
|
8
|
+
*/
|
|
9
|
+
import type { CoworkExcerptStrategy } from '@codeledger/types';
|
|
10
|
+
export interface KnowledgeExcerptResult {
|
|
11
|
+
content: string;
|
|
12
|
+
lines_est: number;
|
|
13
|
+
bytes_est: number;
|
|
14
|
+
strategy: CoworkExcerptStrategy;
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* Extract excerpt from a knowledge-mode file.
|
|
18
|
+
*
|
|
19
|
+
* @param root Workspace root
|
|
20
|
+
* @param filePath Relative file path
|
|
21
|
+
* @param keywords Intent keywords for window extraction
|
|
22
|
+
* @param maxLines Maximum excerpt lines (default 450)
|
|
23
|
+
* @param maxBytes Maximum excerpt bytes (default 24000)
|
|
24
|
+
*/
|
|
25
|
+
export declare function extractKnowledgeExcerpt(root: string, filePath: string, keywords: string[], maxLines?: number, maxBytes?: number): KnowledgeExcerptResult;
|
|
26
|
+
//# sourceMappingURL=knowledge-excerpt.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"knowledge-excerpt.d.ts","sourceRoot":"","sources":["../src/knowledge-excerpt.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAIH,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,mBAAmB,CAAC;AAI/D,MAAM,WAAW,sBAAsB;IACrC,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;GAQG;AACH,wBAAgB,uBAAuB,CACrC,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,MAAM,EAAE,EAClB,QAAQ,GAAE,MAAY,EACtB,QAAQ,GAAE,MAAc,GACvB,sBAAsB,CAoDxB"}
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Knowledge-mode excerpt extraction.
|
|
3
|
+
*
|
|
4
|
+
* Strategies:
|
|
5
|
+
* - head: First N lines (for text, JSON, YAML)
|
|
6
|
+
* - head+headers: First N lines + all markdown headers (for .md files)
|
|
7
|
+
* - head+keyword_windows: First N lines + keyword context windows
|
|
8
|
+
*/
|
|
9
|
+
import { readFileSync, statSync } from 'node:fs';
|
|
10
|
+
import { join } from 'node:path';
|
|
11
|
+
const MAX_FILE_SIZE = 10 * 1024 * 1024; // 10 MB
/**
 * Build a bounded excerpt of one workspace file for knowledge mode.
 *
 * Outcomes, in order of precedence:
 *  - the file cannot be stat'ed or read: empty result;
 *  - the file is larger than MAX_FILE_SIZE: a one-line placeholder;
 *  - the first 8192 characters contain a NUL: a "binary" placeholder;
 *  - the file fits both limits: the full content;
 *  - otherwise: markdown files get head + headers, other files get
 *    head + keyword windows when keywords are given, else a plain head.
 *
 * @param root      Workspace root
 * @param filePath  Relative file path
 * @param keywords  Intent keywords for window extraction
 * @param maxLines  Maximum excerpt lines (default 450)
 * @param maxBytes  Maximum excerpt bytes (default 24000); compared against string length
 */
export function extractKnowledgeExcerpt(root, filePath, keywords, maxLines = 450, maxBytes = 24000) {
    const absPath = join(root, filePath);
    const ext = filePath.split('.').pop()?.toLowerCase() ?? '';
    // Every early-exit result uses the 'head' strategy.
    const headResult = (content, lines_est, bytes_est) => ({
        content,
        lines_est,
        bytes_est,
        strategy: 'head',
    });
    let rawContent;
    try {
        const { size } = statSync(absPath);
        if (size > MAX_FILE_SIZE) {
            return headResult(`[File too large: ${Math.round(size / 1024)}KB]`, 1, 0);
        }
        rawContent = readFileSync(absPath, 'utf-8');
    }
    catch {
        // Missing or unreadable file: best-effort empty excerpt.
        return headResult('', 0, 0);
    }
    // A NUL character near the start is treated as the marker of a binary file.
    const looksBinary = rawContent.slice(0, 8192).indexOf('\0') !== -1;
    if (looksBinary) {
        return headResult('[Binary file — metadata only]', 0, 0);
    }
    const lines = rawContent.split('\n');
    const fitsLimits = lines.length <= maxLines && rawContent.length <= maxBytes;
    if (fitsLimits) {
        return headResult(rawContent, lines.length, rawContent.length);
    }
    // Too big to return whole: pick the strategy from the extension.
    const isMarkdown = ext === 'md' || ext === 'mdx';
    if (isMarkdown) {
        return extractHeadPlusHeaders(lines, keywords, maxLines, maxBytes);
    }
    return keywords.length > 0
        ? extractHeadPlusKeywordWindows(lines, keywords, maxLines, maxBytes)
        : extractHead(lines, maxLines, maxBytes);
}
|
|
68
|
+
/**
 * Take lines from the start of the file until the line or byte budget is hit.
 *
 * A line is only taken when it fits entirely: its length plus one (for the
 * newline re-inserted by join) must not push the running total past maxBytes.
 * When lines are left over, a trailing "[... N more lines omitted ...]" marker
 * is appended to the content.
 *
 * @param lines     File content split into lines
 * @param maxLines  Maximum number of content lines to take
 * @param maxBytes  Size budget, measured in string length units
 * @returns content plus estimates; `lines_est` and `bytes_est` both describe
 *   the content lines only and exclude the omission marker.
 */
function extractHead(lines, maxLines, maxBytes) {
    const selected = [];
    let bytes = 0;
    for (const line of lines) {
        if (selected.length >= maxLines) {
            break;
        }
        const cost = line.length + 1;
        if (bytes + cost > maxBytes) {
            break;
        }
        selected.push(line);
        bytes += cost;
    }
    // Capture the count before the marker is appended so the marker is not
    // reported as an excerpt line.
    const keptCount = selected.length;
    if (keptCount < lines.length) {
        selected.push(`\n[... ${lines.length - keptCount} more lines omitted ...]`);
    }
    return {
        content: selected.join('\n'),
        lines_est: keptCount,
        bytes_est: bytes,
        strategy: 'head',
    };
}
|
|
88
|
+
/**
 * Markdown strategy: keep the head of the file, every later header line with
 * a little context, and a window around every later keyword hit. The chosen
 * line numbers are handed to assembleExcerpt, which applies the budgets.
 */
function extractHeadPlusHeaders(lines, keywords, maxLines, maxBytes) {
    const total = lines.length;
    const keep = new Set();
    const addRange = (from, to) => {
        for (let n = from; n <= to; n++) {
            keep.add(n);
        }
    };
    // Pass 1: Include first 100 lines (head section)
    const headSize = Math.min(100, total);
    addRange(0, headSize - 1);
    // Pass 2: Every markdown header (1-6 '#' followed by whitespace) after the
    // head, plus the next 2 lines for context.
    for (let idx = headSize; idx < total; idx++) {
        if (/^#{1,6}\s/.test(lines[idx])) {
            addRange(idx, Math.min(total - 1, idx + 2));
        }
    }
    // Pass 3: 5 lines either side of each line containing any keyword.
    if (keywords.length > 0) {
        const windowSize = 5;
        for (let idx = headSize; idx < total; idx++) {
            const lowered = lines[idx].toLowerCase();
            if (keywords.some((kw) => lowered.includes(kw))) {
                addRange(Math.max(0, idx - windowSize), Math.min(total - 1, idx + windowSize));
            }
        }
    }
    return assembleExcerpt(lines, keep, maxLines, maxBytes, 'head+headers');
}
|
|
125
|
+
/**
 * Generic strategy: keep the head of the file plus a window around every
 * later line that contains any keyword. The chosen line numbers are handed to
 * assembleExcerpt, which applies the budgets.
 */
function extractHeadPlusKeywordWindows(lines, keywords, maxLines, maxBytes) {
    const total = lines.length;
    const keep = new Set();
    // Pass 1: Head section (first 100 lines, or the whole file if shorter).
    const headSize = Math.min(100, total);
    for (let n = 0; n < headSize; n++) {
        keep.add(n);
    }
    // Pass 2: 8 lines either side of each keyword hit beyond the head.
    const windowSize = 8;
    for (let idx = headSize; idx < total; idx++) {
        const lowered = lines[idx].toLowerCase();
        const hit = keywords.some((kw) => lowered.includes(kw));
        if (!hit) {
            continue;
        }
        const first = Math.max(0, idx - windowSize);
        const last = Math.min(total - 1, idx + windowSize);
        for (let n = first; n <= last; n++) {
            keep.add(n);
        }
    }
    return assembleExcerpt(lines, keep, maxLines, maxBytes, 'head+keyword_windows');
}
|
|
149
|
+
/**
 * Turn a set of selected line numbers into excerpt text.
 *
 * Selected lines are emitted in ascending order. Wherever consecutive emitted
 * lines are not adjacent, a "[... lines A–B omitted ...]" marker (1-based,
 * inclusive) is inserted. Emission stops as soon as the line budget is used
 * up or the next line (plus its gap marker, if any) would exceed the byte
 * budget. A trailing "[... N more lines omitted ...]" marker reports how many
 * lines follow the last line that was actually emitted.
 *
 * @param lines             File content split into lines
 * @param includedLineNums  Set of 0-based line indexes to include
 * @param maxLines          Maximum number of content lines to emit
 * @param maxBytes          Size budget, measured in string length units
 * @param strategy          Strategy label copied into the result
 * @returns content plus estimates; `lines_est` counts content lines only,
 *   `bytes_est` counts content lines and gap markers.
 */
function assembleExcerpt(lines, includedLineNums, maxLines, maxBytes, strategy) {
    const sorted = [...includedLineNums].sort((a, b) => a - b);
    const parts = [];
    let lastEmitted = -1; // index of the last line actually emitted; -1 = none yet
    let bytes = 0;
    let lineCount = 0;
    for (const lineNum of sorted) {
        if (lineCount >= maxLines) {
            break;
        }
        const line = lines[lineNum];
        const needsGap = lastEmitted >= 0 && lineNum > lastEmitted + 1;
        const gap = needsGap
            ? `\n[... lines ${lastEmitted + 2}–${lineNum} omitted ...]\n`
            : '';
        // Check the budget before emitting, so one long line cannot overshoot it
        // and a gap marker is never left dangling without its line.
        const cost = gap.length + line.length + 1;
        if (bytes + cost > maxBytes) {
            break;
        }
        if (needsGap) {
            parts.push(gap);
        }
        parts.push(line);
        bytes += cost;
        lineCount++;
        lastEmitted = lineNum;
    }
    // Count from the last emitted line, not the last selected one, so the
    // marker stays correct when a budget cut the selection short.
    const remaining = lines.length - (lastEmitted + 1);
    if (sorted.length > 0 && remaining > 0) {
        parts.push(`\n[... ${remaining} more lines omitted ...]`);
    }
    return {
        content: parts.join('\n'),
        lines_est: lineCount,
        bytes_est: bytes,
        strategy,
    };
}
|
|
179
|
+
//# sourceMappingURL=knowledge-excerpt.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"knowledge-excerpt.js","sourceRoot":"","sources":["../src/knowledge-excerpt.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AACjD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAGjC,MAAM,aAAa,GAAG,EAAE,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,QAAQ;AAShD;;;;;;;;GAQG;AACH,MAAM,UAAU,uBAAuB,CACrC,IAAY,EACZ,QAAgB,EAChB,QAAkB,EAClB,WAAmB,GAAG,EACtB,WAAmB,KAAK;IAExB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;IACrC,MAAM,GAAG,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;IAE3D,IAAI,UAAkB,CAAC;IACvB,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC;QACpC,IAAI,IAAI,GAAG,aAAa,EAAE,CAAC;YACzB,OAAO;gBACL,OAAO,EAAE,oBAAoB,IAAI,CAAC,KAAK,CAAC,IAAI,GAAG,IAAI,CAAC,KAAK;gBACzD,SAAS,EAAE,CAAC;gBACZ,SAAS,EAAE,CAAC;gBACZ,QAAQ,EAAE,MAAM;aACjB,CAAC;QACJ,CAAC;QACD,UAAU,GAAG,YAAY,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;IAC9C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC;IACvE,CAAC;IAED,0BAA0B;IAC1B,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QACtE,OAAO;YACL,OAAO,EAAE,+BAA+B;YACxC,SAAS,EAAE,CAAC;YACZ,SAAS,EAAE,CAAC;YACZ,QAAQ,EAAE,MAAM;SACjB,CAAC;IACJ,CAAC;IAED,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAErC,kDAAkD;IAClD,IAAI,KAAK,CAAC,MAAM,IAAI,QAAQ,IAAI,UAAU,CAAC,MAAM,IAAI,QAAQ,EAAE,CAAC;QAC9D,OAAO;YACL,OAAO,EAAE,UAAU;YACnB,SAAS,EAAE,KAAK,CAAC,MAAM;YACvB,SAAS,EAAE,UAAU,CAAC,MAAM;YAC5B,QAAQ,EAAE,MAAM;SACjB,CAAC;IACJ,CAAC;IAED,qCAAqC;IACrC,IAAI,GAAG,KAAK,IAAI,IAAI,GAAG,KAAK,KAAK,EAAE,CAAC;QAClC,OAAO,sBAAsB,CAAC,KAAK,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;IACrE,CAAC;IAED,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxB,OAAO,6BAA6B,CAAC,KAAK,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;IAC5E,CAAC;IAED,OAAO,WAAW,CAAC,KAAK,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;AAChD,CAAC;AAED,SAAS,WAAW,CAClB,KAAe,EACf,QAAgB,EAChB,QAAgB;IAEhB,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,IAAI,KAAK,GAAG,CAAC,CAA
C;IAEd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,IAAI,QAAQ,CAAC,MAAM,GAAG,QAAQ,EAAE,CAAC,EAAE,EAAE,CAAC;QACpE,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAE,CAAC;QACvB,IAAI,KAAK,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,GAAG,QAAQ;YAAE,MAAM;QAC9C,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACpB,KAAK,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;IAC3B,CAAC;IAED,IAAI,QAAQ,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;QACnC,QAAQ,CAAC,IAAI,CAAC,UAAU,KAAK,CAAC,MAAM,GAAG,QAAQ,CAAC,MAAM,0BAA0B,CAAC,CAAC;IACpF,CAAC;IAED,OAAO;QACL,OAAO,EAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC;QAC5B,SAAS,EAAE,QAAQ,CAAC,MAAM;QAC1B,SAAS,EAAE,KAAK;QAChB,QAAQ,EAAE,MAAM;KACjB,CAAC;AACJ,CAAC;AAED,SAAS,sBAAsB,CAC7B,KAAe,EACf,QAAkB,EAClB,QAAgB,EAChB,QAAgB;IAEhB,MAAM,gBAAgB,GAAG,IAAI,GAAG,EAAU,CAAC;IAE3C,iDAAiD;IACjD,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAC7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,gBAAgB,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IAC1B,CAAC;IAED,qDAAqD;IACrD,KAAK,IAAI,CAAC,GAAG,QAAQ,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7C,IAAI,KAAK,CAAC,CAAC,CAAE,CAAC,KAAK,CAAC,WAAW,CAAC,EAAE,CAAC;YACjC,4CAA4C;YAC5C,gBAAgB,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACxB,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM;gBAAE,gBAAgB,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YACtD,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM;gBAAE,gBAAgB,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;IAED,yDAAyD;IACzD,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxB,MAAM,UAAU,GAAG,CAAC,CAAC;QACrB,KAAK,IAAI,CAAC,GAAG,QAAQ,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC7C,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,CAAE,CAAC,WAAW,EAAE,CAAC;YAC1C,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;gBAC1B,IAAI,SAAS,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,CAAC;oBAC3B,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,CAAC;oBAC1C,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,CAAC;oBACvD,KAAK,IAAI,CAAC,GAAG,KAAK,EAAE,CAAC,IAAI,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;wBAClC,gBAAgB,CAAC,GAAG,CAAC,CA
AC,CAAC,CAAC;oBAC1B,CAAC;oBACD,MAAM;gBACR,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,eAAe,CAAC,KAAK,EAAE,gBAAgB,EAAE,QAAQ,EAAE,QAAQ,EAAE,cAAc,CAAC,CAAC;AACtF,CAAC;AAED,SAAS,6BAA6B,CACpC,KAAe,EACf,QAAkB,EAClB,QAAgB,EAChB,QAAgB;IAEhB,MAAM,gBAAgB,GAAG,IAAI,GAAG,EAAU,CAAC;IAE3C,uBAAuB;IACvB,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAC7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,gBAAgB,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IAC1B,CAAC;IAED,0BAA0B;IAC1B,MAAM,UAAU,GAAG,CAAC,CAAC;IACrB,KAAK,IAAI,CAAC,GAAG,QAAQ,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7C,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,CAAE,CAAC,WAAW,EAAE,CAAC;QAC1C,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;YAC1B,IAAI,SAAS,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,CAAC;gBAC3B,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,CAAC;gBAC1C,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,CAAC;gBACvD,KAAK,IAAI,CAAC,GAAG,KAAK,EAAE,CAAC,IAAI,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;oBAClC,gBAAgB,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;gBAC1B,CAAC;gBACD,MAAM;YACR,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,eAAe,CAAC,KAAK,EAAE,gBAAgB,EAAE,QAAQ,EAAE,QAAQ,EAAE,sBAAsB,CAAC,CAAC;AAC9F,CAAC;AAED,SAAS,eAAe,CACtB,KAAe,EACf,gBAA6B,EAC7B,QAAgB,EAChB,QAAgB,EAChB,QAA+B;IAE/B,MAAM,MAAM,GAAG,CAAC,GAAG,gBAAgB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC3D,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,QAAQ,GAAG,CAAC,CAAC,CAAC;IAClB,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,KAAK,MAAM,OAAO,IAAI,MAAM,EAAE,CAAC;QAC7B,IAAI,SAAS,IAAI,QAAQ,IAAI,KAAK,IAAI,QAAQ;YAAE,MAAM;QAEtD,IAAI,OAAO,GAAG,QAAQ,GAAG,CAAC,IAAI,QAAQ,IAAI,CAAC,EAAE,CAAC;YAC5C,MAAM,GAAG,GAAG,gBAAgB,QAAQ,GAAG,CAAC,IAAI,OAAO,iBAAiB,CAAC;YACrE,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAChB,KAAK,IAAI,GAAG,CAAC,MAAM,CAAC;QACtB,CAAC;QAED,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAE,CAAC;QAC7B,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjB,KAAK,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;QACzB,SAAS,EAAE,CAAC;Q
ACZ,QAAQ,GAAG,OAAO,CAAC;IACrB,CAAC;IAED,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAE,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvE,KAAK,CAAC,IAAI,CAAC,UAAU,KAAK,CAAC,MAAM,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAE,GAAG,CAAC,CAAC,0BAA0B,CAAC,CAAC;IAClG,CAAC;IAED,OAAO;QACL,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;QACzB,SAAS,EAAE,SAAS;QACpB,SAAS,EAAE,KAAK;QAChB,QAAQ;KACT,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Knowledge-mode scorer — deterministic scoring for non-code workspace files.
|
|
3
|
+
*
|
|
4
|
+
* 8 weighted signals:
|
|
5
|
+
* keyword_match — IDF-weighted keyword match in content
|
|
6
|
+
* filename_match — Keyword match against filename stems
|
|
7
|
+
* path_match — Keyword match against folder path segments
|
|
8
|
+
* recency — mtime-based stepped decay (more recent files score higher)
|
|
9
|
+
* size_penalty — Penalize large files
|
|
10
|
+
* doc_type_prior — Extension weighting (.md > .txt > .json > others)
|
|
11
|
+
* folder_proximity — Shared path segment scoring with intent-inferred anchors
|
|
12
|
+
* markdown_header_boost — Regex ^# headers matching keywords in first N lines
|
|
13
|
+
*/
|
|
14
|
+
import type { KnowledgeFeatures, KnowledgeScoredFile, KnowledgeWeights, WorkspaceFileInfo } from '@codeledger/types';
|
|
15
|
+
/** Default knowledge-mode weights (match spec signal_weights) */
|
|
16
|
+
export declare const DEFAULT_KNOWLEDGE_WEIGHTS: KnowledgeWeights;
|
|
17
|
+
/**
|
|
18
|
+
* Compute knowledge-mode features for a single file.
|
|
19
|
+
*/
|
|
20
|
+
export declare function computeKnowledgeFeatures(file: WorkspaceFileInfo, intentTokens: string[], tokenWeights: Map<string, number>, anchorFolders: string[], nowMs: number): KnowledgeFeatures;
|
|
21
|
+
/**
|
|
22
|
+
* Score a file using weighted sum of knowledge features.
|
|
23
|
+
*/
|
|
24
|
+
export declare function scoreKnowledgeFile(features: KnowledgeFeatures, weights: KnowledgeWeights): number;
|
|
25
|
+
/**
|
|
26
|
+
* Derive human-readable reason codes from features.
|
|
27
|
+
*/
|
|
28
|
+
export declare function deriveKnowledgeReasons(features: KnowledgeFeatures, _intentTokens: string[]): string[];
|
|
29
|
+
/**
|
|
30
|
+
* Score all candidates and return sorted results.
|
|
31
|
+
*/
|
|
32
|
+
export declare function scoreAllKnowledgeCandidates(candidates: WorkspaceFileInfo[], intentTokens: string[], tokenWeights: Map<string, number>, anchorFolders: string[], weights: KnowledgeWeights, nowMs: number): KnowledgeScoredFile[];
|
|
33
|
+
//# sourceMappingURL=knowledge-scorer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"knowledge-scorer.d.ts","sourceRoot":"","sources":["../src/knowledge-scorer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,KAAK,EACV,iBAAiB,EACjB,mBAAmB,EACnB,gBAAgB,EAChB,iBAAiB,EAClB,MAAM,mBAAmB,CAAC;AAE3B,iEAAiE;AACjE,eAAO,MAAM,yBAAyB,EAAE,gBASvC,CAAC;AAiCF;;GAEG;AACH,wBAAgB,wBAAwB,CACtC,IAAI,EAAE,iBAAiB,EACvB,YAAY,EAAE,MAAM,EAAE,EACtB,YAAY,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EACjC,aAAa,EAAE,MAAM,EAAE,EACvB,KAAK,EAAE,MAAM,GACZ,iBAAiB,CA4HnB;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAChC,QAAQ,EAAE,iBAAiB,EAC3B,OAAO,EAAE,gBAAgB,GACxB,MAAM,CAWR;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CACpC,QAAQ,EAAE,iBAAiB,EAC3B,aAAa,EAAE,MAAM,EAAE,GACtB,MAAM,EAAE,CA8BV;AAED;;GAEG;AACH,wBAAgB,2BAA2B,CACzC,UAAU,EAAE,iBAAiB,EAAE,EAC/B,YAAY,EAAE,MAAM,EAAE,EACtB,YAAY,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EACjC,aAAa,EAAE,MAAM,EAAE,EACvB,OAAO,EAAE,gBAAgB,EACzB,KAAK,EAAE,MAAM,GACZ,mBAAmB,EAAE,CAoBvB"}
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Knowledge-mode scorer — deterministic scoring for non-code workspace files.
|
|
3
|
+
*
|
|
4
|
+
* 8 weighted signals:
|
|
5
|
+
* keyword_match — IDF-weighted keyword match in content
|
|
6
|
+
* filename_match — Keyword match against filename stems
|
|
7
|
+
* path_match — Keyword match against folder path segments
|
|
8
|
+
* recency — mtime-based stepped decay (more recent files score higher)
|
|
9
|
+
* size_penalty — Penalize large files
|
|
10
|
+
* doc_type_prior — Extension weighting (.md > .txt > .json > others)
|
|
11
|
+
* folder_proximity — Shared path segment scoring with intent-inferred anchors
|
|
12
|
+
* markdown_header_boost — Regex ^# headers matching keywords in first N lines
|
|
13
|
+
*/
|
|
14
|
+
/**
 * Default knowledge-mode weights (match spec signal_weights).
 *
 * One entry per scoring signal; the eight values add up to 1.0.
 */
export const DEFAULT_KNOWLEDGE_WEIGHTS = {
    // Intent-match signals
    keyword_match: 0.22,
    filename_match: 0.14,
    path_match: 0.1,
    // File-metadata signals
    recency: 0.16,
    size_penalty: 0.06,
    doc_type_prior: 0.1,
    // Location and structure signals
    folder_proximity: 0.14,
    markdown_header_boost: 0.08,
};
|
|
25
|
+
/** Document type priors: higher = more likely to be useful knowledge content */
const DOC_TYPE_PRIORS = {
    '.md': 1.0,
    '.txt': 0.7,
    '.json': 0.5,
    '.yaml': 0.5,
    '.yml': 0.5,
    // Unsupported but present — metadata only
    '.csv': 0.2,
    '.xlsx': 0.1,
    '.pdf': 0.1,
    '.docx': 0.1,
};
/** Folder names that indicate high-value knowledge content */
const KNOWLEDGE_FOLDER_PRIORS = {
    docs: 0.9,
    spec: 0.9,
    specifications: 0.9,
    proposals: 0.8,
    design: 0.8,
    meetings: 0.7,
    notes: 0.6,
    product: 0.8,
    reports: 0.7,
    plans: 0.8,
    planning: 0.8,
    research: 0.7,
    analysis: 0.7,
};
/**
 * Compute knowledge-mode features for a single file.
 *
 * All path comparisons are done on the lowercased file path. Intent tokens
 * are compared as-is.
 *
 * @param file           File record; reads `path`, `extension`, `mtime`,
 *                       `size_bytes`, and the optional `content_keywords`,
 *                       `type` and `markdown_headers`.
 * @param intentTokens   Intent tokens.
 * @param tokenWeights   Map of token -> weight; a missing token counts as 1,
 *                       a token with weight 0 is ignored by signals 1-3.
 * @param anchorFolders  Folder paths to measure proximity against (any case).
 * @param nowMs          Current time in milliseconds, used for recency.
 * @returns An object with the eight feature values.
 */
export function computeKnowledgeFeatures(file, intentTokens, tokenWeights, anchorFolders, nowMs) {
    // Equal, or differing only by a trailing "s" in either direction.
    const sameModuloPlural = (a, b) => a === b || a === b + 's' || b === a + 's';
    // Read a prior from a lookup table, ignoring inherited keys such as
    // "constructor" or "__proto__" that plain bracket access would return.
    const ownPrior = (table, key) => Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
    const pathLower = file.path.toLowerCase();
    const pathSegments = pathLower.split('/');
    const basename = pathSegments[pathSegments.length - 1]?.replace(/\.\w+$/, '') ?? '';
    const filenameStems = basename.split(/[-_]/);
    const contentKws = file.content_keywords ?? [];
    // 1-3. keyword_match / filename_match / path_match — the three signals share
    // the same weights and the same normaliser (sum of non-zero token weights).
    let weightTotal = 0;
    let kwScore = 0;
    let fnScore = 0;
    let pathScore = 0;
    for (const token of intentTokens) {
        const w = tokenWeights.get(token) ?? 1;
        if (w === 0)
            continue;
        weightTotal += w;
        // 1. keyword_match — token equals a content keyword (modulo plural "s")
        if (contentKws.some((ck) => sameModuloPlural(ck, token))) {
            kwScore += w;
        }
        // 2. filename_match — token equals a '-'/'_'-separated filename stem
        if (filenameStems.some((stem) => sameModuloPlural(stem, token))) {
            fnScore += w * 2.0; // 2x for filename stem exact match
        }
        // 3. path_match — token appears anywhere in the lowercased path
        if (pathLower.includes(token)) {
            pathScore += w;
        }
    }
    const normalise = (score) => (weightTotal > 0 ? Math.min(1, score / weightTotal) : 0);
    const keyword_match = normalise(kwScore);
    const filename_match = normalise(fnScore);
    const path_match = normalise(pathScore);
    // 4. recency — stepped decay based on days since mtime. An unparseable
    // mtime yields NaN, fails every comparison and lands in the lowest tier.
    const mtimeMs = new Date(file.mtime).getTime();
    const daysSince = Math.max(0, (nowMs - mtimeMs) / (1000 * 60 * 60 * 24));
    const recency = daysSince <= 1 ? 1.0
        : daysSince <= 3 ? 0.9
            : daysSince <= 7 ? 0.8
                : daysSince <= 14 ? 0.6
                    : daysSince <= 30 ? 0.4
                        : daysSince <= 60 ? 0.2
                            : 0.1;
    // 5. size_penalty — grows linearly with size and saturates at 500 KiB
    const size_penalty = Math.max(0, Math.min(1, file.size_bytes / (500 * 1024)));
    // 6. doc_type_prior — extension-based document type weighting
    const doc_type_prior = ownPrior(DOC_TYPE_PRIORS, file.extension.toLowerCase()) ?? 0.05;
    // 7. folder_proximity — shared path segments with anchor folders
    let folder_proximity = 0;
    if (anchorFolders.length > 0) {
        const fileDir = pathSegments.slice(0, -1).join('/');
        const fileDirParts = fileDir.split('/');
        for (const anchor of anchorFolders) {
            // fileDir is lowercased, so the anchor must be lowercased as well;
            // otherwise an anchor like "Docs/Auth" could never match.
            const anchorLower = anchor.toLowerCase();
            if (fileDir === anchorLower || fileDir.startsWith(anchorLower + '/')) {
                folder_proximity = 1.0;
                break;
            }
            // Partial match: fraction of the anchor's leading segments shared
            const anchorParts = anchorLower.split('/');
            let shared = 0;
            for (let i = 0; i < Math.min(anchorParts.length, fileDirParts.length); i++) {
                if (anchorParts[i] === fileDirParts[i])
                    shared++;
                else
                    break;
            }
            const proximity = anchorParts.length > 0 ? shared / anchorParts.length : 0;
            folder_proximity = Math.max(folder_proximity, proximity);
        }
    }
    else {
        // No anchors: use the best knowledge folder prior among the directories
        for (const seg of pathSegments.slice(0, -1)) {
            const prior = ownPrior(KNOWLEDGE_FOLDER_PRIORS, seg);
            if (prior !== undefined) {
                folder_proximity = Math.max(folder_proximity, prior);
            }
        }
    }
    // 8. markdown_header_boost — share of intent tokens found in any header
    let markdown_header_boost = 0;
    if (file.type === 'markdown' && file.markdown_headers) {
        const headersLower = file.markdown_headers.map((h) => h.toLowerCase());
        let headerHits = 0;
        for (const token of intentTokens) {
            if (headersLower.some((h) => h.includes(token))) {
                headerHits++;
            }
        }
        markdown_header_boost = intentTokens.length > 0
            ? Math.min(1, headerHits / intentTokens.length)
            : 0;
    }
    return {
        keyword_match,
        filename_match,
        path_match,
        recency,
        size_penalty,
        doc_type_prior,
        folder_proximity,
        markdown_header_boost,
    };
}
|
|
174
|
+
/**
 * Combine the eight knowledge features into one score.
 *
 * Each feature is multiplied by the weight of the same name. All products are
 * added, except size_penalty, whose product is subtracted.
 */
export function scoreKnowledgeFile(features, weights) {
    const contribution = (signal) => features[signal] * weights[signal];
    // Accumulate strictly left to right so the floating-point result is the
    // same as a single chained expression in this order.
    let total = contribution('keyword_match');
    total += contribution('filename_match');
    total += contribution('path_match');
    total += contribution('recency');
    total -= contribution('size_penalty');
    total += contribution('doc_type_prior');
    total += contribution('folder_proximity');
    total += contribution('markdown_header_boost');
    return total;
}
|
|
187
|
+
/**
 * Translate a feature vector into an ordered list of reason codes.
 *
 * A code is emitted when its feature crosses the threshold listed in the
 * table below. `path_match` is reported only when the filename did not match,
 * and the doc-type code carries an `md` suffix when the prior is at least 1.0
 * and a `text` suffix otherwise.
 *
 * @param {object} features - Feature values keyed by feature name.
 * @param {string[]} _intentTokens - Accepted for interface compatibility; not read.
 * @returns {string[]} Reason codes in a fixed order.
 */
export function deriveKnowledgeReasons(features, _intentTokens) {
    const docTypeSuffix = features.doc_type_prior >= 1.0 ? 'md' : 'text';
    // [applies, code] pairs, listed in output order.
    const rules = [
        [features.keyword_match > 0, 'keyword_match'],
        [features.filename_match > 0, 'filename_match'],
        [features.path_match > 0 && features.filename_match === 0, 'path_match'],
        [features.recency >= 0.8, 'recency_high'],
        [features.doc_type_prior >= 0.5, `doc_type_prior:${docTypeSuffix}`],
        [features.folder_proximity >= 0.5, 'folder_proximity'],
        [features.markdown_header_boost > 0, 'markdown_header_boost'],
        [features.size_penalty > 0.5, 'size_penalty'],
    ];
    return rules.filter(([applies]) => applies).map(([, code]) => code);
}
|
|
219
|
+
/**
 * Score every candidate file and return the results ranked best-first.
 *
 * For each candidate this computes its features with
 * `computeKnowledgeFeatures`, scores them with `scoreKnowledgeFile`, and
 * derives reason codes with `deriveKnowledgeReasons`.
 *
 * @param {Array<object>} candidates - Candidate files; each must expose `path`.
 * @param {string[]} intentTokens - Intent tokens forwarded to feature and reason derivation.
 * @param {*} tokenWeights - Per-token weights, forwarded to feature computation.
 * @param {string[]} anchorFolders - Anchor folders, forwarded to feature computation.
 * @param {object} weights - Feature weights used for scoring.
 * @param {number} nowMs - Reference timestamp, forwarded to feature computation.
 * @returns {Array<{path: string, score: number, features: object, reasons: string[]}>}
 *   Results sorted by descending score, ties broken by ascending path.
 */
export function scoreAllKnowledgeCandidates(candidates, intentTokens, tokenWeights, anchorFolders, weights, nowMs) {
    const results = [];
    for (const file of candidates) {
        const features = computeKnowledgeFeatures(file, intentTokens, tokenWeights, anchorFolders, nowMs);
        const score = scoreKnowledgeFile(features, weights);
        const reasons = deriveKnowledgeReasons(features, intentTokens);
        results.push({ path: file.path, score, features, reasons });
    }
    results.sort((a, b) => {
        const byScore = b.score - a.score;
        // A zero or NaN difference falls through to the path tie-break.
        if (byScore) {
            return byScore;
        }
        // Compare paths by UTF-16 code units rather than localeCompare():
        // locale collation depends on the host's locale and ICU data, so it
        // can order the same paths differently on different machines.
        if (a.path < b.path) {
            return -1;
        }
        if (a.path > b.path) {
            return 1;
        }
        return 0;
    });
    return results;
}
|
|
234
|
+
//# sourceMappingURL=knowledge-scorer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"knowledge-scorer.js","sourceRoot":"","sources":["../src/knowledge-scorer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AASH,iEAAiE;AACjE,MAAM,CAAC,MAAM,yBAAyB,GAAqB;IACzD,aAAa,EAAE,IAAI;IACnB,cAAc,EAAE,IAAI;IACpB,UAAU,EAAE,IAAI;IAChB,OAAO,EAAE,IAAI;IACb,YAAY,EAAE,IAAI;IAClB,cAAc,EAAE,IAAI;IACpB,gBAAgB,EAAE,IAAI;IACtB,qBAAqB,EAAE,IAAI;CAC5B,CAAC;AAEF,gFAAgF;AAChF,MAAM,eAAe,GAA2B;IAC9C,KAAK,EAAE,GAAG;IACV,MAAM,EAAE,GAAG;IACX,OAAO,EAAE,GAAG;IACZ,OAAO,EAAE,GAAG;IACZ,MAAM,EAAE,GAAG;IACX,0CAA0C;IAC1C,MAAM,EAAE,GAAG;IACX,OAAO,EAAE,GAAG;IACZ,MAAM,EAAE,GAAG;IACX,OAAO,EAAE,GAAG;CACb,CAAC;AAEF,8DAA8D;AAC9D,MAAM,uBAAuB,GAA2B;IACtD,IAAI,EAAE,GAAG;IACT,IAAI,EAAE,GAAG;IACT,cAAc,EAAE,GAAG;IACnB,SAAS,EAAE,GAAG;IACd,MAAM,EAAE,GAAG;IACX,QAAQ,EAAE,GAAG;IACb,KAAK,EAAE,GAAG;IACV,OAAO,EAAE,GAAG;IACZ,OAAO,EAAE,GAAG;IACZ,KAAK,EAAE,GAAG;IACV,QAAQ,EAAE,GAAG;IACb,QAAQ,EAAE,GAAG;IACb,QAAQ,EAAE,GAAG;CACd,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,wBAAwB,CACtC,IAAuB,EACvB,YAAsB,EACtB,YAAiC,EACjC,aAAuB,EACvB,KAAa;IAEb,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;IAC1C,MAAM,YAAY,GAAG,SAAS,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC1C,MAAM,QAAQ,GAAG,YAAY,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,IAAI,EAAE,CAAC;IACpF,MAAM,aAAa,GAAG,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAC7C,MAAM,UAAU,GAAG,IAAI,CAAC,gBAAgB,IAAI,EAAE,CAAC;IAE/C,iEAAiE;IACjE,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,KAAK,MAAM,KAAK,IAAI,YAAY,EAAE,CAAC;QACjC,MAAM,CAAC,GAAG,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACvC,IAAI,CAAC,KAAK,CAAC;YAAE,SAAS;QACtB,WAAW,IAAI,CAAC,CAAC;QAEjB,IAAI,UAAU,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,KAAK,KAAK,IAAI,EAAE,KAAK,KAAK,GAAG,GAAG,IAAI,KAAK,KAAK,EAAE,GAAG,GAAG,CAAC,EAAE,CAAC;YACtF,OAAO,IAAI,CAAC,CAAC;QACf,CAAC;IACH,CAAC;IACD,MAAM,aAAa,GAAG,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAE/E,2DAA2D;IAC3D,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,KAAK,IAAI
,YAAY,EAAE,CAAC;QACjC,MAAM,CAAC,GAAG,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACvC,IAAI,CAAC,KAAK,CAAC;YAAE,SAAS;QACtB,KAAK,IAAI,CAAC,CAAC;QAEX,MAAM,WAAW,GAAG,aAAa,CAAC,IAAI,CACpC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,GAAG,GAAG,IAAI,KAAK,KAAK,IAAI,GAAG,GAAG,CACzE,CAAC;QACF,IAAI,WAAW,EAAE,CAAC;YAChB,OAAO,IAAI,CAAC,GAAG,GAAG,CAAC,CAAC,mCAAmC;QACzD,CAAC;IACH,CAAC;IACD,MAAM,cAAc,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAEpE,6DAA6D;IAC7D,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,KAAK,MAAM,KAAK,IAAI,YAAY,EAAE,CAAC;QACjC,MAAM,CAAC,GAAG,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACvC,IAAI,CAAC,KAAK,CAAC;YAAE,SAAS;QACtB,OAAO,IAAI,CAAC,CAAC;QAEb,IAAI,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;YAC9B,SAAS,IAAI,CAAC,CAAC;QACjB,CAAC;IACH,CAAC;IACD,MAAM,UAAU,GAAG,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAEtE,gDAAgD;IAChD,MAAM,OAAO,GAAG,IAAI,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC;IAC/C,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC;IACzE,MAAM,OAAO,GAAG,SAAS,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG;QAClC,CAAC,CAAC,SAAS,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG;YACtB,CAAC,CAAC,SAAS,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG;gBACtB,CAAC,CAAC,SAAS,IAAI,EAAE,CAAC,CAAC,CAAC,GAAG;oBACvB,CAAC,CAAC,SAAS,IAAI,EAAE,CAAC,CAAC,CAAC,GAAG;wBACvB,CAAC,CAAC,SAAS,IAAI,EAAE,CAAC,CAAC,CAAC,GAAG;4BACvB,CAAC,CAAC,GAAG,CAAC;IAER,8CAA8C;IAC9C,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,UAAU,GAAG,CAAC,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IAE9E,8DAA8D;IAC9D,MAAM,cAAc,GAAG,eAAe,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC,IAAI,IAAI,CAAC;IAE7E,iEAAiE;IACjE,IAAI,gBAAgB,GAAG,CAAC,CAAC;IACzB,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACpD,KAAK,MAAM,MA
AM,IAAI,aAAa,EAAE,CAAC;YACnC,IAAI,OAAO,KAAK,MAAM,IAAI,OAAO,CAAC,UAAU,CAAC,MAAM,GAAG,GAAG,CAAC,EAAE,CAAC;gBAC3D,gBAAgB,GAAG,GAAG,CAAC;gBACvB,MAAM;YACR,CAAC;YACD,uCAAuC;YACvC,MAAM,WAAW,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YACtC,MAAM,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YACxC,IAAI,MAAM,GAAG,CAAC,CAAC;YACf,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,CAAC,MAAM,EAAE,YAAY,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC3E,IAAI,WAAW,CAAC,CAAC,CAAC,KAAK,YAAY,CAAC,CAAC,CAAC;oBAAE,MAAM,EAAE,CAAC;;oBAC5C,MAAM;YACb,CAAC;YACD,MAAM,SAAS,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;YAC3E,gBAAgB,GAAG,IAAI,CAAC,GAAG,CAAC,gBAAgB,EAAE,SAAS,CAAC,CAAC;QAC3D,CAAC;IACH,CAAC;SAAM,CAAC;QACN,0CAA0C;QAC1C,KAAK,MAAM,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC5C,MAAM,KAAK,GAAG,uBAAuB,CAAC,GAAG,CAAC,CAAC;YAC3C,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;gBACxB,gBAAgB,GAAG,IAAI,CAAC,GAAG,CAAC,gBAAgB,EAAE,KAAK,CAAC,CAAC;YACvD,CAAC;QACH,CAAC;IACH,CAAC;IAED,oFAAoF;IACpF,IAAI,qBAAqB,GAAG,CAAC,CAAC;IAC9B,IAAI,IAAI,CAAC,IAAI,KAAK,UAAU,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtD,MAAM,YAAY,GAAG,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;QACvE,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,KAAK,MAAM,KAAK,IAAI,YAAY,EAAE,CAAC;YACjC,IAAI,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;gBAChD,UAAU,EAAE,CAAC;YACf,CAAC;QACH,CAAC;QACD,qBAAqB,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC;YAC7C,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,GAAG,YAAY,CAAC,MAAM,CAAC;YAC/C,CAAC,CAAC,CAAC,CAAC;IACR,CAAC;IAED,OAAO;QACL,aAAa;QACb,cAAc;QACd,UAAU;QACV,OAAO;QACP,YAAY;QACZ,cAAc;QACd,gBAAgB;QAChB,qBAAqB;KACtB,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAChC,QAA2B,EAC3B,OAAyB;IAEzB,OAAO,CACL,QAAQ,CAAC,aAAa,GAAG,OAAO,CAAC,aAAa;QAC9C,QAAQ,CAAC,cAAc,GAAG,OAAO,CAAC,cAAc;QAChD,QAAQ,CAAC,UAAU,GAAG,OAAO,CAAC,UAAU;QACxC,QAAQ,CAAC,OAAO,GAAG,OAAO,CAAC,OAAO;QAClC,QAAQ,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY;Q
AC5C,QAAQ,CAAC,cAAc,GAAG,OAAO,CAAC,cAAc;QAChD,QAAQ,CAAC,gBAAgB,GAAG,OAAO,CAAC,gBAAgB;QACpD,QAAQ,CAAC,qBAAqB,GAAG,OAAO,CAAC,qBAAqB,CAC/D,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,sBAAsB,CACpC,QAA2B,EAC3B,aAAuB;IAEvB,MAAM,OAAO,GAAa,EAAE,CAAC;IAE7B,IAAI,QAAQ,CAAC,aAAa,GAAG,CAAC,EAAE,CAAC;QAC/B,4CAA4C;QAC5C,OAAO,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;IAChC,CAAC;IACD,IAAI,QAAQ,CAAC,cAAc,GAAG,CAAC,EAAE,CAAC;QAChC,OAAO,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;IACjC,CAAC;IACD,IAAI,QAAQ,CAAC,UAAU,GAAG,CAAC,IAAI,QAAQ,CAAC,cAAc,KAAK,CAAC,EAAE,CAAC;QAC7D,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IAC7B,CAAC;IACD,IAAI,QAAQ,CAAC,OAAO,IAAI,GAAG,EAAE,CAAC;QAC5B,OAAO,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;IAC/B,CAAC;IACD,IAAI,QAAQ,CAAC,cAAc,IAAI,GAAG,EAAE,CAAC;QACnC,OAAO,CAAC,IAAI,CAAC,kBAAkB,QAAQ,CAAC,cAAc,IAAI,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;IACnF,CAAC;IACD,IAAI,QAAQ,CAAC,gBAAgB,IAAI,GAAG,EAAE,CAAC;QACrC,OAAO,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;IACnC,CAAC;IACD,IAAI,QAAQ,CAAC,qBAAqB,GAAG,CAAC,EAAE,CAAC;QACvC,OAAO,CAAC,IAAI,CAAC,uBAAuB,CAAC,CAAC;IACxC,CAAC;IACD,IAAI,QAAQ,CAAC,YAAY,GAAG,GAAG,EAAE,CAAC;QAChC,OAAO,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;IAC/B,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,2BAA2B,CACzC,UAA+B,EAC/B,YAAsB,EACtB,YAAiC,EACjC,aAAuB,EACvB,OAAyB,EACzB,KAAa;IAEb,MAAM,OAAO,GAA0B,EAAE,CAAC;IAE1C,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,MAAM,QAAQ,GAAG,wBAAwB,CACvC,IAAI,EACJ,YAAY,EACZ,YAAY,EACZ,aAAa,EACb,KAAK,CACN,CAAC;QACF,MAAM,KAAK,GAAG,kBAAkB,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QACpD,MAAM,OAAO,GAAG,sBAAsB,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;QAE/D,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC,CAAC;IAC9D,CAAC;IAED,8EAA8E;IAC9E,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IAC1E,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Mode auto-detection: determine whether a workspace should use
|
|
3
|
+
* code mode or knowledge mode.
|
|
4
|
+
*
|
|
5
|
+
* Detection rules:
|
|
6
|
+
* 1. IF a project root marker is present (package.json, pyproject.toml, go.mod, Cargo.toml, Gemfile, pom.xml, build.gradle, CMakeLists.txt, Makefile) → code mode
|
|
7
|
+
* 2. ELSE → knowledge mode
|
|
8
|
+
* 3. Override: IF 70%+ of files are knowledge documents (e.g. .md, .txt, .json, .yaml, .csv, .pdf) → knowledge mode, even when a root marker is present
|
|
9
|
+
* 4. Override: IF 50%+ of files are code files (e.g. .ts, .js, .py, .go, .rs, .java) → code mode; evaluated after rule 3
|
|
10
|
+
* 5. Explicit --mode flag always overrides
|
|
11
|
+
*/
|
|
12
|
+
import type { SelectionMode } from '@codeledger/types';
|
|
13
|
+
export interface ModeDetectionInput {
|
|
14
|
+
/** File paths relative to workspace root; each path's extension is counted as a code file, a knowledge file, or neither */
|
|
15
|
+
filePaths: string[];
|
|
16
|
+
/** Basenames of files in the workspace root directory; matched exactly (case-sensitive) against project marker names such as package.json */
|
|
17
|
+
rootFiles: string[];
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Auto-detect the selection mode ('code' or 'knowledge') from workspace contents: file-extension ratios are checked first, then root marker files; the default is 'knowledge'.
|
|
21
|
+
*/
|
|
22
|
+
export declare function detectSelectionMode(input: ModeDetectionInput): SelectionMode;
|
|
23
|
+
//# sourceMappingURL=mode-detect.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mode-detect.d.ts","sourceRoot":"","sources":["../src/mode-detect.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AA8BvD,MAAM,WAAW,kBAAkB;IACjC,4CAA4C;IAC5C,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,yDAAyD;IACzD,SAAS,EAAE,MAAM,EAAE,CAAC;CACrB;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,kBAAkB,GAAG,aAAa,CAoC5E"}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Mode auto-detection: determine whether a workspace should use
|
|
3
|
+
* code mode or knowledge mode.
|
|
4
|
+
*
|
|
5
|
+
* Detection rules:
|
|
6
|
+
* 1. IF a project root marker is present (package.json, pyproject.toml, go.mod, Cargo.toml, Gemfile, pom.xml, build.gradle, CMakeLists.txt, Makefile) → code mode
|
|
7
|
+
* 2. ELSE → knowledge mode
|
|
8
|
+
* 3. Override: IF 70%+ of files are knowledge documents (e.g. .md, .txt, .json, .yaml, .csv, .pdf) → knowledge mode, even when a root marker is present
|
|
9
|
+
* 4. Override: IF 50%+ of files are code files (e.g. .ts, .js, .py, .go, .rs, .java) → code mode; evaluated after rule 3
|
|
10
|
+
* 5. Explicit --mode flag always overrides
|
|
11
|
+
*/
|
|
12
|
+
/** File names that mark a code project when found in the workspace root. */
const CODE_ROOT_MARKERS = [
    'package.json',
    'pyproject.toml',
    'go.mod',
    'Cargo.toml',
    'Gemfile',
    'pom.xml',
    'build.gradle',
    'CMakeLists.txt',
    'Makefile',
];

/** Lower-case extensions (with leading dot) that count as code files. */
const CODE_EXTENSIONS = new Set([
    '.ts',
    '.tsx',
    '.js',
    '.jsx',
    '.mjs',
    '.cjs',
    '.py',
    '.go',
    '.rs',
    '.java',
    '.kt',
    '.cs',
    '.c',
    '.cpp',
    '.h',
    '.hpp',
    '.rb',
    '.php',
    '.swift',
    '.scala',
    '.dart',
]);

/** Lower-case extensions (with leading dot) that count as knowledge/document files. */
const KNOWLEDGE_EXTENSIONS = new Set([
    '.md',
    '.mdx',
    '.txt',
    '.json',
    '.yaml',
    '.yml',
    '.csv',
    '.pdf',
    '.docx',
    '.xlsx',
    '.pptx',
    '.rst',
    '.adoc',
    '.org',
]);
|
|
37
|
+
/**
 * Decide whether a workspace should be handled in code mode or knowledge mode.
 *
 * Precedence, highest first:
 *   1. At least 70% of the files have a knowledge extension → 'knowledge'.
 *   2. At least 50% of the files have a code extension → 'code'.
 *   3. A known project marker is among the root files → 'code'.
 *   4. Otherwise → 'knowledge'.
 * Both ratio checks are skipped when the file list is empty.
 *
 * @param {{ filePaths: string[], rootFiles: string[] }} input - Workspace listing.
 * @returns {'code' | 'knowledge'} The detected mode.
 */
export function detectSelectionMode(input) {
    const { filePaths, rootFiles } = input;
    const markerFound = CODE_ROOT_MARKERS.some((marker) => rootFiles.includes(marker));

    // Tally how many paths fall into each extension family.
    const fileCount = filePaths.length;
    const tally = { code: 0, knowledge: 0 };
    for (const filePath of filePaths) {
        const extension = getExtension(filePath);
        tally.code += CODE_EXTENSIONS.has(extension) ? 1 : 0;
        tally.knowledge += KNOWLEDGE_EXTENSIONS.has(extension) ? 1 : 0;
    }

    if (fileCount > 0) {
        // A document-heavy workspace wins even when a code marker is present.
        if (tally.knowledge / fileCount >= 0.7) {
            return 'knowledge';
        }
        if (tally.code / fileCount >= 0.5) {
            return 'code';
        }
    }
    return markerFound ? 'code' : 'knowledge';
}
|
|
70
|
+
/**
 * Return the lower-cased extension of a path, including the leading dot.
 *
 * Only the final path segment is inspected, so a dot inside a directory name
 * (for example `docs.v2/README`) is not mistaken for an extension. Both `/`
 * and `\` are treated as segment separators.
 *
 * @param {string} filePath - File path to inspect.
 * @returns {string} The extension (e.g. `.md`), or `''` when the final segment contains no dot.
 */
function getExtension(filePath) {
    // Index of the first character of the final segment (0 when there is no separator).
    const baseStart = Math.max(filePath.lastIndexOf('/'), filePath.lastIndexOf('\\')) + 1;
    const lastDot = filePath.lastIndexOf('.');
    // Covers both "no dot at all" (-1) and "the last dot belongs to a directory name".
    if (lastDot < baseStart) {
        return '';
    }
    return filePath.slice(lastDot).toLowerCase();
}
|
|
76
|
+
//# sourceMappingURL=mode-detect.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mode-detect.js","sourceRoot":"","sources":["../src/mode-detect.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAIH,2DAA2D;AAC3D,MAAM,iBAAiB,GAAG;IACxB,cAAc;IACd,gBAAgB;IAChB,QAAQ;IACR,YAAY;IACZ,SAAS;IACT,SAAS;IACT,cAAc;IACd,gBAAgB;IAChB,UAAU;CACX,CAAC;AAEF,0CAA0C;AAC1C,MAAM,eAAe,GAAG,IAAI,GAAG,CAAC;IAC9B,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM;IAC5C,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK;IAC1C,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM;IACzC,QAAQ,EAAE,QAAQ,EAAE,OAAO;CAC5B,CAAC,CAAC;AAEH,wDAAwD;AACxD,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC;IACnC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM;IAC/C,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO;IACzC,MAAM,EAAE,OAAO,EAAE,MAAM;CACxB,CAAC,CAAC;AASH;;GAEG;AACH,MAAM,UAAU,mBAAmB,CAAC,KAAyB;IAC3D,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE,GAAG,KAAK,CAAC;IAEvC,8CAA8C;IAC9C,MAAM,aAAa,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CACtD,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,MAAM,CAAC,CACpC,CAAC;IAEF,mBAAmB;IACnB,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,cAAc,GAAG,CAAC,CAAC;IACvB,MAAM,KAAK,GAAG,SAAS,CAAC,MAAM,CAAC;IAE/B,KAAK,MAAM,EAAE,IAAI,SAAS,EAAE,CAAC;QAC3B,MAAM,GAAG,GAAG,YAAY,CAAC,EAAE,CAAC,CAAC;QAC7B,IAAI,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,SAAS,EAAE,CAAC;QAC1C,IAAI,oBAAoB,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,cAAc,EAAE,CAAC;IACtD,CAAC;IAED,qFAAqF;IACrF,IAAI,KAAK,GAAG,CAAC,IAAI,cAAc,GAAG,KAAK,IAAI,GAAG,EAAE,CAAC;QAC/C,OAAO,WAAW,CAAC;IACrB,CAAC;IAED,6CAA6C;IAC7C,IAAI,KAAK,GAAG,CAAC,IAAI,SAAS,GAAG,KAAK,IAAI,GAAG,EAAE,CAAC;QAC1C,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,mDAAmD;IACnD,IAAI,aAAa,EAAE,CAAC;QAClB,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,0BAA0B;IAC1B,OAAO,WAAW,CAAC;AACrB,CAAC;AAED,SAAS,YAAY,CAAC,QAAgB;IACpC,MAAM,OAAO,GAAG,QAAQ,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAC1C,IAAI,OAAO,GAAG,CAAC;QAAE,OAAO,EAAE,CAAC;IAC3B,OAAO,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,WAAW,EAAE,CAAC;AAC/C,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@codeledger/selector",
|
|
3
|
-
"version": "0.2.
|
|
3
|
+
"version": "0.2.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Deterministic context selection algorithm for CodeLedger",
|
|
6
6
|
"license": "Apache-2.0",
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
}
|
|
25
25
|
},
|
|
26
26
|
"dependencies": {
|
|
27
|
-
"@codeledger/types": "0.2.
|
|
27
|
+
"@codeledger/types": "0.2.1"
|
|
28
28
|
},
|
|
29
29
|
"devDependencies": {
|
|
30
30
|
"typescript": "^5.4.0"
|