codesift-mcp 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +66 -21
- package/README.md +346 -56
- package/dist/cli/args.d.ts +2 -0
- package/dist/cli/args.d.ts.map +1 -1
- package/dist/cli/args.js +11 -0
- package/dist/cli/args.js.map +1 -1
- package/dist/cli/commands.d.ts.map +1 -1
- package/dist/cli/commands.js +177 -67
- package/dist/cli/commands.js.map +1 -1
- package/dist/cli/help.d.ts +1 -1
- package/dist/cli/help.d.ts.map +1 -1
- package/dist/cli/help.js +157 -0
- package/dist/cli/help.js.map +1 -1
- package/dist/cli/hooks.d.ts +3 -0
- package/dist/cli/hooks.d.ts.map +1 -0
- package/dist/cli/hooks.js +163 -0
- package/dist/cli/hooks.js.map +1 -0
- package/dist/cli/setup.d.ts +25 -0
- package/dist/cli/setup.d.ts.map +1 -0
- package/dist/cli/setup.js +400 -0
- package/dist/cli/setup.js.map +1 -0
- package/dist/config.d.ts +2 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +2 -0
- package/dist/config.js.map +1 -1
- package/dist/formatters-shortening.d.ts +7 -0
- package/dist/formatters-shortening.d.ts.map +1 -0
- package/dist/formatters-shortening.js +68 -0
- package/dist/formatters-shortening.js.map +1 -0
- package/dist/formatters.d.ts +314 -0
- package/dist/formatters.d.ts.map +1 -0
- package/dist/formatters.js +396 -0
- package/dist/formatters.js.map +1 -0
- package/dist/instructions.d.ts +6 -0
- package/dist/instructions.d.ts.map +1 -0
- package/dist/instructions.js +72 -0
- package/dist/instructions.js.map +1 -0
- package/dist/lsp/lsp-client.d.ts +21 -0
- package/dist/lsp/lsp-client.d.ts.map +1 -0
- package/dist/lsp/lsp-client.js +122 -0
- package/dist/lsp/lsp-client.js.map +1 -0
- package/dist/lsp/lsp-manager.d.ts +12 -0
- package/dist/lsp/lsp-manager.d.ts.map +1 -0
- package/dist/lsp/lsp-manager.js +82 -0
- package/dist/lsp/lsp-manager.js.map +1 -0
- package/dist/lsp/lsp-servers.d.ts +13 -0
- package/dist/lsp/lsp-servers.d.ts.map +1 -0
- package/dist/lsp/lsp-servers.js +57 -0
- package/dist/lsp/lsp-servers.js.map +1 -0
- package/dist/lsp/lsp-tools.d.ts +67 -0
- package/dist/lsp/lsp-tools.d.ts.map +1 -0
- package/dist/lsp/lsp-tools.js +359 -0
- package/dist/lsp/lsp-tools.js.map +1 -0
- package/dist/parser/extractors/_shared.d.ts +11 -0
- package/dist/parser/extractors/_shared.d.ts.map +1 -0
- package/dist/parser/extractors/_shared.js +38 -0
- package/dist/parser/extractors/_shared.js.map +1 -0
- package/dist/parser/extractors/astro.d.ts +15 -0
- package/dist/parser/extractors/astro.d.ts.map +1 -0
- package/dist/parser/extractors/astro.js +104 -0
- package/dist/parser/extractors/astro.js.map +1 -0
- package/dist/parser/extractors/conversation.d.ts +16 -0
- package/dist/parser/extractors/conversation.d.ts.map +1 -0
- package/dist/parser/extractors/conversation.js +196 -0
- package/dist/parser/extractors/conversation.js.map +1 -0
- package/dist/parser/extractors/go.d.ts.map +1 -1
- package/dist/parser/extractors/go.js +22 -45
- package/dist/parser/extractors/go.js.map +1 -1
- package/dist/parser/extractors/python.d.ts +1 -1
- package/dist/parser/extractors/python.d.ts.map +1 -1
- package/dist/parser/extractors/python.js +19 -50
- package/dist/parser/extractors/python.js.map +1 -1
- package/dist/parser/extractors/rust.d.ts +1 -1
- package/dist/parser/extractors/rust.d.ts.map +1 -1
- package/dist/parser/extractors/rust.js +7 -34
- package/dist/parser/extractors/rust.js.map +1 -1
- package/dist/parser/extractors/typescript.d.ts +1 -1
- package/dist/parser/extractors/typescript.d.ts.map +1 -1
- package/dist/parser/extractors/typescript.js +99 -68
- package/dist/parser/extractors/typescript.js.map +1 -1
- package/dist/parser/parser-manager.d.ts.map +1 -1
- package/dist/parser/parser-manager.js +12 -2
- package/dist/parser/parser-manager.js.map +1 -1
- package/dist/parser/symbol-extractor.d.ts +2 -0
- package/dist/parser/symbol-extractor.d.ts.map +1 -1
- package/dist/parser/symbol-extractor.js +2 -0
- package/dist/parser/symbol-extractor.js.map +1 -1
- package/dist/register-tools.d.ts +127 -0
- package/dist/register-tools.d.ts.map +1 -0
- package/dist/register-tools.js +1453 -0
- package/dist/register-tools.js.map +1 -0
- package/dist/retrieval/codebase-retrieval.d.ts +4 -26
- package/dist/retrieval/codebase-retrieval.d.ts.map +1 -1
- package/dist/retrieval/codebase-retrieval.js +105 -403
- package/dist/retrieval/codebase-retrieval.js.map +1 -1
- package/dist/retrieval/retrieval-constants.d.ts +27 -0
- package/dist/retrieval/retrieval-constants.d.ts.map +1 -0
- package/dist/retrieval/retrieval-constants.js +27 -0
- package/dist/retrieval/retrieval-constants.js.map +1 -0
- package/dist/retrieval/retrieval-schemas.d.ts +107 -0
- package/dist/retrieval/retrieval-schemas.d.ts.map +1 -0
- package/dist/retrieval/retrieval-schemas.js +102 -0
- package/dist/retrieval/retrieval-schemas.js.map +1 -0
- package/dist/retrieval/retrieval-utils.d.ts +40 -0
- package/dist/retrieval/retrieval-utils.d.ts.map +1 -0
- package/dist/retrieval/retrieval-utils.js +139 -0
- package/dist/retrieval/retrieval-utils.js.map +1 -0
- package/dist/retrieval/semantic-handlers.d.ts +8 -0
- package/dist/retrieval/semantic-handlers.d.ts.map +1 -0
- package/dist/retrieval/semantic-handlers.js +152 -0
- package/dist/retrieval/semantic-handlers.js.map +1 -0
- package/dist/search/bm25.d.ts +6 -1
- package/dist/search/bm25.d.ts.map +1 -1
- package/dist/search/bm25.js +95 -32
- package/dist/search/bm25.js.map +1 -1
- package/dist/search/chunker.d.ts +10 -0
- package/dist/search/chunker.d.ts.map +1 -1
- package/dist/search/chunker.js +63 -11
- package/dist/search/chunker.js.map +1 -1
- package/dist/search/reranker.d.ts +15 -0
- package/dist/search/reranker.d.ts.map +1 -0
- package/dist/search/reranker.js +126 -0
- package/dist/search/reranker.js.map +1 -0
- package/dist/search/semantic.d.ts +1 -1
- package/dist/search/semantic.d.ts.map +1 -1
- package/dist/search/semantic.js +40 -45
- package/dist/search/semantic.js.map +1 -1
- package/dist/server-helpers.d.ts +29 -0
- package/dist/server-helpers.d.ts.map +1 -0
- package/dist/server-helpers.js +312 -0
- package/dist/server-helpers.js.map +1 -0
- package/dist/server.d.ts +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +11 -271
- package/dist/server.js.map +1 -1
- package/dist/storage/_shared.d.ts +9 -0
- package/dist/storage/_shared.d.ts.map +1 -0
- package/dist/storage/_shared.js +26 -0
- package/dist/storage/_shared.js.map +1 -0
- package/dist/storage/chunk-store.d.ts.map +1 -1
- package/dist/storage/chunk-store.js +23 -63
- package/dist/storage/chunk-store.js.map +1 -1
- package/dist/storage/embedding-store.d.ts +6 -3
- package/dist/storage/embedding-store.d.ts.map +1 -1
- package/dist/storage/embedding-store.js +54 -30
- package/dist/storage/embedding-store.js.map +1 -1
- package/dist/storage/graph-store.d.ts +48 -0
- package/dist/storage/graph-store.d.ts.map +1 -0
- package/dist/storage/graph-store.js +52 -0
- package/dist/storage/graph-store.js.map +1 -0
- package/dist/storage/index-store.d.ts +5 -0
- package/dist/storage/index-store.d.ts.map +1 -1
- package/dist/storage/index-store.js +28 -16
- package/dist/storage/index-store.js.map +1 -1
- package/dist/storage/registry.d.ts +4 -0
- package/dist/storage/registry.d.ts.map +1 -1
- package/dist/storage/registry.js +16 -16
- package/dist/storage/registry.js.map +1 -1
- package/dist/storage/usage-stats.d.ts +6 -0
- package/dist/storage/usage-stats.d.ts.map +1 -1
- package/dist/storage/usage-stats.js +59 -11
- package/dist/storage/usage-stats.js.map +1 -1
- package/dist/storage/usage-tracker.d.ts +3 -0
- package/dist/storage/usage-tracker.d.ts.map +1 -1
- package/dist/storage/usage-tracker.js +50 -132
- package/dist/storage/usage-tracker.js.map +1 -1
- package/dist/storage/watcher.d.ts +2 -1
- package/dist/storage/watcher.d.ts.map +1 -1
- package/dist/storage/watcher.js +16 -16
- package/dist/storage/watcher.js.map +1 -1
- package/dist/tools/ast-query-tools.d.ts +29 -0
- package/dist/tools/ast-query-tools.d.ts.map +1 -0
- package/dist/tools/ast-query-tools.js +110 -0
- package/dist/tools/ast-query-tools.js.map +1 -0
- package/dist/tools/boundary-tools.d.ts +31 -0
- package/dist/tools/boundary-tools.d.ts.map +1 -0
- package/dist/tools/boundary-tools.js +62 -0
- package/dist/tools/boundary-tools.js.map +1 -0
- package/dist/tools/clone-tools.d.ts +35 -0
- package/dist/tools/clone-tools.d.ts.map +1 -0
- package/dist/tools/clone-tools.js +181 -0
- package/dist/tools/clone-tools.js.map +1 -0
- package/dist/tools/community-tools.d.ts +23 -0
- package/dist/tools/community-tools.d.ts.map +1 -0
- package/dist/tools/community-tools.js +297 -0
- package/dist/tools/community-tools.js.map +1 -0
- package/dist/tools/complexity-tools.d.ts +34 -0
- package/dist/tools/complexity-tools.d.ts.map +1 -0
- package/dist/tools/complexity-tools.js +135 -0
- package/dist/tools/complexity-tools.js.map +1 -0
- package/dist/tools/context-tools.d.ts +44 -3
- package/dist/tools/context-tools.d.ts.map +1 -1
- package/dist/tools/context-tools.js +329 -99
- package/dist/tools/context-tools.js.map +1 -1
- package/dist/tools/conversation-tools.d.ts +107 -0
- package/dist/tools/conversation-tools.d.ts.map +1 -0
- package/dist/tools/conversation-tools.js +419 -0
- package/dist/tools/conversation-tools.js.map +1 -0
- package/dist/tools/coordinator-tools.d.ts +73 -0
- package/dist/tools/coordinator-tools.d.ts.map +1 -0
- package/dist/tools/coordinator-tools.js +153 -0
- package/dist/tools/coordinator-tools.js.map +1 -0
- package/dist/tools/cross-repo-tools.d.ts +43 -0
- package/dist/tools/cross-repo-tools.d.ts.map +1 -0
- package/dist/tools/cross-repo-tools.js +55 -0
- package/dist/tools/cross-repo-tools.js.map +1 -0
- package/dist/tools/diff-tools.d.ts +4 -1
- package/dist/tools/diff-tools.d.ts.map +1 -1
- package/dist/tools/diff-tools.js +23 -5
- package/dist/tools/diff-tools.js.map +1 -1
- package/dist/tools/frequency-tools.d.ts +46 -0
- package/dist/tools/frequency-tools.d.ts.map +1 -0
- package/dist/tools/frequency-tools.js +184 -0
- package/dist/tools/frequency-tools.js.map +1 -0
- package/dist/tools/generate-tools.d.ts.map +1 -1
- package/dist/tools/generate-tools.js +13 -2
- package/dist/tools/generate-tools.js.map +1 -1
- package/dist/tools/graph-tools.d.ts +44 -11
- package/dist/tools/graph-tools.d.ts.map +1 -1
- package/dist/tools/graph-tools.js +147 -104
- package/dist/tools/graph-tools.js.map +1 -1
- package/dist/tools/hotspot-tools.d.ts +24 -0
- package/dist/tools/hotspot-tools.d.ts.map +1 -0
- package/dist/tools/hotspot-tools.js +122 -0
- package/dist/tools/hotspot-tools.js.map +1 -0
- package/dist/tools/impact-tools.d.ts +13 -0
- package/dist/tools/impact-tools.d.ts.map +1 -0
- package/dist/tools/impact-tools.js +238 -0
- package/dist/tools/impact-tools.js.map +1 -0
- package/dist/tools/index-tools.d.ts +44 -3
- package/dist/tools/index-tools.d.ts.map +1 -1
- package/dist/tools/index-tools.js +530 -222
- package/dist/tools/index-tools.js.map +1 -1
- package/dist/tools/memory-tools.d.ts +35 -0
- package/dist/tools/memory-tools.d.ts.map +1 -0
- package/dist/tools/memory-tools.js +229 -0
- package/dist/tools/memory-tools.js.map +1 -0
- package/dist/tools/outline-tools.d.ts +24 -13
- package/dist/tools/outline-tools.d.ts.map +1 -1
- package/dist/tools/outline-tools.js +113 -87
- package/dist/tools/outline-tools.js.map +1 -1
- package/dist/tools/pattern-tools.d.ts +32 -0
- package/dist/tools/pattern-tools.d.ts.map +1 -0
- package/dist/tools/pattern-tools.js +116 -0
- package/dist/tools/pattern-tools.js.map +1 -0
- package/dist/tools/report-tools.d.ts +5 -0
- package/dist/tools/report-tools.d.ts.map +1 -0
- package/dist/tools/report-tools.js +167 -0
- package/dist/tools/report-tools.js.map +1 -0
- package/dist/tools/review-diff-tools.d.ts +148 -0
- package/dist/tools/review-diff-tools.d.ts.map +1 -0
- package/dist/tools/review-diff-tools.js +852 -0
- package/dist/tools/review-diff-tools.js.map +1 -0
- package/dist/tools/route-tools.d.ts +32 -0
- package/dist/tools/route-tools.d.ts.map +1 -0
- package/dist/tools/route-tools.js +276 -0
- package/dist/tools/route-tools.js.map +1 -0
- package/dist/tools/search-ranker.d.ts +5 -0
- package/dist/tools/search-ranker.d.ts.map +1 -0
- package/dist/tools/search-ranker.js +142 -0
- package/dist/tools/search-ranker.js.map +1 -0
- package/dist/tools/search-tools.d.ts +24 -1
- package/dist/tools/search-tools.d.ts.map +1 -1
- package/dist/tools/search-tools.js +459 -225
- package/dist/tools/search-tools.js.map +1 -1
- package/dist/tools/secret-tools.d.ts +104 -0
- package/dist/tools/secret-tools.d.ts.map +1 -0
- package/dist/tools/secret-tools.js +410 -0
- package/dist/tools/secret-tools.js.map +1 -0
- package/dist/tools/symbol-tools.d.ts +90 -2
- package/dist/tools/symbol-tools.d.ts.map +1 -1
- package/dist/tools/symbol-tools.js +576 -42
- package/dist/tools/symbol-tools.js.map +1 -1
- package/dist/types.d.ts +34 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/utils/framework-detect.d.ts +5 -0
- package/dist/utils/framework-detect.d.ts.map +1 -0
- package/dist/utils/framework-detect.js +36 -0
- package/dist/utils/framework-detect.js.map +1 -0
- package/dist/utils/glob.d.ts +19 -0
- package/dist/utils/glob.d.ts.map +1 -0
- package/dist/utils/glob.js +74 -0
- package/dist/utils/glob.js.map +1 -0
- package/dist/utils/import-graph.d.ts +29 -0
- package/dist/utils/import-graph.d.ts.map +1 -0
- package/dist/utils/import-graph.js +125 -0
- package/dist/utils/import-graph.js.map +1 -0
- package/dist/utils/test-file.d.ts.map +1 -1
- package/dist/utils/test-file.js +1 -0
- package/dist/utils/test-file.js.map +1 -1
- package/dist/utils/walk.d.ts +45 -0
- package/dist/utils/walk.d.ts.map +1 -0
- package/dist/utils/walk.js +87 -0
- package/dist/utils/walk.js.map +1 -0
- package/package.json +10 -4
- package/rules/codesift.md +187 -0
- package/rules/codesift.mdc +192 -0
- package/rules/codex.md +187 -0
- package/rules/gemini.md +187 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/** Approximate characters per token for budget estimation.
|
|
2
|
+
* 3 is conservative (accounts for JSON overhead: keys, quotes, indentation).
|
|
3
|
+
* Real ratio is ~3.5 for JSON-serialized code, but 3 prevents budget overruns. */
|
|
4
|
+
export declare const CHARS_PER_TOKEN = 3;
|
|
5
|
+
/** Reciprocal Rank Fusion smoothing constant. */
|
|
6
|
+
export declare const RRF_K = 60;
|
|
7
|
+
/** Lines of proximity to consider chunks adjacent (merge threshold). */
|
|
8
|
+
export declare const ADJACENCY_GAP = 5;
|
|
9
|
+
/** Padding width for line numbers in formatted output. */
|
|
10
|
+
export declare const LINE_NUMBER_PAD = 6;
|
|
11
|
+
/** Word count threshold below which queries are not decomposed. */
|
|
12
|
+
export declare const QUERY_DECOMPOSE_THRESHOLD = 8;
|
|
13
|
+
/** Lower bound of the split-window (fraction of word count). */
|
|
14
|
+
export declare const SPLIT_WINDOW_LO = 0.35;
|
|
15
|
+
/** Upper bound of the split-window (fraction of word count). */
|
|
16
|
+
export declare const SPLIT_WINDOW_HI = 0.65;
|
|
17
|
+
/** Maximum sub-queries per codebaseRetrieval call. */
|
|
18
|
+
export declare const MAX_QUERIES = 20;
|
|
19
|
+
/** Minimum remaining token budget to include a truncated result. */
|
|
20
|
+
export declare const MIN_TRUNCATION_TOKENS = 100;
|
|
21
|
+
/** Default top-K results for semantic/hybrid queries. */
|
|
22
|
+
export declare const DEFAULT_TOP_K = 10;
|
|
23
|
+
/** Default character limit for symbol source truncation. */
|
|
24
|
+
export declare const DEFAULT_SOURCE_CHARS = 200;
|
|
25
|
+
/** Timeout in ms for embedding API calls (Voyage, OpenAI, Ollama). */
|
|
26
|
+
export declare const EMBED_TIMEOUT_MS = 30000;
|
|
27
|
+
//# sourceMappingURL=retrieval-constants.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"retrieval-constants.d.ts","sourceRoot":"","sources":["../../src/retrieval/retrieval-constants.ts"],"names":[],"mappings":"AAAA;;kFAEkF;AAClF,eAAO,MAAM,eAAe,IAAI,CAAC;AAEjC,iDAAiD;AACjD,eAAO,MAAM,KAAK,KAAK,CAAC;AAExB,wEAAwE;AACxE,eAAO,MAAM,aAAa,IAAI,CAAC;AAE/B,0DAA0D;AAC1D,eAAO,MAAM,eAAe,IAAI,CAAC;AAEjC,mEAAmE;AACnE,eAAO,MAAM,yBAAyB,IAAI,CAAC;AAE3C,gEAAgE;AAChE,eAAO,MAAM,eAAe,OAAO,CAAC;AAEpC,gEAAgE;AAChE,eAAO,MAAM,eAAe,OAAO,CAAC;AAEpC,sDAAsD;AACtD,eAAO,MAAM,WAAW,KAAK,CAAC;AAE9B,oEAAoE;AACpE,eAAO,MAAM,qBAAqB,MAAM,CAAC;AAEzC,yDAAyD;AACzD,eAAO,MAAM,aAAa,KAAK,CAAC;AAEhC,4DAA4D;AAC5D,eAAO,MAAM,oBAAoB,MAAM,CAAC;AAExC,sEAAsE;AACtE,eAAO,MAAM,gBAAgB,QAAS,CAAC"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/** Approximate characters per token for budget estimation.
|
|
2
|
+
* 3 is conservative (accounts for JSON overhead: keys, quotes, indentation).
|
|
3
|
+
* Real ratio is ~3.5 for JSON-serialized code, but 3 prevents budget overruns. */
|
|
4
|
+
export const CHARS_PER_TOKEN = 3;
|
|
5
|
+
/** Reciprocal Rank Fusion smoothing constant. */
|
|
6
|
+
export const RRF_K = 60;
|
|
7
|
+
/** Lines of proximity to consider chunks adjacent (merge threshold). */
|
|
8
|
+
export const ADJACENCY_GAP = 5;
|
|
9
|
+
/** Padding width for line numbers in formatted output. */
|
|
10
|
+
export const LINE_NUMBER_PAD = 6;
|
|
11
|
+
/** Word count threshold below which queries are not decomposed. */
|
|
12
|
+
export const QUERY_DECOMPOSE_THRESHOLD = 8;
|
|
13
|
+
/** Lower bound of the split-window (fraction of word count). */
|
|
14
|
+
export const SPLIT_WINDOW_LO = 0.35;
|
|
15
|
+
/** Upper bound of the split-window (fraction of word count). */
|
|
16
|
+
export const SPLIT_WINDOW_HI = 0.65;
|
|
17
|
+
/** Maximum sub-queries per codebaseRetrieval call. */
|
|
18
|
+
export const MAX_QUERIES = 20;
|
|
19
|
+
/** Minimum remaining token budget to include a truncated result. */
|
|
20
|
+
export const MIN_TRUNCATION_TOKENS = 100;
|
|
21
|
+
/** Default top-K results for semantic/hybrid queries. */
|
|
22
|
+
export const DEFAULT_TOP_K = 10;
|
|
23
|
+
/** Default character limit for symbol source truncation. */
|
|
24
|
+
export const DEFAULT_SOURCE_CHARS = 200;
|
|
25
|
+
/** Timeout in ms for embedding API calls (Voyage, OpenAI, Ollama). */
|
|
26
|
+
export const EMBED_TIMEOUT_MS = 30_000;
|
|
27
|
+
//# sourceMappingURL=retrieval-constants.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"retrieval-constants.js","sourceRoot":"","sources":["../../src/retrieval/retrieval-constants.ts"],"names":[],"mappings":"AAAA;;kFAEkF;AAClF,MAAM,CAAC,MAAM,eAAe,GAAG,CAAC,CAAC;AAEjC,iDAAiD;AACjD,MAAM,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;AAExB,wEAAwE;AACxE,MAAM,CAAC,MAAM,aAAa,GAAG,CAAC,CAAC;AAE/B,0DAA0D;AAC1D,MAAM,CAAC,MAAM,eAAe,GAAG,CAAC,CAAC;AAEjC,mEAAmE;AACnE,MAAM,CAAC,MAAM,yBAAyB,GAAG,CAAC,CAAC;AAE3C,gEAAgE;AAChE,MAAM,CAAC,MAAM,eAAe,GAAG,IAAI,CAAC;AAEpC,gEAAgE;AAChE,MAAM,CAAC,MAAM,eAAe,GAAG,IAAI,CAAC;AAEpC,sDAAsD;AACtD,MAAM,CAAC,MAAM,WAAW,GAAG,EAAE,CAAC;AAE9B,oEAAoE;AACpE,MAAM,CAAC,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAEzC,yDAAyD;AACzD,MAAM,CAAC,MAAM,aAAa,GAAG,EAAE,CAAC;AAEhC,4DAA4D;AAC5D,MAAM,CAAC,MAAM,oBAAoB,GAAG,GAAG,CAAC;AAExC,sEAAsE;AACtE,MAAM,CAAC,MAAM,gBAAgB,GAAG,MAAM,CAAC"}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
export declare const SubQuerySchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
3
|
+
type: z.ZodLiteral<"symbols">;
|
|
4
|
+
query: z.ZodString;
|
|
5
|
+
kind: z.ZodOptional<z.ZodEnum<{
|
|
6
|
+
function: "function";
|
|
7
|
+
method: "method";
|
|
8
|
+
class: "class";
|
|
9
|
+
interface: "interface";
|
|
10
|
+
type: "type";
|
|
11
|
+
variable: "variable";
|
|
12
|
+
constant: "constant";
|
|
13
|
+
field: "field";
|
|
14
|
+
enum: "enum";
|
|
15
|
+
namespace: "namespace";
|
|
16
|
+
module: "module";
|
|
17
|
+
section: "section";
|
|
18
|
+
metadata: "metadata";
|
|
19
|
+
test_suite: "test_suite";
|
|
20
|
+
test_case: "test_case";
|
|
21
|
+
test_hook: "test_hook";
|
|
22
|
+
default_export: "default_export";
|
|
23
|
+
conversation_turn: "conversation_turn";
|
|
24
|
+
conversation_summary: "conversation_summary";
|
|
25
|
+
unknown: "unknown";
|
|
26
|
+
}>>;
|
|
27
|
+
file_pattern: z.ZodOptional<z.ZodString>;
|
|
28
|
+
top_k: z.ZodOptional<z.ZodNumber>;
|
|
29
|
+
source_chars: z.ZodOptional<z.ZodNumber>;
|
|
30
|
+
rerank: z.ZodOptional<z.ZodBoolean>;
|
|
31
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
32
|
+
type: z.ZodLiteral<"text">;
|
|
33
|
+
query: z.ZodString;
|
|
34
|
+
regex: z.ZodOptional<z.ZodBoolean>;
|
|
35
|
+
context_lines: z.ZodOptional<z.ZodNumber>;
|
|
36
|
+
file_pattern: z.ZodOptional<z.ZodString>;
|
|
37
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
38
|
+
type: z.ZodLiteral<"file_tree">;
|
|
39
|
+
path: z.ZodOptional<z.ZodString>;
|
|
40
|
+
path_prefix: z.ZodOptional<z.ZodString>;
|
|
41
|
+
name_pattern: z.ZodOptional<z.ZodString>;
|
|
42
|
+
depth: z.ZodOptional<z.ZodNumber>;
|
|
43
|
+
compact: z.ZodOptional<z.ZodBoolean>;
|
|
44
|
+
min_symbols: z.ZodOptional<z.ZodNumber>;
|
|
45
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
46
|
+
type: z.ZodLiteral<"outline">;
|
|
47
|
+
file_path: z.ZodString;
|
|
48
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
49
|
+
type: z.ZodLiteral<"references">;
|
|
50
|
+
symbol_name: z.ZodString;
|
|
51
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
52
|
+
type: z.ZodLiteral<"call_chain">;
|
|
53
|
+
symbol_name: z.ZodString;
|
|
54
|
+
direction: z.ZodOptional<z.ZodEnum<{
|
|
55
|
+
callers: "callers";
|
|
56
|
+
callees: "callees";
|
|
57
|
+
}>>;
|
|
58
|
+
depth: z.ZodOptional<z.ZodNumber>;
|
|
59
|
+
include_source: z.ZodOptional<z.ZodBoolean>;
|
|
60
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
61
|
+
type: z.ZodLiteral<"impact">;
|
|
62
|
+
since: z.ZodString;
|
|
63
|
+
depth: z.ZodOptional<z.ZodNumber>;
|
|
64
|
+
until: z.ZodOptional<z.ZodString>;
|
|
65
|
+
include_source: z.ZodOptional<z.ZodBoolean>;
|
|
66
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
67
|
+
type: z.ZodLiteral<"context">;
|
|
68
|
+
query: z.ZodString;
|
|
69
|
+
max_tokens: z.ZodOptional<z.ZodNumber>;
|
|
70
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
71
|
+
type: z.ZodLiteral<"knowledge_map">;
|
|
72
|
+
focus: z.ZodOptional<z.ZodString>;
|
|
73
|
+
depth: z.ZodOptional<z.ZodNumber>;
|
|
74
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
75
|
+
type: z.ZodLiteral<"semantic">;
|
|
76
|
+
query: z.ZodString;
|
|
77
|
+
top_k: z.ZodOptional<z.ZodNumber>;
|
|
78
|
+
file_filter: z.ZodOptional<z.ZodString>;
|
|
79
|
+
exclude_tests: z.ZodOptional<z.ZodBoolean>;
|
|
80
|
+
source_chars: z.ZodOptional<z.ZodNumber>;
|
|
81
|
+
rerank: z.ZodOptional<z.ZodBoolean>;
|
|
82
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
83
|
+
type: z.ZodLiteral<"hybrid">;
|
|
84
|
+
query: z.ZodString;
|
|
85
|
+
top_k: z.ZodOptional<z.ZodNumber>;
|
|
86
|
+
file_filter: z.ZodOptional<z.ZodString>;
|
|
87
|
+
exclude_tests: z.ZodOptional<z.ZodBoolean>;
|
|
88
|
+
rerank: z.ZodOptional<z.ZodBoolean>;
|
|
89
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
90
|
+
type: z.ZodLiteral<"conversation">;
|
|
91
|
+
query: z.ZodString;
|
|
92
|
+
project: z.ZodOptional<z.ZodString>;
|
|
93
|
+
limit: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
94
|
+
}, z.core.$strip>], "type">;
|
|
95
|
+
export type SubQuery = z.infer<typeof SubQuerySchema>;
|
|
96
|
+
export type SubQueryResult = {
|
|
97
|
+
type: string;
|
|
98
|
+
data: unknown;
|
|
99
|
+
tokens: number;
|
|
100
|
+
};
|
|
101
|
+
export interface CodebaseRetrievalResult {
|
|
102
|
+
results: SubQueryResult[];
|
|
103
|
+
total_tokens: number;
|
|
104
|
+
truncated: boolean;
|
|
105
|
+
query_count: number;
|
|
106
|
+
}
|
|
107
|
+
//# sourceMappingURL=retrieval-schemas.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"retrieval-schemas.d.ts","sourceRoot":"","sources":["../../src/retrieval/retrieval-schemas.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAqGxB,eAAO,MAAM,cAAc;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;2BAazB,CAAC;AAEH,MAAM,MAAM,QAAQ,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,cAAc,CAAC,CAAC;AAEtD,MAAM,MAAM,cAAc,GAAG;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,OAAO,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;CAChB,CAAC;AAEF,MAAM,WAAW,uBAAuB;IACtC,OAAO,EAAE,cAAc,EAAE,CAAC;IAC1B,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,OAAO,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;CACrB"}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
const SymbolKindSchema = z.enum([
|
|
3
|
+
"function", "method", "class", "interface", "type", "variable",
|
|
4
|
+
"constant", "field", "enum", "namespace", "module", "section",
|
|
5
|
+
"metadata", "test_suite", "test_case", "test_hook", "default_export",
|
|
6
|
+
"conversation_turn", "conversation_summary", "unknown",
|
|
7
|
+
]);
|
|
8
|
+
const SymbolsQuerySchema = z.object({
|
|
9
|
+
type: z.literal("symbols"),
|
|
10
|
+
query: z.string(),
|
|
11
|
+
kind: SymbolKindSchema.optional(),
|
|
12
|
+
file_pattern: z.string().optional(),
|
|
13
|
+
top_k: z.number().int().positive().optional(),
|
|
14
|
+
source_chars: z.number().int().nonnegative().optional(),
|
|
15
|
+
rerank: z.boolean().optional(),
|
|
16
|
+
});
|
|
17
|
+
const TextQuerySchema = z.object({
|
|
18
|
+
type: z.literal("text"),
|
|
19
|
+
query: z.string(),
|
|
20
|
+
regex: z.boolean().optional(),
|
|
21
|
+
context_lines: z.number().int().nonnegative().optional(),
|
|
22
|
+
file_pattern: z.string().optional(),
|
|
23
|
+
});
|
|
24
|
+
const FileTreeQuerySchema = z.object({
|
|
25
|
+
type: z.literal("file_tree"),
|
|
26
|
+
path: z.string().optional(),
|
|
27
|
+
path_prefix: z.string().optional(),
|
|
28
|
+
name_pattern: z.string().optional(),
|
|
29
|
+
depth: z.number().int().positive().optional(),
|
|
30
|
+
compact: z.boolean().optional(),
|
|
31
|
+
min_symbols: z.number().int().nonnegative().optional(),
|
|
32
|
+
});
|
|
33
|
+
const OutlineQuerySchema = z.object({
|
|
34
|
+
type: z.literal("outline"),
|
|
35
|
+
file_path: z.string(),
|
|
36
|
+
});
|
|
37
|
+
const ReferencesQuerySchema = z.object({
|
|
38
|
+
type: z.literal("references"),
|
|
39
|
+
symbol_name: z.string(),
|
|
40
|
+
});
|
|
41
|
+
const CallChainQuerySchema = z.object({
|
|
42
|
+
type: z.literal("call_chain"),
|
|
43
|
+
symbol_name: z.string(),
|
|
44
|
+
direction: z.enum(["callers", "callees"]).optional(),
|
|
45
|
+
depth: z.number().int().positive().optional(),
|
|
46
|
+
include_source: z.boolean().optional(),
|
|
47
|
+
});
|
|
48
|
+
const ImpactQuerySchema = z.object({
|
|
49
|
+
type: z.literal("impact"),
|
|
50
|
+
since: z.string(),
|
|
51
|
+
depth: z.number().int().positive().optional(),
|
|
52
|
+
until: z.string().optional(),
|
|
53
|
+
include_source: z.boolean().optional(),
|
|
54
|
+
});
|
|
55
|
+
const ContextQuerySchema = z.object({
|
|
56
|
+
type: z.literal("context"),
|
|
57
|
+
query: z.string(),
|
|
58
|
+
max_tokens: z.number().int().positive().optional(),
|
|
59
|
+
});
|
|
60
|
+
const KnowledgeMapQuerySchema = z.object({
|
|
61
|
+
type: z.literal("knowledge_map"),
|
|
62
|
+
focus: z.string().optional(),
|
|
63
|
+
depth: z.number().int().positive().optional(),
|
|
64
|
+
});
|
|
65
|
+
const SemanticQuerySchema = z.object({
|
|
66
|
+
type: z.literal("semantic"),
|
|
67
|
+
query: z.string(),
|
|
68
|
+
top_k: z.number().int().positive().optional(),
|
|
69
|
+
file_filter: z.string().optional(),
|
|
70
|
+
exclude_tests: z.boolean().optional(),
|
|
71
|
+
source_chars: z.number().int().nonnegative().optional(),
|
|
72
|
+
rerank: z.boolean().optional(),
|
|
73
|
+
});
|
|
74
|
+
const HybridQuerySchema = z.object({
|
|
75
|
+
type: z.literal("hybrid"),
|
|
76
|
+
query: z.string(),
|
|
77
|
+
top_k: z.number().int().positive().optional(),
|
|
78
|
+
file_filter: z.string().optional(),
|
|
79
|
+
exclude_tests: z.boolean().optional(),
|
|
80
|
+
rerank: z.boolean().optional(),
|
|
81
|
+
});
|
|
82
|
+
const ConversationQuerySchema = z.object({
|
|
83
|
+
type: z.literal("conversation"),
|
|
84
|
+
query: z.string(),
|
|
85
|
+
project: z.string().optional(),
|
|
86
|
+
limit: z.number().int().positive().optional().default(5),
|
|
87
|
+
});
|
|
88
|
+
export const SubQuerySchema = z.discriminatedUnion("type", [
|
|
89
|
+
SymbolsQuerySchema,
|
|
90
|
+
TextQuerySchema,
|
|
91
|
+
FileTreeQuerySchema,
|
|
92
|
+
OutlineQuerySchema,
|
|
93
|
+
ReferencesQuerySchema,
|
|
94
|
+
CallChainQuerySchema,
|
|
95
|
+
ImpactQuerySchema,
|
|
96
|
+
ContextQuerySchema,
|
|
97
|
+
KnowledgeMapQuerySchema,
|
|
98
|
+
SemanticQuerySchema,
|
|
99
|
+
HybridQuerySchema,
|
|
100
|
+
ConversationQuerySchema,
|
|
101
|
+
]);
|
|
102
|
+
//# sourceMappingURL=retrieval-schemas.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"retrieval-schemas.js","sourceRoot":"","sources":["../../src/retrieval/retrieval-schemas.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,MAAM,gBAAgB,GAAG,CAAC,CAAC,IAAI,CAAC;IAC9B,UAAU,EAAE,QAAQ,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,EAAE,UAAU;IAC9D,UAAU,EAAE,OAAO,EAAE,MAAM,EAAE,WAAW,EAAE,QAAQ,EAAE,SAAS;IAC7D,UAAU,EAAE,YAAY,EAAE,WAAW,EAAE,WAAW,EAAE,gBAAgB;IACpE,mBAAmB,EAAE,sBAAsB,EAAE,SAAS;CACvD,CAAC,CAAC;AAEH,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC;IAC1B,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE;IACjB,IAAI,EAAE,gBAAgB,CAAC,QAAQ,EAAE;IACjC,YAAY,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IACnC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE;IAC7C,YAAY,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,WAAW,EAAE,CAAC,QAAQ,EAAE;IACvD,MAAM,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE;CAC/B,CAAC,CAAC;AAEH,MAAM,eAAe,GAAG,CAAC,CAAC,MAAM,CAAC;IAC/B,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC;IACvB,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE;IACjB,KAAK,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE;IAC7B,aAAa,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,WAAW,EAAE,CAAC,QAAQ,EAAE;IACxD,YAAY,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;CACpC,CAAC,CAAC;AAEH,MAAM,mBAAmB,GAAG,CAAC,CAAC,MAAM,CAAC;IACnC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,WAAW,CAAC;IAC5B,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC3B,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAClC,YAAY,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IACnC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE;IAC7C,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE;IAC/B,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,WAAW,EAAE,CAAC,QAAQ,EAAE;CACvD,CAAC,CAAC;AAEH,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC;IAC1B,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE;CACtB,CAAC,CAAC;AAEH,MAAM,qBAAqB,GAAG,CAAC,CAAC,MAAM,CAAC;IACrC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,YAAY,CAAC;IAC7B,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE;CACxB,CAAC,CAAC;AAEH,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC
;IACpC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,YAAY,CAAC;IAC7B,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE;IACvB,SAAS,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC,CAAC,QAAQ,EAAE;IACpD,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE;IAC7C,cAAc,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE;CACvC,CAAC,CAAC;AAEH,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;IACjC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC;IACzB,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE;IACjB,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE;IAC7C,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC5B,cAAc,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE;CACvC,CAAC,CAAC;AAEH,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC;IAC1B,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE;IACjB,UAAU,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE;CACnD,CAAC,CAAC;AAEH,MAAM,uBAAuB,GAAG,CAAC,CAAC,MAAM,CAAC;IACvC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,eAAe,CAAC;IAChC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC5B,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE;CAC9C,CAAC,CAAC;AAEH,MAAM,mBAAmB,GAAG,CAAC,CAAC,MAAM,CAAC;IACnC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC;IAC3B,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE;IACjB,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE;IAC7C,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAClC,aAAa,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE;IACrC,YAAY,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,WAAW,EAAE,CAAC,QAAQ,EAAE;IACvD,MAAM,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE;CAC/B,CAAC,CAAC;AAEH,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;IACjC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC;IACzB,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE;IACjB,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE;IAC7C,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAClC,aAAa,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE;IACrC,MAAM,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE;CAC/B,CAAC,CAAC;AAEH,MAAM,uBAAuB,GAAG,CAAC,CAAC,MAAM,CAAC;IACvC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,c
AAc,CAAC;IAC/B,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE;IACjB,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC9B,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;CACzD,CAAC,CAAC;AAEH,MAAM,CAAC,MAAM,cAAc,GAAG,CAAC,CAAC,kBAAkB,CAAC,MAAM,EAAE;IACzD,kBAAkB;IAClB,eAAe;IACf,mBAAmB;IACnB,kBAAkB;IAClB,qBAAqB;IACrB,oBAAoB;IACpB,iBAAiB;IACjB,kBAAkB;IAClB,uBAAuB;IACvB,mBAAmB;IACnB,iBAAiB;IACjB,uBAAuB;CACxB,CAAC,CAAC"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import type { CodeChunk } from "../types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Estimate token count from a string. ~4 chars per token.
|
|
4
|
+
*/
|
|
5
|
+
export declare function estimateTokens(text: string): number;
|
|
6
|
+
/**
|
|
7
|
+
* Filter embedding entries by file path substring and/or test file exclusion.
|
|
8
|
+
* Uses a lookup map to resolve the file path for each embedding ID.
|
|
9
|
+
*/
|
|
10
|
+
export declare function filterEmbeddingsByFile(embeddings: Map<string, Float32Array>, fileLookup: Map<string, string | undefined>, fileFilter: string | undefined, excludeTests: boolean): Map<string, Float32Array>;
|
|
11
|
+
/**
|
|
12
|
+
* Compute RRF scores from multiple embedding query vectors against filtered embeddings.
|
|
13
|
+
* Each vector produces a ranked list; scores are accumulated via RRF formula.
|
|
14
|
+
*/
|
|
15
|
+
export declare function computeRRFScores(vecs: number[][], filteredEmbeddings: Map<string, Float32Array>, cosSim: (a: Float32Array, b: Float32Array) => number): Map<string, number>;
|
|
16
|
+
export type ChunkEntry = {
|
|
17
|
+
startLine: number;
|
|
18
|
+
endLine: number;
|
|
19
|
+
text: string;
|
|
20
|
+
};
|
|
21
|
+
/**
|
|
22
|
+
* Group top chunk IDs by file, merge overlapping/adjacent chunks, and format
|
|
23
|
+
* as numbered plain text sections.
|
|
24
|
+
*/
|
|
25
|
+
export declare function formatChunksAsText(topIds: string[], chunks: Map<string, CodeChunk>, excludeTests: boolean): string;
|
|
26
|
+
/**
|
|
27
|
+
* Split a long query into sub-queries at natural connectors for RRF merging.
|
|
28
|
+
*/
|
|
29
|
+
export declare function decomposeQuery(query: string): string[];
|
|
30
|
+
/**
|
|
31
|
+
* Truncate symbol source to a character limit, preserving the rest of the symbol object.
|
|
32
|
+
*/
|
|
33
|
+
export declare function truncateSymbolSource<T extends {
|
|
34
|
+
source?: string;
|
|
35
|
+
}>(sym: T, limit: number): T;
|
|
36
|
+
/**
|
|
37
|
+
* Race a promise against a timeout. Rejects with a descriptive error on timeout.
|
|
38
|
+
*/
|
|
39
|
+
export declare function withTimeout<T>(promise: Promise<T>, ms: number, label: string): Promise<T>;
|
|
40
|
+
//# sourceMappingURL=retrieval-utils.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"retrieval-utils.d.ts","sourceRoot":"","sources":["../../src/retrieval/retrieval-utils.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAY7C;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEnD;AAED;;;GAGG;AACH,wBAAgB,sBAAsB,CACpC,UAAU,EAAE,GAAG,CAAC,MAAM,EAAE,YAAY,CAAC,EACrC,UAAU,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC,EAC3C,UAAU,EAAE,MAAM,GAAG,SAAS,EAC9B,YAAY,EAAE,OAAO,GACpB,GAAG,CAAC,MAAM,EAAE,YAAY,CAAC,CAS3B;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAC9B,IAAI,EAAE,MAAM,EAAE,EAAE,EAChB,kBAAkB,EAAE,GAAG,CAAC,MAAM,EAAE,YAAY,CAAC,EAC7C,MAAM,EAAE,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,KAAK,MAAM,GACnD,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAiBrB;AAED,MAAM,MAAM,UAAU,GAAG;IAAE,SAAS,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC;AAE9E;;;GAGG;AACH,wBAAgB,kBAAkB,CAChC,MAAM,EAAE,MAAM,EAAE,EAChB,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,SAAS,CAAC,EAC9B,YAAY,EAAE,OAAO,GACpB,MAAM,CAyCR;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAkBtD;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,CAAC,SAAS;IAAE,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,EAChE,GAAG,EAAE,CAAC,EACN,KAAK,EAAE,MAAM,GACZ,CAAC,CAKH;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,CAAC,EAC3B,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC,EACnB,EAAE,EAAE,MAAM,EACV,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,CAAC,CAAC,CAQZ"}
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
import { isTestFile } from "../utils/test-file.js";
|
|
2
|
+
import { CHARS_PER_TOKEN, RRF_K, ADJACENCY_GAP, LINE_NUMBER_PAD, QUERY_DECOMPOSE_THRESHOLD, SPLIT_WINDOW_LO, SPLIT_WINDOW_HI, } from "./retrieval-constants.js";
|
|
3
|
+
/**
 * Approximate the number of tokens a piece of text occupies.
 *
 * The estimate is the character count divided by CHARS_PER_TOKEN, rounded up
 * (the original note describes this as roughly four characters per token).
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated token count; 0 for an empty string.
 */
export function estimateTokens(text) {
    const charCount = text.length;
    const rawTokens = charCount / CHARS_PER_TOKEN;
    return Math.ceil(rawTokens);
}
|
|
9
|
+
/**
 * Narrow a set of embeddings by file path substring and/or test-file exclusion.
 *
 * Each embedding ID is resolved to a file path through `fileLookup`; IDs with
 * no resolvable path are dropped whenever any filtering is active.
 *
 * @param {Map<string, Float32Array>} embeddings - Embedding vectors keyed by ID.
 * @param {Map<string, string | undefined>} fileLookup - File path for each ID.
 * @param {string | undefined} fileFilter - Substring the path must contain, if set.
 * @param {boolean} excludeTests - When true, drop entries whose path is a test file.
 * @returns {Map<string, Float32Array>} The same map instance when no filtering
 *   is requested, otherwise a new map holding the surviving entries in order.
 */
export function filterEmbeddingsByFile(embeddings, fileLookup, fileFilter, excludeTests) {
    // Nothing to filter on: hand the caller's map back untouched.
    if (!fileFilter && !excludeTests) {
        return embeddings;
    }
    const kept = new Map();
    for (const [id, vector] of embeddings) {
        const path = fileLookup.get(id);
        if (!path) {
            continue;
        }
        const passesFilter = !fileFilter || path.includes(fileFilter);
        if (!passesFilter) {
            continue;
        }
        if (excludeTests && isTestFile(path)) {
            continue;
        }
        kept.set(id, vector);
    }
    return kept;
}
|
|
27
|
+
/**
 * Fuse the rankings produced by several query vectors into one score per ID.
 *
 * For every query vector, candidates of matching dimensionality are scored with
 * `cosSim` and sorted best-first; each candidate then gains
 * `1 / (RRF_K + rank + 1)` where `rank` is its zero-based position.
 *
 * @param {number[][]} vecs - Query vectors; falsy entries are skipped.
 * @param {Map<string, Float32Array>} filteredEmbeddings - Candidate vectors by ID.
 * @param {(a: Float32Array, b: Float32Array) => number} cosSim - Similarity function.
 * @returns {Map<string, number>} Accumulated score for every candidate ranked at least once.
 */
export function computeRRFScores(vecs, filteredEmbeddings, cosSim) {
    const fused = new Map();
    for (const queryVec of vecs) {
        if (!queryVec) {
            continue;
        }
        const queryEmbedding = new Float32Array(queryVec);
        const ranking = [];
        for (const [id, candidate] of filteredEmbeddings) {
            // Vectors of a different dimensionality cannot be compared.
            if (candidate.length !== queryEmbedding.length) {
                continue;
            }
            ranking.push({ id, score: cosSim(queryEmbedding, candidate) });
        }
        ranking.sort((x, y) => y.score - x.score);
        for (let rank = 0; rank < ranking.length; rank++) {
            const { id } = ranking[rank];
            const soFar = fused.get(id) ?? 0;
            fused.set(id, soFar + 1 / (RRF_K + rank + 1));
        }
    }
    return fused;
}
|
|
50
|
+
/**
 * Group the selected chunks by file, merge overlapping or directly contiguous
 * chunks, and render everything as line-numbered plain text.
 *
 * Output layout: a fixed header line, then for every file a `Path: <file>`
 * line, the numbered lines of each run, and a closing `...` line.
 *
 * Chunks are only spliced together when the later one starts no further than
 * one line past the end of the earlier one. A chunk that starts after unseen
 * lines is emitted as its own run: the missing lines are not available here,
 * so appending its text would mis-number every following line (and a negative
 * overlap passed to `slice` would silently drop most of the chunk).
 *
 * @param {string[]} topIds - Chunk IDs to include; unknown IDs are ignored.
 * @param {Map<string, {file: string, startLine: number, endLine: number, text: string}>} chunks
 *   Chunk records keyed by ID.
 * @param {boolean} excludeTests - When true, chunks from test files are skipped.
 * @returns {string} The formatted text; only the header line when nothing matches.
 */
export function formatChunksAsText(topIds, chunks, excludeTests) {
    // Bucket chunks per file, keeping the order in which files are first seen.
    const byFile = new Map();
    for (const id of topIds) {
        const chunk = chunks.get(id);
        if (!chunk) {
            continue;
        }
        if (excludeTests && isTestFile(chunk.file)) {
            continue;
        }
        const entry = { startLine: chunk.startLine, endLine: chunk.endLine, text: chunk.text };
        const bucket = byFile.get(chunk.file);
        if (bucket) {
            bucket.push(entry);
        }
        else {
            byFile.set(chunk.file, [entry]);
        }
    }
    const sections = ["The following code sections were retrieved:"];
    for (const [file, fileChunks] of byFile) {
        fileChunks.sort((a, b) => a.startLine - b.startLine);
        const merged = [];
        for (const chunk of fileChunks) {
            const last = merged[merged.length - 1];
            const touchesLast = last !== undefined && chunk.startLine <= last.endLine + 1;
            if (!touchesLast) {
                // First run, or separated from the previous run by unseen lines.
                merged.push({ startLine: chunk.startLine, endLine: chunk.endLine, text: chunk.text });
                continue;
            }
            if (chunk.endLine > last.endLine) {
                // touchesLast guarantees overlapLines >= 0, so slice() skips
                // exactly the lines the previous run already contains.
                const overlapLines = last.endLine - chunk.startLine + 1;
                const newLines = chunk.text.split("\n").slice(overlapLines);
                last.text = last.text + "\n" + newLines.join("\n");
                last.endLine = chunk.endLine;
            }
            // Otherwise the chunk is fully contained in the previous run.
        }
        sections.push(`Path: ${file}`);
        for (const run of merged) {
            const numbered = run.text
                .split("\n")
                .map((line, offset) => {
                    const lineNo = String(run.startLine + offset).padStart(LINE_NUMBER_PAD, " ");
                    return `${lineNo}\t${line}`;
                })
                .join("\n");
            sections.push(numbered);
        }
        sections.push("...");
    }
    return sections.join("\n");
}
|
|
97
|
+
/**
 * Break a long query into two sub-queries so each can be ranked separately.
 *
 * Queries with at most QUERY_DECOMPOSE_THRESHOLD words are returned unchanged
 * as a single-element array. Longer queries are split at the first connector
 * word found between the SPLIT_WINDOW_LO and SPLIT_WINDOW_HI fractions of the
 * word list (the connector itself is dropped); if none qualifies, the words
 * are split at the midpoint.
 *
 * @param {string} query - The user's query text.
 * @returns {string[]} Either `[query]` or a two-element array of sub-queries.
 */
export function decomposeQuery(query) {
    const words = query.split(/\s+/).filter((word) => word.length > 0);
    if (words.length <= QUERY_DECOMPOSE_THRESHOLD) {
        return [query];
    }
    const connectors = new Set(["and", "or", "from", "to", "with", "using", "for", "via", "then"]);
    const windowStart = Math.floor(words.length * SPLIT_WINDOW_LO);
    const windowEnd = Math.floor(words.length * SPLIT_WINDOW_HI);
    for (let idx = windowStart; idx <= windowEnd; idx++) {
        const candidate = (words[idx] ?? "").toLowerCase();
        if (!connectors.has(candidate)) {
            continue;
        }
        const head = words.slice(0, idx).join(" ");
        const tail = words.slice(idx + 1).join(" ");
        // Only split when both halves carry content.
        if (head.trim() && tail.trim()) {
            return [head, tail];
        }
    }
    const half = Math.floor(words.length / 2);
    const firstHalf = words.slice(0, half).join(" ");
    const secondHalf = words.slice(half).join(" ");
    return [firstHalf, secondHalf];
}
|
|
118
|
+
/**
 * Cap the `source` text of a symbol at `limit` characters.
 *
 * @param {{source?: string}} sym - Symbol-like object; never mutated.
 * @param {number} limit - Maximum number of characters to keep. Values that
 *   are not greater than zero disable truncation.
 * @returns {{source?: string}} The original object when no truncation is
 *   needed, otherwise a shallow copy whose `source` is cut to `limit`.
 */
export function truncateSymbolSource(sym, limit) {
    if (!(limit > 0)) {
        return sym;
    }
    const text = sym.source;
    if (!text || !(text.length > limit)) {
        return sym;
    }
    const shortened = text.slice(0, limit);
    return { ...sym, source: shortened };
}
|
|
127
|
+
/**
 * Settle with the given promise unless it takes longer than `ms` milliseconds.
 *
 * The timer is cleared as soon as the wrapped promise settles, so a finished
 * operation does not leave a pending timeout behind.
 *
 * @param {Promise<T>} promise - The operation to guard.
 * @param {number} ms - Timeout in milliseconds.
 * @param {string} label - Name of the operation, used in the timeout message.
 * @returns {Promise<T>} Resolves or rejects like `promise`, or rejects with
 *   `Error("<label> timed out after <ms>ms")` when the timer fires first.
 * @template T
 */
export function withTimeout(promise, ms, label) {
    let timeoutHandle;
    // Attach the cleanup first; its callback only runs after the timer below
    // has been created, because promise reactions are asynchronous.
    const guarded = promise.finally(() => {
        clearTimeout(timeoutHandle);
    });
    const expiry = new Promise((_unused, fail) => {
        timeoutHandle = setTimeout(() => {
            fail(new Error(`${label} timed out after ${ms}ms`));
        }, ms);
    });
    return Promise.race([guarded, expiry]);
}
|
|
139
|
+
//# sourceMappingURL=retrieval-utils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"retrieval-utils.js","sourceRoot":"","sources":["../../src/retrieval/retrieval-utils.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AACnD,OAAO,EACL,eAAe,EACf,KAAK,EACL,aAAa,EACb,eAAe,EACf,yBAAyB,EACzB,eAAe,EACf,eAAe,GAChB,MAAM,0BAA0B,CAAC;AAElC;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,eAAe,CAAC,CAAC;AAClD,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,sBAAsB,CACpC,UAAqC,EACrC,UAA2C,EAC3C,UAA8B,EAC9B,YAAqB;IAErB,IAAI,CAAC,UAAU,IAAI,CAAC,YAAY;QAAE,OAAO,UAAU,CAAC;IACpD,OAAO,IAAI,GAAG,CAAC,CAAC,GAAG,UAAU,CAAC,OAAO,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACvD,MAAM,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAChC,IAAI,CAAC,IAAI;YAAE,OAAO,KAAK,CAAC;QACxB,IAAI,UAAU,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC;YAAE,OAAO,KAAK,CAAC;QAC3D,IAAI,YAAY,IAAI,UAAU,CAAC,IAAI,CAAC;YAAE,OAAO,KAAK,CAAC;QACnD,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC,CAAC;AACN,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAC9B,IAAgB,EAChB,kBAA6C,EAC7C,MAAoD;IAEpD,MAAM,SAAS,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC5C,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,IAAI,CAAC,GAAG;YAAE,SAAS;QACnB,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,GAAG,CAAC,CAAC;QACrC,MAAM,SAAS,GAAyC,EAAE,CAAC;QAC3D,KAAK,MAAM,CAAC,EAAE,EAAE,QAAQ,CAAC,IAAI,kBAAkB,EAAE,CAAC;YAChD,IAAI,QAAQ,CAAC,MAAM,KAAK,MAAM,CAAC,MAAM,EAAE,CAAC;gBACtC,SAAS,CAAC,IAAI,CAAC,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC;YAC1D,CAAC;QACH,CAAC;QACD,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;QAC5C,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;YAC5B,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC;QAC3E,CAAC,CAAC,CAAC;IACL,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAID;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAChC,MAAgB,EAChB,MAA8B,EAC9B,YAAqB;IAErB,MAAM,MAAM,GAAG,IAAI,GAAG,EAAwB,CAAC;IAC/C,KAAK,MAAM,EAAE,IAAI,MAAM,EAAE,CAAC;QACxB,MAAM,KAAK,GA
AG,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAC7B,IAAI,CAAC,KAAK;YAAE,SAAS;QACrB,IAAI,YAAY,IAAI,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC;YAAE,SAAS;QACrD,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAC9C,QAAQ,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,KAAK,CAAC,SAAS,EAAE,OAAO,EAAE,KAAK,CAAC,OAAO,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;QACxF,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;IACnC,CAAC;IAED,MAAM,QAAQ,GAAa,CAAC,6CAA6C,CAAC,CAAC;IAC3E,KAAK,MAAM,CAAC,IAAI,EAAE,UAAU,CAAC,IAAI,MAAM,EAAE,CAAC;QACxC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC;QACrD,MAAM,MAAM,GAAiB,EAAE,CAAC;QAChC,KAAK,MAAM,KAAK,IAAI,UAAU,EAAE,CAAC;YAC/B,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;YACvC,IAAI,IAAI,IAAI,KAAK,CAAC,SAAS,IAAI,IAAI,CAAC,OAAO,GAAG,aAAa,EAAE,CAAC;gBAC5D,IAAI,KAAK,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC;oBACjC,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,GAAG,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC;oBACxD,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;oBAC5D,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,GAAG,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBACnD,IAAI,CAAC,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC;gBAC/B,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,KAAK,CAAC,SAAS,EAAE,OAAO,EAAE,KAAK,CAAC,OAAO,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;YACxF,CAAC;QACH,CAAC;QACD,QAAQ,CAAC,IAAI,CAAC,SAAS,IAAI,EAAE,CAAC,CAAC;QAC/B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YACrC,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE;gBACrC,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,eAAe,EAAE,GAAG,CAAC,CAAC;gBAC1E,OAAO,GAAG,MAAM,KAAK,IAAI,EAAE,CAAC;YAC9B,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACd,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC1B,CAAC;QACD,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACvB,CAAC;IAED,OAAO,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC7B,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,KAAa;IAC1C
,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IACjD,IAAI,KAAK,CAAC,MAAM,IAAI,yBAAyB;QAAE,OAAO,CAAC,KAAK,CAAC,CAAC;IAE9D,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC;IAC/F,MAAM,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,eAAe,CAAC,CAAC;IACtD,MAAM,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,eAAe,CAAC,CAAC;IAEtD,KAAK,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;QAC9B,IAAI,UAAU,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;YACnD,MAAM,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACtC,MAAM,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACvC,IAAI,CAAC,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,IAAI,EAAE;gBAAE,OAAO,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAC1C,CAAC;IACH,CAAC;IAED,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACzC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AACrE,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,oBAAoB,CAClC,GAAM,EACN,KAAa;IAEb,IAAI,KAAK,GAAG,CAAC,IAAI,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,MAAM,CAAC,MAAM,GAAG,KAAK,EAAE,CAAC;QACzD,OAAO,EAAE,GAAG,GAAG,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,EAAE,CAAC;IACxD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CACzB,OAAmB,EACnB,EAAU,EACV,KAAa;IAEb,IAAI,KAAoC,CAAC;IACzC,OAAO,OAAO,CAAC,IAAI,CAAC;QAClB,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;QAC1C,IAAI,OAAO,CAAQ,CAAC,QAAQ,EAAE,MAAM,EAAE,EAAE;YACtC,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,GAAG,KAAK,oBAAoB,EAAE,IAAI,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACtF,CAAC,CAAC;KACH,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { SubQuery, SubQueryResult } from "./retrieval-schemas.js";
|
|
2
|
+
export declare function handleSemanticQuery(repo: string, query: Extract<SubQuery, {
|
|
3
|
+
type: "semantic";
|
|
4
|
+
}>): Promise<SubQueryResult>;
|
|
5
|
+
export declare function handleHybridQuery(repo: string, query: Extract<SubQuery, {
|
|
6
|
+
type: "hybrid";
|
|
7
|
+
}>): Promise<SubQueryResult>;
|
|
8
|
+
//# sourceMappingURL=semantic-handlers.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"semantic-handlers.d.ts","sourceRoot":"","sources":["../../src/retrieval/semantic-handlers.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AA8FvE,wBAAsB,mBAAmB,CACvC,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,OAAO,CAAC,QAAQ,EAAE;IAAE,IAAI,EAAE,UAAU,CAAA;CAAE,CAAC,GAC7C,OAAO,CAAC,cAAc,CAAC,CA8CzB;AAMD,wBAAsB,iBAAiB,CACrC,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,OAAO,CAAC,QAAQ,EAAE;IAAE,IAAI,EAAE,QAAQ,CAAA;CAAE,CAAC,GAC3C,OAAO,CAAC,cAAc,CAAC,CA2DzB"}
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
import { isTestFile } from "../utils/test-file.js";
|
|
2
|
+
import { estimateTokens, filterEmbeddingsByFile, computeRRFScores, formatChunksAsText, decomposeQuery, truncateSymbolSource, withTimeout, } from "./retrieval-utils.js";
|
|
3
|
+
import { DEFAULT_TOP_K, DEFAULT_SOURCE_CHARS, EMBED_TIMEOUT_MS, RRF_K } from "./retrieval-constants.js";
|
|
4
|
+
/**
 * Render symbol-level search results as plain text.
 *
 * Each result becomes a header line of the form
 * `<file>:<start_line> <kind> <name>[ <signature>]`, followed by the symbol's
 * source on the next line when it has one. Results are separated by a blank line.
 *
 * @param {Array<{symbol: object}>} results - Search hits carrying a `symbol`.
 * @returns {string} The rendered text, or "(no results)" for an empty list.
 */
function formatSemanticResults(results) {
    if (results.length === 0) {
        return "(no results)";
    }
    const blocks = [];
    for (const hit of results) {
        const sym = hit.symbol;
        const location = `${sym.file}:${sym.start_line} ${sym.kind} ${sym.name}`;
        const header = sym.signature ? `${location} ${sym.signature}` : location;
        blocks.push(sym.source ? `${header}\n${sym.source}` : header);
    }
    return blocks.join("\n\n");
}
|
|
13
|
+
/**
 * Gather everything a semantic or hybrid sub-query needs: configuration, the
 * embedding provider, the query vectors, and the repository's chunk data.
 *
 * The embedding request and the repository/chunk load do not depend on each
 * other, so they are started together and awaited as a pair instead of one
 * after the other.
 *
 * @param {string} repo - Repository identifier looked up in the registry.
 * @param {object} query - Sub-query; reads `query`, `top_k`, `file_filter`
 *   and `exclude_tests` (which defaults to true).
 * @returns {Promise<object>} Context object with `config`, `provider`, `vecs`,
 *   `repoMeta`, `chunks`, `chunkEmbeddings`, `chunkFileLookup`,
 *   `filteredEmbeddings`, `topK`, `fileFilter`, `excludeTests` and
 *   `cosineSimilarity`. `chunks` and `chunkEmbeddings` are null when the
 *   repository is not registered.
 * @throws {Error} When no embedding provider is configured, or when the
 *   embedding call exceeds EMBED_TIMEOUT_MS.
 */
async function loadSemanticContext(repo, query) {
    const { createEmbeddingProvider, cosineSimilarity } = await import("../search/semantic.js");
    const { loadConfig: getConfig } = await import("../config.js");
    const { getRepo } = await import("../storage/registry.js");
    const { loadChunks, loadChunkEmbeddings, getChunkPath, getChunkEmbeddingPath } = await import("../storage/chunk-store.js");
    const config = getConfig();
    if (!config.embeddingProvider) {
        throw new Error("No embedding provider configured. Set CODESIFT_VOYAGE_API_KEY, CODESIFT_OPENAI_API_KEY, or CODESIFT_OLLAMA_URL.");
    }
    const topK = query.top_k ?? DEFAULT_TOP_K;
    const fileFilter = query.file_filter;
    const excludeTests = query.exclude_tests ?? true;
    const provider = createEmbeddingProvider(config.embeddingProvider, config);
    const subQueryTexts = decomposeQuery(query.query);
    // Resolve the repository and, when it exists, load its chunk data.
    const loadRepoData = async () => {
        const meta = await getRepo(config.registryPath, repo);
        if (!meta) {
            return { repoMeta: meta, chunks: null, chunkEmbeddings: null };
        }
        const [loadedChunks, loadedEmbeddings] = await Promise.all([
            loadChunks(getChunkPath(meta.index_path)),
            loadChunkEmbeddings(getChunkEmbeddingPath(meta.index_path)),
        ]);
        return { repoMeta: meta, chunks: loadedChunks, chunkEmbeddings: loadedEmbeddings };
    };
    // The timeout still covers only the embedding call. Promise.all subscribes
    // to both promises, so a failure in either one is always observed.
    const [vecs, { repoMeta, chunks, chunkEmbeddings }] = await Promise.all([
        withTimeout(provider.embed(subQueryTexts), EMBED_TIMEOUT_MS, "Embedding API"),
        loadRepoData(),
    ]);
    const chunkFileLookup = chunks
        ? new Map([...chunks.entries()].map(([id, c]) => [id, c.file]))
        : new Map();
    const sourceEmbeddings = chunkEmbeddings ?? new Map();
    const filteredEmbeddings = filterEmbeddingsByFile(sourceEmbeddings, chunkFileLookup, fileFilter, excludeTests);
    return {
        config, provider, vecs, repoMeta,
        chunks, chunkEmbeddings, chunkFileLookup, filteredEmbeddings,
        topK, fileFilter, excludeTests, cosineSimilarity,
    };
}
|
|
48
|
+
// ---------------------------------------------------------------------------
|
|
49
|
+
// Semantic query handler
|
|
50
|
+
// ---------------------------------------------------------------------------
|
|
51
|
+
/**
 * Answer a "semantic" sub-query.
 *
 * When chunk data and chunk embeddings are available, chunks are ranked by
 * fused RRF score, optionally reranked, and returned as numbered text.
 * Otherwise the handler falls back to symbol-level search using only the
 * first query vector.
 *
 * @param {string} repo - Repository identifier.
 * @param {object} query - Sub-query; reads `query`, `rerank` and
 *   `source_chars` here, plus the fields consumed by loadSemanticContext.
 * @returns {Promise<{type: "semantic", data: string, tokens: number}>}
 * @throws {Error} In the fallback path: when the repository has no code index,
 *   has no embeddings, or the provider returned no vector.
 */
export async function handleSemanticQuery(repo, query) {
    const ctx = await loadSemanticContext(repo, query);
    const { chunks, chunkEmbeddings, topK } = ctx;
    // Preferred path: chunk-level semantic search.
    if (chunks && chunkEmbeddings) {
        const fused = computeRRFScores(ctx.vecs, ctx.filteredEmbeddings, ctx.cosineSimilarity);
        const byScoreDesc = [...fused.entries()].sort((left, right) => right[1] - left[1]);
        const topIds = byScoreDesc.slice(0, topK).map((entry) => entry[0]);
        let finalIds = topIds;
        if (query.rerank) {
            const reranker = await import("../search/reranker.js");
            finalIds = await reranker.rerankChunkIds(query.query, topIds, chunks);
        }
        // Test files were already removed while filtering the embeddings.
        const chunkText = formatChunksAsText(finalIds, chunks, false);
        return { type: "semantic", data: chunkText, tokens: estimateTokens(chunkText) };
    }
    // Fallback path: symbol-level semantic search.
    const { getCodeIndex, getEmbeddingCache } = await import("../tools/index-tools.js");
    const { searchSemantic } = await import("../search/semantic.js");
    const index = await getCodeIndex(repo);
    if (!index) {
        throw new Error(`Repository "${repo}" not found`);
    }
    const embeddings = await getEmbeddingCache(repo);
    if (!embeddings) {
        throw new Error(`No embeddings for "${repo}". Run index_folder with an embedding provider configured.`);
    }
    const sourceLimit = query.source_chars ?? DEFAULT_SOURCE_CHARS;
    const symbolMap = new Map();
    const symFileLookup = new Map();
    for (const sym of index.symbols) {
        symbolMap.set(sym.id, sym);
        symFileLookup.set(sym.id, sym.file);
    }
    const candidates = filterEmbeddingsByFile(embeddings, symFileLookup, ctx.fileFilter, ctx.excludeTests);
    const [primaryVec] = ctx.vecs;
    if (!primaryVec) {
        throw new Error("Embedding provider returned no vector");
    }
    const hits = searchSemantic(new Float32Array(primaryVec), candidates, symbolMap, topK);
    // Truncate each symbol's source before rendering to keep the text bounded.
    const trimmedHits = hits.map((hit) => {
        const symbol = truncateSymbolSource(hit.symbol, sourceLimit);
        return { ...hit, symbol };
    });
    const symbolText = formatSemanticResults(trimmedHits);
    return { type: "semantic", data: symbolText, tokens: estimateTokens(symbolText) };
}
|
|
91
|
+
// ---------------------------------------------------------------------------
|
|
92
|
+
// Hybrid query handler (semantic + text, RRF-merged)
|
|
93
|
+
// ---------------------------------------------------------------------------
|
|
94
|
+
/**
 * Binary-search a list of chunk spans (sorted by startLine) for one whose
 * [startLine, endLine] range contains `line`.
 *
 * @param {Array<{id: string, startLine: number, endLine: number}>} spans
 * @param {number} line - Line number to locate.
 * @returns {{id: string, startLine: number, endLine: number} | undefined}
 */
function findCoveringChunkSpan(spans, line) {
    let low = 0;
    let high = spans.length - 1;
    while (low <= high) {
        const middle = (low + high) >> 1;
        const span = spans[middle];
        if (span.endLine < line) {
            low = middle + 1;
        }
        else if (span.startLine > line) {
            high = middle - 1;
        }
        else {
            return span;
        }
    }
    return undefined;
}
/**
 * Answer a "hybrid" sub-query by fusing semantic and text-search rankings.
 *
 * Text search runs concurrently with the semantic context load; a failing text
 * search is treated as "no matches". Each text match adds an RRF contribution
 * to the chunk covering its line, on top of the semantic RRF scores.
 *
 * @param {string} repo - Repository identifier.
 * @param {object} query - Sub-query; reads `query`, `file_filter` and `rerank`
 *   here, plus the fields consumed by loadSemanticContext.
 * @returns {Promise<{type: "hybrid", data: string, tokens: number}>}
 * @throws {Error} When the repository is not found or has no chunk index.
 */
export async function handleHybridQuery(repo, query) {
    const { searchText } = await import("../tools/search-tools.js");
    const contextTask = loadSemanticContext(repo, query);
    const textTask = searchText(repo, query.query, { file_pattern: query.file_filter }).catch(() => []);
    const [ctx, textMatches] = await Promise.all([contextTask, textTask]);
    if (!ctx.repoMeta) {
        throw new Error(`Repository "${repo}" not found`);
    }
    if (!ctx.chunks || !ctx.chunkEmbeddings) {
        throw new Error(`No chunk index for "${repo}"`);
    }
    // 1. Semantic contributions.
    const fused = computeRRFScores(ctx.vecs, ctx.filteredEmbeddings, ctx.cosineSimilarity);
    // 2. Text-match contributions: index chunk spans per file, sorted by start line.
    const spansByFile = new Map();
    for (const [id, chunk] of ctx.chunks) {
        const span = { id, startLine: chunk.startLine, endLine: chunk.endLine };
        const spans = spansByFile.get(chunk.file) ?? [];
        spans.push(span);
        spansByFile.set(chunk.file, spans);
    }
    for (const spans of spansByFile.values()) {
        spans.sort((first, second) => first.startLine - second.startLine);
    }
    for (let rank = 0; rank < textMatches.length; rank++) {
        const match = textMatches[rank];
        if (!match) {
            continue;
        }
        if (ctx.excludeTests && isTestFile(match.file)) {
            continue;
        }
        const covering = findCoveringChunkSpan(spansByFile.get(match.file) ?? [], match.line);
        if (covering) {
            const previous = fused.get(covering.id) ?? 0;
            fused.set(covering.id, previous + 1 / (RRF_K + rank + 1));
        }
    }
    const byScoreDesc = [...fused.entries()].sort((left, right) => right[1] - left[1]);
    let topIds = byScoreDesc.slice(0, ctx.topK).map((entry) => entry[0]);
    if (query.rerank) {
        const reranker = await import("../search/reranker.js");
        topIds = await reranker.rerankChunkIds(query.query, topIds, ctx.chunks);
    }
    const hybridText = formatChunksAsText(topIds, ctx.chunks, ctx.excludeTests);
    return { type: "hybrid", data: hybridText, tokens: estimateTokens(hybridText) };
}
|
|
152
|
+
//# sourceMappingURL=semantic-handlers.js.map
|