@opencodehub/mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +62 -0
- package/dist/analysis-bridge.d.ts +23 -0
- package/dist/analysis-bridge.d.ts.map +1 -0
- package/dist/analysis-bridge.js +83 -0
- package/dist/analysis-bridge.js.map +1 -0
- package/dist/connection-pool.d.ts +76 -0
- package/dist/connection-pool.d.ts.map +1 -0
- package/dist/connection-pool.js +179 -0
- package/dist/connection-pool.js.map +1 -0
- package/dist/error-envelope.d.ts +97 -0
- package/dist/error-envelope.d.ts.map +1 -0
- package/dist/error-envelope.js +75 -0
- package/dist/error-envelope.js.map +1 -0
- package/dist/group-resolver.d.ts +29 -0
- package/dist/group-resolver.d.ts.map +1 -0
- package/dist/group-resolver.js +100 -0
- package/dist/group-resolver.js.map +1 -0
- package/dist/index.d.ts +43 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +54 -0
- package/dist/index.js.map +1 -0
- package/dist/next-step-hints.d.ts +24 -0
- package/dist/next-step-hints.d.ts.map +1 -0
- package/dist/next-step-hints.js +41 -0
- package/dist/next-step-hints.js.map +1 -0
- package/dist/repo-resolver.d.ts +88 -0
- package/dist/repo-resolver.d.ts.map +1 -0
- package/dist/repo-resolver.js +211 -0
- package/dist/repo-resolver.js.map +1 -0
- package/dist/repo-uri-for-entry.d.ts +25 -0
- package/dist/repo-uri-for-entry.d.ts.map +1 -0
- package/dist/repo-uri-for-entry.js +64 -0
- package/dist/repo-uri-for-entry.js.map +1 -0
- package/dist/resources/repo-cluster.d.ts +19 -0
- package/dist/resources/repo-cluster.d.ts.map +1 -0
- package/dist/resources/repo-cluster.js +203 -0
- package/dist/resources/repo-cluster.js.map +1 -0
- package/dist/resources/repo-clusters.d.ts +14 -0
- package/dist/resources/repo-clusters.d.ts.map +1 -0
- package/dist/resources/repo-clusters.js +97 -0
- package/dist/resources/repo-clusters.js.map +1 -0
- package/dist/resources/repo-context.d.ts +12 -0
- package/dist/resources/repo-context.d.ts.map +1 -0
- package/dist/resources/repo-context.js +84 -0
- package/dist/resources/repo-context.js.map +1 -0
- package/dist/resources/repo-process.d.ts +19 -0
- package/dist/resources/repo-process.d.ts.map +1 -0
- package/dist/resources/repo-process.js +220 -0
- package/dist/resources/repo-process.js.map +1 -0
- package/dist/resources/repo-processes.d.ts +13 -0
- package/dist/resources/repo-processes.d.ts.map +1 -0
- package/dist/resources/repo-processes.js +99 -0
- package/dist/resources/repo-processes.js.map +1 -0
- package/dist/resources/repo-schema.d.ts +13 -0
- package/dist/resources/repo-schema.d.ts.map +1 -0
- package/dist/resources/repo-schema.js +99 -0
- package/dist/resources/repo-schema.js.map +1 -0
- package/dist/resources/repos.d.ts +20 -0
- package/dist/resources/repos.d.ts.map +1 -0
- package/dist/resources/repos.js +58 -0
- package/dist/resources/repos.js.map +1 -0
- package/dist/resources/store-helper.d.ts +28 -0
- package/dist/resources/store-helper.d.ts.map +1 -0
- package/dist/resources/store-helper.js +58 -0
- package/dist/resources/store-helper.js.map +1 -0
- package/dist/resources/yaml.d.ts +10 -0
- package/dist/resources/yaml.d.ts.map +1 -0
- package/dist/resources/yaml.js +16 -0
- package/dist/resources/yaml.js.map +1 -0
- package/dist/server.d.ts +46 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/server.js +194 -0
- package/dist/server.js.map +1 -0
- package/dist/staleness.d.ts +19 -0
- package/dist/staleness.d.ts.map +1 -0
- package/dist/staleness.js +40 -0
- package/dist/staleness.js.map +1 -0
- package/dist/test-utils.d.ts +170 -0
- package/dist/test-utils.d.ts.map +1 -0
- package/dist/test-utils.js +473 -0
- package/dist/test-utils.js.map +1 -0
- package/dist/tools/api-impact.d.ts +47 -0
- package/dist/tools/api-impact.d.ts.map +1 -0
- package/dist/tools/api-impact.js +199 -0
- package/dist/tools/api-impact.js.map +1 -0
- package/dist/tools/confidence.d.ts +39 -0
- package/dist/tools/confidence.d.ts.map +1 -0
- package/dist/tools/confidence.js +58 -0
- package/dist/tools/confidence.js.map +1 -0
- package/dist/tools/context.d.ts +47 -0
- package/dist/tools/context.d.ts.map +1 -0
- package/dist/tools/context.js +577 -0
- package/dist/tools/context.js.map +1 -0
- package/dist/tools/dependencies.d.ts +29 -0
- package/dist/tools/dependencies.d.ts.map +1 -0
- package/dist/tools/dependencies.js +110 -0
- package/dist/tools/dependencies.js.map +1 -0
- package/dist/tools/detect-changes.d.ts +15 -0
- package/dist/tools/detect-changes.d.ts.map +1 -0
- package/dist/tools/detect-changes.js +78 -0
- package/dist/tools/detect-changes.js.map +1 -0
- package/dist/tools/group-contracts.d.ts +26 -0
- package/dist/tools/group-contracts.d.ts.map +1 -0
- package/dist/tools/group-contracts.js +251 -0
- package/dist/tools/group-contracts.js.map +1 -0
- package/dist/tools/group-cross-repo-links.d.ts +28 -0
- package/dist/tools/group-cross-repo-links.d.ts.map +1 -0
- package/dist/tools/group-cross-repo-links.js +128 -0
- package/dist/tools/group-cross-repo-links.js.map +1 -0
- package/dist/tools/group-list.d.ts +10 -0
- package/dist/tools/group-list.d.ts.map +1 -0
- package/dist/tools/group-list.js +74 -0
- package/dist/tools/group-list.js.map +1 -0
- package/dist/tools/group-query.d.ts +40 -0
- package/dist/tools/group-query.d.ts.map +1 -0
- package/dist/tools/group-query.js +209 -0
- package/dist/tools/group-query.js.map +1 -0
- package/dist/tools/group-status.d.ts +21 -0
- package/dist/tools/group-status.d.ts.map +1 -0
- package/dist/tools/group-status.js +121 -0
- package/dist/tools/group-status.js.map +1 -0
- package/dist/tools/group-sync.d.ts +23 -0
- package/dist/tools/group-sync.d.ts.map +1 -0
- package/dist/tools/group-sync.js +112 -0
- package/dist/tools/group-sync.js.map +1 -0
- package/dist/tools/impact.d.ts +36 -0
- package/dist/tools/impact.d.ts.map +1 -0
- package/dist/tools/impact.js +232 -0
- package/dist/tools/impact.js.map +1 -0
- package/dist/tools/license-audit.d.ts +34 -0
- package/dist/tools/license-audit.d.ts.map +1 -0
- package/dist/tools/license-audit.js +108 -0
- package/dist/tools/license-audit.js.map +1 -0
- package/dist/tools/list-dead-code.d.ts +26 -0
- package/dist/tools/list-dead-code.d.ts.map +1 -0
- package/dist/tools/list-dead-code.js +110 -0
- package/dist/tools/list-dead-code.js.map +1 -0
- package/dist/tools/list-findings-delta.d.ts +36 -0
- package/dist/tools/list-findings-delta.d.ts.map +1 -0
- package/dist/tools/list-findings-delta.js +274 -0
- package/dist/tools/list-findings-delta.js.map +1 -0
- package/dist/tools/list-findings.d.ts +30 -0
- package/dist/tools/list-findings.d.ts.map +1 -0
- package/dist/tools/list-findings.js +129 -0
- package/dist/tools/list-findings.js.map +1 -0
- package/dist/tools/list-repos.d.ts +17 -0
- package/dist/tools/list-repos.d.ts.map +1 -0
- package/dist/tools/list-repos.js +63 -0
- package/dist/tools/list-repos.js.map +1 -0
- package/dist/tools/owners.d.ts +23 -0
- package/dist/tools/owners.d.ts.map +1 -0
- package/dist/tools/owners.js +103 -0
- package/dist/tools/owners.js.map +1 -0
- package/dist/tools/pack-codebase.d.ts +76 -0
- package/dist/tools/pack-codebase.d.ts.map +1 -0
- package/dist/tools/pack-codebase.js +289 -0
- package/dist/tools/pack-codebase.js.map +1 -0
- package/dist/tools/project-profile.d.ts +28 -0
- package/dist/tools/project-profile.d.ts.map +1 -0
- package/dist/tools/project-profile.js +109 -0
- package/dist/tools/project-profile.js.map +1 -0
- package/dist/tools/query.d.ts +63 -0
- package/dist/tools/query.d.ts.map +1 -0
- package/dist/tools/query.js +662 -0
- package/dist/tools/query.js.map +1 -0
- package/dist/tools/remove-dead-code.d.ts +47 -0
- package/dist/tools/remove-dead-code.d.ts.map +1 -0
- package/dist/tools/remove-dead-code.js +258 -0
- package/dist/tools/remove-dead-code.js.map +1 -0
- package/dist/tools/rename.d.ts +21 -0
- package/dist/tools/rename.d.ts.map +1 -0
- package/dist/tools/rename.js +116 -0
- package/dist/tools/rename.js.map +1 -0
- package/dist/tools/risk-trends.d.ts +19 -0
- package/dist/tools/risk-trends.d.ts.map +1 -0
- package/dist/tools/risk-trends.js +73 -0
- package/dist/tools/risk-trends.js.map +1 -0
- package/dist/tools/route-map.d.ts +27 -0
- package/dist/tools/route-map.d.ts.map +1 -0
- package/dist/tools/route-map.js +119 -0
- package/dist/tools/route-map.js.map +1 -0
- package/dist/tools/scan.d.ts +27 -0
- package/dist/tools/scan.d.ts.map +1 -0
- package/dist/tools/scan.js +136 -0
- package/dist/tools/scan.js.map +1 -0
- package/dist/tools/shape-check.d.ts +53 -0
- package/dist/tools/shape-check.d.ts.map +1 -0
- package/dist/tools/shape-check.js +161 -0
- package/dist/tools/shape-check.js.map +1 -0
- package/dist/tools/shared.d.ts +101 -0
- package/dist/tools/shared.d.ts.map +1 -0
- package/dist/tools/shared.js +114 -0
- package/dist/tools/shared.js.map +1 -0
- package/dist/tools/signature.d.ts +38 -0
- package/dist/tools/signature.d.ts.map +1 -0
- package/dist/tools/signature.js +332 -0
- package/dist/tools/signature.js.map +1 -0
- package/dist/tools/sql.d.ts +34 -0
- package/dist/tools/sql.d.ts.map +1 -0
- package/dist/tools/sql.js +222 -0
- package/dist/tools/sql.js.map +1 -0
- package/dist/tools/tool-map.d.ts +24 -0
- package/dist/tools/tool-map.d.ts.map +1 -0
- package/dist/tools/tool-map.js +97 -0
- package/dist/tools/tool-map.js.map +1 -0
- package/dist/tools/verdict.d.ts +33 -0
- package/dist/tools/verdict.d.ts.map +1 -0
- package/dist/tools/verdict.js +102 -0
- package/dist/tools/verdict.js.map +1 -0
- package/package.json +76 -0
|
@@ -0,0 +1,662 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `query` — true hybrid retrieval over the indexed graph.
|
|
3
|
+
*
|
|
4
|
+
* Two ranked runs, fused with Reciprocal Rank Fusion (k=60):
|
|
5
|
+
* 1. BM25 (DuckDB FTS) over `nodes.name` + `nodes.signature` +
|
|
6
|
+
* `nodes.description`. If a `symbol_summaries` table is present the
|
|
7
|
+
* corpus extends transparently (see {@link bm25CorpusHasSummaries}) so
|
|
8
|
+
* summarized prose participates as soon as the ingestion phase lands.
|
|
9
|
+
* 2. HNSW vector search over the `embeddings` table. The query text is
|
|
10
|
+
* embedded with the same gte-modernbert-base ONNX model the ingestion
|
|
11
|
+
* pipeline uses, so the vectors live in the same space.
|
|
12
|
+
*
|
|
13
|
+
* Graceful fallback:
|
|
14
|
+
* - If the `embeddings` table is empty, skip the vector leg entirely.
|
|
15
|
+
* - If the embedder weights are missing (EMBEDDER_NOT_SETUP) or any other
|
|
16
|
+
* failure blocks the embedder from opening, warn once to stderr and fall
|
|
17
|
+
* back to BM25-only. We never abort the query — the invariant is that a
|
|
18
|
+
* fresh-cloned repo still answers `query` before `codehub setup
|
|
19
|
+
* --embeddings` has been run.
|
|
20
|
+
*
|
|
21
|
+
* The response shape is stable across BM25-only and hybrid paths:
|
|
22
|
+
* `{ results, definitions, processes, process_symbols, mode }`.
|
|
23
|
+
* `results` is the primary ranked list; `definitions` mirrors it one-to-one
|
|
24
|
+
* (preserved for agents that learned the legacy shape). `processes` +
|
|
25
|
+
* `process_symbols` hold the process-grouped view: after fusion we walk
|
|
26
|
+
* PROCESS_STEP edges backwards from each top-K hit to locate the containing
|
|
27
|
+
* Process nodes, then walk PROCESS_STEP edges forward from each matched
|
|
28
|
+
* Process's entry point to enumerate its ordered member steps. Both walks
|
|
29
|
+
* happen in ONE consolidated SQL query (two CTEs + a join) so the enrichment
|
|
30
|
+
* pass is a single round-trip. When no Process touches a top-K hit, or the
|
|
31
|
+
* repo has no PROCESS_STEP edges yet, the arrays stay empty — the flat
|
|
32
|
+
* `results` ranking still covers the query.
|
|
33
|
+
*/
|
|
34
|
+
import { isAbsolute, resolve as resolvePath } from "node:path";
|
|
35
|
+
import { createNodeFs } from "@opencodehub/analysis";
|
|
36
|
+
import { assertEmbedderCompatible, openDefaultEmbedder, } from "@opencodehub/embedder";
|
|
37
|
+
import { bm25Search, embeddingsPopulated, hybridSearch, tryOpenEmbedder, } from "@opencodehub/search";
|
|
38
|
+
import { z } from "zod";
|
|
39
|
+
import { toolError, toolErrorFromUnknown } from "../error-envelope.js";
|
|
40
|
+
import { withNextSteps } from "../next-step-hints.js";
|
|
41
|
+
import { stalenessFromMeta } from "../staleness.js";
|
|
42
|
+
import { fromToolResult, repoArgShape, toToolResult, withStore, } from "./shared.js";
|
|
43
|
+
/** Hard cap (in characters) on the always-on `snippet` attached to each ranked hit. */
const SNIPPET_CHAR_CAP = 200;
/**
 * Per-symbol cap for `content` when `include_content: true`. Keeps large
 * symbol bodies from bloating the MCP response envelope while leaving
 * room for the agent to see enough of the definition to reason about it.
 */
const INCLUDE_CONTENT_CHAR_CAP = 2000;
/** Default cap for `process_symbols` after grouping (see `max_symbols`). */
const DEFAULT_MAX_SYMBOLS = 50;
|
|
52
|
+
/**
 * Zod field shape for the `query` tool's input. Spread into the MCP tool
 * registration; each field documents itself via `.describe(...)`. The
 * repo-selection fields come from the shared `repoArgShape` spread.
 */
const QueryInput = {
    query: z
        .string()
        .min(1)
        .describe("Free-text search phrase; embedded + BM25-searched, then fused via RRF."),
    ...repoArgShape,
    limit: z
        .number()
        .int()
        .positive()
        .max(100)
        .optional()
        .describe("Maximum number of ranked hits to return (default 10, max 100)."),
    kinds: z
        .array(z.string())
        .optional()
        .describe("Restrict to these NodeKind values (e.g. ['Function','Method'])."),
    task_context: z
        .string()
        .optional()
        .describe("What you are working on (e.g., 'adding OAuth support'). Prefixed to the query text before embedding + BM25 so the ranker sees the broader intent."),
    goal: z
        .string()
        .optional()
        .describe("What you want to find (e.g., 'existing auth validation logic'). Prefixed to the query text alongside task_context to steer the ranker."),
    include_content: z
        .boolean()
        .optional()
        .describe("When true, re-read each result's source file between startLine/endLine and attach the body as `content` (capped at 2000 chars). Default false."),
    max_symbols: z
        .number()
        .int()
        .positive()
        .max(500)
        .optional()
        .describe("Maximum number of symbols to return in `process_symbols` after process grouping (default 50). `results[]` remains capped by `limit`."),
    granularity: z
        .enum(["symbol", "file", "community"])
        .optional()
        .describe("Hierarchical embedding tier to search. Defaults to 'symbol' (v1.0 behaviour). Set to 'community' to retrieve architectural clusters; set to 'file' to score files. Requires the index to have been built with `--granularity symbol,file,community`."),
    mode: z
        .enum(["flat", "zoom"])
        .optional()
        .describe("Retrieval strategy. 'flat' (default) runs one symbol-tier ANN pass fused with BM25. 'zoom' runs a coarse file-tier pass first, then restricts the symbol-tier pass to symbols inside the top file shortlist (`zoom_fanout` files by default)."),
    zoom_fanout: z
        .number()
        .int()
        .positive()
        .max(50)
        .optional()
        .describe("How many files to shortlist at the coarse step when `mode=zoom`. Default 10."),
    force_backend_mismatch: z
        .boolean()
        .optional()
        .describe("Bypass the embedder fingerprint check. Lets the query proceed against an `embeddings` table populated by a different embedder than the one currently active. Vectors may be stale; results may misrank. Default false."),
};
|
|
108
|
+
/**
 * Batched summary join for the top-K ranked hits. Returns an empty map
 * when `symbol_summaries` was probed absent/empty (`summariesJoined` is
 * false) or when `nodeIds` is empty. Any lookup failure is swallowed —
 * summary enrichment is never load-bearing.
 *
 * Rows arrive in `(node_id ASC, prompt_version ASC, content_hash ASC)`
 * order (the storage layer's documented contract); overwriting per node
 * id therefore deterministically keeps the newest prompt version.
 */
async function lookupSummariesForHits(temporal, summariesJoined, nodeIds) {
    const byNode = new Map();
    const deduped = [...new Set(nodeIds)];
    if (!summariesJoined || deduped.length === 0) {
        return byNode;
    }
    try {
        const summaryRows = await temporal.lookupSymbolSummariesByNode(deduped);
        // Last write per node id wins — that's the newest prompt version
        // thanks to the ORDER BY contract in `lookupSymbolSummariesByNode`.
        for (const row of summaryRows) {
            byNode.set(row.nodeId, row);
        }
    }
    catch {
        // Table missing / schema drift / I/O failure: degrade silently to
        // "no summaries attached".
    }
    return byNode;
}
|
|
140
|
+
/**
 * Extensibility probe: resolves true iff the `symbol_summaries` table
 * exists and holds at least one row. Informational today — the DuckDB
 * FTS index is built at ingestion time against `nodes` columns only —
 * but future BM25 upgrades can use this to join summaries into the
 * corpus. Any failure resolves false.
 */
async function bm25CorpusHasSummaries(temporal) {
    // information_schema introspection is DuckDB-specific; route via the
    // temporal-tier `exec` escape hatch so a future graph-only adapter
    // pairing with a non-DuckDB temporal store can override this probe.
    try {
        const probe = await temporal.exec("SELECT COUNT(*) AS n FROM information_schema.tables WHERE table_name = 'symbol_summaries'");
        const tableRow = probe[0];
        if (!tableRow || Number(tableRow["n"] ?? 0) <= 0) {
            return false;
        }
        const countRows = await temporal.exec("SELECT COUNT(*) AS n FROM symbol_summaries");
        const countRow = countRows[0];
        return countRow ? Number(countRow["n"] ?? 0) > 0 : false;
    }
    catch {
        return false;
    }
}
|
|
170
|
+
/**
 * Fetch name/kind/filePath/startLine/endLine for a set of node ids in a
 * single `graph.listNodes` round trip. Ids absent from the store (e.g.
 * stale embeddings) simply don't appear in the returned map.
 */
async function hydrateNodeMeta(graph, ids) {
    const metaById = new Map();
    if (ids.length === 0) {
        return metaById;
    }
    const nodes = await graph.listNodes({ ids: [...ids] });
    for (const node of nodes) {
        metaById.set(node.id, {
            name: node.name,
            filePath: node.filePath,
            kind: node.kind,
            // Line numbers are normalized to positive ints or null.
            startLine: toLineOrNull(node["startLine"]),
            endLine: toLineOrNull(node["endLine"]),
        });
    }
    return metaById;
}
|
|
193
|
+
/**
 * Normalize a raw line-number value to a positive integer. Returns null
 * for nullish, non-numeric, non-finite, zero, or negative inputs;
 * fractional values are truncated toward zero.
 */
function toLineOrNull(raw) {
    if (raw == null) {
        return null;
    }
    const asNumber = Number(raw);
    return Number.isFinite(asNumber) && asNumber > 0 ? Math.trunc(asNumber) : null;
}
|
|
201
|
+
/**
 * Pull a snippet of the source file between `[startLine, endLine]`
 * (1-indexed, inclusive), capped at {@link SNIPPET_CHAR_CAP} characters
 * with a trailing "…" when truncated. Returns `null` when the file
 * can't be read (renamed, deleted, permission error) or when the line
 * range is missing or obviously bogus. Never throws — snippet
 * extraction is best-effort.
 */
async function extractSnippet(fs, repoRoot, filePath, startLine, endLine) {
    if (startLine === null || endLine === null || endLine < startLine) {
        return null;
    }
    const absPath = isAbsolute(filePath) ? filePath : resolvePath(repoRoot, filePath);
    let text;
    try {
        text = await fs.readFile(absPath);
    }
    catch {
        return null;
    }
    const allLines = text.split("\n");
    if (allLines.length === 0) {
        return null;
    }
    // Clamp the requested range into the file's actual bounds.
    const first = Math.max(1, startLine);
    const last = Math.min(allLines.length, endLine);
    if (last < first) {
        return null;
    }
    const snippet = allLines.slice(first - 1, last).join("\n");
    return snippet.length <= SNIPPET_CHAR_CAP
        ? snippet
        : `${snippet.slice(0, SNIPPET_CHAR_CAP - 1)}…`;
}
|
|
233
|
+
/**
 * Enrich ranked hits (FusedHit or plain SymbolHit-derived) with node
 * metadata and source snippets. Input order is preserved; hits whose
 * node id is missing from the store are dropped, and `rank` is
 * renumbered contiguously over the surviving rows.
 */
async function enrichWithContext(graph, fs, repoRoot, hits) {
    if (hits.length === 0) {
        return [];
    }
    const metaById = await hydrateNodeMeta(graph, [...new Set(hits.map((hit) => hit.nodeId))]);
    const enriched = [];
    for (const hit of hits) {
        const meta = metaById.get(hit.nodeId);
        if (!meta) {
            // Stale embedding or deleted node — skip without a rank gap.
            continue;
        }
        const snippet = await extractSnippet(fs, repoRoot, meta.filePath, meta.startLine, meta.endLine);
        enriched.push({
            rank: enriched.length + 1,
            nodeId: hit.nodeId,
            name: meta.name,
            kind: meta.kind,
            filePath: meta.filePath,
            startLine: meta.startLine,
            endLine: meta.endLine,
            snippet,
            score: hit.score,
            sources: hit.sources,
        });
    }
    return enriched;
}
|
|
265
|
+
/**
 * Read the full body of a symbol from disk between `[startLine, endLine]`
 * (1-indexed, inclusive), capped at {@link INCLUDE_CONTENT_CHAR_CAP}
 * characters with a trailing "…" when truncated. Best-effort: any read
 * error or missing/bogus line range returns `null` so the caller can
 * simply omit the `content` field for that row.
 *
 * Distinct from {@link extractSnippet} — that one is always on and caps
 * at 200 chars; this one fires only when `include_content: true` and
 * gives the agent a much larger window (2000 chars) into the symbol body.
 */
async function readSymbolContent(fs, repoRoot, filePath, startLine, endLine) {
    if (startLine === null || endLine === null || endLine < startLine) {
        return null;
    }
    const absPath = isAbsolute(filePath) ? filePath : resolvePath(repoRoot, filePath);
    let text;
    try {
        text = await fs.readFile(absPath);
    }
    catch {
        return null;
    }
    const allLines = text.split("\n");
    if (allLines.length === 0) {
        return null;
    }
    // Clamp the requested range into the file's actual bounds.
    const first = Math.max(1, startLine);
    const last = Math.min(allLines.length, endLine);
    if (last < first) {
        return null;
    }
    const body = allLines.slice(first - 1, last).join("\n");
    return body.length <= INCLUDE_CONTENT_CHAR_CAP
        ? body
        : `${body.slice(0, INCLUDE_CONTENT_CHAR_CAP - 1)}…`;
}
|
|
300
|
+
/**
 * Build the text fed to BM25 + embedding. Optional `taskContext` and
 * `goal` are trimmed and prefixed in the order
 * `task_context — goal — query`, joined by " — " (em-dash with
 * surrounding spaces). Empty / whitespace-only parts are dropped so the
 * result never starts with a dangling separator.
 */
function buildSearchText(query, taskContext, goal) {
    const segments = [taskContext, goal]
        .filter((part) => part !== undefined && part.trim() !== "")
        .map((part) => part.trim());
    segments.push(query);
    return segments.join(" — ");
}
|
|
316
|
+
/** Convert BM25-only hits into the uniform fused-shaped row (sources fixed to ["bm25"]). */
function bm25RowsAsFused(hits) {
    const fused = [];
    for (const { nodeId, score } of hits) {
        fused.push({ nodeId, score, sources: ["bm25"] });
    }
    return fused;
}
|
|
324
|
+
/** Project FusedHit[] down to the { nodeId, score, sources } shape enrichWithContext expects. */
function fusedAsRanked(fused) {
    return fused.map(({ nodeId, score, sources }) => ({ nodeId, score, sources }));
}
|
|
328
|
+
/**
 * Group top-K hits by the Process nodes that contain them.
 *
 * Three passes over the graph API:
 *   1. Walk PROCESS_STEP edges backwards (traverseAncestors) from each
 *      hit to collect every candidate ancestor id.
 *   2. Keep the Process nodes whose `entryPointId` is among those
 *      ancestors.
 *   3. BFS forward along PROCESS_STEP edges from each matched Process's
 *      entry point to enumerate its ordered member symbols.
 *
 * Returns `{ groups, symbols }`; both arrays stay empty when no Process
 * touches a hit, when the repo has no PROCESS_STEP edges, or on any
 * failure (the whole body is wrapped in a catch-all — process grouping
 * is never load-bearing for the query).
 *
 * The depth cap of 10 on both walks matches `MAX_DEPTH` in the
 * ingestion `processes` phase — any member reachable during ingestion
 * is reachable here. The BFS `seen` set dedupes the frontier so dense
 * call graphs don't blow up.
 */
async function fetchProcessGrouping(graph, hits) {
    if (hits.length === 0)
        return { groups: [], symbols: [] };
    const hitIds = Array.from(new Set(hits.map((h) => h.nodeId)));
    if (hitIds.length === 0)
        return { groups: [], symbols: [] };
    try {
        // Step 1. Walk PROCESS_STEP ancestors from each hit.
        const ancestorIds = new Set();
        for (const id of hitIds) {
            ancestorIds.add(id);
            const ancestors = await graph.traverseAncestors({
                fromId: id,
                edgeTypes: ["PROCESS_STEP"],
                maxDepth: 10,
            });
            for (const a of ancestors)
                ancestorIds.add(a.nodeId);
        }
        if (ancestorIds.size === 0)
            return { groups: [], symbols: [] };
        // Step 2. A Process "matches" when its entry point is one of the
        // collected ancestor ids (i.e. some hit lives inside it).
        const processes = (await graph.listNodesByKind("Process"));
        const matched = [];
        for (const p of processes) {
            const ep = p.entryPointId;
            if (typeof ep === "string" && ep.length > 0 && ancestorIds.has(ep)) {
                matched.push(p);
            }
        }
        if (matched.length === 0)
            return { groups: [], symbols: [] };
        // Step 3. BFS from each entry point along PROCESS_STEP edges.
        // Build the full adjacency once; reused across all matched processes.
        const allStepEdges = await graph.listEdgesByType("PROCESS_STEP");
        const adj = new Map();
        const allPartnerIds = new Set();
        for (const e of allStepEdges) {
            const list = adj.get(e.from) ?? [];
            list.push({ to: e.to, step: e.step ?? 0 });
            adj.set(e.from, list);
            allPartnerIds.add(e.from);
            allPartnerIds.add(e.to);
        }
        for (const p of matched)
            if (p.entryPointId)
                allPartnerIds.add(p.entryPointId);
        // One batched metadata fetch for every node the BFS could touch.
        const allPartners = allPartnerIds.size > 0 ? await graph.listNodes({ ids: [...allPartnerIds] }) : [];
        const byId = new Map();
        for (const n of allPartners)
            byId.set(n.id, n);
        // Best (max) hit score per node id — feeds each group's score.
        const scoreById = new Map();
        for (const h of hits) {
            const prev = scoreById.get(h.nodeId);
            if (prev === undefined || h.score > prev)
                scoreById.set(h.nodeId, h.score);
        }
        const groupById = new Map();
        const symbols = [];
        for (const proc of matched) {
            const ep = proc.entryPointId;
            if (typeof ep !== "string" || ep.length === 0)
                continue;
            // BFS with a depth cap of 10 (mirrors ingestion MAX_DEPTH).
            const seen = new Set();
            const queue = [{ id: ep, step: 0 }];
            const members = [];
            while (queue.length > 0) {
                const cur = queue.shift();
                if (seen.has(cur.id))
                    continue;
                seen.add(cur.id);
                members.push(cur);
                if (cur.step >= 10)
                    continue;
                const out = adj.get(cur.id) ?? [];
                for (const e of out) {
                    if (seen.has(e.to))
                        continue;
                    queue.push({ id: e.to, step: cur.step + 1 });
                }
            }
            // Deterministic member order: step, then id as tiebreaker.
            members.sort((a, b) => {
                if (a.step !== b.step)
                    return a.step - b.step;
                return a.id < b.id ? -1 : a.id > b.id ? 1 : 0;
            });
            // Prefer the inferred label; fall back to the node name.
            const inferredLabel = proc.inferredLabel;
            const label = typeof inferredLabel === "string" && inferredLabel.length > 0 ? inferredLabel : proc.name;
            const stepCount = Math.max(0, Math.trunc(proc.stepCount ?? 0));
            const bucket = {
                group: {
                    id: proc.id,
                    label,
                    processType: "flow",
                    stepCount,
                    score: 0, // filled in below from scoreCandidates
                },
                scoreCandidates: [],
            };
            groupById.set(proc.id, bucket);
            for (const m of members) {
                const partner = byId.get(m.id);
                const hitScore = scoreById.get(m.id);
                if (hitScore !== undefined)
                    bucket.scoreCandidates.push(hitScore);
                symbols.push({
                    process_id: proc.id,
                    nodeId: m.id,
                    name: partner?.name ?? "",
                    kind: partner?.kind ?? "",
                    filePath: partner?.filePath ?? "",
                    step: m.step,
                });
            }
        }
        // Group score = best member hit score (0 when no member was a hit).
        const groups = [];
        for (const { group, scoreCandidates } of groupById.values()) {
            const score = scoreCandidates.length === 0 ? 0 : Math.max(...scoreCandidates);
            groups.push({ ...group, score });
        }
        // Deterministic group order: score desc, then id as tiebreaker.
        groups.sort((a, b) => {
            if (b.score !== a.score)
                return b.score - a.score;
            return a.id < b.id ? -1 : a.id > b.id ? 1 : 0;
        });
        return { groups, symbols };
    }
    catch {
        // Any failure degrades to "no process grouping" rather than
        // aborting the query.
        return { groups: [], symbols: [] };
    }
}
|
|
470
|
+
/**
 * Execute the `query` tool: hybrid (BM25 + vector) or BM25-only search over
 * the indexed code graph, enriched with context, summaries, optional source
 * content, and process-grouping metadata.
 *
 * @param {object} ctx  Tool context; may supply `openEmbedder` / `fsFactory`
 *                      test seams (defaults are used when absent).
 * @param {object} args Validated query arguments (`query` required; `repo`,
 *                      `limit`, `kinds`, `mode`, etc. optional).
 * @returns {Promise<object>} A tool result envelope (never throws — store
 *                            errors are converted via `toolErrorFromUnknown`).
 */
export async function runQuery(ctx, args) {
    const limit = args.limit ?? 10;
    const maxSymbols = args.max_symbols ?? DEFAULT_MAX_SYMBOLS;
    const includeContent = args.include_content === true;
    // Shared HTTP-priority + ONNX-fallback factory. ONNX binding only loads
    // on the fallback branch, so plain (non-dynamic) import is fine here.
    const openEmbedder = ctx.openEmbedder ?? (() => openDefaultEmbedder());
    const fsFactory = ctx.fsFactory ?? createNodeFs;
    // `searchText` is what goes to BM25 + the embedder. When `task_context`
    // or `goal` are present, they get prefixed so the ranker sees the broader
    // intent; `args.query` remains the human-facing string echoed in headers.
    const searchText = buildSearchText(args.query, args.task_context, args.goal);
    const call = await withStore(ctx, args, async (store, resolved) => {
        try {
            const { graph, temporal } = store;
            const kinds = args.kinds && args.kinds.length > 0 ? args.kinds : undefined;
            // Probe for the symbol_summaries table so the value is recorded
            // alongside `mode` (surfaces via structuredContent). This is a
            // cheap metadata read; it runs once per query.
            const summariesJoined = await bm25CorpusHasSummaries(temporal);
            // BM25-only retrieval, shared by the "embeddings table empty" and
            // the "embedder unavailable" branches (was duplicated inline).
            const runBm25 = async () => {
                const bmHits = await bm25Search(graph, {
                    text: searchText,
                    limit,
                    ...(kinds !== undefined ? { kinds } : {}),
                });
                return bm25RowsAsFused(bmHits);
            };
            let ranked;
            let mode = "bm25";
            if (await embeddingsPopulated(graph)) {
                const embedder = await tryOpenEmbedder(openEmbedder, "[mcp:query]");
                if (embedder) {
                    try {
                        // Refuse when the persisted embedder modelId differs from
                        // the current one. Same-dim vectors from different embedders
                        // silently corrupt ranking. `force_backend_mismatch` lets
                        // the caller override.
                        const meta = await graph.getMeta();
                        const compat = assertEmbedderCompatible(meta?.embedderModelId, embedder.modelId, args.force_backend_mismatch === true);
                        if (!compat.ok) {
                            return toolError("EMBEDDER_MISMATCH", `Embedder mismatch: store was indexed with '${compat.persistedModelId}', ` +
                                `current embedder is '${compat.currentModelId}'.`, compat.hint);
                        }
                        const fused = await hybridSearch(graph, {
                            text: searchText,
                            limit,
                            ...(kinds !== undefined ? { kinds } : {}),
                            ...(args.mode !== undefined ? { mode: args.mode } : {}),
                            ...(args.zoom_fanout !== undefined ? { zoomFanout: args.zoom_fanout } : {}),
                            ...(args.granularity !== undefined ? { granularity: args.granularity } : {}),
                        }, embedder);
                        ranked = fusedAsRanked(fused);
                        mode = "hybrid";
                    }
                    finally {
                        // Always release the native session — even on error —
                        // so we don't leak ONNX runtime resources.
                        await embedder.close();
                    }
                }
                else {
                    // Embeddings exist but no embedder could be opened
                    // (e.g. weights not installed): degrade to BM25.
                    ranked = await runBm25();
                }
            }
            else {
                // No embeddings indexed at all: BM25 is the only ranker.
                ranked = await runBm25();
            }
            const fs = fsFactory();
            const enrichedRows = await enrichWithContext(graph, fs, resolved.repoPath, ranked);
            // Join `symbol_summaries` onto each hit when P04 data is present.
            // Single round trip for the whole top-K via `IN (...)`; missing rows
            // simply omit `summary` / `signatureSummary`. Any lookup failure
            // degrades silently — summaries are enrichment, not load-bearing.
            const summaryMap = await lookupSummariesForHits(temporal, summariesJoined, enrichedRows.map((r) => r.nodeId));
            const baseRows = summaryMap.size === 0
                ? enrichedRows
                : enrichedRows.map((r) => {
                    const row = summaryMap.get(r.nodeId);
                    if (row === undefined)
                        return r;
                    return {
                        ...r,
                        summary: row.summaryText,
                        ...(row.signatureSummary !== undefined
                            ? { signatureSummary: row.signatureSummary }
                            : {}),
                    };
                });
            // When `include_content` is requested, re-read each result's source
            // between startLine/endLine and attach a capped `content` body. This
            // is best-effort — any unreadable path simply omits the field.
            const rows = includeContent
                ? await Promise.all(baseRows.map(async (r) => {
                    const content = await readSymbolContent(fs, resolved.repoPath, r.filePath, r.startLine, r.endLine);
                    return content !== null ? { ...r, content } : r;
                }))
                : baseRows;
            const modeLabel = mode === "hybrid" ? "hybrid" : "BM25";
            const header = `Top ${rows.length} ${modeLabel} match(es) for "${args.query}" in ${resolved.name}:`;
            const body = rows.length === 0
                ? "(no matches — try a broader phrase or drop the kinds filter)"
                : rows
                    .map((r) => `${r.rank}. ${r.name} [${r.kind}] — ${r.filePath}${r.startLine !== null ? `:${r.startLine}` : ""} (score ${r.score.toFixed(3)}, sources=${r.sources.join("+")})`)
                    .join("\n");
            const next = rows.length === 0
                ? ["broaden the query or remove `kinds` filter"]
                : [
                    `call \`context\` with symbol="${rows[0]?.name ?? ""}" to see its callers/callees`,
                    `call \`impact\` on the top match to see its blast radius`,
                ];
            const staleness = stalenessFromMeta(resolved.meta);
            // `definitions` mirrors `results` for agents that learned the legacy
            // shape. `processes` + `process_symbols` come from one consolidated
            // PROCESS_STEP walk: backward from the top-K hits to find Process
            // nodes, then forward from each matched Process's entry point to
            // enumerate members. Repos without PROCESS_STEP edges yet (fresh
            // index pre-`processes` phase, or ingestion where the phase emitted
            // no flows) naturally return empty arrays. `max_symbols` caps the
            // flat `process_symbols` list AFTER grouping; `results[]` is always
            // capped by `limit`.
            const { groups: processes, symbols: processSymbols } = await fetchProcessGrouping(graph, ranked);
            const cappedProcessSymbols = processSymbols.slice(0, maxSymbols);
            return withNextSteps(`${header}\n${body}`, {
                results: rows,
                definitions: rows,
                processes,
                process_symbols: cappedProcessSymbols,
                mode,
                summaries_joined: summariesJoined,
            }, next, staleness);
        }
        catch (err) {
            return toolErrorFromUnknown(err);
        }
    });
    return toToolResult(call);
}
|
|
610
|
+
/**
 * Register the `query` tool on the given MCP server. The handler adapts the
 * Zod-parsed arguments to `QueryArgs` and delegates to `runQuery`.
 *
 * @param {object} server MCP server exposing `registerTool`.
 * @param {object} ctx    Tool context forwarded to `runQuery`.
 */
export function registerQueryTool(server, ctx) {
    server.registerTool("query", {
        title: "Hybrid code-graph search",
        description: [
            "True hybrid retrieval over the indexed code graph: BM25 keyword search",
            "(over symbol name + signature + description) fused with HNSW vector",
            "search (gte-modernbert-base, 768-dim) via Reciprocal Rank Fusion (k=60).",
            "Each result carries `rank`, `nodeId`, `name`, `kind`, `filePath`,",
            "`startLine`/`endLine`, a capped `snippet` (~200 chars), the fused",
            "`score`, and `sources` indicating which ranker(s) contributed (`bm25`",
            "and/or `vector`).",
            "Graceful fallback: when the `embeddings` table is empty, or the ONNX",
            "weights are not installed (run `codehub setup --embeddings` to",
            "install), the vector leg is silently skipped and BM25-only results",
            "are returned with `mode: 'bm25'`. The query never fails because of",
            "missing embeddings.",
            "Use this as the first lookup step when you know the concept but not",
            "the exact symbol. For exact-name lookups, a plain `context` call is",
            "often sufficient.",
        ].join(" "),
        inputSchema: QueryInput,
        annotations: {
            readOnlyHint: true,
            destructiveHint: false,
            idempotentHint: true,
            openWorldHint: false,
        },
    }, async (args) => {
        // Zod with `exactOptionalPropertyTypes` emits explicit `undefined`
        // for unset optional properties; `QueryArgs` uses `?:` which forbids
        // an explicit `undefined`. Copy only the keys that are actually set
        // so the two types line up without a structural cast.
        const optionalKeys = [
            "repo",
            "repo_uri",
            "limit",
            "kinds",
            "task_context",
            "goal",
            "include_content",
            "max_symbols",
            "granularity",
            "mode",
            "zoom_fanout",
            "force_backend_mismatch",
        ];
        const typed = { query: args.query };
        for (const key of optionalKeys) {
            if (args[key] !== undefined) {
                typed[key] = args[key];
            }
        }
        return fromToolResult(await runQuery(ctx, typed));
    });
}
|
|
662
|
+
//# sourceMappingURL=query.js.map
|