src-mcp 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin.d.mts +1 -0
- package/dist/bin.mjs +205 -0
- package/dist/bin.mjs.map +1 -0
- package/dist/index.d.mts +1 -0
- package/dist/index.mjs +8 -0
- package/dist/index.mjs.map +1 -0
- package/dist/server-B2Ms4jQx.mjs +3889 -0
- package/dist/server-B2Ms4jQx.mjs.map +1 -0
- package/package.json +10 -9
- package/dist/bin.d.ts +0 -3
- package/dist/bin.d.ts.map +0 -1
- package/dist/bin.js +0 -4
- package/dist/bin.js.map +0 -1
- package/dist/cli/adapter.d.ts +0 -7
- package/dist/cli/adapter.d.ts.map +0 -1
- package/dist/cli/adapter.js +0 -39
- package/dist/cli/adapter.js.map +0 -1
- package/dist/cli/commands/index.d.ts +0 -24
- package/dist/cli/commands/index.d.ts.map +0 -1
- package/dist/cli/commands/index.js +0 -13
- package/dist/cli/commands/index.js.map +0 -1
- package/dist/cli/commands/serve.command.d.ts +0 -21
- package/dist/cli/commands/serve.command.d.ts.map +0 -1
- package/dist/cli/commands/serve.command.js +0 -62
- package/dist/cli/commands/serve.command.js.map +0 -1
- package/dist/cli/commands/version.command.d.ts +0 -2
- package/dist/cli/commands/version.command.d.ts.map +0 -1
- package/dist/cli/commands/version.command.js +0 -12
- package/dist/cli/commands/version.command.js.map +0 -1
- package/dist/cli/index.d.ts +0 -2
- package/dist/cli/index.d.ts.map +0 -1
- package/dist/cli/index.js +0 -15
- package/dist/cli/index.js.map +0 -1
- package/dist/cli/parser.d.ts +0 -7
- package/dist/cli/parser.d.ts.map +0 -1
- package/dist/cli/parser.js +0 -99
- package/dist/cli/parser.js.map +0 -1
- package/dist/config/index.d.ts +0 -24
- package/dist/config/index.d.ts.map +0 -1
- package/dist/config/index.js +0 -38
- package/dist/config/index.js.map +0 -1
- package/dist/core/ast/index.d.ts +0 -82
- package/dist/core/ast/index.d.ts.map +0 -1
- package/dist/core/ast/index.js +0 -204
- package/dist/core/ast/index.js.map +0 -1
- package/dist/core/ast/types.d.ts +0 -152
- package/dist/core/ast/types.d.ts.map +0 -1
- package/dist/core/ast/types.js +0 -5
- package/dist/core/ast/types.js.map +0 -1
- package/dist/core/constants.d.ts +0 -17
- package/dist/core/constants.d.ts.map +0 -1
- package/dist/core/constants.js +0 -49
- package/dist/core/constants.js.map +0 -1
- package/dist/core/embeddings/callgraph.d.ts +0 -98
- package/dist/core/embeddings/callgraph.d.ts.map +0 -1
- package/dist/core/embeddings/callgraph.js +0 -415
- package/dist/core/embeddings/callgraph.js.map +0 -1
- package/dist/core/embeddings/chunker.d.ts +0 -37
- package/dist/core/embeddings/chunker.d.ts.map +0 -1
- package/dist/core/embeddings/chunker.js +0 -298
- package/dist/core/embeddings/chunker.js.map +0 -1
- package/dist/core/embeddings/client.d.ts +0 -30
- package/dist/core/embeddings/client.d.ts.map +0 -1
- package/dist/core/embeddings/client.js +0 -65
- package/dist/core/embeddings/client.js.map +0 -1
- package/dist/core/embeddings/crossfile.d.ts +0 -58
- package/dist/core/embeddings/crossfile.d.ts.map +0 -1
- package/dist/core/embeddings/crossfile.js +0 -202
- package/dist/core/embeddings/crossfile.js.map +0 -1
- package/dist/core/embeddings/enricher.d.ts +0 -53
- package/dist/core/embeddings/enricher.d.ts.map +0 -1
- package/dist/core/embeddings/enricher.js +0 -308
- package/dist/core/embeddings/enricher.js.map +0 -1
- package/dist/core/embeddings/index.d.ts +0 -13
- package/dist/core/embeddings/index.d.ts.map +0 -1
- package/dist/core/embeddings/index.js +0 -20
- package/dist/core/embeddings/index.js.map +0 -1
- package/dist/core/embeddings/reranker.d.ts +0 -41
- package/dist/core/embeddings/reranker.d.ts.map +0 -1
- package/dist/core/embeddings/reranker.js +0 -117
- package/dist/core/embeddings/reranker.js.map +0 -1
- package/dist/core/embeddings/store.d.ts +0 -93
- package/dist/core/embeddings/store.d.ts.map +0 -1
- package/dist/core/embeddings/store.js +0 -304
- package/dist/core/embeddings/store.js.map +0 -1
- package/dist/core/embeddings/types.d.ts +0 -77
- package/dist/core/embeddings/types.d.ts.map +0 -1
- package/dist/core/embeddings/types.js +0 -5
- package/dist/core/embeddings/types.js.map +0 -1
- package/dist/core/embeddings/watcher.d.ts +0 -130
- package/dist/core/embeddings/watcher.d.ts.map +0 -1
- package/dist/core/embeddings/watcher.js +0 -448
- package/dist/core/embeddings/watcher.js.map +0 -1
- package/dist/core/fallback/index.d.ts +0 -26
- package/dist/core/fallback/index.d.ts.map +0 -1
- package/dist/core/fallback/index.js +0 -76
- package/dist/core/fallback/index.js.map +0 -1
- package/dist/core/parser/index.d.ts +0 -64
- package/dist/core/parser/index.d.ts.map +0 -1
- package/dist/core/parser/index.js +0 -205
- package/dist/core/parser/index.js.map +0 -1
- package/dist/core/parser/languages.d.ts +0 -26
- package/dist/core/parser/languages.d.ts.map +0 -1
- package/dist/core/parser/languages.js +0 -101
- package/dist/core/parser/languages.js.map +0 -1
- package/dist/core/queries/helpers.d.ts +0 -72
- package/dist/core/queries/helpers.d.ts.map +0 -1
- package/dist/core/queries/helpers.js +0 -101
- package/dist/core/queries/helpers.js.map +0 -1
- package/dist/core/queries/index.d.ts +0 -144
- package/dist/core/queries/index.d.ts.map +0 -1
- package/dist/core/queries/index.js +0 -396
- package/dist/core/queries/index.js.map +0 -1
- package/dist/core/queries/loader.d.ts +0 -46
- package/dist/core/queries/loader.d.ts.map +0 -1
- package/dist/core/queries/loader.js +0 -216
- package/dist/core/queries/loader.js.map +0 -1
- package/dist/core/queries/patterns.d.ts +0 -10
- package/dist/core/queries/patterns.d.ts.map +0 -1
- package/dist/core/queries/patterns.js +0 -112
- package/dist/core/queries/patterns.js.map +0 -1
- package/dist/core/symbols/index.d.ts +0 -70
- package/dist/core/symbols/index.d.ts.map +0 -1
- package/dist/core/symbols/index.js +0 -359
- package/dist/core/symbols/index.js.map +0 -1
- package/dist/core/unified/index.d.ts +0 -118
- package/dist/core/unified/index.d.ts.map +0 -1
- package/dist/core/unified/index.js +0 -428
- package/dist/core/unified/index.js.map +0 -1
- package/dist/core/utils/assets.d.ts +0 -34
- package/dist/core/utils/assets.d.ts.map +0 -1
- package/dist/core/utils/assets.js +0 -85
- package/dist/core/utils/assets.js.map +0 -1
- package/dist/core/utils/cache.d.ts +0 -43
- package/dist/core/utils/cache.d.ts.map +0 -1
- package/dist/core/utils/cache.js +0 -60
- package/dist/core/utils/cache.js.map +0 -1
- package/dist/core/utils/index.d.ts +0 -7
- package/dist/core/utils/index.d.ts.map +0 -1
- package/dist/core/utils/index.js +0 -10
- package/dist/core/utils/index.js.map +0 -1
- package/dist/core/utils/tsconfig.d.ts +0 -34
- package/dist/core/utils/tsconfig.d.ts.map +0 -1
- package/dist/core/utils/tsconfig.js +0 -173
- package/dist/core/utils/tsconfig.js.map +0 -1
- package/dist/features/analyze-file/index.d.ts +0 -15
- package/dist/features/analyze-file/index.d.ts.map +0 -1
- package/dist/features/analyze-file/index.js +0 -164
- package/dist/features/analyze-file/index.js.map +0 -1
- package/dist/features/get-call-graph/index.d.ts +0 -24
- package/dist/features/get-call-graph/index.d.ts.map +0 -1
- package/dist/features/get-call-graph/index.js +0 -246
- package/dist/features/get-call-graph/index.js.map +0 -1
- package/dist/features/get-index-status/index.d.ts +0 -20
- package/dist/features/get-index-status/index.d.ts.map +0 -1
- package/dist/features/get-index-status/index.js +0 -90
- package/dist/features/get-index-status/index.js.map +0 -1
- package/dist/features/index-codebase/index.d.ts +0 -24
- package/dist/features/index-codebase/index.d.ts.map +0 -1
- package/dist/features/index-codebase/index.js +0 -283
- package/dist/features/index-codebase/index.js.map +0 -1
- package/dist/features/index.d.ts +0 -15
- package/dist/features/index.d.ts.map +0 -1
- package/dist/features/index.js +0 -28
- package/dist/features/index.js.map +0 -1
- package/dist/features/info/index.d.ts +0 -19
- package/dist/features/info/index.d.ts.map +0 -1
- package/dist/features/info/index.js +0 -41
- package/dist/features/info/index.js.map +0 -1
- package/dist/features/list-symbols/index.d.ts +0 -22
- package/dist/features/list-symbols/index.d.ts.map +0 -1
- package/dist/features/list-symbols/index.js +0 -74
- package/dist/features/list-symbols/index.js.map +0 -1
- package/dist/features/parse-ast/index.d.ts +0 -12
- package/dist/features/parse-ast/index.d.ts.map +0 -1
- package/dist/features/parse-ast/index.js +0 -71
- package/dist/features/parse-ast/index.js.map +0 -1
- package/dist/features/query-code/index.d.ts +0 -23
- package/dist/features/query-code/index.d.ts.map +0 -1
- package/dist/features/query-code/index.js +0 -96
- package/dist/features/query-code/index.js.map +0 -1
- package/dist/features/search-code/index.d.ts +0 -39
- package/dist/features/search-code/index.d.ts.map +0 -1
- package/dist/features/search-code/index.js +0 -258
- package/dist/features/search-code/index.js.map +0 -1
- package/dist/features/types.d.ts +0 -14
- package/dist/features/types.d.ts.map +0 -1
- package/dist/features/types.js +0 -2
- package/dist/features/types.js.map +0 -1
- package/dist/features/update-index/index.d.ts +0 -24
- package/dist/features/update-index/index.d.ts.map +0 -1
- package/dist/features/update-index/index.js +0 -358
- package/dist/features/update-index/index.js.map +0 -1
- package/dist/features/utils/content.d.ts +0 -30
- package/dist/features/utils/content.d.ts.map +0 -1
- package/dist/features/utils/content.js +0 -49
- package/dist/features/utils/content.js.map +0 -1
- package/dist/features/utils/index.d.ts +0 -6
- package/dist/features/utils/index.d.ts.map +0 -1
- package/dist/features/utils/index.js +0 -8
- package/dist/features/utils/index.js.map +0 -1
- package/dist/features/utils/result.d.ts +0 -37
- package/dist/features/utils/result.d.ts.map +0 -1
- package/dist/features/utils/result.js +0 -53
- package/dist/features/utils/result.js.map +0 -1
- package/dist/index.d.ts +0 -2
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js +0 -4
- package/dist/index.js.map +0 -1
- package/dist/prompts/index.d.ts +0 -9
- package/dist/prompts/index.d.ts.map +0 -1
- package/dist/prompts/index.js +0 -188
- package/dist/prompts/index.js.map +0 -1
- package/dist/resources/index.d.ts +0 -3
- package/dist/resources/index.d.ts.map +0 -1
- package/dist/resources/index.js +0 -17
- package/dist/resources/index.js.map +0 -1
- package/dist/server.d.ts +0 -4
- package/dist/server.d.ts.map +0 -1
- package/dist/server.js +0 -24
- package/dist/server.js.map +0 -1
- package/dist/tools/adapter.d.ts +0 -4
- package/dist/tools/adapter.d.ts.map +0 -1
- package/dist/tools/adapter.js +0 -28
- package/dist/tools/adapter.js.map +0 -1
- package/dist/tools/index.d.ts +0 -5
- package/dist/tools/index.d.ts.map +0 -1
- package/dist/tools/index.js +0 -9
- package/dist/tools/index.js.map +0 -1
- package/dist/types/index.d.ts +0 -20
- package/dist/types/index.d.ts.map +0 -1
- package/dist/types/index.js +0 -2
- package/dist/types/index.js.map +0 -1
- package/dist/utils/colors.d.ts +0 -24
- package/dist/utils/colors.d.ts.map +0 -1
- package/dist/utils/colors.js +0 -30
- package/dist/utils/colors.js.map +0 -1
- package/dist/utils/index.d.ts +0 -4
- package/dist/utils/index.d.ts.map +0 -1
- package/dist/utils/index.js +0 -4
- package/dist/utils/index.js.map +0 -1
- package/dist/utils/logger.d.ts +0 -8
- package/dist/utils/logger.d.ts.map +0 -1
- package/dist/utils/logger.js +0 -57
- package/dist/utils/logger.js.map +0 -1
- package/dist/utils/spinner.d.ts +0 -11
- package/dist/utils/spinner.d.ts.map +0 -1
- package/dist/utils/spinner.js +0 -36
- package/dist/utils/spinner.js.map +0 -1
|
@@ -0,0 +1,3889 @@
|
|
|
1
|
+
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
3
|
+
import { z } from "zod";
|
|
4
|
+
import * as fs from "node:fs";
|
|
5
|
+
import * as path from "node:path";
|
|
6
|
+
import ignore from "ignore";
|
|
7
|
+
import { Ollama } from "ollama";
|
|
8
|
+
import * as lancedb from "@lancedb/lancedb";
|
|
9
|
+
import pc from "picocolors";
|
|
10
|
+
import "ora";
|
|
11
|
+
import * as crypto from "node:crypto";
|
|
12
|
+
import { existsSync, readFileSync } from "fs";
|
|
13
|
+
import { dirname, join } from "path";
|
|
14
|
+
import { Language, Parser, Query } from "web-tree-sitter";
|
|
15
|
+
import { fileURLToPath } from "url";
|
|
16
|
+
import { watch } from "chokidar";
|
|
17
|
+
import fg from "fast-glob";
|
|
18
|
+
import "@langchain/textsplitters";
|
|
19
|
+
|
|
20
|
+
//#region src/config/index.ts
/**
 * Static server identity reported over MCP (name, version, description).
 */
const config = {
	name: "src-mcp",
	fullName: "SRC (Structured Repo Context)",
	version: "1.0.2",
	description: "MCP server for codebase analysis with Treesitter (SCM queries), AST parsing, and embedding-based indexing"
};
const nodeEnv = process.env.NODE_ENV;
const logLevelEnv = process.env.LOG_LEVEL;
/**
 * Runtime environment flags derived from NODE_ENV / LOG_LEVEL at load time.
 */
const ENV = {
	isDev: nodeEnv === "development",
	isProd: nodeEnv === "production",
	logLevel: logLevelEnv == null ? "info" : logLevelEnv
};
/**
 * Parse a numeric environment value, using the fallback when the variable is
 * unset, non-numeric, or zero (Number(...) is falsy in all three cases, which
 * matches the original `Number(x) || d` behavior).
 */
const envNumber = (raw, fallback) => Number(raw) || fallback;
/**
 * Embedding configuration with environment variable overrides
 */
const EMBEDDING_CONFIG = {
	ollamaBaseUrl: process.env.OLLAMA_BASE_URL ?? "http://localhost:11434",
	embeddingModel: process.env.EMBEDDING_MODEL ?? "nomic-embed-text",
	embeddingDimensions: envNumber(process.env.EMBEDDING_DIMENSIONS, 768),
	defaultChunkSize: envNumber(process.env.CHUNK_SIZE, 1e3),
	defaultChunkOverlap: envNumber(process.env.CHUNK_OVERLAP, 200),
	batchSize: envNumber(process.env.EMBEDDING_BATCH_SIZE, 10),
	rerankModel: process.env.RERANK_MODEL ?? "qwen2.5:1.5b"
};
/**
 * Enrichment configuration for cross-file context
 */
const ENRICHMENT_CONFIG = {
	includeCrossFileContext: process.env.ENRICHMENT_CROSS_FILE !== "false",
	maxImportsToResolve: envNumber(process.env.ENRICHMENT_MAX_IMPORTS, 10),
	maxSymbolsPerImport: envNumber(process.env.ENRICHMENT_MAX_SYMBOLS_PER_IMPORT, 5)
};

//#endregion
|
|
56
|
+
//#region src/features/info/index.ts
/** Input schema for the get_server_info tool (format defaults to "text"). */
const infoSchema = z.object({ format: z.enum(["json", "text"]).optional().default("text").describe("Output format") });
/**
 * Snapshot of the server identity taken from the module-level config.
 */
function getServerInfo() {
	const { name, fullName, version, description } = config;
	return {
		name,
		fullName,
		version,
		description
	};
}
/**
 * Tool handler: renders the server info either as pretty-printed JSON or as
 * a single human-readable line, depending on input.format.
 */
function execute$4(input) {
	const info = getServerInfo();
	const message = input.format === "json"
		? JSON.stringify(info, null, 2)
		: `${info.fullName} (${info.name}) v${info.version}\n${info.description ?? ""}`.trim();
	return {
		success: true,
		data: info,
		message
	};
}
/** Feature descriptor wiring the schema and handler to the tool name. */
const infoFeature = {
	name: "get_server_info",
	description: "Get SRC server version and capabilities. Use to verify the MCP server is running correctly.",
	schema: infoSchema,
	execute: execute$4
};

//#endregion
|
|
88
|
+
//#region src/core/embeddings/client.ts
/**
 * Ollama client for generating embeddings
 * Uses the official ollama library
 */
var OllamaClient = class {
	client;
	model;
	constructor(config$1) {
		this.model = config$1.embeddingModel;
		this.client = new Ollama({ host: config$1.ollamaBaseUrl });
	}
	/**
	 * Generate an embedding vector for a single text.
	 * Throws when Ollama returns an empty embeddings array.
	 */
	async embed(text) {
		const response = await this.client.embed({
			model: this.model,
			input: text
		});
		const [vector] = response.embeddings;
		if (!vector) throw new Error("No embedding returned from Ollama");
		return vector;
	}
	/**
	 * Generate embeddings for multiple texts in a single request.
	 */
	async embedBatch(texts) {
		const response = await this.client.embed({
			model: this.model,
			input: texts
		});
		return response.embeddings;
	}
	/**
	 * Check that Ollama is reachable and the configured model is available
	 * (exact name match or any tag of it, e.g. "model:latest").
	 */
	async healthCheck() {
		try {
			const { models } = await this.client.list();
			const found = models.some((m) => m.name === this.model || m.name.startsWith(`${this.model}:`));
			if (found) return { ok: true };
			return {
				ok: false,
				error: `Model "${this.model}" not found. Run: ollama pull ${this.model}`
			};
		} catch (error) {
			const detail = error instanceof Error ? error.message : String(error);
			return {
				ok: false,
				error: `Cannot connect to Ollama: ${detail}`
			};
		}
	}
};
/**
 * Create a new Ollama client from an embedding config.
 */
function createOllamaClient(config$1) {
	return new OllamaClient(config$1);
}

//#endregion
|
|
146
|
+
//#region src/utils/logger.ts
/** Numeric severity rank per level name; higher rank = more severe. */
const LOG_LEVELS = {
	debug: 0,
	info: 1,
	warn: 2,
	error: 3
};
/** picocolors painter used to colorize each level tag. */
const LEVEL_COLORS = {
	debug: pc.dim,
	info: pc.blue,
	warn: pc.yellow,
	error: pc.red
};
/** True when `level` names one of the known log levels. */
function isValidLogLevel(level) {
	if (level in LOG_LEVELS) return true;
	return false;
}
/** Decide whether a message at `level` passes the configured threshold. */
function shouldLog(level) {
	const configured = ENV.logLevel;
	const threshold = isValidLogLevel(configured) ? LOG_LEVELS[configured] : LOG_LEVELS.info;
	return LOG_LEVELS[level] >= threshold;
}
/** Prefix a message with a dimmed ISO timestamp and a padded, colored tag. */
function formatMessage(level, message) {
	const timestamp = pc.dim(new Date().toISOString());
	const tag = LEVEL_COLORS[level](level.toUpperCase().padEnd(5));
	return `${timestamp} ${tag} ${message}`;
}
/**
 * Logger writing to stderr (console.error / console.warn), keeping stdout
 * free for protocol traffic.
 */
const logger = {
	debug(message, ...args) {
		if (!shouldLog("debug")) return;
		console.error(formatMessage("debug", message), ...args);
	},
	info(message, ...args) {
		if (!shouldLog("info")) return;
		console.error(formatMessage("info", message), ...args);
	},
	warn(message, ...args) {
		if (!shouldLog("warn")) return;
		console.warn(formatMessage("warn", message), ...args);
	},
	error(message, ...args) {
		if (!shouldLog("error")) return;
		console.error(formatMessage("error", message), ...args);
	},
	// Always emitted, regardless of the configured log level.
	success(message, ...args) {
		console.error(pc.green("✓ ") + message, ...args);
	}
};

//#endregion
|
|
189
|
+
//#region src/utils/colors.ts
/**
 * Color utilities for CLI output
 */
const colors = {
	success: pc.green,
	error: pc.red,
	warn: pc.yellow,
	info: pc.blue,
	dim: pc.dim,
	bold: pc.bold,
	cyan: pc.cyan,
	magenta: pc.magenta,
	// Bold + colored compositions.
	successBold(text) {
		return pc.bold(pc.green(text));
	},
	errorBold(text) {
		return pc.bold(pc.red(text));
	},
	infoBold(text) {
		return pc.bold(pc.blue(text));
	},
	// Symbol-prefixed one-line message formatters.
	formatSuccess(msg) {
		return `${pc.green("✓")} ${msg}`;
	},
	formatError(msg) {
		return `${pc.red("✗")} ${msg}`;
	},
	formatInfo(msg) {
		return `${pc.blue("ℹ")} ${msg}`;
	},
	formatWarn(msg) {
		return `${pc.yellow("⚠")} ${msg}`;
	},
	formatCommand(cmd) {
		return pc.cyan(cmd);
	},
	formatValue(val) {
		return pc.magenta(val);
	},
	formatPath(target) {
		return pc.dim(target);
	}
};

//#endregion
|
|
215
|
+
//#region src/core/embeddings/store.ts
/**
 * LanceDB vector store for code embeddings
 *
 * Supports:
 * - Vector similarity search (embeddings)
 * - Full-text search (BM25)
 * - Hybrid search with RRF (Reciprocal Rank Fusion)
 */
const TABLE_NAME = "code_chunks";
const INDEX_DIR_NAME = ".src-index";
/**
 * Reciprocal Rank Fusion (RRF) to combine ranked lists
 *
 * RRF score = sum(1 / (k + rank_i)) for each list
 * where k is a constant (typically 60) and rank_i is the 1-based rank in list i
 *
 * @param vectorResults - results ranked by vector similarity, best first
 * @param ftsResults - results ranked by BM25 relevance, best first
 * @param k - RRF damping constant; larger k flattens rank differences
 * @returns fused results (deduplicated by chunk.id) sorted by descending RRF score
 */
function rrfFusion(vectorResults, ftsResults, k = 60) {
	const scores = /* @__PURE__ */ new Map();
	// Fold one ranked list into the score accumulator; rank is 1-based,
	// and a chunk appearing in both lists has its contributions summed.
	const accumulate = (results) => {
		results.forEach((result, index) => {
			const rrfScore = 1 / (k + (index + 1));
			const existing = scores.get(result.chunk.id);
			if (existing) existing.score += rrfScore;
			else scores.set(result.chunk.id, {
				score: rrfScore,
				result
			});
		});
	};
	accumulate(vectorResults);
	accumulate(ftsResults);
	// Keep the first-seen result object for each id, replacing only its score.
	return Array.from(scores.values()).sort((a, b) => b.score - a.score).map(({ score, result }) => ({
		...result,
		score
	}));
}
|
|
257
|
+
/**
 * LanceDB vector store wrapper
 *
 * Owns one LanceDB database under `<directory>/.src-index` with a single
 * "code_chunks" table. All query methods are no-ops (empty results) until
 * connect() has opened an existing table or addChunks() has created one.
 */
var VectorStore = class {
	// LanceDB connection handle; null until connect().
	db = null;
	// Handle to the "code_chunks" table; null until opened or created.
	table = null;
	// Absolute-or-relative path `<directory>/.src-index`.
	indexPath;
	// Tracks whether the FTS index was already created this session,
	// so createFtsIndex() only hits LanceDB once.
	ftsIndexCreated = false;
	// _config is accepted but unused here (kept for call-site compatibility).
	constructor(directory, _config) {
		this.indexPath = path.join(directory, INDEX_DIR_NAME);
	}
	/**
	 * Initialize the database connection
	 *
	 * Opens the existing "code_chunks" table when present; otherwise leaves
	 * this.table null so addChunks() can create it lazily.
	 */
	async connect() {
		this.db = await lancedb.connect(this.indexPath);
		if ((await this.db.tableNames()).includes(TABLE_NAME)) this.table = await this.db.openTable(TABLE_NAME);
	}
	/**
	 * Close the database connection
	 *
	 * Only drops the local handles; LanceDB has no explicit close call here.
	 */
	close() {
		this.db = null;
		this.table = null;
	}
	/**
	 * Check if the index exists
	 *
	 * Checks the index directory on disk, independent of connect() state.
	 */
	exists() {
		return fs.existsSync(this.indexPath);
	}
	/**
	 * Add embedded chunks to the store
	 *
	 * Creates the table from the first batch (schema inferred from records)
	 * or appends to the existing table.
	 *
	 * @throws Error when connect() has not been called yet
	 */
	async addChunks(chunks) {
		if (!this.db) throw new Error("Database not connected. Call connect() first.");
		// Optional symbol fields are stored as "" because the table schema
		// is inferred from these records.
		const records = chunks.map((chunk) => ({
			id: chunk.id,
			content: chunk.content,
			filePath: chunk.filePath,
			language: chunk.language,
			startLine: chunk.startLine,
			endLine: chunk.endLine,
			symbolName: chunk.symbolName ?? "",
			symbolType: chunk.symbolType ?? "",
			vector: chunk.vector
		}));
		if (!this.table) this.table = await this.db.createTable(TABLE_NAME, records);
		else await this.table.add(records);
	}
	/**
	 * Create FTS (Full-Text Search) index on content column
	 * This enables BM25-based text search
	 *
	 * Idempotent: "already exists" errors are treated as success; any other
	 * failure is logged as a warning and search falls back gracefully.
	 */
	async createFtsIndex() {
		if (!this.table || this.ftsIndexCreated) return;
		try {
			await this.table.createIndex("content", { config: lancedb.Index.fts() });
			this.ftsIndexCreated = true;
			logger.debug("FTS index created on content column");
		} catch (error) {
			if (error instanceof Error && error.message.includes("already exists")) {
				this.ftsIndexCreated = true;
				logger.debug("FTS index already exists");
			} else logger.warn(`Failed to create FTS index: ${error instanceof Error ? error.message : String(error)}`);
		}
	}
	/**
	 * Search for similar chunks using vector similarity
	 *
	 * @returns results with score = raw LanceDB `_distance` (0 when absent).
	 *   NOTE(review): _distance is a distance, so smaller means closer —
	 *   unlike the RRF scores produced by searchHybrid.
	 */
	async search(queryVector, limit = 10) {
		if (!this.table) return [];
		return (await this.table.vectorSearch(queryVector).limit(limit).toArray()).map((row) => ({
			chunk: {
				id: row.id,
				content: row.content,
				filePath: row.filePath,
				language: row.language,
				startLine: row.startLine,
				endLine: row.endLine,
				// "" sentinel stored by addChunks maps back to undefined.
				symbolName: row.symbolName || void 0,
				symbolType: row.symbolType || void 0
			},
			score: row._distance ?? 0
		}));
	}
	/**
	 * Full-text search using BM25
	 *
	 * Ensures the FTS index exists first. Scores are synthetic reciprocal
	 * ranks (1/1, 1/2, ...) rather than raw BM25 values. Any search failure
	 * (e.g. no FTS index) is logged and yields an empty result set.
	 */
	async searchFts(queryText, limit = 10) {
		if (!this.table) return [];
		await this.createFtsIndex();
		try {
			return (await this.table.query().nearestToText(queryText).limit(limit).toArray()).map((row, index) => ({
				chunk: {
					id: row.id,
					content: row.content,
					filePath: row.filePath,
					language: row.language,
					startLine: row.startLine,
					endLine: row.endLine,
					symbolName: row.symbolName || void 0,
					symbolType: row.symbolType || void 0
				},
				score: 1 / (index + 1)
			}));
		} catch (error) {
			logger.warn(`FTS search failed, falling back to empty results: ${error instanceof Error ? error.message : String(error)}`);
			return [];
		}
	}
	/**
	 * Hybrid search combining vector similarity and full-text search
	 * Uses Reciprocal Rank Fusion (RRF) to combine results
	 *
	 * @param options.mode - "vector" | "fts" | "hybrid" (default "hybrid")
	 * @param options.rrfK - RRF damping constant (default 60)
	 */
	async searchHybrid(queryVector, queryText, limit = 10, options = {}) {
		const { mode = "hybrid", rrfK = 60 } = options;
		if (!this.table) return [];
		if (mode === "vector") return this.search(queryVector, limit);
		if (mode === "fts") return this.searchFts(queryText, limit);
		// Over-fetch (2x limit) from each ranker so fusion has enough
		// candidates, then trim to the requested limit after RRF.
		const [vectorResults, ftsResults] = await Promise.all([this.search(queryVector, limit * 2), this.searchFts(queryText, limit * 2)]);
		return rrfFusion(vectorResults, ftsResults, rrfK).slice(0, limit);
	}
	/**
	 * Delete chunks by file path
	 *
	 * Single quotes are doubled to escape them inside the SQL string literal.
	 */
	async deleteByFilePath(filePath) {
		if (!this.table) return;
		await this.table.delete(`"filePath" = '${filePath.replace(/'/g, "''")}'`);
	}
	/**
	 * Clear all data from the store
	 *
	 * Drops the whole table; the next addChunks() recreates it.
	 */
	async clear() {
		if (this.db && this.table) {
			await this.db.dropTable(TABLE_NAME);
			this.table = null;
		}
	}
	/**
	 * Get index status
	 *
	 * Aggregates chunk/file/language counts by scanning every row, so cost
	 * grows with index size.
	 */
	async getStatus(directory) {
		const status = {
			directory,
			indexPath: this.indexPath,
			exists: this.exists(),
			totalChunks: 0,
			totalFiles: 0,
			languages: {}
		};
		if (!this.table) return status;
		const allRows = await this.table.query().toArray();
		status.totalChunks = allRows.length;
		const uniqueFiles = /* @__PURE__ */ new Set();
		const languageCounts = {};
		for (const row of allRows) {
			uniqueFiles.add(row.filePath);
			const lang = row.language;
			languageCounts[lang] = (languageCounts[lang] ?? 0) + 1;
		}
		status.totalFiles = uniqueFiles.size;
		status.languages = languageCounts;
		return status;
	}
	/**
	 * Get all indexed file paths
	 *
	 * @returns deduplicated list of filePath values in the table
	 */
	async getIndexedFiles() {
		if (!this.table) return [];
		const rows = await this.table.query().select(["filePath"]).toArray();
		const uniqueFiles = /* @__PURE__ */ new Set();
		for (const row of rows) uniqueFiles.add(row.filePath);
		return Array.from(uniqueFiles);
	}
};
|
|
433
|
+
/**
 * Create a vector store for a directory
 *
 * @param directory - directory whose index lives under `.src-index`
 * @param config$1 - embedding configuration forwarded to the store
 */
function createVectorStore(directory, config$1) {
	const store = new VectorStore(directory, config$1);
	return store;
}
/**
 * Get the index path for a directory
 *
 * @returns `<directory>/.src-index`
 */
function getIndexPath(directory) {
	const indexPath = path.join(directory, INDEX_DIR_NAME);
	return indexPath;
}

//#endregion
|
|
447
|
+
//#region src/core/utils/assets.ts
/**
 * Centralized asset directory utilities
 *
 * Provides consistent access to the assets directory and JSON config loading
 * across all core modules.
 */
/**
 * Cached assets directory path
 */
let assetsDirCache = null;
/**
 * Resolve (and memoize) the assets directory.
 *
 * Works in both ESM and CJS contexts by probing candidate locations relative
 * to the current module, falling back to `<cwd>/assets` when none exist.
 */
function getAssetsDir() {
	if (assetsDirCache) return assetsDirCache;
	let currentDir;
	if (typeof __dirname === "undefined") currentDir = dirname(fileURLToPath(import.meta.url));
	else currentDir = __dirname;
	const candidates = [
		join(currentDir, "..", "..", "..", "assets"),
		join(currentDir, "..", "..", "assets"),
		join(process.cwd(), "assets")
	];
	const found = candidates.find((candidate) => existsSync(candidate));
	assetsDirCache = found ?? join(process.cwd(), "assets");
	return assetsDirCache;
}
/**
 * Load and parse a JSON config file from the assets directory.
 *
 * @param filename - Name of the JSON file in assets directory
 * @param defaultValue - Default value to return if file cannot be loaded
 * @returns Parsed JSON content, or the default on any read/parse failure
 */
function loadJsonConfig(filename, defaultValue) {
	const configPath = join(getAssetsDir(), filename);
	try {
		const raw = readFileSync(configPath, "utf-8");
		return JSON.parse(raw);
	} catch {
		return defaultValue;
	}
}
/**
 * Get the path to a file within the assets directory.
 *
 * @param segments - Path segments relative to assets directory
 * @returns Full path to the asset file
 */
function getAssetPath(...segments) {
	const base = getAssetsDir();
	return join(base, ...segments);
}
/**
 * Check if an asset file exists.
 *
 * @param segments - Path segments relative to assets directory
 * @returns True if the file exists
 */
function assetExists(...segments) {
	const target = getAssetPath(...segments);
	return existsSync(target);
}

//#endregion
|
|
515
|
+
//#region src/core/utils/cache.ts
|
|
516
|
+
/**
 * Registry of cache clear functions, keyed by cache name.
 * Registering the same name twice replaces the earlier entry.
 */
const cacheRegistry = /* @__PURE__ */ new Map();
/**
 * Register a cache clear function.
 *
 * @param cacheName - Unique name for this cache (for debugging/identification)
 * @param clear - Function that clears the cache
 */
function registerCache(cacheName, clear) {
  cacheRegistry.set(cacheName, clear);
}
|
|
529
|
+
|
|
530
|
+
//#endregion
|
|
531
|
+
//#region src/core/utils/tsconfig.ts
|
|
532
|
+
/**
|
|
533
|
+
* TSConfig utilities for reading path aliases
|
|
534
|
+
*
|
|
535
|
+
* Reads and parses tsconfig.json to extract path aliases
|
|
536
|
+
* in a format usable by the cross-file resolution system.
|
|
537
|
+
*/
|
|
538
|
+
/**
 * Strip JSON comments: single-line (`//` to end of line) and multi-line
 * (slash-star to star-slash), leaving string contents untouched.
 *
 * Fix over the previous version: escape sequences inside strings are now
 * consumed in pairs, so a string ending in an escaped backslash (e.g.
 * "dir\\") no longer makes the scanner think the closing quote is escaped
 * (the old `charAt(i - 1) !== "\\"` check misread that case and corrupted
 * everything after it).
 *
 * @param json - JSONC source text
 * @returns The source with comments removed (newlines after `//` are kept
 *   so line numbers remain stable for downstream error reporting)
 */
function stripJsonComments(json) {
  let result = "";
  let inString = false;
  let inSingleLineComment = false;
  let inMultiLineComment = false;
  for (let i = 0; i < json.length; i++) {
    const char = json.charAt(i);
    const nextChar = json.charAt(i + 1);
    if (inSingleLineComment) {
      // Keep the newline that terminates the comment.
      if (char === "\n") {
        inSingleLineComment = false;
        result += char;
      }
      continue;
    }
    if (inMultiLineComment) {
      if (char === "*" && nextChar === "/") {
        inMultiLineComment = false;
        i++;
      }
      continue;
    }
    if (inString) {
      result += char;
      if (char === "\\") {
        // Consume the escaped character as part of the string so that a
        // literal backslash followed by a quote correctly closes the string.
        i++;
        result += json.charAt(i);
      } else if (char === "\"") {
        inString = false;
      }
      continue;
    }
    if (char === "\"") {
      inString = true;
      result += char;
    } else if (char === "/" && nextChar === "/") {
      inSingleLineComment = true;
      i++;
    } else if (char === "/" && nextChar === "*") {
      inMultiLineComment = true;
      i++;
    } else {
      result += char;
    }
  }
  return result;
}
|
|
581
|
+
/**
 * Parse tsconfig.json content (JSONC comments tolerated).
 *
 * @param content - Raw file contents
 * @returns Parsed object, or null when the content cannot be parsed
 */
function parseTsConfig(content) {
  try {
    return JSON.parse(stripJsonComments(content));
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    logger.debug(`Failed to parse tsconfig.json: ${reason}`);
    return null;
  }
}
|
|
593
|
+
/**
 * Convert tsconfig `paths` entries to a flat alias map.
 *
 * TSConfig form:                Output form:
 *   "@core": ["src/core"]        "@core":  "src/core"
 *   "@core/*": ["src/core/*"]    "@core/": "src/core/"
 *
 * Only the first target of each pattern is used. Targets are resolved
 * against `baseUrl` and re-expressed relative to `projectRoot` with
 * forward slashes.
 *
 * @param paths - tsconfig `compilerOptions.paths` object
 * @param baseUrl - tsconfig `compilerOptions.baseUrl`
 * @param projectRoot - Directory containing tsconfig.json
 * @returns Alias-prefix -> project-relative-path map
 */
function convertPaths(paths, baseUrl, projectRoot) {
  const toProjectRelative = (target) => {
    const absolute = path.join(projectRoot, baseUrl, target);
    return path.relative(projectRoot, absolute).replace(/\\/g, "/");
  };
  const aliases = {};
  for (const [pattern, targets] of Object.entries(paths)) {
    const [firstTarget] = targets;
    if (!firstTarget) continue;
    if (pattern.endsWith("/*") && firstTarget.endsWith("/*")) {
      // "@core/*" -> "@core/", "src/core/*" -> "src/core/"
      aliases[pattern.slice(0, -2) + "/"] = toProjectRelative(firstTarget.slice(0, -2) + "/") + "/";
    } else {
      aliases[pattern] = toProjectRelative(firstTarget);
    }
  }
  return aliases;
}
|
|
621
|
+
/**
 * Read tsconfig.json from a project root and extract its path aliases.
 *
 * Handles JSONC comments, `baseUrl`-relative targets, wildcard patterns
 * ("@core/*" -> "src/core/*") and exact patterns ("@core" -> "src/core").
 *
 * @param projectRoot - Directory expected to contain tsconfig.json
 * @returns Aliases in simple form, or an empty object when the file is
 *   absent, invalid, or defines no paths
 */
function readPathAliases(projectRoot) {
  const configFile = path.join(projectRoot, "tsconfig.json");
  if (!fs.existsSync(configFile)) {
    logger.debug(`No tsconfig.json found at ${configFile}`);
    return {};
  }
  try {
    const parsed = parseTsConfig(fs.readFileSync(configFile, "utf-8"));
    if (!parsed) return {};
    const compilerPaths = parsed.compilerOptions?.paths;
    const baseUrl = parsed.compilerOptions?.baseUrl ?? ".";
    const hasPaths = compilerPaths && Object.keys(compilerPaths).length > 0;
    if (!hasPaths) {
      logger.debug("No paths defined in tsconfig.json");
      return {};
    }
    const aliases = convertPaths(compilerPaths, baseUrl, projectRoot);
    logger.debug(`Loaded ${String(Object.keys(aliases).length)} path aliases from tsconfig.json`);
    return aliases;
  } catch (error) {
    logger.debug(`Failed to read tsconfig.json: ${error instanceof Error ? error.message : String(error)}`);
    return {};
  }
}
|
|
656
|
+
/** Memoized alias lookups, keyed by normalized project root. */
const pathAliasCache = /* @__PURE__ */ new Map();
/**
 * Read path aliases with caching.
 *
 * @param projectRoot - Project root directory (normalized for the cache key)
 * @returns The alias map; repeated calls for the same root share one instance
 */
function readPathAliasesCached(projectRoot) {
  const key = path.normalize(projectRoot);
  if (pathAliasCache.has(key)) return pathAliasCache.get(key);
  const aliases = readPathAliases(projectRoot);
  pathAliasCache.set(key, aliases);
  return aliases;
}
|
|
671
|
+
|
|
672
|
+
//#endregion
|
|
673
|
+
//#region src/core/parser/languages.ts
|
|
674
|
+
/**
|
|
675
|
+
* Language configuration and mapping for Tree-sitter parsers
|
|
676
|
+
* Reads from centralized assets/languages.json
|
|
677
|
+
*/
|
|
678
|
+
// Parsed contents of assets/languages.json (lazily loaded by loadConfig()).
let configCache$1 = null;
// Name -> language-config map, including alias keys (built by buildLanguages()).
let languagesCache = null;
// File-extension -> language-config map (built by buildExtensionMap()).
let extensionMapCache = null;
|
|
681
|
+
/**
 * Load assets/languages.json once and memoize it.
 * Falls back to an empty `treesitter` table when the file is unavailable.
 */
function loadConfig() {
  configCache$1 ??= loadJsonConfig("languages.json", { treesitter: {} });
  return configCache$1;
}
|
|
686
|
+
/**
 * Build the language-name -> config map from languages.json (memoized).
 * Every alias gets its own entry whose `name` is the canonical language
 * name, so lookups by alias resolve to the same WASM/query files.
 */
function buildLanguages() {
  if (languagesCache) return languagesCache;
  const { treesitter } = loadConfig();
  languagesCache = {};
  for (const [name, lang] of Object.entries(treesitter)) {
    // Fresh object per key, mirroring the canonical entry.
    const makeEntry = () => ({
      name,
      wasm: lang.wasm,
      queries: lang.queries,
      extensions: lang.extensions,
      aliases: lang.aliases
    });
    languagesCache[name] = makeEntry();
    if (lang.aliases) for (const alias of lang.aliases) languagesCache[alias] = makeEntry();
  }
  return languagesCache;
}
|
|
708
|
+
/**
 * Build the file-extension -> language-config map (memoized).
 * When two languages claim the same extension, the later entry wins.
 */
function buildExtensionMap() {
  if (extensionMapCache) return extensionMapCache;
  extensionMapCache = {};
  for (const langConfig of Object.values(buildLanguages())) {
    for (const ext of langConfig.extensions) {
      extensionMapCache[ext] = langConfig;
    }
  }
  return extensionMapCache;
}
|
|
715
|
+
/**
 * Get language configuration from a file path.
 *
 * Fix: the extension is now taken from the basename only, so a dot in a
 * directory name (e.g. "pkg.v2/Makefile") no longer produces a bogus
 * extension, and extension-less files resolve to undefined explicitly
 * instead of looking up their last character.
 *
 * @param filePath - File path or bare filename
 * @returns The language config, or undefined when the extension is unknown
 */
function getLanguageFromPath(filePath) {
  const sepIndex = Math.max(filePath.lastIndexOf("/"), filePath.lastIndexOf("\\"));
  const basename = filePath.slice(sepIndex + 1);
  const dotIndex = basename.lastIndexOf(".");
  if (dotIndex === -1) return undefined;
  const ext = basename.slice(dotIndex).toLowerCase();
  return buildExtensionMap()[ext];
}
|
|
720
|
+
/**
 * Get language configuration by (case-insensitive) name or alias.
 *
 * @param name - Language name, e.g. "TypeScript" or an alias like "ts"
 * @returns The language config, or undefined when unknown
 */
function getLanguageByName(name) {
  const registry = buildLanguages();
  return registry[name.toLowerCase()];
}
|
|
724
|
+
/** Clear caches (for testing) */
// Resets all three memoized tables; the next lookup re-reads languages.json.
function clearLanguageCache$1() {
configCache$1 = null;
languagesCache = null;
extensionMapCache = null;
}
// Eagerly built at module load so consumers can import plain objects.
// NOTE(review): these consts keep pointing at the objects built here even
// after clearLanguageCache$1() runs — verify no caller relies on them
// refreshing.
const LANGUAGES = buildLanguages();
const EXTENSION_MAP = buildExtensionMap();
// Allow the central cache registry to reset language-config state.
registerCache("languages:config", clearLanguageCache$1);
|
|
733
|
+
|
|
734
|
+
//#endregion
|
|
735
|
+
//#region src/core/parser/index.ts
|
|
736
|
+
/**
|
|
737
|
+
* Tree-sitter parser module
|
|
738
|
+
*
|
|
739
|
+
* Provides code parsing functionality using web-tree-sitter
|
|
740
|
+
* WASM files are loaded from local assets directory for minimal bundle size
|
|
741
|
+
*/
|
|
742
|
+
/**
 * Parser initialization state
 */
// True once Parser.init() has resolved and `parser` has been constructed.
let isInitialized = false;
// In-flight initialization promise, shared so concurrent callers await one init.
let initPromise = null;
/**
 * Cache for loaded languages
 */
// Keyed by canonical language name; values are loaded Language instances.
const languageCache = /* @__PURE__ */ new Map();
/**
 * Parser instance (reused)
 */
// Single shared instance; setLanguage() is called before each parse.
let parser = null;
|
|
755
|
+
/**
 * Initialize the Tree-sitter WASM module.
 * Must be called before any parsing operations.
 *
 * Concurrent callers share a single in-flight promise. Fix: a failed
 * initialization now clears that shared promise so a later call can retry
 * instead of forever re-receiving the cached rejection.
 *
 * @returns {Promise<void>|undefined} Resolves when the parser is ready.
 */
async function initializeParser() {
  if (isInitialized) return;
  if (initPromise) return initPromise;
  initPromise = (async () => {
    await Parser.init();
    parser = new Parser();
    isInitialized = true;
  })().catch((error) => {
    // Allow a retry after failure; rethrow so current awaiters still see it.
    initPromise = null;
    throw error;
  });
  return initPromise;
}
|
|
769
|
+
/**
 * Get the shared parser instance, initializing Tree-sitter on first use.
 *
 * @returns The shared Parser instance
 * @throws {Error} When initialization did not produce a parser
 */
async function getParser() {
  await initializeParser();
  const instance = parser;
  if (instance === null) throw new Error("Parser not initialized");
  return instance;
}
|
|
777
|
+
/**
 * Load a language grammar from the local assets directory (memoized).
 *
 * @param config$1 - Language configuration; its canonical `name` selects
 *   the `tree-sitter-<name>.wasm` file under assets/wasm
 * @returns The loaded Language instance
 * @throws {Error} When the grammar's WASM file is missing
 */
async function loadLanguage(config$1) {
  const { name } = config$1;
  const existing = languageCache.get(name);
  if (existing) return existing;
  await initializeParser();
  const wasmPath = join(getAssetsDir(), "wasm", `tree-sitter-${name}.wasm`);
  if (!existsSync(wasmPath)) {
    throw new Error(`WASM file not found for language ${name}: ${wasmPath}`);
  }
  const language = await Language.load(wasmPath);
  languageCache.set(name, language);
  return language;
}
|
|
791
|
+
/**
 * Parse code content with Tree-sitter.
 *
 * @param content - Source text to parse
 * @param options - `language` (explicit name) and/or `filePath` (for
 *   extension-based detection); `language` wins when both are given
 * @returns `{ tree, language, parser, languageInstance }`
 * @throws {Error} On unknown language, undetectable file type, missing
 *   options, or parse failure
 */
async function parseCode(content, options = {}) {
  const { language, filePath } = options;
  const resolveConfig = () => {
    if (language) {
      const byName = getLanguageByName(language);
      if (!byName) throw new Error(`Unsupported language: ${language}`);
      return byName;
    }
    if (filePath) {
      const byPath = getLanguageFromPath(filePath);
      if (!byPath) throw new Error(`Could not detect language for file: ${filePath}`);
      return byPath;
    }
    throw new Error("Either language or filePath must be provided");
  };
  const config$1 = resolveConfig();
  const languageInstance = await loadLanguage(config$1);
  const parserInstance = await getParser();
  // Shared parser: bind the grammar immediately before parsing.
  parserInstance.setLanguage(languageInstance);
  const tree = parserInstance.parse(content);
  if (!tree) throw new Error("Failed to parse content");
  return {
    tree,
    language: config$1.name,
    parser: parserInstance,
    languageInstance
  };
}
|
|
816
|
+
/**
 * Convert a Tree-sitter point (0-based row) into our Position type
 * (1-based line, 0-based column).
 *
 * @param point - Tree-sitter point with `row` and `column`
 * @param offset - Absolute character index into the source
 */
function toPosition(point, offset) {
  const { row, column } = point;
  return {
    line: row + 1,
    column,
    offset
  };
}
|
|
826
|
+
/**
 * Convert a Tree-sitter syntax node into a plain ASTNode record.
 *
 * Recurses into named children and into every field name defined by the
 * node's grammar; recursion stops (children/fields omitted) once
 * `maxDepth` is reached.
 *
 * @param node - Tree-sitter syntax node
 * @param maxDepth - Optional recursion limit; undefined means unlimited
 * @param currentDepth - Internal recursion counter
 */
function toASTNode(node, maxDepth, currentDepth = 0) {
  const astNode = {
    type: node.type,
    text: node.text,
    start: toPosition(node.startPosition, node.startIndex),
    end: toPosition(node.endPosition, node.endIndex),
    isNamed: node.isNamed
  };
  const depthReached = maxDepth !== undefined && currentDepth >= maxDepth;
  if (depthReached) return astNode;
  const childDepth = currentDepth + 1;
  if (node.childCount > 0) {
    const named = node.namedChildren;
    if (named.length > 0) {
      astNode.children = named.map((child) => toASTNode(child, maxDepth, childDepth));
    }
  }
  // Resolve grammar-defined fields (e.g. "name", "body") present on this node.
  const fields = {};
  for (const fieldName of node.tree.language.fields) {
    if (!fieldName) continue;
    const fieldNode = node.childForFieldName(fieldName);
    if (fieldNode) fields[fieldName] = toASTNode(fieldNode, maxDepth, childDepth);
  }
  if (Object.keys(fields).length > 0) astNode.fields = fields;
  return astNode;
}
|
|
852
|
+
/**
 * Clear the language cache (useful for testing)
 */
function clearLanguageCache() {
languageCache.clear();
}
/**
 * Reset the parser state (useful for testing)
 */
// Drops the shared parser, loaded grammars, and initialization flags so
// the next initializeParser() call performs a full re-init.
function resetParser() {
languageCache.clear();
parser = null;
isInitialized = false;
initPromise = null;
}
// Register both reset hooks with the central cache registry.
registerCache("parser:languageCache", clearLanguageCache);
registerCache("parser:state", resetParser);
|
|
869
|
+
|
|
870
|
+
//#endregion
|
|
871
|
+
//#region src/core/queries/helpers.ts
|
|
872
|
+
/**
 * Find a capture by exact name.
 *
 * @param captures - Captures from a query match
 * @param name - Exact capture name to look up
 * @returns The first capture with that name, or undefined
 */
function findCapture(captures, name) {
  for (const capture of captures) {
    if (capture.name === name) return capture;
  }
  return undefined;
}
|
|
882
|
+
/**
 * Find the first capture whose name is one of `names`.
 *
 * @param captures - Captures from a query match
 * @param names - Accepted capture names
 * @returns The first matching capture, or undefined
 */
function findCaptureByNames(captures, names) {
  const wanted = new Set(names);
  return captures.find((capture) => wanted.has(capture.name));
}
|
|
892
|
+
/**
 * Find the first capture whose name starts with `prefix`.
 *
 * @param captures - Captures from a query match
 * @param prefix - Name prefix, e.g. "definition." matches "definition.function"
 * @returns The first matching capture, or undefined
 */
function findCaptureByPrefix(captures, prefix) {
  for (const capture of captures) {
    if (capture.name.startsWith(prefix)) return capture;
  }
  return undefined;
}
|
|
902
|
+
/**
 * Strip `prefix` from a capture name.
 *
 * @param captureName - Full capture name, e.g. "definition.function"
 * @param prefix - Prefix to remove, e.g. "definition."
 * @returns The suffix ("function"), or the name unchanged when the prefix is absent
 */
function getCaptureKind(captureName, prefix) {
  if (!captureName.startsWith(prefix)) return captureName;
  return captureName.slice(prefix.length);
}
|
|
912
|
+
/**
 * Create a deduplication helper keyed by node start offset.
 *
 * @returns `{ add, has }` — both take an ASTNode and use `start.offset`
 *   as the identity key
 */
function createOffsetTracker() {
  const seenOffsets = /* @__PURE__ */ new Set();
  return {
    add(node) {
      return seenOffsets.add(node.start.offset);
    },
    has(node) {
      return seenOffsets.has(node.start.offset);
    }
  };
}
|
|
924
|
+
|
|
925
|
+
//#endregion
|
|
926
|
+
//#region src/core/queries/loader.ts
|
|
927
|
+
/**
|
|
928
|
+
* SCM Query file loader
|
|
929
|
+
*
|
|
930
|
+
* Loads official Tree-sitter .scm query files from local assets directory
|
|
931
|
+
* Supports inheritance via `; inherits: lang1,lang2` directives
|
|
932
|
+
*/
|
|
933
|
+
/**
 * Cache for loaded SCM queries (with inheritance resolved)
 */
// Keyed by `${language}:${queryType}`; only non-empty query text is stored.
const scmCache = /* @__PURE__ */ new Map();
|
|
937
|
+
/**
 * Map alias-style language names onto their query directory names:
 * "csharp" -> "c_sharp", "tsx" -> "typescript"; anything else passes through.
 */
function normalizeLanguageName(language) {
  switch (language) {
    case "csharp":
      return "c_sharp";
    case "tsx":
      return "typescript";
    default:
      return language;
  }
}
|
|
945
|
+
/**
 * Get the absolute path to a .scm query file, or undefined when absent.
 *
 * @param language - Language name (aliases normalized for the lookup)
 * @param queryType - Query kind, e.g. "tags" or "highlights"
 */
function getSCMPath(language, queryType) {
  const relative = join("queries", normalizeLanguageName(language), `${queryType}.scm`);
  if (!assetExists(relative)) return undefined;
  return join(getAssetsDir(), relative);
}
|
|
952
|
+
/**
 * Collect language names from `; inherits: lang1,lang2` directives.
 * Both `; inherits:` and `; inherits` (no colon) forms are accepted;
 * newlines are normalized so \r\n and bare \r inputs work.
 *
 * @param content - Raw .scm file text
 * @returns Inherited language names in order of appearance
 */
function parseInherits(content) {
  const directive = /^;\s*inherits:?\s+([^\s].*)$/;
  const normalized = content.replace(/\r\n/g, "\n").replace(/\r/g, "\n");
  const found = [];
  for (const line of normalized.split("\n")) {
    const match = directive.exec(line);
    if (!match?.[1]) continue;
    for (const lang of match[1].split(",")) found.push(lang.trim());
  }
  return found;
}
|
|
968
|
+
/**
 * Drop `; inherits` directive lines from SCM content.
 * Newlines are normalized to \n in the returned text.
 */
function removeInheritDirectives(content) {
  const normalized = content.replace(/\r\n/g, "\n").replace(/\r/g, "\n");
  const kept = [];
  for (const line of normalized.split("\n")) {
    if (!/^;\s*inherits:?\s+/.test(line)) kept.push(line);
  }
  return kept.join("\n");
}
|
|
974
|
+
/**
 * Read a .scm file verbatim (no inheritance resolution).
 *
 * @param language - Directory name under assets/queries (already normalized)
 * @param queryType - Query kind, e.g. "tags"
 * @returns File contents, or undefined when missing or unreadable
 */
function loadRawSCM(language, queryType) {
  const scmFile = join(getAssetsDir(), "queries", language, `${queryType}.scm`);
  if (!existsSync(scmFile)) return undefined;
  try {
    return readFileSync(scmFile, "utf-8");
  } catch {
    // An unreadable file is treated the same as a missing one.
    return undefined;
  }
}
|
|
986
|
+
/**
 * Load a .scm query for a language with `; inherits` directives resolved.
 *
 * Inherited queries are prepended (in directive order) before the
 * language's own patterns; cycles are broken via the `visited` set, and
 * resolved results are cached per `language:queryType`.
 *
 * @param language - Language name (e.g. "javascript", "python")
 * @param queryType - Query kind (e.g. "tags", "highlights")
 * @param visited - Languages already expanded on this resolution path
 * @returns Combined query text, or undefined when nothing was found
 */
function loadSCMQuery(language, queryType, visited = /* @__PURE__ */ new Set()) {
  const cacheKey = `${language}:${queryType}`;
  if (scmCache.has(cacheKey)) return scmCache.get(cacheKey);
  if (visited.has(language)) return undefined;
  visited.add(language);
  const raw = loadRawSCM(normalizeLanguageName(language), queryType);
  if (!raw) return undefined;
  const parts = [];
  for (const parent of parseInherits(raw)) {
    const parentQuery = loadSCMQuery(parent, queryType, visited);
    if (parentQuery) parts.push(parentQuery);
  }
  parts.push(removeInheritDirectives(raw).trim());
  const combined = parts.filter(Boolean).join("\n\n");
  if (combined) scmCache.set(cacheKey, combined);
  return combined || undefined;
}
|
|
1012
|
+
/** Load tags.scm for symbol extraction */
function loadTagsQuery(language) {
  return loadSCMQuery(language, "tags");
}
/** Load highlights.scm for syntax highlighting */
function loadHighlightsQuery(language) {
  return loadSCMQuery(language, "highlights");
}
/** Load locals.scm for local variable scoping */
function loadLocalsQuery(language) {
  return loadSCMQuery(language, "locals");
}
|
|
1030
|
+
/**
 * Check whether a language ships an official tags.scm in the assets.
 */
function hasOfficialTags(language) {
  return getSCMPath(language, "tags") !== undefined;
}
|
|
1036
|
+
/**
 * Clear the SCM cache
 */
// Forces the next loadSCMQuery() call to re-read and re-resolve .scm files.
function clearSCMCache() {
scmCache.clear();
}
// Hook into the central cache registry so all caches can be reset together.
registerCache("queries:scm", clearSCMCache);
|
|
1043
|
+
|
|
1044
|
+
//#endregion
|
|
1045
|
+
//#region src/core/queries/patterns.ts
|
|
1046
|
+
// Generic Tree-sitter query patterns used when no language-specific
// fallback pattern exists for a preset (see getQueryPattern()).
// NOTE(review): the variable/type node names below are JS/TS-grammar
// shaped — grammars without these node types will fail to compile the
// query; callers are expected to handle that via FALLBACK_PATTERNS.
const GENERIC_PATTERNS = {
comments: `[(comment) @comment]`,
strings: `[(string) @string (template_string) @string]`,
imports: `(import_statement) @import.statement`,
exports: `(export_statement) @export.statement`,
variables: `[
(variable_declaration (variable_declarator name: (identifier) @variable.name) @variable.declaration)
(lexical_declaration (variable_declarator name: (identifier) @variable.name) @variable.declaration)
]`,
types: `[
(type_alias_declaration name: (type_identifier) @type.alias) @type.definition
(enum_declaration name: (identifier) @enum.name) @enum.definition
]`
};
|
|
1060
|
+
// Per-language query patterns that take precedence over GENERIC_PATTERNS
// in getQueryPattern(). Each entry maps preset name -> query source.
const FALLBACK_PATTERNS = {
// TypeScript: also captures arrow/function expressions bound via const/let.
typescript: { functions: `[
(function_declaration name: (identifier) @function.name) @function.definition
(method_definition name: (property_identifier) @function.name) @function.definition
(lexical_declaration (variable_declarator name: (identifier) @function.name value: [(arrow_function) (function_expression)]) @function.definition)
]` },
json: { strings: `[(string) @string]` },
yaml: {
strings: `[(string_scalar) @string (double_quote_scalar) @string (single_quote_scalar) @string]`,
comments: `[(comment) @comment]`
},
toml: {
strings: `[(string) @string]`,
comments: `[(comment) @comment]`
},
bash: {
functions: `(function_definition name: (word) @function.name) @function.definition`,
comments: `[(comment) @comment]`,
strings: `[(string) @string (raw_string) @string]`,
variables: `(variable_assignment name: (variable_name) @variable.name) @variable.declaration`
},
html: {
strings: `[(attribute_value) @string (quoted_attribute_value) @string]`,
comments: `[(comment) @comment]`
},
css: {
comments: `[(comment) @comment]`,
strings: `[(string_value) @string]`
},
// Scala: object and trait definitions are reported under the "classes" preset.
scala: {
functions: `(function_definition (identifier) @function.name) @function.definition`,
classes: `[
(class_definition (identifier) @class.name) @class.definition
(object_definition (identifier) @class.name) @class.definition
(trait_definition (identifier) @class.name) @class.definition
]`,
comments: `[(comment) @comment]`,
strings: `[(string) @string]`
},
// Swift: initializers count as functions; protocols count as classes.
swift: {
functions: `[
(function_declaration (simple_identifier) @function.name) @function.definition
(init_declaration) @function.definition
]`,
classes: `[
(class_declaration (type_identifier) @class.name) @class.definition
(protocol_declaration (type_identifier) @class.name) @class.definition
]`,
comments: `[(comment) @comment (multiline_comment) @comment]`,
strings: `[(line_string_literal) @string]`
},
// OCaml: type and module definitions are reported under "classes".
ocaml: {
functions: `(value_definition (let_binding (value_name) @function.name)) @function.definition`,
classes: `[
(type_definition (type_binding (type_constructor) @class.name)) @class.definition
(module_definition (module_binding (module_name) @class.name)) @class.definition
]`,
comments: `[(comment) @comment]`,
strings: `[(string) @string]`
},
svelte: {
comments: `[(comment) @comment]`,
strings: `[(attribute_value) @string (quoted_attribute_value) @string]`
}
};
|
|
1125
|
+
/**
 * Resolve a preset query pattern: the language-specific fallback wins,
 * then the generic pattern; undefined when neither exists.
 */
function getQueryPattern(language, preset) {
  const specific = FALLBACK_PATTERNS[language]?.[preset];
  if (specific !== undefined) return specific;
  return GENERIC_PATTERNS[preset];
}
|
|
1128
|
+
|
|
1129
|
+
//#endregion
|
|
1130
|
+
//#region src/core/queries/index.ts
|
|
1131
|
+
/**
|
|
1132
|
+
* SCM Query engine for Tree-sitter
|
|
1133
|
+
*
|
|
1134
|
+
* Supports both official .scm query files and custom preset patterns
|
|
1135
|
+
*/
|
|
1136
|
+
/**
 * Compile and run a Tree-sitter query against a parse tree.
 *
 * @param tree - Parsed tree
 * @param languageInstance - Tree-sitter language the query targets
 * @param queryString - S-expression query source
 * @param language - Language name echoed into the result
 * @param options - `maxMatches` cap plus optional `startIndex`/`endIndex` range
 * @returns Matches with captures converted to plain ASTNodes, tagged `source: "preset"`
 * @throws {Error} "Invalid query: ..." when compilation fails
 */
function executeQuery(tree, languageInstance, queryString, language, options = {}) {
  const { maxMatches, startIndex, endIndex } = options;
  let compiled;
  try {
    compiled = new Query(languageInstance, queryString);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Invalid query: ${reason}`);
  }
  const rawMatches = compiled.matches(tree.rootNode, { startIndex, endIndex });
  const matches = [];
  for (const rawMatch of rawMatches) {
    if (maxMatches !== undefined && matches.length >= maxMatches) break;
    matches.push({
      pattern: rawMatch.patternIndex,
      captures: rawMatch.captures.map((capture) => ({
        name: capture.name,
        node: toASTNode(capture.node)
      }))
    });
  }
  return {
    matches,
    count: matches.length,
    query: queryString,
    language,
    source: "preset"
  };
}
|
|
1174
|
+
/**
 * Execute an official .scm query file for a language.
 *
 * @param tree - Parsed tree
 * @param languageInstance - Tree-sitter language instance
 * @param language - Language name
 * @param queryType - "tags" | "highlights" | "locals"; other types are unsupported
 * @param options - Query options forwarded to executeQuery
 * @returns Result tagged `source: "official"`, or undefined when the query
 *   type is unsupported, the .scm file is missing, or execution fails
 */
function executeOfficialQuery(tree, languageInstance, language, queryType, options = {}) {
  const loaders = {
    tags: loadTagsQuery,
    highlights: loadHighlightsQuery,
    locals: loadLocalsQuery
  };
  const loader = loaders[queryType];
  // injections/indents/folds (and anything unknown) are not executed here.
  if (!loader) return undefined;
  const queryString = loader(language);
  if (!queryString) return undefined;
  try {
    const result = executeQuery(tree, languageInstance, queryString, language, options);
    return { ...result, source: "official" };
  } catch {
    // Official queries are best-effort; a bad .scm yields undefined.
    return undefined;
  }
}
|
|
1209
|
+
/**
 * Execute tags.scm for comprehensive symbol extraction.
 *
 * The official Tree-sitter tags.scm provides function/class/method/module
 * definitions (with documentation captures) and reference tracking.
 */
function executeTagsQuery(tree, languageInstance, language, options = {}) {
  return executeOfficialQuery(tree, languageInstance, language, "tags", options);
}
|
|
1222
|
+
/**
 * Execute a preset query
 * Uses official tags.scm for functions/classes when available,
 * otherwise falls back to preset patterns
 */
function executePresetQuery(tree, languageInstance, language, preset, options = {}) {
const { maxMatches } = options;
const fallbackPattern = getQueryPattern(language, preset);
// functions/classes prefer the official tags.scm route for accuracy.
if (preset === "functions" || preset === "classes") {
if (hasOfficialTags(language)) {
const { definitions } = extractSymbolsFromTags(tree, languageInstance, language);
let filteredDefs = preset === "functions" ? definitions.filter((d) => d.kind === "function" || d.kind === "method") : definitions.filter((d) => d.kind === "class" || d.kind === "interface" || d.kind === "module");
if (filteredDefs.length > 0) {
if (maxMatches !== void 0 && filteredDefs.length > maxMatches) filteredDefs = filteredDefs.slice(0, maxMatches);
// Re-shape tag definitions into the QueryResult match/capture format.
const matches = filteredDefs.map((def) => ({
pattern: 0,
captures: [{
name: preset === "functions" ? "function.definition" : "class.definition",
node: def.node
}, {
name: `${preset.slice(0, -1)}.name`,
node: def.nameNode
}]
}));
return {
matches,
count: matches.length,
query: `[tags.scm ${preset}]`,
language,
source: "official"
};
}
// tags.scm matched nothing: try the pattern fallback before giving up.
if (fallbackPattern) return executeQuery(tree, languageInstance, fallbackPattern, language, options);
// No fallback either: report an empty official result rather than throwing.
return {
matches: [],
count: 0,
query: `[tags.scm ${preset}]`,
language,
source: "official"
};
}
// No official tags.scm for this language: use the fallback pattern if any.
if (fallbackPattern) return executeQuery(tree, languageInstance, fallbackPattern, language, options);
}
// Non-symbol presets, or functions/classes with neither tags.scm nor pattern.
if (!fallbackPattern) throw new Error(`No '${preset}' query pattern available for ${language}`);
return executeQuery(tree, languageInstance, fallbackPattern, language, options);
}
|
|
1268
|
+
/**
 * Extract symbol definitions and references using the official tags.scm.
 *
 * Matches without a `name` capture are skipped. A match with a
 * `definition.*` capture becomes a definition (with optional `doc` text);
 * otherwise a `reference.*` capture makes it a reference.
 *
 * @returns `{ definitions, references }` (both empty when tags.scm is unavailable)
 */
function extractSymbolsFromTags(tree, languageInstance, language) {
  const definitions = [];
  const references = [];
  const tagResult = executeTagsQuery(tree, languageInstance, language);
  if (!tagResult) return { definitions, references };
  for (const { captures } of tagResult.matches) {
    const nameCapture = findCapture(captures, "name");
    if (!nameCapture) continue;
    const defCapture = findCaptureByPrefix(captures, "definition.");
    if (defCapture) {
      const docCapture = findCapture(captures, "doc");
      definitions.push({
        name: nameCapture.node.text,
        kind: getCaptureKind(defCapture.name, "definition."),
        node: defCapture.node,
        nameNode: nameCapture.node,
        documentation: docCapture?.node.text
      });
      continue;
    }
    const refCapture = findCaptureByPrefix(captures, "reference.");
    if (refCapture) {
      references.push({
        name: nameCapture.node.text,
        kind: getCaptureKind(refCapture.name, "reference."),
        node: refCapture.node,
        nameNode: nameCapture.node
      });
    }
  }
  return { definitions, references };
}
|
|
1312
|
+
/**
 * Get function name from a function node
 *
 * Prefers the node's `name` field when it is a single node; otherwise scans
 * direct children for an identifier-like node, descending into a
 * `function_declarator` (C/C++ style) when present.
 *
 * @param funcNode - AST node for a function-like definition
 * @returns The function name text, or undefined when none can be found
 */
function getFunctionName(funcNode) {
	const nameField = funcNode.fields?.name;
	if (nameField && !Array.isArray(nameField)) return nameField.text;
	if (!funcNode.children) return undefined;
	const identifierTypes = new Set(["identifier", "property_identifier", "field_identifier"]);
	for (const child of funcNode.children) {
		if (identifierTypes.has(child.type)) return child.text;
		// C-style declarators nest the identifier one level deeper.
		if (child.type === "function_declarator") return getFunctionName(child);
	}
	return undefined;
}
|
|
1325
|
+
/**
 * Get class name from a class node
 *
 * Prefers the node's `name` field when it is a single node; otherwise returns
 * the text of the first direct child that is an identifier or type_identifier.
 *
 * @param classNode - AST node for a class-like definition
 * @returns The class name text, or undefined when none can be found
 */
function getClassName(classNode) {
	const nameField = classNode.fields?.name;
	if (nameField && !Array.isArray(nameField)) return nameField.text;
	const match = (classNode.children ?? []).find(
		(child) => child.type === "identifier" || child.type === "type_identifier"
	);
	return match?.text;
}
|
|
1337
|
+
|
|
1338
|
+
//#endregion
|
|
1339
|
+
//#region src/core/symbols/index.ts
|
|
1340
|
+
/**
 * Extract symbols from parsed code
 *
 * Runs the "functions", "classes", "variables" and "types" preset queries
 * (skipping each group when the filter excludes all of its symbol types) and
 * normalizes the captures into a flat symbol list plus a per-type summary.
 *
 * @param tree - Parsed tree-sitter syntax tree
 * @param languageInstance - Loaded tree-sitter language object
 * @param language - Language identifier string
 * @param filter - Optional { types, excludeTypes } allow/deny lists of symbol types
 * @returns {{symbols: Array, summary: Object}} symbols plus per-type counts
 */
function extractSymbols(tree, languageInstance, language, filter = {}) {
	const symbols = [];
	const { types, excludeTypes } = filter;
	// A type is included when it is in `types` (if given) and not in `excludeTypes`.
	const shouldInclude = (type) => {
		if (types && !types.includes(type)) return false;
		if (excludeTypes?.includes(type)) return false;
		return true;
	};
	// --- functions / methods ---
	// NOTE(review): the query also runs when only class/interface are wanted —
	// presumably so executePresetQuery can warm caches; confirm before changing.
	if (shouldInclude("function") || shouldInclude("method") || shouldInclude("class") || shouldInclude("interface")) {
		const tagsResult = executePresetQuery(tree, languageInstance, language, "functions");
		if (shouldInclude("function") || shouldInclude("method")) for (const match of tagsResult.matches) {
			const defCapture = findCapture(match.captures, "function.definition");
			const nameCapture = findCapture(match.captures, "function.name");
			if (defCapture) {
				// Fall back to walking the node when no name capture is present.
				const name = nameCapture?.node.text ?? getFunctionName(defCapture.node);
				if (name) {
					// Node types containing "method" (e.g. method_definition) count as methods.
					const type = defCapture.node.type.includes("method") || defCapture.node.type === "method_definition" ? "method" : "function";
					if (shouldInclude(type)) symbols.push({
						name,
						type,
						start: defCapture.node.start,
						end: defCapture.node.end,
						signature: extractFunctionSignature(defCapture.node),
						modifiers: extractModifiers(defCapture.node)
					});
				}
			}
		}
	}
	// --- classes / interfaces ---
	if (shouldInclude("class") || shouldInclude("interface")) {
		const classResult = executePresetQuery(tree, languageInstance, language, "classes");
		for (const match of classResult.matches) {
			const defCapture = findCapture(match.captures, "class.definition");
			const nameCapture = findCapture(match.captures, "class.name");
			if (defCapture) {
				const name = nameCapture?.node.text ?? getClassName(defCapture.node);
				if (name) {
					const nodeType = defCapture.node.type;
					let symbolType = "class";
					// Interfaces and structs are both reported as "interface".
					if (nodeType.includes("interface") || nodeType === "interface_declaration") symbolType = "interface";
					else if (nodeType.includes("struct")) symbolType = "interface";
					if (shouldInclude(symbolType)) symbols.push({
						name,
						type: symbolType,
						start: defCapture.node.start,
						end: defCapture.node.end,
						modifiers: extractModifiers(defCapture.node)
					});
				}
			}
		}
	}
	// --- variables / constants --- (best-effort: not every language has this query)
	if (shouldInclude("variable") || shouldInclude("constant")) try {
		const varResult = executePresetQuery(tree, languageInstance, language, "variables");
		for (const match of varResult.matches) {
			const nameCapture = findCaptureByNames(match.captures, [
				"variable.name",
				"constant.name",
				"field.name"
			]);
			const declCapture = findCaptureByNames(match.captures, [
				"variable.declaration",
				"constant.declaration",
				"field.declaration"
			]);
			if (nameCapture && declCapture) {
				// "const " prefix or an explicit constant.name capture marks a constant.
				const type = declCapture.node.text.startsWith("const ") || findCapture(match.captures, "constant.name") !== void 0 ? "constant" : "variable";
				if (shouldInclude(type)) symbols.push({
					name: nameCapture.node.text,
					type,
					start: declCapture.node.start,
					end: declCapture.node.end,
					modifiers: extractModifiers(declCapture.node)
				});
			}
		}
	} catch {}
	// --- type aliases / interfaces / enums --- (best-effort as above)
	if (shouldInclude("interface") || shouldInclude("type") || shouldInclude("enum")) try {
		const typeResult = executePresetQuery(tree, languageInstance, language, "types");
		for (const match of typeResult.matches) {
			const nameCapture = findCaptureByNames(match.captures, [
				"type.name",
				"interface.name",
				"enum.name",
				"type.alias"
			]);
			const defCapture = findCaptureByNames(match.captures, [
				"type.definition",
				"interface.definition",
				"enum.definition"
			]);
			if (nameCapture && defCapture) {
				// The capture name, not the node type, decides the symbol type here.
				let type = "type";
				if (nameCapture.name === "interface.name") type = "interface";
				else if (nameCapture.name === "enum.name") type = "enum";
				if (shouldInclude(type)) symbols.push({
					name: nameCapture.node.text,
					type,
					start: defCapture.node.start,
					end: defCapture.node.end,
					modifiers: extractModifiers(defCapture.node)
				});
			}
		}
	} catch {}
	// Summary counts are derived from the final symbol list.
	return {
		symbols,
		summary: {
			functions: symbols.filter((s) => s.type === "function").length,
			classes: symbols.filter((s) => s.type === "class").length,
			variables: symbols.filter((s) => s.type === "variable").length,
			constants: symbols.filter((s) => s.type === "constant").length,
			interfaces: symbols.filter((s) => s.type === "interface").length,
			types: symbols.filter((s) => s.type === "type").length,
			enums: symbols.filter((s) => s.type === "enum").length,
			methods: symbols.filter((s) => s.type === "method").length,
			properties: symbols.filter((s) => s.type === "property").length,
			total: symbols.length
		}
	};
}
|
|
1464
|
+
/**
 * Extract function signature from AST node
 *
 * Tries a sequence of language-shaped patterns (JS/TS function, arrow
 * function, Python def, Go func) against the start of the node's text and
 * returns the first match; otherwise falls back to the text before the first
 * "{" or ":".
 *
 * @param node - AST node with a `.text` property
 * @returns The signature string, or undefined when the text yields nothing
 */
function extractFunctionSignature(node) {
	const source = node.text;
	// [pattern, post-processing of the full match] pairs, tried in order.
	const patterns = [
		[/^(async\s+)?function\s*\*?\s*(\w*)\s*(<[^>]*>)?\s*\([^)]*\)(\s*:\s*[^{]+)?/, (m) => m.trim()],
		[/^\([^)]*\)\s*(:\s*[^=]+)?\s*=>/, (m) => m.trim()],
		[/^def\s+(\w+)\s*\([^)]*\)(\s*->\s*[^:]+)?:/, (m) => m.replace(/:$/, "").trim()],
		[/^func\s*(\([^)]*\)\s*)?(\w+)\s*\([^)]*\)/, (m) => m.trim()]
	];
	for (const [pattern, finish] of patterns) {
		const hit = pattern.exec(source);
		if (hit) return finish(hit[0]);
	}
	const head = source.split(/[{:]/)[0];
	return head ? head.trim() : undefined;
}
|
|
1480
|
+
/**
 * Precompiled [keyword, word-boundary regex] pairs for every recognized
 * modifier. Built once at module load — the original constructed a fresh
 * RegExp for each modifier on every call. Output order follows this list.
 */
const MODIFIER_PATTERNS = [
	"export",
	"default",
	"async",
	"static",
	"public",
	"private",
	"protected",
	"readonly",
	"abstract",
	"const",
	"let",
	"var",
	"final",
	"override",
	"pub",
	"mut"
].map((mod) => [mod, new RegExp(`\\b${mod}\\b`)]);
/**
 * Extract modifiers from AST node
 *
 * Scans only the first 100 characters of the node's source text (modifiers
 * appear at the start of a declaration) for whole-word modifier keywords.
 *
 * @param node - AST node with a `.text` property
 * @returns Matched modifier keywords in canonical order, or undefined when
 *   none are present (preserves the original's undefined-for-empty contract)
 */
function extractModifiers(node) {
	const head = node.text.slice(0, 100);
	const modifiers = MODIFIER_PATTERNS.filter(([, pattern]) => pattern.test(head)).map(([mod]) => mod);
	return modifiers.length > 0 ? modifiers : void 0;
}
|
|
1506
|
+
/**
 * Extract imports from parsed code
 *
 * Runs the language's "imports" preset query and normalizes each match into
 * { source, names, isDefault, start, end }. Best-effort: returns an empty
 * array (or a partial list) when the query is unavailable or fails.
 *
 * @param tree - Parsed tree-sitter syntax tree
 * @param languageInstance - Loaded tree-sitter language object
 * @param language - Language identifier string
 * @returns Array of normalized import statement records
 */
function extractImports(tree, languageInstance, language) {
	const imports = [];
	try {
		const result = executePresetQuery(tree, languageInstance, language, "imports");
		// Tracks statement nodes already emitted so overlapping query matches
		// don't yield duplicate entries for the same import statement.
		const tracker = createOffsetTracker();
		for (const match of result.matches) {
			const stmtCapture = findCaptureByNames(match.captures, ["import.statement", "include.statement"]);
			if (!stmtCapture || tracker.has(stmtCapture.node)) continue;
			tracker.add(stmtCapture.node);
			const sourceCapture = findCaptureByNames(match.captures, [
				"import.source",
				"import.path",
				"include.path"
			]);
			const defaultCapture = findCapture(match.captures, "import.default");
			const nameCaptures = match.captures.filter((c) => c.name === "import.name");
			// Strip surrounding quotes from the module specifier.
			const source = sourceCapture ? sourceCapture.node.text.replace(/['"]/g, "") : "";
			const names = [];
			// The default binding (if any) is listed first, then the named imports.
			if (defaultCapture) names.push({ name: defaultCapture.node.text });
			for (const nameCapture of nameCaptures) names.push({ name: nameCapture.node.text });
			imports.push({
				source,
				names,
				// Flagged default only when the default binding is the sole import.
				isDefault: !!defaultCapture && nameCaptures.length === 0,
				start: stmtCapture.node.start,
				end: stmtCapture.node.end
			});
		}
	} catch {}
	return imports;
}
|
|
1540
|
+
/**
 * Extract exports from parsed code
 *
 * Runs the language's "exports" preset query and normalizes each match into
 * { name, isDefault, start, end }. Best-effort: returns an empty array (or a
 * partial list) when the query is unavailable or fails.
 *
 * @param tree - Parsed tree-sitter syntax tree
 * @param languageInstance - Loaded tree-sitter language object
 * @param language - Language identifier string
 * @returns Array of normalized export statement records
 */
function extractExports(tree, languageInstance, language) {
	const exportList = [];
	try {
		const result = executePresetQuery(tree, languageInstance, language, "exports");
		// De-duplicates statement nodes matched by more than one query pattern.
		const tracker = createOffsetTracker();
		for (const match of result.matches) {
			const stmtCapture = findCaptureByNames(match.captures, [
				"export.statement",
				"export.function",
				"export.class",
				"export.type"
			]);
			if (!stmtCapture || tracker.has(stmtCapture.node)) continue;
			tracker.add(stmtCapture.node);
			const nameCapture = findCapture(match.captures, "export.name");
			const text = stmtCapture.node.text;
			const isDefault = text.includes("export default");
			let name = nameCapture?.node.text;
			// No name capture: fall back to parsing the name out of the statement text.
			if (!name) {
				const nameMatch = /export\s+(?:default\s+)?(?:function|class|const|let|var|interface|type|enum)\s+(\w+)/.exec(text);
				if (nameMatch?.[1]) name = nameMatch[1];
			}
			exportList.push({
				// Anonymous exports (e.g. `export default {...}`) are recorded as "default".
				name: name ?? "default",
				isDefault,
				start: stmtCapture.node.start,
				end: stmtCapture.node.end
			});
		}
	} catch {}
	return exportList;
}
|
|
1575
|
+
|
|
1576
|
+
//#endregion
|
|
1577
|
+
//#region src/core/embeddings/chunker.ts
|
|
1578
|
+
/**
|
|
1579
|
+
* Code chunker for splitting source files into embeddable chunks
|
|
1580
|
+
*
|
|
1581
|
+
* Uses tree-sitter for semantic chunking based on symbols (functions, classes, etc.)
|
|
1582
|
+
* This produces much better embeddings than character-based splitting.
|
|
1583
|
+
*/
|
|
1584
|
+
/**
 * Generate a unique ID for a chunk
 *
 * Derives a stable, deterministic ID from the file path, start line and chunk
 * content via an MD5 digest (identity only — not used for security).
 *
 * @param filePath - Path of the source file
 * @param content - Chunk text
 * @param startLine - 1-based starting line of the chunk
 * @returns "chunk_" followed by the first 12 hex digits of the digest
 */
function generateChunkId(filePath, content, startLine) {
	const key = [filePath, String(startLine), content].join(":");
	const digest = crypto.createHash("md5").update(key).digest("hex");
	return "chunk_" + digest.slice(0, 12);
}
|
|
1590
|
+
/**
 * Maps lowercased file extensions to language identifiers.
 *
 * Hoisted to module scope (the original rebuilt the whole object literal on
 * every call) and stored in a Map so inherited prototype keys such as
 * "constructor" can never be returned as a bogus language value.
 */
const EXTENSION_LANGUAGE_MAP = new Map(Object.entries({
	ts: "typescript",
	tsx: "typescript",
	js: "javascript",
	jsx: "javascript",
	mjs: "javascript",
	cjs: "javascript",
	py: "python",
	rs: "rust",
	go: "go",
	java: "java",
	kt: "kotlin",
	rb: "ruby",
	php: "php",
	c: "c",
	cpp: "cpp",
	h: "c",
	hpp: "cpp",
	cs: "csharp",
	swift: "swift",
	scala: "scala",
	vue: "vue",
	svelte: "svelte",
	md: "markdown",
	json: "json",
	yaml: "yaml",
	yml: "yaml",
	toml: "toml",
	xml: "xml",
	html: "html",
	css: "css",
	scss: "scss",
	less: "less",
	sql: "sql",
	sh: "bash",
	bash: "bash",
	zsh: "bash"
}));
/**
 * Detect language from file extension
 *
 * @param filePath - Path or file name; only the text after the last "." is used
 * @returns The language identifier, or "unknown" for unrecognized extensions
 */
function detectLanguage(filePath) {
	const ext = filePath.split(".").pop()?.toLowerCase() ?? "";
	return EXTENSION_LANGUAGE_MAP.get(ext) ?? "unknown";
}
|
|
1633
|
+
/**
 * Get line number from byte offset
 *
 * Counts newline characters before `offset` in `content`; the first line is 1.
 *
 * @param content - Full file text
 * @param offset - Character offset into the text (clamped to the text length)
 * @returns 1-based line number containing the offset
 */
function getLineFromOffset(content, offset) {
	let line = 1;
	const limit = Math.min(offset, content.length);
	for (let i = 0; i < limit; i++) {
		if (content[i] === "\n") line += 1;
	}
	return line;
}
|
|
1639
|
+
/**
 * Extract code content for a symbol using its offsets
 *
 * @param content - Full file text
 * @param symbol - Symbol with `start.offset` and `end.offset` character offsets
 * @returns The slice of `content` covered by the symbol
 */
function getSymbolContent(content, symbol) {
	const { start, end } = symbol;
	return content.slice(start.offset, end.offset);
}
|
|
1645
|
+
/**
 * Split large content into smaller chunks while respecting line boundaries
 *
 * Normalizes line endings to "\n", then greedily packs whole lines into
 * chunks of at most `maxSize` characters, carrying roughly `overlap`
 * characters of trailing lines into the next chunk for context.
 *
 * @param content - Text to split
 * @param maxSize - Maximum chunk size in characters (a single oversized line
 *   still becomes its own chunk)
 * @param overlap - Approximate number of trailing characters repeated at the
 *   start of the next chunk
 * @returns Array of chunk strings
 */
function splitLargeContent(content, maxSize, overlap) {
	const normalized = content.replace(/\r\n/g, "\n").replace(/\r/g, "\n");
	if (normalized.length <= maxSize) return [normalized];
	// Collect trailing lines of `buffer` until roughly `overlap` chars are kept.
	const takeOverlap = (buffer) => {
		const kept = [];
		let size = 0;
		for (let i = buffer.length - 1; i >= 0 && size < overlap; i--) {
			const candidate = buffer[i];
			if (candidate === undefined) continue;
			kept.unshift(candidate);
			size += candidate.length + 1;
		}
		return { kept, size };
	};
	const pieces = [];
	let buffer = [];
	let bufferSize = 0;
	for (const line of normalized.split("\n")) {
		const cost = line.length + 1;
		if (buffer.length > 0 && bufferSize + cost > maxSize) {
			pieces.push(buffer.join("\n"));
			const { kept, size } = takeOverlap(buffer);
			buffer = kept;
			bufferSize = size;
		}
		buffer.push(line);
		bufferSize += cost;
	}
	if (buffer.length > 0) pieces.push(buffer.join("\n"));
	return pieces;
}
|
|
1677
|
+
/**
 * Create a chunk from content
 *
 * Assembles a chunk record with a deterministic ID derived from the file
 * path, content and start line.
 *
 * @param filePath - Source file path
 * @param language - Detected language identifier
 * @param content - Chunk text
 * @param startLine - 1-based first line of the chunk
 * @param endLine - 1-based last line of the chunk
 * @param symbolName - Optional name of the symbol this chunk covers
 * @param symbolType - Optional type of the symbol this chunk covers
 * @returns The chunk record
 */
function createChunk(filePath, language, content, startLine, endLine, symbolName, symbolType) {
	const id = generateChunkId(filePath, content, startLine);
	return { id, content, filePath, language, startLine, endLine, symbolName, symbolType };
}
|
|
1692
|
+
/**
 * Chunk a source file using tree-sitter for semantic boundaries
 *
 * Strategy:
 * 1. Parse file with tree-sitter and extract symbols
 * 2. Each function/class/method becomes its own chunk
 * 3. Code between symbols (imports, top-level code) is grouped together
 * 4. Large symbols are split at line boundaries if they exceed maxSize
 *
 * @param filePath - Path of the file being chunked (used for language detection and IDs)
 * @param content - Full file text
 * @param config$1 - Config providing defaultChunkSize and defaultChunkOverlap
 * @returns Promise resolving to an array of chunk records
 */
async function chunkFile(filePath, content, config$1) {
	const language = detectLanguage(filePath);
	const maxSize = config$1.defaultChunkSize;
	const overlap = config$1.defaultChunkOverlap;
	let symbols = [];
	try {
		const parseResult = await parseCode(content, { filePath });
		symbols = extractSymbols(parseResult.tree, parseResult.languageInstance, parseResult.language).symbols;
	} catch (error) {
		// Parsing failed (unsupported language, syntax error, ...): fall back to
		// plain line-based chunking rather than dropping the file.
		logger.debug(`Tree-sitter parsing failed for ${filePath}, using fallback chunking: ${error instanceof Error ? error.message : String(error)}`);
		return fallbackChunk(filePath, content, language, maxSize, overlap);
	}
	if (symbols.length === 0) return fallbackChunk(filePath, content, language, maxSize, overlap);
	// Process symbols in document order so gap regions can be computed between them.
	const sortedSymbols = [...symbols].sort((a, b) => a.start.offset - b.start.offset);
	const regions = [];
	let lastEndOffset = 0;
	for (const symbol of sortedSymbols) {
		// Non-empty gap before this symbol (imports, top-level statements)
		// becomes its own anonymous region.
		if (symbol.start.offset > lastEndOffset) {
			if (content.slice(lastEndOffset, symbol.start.offset).trim().length > 0) regions.push({
				content: content.slice(lastEndOffset, symbol.start.offset),
				startOffset: lastEndOffset,
				endOffset: symbol.start.offset,
				startLine: getLineFromOffset(content, lastEndOffset),
				endLine: getLineFromOffset(content, symbol.start.offset)
			});
		}
		const symbolContent = getSymbolContent(content, symbol);
		regions.push({
			content: symbolContent,
			startOffset: symbol.start.offset,
			endOffset: symbol.end.offset,
			startLine: symbol.start.line,
			endLine: symbol.end.line,
			symbolName: symbol.name,
			symbolType: symbol.type
		});
		// max() guards against nested symbols (e.g. a method inside a class)
		// pulling the cursor backwards.
		lastEndOffset = Math.max(lastEndOffset, symbol.end.offset);
	}
	// Trailing content after the last symbol.
	if (lastEndOffset < content.length) {
		if (content.slice(lastEndOffset).trim().length > 0) regions.push({
			content: content.slice(lastEndOffset),
			startOffset: lastEndOffset,
			endOffset: content.length,
			startLine: getLineFromOffset(content, lastEndOffset),
			endLine: getLineFromOffset(content, content.length)
		});
	}
	const chunks = [];
	for (const region of regions) {
		const regionContent = region.content.trim();
		if (regionContent.length === 0) continue;
		if (regionContent.length <= maxSize) chunks.push(createChunk(filePath, language, regionContent, region.startLine, region.endLine, region.symbolName, region.symbolType));
		else {
			// Oversized region: split at line boundaries; each part keeps the
			// region's symbol attribution.
			const parts = splitLargeContent(regionContent, maxSize, overlap);
			let currentLine = region.startLine;
			for (const part of parts) {
				const partLines = (part.match(/\n/g) ?? []).length + 1;
				chunks.push(createChunk(filePath, language, part, currentLine, currentLine + partLines - 1, region.symbolName, region.symbolType));
				// Heuristic: treat the overlap as ~50 chars per line when advancing
				// the line cursor, so overlapping parts don't double-count lines.
				currentLine += partLines - Math.floor(overlap / 50);
			}
		}
	}
	return chunks;
}
|
|
1765
|
+
/**
 * Fallback chunking when tree-sitter fails or finds no symbols
 * Uses simple line-based splitting
 *
 * @param filePath - Source file path
 * @param content - Full file text
 * @param language - Detected language identifier
 * @param maxSize - Maximum chunk size in characters
 * @param overlap - Approximate overlap in characters between adjacent chunks
 * @returns Array of chunk records (empty for whitespace-only content)
 */
function fallbackChunk(filePath, content, language, maxSize, overlap) {
	if (content.trim().length === 0) return [];
	// Heuristic: when advancing the line cursor, treat the overlap as ~50
	// characters per line so overlapping parts don't double-count lines.
	const overlapLineEstimate = Math.floor(overlap / 50);
	const result = [];
	let lineCursor = 1;
	for (const piece of splitLargeContent(content, maxSize, overlap)) {
		const lineCount = piece.split("\n").length;
		result.push(createChunk(filePath, language, piece, lineCursor, lineCursor + lineCount - 1));
		lineCursor += lineCount - overlapLineEstimate;
	}
	return result;
}
|
|
1781
|
+
/**
 * Supported file extensions for indexing
 */
const SUPPORTED_EXTENSIONS = [
	".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs",
	".py", ".rs", ".go", ".java", ".kt", ".rb", ".php",
	".c", ".cpp", ".h", ".hpp", ".cs", ".swift", ".scala",
	".vue", ".svelte", ".md"
];
/**
 * Check if a file should be indexed
 *
 * @param filePath - Path or file name; the text after the last "." is treated
 *   as the extension (case-insensitive)
 * @returns true when the extension is in SUPPORTED_EXTENSIONS
 */
function shouldIndexFile(filePath) {
	const tail = filePath.split(".").pop()?.toLowerCase() ?? "";
	return SUPPORTED_EXTENSIONS.includes(`.${tail}`);
}
|
|
1816
|
+
|
|
1817
|
+
//#endregion
|
|
1818
|
+
//#region src/core/embeddings/crossfile.ts
|
|
1819
|
+
/**
|
|
1820
|
+
* Cross-file context resolution for enriched embeddings
|
|
1821
|
+
*
|
|
1822
|
+
* Resolves imports and includes relevant symbol definitions from
|
|
1823
|
+
* imported files to provide better context for semantic search.
|
|
1824
|
+
*/
|
|
1825
|
+
// Memoizes per-file analysis results (see analyzeResolvedFile), keyed by file
// path; failed analyses are stored as null so they are not retried.
const resolvedFileCache = /* @__PURE__ */ new Map();
/**
 * Clear the resolved file cache
 */
function clearCrossFileCache() {
	resolvedFileCache.clear();
}
// Register with the central cache registry under a stable key so a global
// cache reset also empties this module's cache.
registerCache("embeddings:crossFileCache", clearCrossFileCache);
|
|
1833
|
+
/**
 * Common file extensions to try when resolving imports
 *
 * Tried in this order when an import specifier has no extension.
 */
const EXTENSIONS = [".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs"];
|
|
1844
|
+
/**
 * Resolve an import source to an absolute file path
 *
 * Only relative imports ("./x"), "@"-prefixed specifiers, and specifiers
 * matching a configured path alias are considered; bare package names resolve
 * to null. Resolution tries, in order: the base path plus each known
 * extension, the base path itself, then "index" files under the base path.
 *
 * @param importSource - The import specifier as written in source
 * @param currentFilePath - Absolute path of the importing file
 * @param options - { projectRoot, pathAliases? } resolution options
 * @returns Absolute path of an existing file, or null when unresolvable
 */
function resolveImportPath(importSource, currentFilePath, options) {
	const { projectRoot, pathAliases = {} } = options;
	const isRelative = importSource.startsWith(".");
	const matchesAlias = Object.keys(pathAliases).some((alias) => importSource.startsWith(alias));
	if (!isRelative && !importSource.startsWith("@") && !matchesAlias) return null;
	let basePath;
	// Alias resolution wins over relative resolution.
	for (const [alias, target] of Object.entries(pathAliases)) {
		if (!importSource.startsWith(alias)) continue;
		basePath = path.join(projectRoot, target, importSource.slice(alias.length));
		break;
	}
	if (basePath === undefined) {
		if (!isRelative) return null;
		basePath = path.resolve(path.dirname(currentFilePath), importSource);
	}
	const isFile = (candidate) => fs.existsSync(candidate) && fs.statSync(candidate).isFile();
	for (const ext of EXTENSIONS) {
		const withExt = basePath + ext;
		if (isFile(withExt)) return withExt;
	}
	if (isFile(basePath)) return basePath;
	// Directory import: look for an index file.
	for (const ext of EXTENSIONS) {
		const indexPath = path.join(basePath, `index${ext}`);
		if (isFile(indexPath)) return indexPath;
	}
	return null;
}
|
|
1871
|
+
/**
 * Analyze a resolved file and extract its symbols
 *
 * Reads and parses the file, extracting symbols and exports. Results —
 * including failures, stored as null — are memoized in resolvedFileCache so
 * each imported file is read and parsed at most once.
 *
 * @param filePath - Absolute path of the file to analyze
 * @returns {{symbols, exports}} on success, or null when reading/parsing fails
 */
async function analyzeResolvedFile(filePath) {
	const cached = resolvedFileCache.get(filePath);
	// A cached null marks a previous failure; return it without retrying.
	if (cached !== void 0) return cached;
	try {
		const parseResult = await parseCode(fs.readFileSync(filePath, "utf-8"), { filePath });
		const { symbols } = extractSymbols(parseResult.tree, parseResult.languageInstance, parseResult.language);
		const result = {
			symbols,
			exports: extractExports(parseResult.tree, parseResult.languageInstance, parseResult.language)
		};
		resolvedFileCache.set(filePath, result);
		return result;
	} catch (error) {
		logger.debug(`Failed to analyze ${filePath}: ${error instanceof Error ? error.message : String(error)}`);
		// Cache the failure so a broken/unreadable file is not re-parsed repeatedly.
		resolvedFileCache.set(filePath, null);
		return null;
	}
}
|
|
1892
|
+
/**
 * Find symbols that match imported names
 *
 * For namespace imports, returns every symbol whose name is exported. For
 * named/default imports, returns symbols whose names appear in the import
 * binding list (the default export's name is added when `isDefault` is set).
 *
 * @param importStatement - Normalized import record ({ names, isDefault?, isNamespace? })
 * @param symbols - Symbols extracted from the imported file
 * @param exports - Exports extracted from the imported file
 * @returns The subset of `symbols` referenced by the import
 */
function findImportedSymbols(importStatement, symbols, exports) {
	if (importStatement.isNamespace) {
		const exported = new Set(exports.map((e) => e.name));
		return symbols.filter((s) => exported.has(s.name));
	}
	const wanted = new Set(importStatement.names.map((n) => n.name));
	if (importStatement.isDefault) {
		const defaultExport = exports.find((e) => e.isDefault);
		if (defaultExport) wanted.add(defaultExport.name);
	}
	return symbols.filter((s) => wanted.has(s.name));
}
|
|
1908
|
+
/**
 * Resolve imports and get cross-file context
 *
 * For up to `maxImports` imports, resolves each specifier to a file, analyzes
 * it, and collects up to `maxSymbolsPerFile` of the symbols it provides.
 * Imports that cannot be resolved or analyzed still appear in the result with
 * empty symbol/export lists.
 *
 * @param imports - Normalized import records of the current file
 * @param currentFilePath - Absolute path of the importing file
 * @param options - { projectRoot, pathAliases?, maxImports?, maxSymbolsPerFile? }
 * @returns {{resolvedImports, importedSymbolsSummary}} per-import results plus a text summary
 */
async function resolveCrossFileContext(imports, currentFilePath, options) {
	const maxImports = options.maxImports ?? 10;
	const maxSymbolsPerFile = options.maxSymbolsPerFile ?? 5;
	const resolvedImports = [];
	// Placeholder entry used when resolution or analysis fails.
	const emptyEntry = (imp, resolvedPath) => ({ import: imp, resolvedPath, symbols: [], exports: [] });
	for (const imp of imports.slice(0, maxImports)) {
		const resolvedPath = resolveImportPath(imp.source, currentFilePath, options);
		if (!resolvedPath) {
			resolvedImports.push(emptyEntry(imp, null));
			continue;
		}
		const analysis = await analyzeResolvedFile(resolvedPath);
		if (!analysis) {
			resolvedImports.push(emptyEntry(imp, resolvedPath));
			continue;
		}
		resolvedImports.push({
			import: imp,
			resolvedPath,
			symbols: findImportedSymbols(imp, analysis.symbols, analysis.exports).slice(0, maxSymbolsPerFile),
			exports: analysis.exports
		});
	}
	return {
		resolvedImports,
		importedSymbolsSummary: buildImportedSymbolsSummary(resolvedImports)
	};
}
|
|
1949
|
+
/**
 * Build a summary string of imported symbols for enrichment
 *
 * Produces one "From <source>: ..." line per import that contributed symbols;
 * a symbol is rendered as "name: signature" when a signature exists,
 * otherwise "name (type)".
 *
 * @param resolvedImports - Output of resolveCrossFileContext
 * @returns Newline-joined summary ("" when no import contributed symbols)
 */
function buildImportedSymbolsSummary(resolvedImports) {
	const describe = (s) => (s.signature ? `${s.name}: ${s.signature}` : `${s.name} (${s.type})`);
	return resolvedImports
		.filter((resolved) => resolved.symbols.length > 0)
		.map((resolved) => `From ${resolved.import.source}: ${resolved.symbols.map(describe).join("; ")}`)
		.join("\n");
}
|
|
1964
|
+
|
|
1965
|
+
//#endregion
|
|
1966
|
+
//#region src/core/embeddings/enricher.ts
|
|
1967
|
+
/**
 * AST cache per file path to avoid re-parsing
 *
 * Holds the per-file analysis (parse result, symbols, imports, exports)
 * produced by getFileAnalysis, keyed by file path.
 */
const astCache = /* @__PURE__ */ new Map();
/**
 * Clear the AST cache
 */
function clearASTCache() {
	astCache.clear();
}
// Register with the central cache registry so a global cache reset empties it.
registerCache("embeddings:astCache", clearASTCache);
/** Maximum number of imports to include in enriched content */
const MAX_IMPORTS = 10;
/** Maximum number of exports to include in enriched content */
const MAX_EXPORTS = 10;
|
|
1982
|
+
/**
 * Get or create file analysis from cache
 *
 * Parses the file and extracts symbols, imports and exports, optionally
 * augmenting the result with resolved cross-file context. Successful analyses
 * are cached in astCache by file path; failures are not cached.
 *
 * @param filePath - Path of the file (cache key)
 * @param content - File text to parse
 * @param options - Optional { includeCrossFileContext?, projectRoot?, pathAliases? }
 * @returns The analysis object, or null when parsing fails
 */
async function getFileAnalysis(filePath, content, options) {
	const cached = astCache.get(filePath);
	if (cached) return cached;
	try {
		const parseResult = await parseCode(content, { filePath });
		const { symbols } = extractSymbols(parseResult.tree, parseResult.languageInstance, parseResult.language);
		const imports = extractImports(parseResult.tree, parseResult.languageInstance, parseResult.language);
		const analysis = {
			parseResult,
			symbols,
			imports,
			exports: extractExports(parseResult.tree, parseResult.languageInstance, parseResult.language)
		};
		// Cross-file context needs a project root and is skipped when the
		// option/config disables it or there are no imports to resolve.
		if ((options?.includeCrossFileContext ?? ENRICHMENT_CONFIG.includeCrossFileContext) && imports.length > 0 && options?.projectRoot) try {
			const crossFileContext = await resolveCrossFileContext(imports, filePath, {
				projectRoot: options.projectRoot,
				pathAliases: options.pathAliases,
				maxImports: ENRICHMENT_CONFIG.maxImportsToResolve,
				maxSymbolsPerFile: ENRICHMENT_CONFIG.maxSymbolsPerImport
			});
			analysis.crossFileContext = crossFileContext;
			logger.debug(`Resolved cross-file context for ${filePath}: ${String(crossFileContext.resolvedImports.length)} imports`);
		} catch (error) {
			// Cross-file enrichment is best-effort; the base analysis still stands.
			logger.debug(`Failed to resolve cross-file context for ${filePath}: ${error instanceof Error ? error.message : String(error)}`);
		}
		astCache.set(filePath, analysis);
		return analysis;
	} catch (error) {
		logger.debug(`Failed to parse ${filePath}: ${error instanceof Error ? error.message : String(error)}`);
		return null;
	}
}
|
|
2017
|
+
/**
|
|
2018
|
+
* Find symbols that overlap with a chunk's line range
|
|
2019
|
+
*/
|
|
2020
|
+
/**
 * Find symbols whose line span overlaps the [startLine, endLine] range.
 *
 * @param {Array} symbols - Symbols with `start.line`/`end.line` positions.
 * @param {number} startLine - First line of the chunk (inclusive).
 * @param {number} endLine - Last line of the chunk (inclusive).
 * @returns {Array<{name: string, type: string, signature: *}>} Overlapping
 *   symbols reduced to name/type/signature.
 */
function findSymbolsInRange(symbols, startLine, endLine) {
	// Standard interval-overlap test: a symbol overlaps the chunk unless it
	// ends before the chunk starts or starts after the chunk ends.
	const overlaps = (sym) => sym.start.line <= endLine && sym.end.line >= startLine;
	return symbols.filter(overlaps).map((sym) => ({
		name: sym.name,
		type: sym.type,
		signature: sym.signature
	}));
}
|
|
2033
|
+
/**
|
|
2034
|
+
* Format import sources for enrichment header
|
|
2035
|
+
*/
|
|
2036
|
+
/**
 * Format import sources for the enrichment header.
 * Takes at most MAX_IMPORTS entries and joins their non-empty sources.
 *
 * @param {Array<{source: string}>} imports
 * @returns {string} Comma-separated import sources (may be empty).
 */
function formatImportSources(imports) {
	const sources = [];
	for (const imp of imports.slice(0, MAX_IMPORTS)) {
		if (imp.source.length > 0) sources.push(imp.source);
	}
	return sources.join(", ");
}
|
|
2039
|
+
/**
|
|
2040
|
+
* Format export names for enrichment header
|
|
2041
|
+
*/
|
|
2042
|
+
/**
 * Format export names for the enrichment header.
 * Takes at most MAX_EXPORTS entries, dropping empty names and the
 * uninformative "default" export.
 *
 * @param {Array<{name: string}>} exports
 * @returns {string} Comma-separated export names (may be empty).
 */
function formatExportNames(exports) {
	const names = [];
	for (const exp of exports.slice(0, MAX_EXPORTS)) {
		if (exp.name.length > 0 && exp.name !== "default") names.push(exp.name);
	}
	return names.join(", ");
}
|
|
2045
|
+
/**
|
|
2046
|
+
* Format symbols for enrichment header
|
|
2047
|
+
*/
|
|
2048
|
+
/**
 * Format symbols for the enrichment header as "name (type)" pairs.
 *
 * @param {Array<{name: string, type: string}>} symbols
 * @returns {string} Comma-separated "name (type)" list (empty for no symbols).
 */
function formatSymbols(symbols) {
	const parts = [];
	for (const sym of symbols) {
		parts.push(`${sym.name} (${sym.type})`);
	}
	return parts.join(", ");
}
|
|
2051
|
+
/**
|
|
2052
|
+
* Build enriched content with metadata header
|
|
2053
|
+
*/
|
|
2054
|
+
/**
 * Build enriched content: a metadata header (file, language, symbols,
 * imports, exports, imported definitions) followed by the raw chunk body,
 * separated by a "---" divider.
 *
 * @param {object} chunk - Chunk with `filePath`, `language`, `content`.
 * @param {Array} symbols - Symbols contained in the chunk.
 * @param {Array} imports - File-level imports.
 * @param {Array} exports - File-level exports.
 * @param {object} [crossFileContext] - Optional resolved cross-file context
 *   with an `importedSymbolsSummary` string.
 * @returns {string} Header + "\n\n---\n" + chunk content.
 */
function buildEnrichedContent(chunk, symbols, imports, exports, crossFileContext) {
	const header = [`File: ${chunk.filePath}`, `Language: ${chunk.language}`];
	if (symbols.length > 0) {
		header.push(`Symbols: ${formatSymbols(symbols)}`);
	}
	if (imports.length > 0) {
		const importStr = formatImportSources(imports);
		if (importStr.length > 0) header.push(`Imports: ${importStr}`);
	}
	if (exports.length > 0) {
		const exportStr = formatExportNames(exports);
		if (exportStr.length > 0) header.push(`Exports: ${exportStr}`);
	}
	if (crossFileContext && crossFileContext.importedSymbolsSummary.length > 0) {
		header.push(`Imported definitions:\n${crossFileContext.importedSymbolsSummary}`);
	}
	return header.join("\n") + "\n\n---\n" + chunk.content;
}
|
|
2070
|
+
/**
|
|
2071
|
+
* Enrich all chunks from a single file (optimized - parses once)
|
|
2072
|
+
*/
|
|
2073
|
+
/**
 * Enrich all chunks from a single file (optimized - parses once).
 *
 * When the file path is missing or AST analysis fails, chunks fall back to a
 * minimal "File/Language" header and are marked `wasEnriched: false`.
 *
 * Fix: the basic-fallback mapping was duplicated verbatim in two branches;
 * it is extracted into one local helper so both failure paths stay in sync.
 *
 * @param {Array} chunks - Chunks from one file (all share the same filePath).
 * @param {string} content - Full source text of the file.
 * @param {object} [options] - Passed through to getFileAnalysis.
 * @returns {Promise<Array>} Chunks extended with `enrichedContent`,
 *   `containedSymbols`, and `wasEnriched`.
 */
async function enrichChunksFromFile(chunks, content, options) {
	if (chunks.length === 0) return [];
	// Minimal enrichment used when full AST analysis is unavailable.
	const toBasic = (chunk) => ({
		...chunk,
		enrichedContent: `File: ${chunk.filePath}\nLanguage: ${chunk.language}\n\n---\n` + chunk.content,
		containedSymbols: [],
		wasEnriched: false
	});
	const filePath = chunks[0]?.filePath;
	if (!filePath) return chunks.map(toBasic);
	const analysis = await getFileAnalysis(filePath, content, options);
	if (!analysis) return chunks.map(toBasic);
	return chunks.map((chunk) => {
		const chunkSymbols = findSymbolsInRange(analysis.symbols, chunk.startLine, chunk.endLine);
		return {
			...chunk,
			enrichedContent: buildEnrichedContent(chunk, chunkSymbols, analysis.imports, analysis.exports, analysis.crossFileContext),
			containedSymbols: chunkSymbols,
			wasEnriched: true
		};
	});
}
|
|
2106
|
+
|
|
2107
|
+
//#endregion
|
|
2108
|
+
//#region src/core/embeddings/watcher.ts
|
|
2109
|
+
/**
|
|
2110
|
+
* File watcher for automatic index updates
|
|
2111
|
+
*
|
|
2112
|
+
* Features:
|
|
2113
|
+
* - SHA-256 hash comparison to detect real content changes
|
|
2114
|
+
* - Debounce (5s default) to handle rapid changes
|
|
2115
|
+
* - Persistent hash cache to avoid unnecessary re-indexing
|
|
2116
|
+
* - fast-glob for efficient file scanning
|
|
2117
|
+
*/
|
|
2118
|
+
/** Default debounce delay in milliseconds */
|
|
2119
|
+
const DEFAULT_DEBOUNCE_MS = 5e3;
|
|
2120
|
+
/** Cache file name for storing hashes */
|
|
2121
|
+
const HASH_CACHE_FILE$1 = ".src-index-hashes.json";
|
|
2122
|
+
var IndexWatcher = class {
	/** Absolute root directory being watched and indexed. */
	directory;
	/** Project config passed through to chunking/embedding/storage. */
	config;
	/** Debounce delay in ms applied to file-change events. */
	debounceMs;
	/** Client used to produce embeddings via Ollama. */
	ollamaClient;
	/** Vector store holding embedded chunks for this directory. */
	vectorStore;
	/** chokidar watcher instance; null whenever the watcher is not running. */
	watcher = null;
	/** .gitignore-based ignore filter (see createIgnoreFilter). */
	ig;
	/** True while the operation queue is being drained. */
	isProcessing = false;
	/** filePath -> SHA-256 content hash; persisted to disk between runs. */
	hashCache = {};
	/** filePath -> pending debounced change ({ type, filePath, timer }). */
	pendingChanges = /* @__PURE__ */ new Map();
	/** FIFO of async index/remove operations, executed one at a time. */
	operationQueue = [];
	// Optional lifecycle callbacks supplied by the caller.
	onReady;
	onError;
	onIndexed;
	onRemoved;
	/**
	 * @param {object} options - directory, config, optional debounceMs and
	 *   onReady/onError/onIndexed/onRemoved callbacks.
	 */
	constructor(options) {
		this.directory = path.resolve(options.directory);
		this.config = options.config;
		this.debounceMs = options.debounceMs ?? DEFAULT_DEBOUNCE_MS;
		this.ollamaClient = new OllamaClient(options.config);
		this.vectorStore = new VectorStore(this.directory, options.config);
		this.ig = this.createIgnoreFilter();
		this.onReady = options.onReady;
		this.onError = options.onError;
		this.onIndexed = options.onIndexed;
		this.onRemoved = options.onRemoved;
		// Load previously persisted hashes so unchanged files can be skipped.
		this.loadHashCache();
	}
	/**
	 * Compute SHA-256 hash of content (full hex digest).
	 */
	computeHash(content) {
		return crypto.createHash("sha256").update(content, "utf8").digest("hex");
	}
	/**
	 * Get the path of the persistent hash-cache file under .src-index/.
	 */
	getHashCachePath() {
		return path.join(this.directory, ".src-index", HASH_CACHE_FILE$1);
	}
	/**
	 * Load the hash cache from disk; a corrupt or unreadable cache file
	 * resets the cache to empty rather than failing.
	 */
	loadHashCache() {
		const cachePath = this.getHashCachePath();
		if (fs.existsSync(cachePath)) try {
			const content = fs.readFileSync(cachePath, "utf-8");
			this.hashCache = JSON.parse(content);
			logger.debug(`Loaded ${String(Object.keys(this.hashCache).length)} cached hashes`);
		} catch {
			this.hashCache = {};
		}
	}
	/**
	 * Save the hash cache to disk (creates .src-index/ if needed).
	 * Failures are logged at debug level and otherwise ignored.
	 */
	saveHashCache() {
		const cachePath = this.getHashCachePath();
		const cacheDir = path.dirname(cachePath);
		try {
			if (!fs.existsSync(cacheDir)) fs.mkdirSync(cacheDir, { recursive: true });
			fs.writeFileSync(cachePath, JSON.stringify(this.hashCache, null, 2));
		} catch (err) {
			const error = err instanceof Error ? err : new Error(String(err));
			logger.debug(`Failed to save hash cache: ${error.message}`);
		}
	}
	/**
	 * Check whether file content changed since the last recorded hash.
	 * Side effect: records the new hash when it differs (the in-memory cache
	 * is updated even if the caller later fails to index the file).
	 */
	hasContentChanged(filePath, content) {
		const newHash = this.computeHash(content);
		if (this.hashCache[filePath] === newHash) return false;
		this.hashCache[filePath] = newHash;
		return true;
	}
	/**
	 * Remove a file's entry from the hash cache (immutable rebuild via rest
	 * destructuring rather than `delete`).
	 */
	removeFromHashCache(filePath) {
		const { [filePath]: _, ...rest } = this.hashCache;
		this.hashCache = rest;
	}
	/**
	 * Create an ignore filter seeded from the directory's .gitignore, if any.
	 * Read errors leave the filter empty (nothing extra ignored).
	 */
	createIgnoreFilter() {
		const ig = ignore();
		const gitignorePath = path.join(this.directory, ".gitignore");
		if (fs.existsSync(gitignorePath)) try {
			const content = fs.readFileSync(gitignorePath, "utf-8");
			ig.add(content);
		} catch {}
		return ig;
	}
	/**
	 * Check if a file should be indexed: not inside a dot-directory, not
	 * gitignored, and of a supported type (shouldIndexFile).
	 */
	shouldIndex(filePath) {
		// Normalize to forward slashes so ignore patterns work on Windows too.
		const relativePath = path.relative(this.directory, filePath).replace(/\\/g, "/");
		if (relativePath.split("/").some((part) => part.startsWith("."))) return false;
		if (this.ig.ignores(relativePath)) return false;
		return shouldIndexFile(filePath);
	}
	/**
	 * Schedule a file change with debouncing: a new event for the same path
	 * resets the timer, and only the latest event type is kept.
	 */
	scheduleChange(type, filePath) {
		const existing = this.pendingChanges.get(filePath);
		if (existing) clearTimeout(existing.timer);
		const timer = setTimeout(() => {
			this.pendingChanges.delete(filePath);
			this.queueOperation(async () => this.processChange(type, filePath));
		}, this.debounceMs);
		this.pendingChanges.set(filePath, {
			type,
			filePath,
			timer
		});
		logger.debug(`Scheduled ${type}: ${path.basename(filePath)} (${String(this.debounceMs)}ms)`);
	}
	/**
	 * Process a debounced change: unlink removes, add/change (re)indexes.
	 */
	async processChange(type, filePath) {
		if (type === "unlink") await this.removeFile(filePath);
		else await this.indexFile(filePath);
	}
	/**
	 * Index a single file: skip if unchanged by hash, otherwise chunk,
	 * enrich, embed, and replace the file's chunks in the vector store.
	 *
	 * NOTE(review): enrichChunksFromFile is called without options here, so
	 * no projectRoot is available and cross-file context is never resolved
	 * on the watcher path — confirm this is intentional.
	 */
	async indexFile(filePath) {
		if (!this.shouldIndex(filePath)) return;
		try {
			const content = fs.readFileSync(filePath, "utf-8");
			if (!this.hasContentChanged(filePath, content)) {
				logger.debug(`Skipped (unchanged): ${path.basename(filePath)}`);
				return;
			}
			const chunks = await chunkFile(filePath, content, this.config);
			if (chunks.length === 0) return;
			const enrichedChunks = await enrichChunksFromFile(chunks, content);
			const texts = enrichedChunks.map((c) => c.enrichedContent);
			const embeddings = await this.ollamaClient.embedBatch(texts);
			const embeddedChunks = enrichedChunks.map((chunk, i) => ({
				id: chunk.id,
				content: chunk.content,
				filePath: chunk.filePath,
				language: chunk.language,
				startLine: chunk.startLine,
				endLine: chunk.endLine,
				symbolName: chunk.symbolName,
				symbolType: chunk.symbolType,
				vector: embeddings[i] ?? []
			}));
			// Delete-then-add so stale chunks from the previous version of the
			// file do not linger in the store.
			await this.vectorStore.deleteByFilePath(filePath);
			await this.vectorStore.addChunks(embeddedChunks);
			this.saveHashCache();
			logger.debug(`Indexed: ${path.relative(this.directory, filePath)}`);
			this.onIndexed?.(filePath);
		} catch (err) {
			const error = err instanceof Error ? err : new Error(String(err));
			logger.error(`Failed to index ${filePath}: ${error.message}`);
			this.onError?.(error);
		}
	}
	/**
	 * Remove a file from the index and the hash cache.
	 */
	async removeFile(filePath) {
		try {
			await this.vectorStore.deleteByFilePath(filePath);
			this.removeFromHashCache(filePath);
			this.saveHashCache();
			logger.debug(`Removed: ${path.relative(this.directory, filePath)}`);
			this.onRemoved?.(filePath);
		} catch (err) {
			const error = err instanceof Error ? err : new Error(String(err));
			logger.error(`Failed to remove ${filePath}: ${error.message}`);
			this.onError?.(error);
		}
	}
	/**
	 * Queue an operation so index/remove work never runs concurrently.
	 */
	queueOperation(operation) {
		this.operationQueue.push(operation);
		// Fire-and-forget: processQueue guards itself with isProcessing.
		this.processQueue();
	}
	/**
	 * Drain queued operations sequentially; a failing operation is logged
	 * and does not stop the rest of the queue.
	 */
	async processQueue() {
		if (this.isProcessing) return;
		this.isProcessing = true;
		while (this.operationQueue.length > 0) {
			const operation = this.operationQueue.shift();
			if (operation) try {
				await operation();
			} catch (err) {
				const error = err instanceof Error ? err : new Error(String(err));
				logger.error(`Operation failed: ${error.message}`);
			}
		}
		this.isProcessing = false;
	}
	/**
	 * Collect candidate files with fast-glob (supported extensions only,
	 * no dotfiles, no symlinks) and apply the .gitignore filter on top.
	 */
	async collectFilesWithGlob() {
		return (await fg(`**/*.{${SUPPORTED_EXTENSIONS.map((ext) => ext.slice(1)).join(",")}}`, {
			cwd: this.directory,
			absolute: true,
			ignore: ["**/.*", "**/.*/**"],
			dot: false,
			onlyFiles: true,
			followSymbolicLinks: false
		})).filter((file) => {
			const relativePath = path.relative(this.directory, file).replace(/\\/g, "/");
			return !this.ig.ignores(relativePath);
		});
	}
	/**
	 * Perform a full initial indexing pass over all candidate files.
	 *
	 * NOTE(review): unlike indexFile, this path does not call
	 * deleteByFilePath before addChunks — confirm the store deduplicates by
	 * chunk id, otherwise changed files could accumulate stale chunks.
	 */
	async fullIndex() {
		logger.info("Starting full index...");
		const files = await this.collectFilesWithGlob();
		let indexed = 0;
		let skipped = 0;
		for (const filePath of files) try {
			const content = fs.readFileSync(filePath, "utf-8");
			if (!this.hasContentChanged(filePath, content)) {
				skipped++;
				continue;
			}
			const chunks = await chunkFile(filePath, content, this.config);
			if (chunks.length === 0) continue;
			const enrichedChunks = await enrichChunksFromFile(chunks, content);
			const texts = enrichedChunks.map((c) => c.enrichedContent);
			const embeddings = await this.ollamaClient.embedBatch(texts);
			const embeddedChunks = enrichedChunks.map((chunk, i) => ({
				id: chunk.id,
				content: chunk.content,
				filePath: chunk.filePath,
				language: chunk.language,
				startLine: chunk.startLine,
				endLine: chunk.endLine,
				symbolName: chunk.symbolName,
				symbolType: chunk.symbolType,
				vector: embeddings[i] ?? []
			}));
			await this.vectorStore.addChunks(embeddedChunks);
			indexed++;
		} catch (err) {
			const error = err instanceof Error ? err : new Error(String(err));
			logger.debug(`Error indexing ${filePath}: ${error.message}`);
		}
		// Persist hashes once at the end rather than per file.
		this.saveHashCache();
		logger.info(`Full index: ${String(indexed)} indexed, ${String(skipped)} skipped`);
	}
	/**
	 * Start watching: verify Ollama health, connect the store, run a full
	 * index if the store does not yet exist, then wire up chokidar events.
	 *
	 * @throws {Error} If the Ollama health check fails.
	 */
	async start() {
		const health = await this.ollamaClient.healthCheck();
		if (!health.ok) throw new Error(health.error ?? "Ollama is not available");
		await this.vectorStore.connect();
		if (!this.vectorStore.exists()) await this.fullIndex();
		this.watcher = watch(this.directory, {
			// Mirror shouldIndex's dot-directory and gitignore rules at the
			// watcher level so ignored trees are never traversed.
			ignored: (filePath) => {
				const relativePath = path.relative(this.directory, filePath).replace(/\\/g, "/");
				if (!relativePath) return false;
				if (relativePath.split("/").some((part) => part.startsWith("."))) return true;
				return this.ig.ignores(relativePath);
			},
			persistent: true,
			ignoreInitial: true,
			// Wait for writes to settle so half-written files are not indexed.
			awaitWriteFinish: {
				stabilityThreshold: 500,
				pollInterval: 100
			}
		});
		this.watcher.on("add", (filePath) => {
			if (shouldIndexFile(filePath)) this.scheduleChange("add", filePath);
		});
		this.watcher.on("change", (filePath) => {
			if (shouldIndexFile(filePath)) this.scheduleChange("change", filePath);
		});
		this.watcher.on("unlink", (filePath) => {
			if (shouldIndexFile(filePath)) this.scheduleChange("unlink", filePath);
		});
		this.watcher.on("ready", () => {
			logger.info(`Watching: ${this.directory} (${String(this.debounceMs)}ms debounce)`);
			this.onReady?.();
		});
		this.watcher.on("error", (err) => {
			const error = err instanceof Error ? err : new Error(String(err));
			logger.error(`Watcher error: ${error.message}`);
			this.onError?.(error);
		});
	}
	/**
	 * Stop watching and clean up: cancel pending debounce timers, persist
	 * the hash cache, close the watcher and the vector store.
	 */
	async stop() {
		for (const pending of this.pendingChanges.values()) clearTimeout(pending.timer);
		this.pendingChanges.clear();
		this.saveHashCache();
		if (this.watcher) {
			await this.watcher.close();
			this.watcher = null;
		}
		this.vectorStore.close();
		logger.info("Watcher stopped");
	}
	/**
	 * Check whether the watcher is currently running.
	 */
	isRunning() {
		return this.watcher !== null;
	}
	/**
	 * Clear the hash cache in memory and delete its on-disk file.
	 */
	clearCache() {
		this.hashCache = {};
		const cachePath = this.getHashCachePath();
		if (fs.existsSync(cachePath)) fs.unlinkSync(cachePath);
		logger.info("Hash cache cleared");
	}
	/**
	 * Get hash-cache statistics: entry count and serialized size in chars.
	 */
	getCacheStats() {
		return {
			cachedFiles: Object.keys(this.hashCache).length,
			cacheSize: JSON.stringify(this.hashCache).length
		};
	}
};
|
|
2464
|
+
/**
|
|
2465
|
+
* Create a new index watcher
|
|
2466
|
+
*/
|
|
2467
|
+
/**
 * Create a new index watcher.
 *
 * @param {object} options - IndexWatcher constructor options (directory,
 *   config, optional debounceMs and lifecycle callbacks).
 * @returns {IndexWatcher} A watcher that is not yet running; call start().
 */
function createIndexWatcher(options) {
	return new IndexWatcher(options);
}
|
|
2470
|
+
|
|
2471
|
+
//#endregion
|
|
2472
|
+
//#region src/core/embeddings/reranker.ts
|
|
2473
|
+
/**
|
|
2474
|
+
* Parse LLM response to extract relevance score
|
|
2475
|
+
*/
|
|
2476
|
+
/**
 * Parse an LLM response and extract a relevance score in [0, 10].
 * Values in (10, 100] are rescaled by /10 (percentage-style answers);
 * anything unparseable or out of range yields the neutral score 5.
 *
 * @param {string} response - Raw LLM text output.
 * @returns {number} Score in [0, 10].
 */
function parseScore(response) {
	const numberToken = /\b(\d+(?:\.\d+)?)\b/.exec(response)?.[1];
	if (numberToken === void 0) return 5;
	const value = parseFloat(numberToken);
	if (value >= 0 && value <= 10) return value;
	// 10 < value <= 100: treat as a 0-100 scale and rescale to 0-10.
	if (value <= 100) return value / 10;
	return 5;
}
|
|
2485
|
+
/**
|
|
2486
|
+
* Score a single query-document pair using Ollama
|
|
2487
|
+
*/
|
|
2488
|
+
/**
 * Score a single query-document pair using Ollama's /api/generate endpoint.
 * Any failure (non-OK status, timeout, network error) degrades to the
 * neutral score 5 rather than throwing.
 *
 * @param {string} query - The search query.
 * @param {string} content - Candidate code; only the first 1000 chars are sent.
 * @param {object} options - ollamaBaseUrl, optional model and timeout (ms).
 * @returns {Promise<number>} Relevance score in [0, 10].
 */
async function scoreResult(query, content, options) {
	const model = options.model ?? "llama3.2";
	const timeout = options.timeout ?? 3e4;
	const prompt = `Rate the relevance of the following code snippet to the search query on a scale of 0-10.
0 = completely irrelevant
5 = somewhat relevant
10 = highly relevant and directly answers the query

Query: "${query}"

Code:
\`\`\`
${content.slice(0, 1e3)}
\`\`\`

Respond with ONLY a number between 0 and 10.`;
	// temperature 0 for determinism; num_predict 10 since we only need a number.
	const payload = {
		model,
		prompt,
		stream: false,
		options: {
			temperature: 0,
			num_predict: 10
		}
	};
	try {
		const response = await fetch(`${options.ollamaBaseUrl}/api/generate`, {
			method: "POST",
			headers: { "Content-Type": "application/json" },
			body: JSON.stringify(payload),
			signal: AbortSignal.timeout(timeout)
		});
		if (!response.ok) {
			logger.warn(`Re-ranking request failed: ${response.statusText}`);
			return 5;
		}
		const data = await response.json();
		return parseScore(data.response ?? "5");
	} catch (error) {
		logger.warn(`Re-ranking error: ${error instanceof Error ? error.message : String(error)}`);
		return 5;
	}
}
|
|
2529
|
+
/**
|
|
2530
|
+
* Re-rank search results using LLM scoring
|
|
2531
|
+
*
|
|
2532
|
+
* Takes initial search results and re-scores them based on
|
|
2533
|
+
* semantic relevance to the query using an LLM.
|
|
2534
|
+
*/
|
|
2535
|
+
/**
 * Re-rank search results using LLM scoring.
 *
 * Scores up to `maxResults` candidates against the query in parallel batches
 * of 5, then sorts descending by the LLM score. Each returned result keeps
 * its original score in `originalScore` and exposes the new one as both
 * `rerankScore` and `score`.
 *
 * @param {string} query - The search query.
 * @param {Array} results - Initial search results (each with `chunk.content`).
 * @param {object} options - scoreResult options plus optional maxResults.
 * @returns {Promise<Array>} Re-ranked results, best first.
 */
async function rerank(query, results, options) {
	const maxResults = options.maxResults ?? 20;
	const candidates = results.slice(0, maxResults);
	if (candidates.length === 0) return [];
	logger.debug(`Re-ranking ${String(candidates.length)} results for: ${query}`);
	const batchSize = 5;
	const reranked = [];
	for (let offset = 0; offset < candidates.length; offset += batchSize) {
		const batch = candidates.slice(offset, offset + batchSize);
		// Score the whole batch concurrently; order is preserved by Promise.all.
		const scores = await Promise.all(batch.map(async (result) => scoreResult(query, result.chunk.content, options)));
		batch.forEach((result, idx) => {
			const score = scores[idx];
			if (score === void 0) return;
			reranked.push({
				...result,
				originalScore: result.score,
				rerankScore: score,
				score
			});
		});
	}
	reranked.sort((a, b) => b.rerankScore - a.rerankScore);
	logger.debug(`Re-ranking complete, top score: ${String(reranked[0]?.rerankScore ?? 0)}`);
	return reranked;
}
|
|
2560
|
+
|
|
2561
|
+
//#endregion
|
|
2562
|
+
//#region src/core/embeddings/callgraph.ts
|
|
2563
|
+
/**
|
|
2564
|
+
* Call graph extraction and storage
|
|
2565
|
+
*
|
|
2566
|
+
* Extracts function call relationships from code using tree-sitter
|
|
2567
|
+
* to build a graph showing which functions call which.
|
|
2568
|
+
*
|
|
2569
|
+
* Features:
|
|
2570
|
+
* - Persistent caching in .src-index/call-graph.json
|
|
2571
|
+
* - Hash-based invalidation for changed files
|
|
2572
|
+
*/
|
|
2573
|
+
/**
|
|
2574
|
+
* Compute SHA-256 hash of content
|
|
2575
|
+
*/
|
|
2576
|
+
/**
 * Compute a short SHA-256 hash of content: the first 16 hex characters of
 * the full digest, used as a cheap file fingerprint.
 *
 * @param {string} content - Content to hash.
 * @returns {string} 16-character hex prefix of the SHA-256 digest.
 */
function computeHash$1(content) {
	const hasher = crypto.createHash("sha256");
	hasher.update(content);
	return hasher.digest("hex").substring(0, 16);
}
|
|
2579
|
+
/**
|
|
2580
|
+
* Get call graph cache path for a directory
|
|
2581
|
+
*/
|
|
2582
|
+
/**
 * Get the call-graph cache file path for a directory
 * (<directory>/.src-index/call-graph.json).
 *
 * @param {string} directory - Project/base directory.
 * @returns {string} Absolute or relative path to the cache file.
 */
function getCachePath(directory) {
	const indexDir = path.join(directory, ".src-index");
	return path.join(indexDir, "call-graph.json");
}
|
|
2585
|
+
/**
|
|
2586
|
+
* Save call graph to persistent cache
|
|
2587
|
+
*/
|
|
2588
|
+
/**
 * Save the call graph to the persistent cache file.
 *
 * Serializes the node Map to a plain object and stores per-file content
 * hashes alongside it so loadCallGraphCache can validate freshness.
 *
 * NOTE(review): the catch swallows every error and logs "directory not
 * writable", but the failure could also be e.g. a JSON.stringify error on a
 * cyclic graph — the message assumes the cause.
 *
 * @param {string} directory - Base directory for the .src-index cache.
 * @param {{nodes: Map, files: string[], edgeCount: number}} graph
 * @param {Record<string, string>} fileHashes - filePath -> content hash.
 */
function saveCallGraphCache(directory, graph, fileHashes) {
	try {
		const cachePath = getCachePath(directory);
		const cacheDir = path.dirname(cachePath);
		if (!fs.existsSync(cacheDir)) fs.mkdirSync(cacheDir, { recursive: true });
		const serialized = {
			nodes: Object.fromEntries(graph.nodes),
			files: graph.files,
			edgeCount: graph.edgeCount,
			fileHashes,
			timestamp: Date.now()
		};
		fs.writeFileSync(cachePath, JSON.stringify(serialized), "utf-8");
		logger.debug(`Call graph cache saved: ${String(graph.nodes.size)} nodes`);
	} catch {
		logger.debug("Call graph cache save skipped: directory not writable");
	}
}
|
|
2606
|
+
/**
|
|
2607
|
+
* Load call graph from persistent cache if valid
|
|
2608
|
+
*/
|
|
2609
|
+
/**
 * Load the call graph from the persistent cache if it is still valid.
 *
 * Validity requires (a) the same number of files as the current set and
 * (b) every current file's hash matching the cached hash. A file present
 * now but absent from the cache fails check (b) because its cached hash is
 * undefined, so combined with the count check the file sets must be equal.
 *
 * @param {string} directory - Base directory for the .src-index cache.
 * @param {Record<string, string>} currentHashes - filePath -> content hash.
 * @returns {{nodes: Map, files: string[], edgeCount: number}|null} The cached
 *   graph, or null when missing/stale/corrupt.
 */
function loadCallGraphCache(directory, currentHashes) {
	const cachePath = getCachePath(directory);
	if (!fs.existsSync(cachePath)) return null;
	try {
		const content = fs.readFileSync(cachePath, "utf-8");
		const cached = JSON.parse(content);
		const cachedFiles = new Set(Object.keys(cached.fileHashes));
		const currentFiles = new Set(Object.keys(currentHashes));
		if (cachedFiles.size !== currentFiles.size) {
			logger.debug("Call graph cache invalid: file count changed");
			return null;
		}
		for (const [filePath, hash] of Object.entries(currentHashes)) if (cached.fileHashes[filePath] !== hash) {
			logger.debug(`Call graph cache invalid: ${filePath} changed`);
			return null;
		}
		// Re-hydrate the serialized plain object back into a Map.
		const nodes = new Map(Object.entries(cached.nodes));
		logger.debug(`Call graph cache loaded: ${String(nodes.size)} nodes`);
		return {
			nodes,
			files: cached.files,
			edgeCount: cached.edgeCount
		};
	} catch (error) {
		logger.debug(`Failed to load call graph cache: ${error instanceof Error ? error.message : String(error)}`);
		return null;
	}
}
|
|
2637
|
+
/** In-memory per-file cache of call-graph analysis (filePath -> { symbols, calls }). */
const callGraphCache = /* @__PURE__ */ new Map();
|
|
2638
|
+
/**
|
|
2639
|
+
* Clear the call graph cache
|
|
2640
|
+
*/
|
|
2641
|
+
/**
 * Clear the in-memory per-file call graph cache.
 * (Does not touch the persistent .src-index/call-graph.json file.)
 */
function clearCallGraphCache() {
	callGraphCache.clear();
}
|
|
2644
|
+
// Register the cache with the global registry so it can be cleared centrally.
registerCache("embeddings:callGraphCache", clearCallGraphCache);
|
|
2645
|
+
/**
|
|
2646
|
+
* Extract function calls from a tree-sitter node
|
|
2647
|
+
*/
|
|
2648
|
+
/**
 * Extract function call sites from a tree-sitter parse tree.
 *
 * Runs a per-language tree-sitter query that captures both direct calls
 * (`foo(...)`) and member/attribute calls (`obj.foo(...)`), capturing the
 * callee identifier as @callee. All call sites are returned under the
 * single key "__global__"; attribution to containing symbols happens later
 * in associateCallsWithSymbols.
 *
 * @param {object} tree - Parsed tree-sitter tree.
 * @param {object} languageInstance - tree-sitter language object for Query.
 * @param {string} language - One of "typescript"|"javascript"|"python"|"go";
 *   any other language returns an empty map.
 * @returns {Map<string, Array<{callee: string, position: object}>>}
 */
function extractCallsFromTree(tree, languageInstance, language) {
	const callsBySymbol = /* @__PURE__ */ new Map();
	// Query patterns per supported language; unsupported languages fall
	// through to an empty result.
	const pattern = {
		typescript: `
(call_expression
	function: [(identifier) @callee
		(member_expression property: (property_identifier) @callee)]
	arguments: (arguments) @args)
`,
		javascript: `
(call_expression
	function: [(identifier) @callee
		(member_expression property: (property_identifier) @callee)]
	arguments: (arguments) @args)
`,
		python: `
(call
	function: [(identifier) @callee
		(attribute attribute: (identifier) @callee)]
	arguments: (argument_list) @args)
`,
		go: `
(call_expression
	function: [(identifier) @callee
		(selector_expression field: (field_identifier) @callee)]
	arguments: (argument_list) @args)
`
	}[language];
	if (!pattern) return callsBySymbol;
	try {
		const matches = new Query(languageInstance, pattern).matches(tree.rootNode);
		const callCaptures = [];
		// Only @callee captures matter; @args exists to anchor the query shape.
		for (const match of matches) for (const capture of match.captures) if (capture.name === "callee") callCaptures.push({
			callee: capture.node.text,
			position: {
				// tree-sitter rows are 0-based; convert to 1-based lines.
				line: capture.node.startPosition.row + 1,
				column: capture.node.startPosition.column,
				offset: capture.node.startIndex
			}
		});
		if (callCaptures.length > 0) callsBySymbol.set("__global__", callCaptures.map((c) => ({
			callee: c.callee,
			position: c.position
		})));
	} catch (error) {
		// Query construction/execution failures are non-fatal: log and return
		// whatever was collected (an empty map).
		logger.debug(`Failed to extract calls for ${language}: ${error instanceof Error ? error.message : String(error)}`);
	}
	return callsBySymbol;
}
|
|
2697
|
+
/**
|
|
2698
|
+
* Associate calls with their containing symbols
|
|
2699
|
+
*/
|
|
2700
|
+
/**
 * Associate call sites with the function/method symbol that contains them.
 *
 * Symbols are filtered to callables (functions and methods) and sorted by
 * start offset; for each call the last containing symbol wins, which for
 * nested definitions is the innermost one. Calls outside every callable go
 * under the "__global__" key.
 *
 * @param {Array} symbols - Symbols with start/end offsets and a type.
 * @param {Array<{callee: string, position: {offset: number}}>} allCalls
 * @returns {Map<string, Array>} symbolName -> calls made inside it.
 */
function associateCallsWithSymbols(symbols, allCalls) {
	const callsBySymbol = /* @__PURE__ */ new Map();
	const callables = symbols
		.filter((sym) => sym.type === "function" || sym.type === "method")
		.sort((a, b) => a.start.offset - b.start.offset);
	for (const call of allCalls) {
		let owner = null;
		for (const sym of callables) {
			// Symbols are sorted by start, so once one starts past the call
			// offset no later symbol can contain it.
			if (call.position.offset < sym.start.offset) break;
			if (call.position.offset <= sym.end.offset) owner = sym;
		}
		const key = owner?.name ?? "__global__";
		const bucket = callsBySymbol.get(key) ?? [];
		bucket.push(call);
		callsBySymbol.set(key, bucket);
	}
	return callsBySymbol;
}
|
|
2714
|
+
/**
|
|
2715
|
+
* Analyze a file and extract call graph data
|
|
2716
|
+
*/
|
|
2717
|
+
/**
 * Analyze a file and extract its call-graph data (symbols + calls grouped
 * by containing symbol). Results are memoized in `callGraphCache` keyed by
 * file path; buildCallGraph later reads that same cache in its second pass.
 *
 * NOTE(review): like getFileAnalysis, the cache key is the path only — a
 * changed `content` for the same path returns the stale entry unless the
 * cache was cleared first.
 *
 * @param {string} filePath - Path of the file.
 * @param {string} content - Full source text.
 * @returns {Promise<{symbols: Array, calls: Map}|null>} Analysis data, or
 *   null when parsing fails.
 */
async function analyzeFileForCallGraph(filePath, content) {
	const cached = callGraphCache.get(filePath);
	if (cached) return cached;
	try {
		const parseResult = await parseCode(content, { filePath });
		const { symbols } = extractSymbols(parseResult.tree, parseResult.languageInstance, parseResult.language);
		const data = {
			symbols,
			// extractCallsFromTree puts every call under "__global__"; attribute
			// them to their containing function/method here.
			calls: associateCallsWithSymbols(symbols, extractCallsFromTree(parseResult.tree, parseResult.languageInstance, parseResult.language).get("__global__") ?? [])
		};
		callGraphCache.set(filePath, data);
		return data;
	} catch (error) {
		logger.debug(`Failed to analyze ${filePath} for call graph: ${error instanceof Error ? error.message : String(error)}`);
		return null;
	}
}
|
|
2734
|
+
/**
|
|
2735
|
+
* Build a call graph from multiple files
|
|
2736
|
+
*
|
|
2737
|
+
* Uses persistent caching with hash-based invalidation for performance.
|
|
2738
|
+
*/
|
|
2739
|
+
/**
 * Build a call graph from multiple files.
 *
 * Uses persistent caching with hash-based invalidation: if every file's
 * content hash matches the cached graph, the cached graph is returned.
 * Otherwise pass 1 registers one node per function/method symbol
 * (qualified as "path:name"), and pass 2 links edges by matching callee
 * names against every registered node.
 *
 * NOTE(review): edge matching is by unqualified callee name across ALL
 * files, so same-named functions in different files each receive an edge,
 * and repeated matches append duplicates to `calls`/`calledBy` — confirm
 * consumers tolerate over-approximated, duplicated edges.
 *
 * @param {{path: string, content: string}[]} files
 * @returns {Promise<{nodes: Map, files: string[], edgeCount: number}>}
 */
async function buildCallGraph(files) {
	if (files.length === 0) return {
		nodes: /* @__PURE__ */ new Map(),
		files: [],
		edgeCount: 0
	};
	const fileHashes = {};
	for (const file of files) fileHashes[file.path] = computeHash$1(file.content);
	const baseDir = findCommonDirectory(files.map((f) => f.path));
	const cached = loadCallGraphCache(baseDir, fileHashes);
	if (cached) return cached;
	const nodes = /* @__PURE__ */ new Map();
	const filePaths = [];
	let edgeCount = 0;
	// Pass 1: analyze each file (also populates callGraphCache) and create a
	// node for every function/method symbol.
	for (const file of files) {
		filePaths.push(file.path);
		const data = await analyzeFileForCallGraph(file.path, file.content);
		if (!data) continue;
		for (const symbol of data.symbols) if (symbol.type === "function" || symbol.type === "method") {
			const qualifiedName = `${file.path}:${symbol.name}`;
			nodes.set(qualifiedName, {
				name: symbol.name,
				qualifiedName,
				filePath: file.path,
				type: symbol.type,
				start: symbol.start,
				end: symbol.end,
				calls: [],
				calledBy: []
			});
		}
	}
	// Pass 2: read the per-file analysis back out of callGraphCache (filled
	// by pass 1) and connect caller -> callee edges by callee name.
	for (const file of files) {
		const data = callGraphCache.get(file.path);
		if (!data) continue;
		for (const [symbolName, calls] of data.calls) {
			const callerKey = `${file.path}:${symbolName}`;
			const callerNode = nodes.get(callerKey);
			// "__global__" calls have no caller node but still record calledBy
			// on their targets; any other unknown caller is skipped.
			if (!callerNode && symbolName !== "__global__") continue;
			for (const call of calls) for (const [nodeKey, node] of nodes) if (node.name === call.callee) {
				if (callerNode) callerNode.calls.push(nodeKey);
				node.calledBy.push(callerKey);
				edgeCount++;
			}
		}
	}
	const graph = {
		nodes,
		files: filePaths,
		edgeCount
	};
	saveCallGraphCache(baseDir, graph, fileHashes);
	return graph;
}
|
|
2793
|
+
/**
 * Find the deepest directory shared by every path in the list.
 * Returns "." for an empty list or when no common prefix exists.
 * NOTE(review): absolute paths split to a leading "" segment, which stops
 * the scan immediately and yields "." — preserved from the original.
 */
function findCommonDirectory(paths) {
	if (paths.length === 0) return ".";
	const [head] = paths;
	if (!head) return ".";
	if (paths.length === 1) return path.dirname(head);
	const split = paths.map((p) => path.normalize(p).split(path.sep));
	const reference = split[0];
	if (!reference) return ".";
	let shared = 0;
	while (shared < reference.length) {
		const part = reference[shared];
		if (!part) break;
		if (!split.every((pieces) => pieces[shared] === part)) break;
		shared++;
	}
	const prefix = reference.slice(0, shared).join(path.sep);
	// If the shared prefix happens to be an existing *file*, step up to its directory.
	if (prefix && fs.existsSync(prefix) && fs.statSync(prefix).isFile()) return path.dirname(prefix);
	return prefix || ".";
}
|
|
2814
|
+
/**
 * Look up the callers and callees recorded for one function in a call graph.
 * Returns null when the function has no node in the graph; dangling edge
 * keys that resolve to no node are silently dropped.
 */
function getCallContext(graph, filePath, functionName) {
	const node = graph.nodes.get(`${filePath}:${functionName}`);
	if (!node) return null;
	const resolveKeys = (keys) => keys.map((key) => graph.nodes.get(key)).filter((found) => found);
	return {
		callers: resolveKeys(node.calledBy),
		callees: resolveKeys(node.calls)
	};
}
|
|
2836
|
+
|
|
2837
|
+
//#endregion
|
|
2838
|
+
//#region src/features/index-codebase/index.ts
|
|
2839
|
+
/**
|
|
2840
|
+
* Index Codebase Feature
|
|
2841
|
+
*
|
|
2842
|
+
* Indexes a directory by:
|
|
2843
|
+
* 1. Scanning for supported files
|
|
2844
|
+
* 2. Chunking each file
|
|
2845
|
+
* 3. Generating embeddings via Ollama
|
|
2846
|
+
* 4. Storing in LanceDB
|
|
2847
|
+
*/
|
|
2848
|
+
/** Default concurrency for parallel file processing; used as the default of indexCodebaseSchema.concurrency. */
const DEFAULT_CONCURRENCY = 4;
|
|
2850
|
+
/**
 * Run an async processor over items with a bounded worker pool.
 * Results keep input order; undefined items are skipped (processor not
 * called) and undefined results are dropped from the returned array.
 */
async function parallelMap(items, processor, concurrency) {
	const out = new Array(items.length);
	let cursor = 0;
	const drain = async () => {
		// check-and-increment is synchronous, so slots are claimed exactly once
		// even with multiple interleaved workers on the single JS thread
		while (cursor < items.length) {
			const slot = cursor++;
			const value = items[slot];
			if (value === void 0) continue;
			out[slot] = await processor(value);
		}
	};
	const poolSize = Math.min(concurrency, items.length);
	await Promise.all(Array.from({ length: poolSize }, () => drain()));
	return out.filter((entry) => entry !== void 0);
}
|
|
2867
|
+
// Input schema for the index_codebase tool (validated with zod).
const indexCodebaseSchema = z.object({
	directory: z.string().optional().default(".").describe("Path to the directory to index (defaults to current directory)"),
	force: z.boolean().optional().default(false).describe("Force re-indexing even if index exists"),
	exclude: z.array(z.string()).optional().default([]).describe("Additional glob patterns to exclude"),
	concurrency: z.number().int().positive().optional().default(DEFAULT_CONCURRENCY).describe("Number of files to process in parallel (default: 4)")
});
|
|
2873
|
+
/**
 * Build an ignore matcher seeded from the directory's .gitignore (if
 * readable) plus any caller-supplied exclusion patterns.
 */
function createIgnoreFilter$2(baseDir, additionalExclusions) {
	const matcher = ignore();
	const gitignoreFile = path.join(baseDir, ".gitignore");
	if (fs.existsSync(gitignoreFile)) {
		try {
			matcher.add(fs.readFileSync(gitignoreFile, "utf-8"));
		} catch {
			// An unreadable .gitignore is treated as absent (best-effort).
		}
	}
	if (additionalExclusions.length > 0) matcher.add(additionalExclusions);
	return matcher;
}
|
|
2886
|
+
/** True when a file or directory name is dot-prefixed (hidden). */
function isHidden$2(name) {
	return name.charAt(0) === ".";
}
|
|
2892
|
+
/**
 * Walk a directory tree depth-first and return indexable file paths,
 * skipping hidden entries and anything matched by the ignore filter.
 */
function collectFiles$2(dir, ig, baseDir) {
	const collected = [];
	for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
		if (isHidden$2(entry.name)) continue;
		const absolute = path.join(dir, entry.name);
		// ignore() expects forward-slash relative paths, even on Windows.
		const relative = path.relative(baseDir, absolute).replace(/\\/g, "/");
		if (ig.ignores(relative)) continue;
		if (entry.isDirectory()) {
			collected.push(...collectFiles$2(absolute, ig, baseDir));
		} else if (entry.isFile() && shouldIndexFile(entry.name)) {
			collected.push(absolute);
		}
	}
	return collected;
}
|
|
2908
|
+
/**
 * Execute the index_codebase feature: scan the directory, chunk and enrich
 * each file in parallel, embed chunks in batches via Ollama, and store the
 * embeddings in the vector store.
 *
 * @param {object} input - Validated indexCodebaseSchema input.
 * @returns {Promise<object>} Result with `success`, `message`/`error`, and stats in `data`.
 */
async function execute$3(input) {
	const { directory, force, exclude, concurrency } = input;
	if (!fs.existsSync(directory)) return {
		success: false,
		error: `Directory not found: ${directory}`
	};
	const absoluteDir = path.resolve(directory);
	const ollamaClient = createOllamaClient(EMBEDDING_CONFIG);
	const vectorStore = createVectorStore(absoluteDir, EMBEDDING_CONFIG);
	const health = await ollamaClient.healthCheck();
	if (!health.ok) return {
		success: false,
		error: health.error ?? "Ollama is not available"
	};
	if (vectorStore.exists() && !force) return {
		success: false,
		error: "Index already exists. Use force=true to re-index or search_code to query."
	};
	const result = {
		directory: absoluteDir,
		filesIndexed: 0,
		chunksCreated: 0,
		languages: {},
		errors: []
	};
	try {
		await vectorStore.connect();
		if (force && vectorStore.exists()) await vectorStore.clear();
		const files = collectFiles$2(absoluteDir, createIgnoreFilter$2(absoluteDir, exclude), absoluteDir);
		if (files.length === 0) {
			// BUGFIX: close the store before this early return — previously the
			// connection opened above leaked on the empty-directory path (every
			// other exit from this try block closes the store).
			vectorStore.close();
			return {
				success: true,
				message: "No indexable files found in directory",
				data: result
			};
		}
		const pathAliases = readPathAliasesCached(absoluteDir);
		const aliasCount = Object.keys(pathAliases).length;
		const enrichmentOptions = {
			projectRoot: absoluteDir,
			pathAliases,
			includeCrossFileContext: true
		};
		logger.debug(`Indexing ${String(files.length)} files with concurrency=${String(concurrency)} (projectRoot: ${absoluteDir}, ${String(aliasCount)} path aliases)`);
		// Chunk + enrich one file; errors are captured per file, never thrown,
		// so one bad file cannot abort the whole indexing run.
		const processFile = async (filePath) => {
			try {
				const content = fs.readFileSync(filePath, "utf-8");
				return { chunks: await enrichChunksFromFile(await chunkFile(filePath, content, EMBEDDING_CONFIG), content, enrichmentOptions) };
			} catch (err) {
				return {
					chunks: [],
					error: `Error processing ${filePath}: ${err instanceof Error ? err.message : String(err)}`
				};
			}
		};
		const fileResults = await parallelMap(files, processFile, concurrency);
		const allEnrichedChunks = [];
		for (const fileResult of fileResults) if (fileResult.error) result.errors.push(fileResult.error);
		else {
			allEnrichedChunks.push(...fileResult.chunks);
			result.filesIndexed++;
			for (const chunk of fileResult.chunks) result.languages[chunk.language] = (result.languages[chunk.language] ?? 0) + 1;
		}
		// Embed in batches; a failed batch is recorded and skipped, not fatal.
		const { batchSize } = EMBEDDING_CONFIG;
		const embeddedChunks = [];
		for (let i = 0; i < allEnrichedChunks.length; i += batchSize) {
			const batch = allEnrichedChunks.slice(i, i + batchSize);
			const texts = batch.map((c) => c.enrichedContent);
			try {
				const embeddings = await ollamaClient.embedBatch(texts);
				for (let j = 0; j < batch.length; j++) {
					const chunk = batch[j];
					const vector = embeddings[j];
					if (chunk && vector) embeddedChunks.push({
						id: chunk.id,
						content: chunk.content,
						filePath: chunk.filePath,
						language: chunk.language,
						startLine: chunk.startLine,
						endLine: chunk.endLine,
						symbolName: chunk.symbolName,
						symbolType: chunk.symbolType,
						vector
					});
				}
			} catch (err) {
				const errorMsg = err instanceof Error ? err.message : String(err);
				result.errors.push(`Embedding batch error: ${errorMsg}`);
			}
		}
		if (embeddedChunks.length > 0) {
			await vectorStore.addChunks(embeddedChunks);
			result.chunksCreated = embeddedChunks.length;
		}
		vectorStore.close();
		return {
			success: true,
			message: result.errors.length > 0 ? `Indexed ${String(result.filesIndexed)} files (${String(result.chunksCreated)} chunks) with ${String(result.errors.length)} errors` : `Successfully indexed ${String(result.filesIndexed)} files (${String(result.chunksCreated)} chunks)`,
			data: result
		};
	} catch (err) {
		vectorStore.close();
		return {
			success: false,
			error: `Indexing failed: ${err instanceof Error ? err.message : String(err)}`,
			data: result
		};
	}
}
|
|
3018
|
+
// MCP tool registration for index_codebase: wires the zod schema to its handler.
const indexCodebaseFeature = {
	name: "index_codebase",
	description: "Index a codebase for semantic code search. USE THIS FIRST before search_code. Required once per project - creates vector embeddings for 50+ languages. After initial indexing, use update_index for incremental updates.",
	schema: indexCodebaseSchema,
	execute: execute$3
};
|
|
3024
|
+
|
|
3025
|
+
//#endregion
|
|
3026
|
+
//#region src/features/search-code/index.ts
|
|
3027
|
+
/**
|
|
3028
|
+
* Search Code Feature
|
|
3029
|
+
*
|
|
3030
|
+
* Performs hybrid search on indexed codebase combining:
|
|
3031
|
+
* 1. Vector similarity search (semantic embeddings via Ollama)
|
|
3032
|
+
* 2. Full-text search (BM25 keyword matching)
|
|
3033
|
+
* 3. RRF (Reciprocal Rank Fusion) to combine results
|
|
3034
|
+
*
|
|
3035
|
+
* Supports three search modes:
|
|
3036
|
+
* - 'hybrid' (default): Best of both vector and keyword search
|
|
3037
|
+
* - 'vector': Semantic search only
|
|
3038
|
+
* - 'fts': Keyword search only
|
|
3039
|
+
*
|
|
3040
|
+
* Optional features:
|
|
3041
|
+
* - LLM re-ranking for improved relevance
|
|
3042
|
+
* - Call context to show callers/callees for each result
|
|
3043
|
+
*/
|
|
3044
|
+
// Input schema for the search_code tool (validated with zod).
const searchCodeSchema = z.object({
	query: z.string().min(1).describe("Natural language search query"),
	directory: z.string().optional().default(".").describe("Path to the indexed directory (defaults to current directory)"),
	limit: z.number().int().positive().optional().default(10).describe("Maximum number of results to return"),
	threshold: z.number().min(0).max(2).optional().describe("Maximum distance threshold for results (lower = more similar)"),
	mode: z.enum([
		"vector",
		"fts",
		"hybrid"
	]).optional().default("hybrid").describe("Search mode: 'vector' (semantic only), 'fts' (keyword only), 'hybrid' (combined with RRF fusion)"),
	rerank: z.boolean().optional().default(true).describe("Enable LLM re-ranking for improved relevance (enabled by default)"),
	includeCallContext: z.boolean().optional().default(true).describe("Include caller/callee information for each result (uses cached call graph)")
});
|
|
3057
|
+
/**
 * Build an ignore matcher with built-in exclusions (node_modules, .git,
 * dist, build, .src-index) plus the directory's .gitignore when present.
 */
function createIgnoreFilter$1(directory) {
	const matcher = ignore();
	matcher.add([
		"node_modules",
		".git",
		"dist",
		"build",
		".src-index"
	]);
	const gitignoreFile = path.join(directory, ".gitignore");
	if (fs.existsSync(gitignoreFile)) matcher.add(fs.readFileSync(gitignoreFile, "utf-8"));
	return matcher;
}
|
|
3076
|
+
/** True when a file or directory name is dot-prefixed (hidden). */
function isHidden$1(name) {
	return name.charAt(0) === ".";
}
|
|
3082
|
+
/**
 * Walk a directory tree depth-first and return indexable file paths for the
 * call-graph build, skipping hidden entries and ignored paths.
 */
function collectFiles$1(dir, ig, baseDir) {
	const collected = [];
	for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
		if (isHidden$1(entry.name)) continue;
		const absolute = path.join(dir, entry.name);
		// ignore() expects forward-slash relative paths, even on Windows.
		const relative = path.relative(baseDir, absolute).replace(/\\/g, "/");
		if (ig.ignores(relative)) continue;
		if (entry.isDirectory()) {
			collected.push(...collectFiles$1(absolute, ig, baseDir));
		} else if (entry.isFile() && shouldIndexFile(entry.name)) {
			collected.push(absolute);
		}
	}
	return collected;
}
|
|
3098
|
+
/**
 * Map raw search hits to the public result shape, with file paths made
 * relative to the project base directory.
 */
function formatResults(results, baseDir) {
	return results.map(({ chunk, score }) => ({
		filePath: path.relative(baseDir, chunk.filePath),
		language: chunk.language,
		startLine: chunk.startLine,
		endLine: chunk.endLine,
		content: chunk.content,
		score,
		symbolName: chunk.symbolName,
		symbolType: chunk.symbolType
	}));
}
|
|
3113
|
+
/**
 * Execute the search_code feature: embed the query, run vector/FTS/hybrid
 * search against the stored index, optionally LLM-rerank the hits, and
 * optionally attach caller/callee context from the (cached) call graph.
 *
 * @param {object} input - Validated searchCodeSchema input.
 * @returns {Promise<object>} Result with `success`, `message`/`error`, and `data`.
 */
async function execute$2(input) {
	const { query, directory, limit, threshold, mode, rerank: enableRerank, includeCallContext } = input;
	if (!fs.existsSync(directory)) return {
		success: false,
		error: `Directory not found: ${directory}`
	};
	const absoluteDir = path.resolve(directory);
	const ollamaClient = createOllamaClient(EMBEDDING_CONFIG);
	const vectorStore = createVectorStore(absoluteDir, EMBEDDING_CONFIG);
	if (!vectorStore.exists()) return {
		success: false,
		error: `No index found for directory. Run index_codebase first: ${absoluteDir}`
	};
	try {
		const health = await ollamaClient.healthCheck();
		if (!health.ok) return {
			success: false,
			error: health.error ?? "Ollama is not available"
		};
		await vectorStore.connect();
		const queryVector = await ollamaClient.embed(query);
		let results = await vectorStore.searchHybrid(queryVector, query, limit, { mode });
		// The distance threshold is only applied to pure vector scores; hybrid/FTS
		// scores come from rank fusion and are on a different scale.
		if (threshold !== void 0 && mode === "vector") results = results.filter((r) => r.score <= threshold);
		if (enableRerank && results.length > 0) results = await rerank(query, results, {
			ollamaBaseUrl: EMBEDDING_CONFIG.ollamaBaseUrl,
			model: EMBEDDING_CONFIG.rerankModel,
			maxResults: limit
		});
		vectorStore.close();
		let formattedResults = formatResults(results, absoluteDir);
		if (includeCallContext && formattedResults.length > 0) {
			// Re-reads project files to build the call graph (buildCallGraph itself
			// short-circuits via its hash-validated persistent cache).
			const callGraph = await buildCallGraph(collectFiles$1(absoluteDir, createIgnoreFilter$1(absoluteDir), absoluteDir).map((f) => ({
				path: f,
				content: fs.readFileSync(f, "utf-8")
			})));
			formattedResults = formattedResults.map((result) => {
				if (!result.symbolName) return result;
				const context = getCallContext(callGraph, path.join(absoluteDir, result.filePath), result.symbolName);
				if (context) return {
					...result,
					callContext: {
						callers: context.callers.map((c) => c.name),
						callees: context.callees.map((c) => c.name)
					}
				};
				return result;
			});
		}
		const output = {
			query,
			directory: absoluteDir,
			resultsCount: formattedResults.length,
			results: formattedResults
		};
		if (formattedResults.length === 0) return {
			success: true,
			message: "No matching code found",
			data: output
		};
		// Human-readable summary: numbered entries with location, symbol,
		// a 100-char preview, and up to 3 callers/callees each.
		const resultLines = formattedResults.map((r, i) => {
			const location = `${r.filePath}:${String(r.startLine)}-${String(r.endLine)}`;
			const symbol = r.symbolName ? ` (${r.symbolType ?? "symbol"}: ${r.symbolName})` : "";
			const preview = r.content.slice(0, 100).replace(/\n/g, " ");
			let callInfo = "";
			if (r.callContext) {
				const callers = r.callContext.callers.length > 0 ? `Called by: ${r.callContext.callers.slice(0, 3).join(", ")}${r.callContext.callers.length > 3 ? "..." : ""}` : "";
				const callees = r.callContext.callees.length > 0 ? `Calls: ${r.callContext.callees.slice(0, 3).join(", ")}${r.callContext.callees.length > 3 ? "..." : ""}` : "";
				if (callers || callees) callInfo = `\n ${[callers, callees].filter(Boolean).join(" | ")}`;
			}
			return `${String(i + 1)}. [${r.language}] ${location}${symbol}\n ${preview}...${callInfo}`;
		});
		return {
			success: true,
			message: `Found ${String(formattedResults.length)} results for "${query}":\n\n${resultLines.join("\n\n")}`,
			data: output
		};
	} catch (err) {
		vectorStore.close();
		return {
			success: false,
			error: `Search failed: ${err instanceof Error ? err.message : String(err)}`
		};
	}
}
|
|
3200
|
+
// MCP tool registration for search_code: wires the zod schema to its handler.
const searchCodeFeature = {
	name: "search_code",
	description: "Search code semantically using natural language queries. USE THIS to find code by concept/meaning (e.g., 'authentication logic', 'error handling'). Requires index_codebase first. Returns relevant code chunks with file locations, function names, and call relationships (who calls what).",
	schema: searchCodeSchema,
	execute: execute$2
};
|
|
3206
|
+
|
|
3207
|
+
//#endregion
|
|
3208
|
+
//#region src/features/get-index-status/index.ts
|
|
3209
|
+
/**
|
|
3210
|
+
* Get Index Status Feature
|
|
3211
|
+
*
|
|
3212
|
+
* Returns information about the embedding index for a directory:
|
|
3213
|
+
* - Whether an index exists
|
|
3214
|
+
* - Total chunks and files indexed
|
|
3215
|
+
* - Language breakdown
|
|
3216
|
+
*/
|
|
3217
|
+
// Input schema for the get_index_status tool (validated with zod).
const getIndexStatusSchema = z.object({ directory: z.string().optional().default(".").describe("Path to the directory to check (defaults to current directory)") });
|
|
3218
|
+
/**
 * Execute the get_index_status feature: report whether an embedding index
 * exists for a directory and, if so, its file/chunk totals and per-language
 * breakdown.
 *
 * @param {{directory: string}} input - Validated getIndexStatusSchema input.
 * @returns {Promise<object>} Result with `success`, `message`/`error`, `data`.
 */
async function execute$1(input) {
	const { directory } = input;
	if (!fs.existsSync(directory)) return {
		success: false,
		error: `Directory not found: ${directory}`
	};
	const absoluteDir = path.resolve(directory);
	const indexPath = getIndexPath(absoluteDir);
	if (!fs.existsSync(indexPath)) {
		const status = {
			directory: absoluteDir,
			indexPath,
			exists: false,
			totalChunks: 0,
			totalFiles: 0,
			languages: {}
		};
		return {
			success: true,
			message: `No index found for ${absoluteDir}. Run index_codebase to create one.`,
			data: status
		};
	}
	// BUGFIX: create the store before the try block so the catch handler can
	// close it — previously a failure in connect()/getStatus() leaked the
	// connection. This matches the cleanup pattern of the other executors.
	const vectorStore = createVectorStore(absoluteDir, EMBEDDING_CONFIG);
	try {
		await vectorStore.connect();
		const status = await vectorStore.getStatus(absoluteDir);
		vectorStore.close();
		const languageLines = Object.entries(status.languages).sort(([, a], [, b]) => b - a).map(([lang, count]) => ` - ${lang}: ${String(count)} chunks`);
		return {
			success: true,
			message: [
				`Index Status for ${absoluteDir}`,
				``,
				`Index Path: ${status.indexPath}`,
				`Total Files: ${String(status.totalFiles)}`,
				`Total Chunks: ${String(status.totalChunks)}`,
				``,
				`Languages:`,
				...languageLines
			].join("\n"),
			data: status
		};
	} catch (err) {
		// Release the store handle even when connect/getStatus fails.
		vectorStore.close();
		return {
			success: false,
			error: `Failed to read index status: ${err instanceof Error ? err.message : String(err)}`
		};
	}
}
|
|
3271
|
+
// MCP tool registration for get_index_status: wires the zod schema to its handler.
const getIndexStatusFeature = {
	name: "get_index_status",
	description: "Check if a codebase is indexed and ready for search. USE THIS to verify index exists before searching. Returns file count, chunk count, and indexed languages.",
	schema: getIndexStatusSchema,
	execute: execute$1
};
|
|
3277
|
+
|
|
3278
|
+
//#endregion
|
|
3279
|
+
//#region src/features/get-call-graph/index.ts
|
|
3280
|
+
/**
|
|
3281
|
+
* Get Call Graph Feature
|
|
3282
|
+
*
|
|
3283
|
+
* Analyzes function call relationships in a codebase.
|
|
3284
|
+
* Can either:
|
|
3285
|
+
* 1. Build a full call graph for a directory
|
|
3286
|
+
* 2. Query callers/callees for a specific function
|
|
3287
|
+
*/
|
|
3288
|
+
// Input schema for the get_call_graph tool (validated with zod).
const getCallGraphSchema = z.object({
	directory: z.string().optional().default(".").describe("Path to the directory to analyze"),
	functionName: z.string().optional().describe("Optional: specific function name to query callers/callees for"),
	filePath: z.string().optional().describe("Optional: file path to narrow down function search (used with functionName)"),
	maxDepth: z.number().int().positive().optional().default(2).describe("Maximum depth for call chain traversal (default: 2)"),
	exclude: z.array(z.string()).optional().default([]).describe("Glob patterns to exclude from analysis")
});
|
|
3295
|
+
|
|
3296
|
+
//#endregion
|
|
3297
|
+
//#region src/features/update-index/index.ts
|
|
3298
|
+
/**
|
|
3299
|
+
* Update Index Feature
|
|
3300
|
+
*
|
|
3301
|
+
* Incrementally updates the codebase index by:
|
|
3302
|
+
* 1. Detecting files that have changed since last indexing
|
|
3303
|
+
* 2. Re-indexing only the changed files
|
|
3304
|
+
* 3. Removing deleted files from the index
|
|
3305
|
+
*
|
|
3306
|
+
* Uses SHA-256 hash comparison to detect real content changes.
|
|
3307
|
+
*/
|
|
3308
|
+
/** Cache file name for storing hashes; written under <directory>/.src-index/ by saveHashCache. */
const HASH_CACHE_FILE = ".src-index-hashes.json";
// Input schema for the update_index tool (validated with zod).
const updateIndexSchema = z.object({
	directory: z.string().optional().default(".").describe("Path to the indexed directory"),
	dryRun: z.boolean().optional().default(false).describe("Only report changes without updating the index"),
	force: z.boolean().optional().default(false).describe("Force re-index of all files (ignore hash cache)")
});
|
|
3315
|
+
/** Compute the hex-encoded SHA-256 digest of a UTF-8 string. */
function computeHash(content) {
	const hasher = crypto.createHash("sha256");
	hasher.update(content, "utf8");
	return hasher.digest("hex");
}
|
|
3321
|
+
/** Path of the hash-cache file inside a project's .src-index folder. */
function getHashCachePath(directory) {
	const cacheDir = path.join(directory, ".src-index");
	return path.join(cacheDir, HASH_CACHE_FILE);
}
|
|
3327
|
+
/**
 * Read the persisted file-hash cache from disk.
 * Returns an empty object when the file is missing or unparsable.
 */
function loadHashCache(directory) {
	const cachePath = getHashCachePath(directory);
	if (!fs.existsSync(cachePath)) return {};
	try {
		return JSON.parse(fs.readFileSync(cachePath, "utf-8"));
	} catch {
		return {};
	}
}
|
|
3340
|
+
/** Persist the file-hash cache as pretty-printed JSON, creating the folder if needed. */
function saveHashCache(directory, cache) {
	const target = getHashCachePath(directory);
	const parent = path.dirname(target);
	if (!fs.existsSync(parent)) fs.mkdirSync(parent, { recursive: true });
	fs.writeFileSync(target, JSON.stringify(cache, null, 2));
}
|
|
3349
|
+
/**
 * Build an ignore matcher with built-in exclusions (node_modules, .git,
 * dist, build, .src-index) plus the directory's .gitignore when present.
 */
function createIgnoreFilter(directory) {
	const matcher = ignore();
	matcher.add([
		"node_modules",
		".git",
		"dist",
		"build",
		".src-index"
	]);
	const gitignoreFile = path.join(directory, ".gitignore");
	if (fs.existsSync(gitignoreFile)) matcher.add(fs.readFileSync(gitignoreFile, "utf-8"));
	return matcher;
}
|
|
3368
|
+
/** True when a file or directory name is dot-prefixed (hidden). */
function isHidden(name) {
	return name.charAt(0) === ".";
}
|
|
3374
|
+
/**
 * Walk a directory tree depth-first and return indexable file paths,
 * skipping hidden entries and anything matched by the ignore filter.
 */
function collectFiles(dir, ig, baseDir) {
	const collected = [];
	for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
		if (isHidden(entry.name)) continue;
		const absolute = path.join(dir, entry.name);
		// ignore() expects forward-slash relative paths, even on Windows.
		const relative = path.relative(baseDir, absolute).replace(/\\/g, "/");
		if (ig.ignores(relative)) continue;
		if (entry.isDirectory()) {
			collected.push(...collectFiles(absolute, ig, baseDir));
		} else if (entry.isFile() && shouldIndexFile(entry.name)) {
			collected.push(absolute);
		}
	}
	return collected;
}
|
|
3390
|
+
/**
 * Execute the update_index feature: diff on-disk files against the existing
 * index using SHA-256 content hashes, re-embed added/modified files, and
 * purge deleted ones from the vector store.
 *
 * @param {object} input - Validated updateIndexSchema input.
 * @returns {Promise<object>} Result with `success`, `message`/`error`, and change lists in `data`.
 */
async function execute(input) {
	const { directory, dryRun, force } = input;
	if (!fs.existsSync(directory)) return {
		success: false,
		error: `Directory not found: ${directory}`
	};
	const absoluteDir = path.resolve(directory);
	const ollamaClient = createOllamaClient(EMBEDDING_CONFIG);
	const vectorStore = createVectorStore(absoluteDir, EMBEDDING_CONFIG);
	if (!vectorStore.exists()) return {
		success: false,
		error: `No index found for directory. Run index_codebase first: ${absoluteDir}`
	};
	const result = {
		directory: absoluteDir,
		dryRun,
		added: [],
		modified: [],
		removed: [],
		unchanged: 0,
		errors: []
	};
	try {
		if (!dryRun) {
			// Ollama is only needed when we will actually embed something.
			const health = await ollamaClient.healthCheck();
			if (!health.ok) return {
				success: false,
				error: health.error ?? "Ollama is not available"
			};
		}
		await vectorStore.connect();
		// force=true discards the old hashes so every file classifies as changed.
		const hashCache = force ? {} : loadHashCache(absoluteDir);
		const newHashCache = {};
		const ig = createIgnoreFilter(absoluteDir);
		const currentFiles = new Set(collectFiles(absoluteDir, ig, absoluteDir));
		const indexedFiles = new Set(await vectorStore.getIndexedFiles());
		const filesToProcess = [];
		// Classify every on-disk file as added, modified, or unchanged.
		for (const filePath of currentFiles) {
			const hash = computeHash(fs.readFileSync(filePath, "utf-8"));
			newHashCache[filePath] = hash;
			if (!indexedFiles.has(filePath)) {
				result.added.push(path.relative(absoluteDir, filePath));
				filesToProcess.push({
					path: filePath,
					type: "add"
				});
			} else if (hashCache[filePath] !== hash) {
				result.modified.push(path.relative(absoluteDir, filePath));
				filesToProcess.push({
					path: filePath,
					type: "modify"
				});
			} else result.unchanged++;
		}
		// Indexed files no longer on disk are scheduled for removal.
		for (const filePath of indexedFiles) if (!currentFiles.has(filePath)) result.removed.push(path.relative(absoluteDir, filePath));
		if (dryRun) {
			vectorStore.close();
			return {
				success: true,
				message: buildDryRunMessage(result),
				data: result
			};
		}
		const enrichmentOptions = {
			projectRoot: absoluteDir,
			pathAliases: readPathAliasesCached(absoluteDir),
			includeCrossFileContext: true
		};
		const embeddedChunks = [];
		// Re-chunk and re-embed each changed file; per-file errors are
		// collected rather than aborting the whole update.
		for (const { path: filePath, type } of filesToProcess) try {
			// Modified files are deleted first so stale chunks cannot linger.
			if (type === "modify") await vectorStore.deleteByFilePath(filePath);
			const content = fs.readFileSync(filePath, "utf-8");
			const chunks = await chunkFile(filePath, content, EMBEDDING_CONFIG);
			if (chunks.length === 0) continue;
			const enrichedChunks = await enrichChunksFromFile(chunks, content, enrichmentOptions);
			const texts = enrichedChunks.map((c) => c.enrichedContent);
			const embeddings = await ollamaClient.embedBatch(texts);
			for (let i = 0; i < enrichedChunks.length; i++) {
				const chunk = enrichedChunks[i];
				const vector = embeddings[i];
				if (chunk && vector) embeddedChunks.push({
					id: chunk.id,
					content: chunk.content,
					filePath: chunk.filePath,
					language: chunk.language,
					startLine: chunk.startLine,
					endLine: chunk.endLine,
					symbolName: chunk.symbolName,
					symbolType: chunk.symbolType,
					vector
				});
			}
		} catch (err) {
			const errorMsg = err instanceof Error ? err.message : String(err);
			result.errors.push(`Error processing ${filePath}: ${errorMsg}`);
		}
		if (embeddedChunks.length > 0) await vectorStore.addChunks(embeddedChunks);
		for (const relativePath of result.removed) {
			const filePath = path.join(absoluteDir, relativePath);
			await vectorStore.deleteByFilePath(filePath);
		}
		saveHashCache(absoluteDir, newHashCache);
		vectorStore.close();
		return {
			success: true,
			message: buildResultMessage(result),
			data: result
		};
	} catch (err) {
		vectorStore.close();
		return {
			success: false,
			error: `Update failed: ${err instanceof Error ? err.message : String(err)}`
		};
	}
}
|
|
3509
|
+
/**
 * Build message for dry run
 *
 * Summarizes which files an incremental `update_index` run WOULD add,
 * update, or remove, without applying anything. Each category lists at
 * most 10 file names, followed by a "... and N more" line when truncated.
 *
 * @param {{added: string[], modified: string[], removed: string[], unchanged: number}} result
 *   Change summary computed by the update scan (paths are repo-relative).
 * @returns {string} Multi-line summary, or an "up to date" notice when no
 *   changes were detected.
 */
function buildDryRunMessage(result) {
  // Fix: check the "no changes" case up front. The original built the full
  // message (including the Unchanged line) and only discarded it at the very
  // end, doing dead work on the common no-op path.
  if (result.added.length === 0 && result.modified.length === 0 && result.removed.length === 0) return "Index is up to date - no changes detected.";
  const lines = ["Dry run - changes detected:"];
  // Append one category section; shows the first 10 entries plus a remainder
  // count. Factored out of the three previously-duplicated if-blocks.
  const pushSection = (label, marker, files) => {
    if (files.length === 0) return;
    lines.push(`\nFiles to ${label} (${String(files.length)}):`);
    for (const f of files.slice(0, 10)) lines.push(` ${marker} ${f}`);
    if (files.length > 10) lines.push(` ... and ${String(files.length - 10)} more`);
  };
  pushSection("add", "+", result.added);
  pushSection("update", "~", result.modified);
  pushSection("remove", "-", result.removed);
  lines.push(`\nUnchanged: ${String(result.unchanged)} files`);
  lines.push("\nRun without --dryRun to apply changes.");
  return lines.join("\n");
}
|
|
3534
|
+
/**
 * Build message for actual update
 *
 * Produces a human-readable summary of an applied index update: per-category
 * counts (Added/Modified/Removed/Unchanged) and, when present, up to the
 * first 5 processing errors.
 *
 * @param {{added: string[], modified: string[], removed: string[], unchanged: number, errors: string[]}} result
 * @returns {string} Multi-line summary, or an "up to date" notice when
 *   nothing changed.
 */
function buildResultMessage(result) {
  const { added, modified, removed, unchanged, errors } = result;
  const totalChanges = added.length + modified.length + removed.length;
  if (totalChanges === 0) return "Index is up to date - no changes needed.";
  const lines = ["Index updated successfully:"];
  // Only categories with at least one file get a count line.
  const counts = [
    ["Added", added.length],
    ["Modified", modified.length],
    ["Removed", removed.length]
  ];
  for (const [label, count] of counts) {
    if (count > 0) lines.push(` ${label}: ${String(count)} files`);
  }
  lines.push(` Unchanged: ${String(unchanged)} files`);
  if (errors.length > 0) {
    lines.push(`\nErrors (${String(errors.length)}):`);
    errors.slice(0, 5).forEach((err) => lines.push(` - ${err}`));
  }
  return lines.join("\n");
}
|
|
3550
|
+
/**
 * MCP feature definition for the `update_index` tool.
 * Wires the incremental-update `execute` handler and its zod schema
 * (both defined earlier in this module) into the feature registry
 * consumed by `registerTools`.
 */
const updateIndexFeature = {
  name: "update_index",
  description: "Refresh the search index after code changes. USE THIS instead of re-indexing - it's fast because it only processes changed files (SHA-256 hash detection). Use dryRun=true to preview changes first.",
  schema: updateIndexSchema,
  // Handler defined above; performs hash-diff scan and re-embeds changed files.
  execute
};
|
|
3556
|
+
|
|
3557
|
+
//#endregion
|
|
3558
|
+
//#region src/features/parse-ast/index.ts
|
|
3559
|
+
/**
 * Input schema for the parse-ast feature.
 * Accepts either a file path or raw content; the refine() enforces that at
 * least one of the two is present (?? falls through only on null/undefined).
 */
const parseAstSchema = z.object({
  file_path: z.string().optional().describe("Path to the file to parse (either file_path or content required)"),
  content: z.string().optional().describe("Code content to parse directly (either file_path or content required)"),
  language: z.string().optional().describe("Language name (auto-detected from file path if not provided)"),
  max_depth: z.number().int().positive().optional().describe("Maximum depth of AST to return (default: unlimited)")
}).refine((data) => data.file_path ?? data.content, { message: "Either file_path or content must be provided" });
|
|
3565
|
+
|
|
3566
|
+
//#endregion
|
|
3567
|
+
//#region src/features/query-code/index.ts
|
|
3568
|
+
// Preset query names accepted by the query-code feature; each maps to a
// canned tree-sitter SCM query (mapping lives elsewhere in the bundle).
const presetValues = [
  "functions",
  "classes",
  "imports",
  "exports",
  "comments",
  "strings",
  "variables",
  "types"
];
/**
 * Input schema for the query-code feature.
 * Two refine() calls require (file_path OR content) AND (query OR preset).
 */
const queryCodeSchema = z.object({
  file_path: z.string().optional().describe("Path to the file to query (either file_path or content required)"),
  content: z.string().optional().describe("Code content to query directly (either file_path or content required)"),
  language: z.string().optional().describe("Language name (auto-detected from file path if not provided)"),
  query: z.string().optional().describe("SCM query pattern (either query or preset required)"),
  preset: z.enum(presetValues).optional().describe("Preset query name: functions, classes, imports, exports, comments, strings, variables, types"),
  max_matches: z.number().int().positive().optional().describe("Maximum number of matches to return")
}).refine((data) => data.file_path ?? data.content, { message: "Either file_path or content must be provided" }).refine((data) => data.query ?? data.preset, { message: "Either query or preset must be provided" });
|
|
3586
|
+
|
|
3587
|
+
//#endregion
|
|
3588
|
+
//#region src/features/list-symbols/index.ts
|
|
3589
|
+
// Symbol kinds recognized by the list-symbols feature's `types` filter.
const symbolTypeValues = [
  "function",
  "class",
  "variable",
  "constant",
  "interface",
  "type",
  "enum",
  "method",
  "property"
];
/**
 * Input schema for the list-symbols feature.
 * Requires file_path or content (enforced by refine()); `types` optionally
 * narrows the result to specific symbol kinds.
 */
const listSymbolsSchema = z.object({
  file_path: z.string().optional().describe("Path to the file to analyze (either file_path or content required)"),
  content: z.string().optional().describe("Code content to analyze directly (either file_path or content required)"),
  language: z.string().optional().describe("Language name (auto-detected from file path if not provided)"),
  types: z.array(z.enum(symbolTypeValues)).optional().describe("Filter by symbol types: function, class, variable, constant, interface, type, enum, method, property")
}).refine((data) => data.file_path ?? data.content, { message: "Either file_path or content must be provided" });
|
|
3606
|
+
|
|
3607
|
+
//#endregion
|
|
3608
|
+
//#region src/core/fallback/index.ts
|
|
3609
|
+
// Lazily-populated cache for the fallback (LangChain splitter) config.
// null until first load; the loader lives outside this chunk.
let langchainConfig = null;
/** Reset the cached fallback config so the next access reloads it. */
function clearConfigCache() {
  langchainConfig = null;
}
// Hook into the global cache registry so a bulk cache clear resets this too.
registerCache("fallback:config", clearConfigCache);
|
|
3614
|
+
|
|
3615
|
+
//#endregion
|
|
3616
|
+
//#region src/core/unified/index.ts
|
|
3617
|
+
// Lazily-populated module-level caches for the unified language config and
// the lookup tables derived from it. NOTE(review): the code that fills these
// is outside this chunk.
let configCache = null;
let binaryExtensionsCache = null;
let extensionToLanguageCache = null;
let specialFilenamesCache = null;
/** Clear caches (for testing) */
function clearUnifiedCache() {
  configCache = null;
  binaryExtensionsCache = null;
  extensionToLanguageCache = null;
  specialFilenamesCache = null;
}
// Hook into the global cache registry so a bulk cache clear resets this too.
registerCache("unified:config", clearUnifiedCache);
|
|
3629
|
+
|
|
3630
|
+
//#endregion
|
|
3631
|
+
//#region src/features/analyze-file/index.ts
|
|
3632
|
+
/**
 * Input schema for the analyze-file feature.
 * Unlike the other file-based schemas here, file_path is required (no raw
 * content variant); the include_* flags toggle response sections.
 */
const analyzeFileSchema = z.object({
  file_path: z.string().describe("Path to the file to analyze"),
  include_ast: z.boolean().default(false).describe("Include full AST in response (default: false, can be verbose)"),
  include_symbols: z.boolean().default(true).describe("Include extracted symbols (default: true)"),
  include_imports: z.boolean().default(true).describe("Include import statements (default: true)"),
  include_exports: z.boolean().default(true).describe("Include export statements (default: true)"),
  ast_max_depth: z.number().int().positive().optional().describe("Maximum depth for AST if included"),
  include_chunks: z.boolean().default(false).describe("Include text chunks for fallback parsing (default: false)")
});
|
|
3641
|
+
|
|
3642
|
+
//#endregion
|
|
3643
|
+
//#region src/features/index.ts
|
|
3644
|
+
/**
 * Registry of all MCP features exposed as tools.
 * Consumed by `registerTools`; array order is the registration order.
 */
const features = [
  infoFeature,
  indexCodebaseFeature,
  searchCodeFeature,
  getIndexStatusFeature,
  updateIndexFeature
];
|
|
3651
|
+
|
|
3652
|
+
//#endregion
|
|
3653
|
+
//#region src/tools/adapter.ts
|
|
3654
|
+
/**
 * Convert a zod schema into the shape expected by `McpServer.tool()`.
 * A ZodObject exposes its raw `.shape` (field map); any other schema is
 * wrapped under a single `input` key.
 *
 * @param {import('zod').ZodTypeAny} schema - Feature input schema.
 * @returns {object} Field map suitable for MCP tool registration.
 */
function zodToMcpSchema(schema) {
  const isObjectSchema = schema instanceof z.ZodObject;
  return isObjectSchema ? schema.shape : { input: schema };
}
|
|
3658
|
+
/**
 * Register one feature as an MCP tool on the given server.
 * The tool handler runs the feature's execute() and converts its result
 * object into MCP content: `message` if present, otherwise pretty-printed
 * `data`; `isError` mirrors the inverted success flag.
 *
 * @param {object} server - McpServer instance exposing .tool().
 * @param {{name: string, description: string, schema: object, execute: Function}} feature
 */
function registerFeatureAsTool(server, feature) {
  const mcpSchema = zodToMcpSchema(feature.schema);
  server.tool(feature.name, feature.description, mcpSchema, async (params) => {
    // `await` is a no-op on non-promises, so this handles both the sync and
    // async execute() variants that the original branched on explicitly.
    const res = await feature.execute(params);
    const text = res.message ?? JSON.stringify(res.data, null, 2);
    return {
      content: [{
        type: "text",
        text
      }],
      isError: !res.success
    };
  });
}
|
|
3673
|
+
|
|
3674
|
+
//#endregion
|
|
3675
|
+
//#region src/tools/index.ts
|
|
3676
|
+
/**
 * Register every entry of the module-level `features` registry as an MCP
 * tool on the given server, in registry order.
 *
 * @param {object} server - McpServer instance.
 */
function registerTools(server) {
  features.forEach((feature) => {
    registerFeatureAsTool(server, feature);
  });
}
|
|
3679
|
+
|
|
3680
|
+
//#endregion
|
|
3681
|
+
//#region src/resources/index.ts
|
|
3682
|
+
/**
 * Register MCP resources on the server.
 * Currently a single resource, `server_info` at src://server/info, which
 * serves the getServerInfo() payload as pretty-printed JSON.
 *
 * @param {object} server - McpServer instance exposing .resource().
 */
function registerResources(server) {
  const readServerInfo = (uri) => {
    const payload = JSON.stringify(getServerInfo(), null, 2);
    return {
      contents: [{
        uri: uri.href,
        mimeType: "application/json",
        text: payload
      }]
    };
  };
  server.resource("server_info", "src://server/info", readServerInfo);
}
|
|
3692
|
+
|
|
3693
|
+
//#endregion
|
|
3694
|
+
//#region src/prompts/index.ts
|
|
3695
|
+
/**
 * Register MCP prompts
 *
 * Prompts are reusable templates that help AI assistants understand
 * how to use SRC effectively for code search and analysis.
 *
 * Registers three static prompts (no arguments, fixed text):
 * - "src-overview": what SRC is and when (not) to use it
 * - "code-search-workflow": status -> index -> search walkthrough
 * - "search-tips": how to phrase effective queries
 *
 * @param {object} server - McpServer instance exposing .registerPrompt().
 */
function registerPrompts(server) {
  // Overview prompt: capabilities and usage guidance.
  server.registerPrompt("src-overview", {
    title: "SRC Overview",
    description: "Learn about SRC capabilities and when to use it for code search and analysis"
  }, () => ({ messages: [{
    role: "user",
    content: {
      type: "text",
      text: `# SRC (Structured Repo Context) - Overview

## What is SRC?
SRC is a semantic code search MCP server. It indexes codebases and provides intelligent search using:
- **Vector embeddings** for semantic similarity (understands meaning, not just keywords)
- **BM25 keyword search** for exact matches
- **Hybrid search** combining both with RRF fusion
- **LLM re-ranking** for optimal relevance
- **Call graph analysis** showing function relationships

## When to use SRC?

**USE SRC when the user wants to:**
- Find code by meaning/concept ("find authentication logic", "where is error handling")
- Understand code relationships ("what calls this function", "what does this function call")
- Search across a large codebase
- Find similar code patterns
- Explore unfamiliar code

**DON'T USE SRC for:**
- Reading a specific file (use file read tools instead)
- Simple text search in a single file (use grep/search)
- Non-code queries

## Typical Workflow

1. **Check status**: Use \`get_index_status\` to see if index exists
2. **Index if needed**: Use \`index_codebase\` (only once per project)
3. **Search**: Use \`search_code\` with natural language queries

Note: When using \`serve\` mode, the server auto-indexes on startup and watches for file changes.

## Supported Languages
- **Full AST support (18)**: JavaScript, TypeScript, Python, Rust, Go, Java, C, C++, C#, Ruby, PHP, Kotlin, Scala, Swift, HTML, Svelte, OCaml
- **Text splitting (16+)**: Markdown, LaTeX, Solidity, Haskell, Elixir, and more
- **Generic (30+)**: Config files, shell scripts, SQL, and more

## Tips
- Use natural language queries: "authentication middleware" not "auth*"
- The hybrid search mode (default) works best for most queries
- Call context is included by default - shows who calls what`
    }
  }] }));
  // Workflow prompt: step-by-step tool usage plus search_code argument table.
  server.registerPrompt("code-search-workflow", {
    title: "Code Search Workflow",
    description: "Step-by-step guide for searching code with SRC"
  }, () => ({ messages: [{
    role: "user",
    content: {
      type: "text",
      text: `# Code Search Workflow with SRC

## Step 1: Check Index
\`\`\`
get_index_status()
\`\`\`

## Step 2: Index if Needed
If no index exists:
\`\`\`
index_codebase()
\`\`\`

## Step 3: Search
\`\`\`
search_code(query: "your search query here")
\`\`\`

## search_code Arguments

| Argument | Type | Default | Description |
|----------|------|---------|-------------|
| query | string | required | Natural language search query |
| limit | number | 10 | Max results to return |
| mode | "hybrid" / "vector" / "fts" | "hybrid" | Search mode |
| rerank | boolean | true | LLM re-ranking for better relevance |
| includeCallContext | boolean | true | Include caller/callee info |
| threshold | number | - | Distance threshold (vector mode only) |

## Search Modes
- **hybrid** (default): Vector + BM25 + RRF fusion - best overall
- **vector**: Semantic similarity only - good for conceptual queries
- **fts**: Keyword search only - good for exact identifiers

## Examples
\`\`\`
// Find authentication code
search_code(query: "user authentication and login")

// More results
search_code(query: "error handling", limit: 20)

// Exact identifier search
search_code(query: "UserAuthService", mode: "fts")

// Without call context (faster)
search_code(query: "database queries", includeCallContext: false)
\`\`\``
    }
  }] }));
  // Tips prompt: query-phrasing guidance and result-field reference.
  server.registerPrompt("search-tips", {
    title: "Search Tips",
    description: "Tips for writing effective code search queries"
  }, () => ({ messages: [{
    role: "user",
    content: {
      type: "text",
      text: `# Effective Code Search Tips

## Good Query Examples

| Goal | Good Query | Why |
|------|------------|-----|
| Find auth code | "user authentication and login validation" | Describes the concept |
| Find error handling | "error handling and exception catching" | Uses related terms |
| Find API endpoints | "REST API route handlers" | Specifies the pattern |
| Find database code | "database query and data persistence" | Covers the domain |
| Find a function | "calculateTotalPrice function" | Includes the name |

## Query Writing Tips

1. **Be descriptive, not literal**
- Good: "user password validation and hashing"
- Bad: "validatePassword"

2. **Include context**
- Good: "authentication middleware for Express routes"
- Bad: "auth middleware"

3. **Use domain language**
- Good: "shopping cart checkout process"
- Bad: "cart function"

4. **Combine concepts**
- Good: "file upload with size validation and error handling"
- Bad: "upload"

## Search Mode Selection

| Mode | Use When |
|------|----------|
| **hybrid** | Default choice, works for most queries |
| **vector** | Conceptual searches like "code that handles retries" |
| **fts** | Exact identifiers like "UserAuthService" |

## Understanding Results

Each result includes:
- **content**: The matching code chunk
- **filePath**: Source file location
- **startLine/endLine**: Line numbers
- **symbolName/Type**: Function or class name if detected
- **score**: Relevance score (higher = better match)
- **callers**: Functions that call this code
- **callees**: Functions this code calls`
    }
  }] }));
}
|
|
3867
|
+
|
|
3868
|
+
//#endregion
|
|
3869
|
+
//#region src/server.ts
|
|
3870
|
+
/**
 * Build a fully-wired MCP server: tools, resources, and prompts registered,
 * named/versioned from the module-level config.
 *
 * @returns {object} Configured McpServer, not yet connected to a transport.
 */
function createServer() {
  const { name, version } = config;
  const server = new McpServer({ name, version });
  // Registration order matches the original: tools, then resources, then prompts.
  for (const register of [registerTools, registerResources, registerPrompts]) {
    register(server);
  }
  return server;
}
|
|
3880
|
+
/**
 * Create the MCP server, connect it over stdio, and log a startup banner.
 * Resolves once the transport handshake completes.
 *
 * @returns {Promise<void>}
 */
async function startServer() {
  const server = createServer();
  // Transport is constructed inline; stdio is the only supported transport here.
  await server.connect(new StdioServerTransport());
  logger.info(`${config.name} v${config.version} started`);
}
|
|
3886
|
+
|
|
3887
|
+
//#endregion
|
|
3888
|
+
export { logger as a, colors as i, features as n, EMBEDDING_CONFIG as o, createIndexWatcher as r, config as s, startServer as t };
|
|
3889
|
+
//# sourceMappingURL=server-B2Ms4jQx.mjs.map
|