kiri-mcp-server 0.16.1 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +65 -22
- package/dist/package.json +4 -2
- package/dist/src/client/proxy.js +0 -0
- package/dist/src/daemon/daemon.js +0 -0
- package/dist/src/indexer/codeintel/dart/adapter.d.ts +36 -0
- package/dist/src/indexer/codeintel/dart/adapter.d.ts.map +1 -0
- package/dist/src/indexer/codeintel/dart/adapter.js +60 -0
- package/dist/src/indexer/codeintel/dart/adapter.js.map +1 -0
- package/dist/src/indexer/codeintel/dart/index.d.ts +7 -0
- package/dist/src/indexer/codeintel/dart/index.d.ts.map +1 -0
- package/dist/src/indexer/codeintel/dart/index.js +7 -0
- package/dist/src/indexer/codeintel/dart/index.js.map +1 -0
- package/dist/src/indexer/codeintel/index.d.ts +30 -0
- package/dist/src/indexer/codeintel/index.d.ts.map +1 -0
- package/dist/src/indexer/codeintel/index.js +32 -0
- package/dist/src/indexer/codeintel/index.js.map +1 -0
- package/dist/src/indexer/codeintel/java/analyzer.d.ts +22 -0
- package/dist/src/indexer/codeintel/java/analyzer.d.ts.map +1 -0
- package/dist/src/indexer/codeintel/java/analyzer.js +281 -0
- package/dist/src/indexer/codeintel/java/analyzer.js.map +1 -0
- package/dist/src/indexer/codeintel/java/index.d.ts +7 -0
- package/dist/src/indexer/codeintel/java/index.d.ts.map +1 -0
- package/dist/src/indexer/codeintel/java/index.js +7 -0
- package/dist/src/indexer/codeintel/java/index.js.map +1 -0
- package/dist/src/indexer/codeintel/php/analyzer.d.ts +23 -0
- package/dist/src/indexer/codeintel/php/analyzer.d.ts.map +1 -0
- package/dist/src/indexer/codeintel/php/analyzer.js +342 -0
- package/dist/src/indexer/codeintel/php/analyzer.js.map +1 -0
- package/dist/src/indexer/codeintel/php/index.d.ts +7 -0
- package/dist/src/indexer/codeintel/php/index.d.ts.map +1 -0
- package/dist/src/indexer/codeintel/php/index.js +7 -0
- package/dist/src/indexer/codeintel/php/index.js.map +1 -0
- package/dist/src/indexer/codeintel/registry.d.ts +76 -0
- package/dist/src/indexer/codeintel/registry.d.ts.map +1 -0
- package/dist/src/indexer/codeintel/registry.js +127 -0
- package/dist/src/indexer/codeintel/registry.js.map +1 -0
- package/dist/src/indexer/codeintel/rust/analyzer.d.ts +14 -0
- package/dist/src/indexer/codeintel/rust/analyzer.d.ts.map +1 -0
- package/dist/src/indexer/codeintel/rust/analyzer.js +388 -0
- package/dist/src/indexer/codeintel/rust/analyzer.js.map +1 -0
- package/dist/src/indexer/codeintel/rust/index.d.ts +5 -0
- package/dist/src/indexer/codeintel/rust/index.d.ts.map +1 -0
- package/dist/src/indexer/codeintel/rust/index.js +5 -0
- package/dist/src/indexer/codeintel/rust/index.js.map +1 -0
- package/dist/src/indexer/codeintel/swift/analyzer.d.ts +22 -0
- package/dist/src/indexer/codeintel/swift/analyzer.d.ts.map +1 -0
- package/dist/src/indexer/codeintel/swift/analyzer.js +271 -0
- package/dist/src/indexer/codeintel/swift/analyzer.js.map +1 -0
- package/dist/src/indexer/codeintel/swift/index.d.ts +7 -0
- package/dist/src/indexer/codeintel/swift/index.d.ts.map +1 -0
- package/dist/src/indexer/codeintel/swift/index.js +7 -0
- package/dist/src/indexer/codeintel/swift/index.js.map +1 -0
- package/dist/src/indexer/codeintel/types.d.ts +114 -0
- package/dist/src/indexer/codeintel/types.d.ts.map +1 -0
- package/dist/src/indexer/codeintel/types.js +13 -0
- package/dist/src/indexer/codeintel/types.js.map +1 -0
- package/dist/src/indexer/codeintel/typescript/analyzer.d.ts +22 -0
- package/dist/src/indexer/codeintel/typescript/analyzer.d.ts.map +1 -0
- package/dist/{indexer/codeintel.js → src/indexer/codeintel/typescript/analyzer.js} +62 -34
- package/dist/src/indexer/codeintel/typescript/analyzer.js.map +1 -0
- package/dist/src/indexer/codeintel/typescript/index.d.ts +7 -0
- package/dist/src/indexer/codeintel/typescript/index.d.ts.map +1 -0
- package/dist/src/indexer/codeintel/typescript/index.js +7 -0
- package/dist/src/indexer/codeintel/typescript/index.js.map +1 -0
- package/dist/src/indexer/codeintel/utils.d.ts +91 -0
- package/dist/src/indexer/codeintel/utils.d.ts.map +1 -0
- package/dist/src/indexer/codeintel/utils.js +145 -0
- package/dist/src/indexer/codeintel/utils.js.map +1 -0
- package/dist/src/indexer/codeintel.d.ts +33 -26
- package/dist/src/indexer/codeintel.d.ts.map +1 -1
- package/dist/src/indexer/codeintel.js +56 -1078
- package/dist/src/indexer/codeintel.js.map +1 -1
- package/dist/src/indexer/graph-metrics.d.ts.map +1 -1
- package/dist/src/indexer/graph-metrics.js +16 -4
- package/dist/src/indexer/graph-metrics.js.map +1 -1
- package/dist/src/server/boost-profiles.d.ts +1 -1
- package/dist/src/server/boost-profiles.d.ts.map +1 -1
- package/dist/src/server/boost-profiles.js +22 -0
- package/dist/src/server/boost-profiles.js.map +1 -1
- package/dist/src/server/main.js +0 -0
- package/dist/src/server/rpc.js +4 -4
- package/dist/src/server/rpc.js.map +1 -1
- package/package.json +10 -2
- package/dist/client/cli.js +0 -68
- package/dist/client/cli.js.map +0 -1
- package/dist/client/index.js +0 -5
- package/dist/client/index.js.map +0 -1
- package/dist/eval/metrics.js +0 -47
- package/dist/eval/metrics.js.map +0 -1
- package/dist/indexer/cli.js +0 -362
- package/dist/indexer/cli.js.map +0 -1
- package/dist/indexer/codeintel.js.map +0 -1
- package/dist/indexer/git.js +0 -30
- package/dist/indexer/git.js.map +0 -1
- package/dist/indexer/language.js +0 -34
- package/dist/indexer/language.js.map +0 -1
- package/dist/indexer/pipeline/filters/denylist.js +0 -71
- package/dist/indexer/pipeline/filters/denylist.js.map +0 -1
- package/dist/indexer/schema.js +0 -101
- package/dist/indexer/schema.js.map +0 -1
- package/dist/server/bootstrap.js +0 -19
- package/dist/server/bootstrap.js.map +0 -1
- package/dist/server/context.js +0 -1
- package/dist/server/context.js.map +0 -1
- package/dist/server/fallbacks/degradeController.js +0 -69
- package/dist/server/fallbacks/degradeController.js.map +0 -1
- package/dist/server/handlers.js +0 -1268
- package/dist/server/handlers.js.map +0 -1
- package/dist/server/main.js +0 -151
- package/dist/server/main.js.map +0 -1
- package/dist/server/observability/metrics.js +0 -56
- package/dist/server/observability/metrics.js.map +0 -1
- package/dist/server/observability/tracing.js +0 -58
- package/dist/server/observability/tracing.js.map +0 -1
- package/dist/server/rpc.js +0 -477
- package/dist/server/rpc.js.map +0 -1
- package/dist/server/runtime.js +0 -47
- package/dist/server/runtime.js.map +0 -1
- package/dist/server/scoring.js +0 -116
- package/dist/server/scoring.js.map +0 -1
- package/dist/server/stdio.js +0 -76
- package/dist/server/stdio.js.map +0 -1
- package/dist/shared/duckdb.js +0 -119
- package/dist/shared/duckdb.js.map +0 -1
- package/dist/shared/embedding.js +0 -98
- package/dist/shared/embedding.js.map +0 -1
- package/dist/shared/index.js +0 -9
- package/dist/shared/index.js.map +0 -1
- package/dist/shared/security/config.js +0 -64
- package/dist/shared/security/config.js.map +0 -1
- package/dist/shared/security/masker.js +0 -56
- package/dist/shared/security/masker.js.map +0 -1
- package/dist/shared/tokenizer.js +0 -4
- package/dist/shared/tokenizer.js.map +0 -1
- package/dist/shared/utils/simpleYaml.js +0 -89
- package/dist/shared/utils/simpleYaml.js.map +0 -1
- package/dist/src/server/rrf.d.ts +0 -86
- package/dist/src/server/rrf.d.ts.map +0 -1
- package/dist/src/server/rrf.js +0 -108
- package/dist/src/server/rrf.js.map +0 -1
- package/dist/src/shared/embedding/engine.d.ts +0 -38
- package/dist/src/shared/embedding/engine.d.ts.map +0 -1
- package/dist/src/shared/embedding/engine.js +0 -6
- package/dist/src/shared/embedding/engine.js.map +0 -1
- package/dist/src/shared/embedding/lsh-engine.d.ts +0 -11
- package/dist/src/shared/embedding/lsh-engine.d.ts.map +0 -1
- package/dist/src/shared/embedding/lsh-engine.js +0 -14
- package/dist/src/shared/embedding/lsh-engine.js.map +0 -1
- package/dist/src/shared/embedding/registry.d.ts +0 -25
- package/dist/src/shared/embedding/registry.d.ts.map +0 -1
- package/dist/src/shared/embedding/registry.js +0 -50
- package/dist/src/shared/embedding/registry.js.map +0 -1
- package/dist/src/shared/embedding/semantic-engine.d.ts +0 -14
- package/dist/src/shared/embedding/semantic-engine.d.ts.map +0 -1
- package/dist/src/shared/embedding/semantic-engine.js +0 -50
- package/dist/src/shared/embedding/semantic-engine.js.map +0 -1
- package/dist/src/shared/models/model-manager.d.ts +0 -38
- package/dist/src/shared/models/model-manager.d.ts.map +0 -1
- package/dist/src/shared/models/model-manager.js +0 -116
- package/dist/src/shared/models/model-manager.js.map +0 -1
- package/dist/src/shared/models/model-manifest.d.ts +0 -22
- package/dist/src/shared/models/model-manifest.d.ts.map +0 -1
- package/dist/src/shared/models/model-manifest.js +0 -24
- package/dist/src/shared/models/model-manifest.js.map +0 -1
package/dist/server/handlers.js
DELETED
|
@@ -1,1268 +0,0 @@
|
|
|
1
|
-
import path from "node:path";
|
|
2
|
-
import { generateEmbedding, structuralSimilarity } from "../shared/embedding.js";
|
|
3
|
-
import { encode as encodeGPT } from "../shared/tokenizer.js";
|
|
4
|
-
import { coerceProfileName, loadScoringProfile } from "./scoring.js";
|
|
5
|
-
// Result-size defaults and caps.
const DEFAULT_SEARCH_LIMIT = 50;
const DEFAULT_SNIPPET_WINDOW = 150;
const DEFAULT_BUNDLE_LIMIT = 7; // Reduced from 12 to optimize token usage
const MAX_BUNDLE_LIMIT = 20;

// Keyword-extraction and dependency-expansion caps.
const MAX_KEYWORDS = 12;
const MAX_MATCHES_PER_KEYWORD = 40;
const MAX_DEPENDENCY_SEEDS = 8;
const MAX_DEPENDENCY_SEEDS_QUERY_LIMIT = 100; // Upper bound kept as a SQL-injection safeguard

// Snippet / rerank tuning.
const NEARBY_LIMIT = 6;
const FALLBACK_SNIPPET_WINDOW = 40; // Reduced from 120 to optimize token usage
const MAX_RERANK_LIMIT = 50;

// Words ignored by the keyword extractors in this module.
const STOP_WORDS = new Set([
    "the", "and", "for", "with", "from", "this", "that", "have",
    "has", "will", "would", "into", "about", "there", "their", "your",
    "fix", "test", "tests", "issue", "error", "bug", "fail", "failing",
    "make", "when", "where", "should", "could", "need", "goal",
]);
|
|
49
|
-
/**
 * Clamp a caller-supplied search limit to the supported range.
 *
 * The value is coerced with Number() before the NaN check so that a
 * non-numeric truthy input (for example "abc" or an object) falls back to the
 * default instead of leaking NaN out of Math.floor/Math.min.
 *
 * @param limit - Requested result count; may be missing or non-numeric.
 * @returns An integer between 1 and 100, or DEFAULT_SEARCH_LIMIT when the
 *          input is falsy or cannot be read as a number.
 */
function normalizeLimit(limit) {
    const requested = Number(limit);
    if (!limit || Number.isNaN(requested)) {
        return DEFAULT_SEARCH_LIMIT;
    }
    return Math.min(Math.max(1, Math.floor(requested)), 100);
}
|
|
55
|
-
/**
 * Build a preview around the first case-insensitive occurrence of `query`.
 *
 * @param content - Full text to search.
 * @param query - Text to look for.
 * @returns `{ preview, line }`: up to 120 characters on either side of the
 *          match plus its 1-based line, or the first 240 characters and
 *          line 1 when there is no match.
 */
function buildPreview(content, query) {
    const hit = content.toLowerCase().indexOf(query.toLowerCase());
    if (hit < 0) {
        return { preview: content.slice(0, 240), line: 1 };
    }
    // The match line equals the number of line fragments in the text before the match.
    const line = hit === 0 ? 1 : content.slice(0, hit).split(/\r?\n/).length;
    const from = Math.max(0, hit - 120);
    const to = Math.min(content.length, hit + query.length + 120);
    return { preview: content.slice(from, to), line };
}
|
|
70
|
-
/**
 * Clamp a caller-supplied bundle limit to the supported range.
 *
 * The value is coerced with Number() before the NaN check so that a
 * non-numeric truthy input falls back to the default instead of producing NaN.
 *
 * @param limit - Requested bundle size; may be missing or non-numeric.
 * @returns An integer between 1 and MAX_BUNDLE_LIMIT, or DEFAULT_BUNDLE_LIMIT
 *          when the input is falsy or cannot be read as a number.
 */
function normalizeBundleLimit(limit) {
    const requested = Number(limit);
    if (!limit || Number.isNaN(requested)) {
        return DEFAULT_BUNDLE_LIMIT;
    }
    return Math.min(Math.max(1, Math.floor(requested)), MAX_BUNDLE_LIMIT);
}
|
|
76
|
-
/**
 * Resolve the tokenization strategy.
 *
 * Reads the KIRI_TOKENIZATION_STRATEGY environment variable
 * (case-insensitively); only "legacy" and "hybrid" are honoured, anything
 * else yields the default.
 *
 * @returns "legacy", "hybrid", or "phrase-aware" (default).
 */
function getTokenizationStrategy() {
    const configured = process.env.KIRI_TOKENIZATION_STRATEGY?.toLowerCase();
    switch (configured) {
        case "legacy":
        case "hybrid":
            return configured;
        default:
            return "phrase-aware";
    }
}
|
|
87
|
-
/**
 * Pull out phrases wrapped in double or single quotes.
 * Example: 'search "page-agent handler" test' → ["page-agent handler"]
 *
 * @param text - Raw input text.
 * @returns `{ phrases, remaining }`: the trimmed, lower-cased phrases of at
 *          least 3 characters, and the text with every quoted span (kept or
 *          not) replaced by a single space.
 */
function extractQuotedPhrases(text) {
    const quotePattern = /"([^"]+)"|'([^']+)'/g;
    const phrases = [];
    let remaining = text;
    for (const found of text.matchAll(quotePattern)) {
        const [quotedSpan, doubleQuoted, singleQuoted] = found;
        const phrase = (doubleQuoted || singleQuoted || "").trim().toLowerCase();
        if (phrase.length >= 3) {
            phrases.push(phrase);
        }
        // Drop the quoted span so later extraction phases do not see it again.
        remaining = remaining.replace(quotedSpan, " ");
    }
    return { phrases, remaining };
}
|
|
106
|
-
/**
 * Collect hyphen-joined terms.
 * Example: "page-agent lambda-handler" → ["page-agent", "lambda-handler"]
 *
 * @param text - Input text.
 * @returns Lower-cased terms made of alphanumeric runs joined by hyphens,
 *          at least 3 characters long and not present in STOP_WORDS.
 */
function extractHyphenatedTerms(text) {
    const found = text.match(/\b[a-z0-9]+(?:-[a-z0-9]+)+\b/gi) ?? [];
    const terms = [];
    for (const raw of found) {
        const term = raw.toLowerCase();
        if (term.length >= 3 && !STOP_WORDS.has(term)) {
            terms.push(term);
        }
    }
    return terms;
}
|
|
118
|
-
/**
 * Collect the segments of path-like terms.
 * Example: "lambda/page-agent/handler" → ["lambda", "page-agent", "handler"]
 *
 * @param text - Input text.
 * @returns Unique lower-cased segments in first-seen order, each at least
 *          3 characters long and not present in STOP_WORDS.
 */
function extractPathSegments(text) {
    const pathLike = text.match(/\b[a-z0-9_-]+(?:\/[a-z0-9_-]+)+\b/gi) || [];
    // A Set keeps insertion order, so de-duplication preserves first-seen order.
    const unique = new Set();
    pathLike
        .flatMap((candidate) => candidate.toLowerCase().split("/"))
        .filter((piece) => piece.length >= 3 && !STOP_WORDS.has(piece))
        .forEach((piece) => unique.add(piece));
    return [...unique];
}
|
|
136
|
-
/**
 * Extract plain words (the legacy tokenization logic).
 *
 * @param text - Input text; lower-cased before splitting.
 * @param strategy - With "legacy", hyphens act as separators; with any other
 *                   value hyphens stay inside words.
 * @returns Words of at least 3 characters that are not in STOP_WORDS, in
 *          input order (duplicates are kept).
 */
function extractRegularWords(text, strategy) {
    const separator = strategy === "legacy" ? /[^a-z0-9_]+/iu : /[^a-z0-9_-]+/iu;
    const kept = [];
    for (const piece of text.toLowerCase().split(separator)) {
        const word = piece.trim();
        if (word.length >= 3 && !STOP_WORDS.has(word)) {
            kept.push(word);
        }
    }
    return kept;
}
|
|
148
|
-
/**
 * Extract keywords, phrases, and path segments from free text.
 *
 * How hyphenated terms are treated depends on the tokenization strategy:
 * "phrase-aware" and "hybrid" keep them as phrases, and "hybrid" also adds
 * their individual parts as keywords.
 *
 * Every keyword goes through one helper, so duplicates (against both
 * keywords and phrases) are dropped and the MAX_KEYWORDS cap is honoured in
 * hybrid mode too. Previously the hybrid parts bypassed both checks.
 *
 * @param text - Raw goal/query text.
 * @returns `{ phrases, keywords, pathSegments }`, with at most MAX_KEYWORDS
 *          unique keywords.
 */
function extractKeywords(text) {
    const strategy = getTokenizationStrategy();
    const result = {
        phrases: [],
        keywords: [],
        pathSegments: [],
    };
    // Adds a keyword only when it is new and the cap has not been reached.
    const addKeyword = (word) => {
        const isNew = !result.keywords.includes(word) && !result.phrases.includes(word);
        if (isNew && result.keywords.length < MAX_KEYWORDS) {
            result.keywords.push(word);
        }
    };
    // Phase 1: quoted phrases.
    const { phrases: quotedPhrases, remaining: afterQuotes } = extractQuotedPhrases(text);
    result.phrases.push(...quotedPhrases);
    // Phase 2: path segments.
    result.pathSegments.push(...extractPathSegments(afterQuotes));
    // Phase 3: hyphenated terms (phrase-aware or hybrid mode only).
    if (strategy === "phrase-aware" || strategy === "hybrid") {
        const hyphenatedTerms = extractHyphenatedTerms(afterQuotes);
        result.phrases.push(...hyphenatedTerms);
        // In hybrid mode the individual parts of each term also become keywords.
        if (strategy === "hybrid") {
            for (const term of hyphenatedTerms) {
                for (const part of term.split("-")) {
                    if (part.length >= 3 && !STOP_WORDS.has(part)) {
                        addKeyword(part);
                    }
                }
            }
        }
    }
    // Phase 4: regular words, until the keyword cap is reached.
    for (const word of extractRegularWords(afterQuotes, strategy)) {
        if (result.keywords.length >= MAX_KEYWORDS) {
            break;
        }
        addKeyword(word);
    }
    return result;
}
|
|
190
|
-
/**
 * Return the candidate stored under `filePath`, creating it on first use.
 *
 * @param map - Map from file path to candidate record.
 * @param filePath - Key to look up.
 * @returns The existing candidate, or a new zero-score candidate that has
 *          also been stored in `map`.
 */
function ensureCandidate(map, filePath) {
    const existing = map.get(filePath);
    if (existing) {
        return existing;
    }
    const created = {
        path: filePath,
        score: 0,
        reasons: new Set(),
        matchLine: null,
        content: null,
        totalLines: null,
        lang: null,
        ext: null,
        embedding: null,
        semanticSimilarity: null,
    };
    map.set(filePath, created);
    return created;
}
|
|
209
|
-
/**
 * Decode a JSON-encoded embedding vector.
 *
 * @param vectorJson - JSON text expected to hold an array.
 * @param vectorDims - Expected number of dimensions.
 * @returns An array of exactly `vectorDims` finite numbers (extra entries
 *          are ignored), or null when the input is missing, not valid JSON,
 *          not an array, too short, or holds a non-finite entry.
 */
function parseEmbedding(vectorJson, vectorDims) {
    if (!vectorJson || !vectorDims || vectorDims <= 0) {
        return null;
    }
    try {
        const decoded = JSON.parse(vectorJson);
        if (!Array.isArray(decoded)) {
            return null;
        }
        const vector = [];
        let position = 0;
        while (position < decoded.length && position < vectorDims) {
            const entry = decoded[position];
            const value = typeof entry === "number" ? entry : Number(entry);
            if (!Number.isFinite(value)) {
                return null;
            }
            vector.push(value);
            position += 1;
        }
        return vector.length === vectorDims ? vector : null;
    }
    catch {
        // Any parse or coercion failure is treated as "no embedding".
        return null;
    }
}
|
|
233
|
-
/**
 * Add a structural-similarity bonus to each candidate that has an embedding.
 *
 * For each such candidate whose similarity to the query embedding is finite
 * and positive, this records the similarity on the candidate, adds
 * `structuralWeight * similarity` to its score, and adds a
 * `structural:<similarity to 2 decimals>` reason. Candidates are mutated in place.
 *
 * @param candidates - Iterable of candidate records.
 * @param queryEmbedding - Embedding of the query; nothing happens when falsy.
 * @param structuralWeight - Multiplier for the similarity; nothing happens when <= 0.
 */
function applyStructuralScores(candidates, queryEmbedding, structuralWeight) {
    if (!queryEmbedding || structuralWeight <= 0) {
        return;
    }
    for (const candidate of candidates) {
        if (candidate.embedding) {
            const similarity = structuralSimilarity(queryEmbedding, candidate.embedding);
            if (Number.isFinite(similarity) && similarity > 0) {
                candidate.semanticSimilarity = similarity;
                candidate.score += structuralWeight * similarity;
                candidate.reasons.add(`structural:${similarity.toFixed(2)}`);
            }
        }
    }
}
|
|
250
|
-
/**
 * Load the stored embeddings for a set of paths within one repository.
 *
 * @param db - Database handle exposing `all(sql, params)`.
 * @param repoId - Repository identifier bound to the first placeholder.
 * @param paths - File paths to look up; an empty list skips the query.
 * @returns A Map from path to parsed embedding. Rows whose embedding cannot
 *          be parsed are left out.
 */
async function fetchEmbeddingMap(db, repoId, paths) {
    const embeddingsByPath = new Map();
    if (paths.length === 0) {
        return embeddingsByPath;
    }
    // One bound placeholder per path keeps the IN (...) list parameterized.
    const placeholders = paths.map(() => "?").join(", ");
    const sql = `
SELECT path, vector_json, dims AS vector_dims
FROM file_embedding
WHERE repo_id = ? AND path IN (${placeholders})
`;
    const rows = await db.all(sql, [repoId, ...paths]);
    rows.forEach(({ path: filePath, vector_json: vectorJson, vector_dims: vectorDims }) => {
        const vector = parseEmbedding(vectorJson, vectorDims);
        if (vector) {
            embeddingsByPath.set(filePath, vector);
        }
    });
    return embeddingsByPath;
}
|
|
269
|
-
/**
 * Load one file's text content, metadata, and embedding.
 *
 * @param db - Database handle exposing `all(sql, params)`.
 * @param repoId - Repository identifier.
 * @param filePath - Path of the file within the repository.
 * @returns `{ content, lang, ext, totalLines, embedding }`, or null when the
 *          file is not found, is flagged binary, or has null content.
 *          `totalLines` is 0 for empty content; `embedding` is null when
 *          absent or unparsable.
 */
async function loadFileContent(db, repoId, filePath) {
    const sql = `
SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
FROM file f
JOIN blob b ON b.hash = f.blob_hash
LEFT JOIN file_embedding fe
ON fe.repo_id = f.repo_id
AND fe.path = f.path
WHERE f.repo_id = ? AND f.path = ?
LIMIT 1
`;
    const rows = await db.all(sql, [repoId, filePath]);
    const record = rows[0];
    if (!record || record.is_binary || record.content === null) {
        return null;
    }
    const { content, lang, ext } = record;
    let totalLines = 0;
    if (content.length !== 0) {
        totalLines = content.split(/\r?\n/).length;
    }
    const embedding = parseEmbedding(record.vector_json ?? null, record.vector_dims ?? null);
    return { content, lang, ext, totalLines, embedding };
}
|
|
293
|
-
/**
 * Choose the snippet that best corresponds to a matched line.
 *
 * Order of preference: the first snippet when there is no match line; the
 * first snippet whose [start_line, end_line] range contains the line; the
 * first snippet when the line precedes it; otherwise the last snippet.
 *
 * @param snippets - Array of records with `start_line` and `end_line`.
 * @param matchLine - 1-based line of the match, or null.
 * @returns The chosen snippet, or null when `snippets` has no first element.
 */
function selectSnippet(snippets, matchLine) {
    const head = snippets[0];
    if (!head) {
        return null;
    }
    if (matchLine === null) {
        return head;
    }
    for (const snippet of snippets) {
        if (snippet.start_line <= matchLine && matchLine <= snippet.end_line) {
            return snippet;
        }
    }
    if (matchLine < head.start_line) {
        return head;
    }
    return snippets.at(-1) ?? head;
}
|
|
311
|
-
/**
 * Render the lines `startLine`..`endLine` (1-based, inclusive) as a preview.
 *
 * @param content - Full file text.
 * @param startLine - First line to include.
 * @param endLine - Last line to include.
 * @returns The selected lines joined with "\n"; when longer than 240
 *          characters, the first 239 characters followed by "…".
 */
function buildSnippetPreview(content, startLine, endLine) {
    const allLines = content.split(/\r?\n/);
    // Clamp both bounds into the file and keep the range from going backwards.
    const from = Math.max(0, Math.min(startLine - 1, allLines.length));
    const to = Math.max(from, Math.min(endLine, allLines.length));
    const excerpt = allLines.slice(from, to).join("\n");
    return excerpt.length > 240 ? `${excerpt.slice(0, 239)}…` : excerpt;
}
|
|
321
|
-
/**
 * Count the tokens in a line range of `content`.
 *
 * Uses the `encodeGPT` tokenizer imported at the top of the file; if it
 * throws, a warning is logged and the count falls back to roughly one token
 * per 4 characters (minimum 1).
 *
 * @param content - Full file content.
 * @param startLine - First line (1-indexed).
 * @param endLine - Last line (1-indexed).
 * @returns Token count for the selected lines.
 */
function estimateTokensFromContent(content, startLine, endLine) {
    const allLines = content.split(/\r?\n/);
    const text = allLines
        .slice(Math.max(0, startLine - 1), Math.min(endLine, allLines.length))
        .join("\n");
    try {
        return encodeGPT(text).length;
    }
    catch (error) {
        // Fallback: about 4 characters per token for average English text.
        console.warn("Token encoding failed, using character-based fallback", error);
        return Math.max(1, Math.ceil(text.length / 4));
    }
}
|
|
346
|
-
/**
 * Split a multi-word query into words for building OR search conditions.
 *
 * @param query - Search query string.
 * @returns The pieces longer than 2 characters after splitting on
 *          whitespace, slashes, hyphens, and underscores; when none
 *          qualifies, a one-element array holding the original query.
 */
function splitQueryWords(query) {
    const longEnough = [];
    for (const piece of query.split(/[\s/\-_]+/)) {
        if (piece.length > 2) {
            longEnough.push(piece);
        }
    }
    // If every piece was filtered out, search for the query as given.
    return longEnough.length === 0 ? [query] : longEnough;
}
|
|
356
|
-
/**
 * Boost or penalize a score according to the file's type and location.
 *
 * Files under a blacklisted directory always score -100, whatever the
 * profile, with one exception: under the "docs" profile the "docs/"
 * directory is not blacklisted, because that profile exists to surface
 * documentation. Previously "docs/" files scored -100 even under "docs".
 *
 * @param path - Repository-relative file path.
 * @param baseScore - Score before boosting.
 * @param profile - "default" favours implementation files, "docs" favours
 *                  documentation, "none" applies only the blacklist. Any
 *                  other value is treated like "default".
 * @returns The adjusted score.
 */
function applyFileTypeBoost(path, baseScore, profile = "default") {
    // Directories that are almost always irrelevant for code context.
    const blacklistedDirs = [
        ".cursor/",
        ".devcontainer/",
        ".serena/",
        "__mocks__/",
        "docs/",
        ".git/",
        "node_modules/",
    ];
    const isExempt = (dir) => profile === "docs" && dir === "docs/";
    if (blacklistedDirs.some((dir) => path.startsWith(dir) && !isExempt(dir))) {
        return -100; // Effectively remove it
    }
    if (profile === "none") {
        return baseScore;
    }
    const isSourceScript = path.startsWith("src/") &&
        (path.endsWith(".ts") || path.endsWith(".js") || path.endsWith(".tsx"));
    if (profile === "docs") {
        if (path.endsWith(".md") || path.endsWith(".yaml") || path.endsWith(".yml")) {
            return baseScore * 1.8; // Stronger boost for docs
        }
        if (isSourceScript) {
            return baseScore * 0.5; // Stronger penalty for implementation files
        }
        return baseScore;
    }
    // Default profile: prioritize implementation files, heavily penalize docs.
    const docExtensions = [".md", ".yaml", ".yml", ".mdc", ".json"];
    if (docExtensions.some((ext) => path.endsWith(ext))) {
        return baseScore * 0.1; // Heavy penalty for docs
    }
    if (path.startsWith("src/app/")) {
        return baseScore * 1.8;
    }
    if (path.startsWith("src/components/")) {
        return baseScore * 1.7;
    }
    if (path.startsWith("src/lib/")) {
        return baseScore * 1.6;
    }
    if (isSourceScript) {
        return baseScore * 1.5;
    }
    if (path.startsWith("tests/") || path.startsWith("test/")) {
        return baseScore * 0.2; // Also penalize tests in default mode
    }
    return baseScore;
}
|
|
414
|
-
/**
 * Apply a boost profile for contextBundle.
 * Mutates the candidate's score and reasons directly.
 *
 * Steps, in order:
 *  1. Profile "none": nothing is changed.
 *  2. Path matching (only when `extractedTerms` is given and
 *     `pathMatchWeight` > 0): the first phrase found in the path adds
 *     1.5 × weight, the first path segment equal to a directory/file part
 *     adds 1 × weight, and the first keyword found in the path adds 0.5 × weight.
 *  3. Blacklisted directories set the score to -100 and stop. Under the
 *     "docs" profile the "docs/" directory is exempt, because that profile
 *     exists to surface documentation (it used to be blacklisted there too).
 *  4. Test files, lock files, config files, and migration paths each apply
 *     a fixed penalty and stop.
 *  5. Profile-specific boosts/penalties for "docs" or "default".
 *
 * @param candidate - Candidate being scored; must expose `score` and a `reasons` Set.
 * @param row - File info with `path` and `ext`.
 * @param profile - Boost profile ("default" | "docs" | "none").
 * @param extractedTerms - Optional `{ phrases, pathSegments, keywords }` from the goal text.
 * @param pathMatchWeight - Optional weight for path matches.
 */
function applyBoostProfile(candidate, row, profile, extractedTerms, pathMatchWeight) {
    if (profile === "none") {
        return;
    }
    const { path, ext } = row;
    const lowerPath = path.toLowerCase();
    const fileName = path.split("/").pop() ?? "";
    // Path-based scoring: boost when goal keywords/phrases appear in the file path.
    if (extractedTerms && pathMatchWeight && pathMatchWeight > 0) {
        // A phrase contained in the path carries the highest weight.
        for (const phrase of extractedTerms.phrases) {
            if (lowerPath.includes(phrase)) {
                candidate.score += pathMatchWeight * 1.5;
                candidate.reasons.add(`path-phrase:${phrase}`);
                break; // Only the first match counts
            }
        }
        // A path segment equal to one of the path's parts carries medium weight.
        const pathParts = lowerPath.split("/");
        for (const segment of extractedTerms.pathSegments) {
            if (pathParts.includes(segment)) {
                candidate.score += pathMatchWeight;
                candidate.reasons.add(`path-segment:${segment}`);
                break; // Only the first match counts
            }
        }
        // A plain keyword contained in the path carries low weight.
        for (const keyword of extractedTerms.keywords) {
            if (lowerPath.includes(keyword)) {
                candidate.score += pathMatchWeight * 0.5;
                candidate.reasons.add(`path-keyword:${keyword}`);
                break; // Only the first match counts
            }
        }
    }
    // Blacklisted directories that are almost always irrelevant for code context.
    const blacklistedDirs = [
        ".cursor/",
        ".devcontainer/",
        ".serena/",
        "__mocks__/",
        "docs/",
        "test/",
        "tests/",
        ".git/",
        "node_modules/",
        "db/migrate/",
        "db/migrations/",
        "config/",
        "dist/",
        "build/",
        "out/",
        "coverage/",
        ".vscode/",
        ".idea/",
        "tmp/",
        "temp/",
    ];
    const isExempt = (dir) => profile === "docs" && dir === "docs/";
    if (blacklistedDirs.some((dir) => path.startsWith(dir) && !isExempt(dir))) {
        candidate.score = -100; // Effectively remove it
        candidate.reasons.add("penalty:blacklisted-dir");
        return;
    }
    // Penalize test files explicitly (even if outside test directories).
    const testPatterns = [".spec.ts", ".spec.js", ".test.ts", ".test.js", ".spec.tsx", ".test.tsx"];
    if (testPatterns.some((pattern) => lowerPath.endsWith(pattern))) {
        candidate.score -= 2.0; // Strong penalty for test files
        candidate.reasons.add("penalty:test-file");
        return;
    }
    // Penalize lock files and package manifests.
    const lockFiles = [
        "package-lock.json",
        "pnpm-lock.yaml",
        "yarn.lock",
        "bun.lockb",
        "Gemfile.lock",
        "Cargo.lock",
        "poetry.lock",
    ];
    if (lockFiles.includes(fileName)) {
        candidate.score -= 3.0; // Very strong penalty for lock files
        candidate.reasons.add("penalty:lock-file");
        return;
    }
    // Penalize configuration files.
    const configPatterns = [
        ".config.js",
        ".config.ts",
        ".config.mjs",
        ".config.cjs",
        "tsconfig.json",
        "jsconfig.json",
        "package.json",
        ".eslintrc",
        ".prettierrc",
        "jest.config",
        "vite.config",
        "vitest.config",
        "webpack.config",
        "rollup.config",
    ];
    // The .env check does not depend on the pattern, so it is evaluated once
    // here rather than inside the per-pattern callback.
    const isConfigFile = configPatterns.some((pattern) => lowerPath.endsWith(pattern)) ||
        fileName.startsWith(".env") ||
        fileName === "Dockerfile" ||
        fileName === "docker-compose.yml" ||
        fileName === "docker-compose.yaml";
    if (isConfigFile) {
        candidate.score -= 1.5; // Strong penalty for config files
        candidate.reasons.add("penalty:config-file");
        return;
    }
    // Penalize migration files (by path content).
    if (lowerPath.includes("migrate") || lowerPath.includes("migration")) {
        candidate.score -= 2.0; // Strong penalty for migrations
        candidate.reasons.add("penalty:migration-file");
        return;
    }
    const isScriptExt = ext === ".ts" || ext === ".tsx" || ext === ".js";
    if (profile === "docs") {
        // DOCS PROFILE: boost docs, penalize code.
        if (path.endsWith(".md") || path.endsWith(".yaml") || path.endsWith(".yml")) {
            candidate.score += 0.8;
            candidate.reasons.add("boost:doc-file");
        }
        else if (path.startsWith("src/") && isScriptExt) {
            candidate.score -= 0.5;
            candidate.reasons.add("penalty:impl-file");
        }
    }
    else if (profile === "default") {
        // DEFAULT PROFILE: penalize docs heavily, boost implementation files.
        const docExtensions = [".md", ".yaml", ".yml", ".mdc", ".json"];
        if (docExtensions.some((docExt) => path.endsWith(docExt))) {
            candidate.score -= 1.0; // Strong penalty to overcome structural similarity
            candidate.reasons.add("penalty:doc-file");
        }
        // More specific source directories get the higher boosts.
        if (path.startsWith("src/app/")) {
            candidate.score += 0.8;
            candidate.reasons.add("boost:app-file");
        }
        else if (path.startsWith("src/components/")) {
            candidate.score += 0.7;
            candidate.reasons.add("boost:component-file");
        }
        else if (path.startsWith("src/lib/")) {
            candidate.score += 0.6;
            candidate.reasons.add("boost:lib-file");
        }
        else if (path.startsWith("src/") && isScriptExt) {
            candidate.score += 0.5;
            candidate.reasons.add("boost:impl-file");
        }
    }
}
|
|
577
|
-
/**
 * Search indexed file contents for `params.query` and return ranked previews.
 *
 * Uses the FTS index (`fts_main_blob.match_bm25`) when `context.features.fts`
 * is enabled and otherwise falls back to an ILIKE scan over blob contents.
 * After the SQL query, each row's score is adjusted by `applyFileTypeBoost`
 * and the rows are re-sorted by the adjusted score.
 *
 * @param {object} context - Carries `db`, `repoId` and optional `features.fts`.
 * @param {object} params - `query` (required) plus optional `limit`, `lang`,
 *   `ext`, `path_prefix` and `boost_profile` (defaults to "default").
 * @returns {Promise<Array<object>>} Items of the form
 *   `{ path, preview, matchLine, lang, ext, score }`, highest score first.
 * @throws {Error} When `query` is missing or blank.
 */
export async function filesSearch(context, params) {
    const { db, repoId } = context;
    const { query } = params;
    if (!query || query.trim().length === 0) {
        throw new Error("files_search requires a non-empty query. Provide a search keyword to continue.");
    }
    const limit = normalizeLimit(params.limit);
    const hasFTS = context.features?.fts ?? false;
    // Language / extension / path filters are identical for both strategies,
    // so build them once and splice them into whichever query is used.
    const filterConditions = [];
    const filterValues = [];
    if (params.lang) {
        filterConditions.push("COALESCE(f.lang, '') = ?");
        filterValues.push(params.lang);
    }
    if (params.ext) {
        filterConditions.push("COALESCE(f.ext, '') = ?");
        filterValues.push(params.ext);
    }
    if (params.path_prefix) {
        filterConditions.push("f.path LIKE ?");
        filterValues.push(`${params.path_prefix}%`);
    }
    let sql;
    let values;
    if (hasFTS) {
        // FTS extension available: rank with fts_main_blob.match_bm25.
        const conditions = ["f.repo_id = ?", ...filterConditions];
        sql = `
      SELECT f.path, f.lang, f.ext, b.content, fts.score
      FROM file f
      JOIN blob b ON b.hash = f.blob_hash
      JOIN (
        SELECT hash, fts_main_blob.match_bm25(hash, ?) AS score
        FROM blob
        WHERE score IS NOT NULL
      ) fts ON fts.hash = b.hash
      WHERE ${conditions.join(" AND ")}
      ORDER BY fts.score DESC
      LIMIT ?
    `;
        // The FTS query placeholder appears first in the SQL text (inside the subquery).
        values = [query, repoId, ...filterValues, limit];
    }
    else {
        // FTS extension unavailable: ILIKE search using the word-splitting logic.
        const conditions = ["f.repo_id = ?", "b.content IS NOT NULL"];
        values = [repoId];
        const words = splitQueryWords(query);
        if (words.length <= 1) {
            // Zero or one word: match the raw query. An empty word list must not
            // reach the OR-group below, which would emit "()" and break the SQL.
            conditions.push("b.content ILIKE '%' || ? || '%'");
            values.push(query);
        }
        else {
            const wordConditions = words.map(() => "b.content ILIKE '%' || ? || '%'");
            conditions.push(`(${wordConditions.join(" OR ")})`);
            values.push(...words);
        }
        conditions.push(...filterConditions);
        values.push(...filterValues, limit);
        sql = `
      SELECT f.path, f.lang, f.ext, b.content
      FROM file f
      JOIN blob b ON b.hash = f.blob_hash
      WHERE ${conditions.join(" AND ")}
      ORDER BY f.path
      LIMIT ?
    `;
    }
    const rows = await db.all(sql, values);
    const boostProfile = params.boost_profile ?? "default";
    return rows
        .map((row) => {
        const { preview, line } = buildPreview(row.content ?? "", query);
        // BM25 score when FTS was used; ILIKE rows carry no score and start at 1.0.
        const baseScore = row.score ?? 1.0;
        return {
            path: row.path,
            preview,
            matchLine: line,
            lang: row.lang,
            ext: row.ext,
            score: applyFileTypeBoost(row.path, baseScore, boostProfile),
        };
    })
        // Re-sort because the boost can reorder rows relative to the SQL ordering.
        .sort((a, b) => b.score - a.score);
}
|
|
675
|
-
/**
 * Return a slice of an indexed text file, preferring symbol-aligned snippets.
 *
 * When the file has indexed snippets and the caller did not pass `end_line`,
 * the snippet containing `start_line` is returned. If no snippet contains that
 * line, the nearest snippet starting after it is used, or the last snippet when
 * the line lies beyond all of them. Otherwise a plain line window clamped to
 * the file is returned.
 *
 * @param {object} context - Carries `db` and `repoId`.
 * @param {object} params - `path` (required), optional `start_line` / `end_line`.
 * @returns {Promise<object>} `{ path, startLine, endLine, content, totalLines,
 *   symbolName, symbolKind }`.
 * @throws {Error} When the path is missing, not indexed, binary, or has no content.
 */
export async function snippetsGet(context, params) {
    const { db, repoId } = context;
    if (!params.path) {
        throw new Error("snippets_get requires a file path. Specify a tracked text file path to continue.");
    }
    const rows = await db.all(`
      SELECT f.path, f.lang, f.ext, f.is_binary, b.content
      FROM file f
      JOIN blob b ON b.hash = f.blob_hash
      WHERE f.repo_id = ? AND f.path = ?
      LIMIT 1
    `, [repoId, params.path]);
    const row = rows[0];
    if (!row) {
        throw new Error("Requested snippet file was not indexed. Re-run the indexer or choose another path.");
    }
    if (row.is_binary) {
        throw new Error("Binary snippets are not supported. Choose a text file to preview its content.");
    }
    if (row.content == null) {
        throw new Error("Snippet content is unavailable. Re-run the indexer to refresh DuckDB state.");
    }
    const lines = row.content.split(/\r?\n/);
    const totalLines = lines.length;
    const snippetRows = await db.all(`
      SELECT s.snippet_id, s.start_line, s.end_line, s.symbol_id, sym.name AS symbol_name, sym.kind AS symbol_kind
      FROM snippet s
      LEFT JOIN symbol sym
        ON sym.repo_id = s.repo_id
       AND sym.path = s.path
       AND sym.symbol_id = s.symbol_id
      WHERE s.repo_id = ? AND s.path = ?
      ORDER BY s.start_line
    `, [repoId, params.path]);
    const requestedStart = params.start_line ?? 1;
    // An explicit end_line means the caller wants an exact range, not a symbol.
    const useSymbolSnippets = snippetRows.length > 0 && params.end_line === undefined;
    let snippetSelection = null;
    if (useSymbolSnippets) {
        // snippetRows is ordered by start_line, so the first row starting after
        // the requested line is the nearest following snippet.
        snippetSelection =
            snippetRows.find((snippet) => requestedStart >= snippet.start_line && requestedStart <= snippet.end_line) ??
                snippetRows.find((snippet) => snippet.start_line > requestedStart) ??
                snippetRows[snippetRows.length - 1] ??
                null;
    }
    let startLine;
    let endLine;
    let symbolName = null;
    let symbolKind = null;
    if (snippetSelection) {
        startLine = snippetSelection.start_line;
        endLine = snippetSelection.end_line;
        symbolName = snippetSelection.symbol_name;
        symbolKind = snippetSelection.symbol_kind;
    }
    else {
        // Plain window: default length is DEFAULT_SNIPPET_WINDOW, clamped to the file.
        const requestedEnd = params.end_line ?? Math.min(totalLines, requestedStart + DEFAULT_SNIPPET_WINDOW - 1);
        startLine = Math.max(1, Math.min(totalLines, requestedStart));
        endLine = Math.max(startLine, Math.min(totalLines, requestedEnd));
    }
    // Line numbers are 1-based and inclusive; slice() takes a 0-based half-open range.
    const snippetContent = lines.slice(startLine - 1, endLine).join("\n");
    return {
        path: row.path,
        startLine,
        endLine,
        content: snippetContent,
        totalLines,
        symbolName,
        symbolKind,
    };
}
|
|
754
|
-
/**
 * Assemble a ranked bundle of code snippets relevant to a goal.
 *
 * Candidates are gathered from phrase and keyword content matches, the file
 * being edited, dependencies of matched files, and files in the edited file's
 * directory. Scores are accumulated per candidate, adjusted by
 * `applyStructuralScores`, filtered to positive values, sorted, limited, and
 * normalised against the top score.
 *
 * @param {object} context - Carries `db` and `repoId`.
 * @param {object} params - `goal` (required); optional `limit`, `profile`,
 *   `boost_profile`, `compact`, and `artifacts`
 *   (`failing_tests`, `last_diff`, `editing_path`).
 * @returns {Promise<{context: Array<object>, tokens_estimate: number}>}
 *   Items are `{ path, range: [start, end], why, score }` plus `preview`
 *   unless `params.compact` is set.
 * @throws {Error} When `goal` is blank, `editing_path` contains characters
 *   outside `[a-zA-Z0-9_.\-/]`, or too many dependency seeds are collected.
 */
export async function contextBundle(context, params) {
    const { db, repoId } = context;
    const goal = params.goal?.trim() ?? "";
    if (goal.length === 0) {
        throw new Error("context_bundle requires a non-empty goal. Describe your objective to receive context.");
    }
    const limit = normalizeBundleLimit(params.limit);
    const artifacts = params.artifacts ?? {};
    const boostProfile = params.boost_profile ?? "default";
    // Load scoring weights (in the future from a config file or arguments).
    const profileName = coerceProfileName(params.profile ?? null);
    const weights = loadScoringProfile(profileName);
    const keywordSources = [goal];
    if (artifacts.failing_tests && artifacts.failing_tests.length > 0) {
        keywordSources.push(artifacts.failing_tests.join(" "));
    }
    if (artifacts.last_diff) {
        keywordSources.push(artifacts.last_diff);
    }
    if (artifacts.editing_path) {
        keywordSources.push(artifacts.editing_path);
    }
    const semanticSeed = keywordSources.join(" ");
    const queryEmbedding = generateEmbedding(semanticSeed)?.values ?? null;
    const extractedTerms = extractKeywords(semanticSeed);
    // Fallback: derive path segments from editing_path when nothing else was extracted.
    if (extractedTerms.phrases.length === 0 &&
        extractedTerms.keywords.length === 0 &&
        artifacts.editing_path) {
        const pathSegments = artifacts.editing_path
            .split(/[/_.-]/)
            .map((segment) => segment.toLowerCase())
            .filter((segment) => segment.length >= 3 && !STOP_WORDS.has(segment));
        extractedTerms.pathSegments.push(...pathSegments.slice(0, MAX_KEYWORDS));
    }
    // SQL-injection defence: accepted shape for file paths.
    const SAFE_PATH_PATTERN = /^[a-zA-Z0-9_.\-/]+$/;
    // Reject a malformed editing_path before issuing any database queries.
    if (artifacts.editing_path && !SAFE_PATH_PATTERN.test(artifacts.editing_path)) {
        throw new Error(`Invalid editing_path format. Path must contain only alphanumeric characters, underscores, dots, hyphens, and forward slashes.`);
    }
    const candidates = new Map();
    const stringMatchSeeds = new Set();
    const fileCache = new Map();
    // Shared by phrase and keyword matching: find files whose content contains
    // `term`, add `scoreDelta` to each, and record the match metadata.
    // NOTE(review): applyBoostProfile runs once per matching term, so a file that
    // matches several terms receives the boost/penalty repeatedly - confirm intended.
    const scoreTextMatches = async (term, scoreDelta, reasonLabel) => {
        const rows = await db.all(`
        SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
        FROM file f
        JOIN blob b ON b.hash = f.blob_hash
        LEFT JOIN file_embedding fe
          ON fe.repo_id = f.repo_id
         AND fe.path = f.path
        WHERE f.repo_id = ?
          AND f.is_binary = FALSE
          AND b.content ILIKE '%' || ? || '%'
        ORDER BY f.path
        LIMIT ?
      `, [repoId, term, MAX_MATCHES_PER_KEYWORD]);
        for (const row of rows) {
            if (row.content === null) {
                continue;
            }
            const candidate = ensureCandidate(candidates, row.path);
            candidate.score += scoreDelta;
            candidate.reasons.add(reasonLabel);
            // Apply boost profile to prioritize/penalize files based on type and location.
            applyBoostProfile(candidate, row, boostProfile, extractedTerms, weights.pathMatch);
            const { line } = buildPreview(row.content, term);
            // Keep the earliest matching line across all terms.
            candidate.matchLine =
                candidate.matchLine === null ? line : Math.min(candidate.matchLine, line);
            // Fill file metadata only the first time this candidate is seen.
            if (candidate.content == null) {
                candidate.content = row.content;
            }
            if (candidate.lang == null) {
                candidate.lang = row.lang;
            }
            if (candidate.ext == null) {
                candidate.ext = row.ext;
            }
            if (candidate.totalLines == null) {
                candidate.totalLines = row.content.length === 0 ? 0 : row.content.split(/\r?\n/).length;
            }
            if (candidate.embedding == null) {
                candidate.embedding = parseEmbedding(row.vector_json ?? null, row.vector_dims ?? null);
            }
            stringMatchSeeds.add(row.path);
            if (!fileCache.has(row.path)) {
                fileCache.set(row.path, {
                    content: row.content,
                    lang: row.lang,
                    ext: row.ext,
                    totalLines: candidate.totalLines ?? 0,
                    embedding: candidate.embedding,
                });
            }
        }
    };
    // Phrase matching (higher weight: textMatch x 2).
    for (const phrase of extractedTerms.phrases) {
        await scoreTextMatches(phrase, weights.textMatch * 2.0, `phrase:${phrase}`);
    }
    // Keyword matching (normal weight).
    for (const keyword of extractedTerms.keywords) {
        await scoreTextMatches(keyword, weights.textMatch, `text:${keyword}`);
    }
    if (artifacts.editing_path) {
        const editingCandidate = ensureCandidate(candidates, artifacts.editing_path);
        editingCandidate.score += weights.editingPath;
        editingCandidate.reasons.add("artifact:editing_path");
        if (editingCandidate.matchLine == null) {
            editingCandidate.matchLine = 1;
        }
    }
    const dependencySeeds = new Set();
    for (const pathSeed of stringMatchSeeds) {
        if (!SAFE_PATH_PATTERN.test(pathSeed)) {
            console.warn(`Skipping potentially unsafe path in dependency seeds: ${pathSeed}`);
            continue;
        }
        dependencySeeds.add(pathSeed);
        if (dependencySeeds.size >= MAX_DEPENDENCY_SEEDS) {
            break;
        }
    }
    if (artifacts.editing_path) {
        // Already validated against SAFE_PATH_PATTERN above.
        dependencySeeds.add(artifacts.editing_path);
    }
    if (dependencySeeds.size > 0) {
        // SQL-injection defence: check the size before generating placeholders.
        if (dependencySeeds.size > MAX_DEPENDENCY_SEEDS_QUERY_LIMIT) {
            throw new Error(`Too many dependency seeds: ${dependencySeeds.size} (max ${MAX_DEPENDENCY_SEEDS_QUERY_LIMIT}). Narrow your search criteria.`);
        }
        const placeholders = Array.from(dependencySeeds, () => "?").join(", ");
        // Defensive check: placeholders must be exactly "?, ?, ..." before interpolation.
        if (!/^(\?)(,\s*\?)*$/.test(placeholders)) {
            throw new Error("Invalid placeholder generation detected. Operation aborted for safety.");
        }
        const depRows = await db.all(`
        SELECT src_path, dst_kind, dst, rel
        FROM dependency
        WHERE repo_id = ? AND src_path IN (${placeholders})
      `, [repoId, ...dependencySeeds]);
        for (const dep of depRows) {
            if (dep.dst_kind !== "path") {
                continue;
            }
            const candidate = ensureCandidate(candidates, dep.dst);
            candidate.score += weights.dependency;
            candidate.reasons.add(`dep:${dep.src_path}`);
        }
    }
    if (artifacts.editing_path) {
        const directory = path.posix.dirname(artifacts.editing_path);
        if (directory && directory !== ".") {
            // Fetch one extra row because the edited file itself is skipped below.
            const nearRows = await db.all(`
          SELECT path
          FROM file
          WHERE repo_id = ?
            AND is_binary = FALSE
            AND path LIKE ?
          ORDER BY path
          LIMIT ?
        `, [repoId, `${directory}/%`, NEARBY_LIMIT + 1]);
            for (const near of nearRows) {
                if (near.path === artifacts.editing_path) {
                    continue;
                }
                const candidate = ensureCandidate(candidates, near.path);
                candidate.score += weights.proximity;
                candidate.reasons.add(`near:${directory}`);
            }
        }
    }
    // Ensure every candidate has content; drop those that cannot be loaded.
    const materializedCandidates = [];
    for (const candidate of candidates.values()) {
        if (!candidate.content) {
            let fileData = fileCache.get(candidate.path);
            if (!fileData) {
                fileData = await loadFileContent(db, repoId, candidate.path);
                if (!fileData) {
                    continue;
                }
                fileCache.set(candidate.path, fileData);
            }
            candidate.content = fileData.content;
            candidate.lang = fileData.lang;
            candidate.ext = fileData.ext;
            candidate.totalLines = fileData.totalLines;
            candidate.embedding = fileData.embedding;
        }
        materializedCandidates.push(candidate);
    }
    if (materializedCandidates.length === 0) {
        return { context: [], tokens_estimate: 0 };
    }
    applyStructuralScores(materializedCandidates, queryEmbedding, weights.structural);
    const sortedCandidates = materializedCandidates
        .filter((candidate) => candidate.score > 0) // Drop candidates with zero or negative scores.
        .sort((a, b) => {
        if (b.score === a.score) {
            return a.path.localeCompare(b.path);
        }
        return b.score - a.score;
    })
        .slice(0, limit);
    if (sortedCandidates.length === 0) {
        return { context: [], tokens_estimate: 0 };
    }
    // Sorted descending, so the first candidate holds the maximum score.
    const maxScore = sortedCandidates[0].score;
    const results = [];
    // Content-based token estimate, accumulated alongside the result items.
    let tokensEstimate = 0;
    for (const candidate of sortedCandidates) {
        if (!candidate.content) {
            continue;
        }
        const snippets = await db.all(`
        SELECT s.snippet_id, s.start_line, s.end_line, s.symbol_id, sym.name AS symbol_name, sym.kind AS symbol_kind
        FROM snippet s
        LEFT JOIN symbol sym
          ON sym.repo_id = s.repo_id
         AND sym.path = s.path
         AND sym.symbol_id = s.symbol_id
        WHERE s.repo_id = ? AND s.path = ?
        ORDER BY s.start_line
      `, [repoId, candidate.path]);
        const selected = selectSnippet(snippets, candidate.matchLine);
        let startLine;
        let endLine;
        if (selected) {
            startLine = selected.start_line;
            endLine = selected.end_line;
        }
        else {
            // No indexed snippet: centre a fixed-size window on the match line.
            const totalLines = candidate.totalLines ?? 0;
            const matchLine = candidate.matchLine ?? 1;
            const windowHalf = Math.floor(FALLBACK_SNIPPET_WINDOW / 2);
            startLine = Math.max(1, matchLine - windowHalf);
            endLine = Math.min(totalLines === 0 ? matchLine + windowHalf : totalLines, startLine + FALLBACK_SNIPPET_WINDOW - 1);
        }
        if (endLine < startLine) {
            endLine = startLine;
        }
        const reasons = new Set(candidate.reasons);
        if (selected && selected.symbol_name) {
            reasons.add(`symbol:${selected.symbol_name}`);
        }
        const normalizedScore = maxScore > 0 ? candidate.score / maxScore : 0;
        const item = {
            path: candidate.path,
            range: [startLine, endLine],
            why: Array.from(reasons).sort(),
            score: Number.isFinite(normalizedScore) ? normalizedScore : 0,
        };
        // Add preview only if not in compact mode.
        if (!params.compact) {
            item.preview = buildSnippetPreview(candidate.content, startLine, endLine);
        }
        results.push(item);
        tokensEstimate += estimateTokensFromContent(candidate.content, startLine, endLine);
    }
    return { context: results, tokens_estimate: tokensEstimate };
}
|
|
1055
|
-
/**
 * Re-rank caller-supplied candidates by combining each base score with the
 * structural similarity between the query text and the file's stored embedding.
 *
 * Candidates without a string `path` are ignored, duplicates keep their first
 * occurrence, and at most MAX_RERANK_LIMIT unique candidates are considered.
 *
 * @param {object} context - Carries `db` and `repoId`.
 * @param {object} params - `text` (required), `candidates` (`{ path, score? }[]`),
 *   optional `k` (result count) and `profile`.
 * @returns {Promise<{candidates: Array<{path: string, base: number, semantic: number, combined: number}>}>}
 *   Sorted by combined score, then semantic score, then path.
 * @throws {Error} When `text` is missing or blank.
 */
export async function semanticRerank(context, params) {
    const text = params.text?.trim() ?? "";
    if (text.length === 0) {
        throw new Error("semantic_rerank requires non-empty text. Describe the intent to compute semantic similarity.");
    }
    const supplied = params.candidates;
    if (!Array.isArray(supplied) || supplied.length === 0) {
        return { candidates: [] };
    }
    // Deduplicate by path; Map insertion order keeps first occurrences in input order.
    const byPath = new Map();
    for (const entry of supplied) {
        const usable = Boolean(entry) && typeof entry.path === "string" && entry.path.length > 0;
        if (!usable || byPath.has(entry.path)) {
            continue;
        }
        byPath.set(entry.path, entry);
        if (byPath.size >= MAX_RERANK_LIMIT) {
            break;
        }
    }
    if (byPath.size === 0) {
        return { candidates: [] };
    }
    const uniqueCandidates = Array.from(byPath.values());
    const requestedCount = params.k ?? uniqueCandidates.length;
    const limit = Math.max(1, Math.min(MAX_RERANK_LIMIT, Math.floor(requestedCount)));
    const weights = loadScoringProfile(coerceProfileName(params.profile ?? null));
    const structuralWeight = weights.structural;
    const queryEmbedding = generateEmbedding(text)?.values ?? null;
    // Embeddings are only fetched when they can actually affect the score.
    const semanticEnabled = Boolean(queryEmbedding) && structuralWeight > 0;
    const embeddingMap = semanticEnabled
        ? await fetchEmbeddingMap(context.db, context.repoId, Array.from(byPath.keys()))
        : new Map();
    const semanticScoreFor = (filePath) => {
        if (!semanticEnabled) {
            return 0;
        }
        const embedding = embeddingMap.get(filePath);
        if (!embedding) {
            return 0;
        }
        const similarity = structuralSimilarity(queryEmbedding, embedding);
        // Non-finite and non-positive similarities contribute nothing.
        return Number.isFinite(similarity) && similarity > 0 ? similarity : 0;
    };
    const scored = [];
    for (const entry of uniqueCandidates) {
        const hasNumericScore = typeof entry.score === "number" && Number.isFinite(entry.score);
        const base = hasNumericScore ? entry.score : 0;
        const semantic = semanticScoreFor(entry.path);
        scored.push({
            path: entry.path,
            base,
            semantic,
            combined: base + structuralWeight * semantic,
        });
    }
    scored.sort((left, right) => {
        if (right.combined !== left.combined) {
            return right.combined - left.combined;
        }
        if (right.semantic !== left.semantic) {
            return right.semantic - left.semantic;
        }
        return left.path.localeCompare(right.path);
    });
    return { candidates: scored.slice(0, limit) };
}
|
|
1123
|
-
/**
 * Compute the transitive dependency closure of a file with a breadth-first walk.
 *
 * With direction "outbound" (the default) the walk follows what each file
 * depends on; package dependencies are recorded as leaf nodes when
 * `include_packages` is true. With direction "inbound" the walk follows which
 * files depend on the current file (path dependencies only).
 *
 * @param {object} context - Carries `db` and `repoId`.
 * @param {object} params - `path` (required), optional `direction`
 *   ("outbound" | "inbound"), `max_depth` (default 3), `include_packages`
 *   (default true).
 * @returns {Promise<{root: string, direction: string, nodes: Array<object>, edges: Array<object>}>}
 *   `nodes` are `{ kind, target, depth }` and `edges` are
 *   `{ from, to, kind, rel, depth }`, each keeping the smallest depth seen and
 *   sorted by depth and then name.
 * @throws {Error} When `params.path` is missing.
 */
export async function depsClosure(context, params) {
    const { db, repoId } = context;
    if (!params.path) {
        throw new Error("deps_closure requires a file path. Provide a tracked source file path to continue.");
    }
    const direction = params.direction ?? "outbound";
    const maxDepth = params.max_depth ?? 3;
    const includePackages = params.include_packages ?? true;
    const dependencyRows = await db.all(`
      SELECT src_path, dst_kind, dst, rel
      FROM dependency
      WHERE repo_id = ?
    `, [repoId]);
    const isInbound = direction === "inbound";
    // Build only the adjacency map the chosen direction needs:
    //   outbound: src_path -> rows (dependencies used by the file)
    //   inbound:  dst      -> rows (files that use it; path targets only)
    const adjacency = new Map();
    for (const row of dependencyRows) {
        if (isInbound && row.dst_kind !== "path") {
            continue;
        }
        const key = isInbound ? row.dst : row.src_path;
        const bucket = adjacency.get(key);
        if (bucket) {
            bucket.push(row);
        }
        else {
            adjacency.set(key, [row]);
        }
    }
    const visitedPaths = new Set([params.path]);
    const nodeDepth = new Map();
    const edgeSet = new Map();
    // Keep the shallowest depth at which a node / edge was reached.
    const recordNode = (node) => {
        const key = `${node.kind}:${node.target}`;
        const existing = nodeDepth.get(key);
        if (!existing || node.depth < existing.depth) {
            nodeDepth.set(key, { ...node });
        }
    };
    const recordEdge = (edge) => {
        const key = `${edge.from}->${edge.to}:${edge.kind}:${edge.rel}`;
        const existing = edgeSet.get(key);
        if (!existing || edge.depth < existing.depth) {
            edgeSet.set(key, { ...edge });
        }
    };
    const visit = (targetPath, depth, queue) => {
        if (!visitedPaths.has(targetPath)) {
            visitedPaths.add(targetPath);
            queue.push({ path: targetPath, depth });
        }
    };
    recordNode({ kind: "path", target: params.path, depth: 0 });
    // Index-based queue: Array.prototype.shift() is O(n) per dequeue.
    const queue = [{ path: params.path, depth: 0 }];
    for (let head = 0; head < queue.length; head += 1) {
        const current = queue[head];
        if (current.depth >= maxDepth) {
            continue;
        }
        const nextDepth = current.depth + 1;
        for (const edge of adjacency.get(current.path) ?? []) {
            if (isInbound) {
                // inbound: edge.src_path uses the current file.
                recordEdge({ from: edge.src_path, to: current.path, kind: "path", rel: edge.rel, depth: nextDepth });
                recordNode({ kind: "path", target: edge.src_path, depth: nextDepth });
                visit(edge.src_path, nextDepth, queue);
            }
            else if (edge.dst_kind === "path") {
                // outbound: the current file uses edge.dst.
                recordEdge({ from: current.path, to: edge.dst, kind: "path", rel: edge.rel, depth: nextDepth });
                recordNode({ kind: "path", target: edge.dst, depth: nextDepth });
                visit(edge.dst, nextDepth, queue);
            }
            else if (edge.dst_kind === "package" && includePackages) {
                // Packages are leaves: recorded but never traversed further.
                recordEdge({ from: current.path, to: edge.dst, kind: "package", rel: edge.rel, depth: nextDepth });
                recordNode({ kind: "package", target: edge.dst, depth: nextDepth });
            }
        }
    }
    // The dependency query has no ORDER BY, so the trailing comparisons on
    // kind / rel make the ordering of otherwise-equal entries deterministic.
    const nodes = Array.from(nodeDepth.values()).sort((a, b) => a.depth - b.depth ||
        a.target.localeCompare(b.target) ||
        a.kind.localeCompare(b.kind));
    const edges = Array.from(edgeSet.values()).sort((a, b) => a.depth - b.depth ||
        a.from.localeCompare(b.from) ||
        a.to.localeCompare(b.to) ||
        a.kind.localeCompare(b.kind) ||
        String(a.rel).localeCompare(String(b.rel)));
    return {
        root: params.path,
        direction,
        nodes,
        edges,
    };
}
|
|
1250
|
-
/**
 * Look up the id of the repository registered under `repoRoot`.
 *
 * @param {object} db - Database client exposing `all(sql, values)`.
 * @param {string} repoRoot - Repository root used as the lookup key.
 * @returns {Promise<*>} The `id` column of the matching `repo` row.
 * @throws {Error} When no row matches, when the database reports that the
 *   `repo` table does not exist (the original error is attached as `cause`),
 *   or when the query fails for any other reason (rethrown unchanged).
 */
export async function resolveRepoId(db, repoRoot) {
    let rows;
    try {
        rows = await db.all("SELECT id FROM repo WHERE root = ?", [repoRoot]);
    }
    catch (error) {
        // A missing `repo` table means the index was never built; report it the
        // same way as a missing row, keeping the database error for diagnosis.
        if (error instanceof Error && error.message.includes("Table with name repo")) {
            throw new Error("Target repository is missing from DuckDB. Run the indexer before starting the server.", { cause: error });
        }
        throw error;
    }
    if (rows.length === 0) {
        throw new Error("Target repository is missing from DuckDB. Run the indexer before starting the server.");
    }
    const row = rows[0];
    if (!row) {
        throw new Error("Failed to retrieve repository record. Database returned empty result.");
    }
    return row.id;
}
|