kiri-mcp-server 0.16.1 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164)
  1. package/README.md +65 -22
  2. package/dist/package.json +4 -2
  3. package/dist/src/client/proxy.js +0 -0
  4. package/dist/src/daemon/daemon.js +0 -0
  5. package/dist/src/indexer/codeintel/dart/adapter.d.ts +36 -0
  6. package/dist/src/indexer/codeintel/dart/adapter.d.ts.map +1 -0
  7. package/dist/src/indexer/codeintel/dart/adapter.js +60 -0
  8. package/dist/src/indexer/codeintel/dart/adapter.js.map +1 -0
  9. package/dist/src/indexer/codeintel/dart/index.d.ts +7 -0
  10. package/dist/src/indexer/codeintel/dart/index.d.ts.map +1 -0
  11. package/dist/src/indexer/codeintel/dart/index.js +7 -0
  12. package/dist/src/indexer/codeintel/dart/index.js.map +1 -0
  13. package/dist/src/indexer/codeintel/index.d.ts +30 -0
  14. package/dist/src/indexer/codeintel/index.d.ts.map +1 -0
  15. package/dist/src/indexer/codeintel/index.js +32 -0
  16. package/dist/src/indexer/codeintel/index.js.map +1 -0
  17. package/dist/src/indexer/codeintel/java/analyzer.d.ts +22 -0
  18. package/dist/src/indexer/codeintel/java/analyzer.d.ts.map +1 -0
  19. package/dist/src/indexer/codeintel/java/analyzer.js +281 -0
  20. package/dist/src/indexer/codeintel/java/analyzer.js.map +1 -0
  21. package/dist/src/indexer/codeintel/java/index.d.ts +7 -0
  22. package/dist/src/indexer/codeintel/java/index.d.ts.map +1 -0
  23. package/dist/src/indexer/codeintel/java/index.js +7 -0
  24. package/dist/src/indexer/codeintel/java/index.js.map +1 -0
  25. package/dist/src/indexer/codeintel/php/analyzer.d.ts +23 -0
  26. package/dist/src/indexer/codeintel/php/analyzer.d.ts.map +1 -0
  27. package/dist/src/indexer/codeintel/php/analyzer.js +342 -0
  28. package/dist/src/indexer/codeintel/php/analyzer.js.map +1 -0
  29. package/dist/src/indexer/codeintel/php/index.d.ts +7 -0
  30. package/dist/src/indexer/codeintel/php/index.d.ts.map +1 -0
  31. package/dist/src/indexer/codeintel/php/index.js +7 -0
  32. package/dist/src/indexer/codeintel/php/index.js.map +1 -0
  33. package/dist/src/indexer/codeintel/registry.d.ts +76 -0
  34. package/dist/src/indexer/codeintel/registry.d.ts.map +1 -0
  35. package/dist/src/indexer/codeintel/registry.js +127 -0
  36. package/dist/src/indexer/codeintel/registry.js.map +1 -0
  37. package/dist/src/indexer/codeintel/rust/analyzer.d.ts +14 -0
  38. package/dist/src/indexer/codeintel/rust/analyzer.d.ts.map +1 -0
  39. package/dist/src/indexer/codeintel/rust/analyzer.js +388 -0
  40. package/dist/src/indexer/codeintel/rust/analyzer.js.map +1 -0
  41. package/dist/src/indexer/codeintel/rust/index.d.ts +5 -0
  42. package/dist/src/indexer/codeintel/rust/index.d.ts.map +1 -0
  43. package/dist/src/indexer/codeintel/rust/index.js +5 -0
  44. package/dist/src/indexer/codeintel/rust/index.js.map +1 -0
  45. package/dist/src/indexer/codeintel/swift/analyzer.d.ts +22 -0
  46. package/dist/src/indexer/codeintel/swift/analyzer.d.ts.map +1 -0
  47. package/dist/src/indexer/codeintel/swift/analyzer.js +271 -0
  48. package/dist/src/indexer/codeintel/swift/analyzer.js.map +1 -0
  49. package/dist/src/indexer/codeintel/swift/index.d.ts +7 -0
  50. package/dist/src/indexer/codeintel/swift/index.d.ts.map +1 -0
  51. package/dist/src/indexer/codeintel/swift/index.js +7 -0
  52. package/dist/src/indexer/codeintel/swift/index.js.map +1 -0
  53. package/dist/src/indexer/codeintel/types.d.ts +114 -0
  54. package/dist/src/indexer/codeintel/types.d.ts.map +1 -0
  55. package/dist/src/indexer/codeintel/types.js +13 -0
  56. package/dist/src/indexer/codeintel/types.js.map +1 -0
  57. package/dist/src/indexer/codeintel/typescript/analyzer.d.ts +22 -0
  58. package/dist/src/indexer/codeintel/typescript/analyzer.d.ts.map +1 -0
  59. package/dist/{indexer/codeintel.js → src/indexer/codeintel/typescript/analyzer.js} +62 -34
  60. package/dist/src/indexer/codeintel/typescript/analyzer.js.map +1 -0
  61. package/dist/src/indexer/codeintel/typescript/index.d.ts +7 -0
  62. package/dist/src/indexer/codeintel/typescript/index.d.ts.map +1 -0
  63. package/dist/src/indexer/codeintel/typescript/index.js +7 -0
  64. package/dist/src/indexer/codeintel/typescript/index.js.map +1 -0
  65. package/dist/src/indexer/codeintel/utils.d.ts +91 -0
  66. package/dist/src/indexer/codeintel/utils.d.ts.map +1 -0
  67. package/dist/src/indexer/codeintel/utils.js +145 -0
  68. package/dist/src/indexer/codeintel/utils.js.map +1 -0
  69. package/dist/src/indexer/codeintel.d.ts +33 -26
  70. package/dist/src/indexer/codeintel.d.ts.map +1 -1
  71. package/dist/src/indexer/codeintel.js +56 -1078
  72. package/dist/src/indexer/codeintel.js.map +1 -1
  73. package/dist/src/indexer/graph-metrics.d.ts.map +1 -1
  74. package/dist/src/indexer/graph-metrics.js +16 -4
  75. package/dist/src/indexer/graph-metrics.js.map +1 -1
  76. package/dist/src/server/boost-profiles.d.ts +1 -1
  77. package/dist/src/server/boost-profiles.d.ts.map +1 -1
  78. package/dist/src/server/boost-profiles.js +22 -0
  79. package/dist/src/server/boost-profiles.js.map +1 -1
  80. package/dist/src/server/main.js +0 -0
  81. package/dist/src/server/rpc.js +4 -4
  82. package/dist/src/server/rpc.js.map +1 -1
  83. package/package.json +10 -2
  84. package/dist/client/cli.js +0 -68
  85. package/dist/client/cli.js.map +0 -1
  86. package/dist/client/index.js +0 -5
  87. package/dist/client/index.js.map +0 -1
  88. package/dist/eval/metrics.js +0 -47
  89. package/dist/eval/metrics.js.map +0 -1
  90. package/dist/indexer/cli.js +0 -362
  91. package/dist/indexer/cli.js.map +0 -1
  92. package/dist/indexer/codeintel.js.map +0 -1
  93. package/dist/indexer/git.js +0 -30
  94. package/dist/indexer/git.js.map +0 -1
  95. package/dist/indexer/language.js +0 -34
  96. package/dist/indexer/language.js.map +0 -1
  97. package/dist/indexer/pipeline/filters/denylist.js +0 -71
  98. package/dist/indexer/pipeline/filters/denylist.js.map +0 -1
  99. package/dist/indexer/schema.js +0 -101
  100. package/dist/indexer/schema.js.map +0 -1
  101. package/dist/server/bootstrap.js +0 -19
  102. package/dist/server/bootstrap.js.map +0 -1
  103. package/dist/server/context.js +0 -1
  104. package/dist/server/context.js.map +0 -1
  105. package/dist/server/fallbacks/degradeController.js +0 -69
  106. package/dist/server/fallbacks/degradeController.js.map +0 -1
  107. package/dist/server/handlers.js +0 -1268
  108. package/dist/server/handlers.js.map +0 -1
  109. package/dist/server/main.js +0 -151
  110. package/dist/server/main.js.map +0 -1
  111. package/dist/server/observability/metrics.js +0 -56
  112. package/dist/server/observability/metrics.js.map +0 -1
  113. package/dist/server/observability/tracing.js +0 -58
  114. package/dist/server/observability/tracing.js.map +0 -1
  115. package/dist/server/rpc.js +0 -477
  116. package/dist/server/rpc.js.map +0 -1
  117. package/dist/server/runtime.js +0 -47
  118. package/dist/server/runtime.js.map +0 -1
  119. package/dist/server/scoring.js +0 -116
  120. package/dist/server/scoring.js.map +0 -1
  121. package/dist/server/stdio.js +0 -76
  122. package/dist/server/stdio.js.map +0 -1
  123. package/dist/shared/duckdb.js +0 -119
  124. package/dist/shared/duckdb.js.map +0 -1
  125. package/dist/shared/embedding.js +0 -98
  126. package/dist/shared/embedding.js.map +0 -1
  127. package/dist/shared/index.js +0 -9
  128. package/dist/shared/index.js.map +0 -1
  129. package/dist/shared/security/config.js +0 -64
  130. package/dist/shared/security/config.js.map +0 -1
  131. package/dist/shared/security/masker.js +0 -56
  132. package/dist/shared/security/masker.js.map +0 -1
  133. package/dist/shared/tokenizer.js +0 -4
  134. package/dist/shared/tokenizer.js.map +0 -1
  135. package/dist/shared/utils/simpleYaml.js +0 -89
  136. package/dist/shared/utils/simpleYaml.js.map +0 -1
  137. package/dist/src/server/rrf.d.ts +0 -86
  138. package/dist/src/server/rrf.d.ts.map +0 -1
  139. package/dist/src/server/rrf.js +0 -108
  140. package/dist/src/server/rrf.js.map +0 -1
  141. package/dist/src/shared/embedding/engine.d.ts +0 -38
  142. package/dist/src/shared/embedding/engine.d.ts.map +0 -1
  143. package/dist/src/shared/embedding/engine.js +0 -6
  144. package/dist/src/shared/embedding/engine.js.map +0 -1
  145. package/dist/src/shared/embedding/lsh-engine.d.ts +0 -11
  146. package/dist/src/shared/embedding/lsh-engine.d.ts.map +0 -1
  147. package/dist/src/shared/embedding/lsh-engine.js +0 -14
  148. package/dist/src/shared/embedding/lsh-engine.js.map +0 -1
  149. package/dist/src/shared/embedding/registry.d.ts +0 -25
  150. package/dist/src/shared/embedding/registry.d.ts.map +0 -1
  151. package/dist/src/shared/embedding/registry.js +0 -50
  152. package/dist/src/shared/embedding/registry.js.map +0 -1
  153. package/dist/src/shared/embedding/semantic-engine.d.ts +0 -14
  154. package/dist/src/shared/embedding/semantic-engine.d.ts.map +0 -1
  155. package/dist/src/shared/embedding/semantic-engine.js +0 -50
  156. package/dist/src/shared/embedding/semantic-engine.js.map +0 -1
  157. package/dist/src/shared/models/model-manager.d.ts +0 -38
  158. package/dist/src/shared/models/model-manager.d.ts.map +0 -1
  159. package/dist/src/shared/models/model-manager.js +0 -116
  160. package/dist/src/shared/models/model-manager.js.map +0 -1
  161. package/dist/src/shared/models/model-manifest.d.ts +0 -22
  162. package/dist/src/shared/models/model-manifest.d.ts.map +0 -1
  163. package/dist/src/shared/models/model-manifest.js +0 -24
  164. package/dist/src/shared/models/model-manifest.js.map +0 -1
package/dist/server/handlers.js
@@ -1,1268 +0,0 @@
1
- import path from "node:path";
2
- import { generateEmbedding, structuralSimilarity } from "../shared/embedding.js";
3
- import { encode as encodeGPT } from "../shared/tokenizer.js";
4
- import { coerceProfileName, loadScoringProfile } from "./scoring.js";
5
- const DEFAULT_SEARCH_LIMIT = 50;
6
- const DEFAULT_SNIPPET_WINDOW = 150;
7
- const DEFAULT_BUNDLE_LIMIT = 7; // Reduced from 12 to optimize token usage
8
- const MAX_BUNDLE_LIMIT = 20;
9
- const MAX_KEYWORDS = 12;
10
- const MAX_MATCHES_PER_KEYWORD = 40;
11
- const MAX_DEPENDENCY_SEEDS = 8;
12
- const MAX_DEPENDENCY_SEEDS_QUERY_LIMIT = 100; // Upper bound to guard against SQL injection
13
- const NEARBY_LIMIT = 6;
14
- const FALLBACK_SNIPPET_WINDOW = 40; // Reduced from 120 to optimize token usage
15
- const MAX_RERANK_LIMIT = 50;
16
- const STOP_WORDS = new Set([
17
- "the",
18
- "and",
19
- "for",
20
- "with",
21
- "from",
22
- "this",
23
- "that",
24
- "have",
25
- "has",
26
- "will",
27
- "would",
28
- "into",
29
- "about",
30
- "there",
31
- "their",
32
- "your",
33
- "fix",
34
- "test",
35
- "tests",
36
- "issue",
37
- "error",
38
- "bug",
39
- "fail",
40
- "failing",
41
- "make",
42
- "when",
43
- "where",
44
- "should",
45
- "could",
46
- "need",
47
- "goal",
48
- ]);
49
- function normalizeLimit(limit) {
50
- if (!limit || Number.isNaN(limit)) {
51
- return DEFAULT_SEARCH_LIMIT;
52
- }
53
- return Math.min(Math.max(1, Math.floor(limit)), 100);
54
- }
55
- function buildPreview(content, query) {
56
- const lowerContent = content.toLowerCase();
57
- const lowerQuery = query.toLowerCase();
58
- const index = lowerContent.indexOf(lowerQuery);
59
- if (index === -1) {
60
- return { preview: content.slice(0, 240), line: 1 };
61
- }
62
- const prefix = content.slice(0, index);
63
- const prefixLines = prefix.split(/\r?\n/);
64
- const matchLine = prefix.length === 0 ? 1 : prefixLines.length;
65
- const snippetStart = Math.max(0, index - 120);
66
- const snippetEnd = Math.min(content.length, index + query.length + 120);
67
- const preview = content.slice(snippetStart, snippetEnd);
68
- return { preview, line: matchLine };
69
- }
70
- function normalizeBundleLimit(limit) {
71
- if (!limit || Number.isNaN(limit)) {
72
- return DEFAULT_BUNDLE_LIMIT;
73
- }
74
- return Math.min(Math.max(1, Math.floor(limit)), MAX_BUNDLE_LIMIT);
75
- }
76
- /**
77
- * Get the tokenization strategy
78
- * Determined from an environment variable or the default value
79
- */
80
- function getTokenizationStrategy() {
81
- const strategy = process.env.KIRI_TOKENIZATION_STRATEGY?.toLowerCase();
82
- if (strategy === "legacy" || strategy === "hybrid") {
83
- return strategy;
84
- }
85
- return "phrase-aware"; // デフォルト
86
- }
87
- /**
88
- * Extract phrases enclosed in quotes
89
- * Example: 'search "page-agent handler" test' → ["page-agent handler"]
90
- */
91
- function extractQuotedPhrases(text) {
92
- const phrases = [];
93
- const quotePattern = /"([^"]+)"|'([^']+)'/g;
94
- let match;
95
- let remaining = text;
96
- // eslint-disable-next-line no-cond-assign
97
- while ((match = quotePattern.exec(text)) !== null) {
98
- const phrase = (match[1] || match[2] || "").trim().toLowerCase();
99
- if (phrase.length >= 3) {
100
- phrases.push(phrase);
101
- }
102
- remaining = remaining.replace(match[0], " ");
103
- }
104
- return { phrases, remaining };
105
- }
106
- /**
107
- * Extract hyphenated terms
108
- * 例: "page-agent lambda-handler" → ["page-agent", "lambda-handler"]
109
- */
110
- function extractHyphenatedTerms(text) {
111
- // Match condition: alphanumerics + hyphen + alphanumerics (at least 3 characters)
112
- const hyphenPattern = /\b[a-z0-9]+(?:-[a-z0-9]+)+\b/gi;
113
- const matches = text.match(hyphenPattern) || [];
114
- return matches
115
- .map((term) => term.toLowerCase())
116
- .filter((term) => term.length >= 3 && !STOP_WORDS.has(term));
117
- }
118
- /**
119
- * Extract path-like terms
120
- * 例: "lambda/page-agent/handler" → ["lambda", "page-agent", "handler"]
121
- */
122
- function extractPathSegments(text) {
123
- const pathPattern = /\b[a-z0-9_-]+(?:\/[a-z0-9_-]+)+\b/gi;
124
- const matches = text.match(pathPattern) || [];
125
- const segments = [];
126
- for (const path of matches) {
127
- const parts = path.toLowerCase().split("/");
128
- for (const part of parts) {
129
- if (part.length >= 3 && !STOP_WORDS.has(part) && !segments.includes(part)) {
130
- segments.push(part);
131
- }
132
- }
133
- }
134
- return segments;
135
- }
136
- /**
137
- * Extract regular words (legacy logic)
138
- */
139
- function extractRegularWords(text, strategy) {
140
- const splitPattern = strategy === "legacy" ? /[^a-z0-9_]+/iu : /[^a-z0-9_-]+/iu;
141
- const words = text
142
- .toLowerCase()
143
- .split(splitPattern)
144
- .map((word) => word.trim())
145
- .filter((word) => word.length >= 3 && !STOP_WORDS.has(word));
146
- return words;
147
- }
148
- /**
149
- * Extract keywords, phrases, and path segments from text
150
- * The handling of hyphenated terms changes based on the tokenization strategy
151
- */
152
- function extractKeywords(text) {
153
- const strategy = getTokenizationStrategy();
154
- const result = {
155
- phrases: [],
156
- keywords: [],
157
- pathSegments: [],
158
- };
159
- // Phase 1: extract quoted phrases
160
- const { phrases: quotedPhrases, remaining: afterQuotes } = extractQuotedPhrases(text);
161
- result.phrases.push(...quotedPhrases);
162
- // Phase 2: extract path segments
163
- const pathSegments = extractPathSegments(afterQuotes);
164
- result.pathSegments.push(...pathSegments);
165
- // Phase 3: extract hyphenated terms (phrase-aware or hybrid mode)
166
- if (strategy === "phrase-aware" || strategy === "hybrid") {
167
- const hyphenatedTerms = extractHyphenatedTerms(afterQuotes);
168
- result.phrases.push(...hyphenatedTerms);
169
- // In hybrid mode, also add keywords obtained by splitting the hyphenated terms
170
- if (strategy === "hybrid") {
171
- for (const term of hyphenatedTerms) {
172
- const parts = term.split("-").filter((part) => part.length >= 3 && !STOP_WORDS.has(part));
173
- result.keywords.push(...parts);
174
- }
175
- }
176
- }
177
- // Phase 4: extract regular words
178
- const regularWords = extractRegularWords(afterQuotes, strategy);
179
- // Add words up to the keyword limit while removing duplicates
180
- for (const word of regularWords) {
181
- if (!result.keywords.includes(word) && !result.phrases.includes(word)) {
182
- result.keywords.push(word);
183
- if (result.keywords.length >= MAX_KEYWORDS) {
184
- break;
185
- }
186
- }
187
- }
188
- return result;
189
- }
190
- function ensureCandidate(map, filePath) {
191
- let candidate = map.get(filePath);
192
- if (!candidate) {
193
- candidate = {
194
- path: filePath,
195
- score: 0,
196
- reasons: new Set(),
197
- matchLine: null,
198
- content: null,
199
- totalLines: null,
200
- lang: null,
201
- ext: null,
202
- embedding: null,
203
- semanticSimilarity: null,
204
- };
205
- map.set(filePath, candidate);
206
- }
207
- return candidate;
208
- }
209
- function parseEmbedding(vectorJson, vectorDims) {
210
- if (!vectorJson || !vectorDims || vectorDims <= 0) {
211
- return null;
212
- }
213
- try {
214
- const parsed = JSON.parse(vectorJson);
215
- if (!Array.isArray(parsed)) {
216
- return null;
217
- }
218
- const values = [];
219
- for (let i = 0; i < parsed.length && i < vectorDims; i += 1) {
220
- const raw = parsed[i];
221
- const num = typeof raw === "number" ? raw : Number(raw);
222
- if (!Number.isFinite(num)) {
223
- return null;
224
- }
225
- values.push(num);
226
- }
227
- return values.length === vectorDims ? values : null;
228
- }
229
- catch {
230
- return null;
231
- }
232
- }
233
- function applyStructuralScores(candidates, queryEmbedding, structuralWeight) {
234
- if (!queryEmbedding || structuralWeight <= 0) {
235
- return;
236
- }
237
- for (const candidate of candidates) {
238
- if (!candidate.embedding) {
239
- continue;
240
- }
241
- const similarity = structuralSimilarity(queryEmbedding, candidate.embedding);
242
- if (!Number.isFinite(similarity) || similarity <= 0) {
243
- continue;
244
- }
245
- candidate.semanticSimilarity = similarity;
246
- candidate.score += structuralWeight * similarity;
247
- candidate.reasons.add(`structural:${similarity.toFixed(2)}`);
248
- }
249
- }
250
- async function fetchEmbeddingMap(db, repoId, paths) {
251
- const map = new Map();
252
- if (paths.length === 0) {
253
- return map;
254
- }
255
- const placeholders = paths.map(() => "?").join(", ");
256
- const rows = await db.all(`
257
- SELECT path, vector_json, dims AS vector_dims
258
- FROM file_embedding
259
- WHERE repo_id = ? AND path IN (${placeholders})
260
- `, [repoId, ...paths]);
261
- for (const row of rows) {
262
- const embedding = parseEmbedding(row.vector_json, row.vector_dims);
263
- if (embedding) {
264
- map.set(row.path, embedding);
265
- }
266
- }
267
- return map;
268
- }
269
- async function loadFileContent(db, repoId, filePath) {
270
- const rows = await db.all(`
271
- SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
272
- FROM file f
273
- JOIN blob b ON b.hash = f.blob_hash
274
- LEFT JOIN file_embedding fe
275
- ON fe.repo_id = f.repo_id
276
- AND fe.path = f.path
277
- WHERE f.repo_id = ? AND f.path = ?
278
- LIMIT 1
279
- `, [repoId, filePath]);
280
- const row = rows[0];
281
- if (!row || row.is_binary || row.content === null) {
282
- return null;
283
- }
284
- const totalLines = row.content.length === 0 ? 0 : row.content.split(/\r?\n/).length;
285
- return {
286
- content: row.content,
287
- lang: row.lang,
288
- ext: row.ext,
289
- totalLines,
290
- embedding: parseEmbedding(row.vector_json ?? null, row.vector_dims ?? null),
291
- };
292
- }
293
- function selectSnippet(snippets, matchLine) {
294
- const firstSnippet = snippets[0];
295
- if (!firstSnippet) {
296
- return null;
297
- }
298
- if (matchLine === null) {
299
- return firstSnippet;
300
- }
301
- const containing = snippets.find((snippet) => matchLine >= snippet.start_line && matchLine <= snippet.end_line);
302
- if (containing) {
303
- return containing;
304
- }
305
- if (matchLine < firstSnippet.start_line) {
306
- return firstSnippet;
307
- }
308
- const lastSnippet = snippets[snippets.length - 1];
309
- return lastSnippet ?? firstSnippet;
310
- }
311
- function buildSnippetPreview(content, startLine, endLine) {
312
- const lines = content.split(/\r?\n/);
313
- const startIndex = Math.max(0, Math.min(startLine - 1, lines.length));
314
- const endIndex = Math.max(startIndex, Math.min(endLine, lines.length));
315
- const snippet = lines.slice(startIndex, endIndex).join("\n");
316
- if (snippet.length <= 240) {
317
- return snippet;
318
- }
319
- return `${snippet.slice(0, 239)}…`;
320
- }
321
- /**
322
- * Estimate the token count (content-based)
323
- * Counts precisely using the actual GPT tokenizer
324
- *
325
- * @param content - Entire file content
326
- * @param startLine - Start line (1-indexed)
327
- * @param endLine - End line (1-indexed)
328
- * @returns Estimated token count
329
- */
330
- function estimateTokensFromContent(content, startLine, endLine) {
331
- const lines = content.split(/\r?\n/);
332
- const startIndex = Math.max(0, startLine - 1);
333
- const endIndex = Math.min(endLine, lines.length);
334
- const selectedLines = lines.slice(startIndex, endIndex);
335
- const text = selectedLines.join("\n");
336
- try {
337
- // Use the actual GPT tokenizer
338
- return encodeGPT(text).length;
339
- }
340
- catch (error) {
341
- // Fallback: for average English text, 4 characters ≈ 1 token
342
- console.warn("Token encoding failed, using character-based fallback", error);
343
- return Math.max(1, Math.ceil(text.length / 4));
344
- }
345
- }
346
- /**
347
- * Split a multi-word query into words and build an OR search condition
348
- * @param query - Search query string
349
- * @returns Array of words (terms of 2 characters or fewer are excluded)
350
- */
351
- function splitQueryWords(query) {
352
- // Split on whitespace, slashes, hyphens, and underscores
353
- const words = query.split(/[\s/\-_]+/).filter((w) => w.length > 2);
354
- return words.length > 0 ? words : [query]; // Use the original query when every word was filtered out
355
- }
356
- /**
357
- * Boost the score based on file type
358
- * Favors implementation files or documentation depending on the profile
359
- * @param path - File path
360
- * @param baseScore - Original score
361
- * @param profile - Boost profile ("default" | "docs" | "none")
362
- * @returns Score after the boost has been applied
363
- */
364
- function applyFileTypeBoost(path, baseScore, profile = "default") {
365
- // Blacklisted directories that are almost always irrelevant for code context
366
- const blacklistedDirs = [
367
- ".cursor/",
368
- ".devcontainer/",
369
- ".serena/",
370
- "__mocks__/",
371
- "docs/",
372
- ".git/",
373
- "node_modules/",
374
- ];
375
- if (blacklistedDirs.some((dir) => path.startsWith(dir))) {
376
- return -100; // Effectively remove it
377
- }
378
- if (profile === "none") {
379
- return baseScore;
380
- }
381
- if (profile === "docs") {
382
- if (path.endsWith(".md") || path.endsWith(".yaml") || path.endsWith(".yml")) {
383
- return baseScore * 1.8; // Stronger boost for docs
384
- }
385
- if (path.startsWith("src/") &&
386
- (path.endsWith(".ts") || path.endsWith(".js") || path.endsWith(".tsx"))) {
387
- return baseScore * 0.5; // Stronger penalty for implementation files
388
- }
389
- return baseScore;
390
- }
391
- // Default profile: prioritize implementation files, heavily penalize docs
392
- const docExtensions = [".md", ".yaml", ".yml", ".mdc", ".json"];
393
- if (docExtensions.some((ext) => path.endsWith(ext))) {
394
- return baseScore * 0.1; // Heavy penalty for docs
395
- }
396
- if (path.startsWith("src/app/")) {
397
- return baseScore * 1.8;
398
- }
399
- if (path.startsWith("src/components/")) {
400
- return baseScore * 1.7;
401
- }
402
- if (path.startsWith("src/lib/")) {
403
- return baseScore * 1.6;
404
- }
405
- if (path.startsWith("src/") &&
406
- (path.endsWith(".ts") || path.endsWith(".js") || path.endsWith(".tsx"))) {
407
- return baseScore * 1.5;
408
- }
409
- if (path.startsWith("tests/") || path.startsWith("test/")) {
410
- return baseScore * 0.2; // Also penalize tests in default mode
411
- }
412
- return baseScore;
413
- }
414
- /**
415
- * Boost-profile application specific to contextBundle
416
- * Mutates the candidate's score and reasons directly
417
- * @param candidate - Candidate being scored
418
- * @param row - File info (path, ext)
419
- * @param profile - Boost profile
420
- */
421
- function applyBoostProfile(candidate, row, profile, extractedTerms, pathMatchWeight) {
422
- if (profile === "none") {
423
- return;
424
- }
425
- const { path, ext } = row;
426
- const lowerPath = path.toLowerCase();
427
- const fileName = path.split("/").pop() ?? "";
428
- // Path-based scoring: boost when goal keywords/phrases appear in the file path
429
- if (extractedTerms && pathMatchWeight && pathMatchWeight > 0) {
430
- // When a phrase matches the path exactly (highest weight)
431
- for (const phrase of extractedTerms.phrases) {
432
- if (lowerPath.includes(phrase)) {
433
- candidate.score += pathMatchWeight * 1.5; // 1.5x boost
434
- candidate.reasons.add(`path-phrase:${phrase}`);
435
- break; // apply only the first match
436
- }
437
- }
438
- // When a path segment matches (medium weight)
439
- const pathParts = lowerPath.split("/");
440
- for (const segment of extractedTerms.pathSegments) {
441
- if (pathParts.includes(segment)) {
442
- candidate.score += pathMatchWeight;
443
- candidate.reasons.add(`path-segment:${segment}`);
444
- break; // apply only the first match
445
- }
446
- }
447
- // When a regular keyword appears in the path (low weight)
448
- for (const keyword of extractedTerms.keywords) {
449
- if (lowerPath.includes(keyword)) {
450
- candidate.score += pathMatchWeight * 0.5; // 0.5x boost
451
- candidate.reasons.add(`path-keyword:${keyword}`);
452
- break; // apply only the first match
453
- }
454
- }
455
- }
456
- // Blacklisted directories that are almost always irrelevant for code context
457
- const blacklistedDirs = [
458
- ".cursor/",
459
- ".devcontainer/",
460
- ".serena/",
461
- "__mocks__/",
462
- "docs/",
463
- "test/",
464
- "tests/",
465
- ".git/",
466
- "node_modules/",
467
- "db/migrate/",
468
- "db/migrations/",
469
- "config/",
470
- "dist/",
471
- "build/",
472
- "out/",
473
- "coverage/",
474
- ".vscode/",
475
- ".idea/",
476
- "tmp/",
477
- "temp/",
478
- ];
479
- if (blacklistedDirs.some((dir) => path.startsWith(dir))) {
480
- candidate.score = -100; // Effectively remove it
481
- candidate.reasons.add("penalty:blacklisted-dir");
482
- return;
483
- }
484
- // Penalize test files explicitly (even if outside test directories)
485
- const testPatterns = [".spec.ts", ".spec.js", ".test.ts", ".test.js", ".spec.tsx", ".test.tsx"];
486
- if (testPatterns.some((pattern) => lowerPath.endsWith(pattern))) {
487
- candidate.score -= 2.0; // Strong penalty for test files
488
- candidate.reasons.add("penalty:test-file");
489
- return;
490
- }
491
- // Penalize lock files and package manifests
492
- const lockFiles = [
493
- "package-lock.json",
494
- "pnpm-lock.yaml",
495
- "yarn.lock",
496
- "bun.lockb",
497
- "Gemfile.lock",
498
- "Cargo.lock",
499
- "poetry.lock",
500
- ];
501
- if (lockFiles.some((lockFile) => fileName === lockFile)) {
502
- candidate.score -= 3.0; // Very strong penalty for lock files
503
- candidate.reasons.add("penalty:lock-file");
504
- return;
505
- }
506
- // Penalize configuration files
507
- const configPatterns = [
508
- ".config.js",
509
- ".config.ts",
510
- ".config.mjs",
511
- ".config.cjs",
512
- "tsconfig.json",
513
- "jsconfig.json",
514
- "package.json",
515
- ".eslintrc",
516
- ".prettierrc",
517
- "jest.config",
518
- "vite.config",
519
- "vitest.config",
520
- "webpack.config",
521
- "rollup.config",
522
- ];
523
- if (configPatterns.some((pattern) => lowerPath.endsWith(pattern) || fileName.startsWith(".env")) ||
524
- fileName === "Dockerfile" ||
525
- fileName === "docker-compose.yml" ||
526
- fileName === "docker-compose.yaml") {
527
- candidate.score -= 1.5; // Strong penalty for config files
528
- candidate.reasons.add("penalty:config-file");
529
- return;
530
- }
531
- // Penalize migration files (by path content)
532
- if (lowerPath.includes("migrate") || lowerPath.includes("migration")) {
533
- candidate.score -= 2.0; // Strong penalty for migrations
534
- candidate.reasons.add("penalty:migration-file");
535
- return;
536
- }
537
- if (profile === "docs") {
538
- // DOCS PROFILE: Boost docs, penalize code
539
- if (path.endsWith(".md") || path.endsWith(".yaml") || path.endsWith(".yml")) {
540
- candidate.score += 0.8;
541
- candidate.reasons.add("boost:doc-file");
542
- }
543
- else if (path.startsWith("src/") && (ext === ".ts" || ext === ".tsx" || ext === ".js")) {
544
- candidate.score -= 0.5;
545
- candidate.reasons.add("penalty:impl-file");
546
- }
547
- }
548
- else if (profile === "default") {
549
- // DEFAULT PROFILE: Penalize docs heavily, boost implementation files.
550
- // Penalize documentation and other non-code files
551
- const docExtensions = [".md", ".yaml", ".yml", ".mdc", ".json"];
552
- if (docExtensions.some((docExt) => path.endsWith(docExt))) {
553
- candidate.score -= 1.0; // Strong penalty to overcome structural similarity
554
- candidate.reasons.add("penalty:doc-file");
555
- }
556
- // Boost implementation files, with more specific paths getting higher scores
557
- if (path.startsWith("src/app/")) {
558
- candidate.score += 0.8;
559
- candidate.reasons.add("boost:app-file");
560
- }
561
- else if (path.startsWith("src/components/")) {
562
- candidate.score += 0.7;
563
- candidate.reasons.add("boost:component-file");
564
- }
565
- else if (path.startsWith("src/lib/")) {
566
- candidate.score += 0.6;
567
- candidate.reasons.add("boost:lib-file");
568
- }
569
- else if (path.startsWith("src/")) {
570
- if (ext === ".ts" || ext === ".tsx" || ext === ".js") {
571
- candidate.score += 0.5;
572
- candidate.reasons.add("boost:impl-file");
573
- }
574
- }
575
- }
576
- }
577
- export async function filesSearch(context, params) {
578
- const { db, repoId } = context;
579
- const { query } = params;
580
- if (!query || query.trim().length === 0) {
581
- throw new Error("files_search requires a non-empty query. Provide a search keyword to continue.");
582
- }
583
- const limit = normalizeLimit(params.limit);
584
- const hasFTS = context.features?.fts ?? false;
585
- let sql;
586
- let values;
587
- if (hasFTS) {
588
- // FTS extension available: use fts_main_blob.match_bm25
589
- const conditions = ["f.repo_id = ?"];
590
- values = [repoId];
591
- // Language and extension filters
592
- if (params.lang) {
593
- conditions.push("COALESCE(f.lang, '') = ?");
594
- values.push(params.lang);
595
- }
596
- if (params.ext) {
597
- conditions.push("COALESCE(f.ext, '') = ?");
598
- values.push(params.ext);
599
- }
600
- if (params.path_prefix) {
601
- conditions.push("f.path LIKE ?");
602
- values.push(`${params.path_prefix}%`);
603
- }
604
- // FTS search (BM25 scoring)
605
- sql = `
606
- SELECT f.path, f.lang, f.ext, b.content, fts.score
607
- FROM file f
608
- JOIN blob b ON b.hash = f.blob_hash
609
- JOIN (
610
- SELECT hash, fts_main_blob.match_bm25(hash, ?) AS score
611
- FROM blob
612
- WHERE score IS NOT NULL
613
- ) fts ON fts.hash = b.hash
614
- WHERE ${conditions.join(" AND ")}
615
- ORDER BY fts.score DESC
616
- LIMIT ?
617
- `;
618
- values.unshift(query); // prepend the FTS query
619
- values.push(limit);
620
- }
621
- else {
622
- // FTS extension unavailable: ILIKE search (Phase 1 word-splitting logic)
623
- const conditions = ["f.repo_id = ?", "b.content IS NOT NULL"];
624
- values = [repoId];
625
- const words = splitQueryWords(query);
626
- if (words.length === 1) {
627
- conditions.push("b.content ILIKE '%' || ? || '%'");
628
- values.push(query);
629
- }
630
- else {
631
- const wordConditions = words.map(() => "b.content ILIKE '%' || ? || '%'");
632
- conditions.push(`(${wordConditions.join(" OR ")})`);
633
- values.push(...words);
634
- }
635
- if (params.lang) {
636
- conditions.push("COALESCE(f.lang, '') = ?");
637
- values.push(params.lang);
638
- }
639
- if (params.ext) {
640
- conditions.push("COALESCE(f.ext, '') = ?");
641
- values.push(params.ext);
642
- }
643
- if (params.path_prefix) {
644
- conditions.push("f.path LIKE ?");
645
- values.push(`${params.path_prefix}%`);
646
- }
647
- sql = `
648
- SELECT f.path, f.lang, f.ext, b.content
649
- FROM file f
650
- JOIN blob b ON b.hash = f.blob_hash
651
- WHERE ${conditions.join(" AND ")}
652
- ORDER BY f.path
653
- LIMIT ?
654
- `;
655
- values.push(limit);
656
- }
657
- const rows = await db.all(sql, values);
658
- const boostProfile = params.boost_profile ?? "default";
659
- return rows
660
- .map((row) => {
661
- const { preview, line } = buildPreview(row.content ?? "", query);
662
- const baseScore = row.score ?? 1.0; // BM25 score with FTS, 1.0 with ILIKE
663
- const boostedScore = applyFileTypeBoost(row.path, baseScore, boostProfile);
664
- return {
665
- path: row.path,
666
- preview,
667
- matchLine: line,
668
- lang: row.lang,
669
- ext: row.ext,
670
- score: boostedScore,
671
- };
672
- })
673
- .sort((a, b) => b.score - a.score); // re-sort by descending score
674
- }
675
- export async function snippetsGet(context, params) {
676
- const { db, repoId } = context;
677
- if (!params.path) {
678
- throw new Error("snippets_get requires a file path. Specify a tracked text file path to continue.");
679
- }
680
- const rows = await db.all(`
681
- SELECT f.path, f.lang, f.ext, f.is_binary, b.content
682
- FROM file f
683
- JOIN blob b ON b.hash = f.blob_hash
684
- WHERE f.repo_id = ? AND f.path = ?
685
- LIMIT 1
686
- `, [repoId, params.path]);
687
- if (rows.length === 0) {
688
- throw new Error("Requested snippet file was not indexed. Re-run the indexer or choose another path.");
689
- }
690
- const row = rows[0];
691
- if (!row) {
692
- throw new Error("Requested snippet file was not indexed. Re-run the indexer or choose another path.");
693
- }
694
- if (row.is_binary) {
695
- throw new Error("Binary snippets are not supported. Choose a text file to preview its content.");
696
- }
697
- if (row.content === null) {
698
- throw new Error("Snippet content is unavailable. Re-run the indexer to refresh DuckDB state.");
699
- }
700
- const lines = row.content.split(/\r?\n/);
701
- const totalLines = lines.length;
702
- const snippetRows = await db.all(`
703
- SELECT s.snippet_id, s.start_line, s.end_line, s.symbol_id, sym.name AS symbol_name, sym.kind AS symbol_kind
704
- FROM snippet s
705
- LEFT JOIN symbol sym
706
- ON sym.repo_id = s.repo_id
707
- AND sym.path = s.path
708
- AND sym.symbol_id = s.symbol_id
709
- WHERE s.repo_id = ? AND s.path = ?
710
- ORDER BY s.start_line
711
- `, [repoId, params.path]);
712
- const requestedStart = params.start_line ?? 1;
713
- const requestedEnd = params.end_line ?? Math.min(totalLines, requestedStart + DEFAULT_SNIPPET_WINDOW - 1);
714
- const useSymbolSnippets = snippetRows.length > 0 && params.end_line === undefined;
715
- let snippetSelection = null;
716
- if (useSymbolSnippets) {
717
- snippetSelection =
718
- snippetRows.find((snippet) => requestedStart >= snippet.start_line && requestedStart <= snippet.end_line) ?? null;
719
- if (!snippetSelection) {
720
- const firstSnippet = snippetRows[0];
721
- if (firstSnippet && requestedStart < firstSnippet.start_line) {
722
- snippetSelection = firstSnippet;
723
- }
724
- else {
725
- snippetSelection = snippetRows[snippetRows.length - 1] ?? null;
726
- }
727
- }
728
- }
729
- let startLine;
730
- let endLine;
731
- let symbolName = null;
732
- let symbolKind = null;
733
- if (snippetSelection) {
734
- startLine = snippetSelection.start_line;
735
- endLine = snippetSelection.end_line;
736
- symbolName = snippetSelection.symbol_name;
737
- symbolKind = snippetSelection.symbol_kind;
738
- }
739
- else {
740
- startLine = Math.max(1, Math.min(totalLines, requestedStart));
741
- endLine = Math.max(startLine, Math.min(totalLines, requestedEnd));
742
- }
743
- const snippetContent = lines.slice(startLine - 1, endLine).join("\n");
744
- return {
745
- path: row.path,
746
- startLine,
747
- endLine,
748
- content: snippetContent,
749
- totalLines,
750
- symbolName,
751
- symbolKind,
752
- };
753
- }
754
- export async function contextBundle(context, params) {
755
- const { db, repoId } = context;
756
- const goal = params.goal?.trim() ?? "";
757
- if (goal.length === 0) {
758
- throw new Error("context_bundle requires a non-empty goal. Describe your objective to receive context.");
759
- }
760
- const limit = normalizeBundleLimit(params.limit);
761
- const artifacts = params.artifacts ?? {};
762
- // Load scoring weights (eventually from a config file or arguments)
763
- const profileName = coerceProfileName(params.profile ?? null);
764
- const weights = loadScoringProfile(profileName);
765
- const keywordSources = [goal];
766
- if (artifacts.failing_tests && artifacts.failing_tests.length > 0) {
767
- keywordSources.push(artifacts.failing_tests.join(" "));
768
- }
769
- if (artifacts.last_diff) {
770
- keywordSources.push(artifacts.last_diff);
771
- }
772
- if (artifacts.editing_path) {
773
- keywordSources.push(artifacts.editing_path);
774
- }
775
- const semanticSeed = keywordSources.join(" ");
776
- const queryEmbedding = generateEmbedding(semanticSeed)?.values ?? null;
777
- const extractedTerms = extractKeywords(semanticSeed);
778
- // Fallback: extract keywords from editing_path
779
- if (extractedTerms.phrases.length === 0 &&
780
- extractedTerms.keywords.length === 0 &&
781
- artifacts.editing_path) {
782
- const pathSegments = artifacts.editing_path
783
- .split(/[/_.-]/)
784
- .map((segment) => segment.toLowerCase())
785
- .filter((segment) => segment.length >= 3 && !STOP_WORDS.has(segment));
786
- extractedTerms.pathSegments.push(...pathSegments.slice(0, MAX_KEYWORDS));
787
- }
788
- const candidates = new Map();
789
- const stringMatchSeeds = new Set();
790
- const fileCache = new Map();
791
- // Phrase matching (high weight: textMatch × 2)
792
- for (const phrase of extractedTerms.phrases) {
793
- const rows = await db.all(`
794
- SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
795
- FROM file f
796
- JOIN blob b ON b.hash = f.blob_hash
797
- LEFT JOIN file_embedding fe
798
- ON fe.repo_id = f.repo_id
799
- AND fe.path = f.path
800
- WHERE f.repo_id = ?
801
- AND f.is_binary = FALSE
802
- AND b.content ILIKE '%' || ? || '%'
803
- ORDER BY f.path
804
- LIMIT ?
805
- `, [repoId, phrase, MAX_MATCHES_PER_KEYWORD]);
806
- for (const row of rows) {
807
- if (row.content === null) {
808
- continue;
809
- }
810
- const candidate = ensureCandidate(candidates, row.path);
811
- // Phrase matches score twice the normal weight
812
- candidate.score += weights.textMatch * 2.0;
813
- candidate.reasons.add(`phrase:${phrase}`);
814
- // Apply boost profile to prioritize/penalize files based on type and location
815
- const boostProfile = params.boost_profile ?? "default";
816
- applyBoostProfile(candidate, row, boostProfile, extractedTerms, weights.pathMatch);
817
- const { line } = buildPreview(row.content, phrase);
818
- candidate.matchLine =
819
- candidate.matchLine === null ? line : Math.min(candidate.matchLine, line);
820
- candidate.content ?? (candidate.content = row.content);
821
- candidate.lang ?? (candidate.lang = row.lang);
822
- candidate.ext ?? (candidate.ext = row.ext);
823
- candidate.totalLines ?? (candidate.totalLines = row.content.length === 0 ? 0 : row.content.split(/\r?\n/).length);
824
- candidate.embedding ?? (candidate.embedding = parseEmbedding(row.vector_json ?? null, row.vector_dims ?? null));
825
- stringMatchSeeds.add(row.path);
826
- if (!fileCache.has(row.path)) {
827
- fileCache.set(row.path, {
828
- content: row.content,
829
- lang: row.lang,
830
- ext: row.ext,
831
- totalLines: candidate.totalLines ?? 0,
832
- embedding: candidate.embedding,
833
- });
834
- }
835
- }
836
- }
837
- // Keyword matching (normal weight)
838
- for (const keyword of extractedTerms.keywords) {
839
- const rows = await db.all(`
840
- SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
841
- FROM file f
842
- JOIN blob b ON b.hash = f.blob_hash
843
- LEFT JOIN file_embedding fe
844
- ON fe.repo_id = f.repo_id
845
- AND fe.path = f.path
846
- WHERE f.repo_id = ?
847
- AND f.is_binary = FALSE
848
- AND b.content ILIKE '%' || ? || '%'
849
- ORDER BY f.path
850
- LIMIT ?
851
- `, [repoId, keyword, MAX_MATCHES_PER_KEYWORD]);
852
- for (const row of rows) {
853
- if (row.content === null) {
854
- continue;
855
- }
856
- const candidate = ensureCandidate(candidates, row.path);
857
- candidate.score += weights.textMatch;
858
- candidate.reasons.add(`text:${keyword}`);
859
- // Apply boost profile to prioritize/penalize files based on type and location
860
- const boostProfile = params.boost_profile ?? "default";
861
- applyBoostProfile(candidate, row, boostProfile, extractedTerms, weights.pathMatch);
862
- const { line } = buildPreview(row.content, keyword);
863
- candidate.matchLine =
864
- candidate.matchLine === null ? line : Math.min(candidate.matchLine, line);
865
- candidate.content ?? (candidate.content = row.content);
866
- candidate.lang ?? (candidate.lang = row.lang);
867
- candidate.ext ?? (candidate.ext = row.ext);
868
- candidate.totalLines ?? (candidate.totalLines = row.content.length === 0 ? 0 : row.content.split(/\r?\n/).length);
869
- candidate.embedding ?? (candidate.embedding = parseEmbedding(row.vector_json ?? null, row.vector_dims ?? null));
870
- stringMatchSeeds.add(row.path);
871
- if (!fileCache.has(row.path)) {
872
- fileCache.set(row.path, {
873
- content: row.content,
874
- lang: row.lang,
875
- ext: row.ext,
876
- totalLines: candidate.totalLines ?? 0,
877
- embedding: candidate.embedding,
878
- });
879
- }
880
- }
881
- }
882
- if (artifacts.editing_path) {
883
- const editingCandidate = ensureCandidate(candidates, artifacts.editing_path);
884
- editingCandidate.score += weights.editingPath;
885
- editingCandidate.reasons.add("artifact:editing_path");
886
- editingCandidate.matchLine ?? (editingCandidate.matchLine = 1);
887
- }
888
- // SQL injection guard: validation pattern for file paths
889
- const SAFE_PATH_PATTERN = /^[a-zA-Z0-9_.\-/]+$/;
890
- const dependencySeeds = new Set();
891
- for (const pathSeed of stringMatchSeeds) {
892
- if (!SAFE_PATH_PATTERN.test(pathSeed)) {
893
- console.warn(`Skipping potentially unsafe path in dependency seeds: ${pathSeed}`);
894
- continue;
895
- }
896
- dependencySeeds.add(pathSeed);
897
- if (dependencySeeds.size >= MAX_DEPENDENCY_SEEDS) {
898
- break;
899
- }
900
- }
901
- if (artifacts.editing_path) {
902
- if (!SAFE_PATH_PATTERN.test(artifacts.editing_path)) {
903
- throw new Error(`Invalid editing_path format. Path must contain only alphanumeric characters, underscores, dots, hyphens, and forward slashes.`);
904
- }
905
- dependencySeeds.add(artifacts.editing_path);
906
- }
907
- if (dependencySeeds.size > 0) {
908
- // SQL injection guard: validate the size before generating placeholders
909
- if (dependencySeeds.size > MAX_DEPENDENCY_SEEDS_QUERY_LIMIT) {
910
- throw new Error(`Too many dependency seeds: ${dependencySeeds.size} (max ${MAX_DEPENDENCY_SEEDS_QUERY_LIMIT}). Narrow your search criteria.`);
911
- }
912
- const placeholders = Array.from(dependencySeeds, () => "?").join(", ");
913
- // Defensive check: verify the placeholders are well formed
914
- // Expected form: "?, ?, ..." (question marks, commas, and spaces only)
915
- if (!/^(\?)(,\s*\?)*$/.test(placeholders)) {
916
- throw new Error("Invalid placeholder generation detected. Operation aborted for safety.");
917
- }
918
- const depRows = await db.all(`
919
- SELECT src_path, dst_kind, dst, rel
920
- FROM dependency
921
- WHERE repo_id = ? AND src_path IN (${placeholders})
922
- `, [repoId, ...dependencySeeds]);
923
- for (const dep of depRows) {
924
- if (dep.dst_kind !== "path") {
925
- continue;
926
- }
927
- const candidate = ensureCandidate(candidates, dep.dst);
928
- candidate.score += weights.dependency;
929
- candidate.reasons.add(`dep:${dep.src_path}`);
930
- }
931
- }
932
- if (artifacts.editing_path) {
933
- const directory = path.posix.dirname(artifacts.editing_path);
934
- if (directory && directory !== ".") {
935
- const nearRows = await db.all(`
936
- SELECT path
937
- FROM file
938
- WHERE repo_id = ?
939
- AND is_binary = FALSE
940
- AND path LIKE ?
941
- ORDER BY path
942
- LIMIT ?
943
- `, [repoId, `${directory}/%`, NEARBY_LIMIT + 1]);
944
- for (const near of nearRows) {
945
- if (near.path === artifacts.editing_path) {
946
- continue;
947
- }
948
- const candidate = ensureCandidate(candidates, near.path);
949
- candidate.score += weights.proximity;
950
- candidate.reasons.add(`near:${directory}`);
951
- }
952
- }
953
- }
954
- const materializedCandidates = [];
955
- for (const candidate of candidates.values()) {
956
- if (!candidate.content) {
957
- const cached = fileCache.get(candidate.path);
958
- if (cached) {
959
- candidate.content = cached.content;
960
- candidate.lang = cached.lang;
961
- candidate.ext = cached.ext;
962
- candidate.totalLines = cached.totalLines;
963
- candidate.embedding = cached.embedding;
964
- }
965
- else {
966
- const loaded = await loadFileContent(db, repoId, candidate.path);
967
- if (!loaded) {
968
- continue;
969
- }
970
- candidate.content = loaded.content;
971
- candidate.lang = loaded.lang;
972
- candidate.ext = loaded.ext;
973
- candidate.totalLines = loaded.totalLines;
974
- candidate.embedding = loaded.embedding;
975
- fileCache.set(candidate.path, loaded);
976
- }
977
- }
978
- materializedCandidates.push(candidate);
979
- }
980
- if (materializedCandidates.length === 0) {
981
- return { context: [], tokens_estimate: 0 };
982
- }
983
- applyStructuralScores(materializedCandidates, queryEmbedding, weights.structural);
984
- const sortedCandidates = materializedCandidates
985
- .filter((candidate) => candidate.score > 0) // Filter out candidates with negative or zero scores
986
- .sort((a, b) => {
987
- if (b.score === a.score) {
988
- return a.path.localeCompare(b.path);
989
- }
990
- return b.score - a.score;
991
- })
992
- .slice(0, limit);
993
- const maxScore = Math.max(...sortedCandidates.map((candidate) => candidate.score));
994
- const results = [];
995
- for (const candidate of sortedCandidates) {
996
- if (!candidate.content) {
997
- continue;
998
- }
999
- const snippets = await db.all(`
1000
- SELECT s.snippet_id, s.start_line, s.end_line, s.symbol_id, sym.name AS symbol_name, sym.kind AS symbol_kind
1001
- FROM snippet s
1002
- LEFT JOIN symbol sym
1003
- ON sym.repo_id = s.repo_id
1004
- AND sym.path = s.path
1005
- AND sym.symbol_id = s.symbol_id
1006
- WHERE s.repo_id = ? AND s.path = ?
1007
- ORDER BY s.start_line
1008
- `, [repoId, candidate.path]);
1009
- const selected = selectSnippet(snippets, candidate.matchLine);
1010
- let startLine;
1011
- let endLine;
1012
- if (selected) {
1013
- startLine = selected.start_line;
1014
- endLine = selected.end_line;
1015
- }
1016
- else {
1017
- const totalLines = candidate.totalLines ?? 0;
1018
- const matchLine = candidate.matchLine ?? 1;
1019
- const windowHalf = Math.floor(FALLBACK_SNIPPET_WINDOW / 2);
1020
- startLine = Math.max(1, matchLine - windowHalf);
1021
- endLine = Math.min(totalLines === 0 ? matchLine + windowHalf : totalLines, startLine + FALLBACK_SNIPPET_WINDOW - 1);
1022
- }
1023
- if (endLine < startLine) {
1024
- endLine = startLine;
1025
- }
1026
- const reasons = new Set(candidate.reasons);
1027
- if (selected && selected.symbol_name) {
1028
- reasons.add(`symbol:${selected.symbol_name}`);
1029
- }
1030
- const normalizedScore = maxScore > 0 ? candidate.score / maxScore : 0;
1031
- const item = {
1032
- path: candidate.path,
1033
- range: [startLine, endLine],
1034
- why: Array.from(reasons).sort(),
1035
- score: Number.isFinite(normalizedScore) ? normalizedScore : 0,
1036
- };
1037
- // Add preview only if not in compact mode
1038
- if (!params.compact) {
1039
- item.preview = buildSnippetPreview(candidate.content, startLine, endLine);
1040
- }
1041
- results.push(item);
1042
- }
1043
- // Use content-based token estimation (more accurate)
1044
- const tokensEstimate = results.reduce((acc, item) => {
1045
- const candidate = sortedCandidates.find((c) => c.path === item.path);
1046
- if (candidate && candidate.content) {
1047
- return acc + estimateTokensFromContent(candidate.content, item.range[0], item.range[1]);
1048
- }
1049
- // Fallback: line-based estimate (when content is unavailable)
1050
- const lineCount = Math.max(1, item.range[1] - item.range[0] + 1);
1051
- return acc + lineCount * 4;
1052
- }, 0);
1053
- return { context: results, tokens_estimate: tokensEstimate };
1054
- }
1055
- export async function semanticRerank(context, params) {
1056
- const text = params.text?.trim() ?? "";
1057
- if (text.length === 0) {
1058
- throw new Error("semantic_rerank requires non-empty text. Describe the intent to compute semantic similarity.");
1059
- }
1060
- if (!Array.isArray(params.candidates) || params.candidates.length === 0) {
1061
- return { candidates: [] };
1062
- }
1063
- const uniqueCandidates = [];
1064
- const seenPaths = new Set();
1065
- for (const candidate of params.candidates) {
1066
- if (!candidate || typeof candidate.path !== "string" || candidate.path.length === 0) {
1067
- continue;
1068
- }
1069
- if (seenPaths.has(candidate.path)) {
1070
- continue;
1071
- }
1072
- seenPaths.add(candidate.path);
1073
- uniqueCandidates.push(candidate);
1074
- if (uniqueCandidates.length >= MAX_RERANK_LIMIT) {
1075
- break;
1076
- }
1077
- }
1078
- if (uniqueCandidates.length === 0) {
1079
- return { candidates: [] };
1080
- }
1081
- const limitRaw = params.k ?? uniqueCandidates.length;
1082
- const limit = Math.max(1, Math.min(MAX_RERANK_LIMIT, Math.floor(limitRaw)));
1083
- const profileName = coerceProfileName(params.profile ?? null);
1084
- const weights = loadScoringProfile(profileName);
1085
- const structuralWeight = weights.structural;
1086
- const queryEmbedding = generateEmbedding(text)?.values ?? null;
1087
- let embeddingMap = new Map();
1088
- if (queryEmbedding && structuralWeight > 0) {
1089
- const paths = uniqueCandidates.map((candidate) => candidate.path);
1090
- embeddingMap = await fetchEmbeddingMap(context.db, context.repoId, paths);
1091
- }
1092
- const scored = uniqueCandidates.map((candidate) => {
1093
- const base = typeof candidate.score === "number" && Number.isFinite(candidate.score) ? candidate.score : 0;
1094
- let semantic = 0;
1095
- if (queryEmbedding && structuralWeight > 0) {
1096
- const embedding = embeddingMap.get(candidate.path);
1097
- if (embedding) {
1098
- const similarity = structuralSimilarity(queryEmbedding, embedding);
1099
- if (Number.isFinite(similarity) && similarity > 0) {
1100
- semantic = similarity;
1101
- }
1102
- }
1103
- }
1104
- const combined = base + structuralWeight * semantic;
1105
- return {
1106
- path: candidate.path,
1107
- base,
1108
- semantic,
1109
- combined,
1110
- };
1111
- });
1112
- const sorted = scored.sort((a, b) => {
1113
- if (b.combined === a.combined) {
1114
- if (b.semantic === a.semantic) {
1115
- return a.path.localeCompare(b.path);
1116
- }
1117
- return b.semantic - a.semantic;
1118
- }
1119
- return b.combined - a.combined;
1120
- });
1121
- return { candidates: sorted.slice(0, limit) };
1122
- }
1123
- export async function depsClosure(context, params) {
1124
- const { db, repoId } = context;
1125
- if (!params.path) {
1126
- throw new Error("deps_closure requires a file path. Provide a tracked source file path to continue.");
1127
- }
1128
- const direction = params.direction ?? "outbound";
1129
- const maxDepth = params.max_depth ?? 3;
1130
- const includePackages = params.include_packages ?? true;
1131
- const dependencyRows = await db.all(`
1132
- SELECT src_path, dst_kind, dst, rel
1133
- FROM dependency
1134
- WHERE repo_id = ?
1135
- `, [repoId]);
1136
- // outbound: dependencies this file uses
1137
- const outbound = new Map();
1138
- // inbound: files that use this file
1139
- const inbound = new Map();
1140
- for (const row of dependencyRows) {
1141
- // build the outbound map
1142
- if (!outbound.has(row.src_path)) {
1143
- outbound.set(row.src_path, []);
1144
- }
1145
- outbound.get(row.src_path)?.push(row);
1146
- // build the inbound map (only when dst is a path)
1147
- if (row.dst_kind === "path") {
1148
- if (!inbound.has(row.dst)) {
1149
- inbound.set(row.dst, []);
1150
- }
1151
- inbound.get(row.dst)?.push(row);
1152
- }
1153
- }
1154
- const queue = [{ path: params.path, depth: 0 }];
1155
- const visitedPaths = new Set([params.path]);
1156
- const nodeDepth = new Map();
1157
- const edgeSet = new Map();
1158
- const recordNode = (node) => {
1159
- const key = `${node.kind}:${node.target}`;
1160
- const existing = nodeDepth.get(key);
1161
- if (!existing || node.depth < existing.depth) {
1162
- nodeDepth.set(key, { ...node });
1163
- }
1164
- };
1165
- const recordEdge = (edge) => {
1166
- const key = `${edge.from}->${edge.to}:${edge.kind}:${edge.rel}`;
1167
- const existing = edgeSet.get(key);
1168
- if (!existing || edge.depth < existing.depth) {
1169
- edgeSet.set(key, { ...edge });
1170
- }
1171
- };
1172
- recordNode({ kind: "path", target: params.path, depth: 0 });
1173
- while (queue.length > 0) {
1174
- const current = queue.shift();
1175
- if (current.depth >= maxDepth) {
1176
- continue;
1177
- }
1178
- // choose which map to use based on direction
1179
- const edgeMap = direction === "inbound" ? inbound : outbound;
1180
- const edges = edgeMap.get(current.path) ?? [];
1181
- for (const edge of edges) {
1182
- const nextDepth = current.depth + 1;
1183
- if (direction === "inbound") {
1184
- // inbound: edge.src_path uses this file
1185
- recordEdge({
1186
- from: edge.src_path,
1187
- to: current.path,
1188
- kind: "path",
1189
- rel: edge.rel,
1190
- depth: nextDepth,
1191
- });
1192
- recordNode({ kind: "path", target: edge.src_path, depth: nextDepth });
1193
- if (!visitedPaths.has(edge.src_path)) {
1194
- visitedPaths.add(edge.src_path);
1195
- queue.push({ path: edge.src_path, depth: nextDepth });
1196
- }
1197
- }
1198
- else {
1199
- // outbound: this file uses edge.dst
1200
- if (edge.dst_kind === "path") {
1201
- recordEdge({
1202
- from: current.path,
1203
- to: edge.dst,
1204
- kind: "path",
1205
- rel: edge.rel,
1206
- depth: nextDepth,
1207
- });
1208
- recordNode({ kind: "path", target: edge.dst, depth: nextDepth });
1209
- if (!visitedPaths.has(edge.dst)) {
1210
- visitedPaths.add(edge.dst);
1211
- queue.push({ path: edge.dst, depth: nextDepth });
1212
- }
1213
- }
1214
- else if (edge.dst_kind === "package" && includePackages) {
1215
- recordEdge({
1216
- from: current.path,
1217
- to: edge.dst,
1218
- kind: "package",
1219
- rel: edge.rel,
1220
- depth: nextDepth,
1221
- });
1222
- recordNode({ kind: "package", target: edge.dst, depth: nextDepth });
1223
- }
1224
- }
1225
- }
1226
- }
1227
- const nodes = Array.from(nodeDepth.values()).sort((a, b) => {
1228
- if (a.depth === b.depth) {
1229
- return a.target.localeCompare(b.target);
1230
- }
1231
- return a.depth - b.depth;
1232
- });
1233
- const edges = Array.from(edgeSet.values()).sort((a, b) => {
1234
- if (a.depth === b.depth) {
1235
- const fromCmp = a.from.localeCompare(b.from);
1236
- if (fromCmp !== 0) {
1237
- return fromCmp;
1238
- }
1239
- return a.to.localeCompare(b.to);
1240
- }
1241
- return a.depth - b.depth;
1242
- });
1243
- return {
1244
- root: params.path,
1245
- direction,
1246
- nodes,
1247
- edges,
1248
- };
1249
- }
1250
- export async function resolveRepoId(db, repoRoot) {
1251
- try {
1252
- const rows = await db.all("SELECT id FROM repo WHERE root = ?", [repoRoot]);
1253
- if (rows.length === 0) {
1254
- throw new Error("Target repository is missing from DuckDB. Run the indexer before starting the server.");
1255
- }
1256
- const row = rows[0];
1257
- if (!row) {
1258
- throw new Error("Failed to retrieve repository record. Database returned empty result.");
1259
- }
1260
- return row.id;
1261
- }
1262
- catch (error) {
1263
- if (error instanceof Error && error.message.includes("Table with name repo")) {
1264
- throw new Error("Target repository is missing from DuckDB. Run the indexer before starting the server.");
1265
- }
1266
- throw error;
1267
- }
1268
- }