kiri-mcp-server 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +59 -5
- package/config/default.example.yml +9 -0
- package/config/scoring-profiles.yml +21 -6
- package/dist/config/default.example.yml +9 -0
- package/dist/config/scoring-profiles.yml +21 -6
- package/dist/package.json +1 -1
- package/dist/server/context.js +0 -1
- package/dist/server/handlers.js +547 -79
- package/dist/server/scoring.js +8 -3
- package/dist/shared/duckdb.js +0 -2
- package/dist/shared/embedding.js +15 -2
- package/dist/shared/tokenizer.js +0 -1
- package/dist/shared/utils/simpleYaml.js +0 -1
- package/dist/src/server/handlers.d.ts.map +1 -1
- package/dist/src/server/handlers.js +353 -85
- package/dist/src/server/handlers.js.map +1 -1
- package/dist/src/server/rpc.d.ts.map +1 -1
- package/dist/src/server/rpc.js +9 -3
- package/dist/src/server/rpc.js.map +1 -1
- package/dist/src/server/scoring.d.ts +6 -0
- package/dist/src/server/scoring.d.ts.map +1 -1
- package/dist/src/server/scoring.js +29 -5
- package/dist/src/server/scoring.js.map +1 -1
- package/dist/src/shared/duckdb.d.ts +1 -0
- package/dist/src/shared/duckdb.d.ts.map +1 -1
- package/dist/src/shared/duckdb.js +54 -3
- package/dist/src/shared/duckdb.js.map +1 -1
- package/dist/src/shared/embedding.d.ts.map +1 -1
- package/dist/src/shared/embedding.js +2 -8
- package/dist/src/shared/embedding.js.map +1 -1
- package/dist/src/shared/tokenizer.d.ts +18 -0
- package/dist/src/shared/tokenizer.d.ts.map +1 -1
- package/dist/src/shared/tokenizer.js +35 -0
- package/dist/src/shared/tokenizer.js.map +1 -1
- package/package.json +1 -1
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import path from "node:path";
|
|
2
2
|
import { generateEmbedding, structuralSimilarity } from "../shared/embedding.js";
|
|
3
|
-
import { encode as encodeGPT } from "../shared/tokenizer.js";
|
|
3
|
+
import { encode as encodeGPT, tokenizeText } from "../shared/tokenizer.js";
|
|
4
4
|
import { coerceProfileName, loadScoringProfile } from "./scoring.js";
|
|
5
5
|
const DEFAULT_SEARCH_LIMIT = 50;
|
|
6
6
|
const DEFAULT_SNIPPET_WINDOW = 150;
|
|
@@ -73,22 +73,125 @@ function normalizeBundleLimit(limit) {
|
|
|
73
73
|
}
|
|
74
74
|
return Math.min(Math.max(1, Math.floor(limit)), MAX_BUNDLE_LIMIT);
|
|
75
75
|
}
|
|
76
|
+
/**
 * Resolve the tokenization strategy used for keyword extraction.
 *
 * Reads the KIRI_TOKENIZATION_STRATEGY environment variable. The value is
 * compared case-insensitively and surrounding whitespace is ignored, so a
 * value such as "hybrid " coming from a shell export or .env file still
 * selects the intended mode instead of silently falling back to the default.
 *
 * @returns {"legacy" | "hybrid" | "phrase-aware"} "legacy" or "hybrid" when
 *   explicitly requested; "phrase-aware" for an unset or unrecognized value.
 */
function getTokenizationStrategy() {
    const strategy = process.env.KIRI_TOKENIZATION_STRATEGY?.trim().toLowerCase();
    if (strategy === "legacy" || strategy === "hybrid") {
        return strategy;
    }
    return "phrase-aware"; // default
}
|
|
87
|
+
/**
 * Extract phrases enclosed in double or single quotes.
 *
 * Example: 'search "page-agent handler" test' -> phrases ["page-agent handler"]
 *
 * Every quoted span is replaced by a single space in `remaining`, including
 * spans whose trimmed content is shorter than 3 characters (those are dropped
 * from `phrases` but still removed from the text).
 *
 * A single quote only counts as a delimiter when it is not glued to a letter
 * or digit on its outer side. This keeps apostrophes in ordinary words
 * ("don't", "user's") from being read as the start/end of a quoted phrase.
 *
 * @param text - Raw query text.
 * @returns {{ phrases: string[], remaining: string }} Lower-cased, trimmed
 *   phrases in order of appearance, plus the text with quoted spans removed.
 */
function extractQuotedPhrases(text) {
    const phrases = [];
    const quotePattern = /"([^"]+)"|(?<![\p{L}\p{N}])'([^']+)'(?![\p{L}\p{N}])/gu;
    // A single callback-based replace both collects the phrases and strips them,
    // so there is no stateful exec() loop and no risk of replacing the wrong
    // occurrence when the same quoted text appears more than once.
    const remaining = text.replace(quotePattern, (_whole, doubleQuoted, singleQuoted) => {
        const phrase = (doubleQuoted || singleQuoted || "").trim().toLowerCase();
        if (phrase.length >= 3) {
            phrases.push(phrase);
        }
        return " ";
    });
    return { phrases, remaining };
}
|
|
105
|
+
/**
 * Extract compound terms joined by hyphens or underscores.
 *
 * Unicode letters and digits are supported, so kebab-case, snake_case and
 * mixed-script terms are all recognized:
 *   "page-agent lambda-handler"    -> ["page-agent", "lambda-handler"]
 *   "user_profile file_embedding"  -> ["user_profile", "file_embedding"]
 *   "app-日本語"                    -> ["app-日本語"]
 *
 * Terms are lower-cased, must be at least 3 characters long, must not be in
 * STOP_WORDS, and are returned without duplicates in order of first
 * appearance (repeating a term in the query must not count it twice).
 *
 * @param text - Query text (quoted phrases are expected to be removed already).
 * @returns {string[]} Unique compound terms.
 */
function extractCompoundTerms(text) {
    // \b cannot be used here: it treats "_" as a word character, so it never
    // fires inside snake_case. Boundaries are therefore spelled out explicitly
    // as "start / whitespace / any character that cannot be part of a term".
    const compoundPattern = /(?:^|\s|[^\p{L}\p{N}_-])([\p{L}\p{N}]+(?:[-_][\p{L}\p{N}]+)+)(?=\s|[^\p{L}\p{N}_-]|$)/giu;
    const terms = new Set();
    for (const match of text.matchAll(compoundPattern)) {
        const term = match[1].toLowerCase();
        if (term.length >= 3 && !STOP_WORDS.has(term)) {
            terms.add(term);
        }
    }
    return [...terms];
}
|
|
123
|
+
/**
 * Extract the individual segments of path-like terms.
 *
 * Example: "lambda/page-agent/handler" -> ["lambda", "page-agent", "handler"]
 *
 * A path-like term is two or more runs of letters, digits, "_" or "-" joined
 * by "/". It must begin and end on a letter, digit or underscore. Segments are
 * lower-cased, must be at least 3 characters long, must not be in STOP_WORDS,
 * and are returned without duplicates in order of first appearance.
 *
 * @param text - Query text (quoted phrases are expected to be removed already).
 * @returns {string[]} Unique path segments.
 */
function extractPathSegments(text) {
    // JavaScript's \b only understands ASCII word characters, so a pattern
    // wrapped in \b never matches a path that starts or ends with a non-ASCII
    // letter (e.g. "設計書/概要説明"). The lookarounds below are the
    // Unicode-aware equivalent of \b for the word class [\p{L}\p{N}_]; for
    // ASCII-only input they accept exactly what \b accepted.
    const pathPattern = /(?<![\p{L}\p{N}_])(?=[\p{L}\p{N}_])[\p{L}\p{N}_-]+(?:\/[\p{L}\p{N}_-]+)+(?<=[\p{L}\p{N}_])(?![\p{L}\p{N}_])/gu;
    const segments = new Set();
    // The loop variable is deliberately not called `path`, which would shadow
    // the node:path module imported at the top of the file.
    for (const [matchedPath] of text.matchAll(pathPattern)) {
        for (const part of matchedPath.toLowerCase().split("/")) {
            if (part.length >= 3 && !STOP_WORDS.has(part)) {
                segments.add(part);
            }
        }
    }
    return [...segments];
}
|
|
143
|
+
/**
 * Extract ordinary single-word keywords.
 *
 * Tokenization is delegated to the shared tokenizeText utility; this function
 * only discards tokens shorter than 3 characters and tokens in STOP_WORDS.
 *
 * @param text - Query text to tokenize.
 * @param strategy - Tokenization strategy, passed through to tokenizeText.
 * @returns Tokens that survive the length and stop-word filters, in order.
 */
function extractRegularWords(text, strategy) {
    const kept = [];
    for (const token of tokenizeText(text, strategy)) {
        if (token.length < 3 || STOP_WORDS.has(token)) {
            continue;
        }
        kept.push(token);
    }
    return kept;
}
|
|
151
|
+
/**
 * Extract phrases, keywords and path segments from free text.
 *
 * The tokenization strategy decides how hyphen/underscore compounds are
 * handled:
 *   - "phrase-aware": compounds are kept whole as phrases.
 *   - "hybrid": compounds are kept as phrases AND their parts are added as
 *     keywords.
 *   - "legacy": compounds are not extracted as phrases.
 *
 * Keywords are unique, never repeat something already present as a phrase,
 * and never exceed MAX_KEYWORDS. The same rules apply to keywords derived
 * from compound parts in hybrid mode, so that mode cannot bypass the cap or
 * introduce duplicates.
 *
 * @param text - Raw query / goal text.
 * @returns {{ phrases: string[], keywords: string[], pathSegments: string[] }}
 */
function extractKeywords(text) {
    const strategy = getTokenizationStrategy();
    const result = {
        phrases: [],
        keywords: [],
        pathSegments: [],
    };
    // Phase 1: quoted phrases; everything after this works on the text with
    // the quoted spans removed.
    const { phrases: quotedPhrases, remaining: afterQuotes } = extractQuotedPhrases(text);
    result.phrases.push(...quotedPhrases);
    // Phase 2: path segments.
    result.pathSegments.push(...extractPathSegments(afterQuotes));
    // Adds `word` unless it is already a keyword or phrase. Returns false once
    // the keyword list is full so callers can stop early.
    const addKeyword = (word) => {
        if (result.keywords.length >= MAX_KEYWORDS) {
            return false;
        }
        if (!result.keywords.includes(word) && !result.phrases.includes(word)) {
            result.keywords.push(word);
        }
        return true;
    };
    // Phase 3: compound terms (phrase-aware and hybrid modes only).
    if (strategy === "phrase-aware" || strategy === "hybrid") {
        const compoundTerms = extractCompoundTerms(afterQuotes);
        result.phrases.push(...compoundTerms);
        // Hybrid mode additionally indexes the individual parts of each compound,
        // split on both hyphens and underscores.
        if (strategy === "hybrid") {
            for (const term of compoundTerms) {
                for (const part of term.split(/[-_]/)) {
                    if (part.length >= 3 && !STOP_WORDS.has(part)) {
                        addKeyword(part);
                    }
                }
            }
        }
    }
    // Phase 4: ordinary words, until the keyword cap is reached.
    for (const word of extractRegularWords(afterQuotes, strategy)) {
        if (!addKeyword(word)) {
            break;
        }
    }
    return result;
}
|
|
93
196
|
function ensureCandidate(map, filePath) {
|
|
94
197
|
let candidate = map.get(filePath);
|
|
@@ -96,6 +199,7 @@ function ensureCandidate(map, filePath) {
|
|
|
96
199
|
candidate = {
|
|
97
200
|
path: filePath,
|
|
98
201
|
score: 0,
|
|
202
|
+
scoreMultiplier: 1.0, // Default: no boost or penalty
|
|
99
203
|
reasons: new Set(),
|
|
100
204
|
matchLine: null,
|
|
101
205
|
content: null,
|
|
@@ -257,14 +361,15 @@ function splitQueryWords(query) {
|
|
|
257
361
|
return words.length > 0 ? words : [query]; // 全て除外された場合は元のクエリを使用
|
|
258
362
|
}
|
|
259
363
|
/**
|
|
260
|
-
*
|
|
261
|
-
*
|
|
364
|
+
* files_search専用のファイルタイプブースト適用(v0.7.0+: 設定可能な乗算的ペナルティ)
|
|
365
|
+
* context_bundleと同じ乗算的ペナルティロジックを使用
|
|
262
366
|
* @param path - ファイルパス
|
|
263
|
-
* @param baseScore -
|
|
264
|
-
* @param profile - ブーストプロファイル
|
|
367
|
+
* @param baseScore - 基本スコア(FTS BM25スコアまたは1.0)
|
|
368
|
+
* @param profile - ブーストプロファイル
|
|
369
|
+
* @param weights - スコアリングウェイト設定(乗算的ペナルティに使用)
|
|
265
370
|
* @returns ブースト適用後のスコア
|
|
266
371
|
*/
|
|
267
|
-
function applyFileTypeBoost(path, baseScore, profile = "default") {
|
|
372
|
+
function applyFileTypeBoost(path, baseScore, profile = "default", weights) {
|
|
268
373
|
// Blacklisted directories that are almost always irrelevant for code context
|
|
269
374
|
const blacklistedDirs = [
|
|
270
375
|
".cursor/",
|
|
@@ -281,54 +386,91 @@ function applyFileTypeBoost(path, baseScore, profile = "default") {
|
|
|
281
386
|
if (profile === "none") {
|
|
282
387
|
return baseScore;
|
|
283
388
|
}
|
|
389
|
+
// Extract file extension for type detection
|
|
390
|
+
const ext = path.includes(".") ? path.substring(path.lastIndexOf(".")) : null;
|
|
391
|
+
// ✅ UNIFIED LOGIC: Use same multiplicative penalties as context_bundle
|
|
284
392
|
if (profile === "docs") {
|
|
393
|
+
// Boost documentation files
|
|
285
394
|
if (path.endsWith(".md") || path.endsWith(".yaml") || path.endsWith(".yml")) {
|
|
286
|
-
return baseScore * 1.
|
|
395
|
+
return baseScore * 1.5; // 50% boost (same as context_bundle)
|
|
287
396
|
}
|
|
397
|
+
// Penalty for implementation files in docs mode
|
|
288
398
|
if (path.startsWith("src/") &&
|
|
289
399
|
(path.endsWith(".ts") || path.endsWith(".js") || path.endsWith(".tsx"))) {
|
|
290
|
-
return baseScore * 0.5; //
|
|
400
|
+
return baseScore * 0.5; // 50% penalty
|
|
291
401
|
}
|
|
292
402
|
return baseScore;
|
|
293
403
|
}
|
|
294
|
-
// Default profile:
|
|
404
|
+
// Default profile: Use configurable multiplicative penalties
|
|
405
|
+
let multiplier = 1.0;
|
|
406
|
+
// Documentation files: apply docPenaltyMultiplier
|
|
295
407
|
const docExtensions = [".md", ".yaml", ".yml", ".mdc", ".json"];
|
|
296
|
-
if (docExtensions.some((
|
|
297
|
-
|
|
408
|
+
if (docExtensions.some((docExt) => path.endsWith(docExt))) {
|
|
409
|
+
multiplier *= weights.docPenaltyMultiplier; // 0.3 = 70% reduction (Phase 1)
|
|
410
|
+
return baseScore * multiplier;
|
|
298
411
|
}
|
|
412
|
+
// Implementation file boosts: apply implBoostMultiplier with path-based scaling
|
|
299
413
|
if (path.startsWith("src/app/")) {
|
|
300
|
-
|
|
414
|
+
multiplier *= weights.implBoostMultiplier * 1.4; // Extra boost for app files
|
|
301
415
|
}
|
|
302
|
-
if (path.startsWith("src/components/")) {
|
|
303
|
-
|
|
416
|
+
else if (path.startsWith("src/components/")) {
|
|
417
|
+
multiplier *= weights.implBoostMultiplier * 1.3;
|
|
304
418
|
}
|
|
305
|
-
if (path.startsWith("src/lib/")) {
|
|
306
|
-
|
|
419
|
+
else if (path.startsWith("src/lib/")) {
|
|
420
|
+
multiplier *= weights.implBoostMultiplier * 1.2;
|
|
307
421
|
}
|
|
308
|
-
if (path.startsWith("src/")
|
|
309
|
-
(
|
|
310
|
-
|
|
422
|
+
else if (path.startsWith("src/")) {
|
|
423
|
+
if (ext === ".ts" || ext === ".tsx" || ext === ".js") {
|
|
424
|
+
multiplier *= weights.implBoostMultiplier; // Base impl boost
|
|
425
|
+
}
|
|
311
426
|
}
|
|
427
|
+
// Test files: additive penalty (keep strong for files_search)
|
|
312
428
|
if (path.startsWith("tests/") || path.startsWith("test/")) {
|
|
313
|
-
return baseScore * 0.2; //
|
|
429
|
+
return baseScore * 0.2; // Strong penalty for tests
|
|
314
430
|
}
|
|
315
|
-
return baseScore;
|
|
431
|
+
return baseScore * multiplier;
|
|
316
432
|
}
|
|
317
433
|
/**
 * Additive path-based boost.
 *
 * Adds to candidate.score when a term extracted from the goal appears in the
 * file path. Exactly one boost is applied, chosen by priority:
 *   1. a phrase contained anywhere in the path      -> pathMatch * 1.5
 *   2. a path segment equal to a "/"-separated part -> pathMatch
 *   3. a keyword contained anywhere in the path     -> pathMatch * 0.5
 * Within a tier the first matching term wins, and a matching reason is
 * recorded on candidate.reasons. Nothing happens when extractedTerms is
 * missing or weights.pathMatch is not positive.
 *
 * @param candidate - Candidate whose score and reasons are updated in place.
 * @param lowerPath - Lower-cased file path.
 * @param weights - Scoring weights; only pathMatch is read.
 * @param extractedTerms - Object with phrases, pathSegments and keywords arrays.
 */
function applyPathBasedScoring(candidate, lowerPath, weights, extractedTerms) {
    if (!extractedTerms || weights.pathMatch <= 0) {
        return;
    }
    // Tier 1: whole phrase appears in the path (strongest signal).
    const phraseHit = extractedTerms.phrases.find((term) => lowerPath.includes(term));
    if (phraseHit !== undefined) {
        candidate.score += weights.pathMatch * 1.5;
        candidate.reasons.add(`path-phrase:${phraseHit}`);
        return;
    }
    // Tier 2: a requested segment is exactly one of the path's directories/file.
    const pathParts = lowerPath.split("/");
    const segmentHit = extractedTerms.pathSegments.find((term) => pathParts.includes(term));
    if (segmentHit !== undefined) {
        candidate.score += weights.pathMatch;
        candidate.reasons.add(`path-segment:${segmentHit}`);
        return;
    }
    // Tier 3: a plain keyword appears somewhere in the path (weakest signal).
    const keywordHit = extractedTerms.keywords.find((term) => lowerPath.includes(term));
    if (keywordHit !== undefined) {
        candidate.score += weights.pathMatch * 0.5;
        candidate.reasons.add(`path-keyword:${keywordHit}`);
    }
}
|
|
467
|
+
/**
|
|
468
|
+
* 加算的ファイルペナルティを適用
|
|
469
|
+
* ブラックリストディレクトリ、テストファイル、lockファイル、設定ファイル、マイグレーションファイルに強いペナルティ
|
|
470
|
+
* @returns true if penalty was applied and processing should stop
|
|
471
|
+
*/
|
|
472
|
+
function applyAdditiveFilePenalties(candidate, path, lowerPath, fileName) {
|
|
473
|
+
// Blacklisted directories - effectively remove
|
|
332
474
|
const blacklistedDirs = [
|
|
333
475
|
".cursor/",
|
|
334
476
|
".devcontainer/",
|
|
@@ -352,18 +494,18 @@ function applyBoostProfile(candidate, row, profile) {
|
|
|
352
494
|
"temp/",
|
|
353
495
|
];
|
|
354
496
|
if (blacklistedDirs.some((dir) => path.startsWith(dir))) {
|
|
355
|
-
candidate.score = -100;
|
|
497
|
+
candidate.score = -100;
|
|
356
498
|
candidate.reasons.add("penalty:blacklisted-dir");
|
|
357
|
-
return;
|
|
499
|
+
return true;
|
|
358
500
|
}
|
|
359
|
-
//
|
|
501
|
+
// Test files - strong penalty
|
|
360
502
|
const testPatterns = [".spec.ts", ".spec.js", ".test.ts", ".test.js", ".spec.tsx", ".test.tsx"];
|
|
361
503
|
if (testPatterns.some((pattern) => lowerPath.endsWith(pattern))) {
|
|
362
|
-
candidate.score -= 2.0;
|
|
504
|
+
candidate.score -= 2.0;
|
|
363
505
|
candidate.reasons.add("penalty:test-file");
|
|
364
|
-
return;
|
|
506
|
+
return true;
|
|
365
507
|
}
|
|
366
|
-
//
|
|
508
|
+
// Lock files - very strong penalty
|
|
367
509
|
const lockFiles = [
|
|
368
510
|
"package-lock.json",
|
|
369
511
|
"pnpm-lock.yaml",
|
|
@@ -374,11 +516,11 @@ function applyBoostProfile(candidate, row, profile) {
|
|
|
374
516
|
"poetry.lock",
|
|
375
517
|
];
|
|
376
518
|
if (lockFiles.some((lockFile) => fileName === lockFile)) {
|
|
377
|
-
candidate.score -= 3.0;
|
|
519
|
+
candidate.score -= 3.0;
|
|
378
520
|
candidate.reasons.add("penalty:lock-file");
|
|
379
|
-
return;
|
|
521
|
+
return true;
|
|
380
522
|
}
|
|
381
|
-
//
|
|
523
|
+
// Configuration files - strong penalty
|
|
382
524
|
const configPatterns = [
|
|
383
525
|
".config.js",
|
|
384
526
|
".config.ts",
|
|
@@ -399,56 +541,96 @@ function applyBoostProfile(candidate, row, profile) {
|
|
|
399
541
|
fileName === "Dockerfile" ||
|
|
400
542
|
fileName === "docker-compose.yml" ||
|
|
401
543
|
fileName === "docker-compose.yaml") {
|
|
402
|
-
candidate.score -= 1.5;
|
|
544
|
+
candidate.score -= 1.5;
|
|
403
545
|
candidate.reasons.add("penalty:config-file");
|
|
404
|
-
return;
|
|
546
|
+
return true;
|
|
405
547
|
}
|
|
406
|
-
//
|
|
548
|
+
// Migration files - strong penalty
|
|
407
549
|
if (lowerPath.includes("migrate") || lowerPath.includes("migration")) {
|
|
408
|
-
candidate.score -= 2.0;
|
|
550
|
+
candidate.score -= 2.0;
|
|
409
551
|
candidate.reasons.add("penalty:migration-file");
|
|
552
|
+
return true;
|
|
553
|
+
}
|
|
554
|
+
return false; // No penalty applied, continue processing
|
|
555
|
+
}
|
|
556
|
+
/**
 * Apply file-type multipliers to candidate.scoreMultiplier (v0.7.0+).
 *
 *   profile "none":    nothing is changed.
 *   profile "docs":    .md/.yaml/.yml/.mdc files get a 1.5x boost; no other
 *                      file type is touched.
 *   profile "default": .md/.yaml/.yml/.mdc/.json files are scaled by
 *                      weights.docPenaltyMultiplier and receive no further
 *                      boost. Otherwise files under src/ are scaled by
 *                      weights.implBoostMultiplier, with an extra factor for
 *                      src/app/ (1.4), src/components/ (1.3) and src/lib/
 *                      (1.2); other src/ files are boosted only when ext is
 *                      .ts, .tsx or .js.
 *   any other profile: nothing is changed.
 *
 * Only the multiplier and reasons are updated here; the score itself is not.
 *
 * @param candidate - Candidate whose scoreMultiplier and reasons are updated.
 * @param path - File path as stored in the index.
 * @param ext - File extension including the leading dot.
 * @param profile - Boost profile name.
 * @param weights - Scoring weights (docPenaltyMultiplier, implBoostMultiplier).
 */
function applyFileTypeMultipliers(candidate, path, ext, profile, weights) {
    const endsWithAny = (suffixes) => suffixes.some((suffix) => path.endsWith(suffix));
    switch (profile) {
        case "docs":
            // Doc-focused queries: boost documentation, never penalize code.
            if (endsWithAny([".md", ".yaml", ".yml", ".mdc"])) {
                candidate.scoreMultiplier *= 1.5;
                candidate.reasons.add("boost:doc-file");
            }
            return;
        case "default":
            break;
        default:
            // "none" and unrecognized profiles leave the multiplier alone.
            return;
    }
    // Default profile: documentation-like files are penalized and skip all boosts.
    if (endsWithAny([".md", ".yaml", ".yml", ".mdc", ".json"])) {
        candidate.scoreMultiplier *= weights.docPenaltyMultiplier;
        candidate.reasons.add("penalty:doc-file");
        return;
    }
    // Most specific source directories first; the first matching prefix wins.
    const boostedDirs = [
        ["src/app/", 1.4, "boost:app-file"],
        ["src/components/", 1.3, "boost:component-file"],
        ["src/lib/", 1.2, "boost:lib-file"],
    ];
    const matchedDir = boostedDirs.find(([prefix]) => path.startsWith(prefix));
    if (matchedDir) {
        const [, extraFactor, reason] = matchedDir;
        candidate.scoreMultiplier *= weights.implBoostMultiplier * extraFactor;
        candidate.reasons.add(reason);
        return;
    }
    // Any other file under src/ gets the base boost, but only for JS/TS sources.
    if (path.startsWith("src/") && [".ts", ".tsx", ".js"].includes(ext)) {
        candidate.scoreMultiplier *= weights.implBoostMultiplier;
        candidate.reasons.add("boost:impl-file");
    }
}
|
|
605
|
+
// Candidates that have already been through applyBoostProfile. contextBundle
// invokes applyBoostProfile from both its phrase pass and its keyword pass, so
// a file matched by both would otherwise receive the additive path boost and
// file penalties twice and have its scoreMultiplier squared. A WeakSet keeps
// the guard off the candidate object itself and lets entries be collected
// together with the candidates.
const boostProfileAppliedTo = new WeakSet();
/**
 * Apply the boost profile to a contextBundle candidate (v0.7.0+ refactor).
 *
 * The work is split across three helpers, run in this order:
 *   1. applyPathBasedScoring      - additive boost when goal terms appear in the path
 *   2. applyAdditiveFilePenalties - strong additive penalties (blacklisted
 *                                   directories, test, lock, config and
 *                                   migration files)
 *   3. applyFileTypeMultipliers   - multiplicative doc penalty / impl boost
 *
 * Rules this function relies on:
 *   - Multipliers are only recorded in candidate.scoreMultiplier here; the
 *     caller applies them after all additive scoring is finished.
 *   - When an additive penalty fires, no file-type multiplier is recorded.
 *   - The profile is applied at most once per candidate object; repeated calls
 *     for the same candidate are no-ops.
 *   - profile "none" disables everything and does not mark the candidate.
 *
 * @param candidate - Candidate updated in place (score, scoreMultiplier, reasons).
 * @param row - Row providing `path` and `ext` for the file.
 * @param profile - Boost profile name.
 * @param weights - Scoring weights forwarded to the helpers.
 * @param extractedTerms - Terms extracted from the goal, forwarded to path scoring.
 */
function applyBoostProfile(candidate, row, profile, weights, extractedTerms) {
    if (profile === "none") {
        return;
    }
    if (boostProfileAppliedTo.has(candidate)) {
        return;
    }
    boostProfileAppliedTo.add(candidate);
    const { path, ext } = row;
    const lowerPath = path.toLowerCase();
    const fileName = path.split("/").pop() ?? "";
    // Step 1: additive path-based boost.
    applyPathBasedScoring(candidate, lowerPath, weights, extractedTerms);
    // Step 2: additive penalties; a hit means the file is already heavily
    // demoted, so file-type multipliers are skipped.
    const shouldStop = applyAdditiveFilePenalties(candidate, path, lowerPath, fileName);
    if (shouldStop) {
        return;
    }
    // Step 3: multiplicative file-type penalty / boost.
    applyFileTypeMultipliers(candidate, path, ext, profile, weights);
}
|
|
452
634
|
export async function filesSearch(context, params) {
|
|
453
635
|
const { db, repoId } = context;
|
|
454
636
|
const { query } = params;
|
|
@@ -531,11 +713,14 @@ export async function filesSearch(context, params) {
|
|
|
531
713
|
}
|
|
532
714
|
const rows = await db.all(sql, values);
|
|
533
715
|
const boostProfile = params.boost_profile ?? "default";
|
|
716
|
+
// ✅ v0.7.0+: Load configurable scoring weights for unified boosting logic
|
|
717
|
+
// Note: filesSearch doesn't have a separate profile parameter, uses default weights
|
|
718
|
+
const weights = loadScoringProfile(null);
|
|
534
719
|
return rows
|
|
535
720
|
.map((row) => {
|
|
536
721
|
const { preview, line } = buildPreview(row.content ?? "", query);
|
|
537
722
|
const baseScore = row.score ?? 1.0; // FTS時はBM25スコア、ILIKE時は1.0
|
|
538
|
-
const boostedScore = applyFileTypeBoost(row.path, baseScore, boostProfile);
|
|
723
|
+
const boostedScore = applyFileTypeBoost(row.path, baseScore, boostProfile, weights);
|
|
539
724
|
return {
|
|
540
725
|
path: row.path,
|
|
541
726
|
preview,
|
|
@@ -649,18 +834,84 @@ export async function contextBundle(context, params) {
|
|
|
649
834
|
}
|
|
650
835
|
const semanticSeed = keywordSources.join(" ");
|
|
651
836
|
const queryEmbedding = generateEmbedding(semanticSeed)?.values ?? null;
|
|
652
|
-
|
|
653
|
-
|
|
837
|
+
const extractedTerms = extractKeywords(semanticSeed);
|
|
838
|
+
// フォールバック: editing_pathからキーワードを抽出
|
|
839
|
+
if (extractedTerms.phrases.length === 0 &&
|
|
840
|
+
extractedTerms.keywords.length === 0 &&
|
|
841
|
+
artifacts.editing_path) {
|
|
654
842
|
const pathSegments = artifacts.editing_path
|
|
655
843
|
.split(/[/_.-]/)
|
|
656
844
|
.map((segment) => segment.toLowerCase())
|
|
657
845
|
.filter((segment) => segment.length >= 3 && !STOP_WORDS.has(segment));
|
|
658
|
-
|
|
846
|
+
extractedTerms.pathSegments.push(...pathSegments.slice(0, MAX_KEYWORDS));
|
|
659
847
|
}
|
|
660
848
|
const candidates = new Map();
|
|
661
849
|
const stringMatchSeeds = new Set();
|
|
662
850
|
const fileCache = new Map();
|
|
663
|
-
|
|
851
|
+
// フレーズマッチング(高い重み: textMatch × 2)- 統合クエリでパフォーマンス改善
|
|
852
|
+
if (extractedTerms.phrases.length > 0) {
|
|
853
|
+
const phrasePlaceholders = extractedTerms.phrases
|
|
854
|
+
.map(() => "b.content ILIKE '%' || ? || '%'")
|
|
855
|
+
.join(" OR ");
|
|
856
|
+
const rows = await db.all(`
|
|
857
|
+
SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
|
|
858
|
+
FROM file f
|
|
859
|
+
JOIN blob b ON b.hash = f.blob_hash
|
|
860
|
+
LEFT JOIN file_embedding fe
|
|
861
|
+
ON fe.repo_id = f.repo_id
|
|
862
|
+
AND fe.path = f.path
|
|
863
|
+
WHERE f.repo_id = ?
|
|
864
|
+
AND f.is_binary = FALSE
|
|
865
|
+
AND (${phrasePlaceholders})
|
|
866
|
+
ORDER BY f.path
|
|
867
|
+
LIMIT ?
|
|
868
|
+
`, [repoId, ...extractedTerms.phrases, MAX_MATCHES_PER_KEYWORD * extractedTerms.phrases.length]);
|
|
869
|
+
const boostProfile = params.boost_profile ?? "default";
|
|
870
|
+
for (const row of rows) {
|
|
871
|
+
if (row.content === null) {
|
|
872
|
+
continue;
|
|
873
|
+
}
|
|
874
|
+
// どのフレーズにマッチしたかをチェック
|
|
875
|
+
const lowerContent = row.content.toLowerCase();
|
|
876
|
+
const matchedPhrases = extractedTerms.phrases.filter((phrase) => lowerContent.includes(phrase));
|
|
877
|
+
if (matchedPhrases.length === 0) {
|
|
878
|
+
continue; // Should not happen, but defensive check
|
|
879
|
+
}
|
|
880
|
+
const candidate = ensureCandidate(candidates, row.path);
|
|
881
|
+
// 各マッチしたフレーズに対してスコアリング
|
|
882
|
+
for (const phrase of matchedPhrases) {
|
|
883
|
+
// フレーズマッチは通常の2倍のスコア
|
|
884
|
+
candidate.score += weights.textMatch * 2.0;
|
|
885
|
+
candidate.reasons.add(`phrase:${phrase}`);
|
|
886
|
+
}
|
|
887
|
+
// Apply boost profile once per file
|
|
888
|
+
applyBoostProfile(candidate, row, boostProfile, weights, extractedTerms);
|
|
889
|
+
// Use first matched phrase for preview (guaranteed to exist due to length check above)
|
|
890
|
+
const { line } = buildPreview(row.content, matchedPhrases[0]);
|
|
891
|
+
candidate.matchLine =
|
|
892
|
+
candidate.matchLine === null ? line : Math.min(candidate.matchLine, line);
|
|
893
|
+
candidate.content ??= row.content;
|
|
894
|
+
candidate.lang ??= row.lang;
|
|
895
|
+
candidate.ext ??= row.ext;
|
|
896
|
+
candidate.totalLines ??= row.content.length === 0 ? 0 : row.content.split(/\r?\n/).length;
|
|
897
|
+
candidate.embedding ??= parseEmbedding(row.vector_json ?? null, row.vector_dims ?? null);
|
|
898
|
+
stringMatchSeeds.add(row.path);
|
|
899
|
+
if (!fileCache.has(row.path)) {
|
|
900
|
+
fileCache.set(row.path, {
|
|
901
|
+
content: row.content,
|
|
902
|
+
lang: row.lang,
|
|
903
|
+
ext: row.ext,
|
|
904
|
+
totalLines: candidate.totalLines ?? 0,
|
|
905
|
+
embedding: candidate.embedding,
|
|
906
|
+
});
|
|
907
|
+
}
|
|
908
|
+
}
|
|
909
|
+
}
|
|
910
|
+
// キーワードマッチング(通常の重み)- 統合クエリでパフォーマンス改善
|
|
911
|
+
if (extractedTerms.keywords.length > 0) {
|
|
912
|
+
const keywordPlaceholders = extractedTerms.keywords
|
|
913
|
+
.map(() => "b.content ILIKE '%' || ? || '%'")
|
|
914
|
+
.join(" OR ");
|
|
664
915
|
const rows = await db.all(`
|
|
665
916
|
SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
|
|
666
917
|
FROM file f
|
|
@@ -670,21 +921,31 @@ export async function contextBundle(context, params) {
|
|
|
670
921
|
AND fe.path = f.path
|
|
671
922
|
WHERE f.repo_id = ?
|
|
672
923
|
AND f.is_binary = FALSE
|
|
673
|
-
AND
|
|
924
|
+
AND (${keywordPlaceholders})
|
|
674
925
|
ORDER BY f.path
|
|
675
926
|
LIMIT ?
|
|
676
|
-
`, [repoId,
|
|
927
|
+
`, [repoId, ...extractedTerms.keywords, MAX_MATCHES_PER_KEYWORD * extractedTerms.keywords.length]);
|
|
928
|
+
const boostProfile = params.boost_profile ?? "default";
|
|
677
929
|
for (const row of rows) {
|
|
678
930
|
if (row.content === null) {
|
|
679
931
|
continue;
|
|
680
932
|
}
|
|
933
|
+
// どのキーワードにマッチしたかをチェック
|
|
934
|
+
const lowerContent = row.content.toLowerCase();
|
|
935
|
+
const matchedKeywords = extractedTerms.keywords.filter((keyword) => lowerContent.includes(keyword));
|
|
936
|
+
if (matchedKeywords.length === 0) {
|
|
937
|
+
continue; // Should not happen, but defensive check
|
|
938
|
+
}
|
|
681
939
|
const candidate = ensureCandidate(candidates, row.path);
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
940
|
+
// 各マッチしたキーワードに対してスコアリング
|
|
941
|
+
for (const keyword of matchedKeywords) {
|
|
942
|
+
candidate.score += weights.textMatch;
|
|
943
|
+
candidate.reasons.add(`text:${keyword}`);
|
|
944
|
+
}
|
|
945
|
+
// Apply boost profile once per file
|
|
946
|
+
applyBoostProfile(candidate, row, boostProfile, weights, extractedTerms);
|
|
947
|
+
// Use first matched keyword for preview (guaranteed to exist due to length check above)
|
|
948
|
+
const { line } = buildPreview(row.content, matchedKeywords[0]);
|
|
688
949
|
candidate.matchLine =
|
|
689
950
|
candidate.matchLine === null ? line : Math.min(candidate.matchLine, line);
|
|
690
951
|
candidate.content ??= row.content;
|
|
@@ -806,6 +1067,13 @@ export async function contextBundle(context, params) {
|
|
|
806
1067
|
return { context: [], tokens_estimate: 0 };
|
|
807
1068
|
}
|
|
808
1069
|
applyStructuralScores(materializedCandidates, queryEmbedding, weights.structural);
|
|
1070
|
+
// ✅ CRITICAL SAFETY: Apply multipliers AFTER all additive scoring (v0.7.0)
|
|
1071
|
+
// Only apply to positive scores to prevent negative score inversion
|
|
1072
|
+
for (const candidate of materializedCandidates) {
|
|
1073
|
+
if (candidate.scoreMultiplier !== 1.0 && candidate.score > 0) {
|
|
1074
|
+
candidate.score *= candidate.scoreMultiplier;
|
|
1075
|
+
}
|
|
1076
|
+
}
|
|
809
1077
|
const sortedCandidates = materializedCandidates
|
|
810
1078
|
.filter((candidate) => candidate.score > 0) // Filter out candidates with negative or zero scores
|
|
811
1079
|
.sort((a, b) => {
|