kiri-mcp-server 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/README.md +59 -5
  2. package/config/default.example.yml +9 -0
  3. package/config/scoring-profiles.yml +21 -6
  4. package/dist/config/default.example.yml +9 -0
  5. package/dist/config/scoring-profiles.yml +21 -6
  6. package/dist/package.json +1 -1
  7. package/dist/server/context.js +0 -1
  8. package/dist/server/handlers.js +547 -79
  9. package/dist/server/scoring.js +8 -3
  10. package/dist/shared/duckdb.js +0 -2
  11. package/dist/shared/embedding.js +15 -2
  12. package/dist/shared/tokenizer.js +0 -1
  13. package/dist/shared/utils/simpleYaml.js +0 -1
  14. package/dist/src/server/handlers.d.ts.map +1 -1
  15. package/dist/src/server/handlers.js +353 -85
  16. package/dist/src/server/handlers.js.map +1 -1
  17. package/dist/src/server/rpc.d.ts.map +1 -1
  18. package/dist/src/server/rpc.js +9 -3
  19. package/dist/src/server/rpc.js.map +1 -1
  20. package/dist/src/server/scoring.d.ts +6 -0
  21. package/dist/src/server/scoring.d.ts.map +1 -1
  22. package/dist/src/server/scoring.js +29 -5
  23. package/dist/src/server/scoring.js.map +1 -1
  24. package/dist/src/shared/duckdb.d.ts +1 -0
  25. package/dist/src/shared/duckdb.d.ts.map +1 -1
  26. package/dist/src/shared/duckdb.js +54 -3
  27. package/dist/src/shared/duckdb.js.map +1 -1
  28. package/dist/src/shared/embedding.d.ts.map +1 -1
  29. package/dist/src/shared/embedding.js +2 -8
  30. package/dist/src/shared/embedding.js.map +1 -1
  31. package/dist/src/shared/tokenizer.d.ts +18 -0
  32. package/dist/src/shared/tokenizer.d.ts.map +1 -1
  33. package/dist/src/shared/tokenizer.js +35 -0
  34. package/dist/src/shared/tokenizer.js.map +1 -1
  35. package/package.json +1 -1
@@ -1,6 +1,6 @@
  import path from "node:path";
  import { generateEmbedding, structuralSimilarity } from "../shared/embedding.js";
- import { encode as encodeGPT } from "../shared/tokenizer.js";
+ import { encode as encodeGPT, tokenizeText } from "../shared/tokenizer.js";
  import { coerceProfileName, loadScoringProfile } from "./scoring.js";
  const DEFAULT_SEARCH_LIMIT = 50;
  const DEFAULT_SNIPPET_WINDOW = 150;
@@ -73,22 +73,125 @@ function normalizeBundleLimit(limit) {
  }
  return Math.min(Math.max(1, Math.floor(limit)), MAX_BUNDLE_LIMIT);
  }
+ /**
+ * Resolve the tokenization strategy
+ * Determined from an environment variable, falling back to the default
+ */
+ function getTokenizationStrategy() {
+ const strategy = process.env.KIRI_TOKENIZATION_STRATEGY?.toLowerCase();
+ if (strategy === "legacy" || strategy === "hybrid") {
+ return strategy;
+ }
+ return "phrase-aware"; // default
+ }
+ /**
+ * Extract phrases enclosed in quotes
+ * Example: 'search "page-agent handler" test' → ["page-agent handler"]
+ */
+ function extractQuotedPhrases(text) {
+ const phrases = [];
+ const quotePattern = /"([^"]+)"|'([^']+)'/g;
+ let match;
+ let remaining = text;
+ while ((match = quotePattern.exec(text)) !== null) {
+ const phrase = (match[1] || match[2] || "").trim().toLowerCase();
+ if (phrase.length >= 3) {
+ phrases.push(phrase);
+ }
+ remaining = remaining.replace(match[0], " ");
+ }
+ return { phrases, remaining };
+ }
+ /**
+ * Extract compound terms (hyphen- or underscore-delimited)
+ * Unicode-aware (compound terms in Japanese, Chinese, etc. are also supported)
+ * Example: "page-agent lambda-handler" → ["page-agent", "lambda-handler"]
+ * Example: "user_profile file_embedding" → ["user_profile", "file_embedding"]
+ * Example: "app-日本語" → ["app-日本語"]
+ */
+ function extractCompoundTerms(text) {
+ // Unicode-aware: supports both hyphen (-) and underscore (_)
+ // Treats snake_case (Python/Rust) and kebab-case equivalently
+ // Note: \b treats underscore as a word character, so it fails at underscore boundaries
+ // Explicit boundary checks are used instead
+ const compoundPattern = /(?:^|\s|[^\p{L}\p{N}_-])([\p{L}\p{N}]+(?:[-_][\p{L}\p{N}]+)+)(?=\s|[^\p{L}\p{N}_-]|$)/giu;
+ const matches = Array.from(text.matchAll(compoundPattern)).map((m) => m[1]);
+ return matches
+ .map((term) => term.toLowerCase())
+ .filter((term) => term.length >= 3 && !STOP_WORDS.has(term));
+ }
+ /**
+ * Extract path-like terms
+ * Unicode-aware
+ * Example: "lambda/page-agent/handler" → ["lambda", "page-agent", "handler"]
+ */
+ function extractPathSegments(text) {
+ // Unicode-aware: path segments may also contain Unicode characters
+ const pathPattern = /\b[\p{L}\p{N}_-]+(?:\/[\p{L}\p{N}_-]+)+\b/giu;
+ const matches = text.match(pathPattern) || [];
+ const segments = [];
+ for (const path of matches) {
+ const parts = path.toLowerCase().split("/");
+ for (const part of parts) {
+ if (part.length >= 3 && !STOP_WORDS.has(part) && !segments.includes(part)) {
+ segments.push(part);
+ }
+ }
+ }
+ return segments;
+ }
+ /**
+ * Extract regular words
+ * Uses the shared tokenization utility
+ */
+ function extractRegularWords(text, strategy) {
+ const words = tokenizeText(text, strategy).filter((word) => word.length >= 3 && !STOP_WORDS.has(word));
+ return words;
+ }
+ /**
+ * Extract keywords, phrases, and path segments from text
+ * The handling of hyphen-delimited terms depends on the tokenization strategy
+ */
  function extractKeywords(text) {
- const words = text
- .toLowerCase()
- .split(/[^a-z0-9_]+/iu)
- .map((word) => word.trim())
- .filter((word) => word.length >= 3 && !STOP_WORDS.has(word));
- const unique = [];
- for (const word of words) {
- if (!unique.includes(word)) {
- unique.push(word);
- if (unique.length >= MAX_KEYWORDS) {
+ const strategy = getTokenizationStrategy();
+ const result = {
+ phrases: [],
+ keywords: [],
+ pathSegments: [],
+ };
+ // Phase 1: extract quoted phrases
+ const { phrases: quotedPhrases, remaining: afterQuotes } = extractQuotedPhrases(text);
+ result.phrases.push(...quotedPhrases);
+ // Phase 2: extract path segments
+ const pathSegments = extractPathSegments(afterQuotes);
+ result.pathSegments.push(...pathSegments);
+ // Phase 3: extract compound terms (hyphen/underscore-delimited) in phrase-aware or hybrid mode
+ if (strategy === "phrase-aware" || strategy === "hybrid") {
+ const compoundTerms = extractCompoundTerms(afterQuotes);
+ result.phrases.push(...compoundTerms);
+ // In hybrid mode, also add the keywords obtained by splitting compound terms
+ if (strategy === "hybrid") {
+ for (const term of compoundTerms) {
+ // Split on both hyphens and underscores
+ const parts = term
+ .split(/[-_]/)
+ .filter((part) => part.length >= 3 && !STOP_WORDS.has(part));
+ result.keywords.push(...parts);
+ }
+ }
+ }
+ // Phase 4: extract regular words
+ const regularWords = extractRegularWords(afterQuotes, strategy);
+ // Deduplicate while adding up to the maximum number of keywords
+ for (const word of regularWords) {
+ if (!result.keywords.includes(word) && !result.phrases.includes(word)) {
+ result.keywords.push(word);
+ if (result.keywords.length >= MAX_KEYWORDS) {
  break;
  }
  }
  }
- return unique;
+ return result;
  }
  function ensureCandidate(map, filePath) {
  let candidate = map.get(filePath);
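
Note on the new extractKeywords: it now returns a structured object ({ phrases, keywords, pathSegments }) rather than a flat keyword list, and the strategy is selected via the KIRI_TOKENIZATION_STRATEGY environment variable ("phrase-aware" by default, "legacy" or "hybrid" to override). A minimal usage sketch, assuming for illustration that the function were exported (in the diff it is module-internal):

    // Hypothetical sketch; names match the diff above, the export is assumed.
    process.env.KIRI_TOKENIZATION_STRATEGY = "hybrid";
    const terms = extractKeywords('fix "page-agent handler" in lambda/page-agent/handler');
    // Phase 1 puts the quoted phrase into terms.phrases,
    // Phase 2 puts lambda / page-agent / handler into terms.pathSegments,
    // Phase 3 keeps compound terms such as "page-agent" as phrases
    //         (and, in hybrid mode, also splits them into keywords),
    // Phase 4 fills terms.keywords with the remaining tokenizeText() words.
    console.log(terms);
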
@@ -96,6 +199,7 @@ function ensureCandidate(map, filePath) {
  candidate = {
  path: filePath,
  score: 0,
+ scoreMultiplier: 1.0, // Default: no boost or penalty
  reasons: new Set(),
  matchLine: null,
  content: null,
@@ -257,14 +361,15 @@ function splitQueryWords(query) {
  return words.length > 0 ? words : [query]; // Fall back to the original query if everything was filtered out
  }
  /**
- * Boost the score based on file type
- * Favors implementation files or documentation depending on the profile
+ * File-type boost application dedicated to files_search (v0.7.0+: configurable multiplicative penalties)
+ * Uses the same multiplicative penalty logic as context_bundle
  * @param path - the file path
- * @param baseScore - the original score
- * @param profile - the boost profile ("default" | "docs" | "none")
+ * @param baseScore - the base score (FTS BM25 score or 1.0)
+ * @param profile - the boost profile
+ * @param weights - the scoring weight settings (used for multiplicative penalties)
  * @returns the score after boosting
  */
- function applyFileTypeBoost(path, baseScore, profile = "default") {
+ function applyFileTypeBoost(path, baseScore, profile = "default", weights) {
  // Blacklisted directories that are almost always irrelevant for code context
  const blacklistedDirs = [
  ".cursor/",
@@ -281,54 +386,91 @@ function applyFileTypeBoost(path, baseScore, profile = "default") {
  if (profile === "none") {
  return baseScore;
  }
+ // Extract file extension for type detection
+ const ext = path.includes(".") ? path.substring(path.lastIndexOf(".")) : null;
+ // ✅ UNIFIED LOGIC: Use same multiplicative penalties as context_bundle
  if (profile === "docs") {
+ // Boost documentation files
  if (path.endsWith(".md") || path.endsWith(".yaml") || path.endsWith(".yml")) {
- return baseScore * 1.8; // Stronger boost for docs
+ return baseScore * 1.5; // 50% boost (same as context_bundle)
  }
+ // Penalty for implementation files in docs mode
  if (path.startsWith("src/") &&
  (path.endsWith(".ts") || path.endsWith(".js") || path.endsWith(".tsx"))) {
- return baseScore * 0.5; // Stronger penalty for implementation files
+ return baseScore * 0.5; // 50% penalty
  }
  return baseScore;
  }
- // Default profile: prioritize implementation files, heavily penalize docs
+ // Default profile: Use configurable multiplicative penalties
+ let multiplier = 1.0;
+ // Documentation files: apply docPenaltyMultiplier
  const docExtensions = [".md", ".yaml", ".yml", ".mdc", ".json"];
- if (docExtensions.some((ext) => path.endsWith(ext))) {
- return baseScore * 0.1; // Heavy penalty for docs
+ if (docExtensions.some((docExt) => path.endsWith(docExt))) {
+ multiplier *= weights.docPenaltyMultiplier; // 0.3 = 70% reduction (Phase 1)
+ return baseScore * multiplier;
  }
+ // Implementation file boosts: apply implBoostMultiplier with path-based scaling
  if (path.startsWith("src/app/")) {
- return baseScore * 1.8;
+ multiplier *= weights.implBoostMultiplier * 1.4; // Extra boost for app files
  }
- if (path.startsWith("src/components/")) {
- return baseScore * 1.7;
+ else if (path.startsWith("src/components/")) {
+ multiplier *= weights.implBoostMultiplier * 1.3;
  }
- if (path.startsWith("src/lib/")) {
- return baseScore * 1.6;
+ else if (path.startsWith("src/lib/")) {
+ multiplier *= weights.implBoostMultiplier * 1.2;
  }
- if (path.startsWith("src/") &&
- (path.endsWith(".ts") || path.endsWith(".js") || path.endsWith(".tsx"))) {
- return baseScore * 1.5;
+ else if (path.startsWith("src/")) {
+ if (ext === ".ts" || ext === ".tsx" || ext === ".js") {
+ multiplier *= weights.implBoostMultiplier; // Base impl boost
+ }
  }
+ // Test files: additive penalty (keep strong for files_search)
  if (path.startsWith("tests/") || path.startsWith("test/")) {
- return baseScore * 0.2; // Also penalize tests in default mode
+ return baseScore * 0.2; // Strong penalty for tests
  }
- return baseScore;
+ return baseScore * multiplier;
  }
  /**
- * Boost-profile application dedicated to contextBundle
- * Mutates the candidate's score and reasons directly
- * @param candidate - the candidate being scored
- * @param row - the file info (path, ext)
- * @param profile - the boost profile
+ * Apply path-based scoring (additive boost)
+ * Adds to the score when keywords/phrases from the goal appear in the file path
  */
- function applyBoostProfile(candidate, row, profile) {
- if (profile === "none") {
+ function applyPathBasedScoring(candidate, lowerPath, weights, extractedTerms) {
+ if (!extractedTerms || weights.pathMatch <= 0) {
  return;
  }
- const { path, ext } = row;
- const lowerPath = path.toLowerCase();
- const fileName = path.split("/").pop() ?? "";
- // Blacklisted directories that are almost always irrelevant for code context
+ // A phrase contained in the path (highest weight)
+ for (const phrase of extractedTerms.phrases) {
+ if (lowerPath.includes(phrase)) {
+ candidate.score += weights.pathMatch * 1.5; // 1.5x boost
+ candidate.reasons.add(`path-phrase:${phrase}`);
+ return; // Apply only the first match
+ }
+ }
+ // A matching path segment (medium weight)
+ const pathParts = lowerPath.split("/");
+ for (const segment of extractedTerms.pathSegments) {
+ if (pathParts.includes(segment)) {
+ candidate.score += weights.pathMatch;
+ candidate.reasons.add(`path-segment:${segment}`);
+ return; // Apply only the first match
+ }
+ }
+ // A regular keyword contained in the path (low weight)
+ for (const keyword of extractedTerms.keywords) {
+ if (lowerPath.includes(keyword)) {
+ candidate.score += weights.pathMatch * 0.5; // 0.5x boost
+ candidate.reasons.add(`path-keyword:${keyword}`);
+ return; // Apply only the first match
+ }
+ }
+ }
+ /**
+ * Apply additive file penalties
+ * Strong penalties for blacklisted directories, test files, lock files, config files, and migration files
+ * @returns true if penalty was applied and processing should stop
+ */
+ function applyAdditiveFilePenalties(candidate, path, lowerPath, fileName) {
+ // Blacklisted directories - effectively remove
  const blacklistedDirs = [
  ".cursor/",
  ".devcontainer/",
@@ -352,18 +494,18 @@ function applyBoostProfile(candidate, row, profile) {
  "temp/",
  ];
  if (blacklistedDirs.some((dir) => path.startsWith(dir))) {
- candidate.score = -100; // Effectively remove it
+ candidate.score = -100;
  candidate.reasons.add("penalty:blacklisted-dir");
- return;
+ return true;
  }
- // Penalize test files explicitly (even if outside test directories)
+ // Test files - strong penalty
  const testPatterns = [".spec.ts", ".spec.js", ".test.ts", ".test.js", ".spec.tsx", ".test.tsx"];
  if (testPatterns.some((pattern) => lowerPath.endsWith(pattern))) {
- candidate.score -= 2.0; // Strong penalty for test files
+ candidate.score -= 2.0;
  candidate.reasons.add("penalty:test-file");
- return;
+ return true;
  }
- // Penalize lock files and package manifests
+ // Lock files - very strong penalty
  const lockFiles = [
  "package-lock.json",
  "pnpm-lock.yaml",
@@ -374,11 +516,11 @@ function applyBoostProfile(candidate, row, profile) {
  "poetry.lock",
  ];
  if (lockFiles.some((lockFile) => fileName === lockFile)) {
- candidate.score -= 3.0; // Very strong penalty for lock files
+ candidate.score -= 3.0;
  candidate.reasons.add("penalty:lock-file");
- return;
+ return true;
  }
- // Penalize configuration files
+ // Configuration files - strong penalty
  const configPatterns = [
  ".config.js",
  ".config.ts",
@@ -399,56 +541,96 @@ function applyBoostProfile(candidate, row, profile) {
  fileName === "Dockerfile" ||
  fileName === "docker-compose.yml" ||
  fileName === "docker-compose.yaml") {
- candidate.score -= 1.5; // Strong penalty for config files
+ candidate.score -= 1.5;
  candidate.reasons.add("penalty:config-file");
- return;
+ return true;
  }
- // Penalize migration files (by path content)
+ // Migration files - strong penalty
  if (lowerPath.includes("migrate") || lowerPath.includes("migration")) {
- candidate.score -= 2.0; // Strong penalty for migrations
+ candidate.score -= 2.0;
  candidate.reasons.add("penalty:migration-file");
+ return true;
+ }
+ return false; // No penalty applied, continue processing
+ }
+ /**
+ * Apply per-file-type multiplicative penalties/boosts (v0.7.0+)
+ * profile="docs": boost documentation files
+ * profile="default": penalize documentation files, boost implementation files
+ */
+ function applyFileTypeMultipliers(candidate, path, ext, profile, weights) {
+ if (profile === "none") {
  return;
  }
+ // ✅ CRITICAL SAFETY: profile="docs" mode boosts docs, skips penalties
  if (profile === "docs") {
- // DOCS PROFILE: Boost docs, penalize code
- if (path.endsWith(".md") || path.endsWith(".yaml") || path.endsWith(".yml")) {
- candidate.score += 0.8;
+ const docExtensions = [".md", ".yaml", ".yml", ".mdc"];
+ if (docExtensions.some((docExt) => path.endsWith(docExt))) {
+ candidate.scoreMultiplier *= 1.5; // 50% boost for docs
  candidate.reasons.add("boost:doc-file");
  }
- else if (path.startsWith("src/") && (ext === ".ts" || ext === ".tsx" || ext === ".js")) {
- candidate.score -= 0.5;
- candidate.reasons.add("penalty:impl-file");
- }
+ // No penalty for implementation files in "docs" mode
+ return;
  }
- else if (profile === "default") {
- // DEFAULT PROFILE: Penalize docs heavily, boost implementation files.
- // Penalize documentation and other non-code files
+ // DEFAULT PROFILE: Use MULTIPLICATIVE penalties for docs, MULTIPLICATIVE boosts for impl files
+ if (profile === "default") {
  const docExtensions = [".md", ".yaml", ".yml", ".mdc", ".json"];
  if (docExtensions.some((docExt) => path.endsWith(docExt))) {
- candidate.score -= 1.0; // Strong penalty to overcome structural similarity
+ // MULTIPLICATIVE penalty (v0.7.0): 70% reduction (Phase 1 conservative)
+ candidate.scoreMultiplier *= weights.docPenaltyMultiplier;
  candidate.reasons.add("penalty:doc-file");
+ return; // Don't apply impl boosts to docs
  }
- // Boost implementation files, with more specific paths getting higher scores
+ // MULTIPLICATIVE boost for implementation files
  if (path.startsWith("src/app/")) {
- candidate.score += 0.8;
+ candidate.scoreMultiplier *= weights.implBoostMultiplier * 1.4; // Extra boost for app files
  candidate.reasons.add("boost:app-file");
  }
  else if (path.startsWith("src/components/")) {
- candidate.score += 0.7;
+ candidate.scoreMultiplier *= weights.implBoostMultiplier * 1.3;
  candidate.reasons.add("boost:component-file");
  }
  else if (path.startsWith("src/lib/")) {
- candidate.score += 0.6;
+ candidate.scoreMultiplier *= weights.implBoostMultiplier * 1.2;
  candidate.reasons.add("boost:lib-file");
  }
  else if (path.startsWith("src/")) {
  if (ext === ".ts" || ext === ".tsx" || ext === ".js") {
- candidate.score += 0.5;
+ candidate.scoreMultiplier *= weights.implBoostMultiplier;
  candidate.reasons.add("boost:impl-file");
  }
  }
  }
  }
+ /**
+ * Boost-profile application dedicated to contextBundle (v0.7.0+: refactored)
+ * Split into three helper functions to reduce complexity:
+ * 1. applyPathBasedScoring: additive path-based scoring
+ * 2. applyAdditiveFilePenalties: strong additive penalties
+ * 3. applyFileTypeMultipliers: multiplicative penalties/boosts
+ *
+ * CRITICAL SAFETY RULES:
+ * 1. Multipliers are stored in candidate.scoreMultiplier, applied AFTER all additive scoring
+ * 2. profile="docs" skips documentation penalties (allows doc-focused queries)
+ * 3. Blacklist/test/lock/config files keep additive penalties (already very strong)
+ */
+ function applyBoostProfile(candidate, row, profile, weights, extractedTerms) {
+ if (profile === "none") {
+ return;
+ }
+ const { path, ext } = row;
+ const lowerPath = path.toLowerCase();
+ const fileName = path.split("/").pop() ?? "";
+ // Step 1: path-based scoring (additive boost)
+ applyPathBasedScoring(candidate, lowerPath, weights, extractedTerms);
+ // Step 2: additive penalties (blacklist, tests, lock, config, migrations)
+ const shouldStop = applyAdditiveFilePenalties(candidate, path, lowerPath, fileName);
+ if (shouldStop) {
+ return; // Stop processing if a penalty was applied
+ }
+ // Step 3: per-file-type multiplicative penalties/boosts
+ applyFileTypeMultipliers(candidate, path, ext, profile, weights);
+ }
  export async function filesSearch(context, params) {
  const { db, repoId } = context;
  const { query } = params;
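
The practical difference from 0.5.0 is that file-type effects no longer add fixed offsets to candidate.score; they accumulate in candidate.scoreMultiplier and are applied after all additive scoring (see the final hunk below). A worked example with made-up weights, assuming docPenaltyMultiplier = 0.3 and implBoostMultiplier = 1.5 (the shipped values come from config/scoring-profiles.yml, which this hunk does not show):

    // Illustrative numbers only; not taken from the package configuration.
    const weights = { pathMatch: 0.5, textMatch: 1.0, docPenaltyMultiplier: 0.3, implBoostMultiplier: 1.5 };
    // src/app/page.ts: two keyword hits (+2.0), one path-keyword hit (+0.25 = 0.5 * 0.5)
    //   additive score = 2.25, scoreMultiplier = 1.5 * 1.4 = 2.1, final ≈ 4.73
    // docs/usage.md: two keyword hits (+2.0)
    //   additive score = 2.0, scoreMultiplier = 0.3, final = 0.6
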
@@ -531,11 +713,14 @@ export async function filesSearch(context, params) {
  }
  const rows = await db.all(sql, values);
  const boostProfile = params.boost_profile ?? "default";
+ // ✅ v0.7.0+: Load configurable scoring weights for unified boosting logic
+ // Note: filesSearch doesn't have a separate profile parameter, uses default weights
+ const weights = loadScoringProfile(null);
  return rows
  .map((row) => {
  const { preview, line } = buildPreview(row.content ?? "", query);
  const baseScore = row.score ?? 1.0; // BM25 score when using FTS, 1.0 when using ILIKE
- const boostedScore = applyFileTypeBoost(row.path, baseScore, boostProfile);
+ const boostedScore = applyFileTypeBoost(row.path, baseScore, boostProfile, weights);
  return {
  path: row.path,
  preview,
@@ -649,18 +834,84 @@ export async function contextBundle(context, params) {
  }
  const semanticSeed = keywordSources.join(" ");
  const queryEmbedding = generateEmbedding(semanticSeed)?.values ?? null;
- let keywords = extractKeywords(semanticSeed);
- if (keywords.length === 0 && artifacts.editing_path) {
+ const extractedTerms = extractKeywords(semanticSeed);
+ // Fallback: extract keywords from editing_path
+ if (extractedTerms.phrases.length === 0 &&
+ extractedTerms.keywords.length === 0 &&
+ artifacts.editing_path) {
  const pathSegments = artifacts.editing_path
  .split(/[/_.-]/)
  .map((segment) => segment.toLowerCase())
  .filter((segment) => segment.length >= 3 && !STOP_WORDS.has(segment));
- keywords = pathSegments.slice(0, MAX_KEYWORDS);
+ extractedTerms.pathSegments.push(...pathSegments.slice(0, MAX_KEYWORDS));
  }
  const candidates = new Map();
  const stringMatchSeeds = new Set();
  const fileCache = new Map();
- for (const keyword of keywords) {
+ // Phrase matching (high weight: textMatch × 2) - consolidated query for better performance
+ if (extractedTerms.phrases.length > 0) {
+ const phrasePlaceholders = extractedTerms.phrases
+ .map(() => "b.content ILIKE '%' || ? || '%'")
+ .join(" OR ");
+ const rows = await db.all(`
+ SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
+ FROM file f
+ JOIN blob b ON b.hash = f.blob_hash
+ LEFT JOIN file_embedding fe
+ ON fe.repo_id = f.repo_id
+ AND fe.path = f.path
+ WHERE f.repo_id = ?
+ AND f.is_binary = FALSE
+ AND (${phrasePlaceholders})
+ ORDER BY f.path
+ LIMIT ?
+ `, [repoId, ...extractedTerms.phrases, MAX_MATCHES_PER_KEYWORD * extractedTerms.phrases.length]);
+ const boostProfile = params.boost_profile ?? "default";
+ for (const row of rows) {
+ if (row.content === null) {
+ continue;
+ }
+ // Check which phrases matched
+ const lowerContent = row.content.toLowerCase();
+ const matchedPhrases = extractedTerms.phrases.filter((phrase) => lowerContent.includes(phrase));
+ if (matchedPhrases.length === 0) {
+ continue; // Should not happen, but defensive check
+ }
+ const candidate = ensureCandidate(candidates, row.path);
+ // Score each matched phrase
+ for (const phrase of matchedPhrases) {
+ // A phrase match scores twice the normal weight
+ candidate.score += weights.textMatch * 2.0;
+ candidate.reasons.add(`phrase:${phrase}`);
+ }
+ // Apply boost profile once per file
+ applyBoostProfile(candidate, row, boostProfile, weights, extractedTerms);
+ // Use first matched phrase for preview (guaranteed to exist due to length check above)
+ const { line } = buildPreview(row.content, matchedPhrases[0]);
+ candidate.matchLine =
+ candidate.matchLine === null ? line : Math.min(candidate.matchLine, line);
+ candidate.content ??= row.content;
+ candidate.lang ??= row.lang;
+ candidate.ext ??= row.ext;
+ candidate.totalLines ??= row.content.length === 0 ? 0 : row.content.split(/\r?\n/).length;
+ candidate.embedding ??= parseEmbedding(row.vector_json ?? null, row.vector_dims ?? null);
+ stringMatchSeeds.add(row.path);
+ if (!fileCache.has(row.path)) {
+ fileCache.set(row.path, {
+ content: row.content,
+ lang: row.lang,
+ ext: row.ext,
+ totalLines: candidate.totalLines ?? 0,
+ embedding: candidate.embedding,
+ });
+ }
+ }
+ }
+ // Keyword matching (normal weight) - consolidated query for better performance
+ if (extractedTerms.keywords.length > 0) {
+ const keywordPlaceholders = extractedTerms.keywords
+ .map(() => "b.content ILIKE '%' || ? || '%'")
+ .join(" OR ");
  const rows = await db.all(`
  SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
  FROM file f
@@ -670,21 +921,31 @@ export async function contextBundle(context, params) {
  AND fe.path = f.path
  WHERE f.repo_id = ?
  AND f.is_binary = FALSE
- AND b.content ILIKE '%' || ? || '%'
+ AND (${keywordPlaceholders})
  ORDER BY f.path
  LIMIT ?
- `, [repoId, keyword, MAX_MATCHES_PER_KEYWORD]);
+ `, [repoId, ...extractedTerms.keywords, MAX_MATCHES_PER_KEYWORD * extractedTerms.keywords.length]);
+ const boostProfile = params.boost_profile ?? "default";
  for (const row of rows) {
  if (row.content === null) {
  continue;
  }
+ // Check which keywords matched
+ const lowerContent = row.content.toLowerCase();
+ const matchedKeywords = extractedTerms.keywords.filter((keyword) => lowerContent.includes(keyword));
+ if (matchedKeywords.length === 0) {
+ continue; // Should not happen, but defensive check
+ }
  const candidate = ensureCandidate(candidates, row.path);
- candidate.score += weights.textMatch;
- candidate.reasons.add(`text:${keyword}`);
- // Apply boost profile to prioritize/penalize files based on type and location
- const boostProfile = params.boost_profile ?? "default";
- applyBoostProfile(candidate, row, boostProfile);
- const { line } = buildPreview(row.content, keyword);
+ // Score each matched keyword
+ for (const keyword of matchedKeywords) {
+ candidate.score += weights.textMatch;
+ candidate.reasons.add(`text:${keyword}`);
+ }
+ // Apply boost profile once per file
+ applyBoostProfile(candidate, row, boostProfile, weights, extractedTerms);
+ // Use first matched keyword for preview (guaranteed to exist due to length check above)
+ const { line } = buildPreview(row.content, matchedKeywords[0]);
  candidate.matchLine =
  candidate.matchLine === null ? line : Math.min(candidate.matchLine, line);
  candidate.content ??= row.content;
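
The per-keyword loop from 0.5.0 is replaced by one consolidated query per term class: the ILIKE fragment is repeated once per term, OR-joined, and the term values are spread into the bind parameters. Roughly, for three keywords the code above builds (a sketch of the generated string, not captured server output):

    const keywords = ["handler", "lambda", "agent"];
    const keywordPlaceholders = keywords
        .map(() => "b.content ILIKE '%' || ? || '%'")
        .join(" OR ");
    // => "b.content ILIKE '%' || ? || '%' OR b.content ILIKE '%' || ? || '%' OR ..."
    // bind order: [repoId, ...keywords, MAX_MATCHES_PER_KEYWORD * keywords.length]
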
@@ -806,6 +1067,13 @@ export async function contextBundle(context, params) {
  return { context: [], tokens_estimate: 0 };
  }
  applyStructuralScores(materializedCandidates, queryEmbedding, weights.structural);
+ // ✅ CRITICAL SAFETY: Apply multipliers AFTER all additive scoring (v0.7.0)
+ // Only apply to positive scores to prevent negative score inversion
+ for (const candidate of materializedCandidates) {
+ if (candidate.scoreMultiplier !== 1.0 && candidate.score > 0) {
+ candidate.score *= candidate.scoreMultiplier;
+ }
+ }
  const sortedCandidates = materializedCandidates
  .filter((candidate) => candidate.score > 0) // Filter out candidates with negative or zero scores
  .sort((a, b) => {
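
The candidate.score > 0 guard exists because a multiplier below 1.0 applied to a negative score would shrink the penalty and effectively boost the file. A small illustration of the rule enforced by the loop above, with made-up values:

    // Made-up values, purely to show why the guard exists.
    let score = -2.0;        // net additive penalties
    const multiplier = 0.3;  // e.g. a doc penalty multiplier
    // unguarded: score *= multiplier → -0.6, which would rank above -2.0 (inversion)
    if (multiplier !== 1.0 && score > 0) {
        score *= multiplier; // guarded as above: skipped, score stays -2.0
    }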