kiri-mcp-server 0.14.0 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/README.md +51 -11
  2. package/config/scoring-profiles.yml +78 -0
  3. package/config/stop-words.yml +307 -0
  4. package/dist/config/scoring-profiles.yml +78 -0
  5. package/dist/config/stop-words.yml +307 -0
  6. package/dist/package.json +2 -2
  7. package/dist/src/indexer/cli.d.ts +1 -0
  8. package/dist/src/indexer/cli.d.ts.map +1 -1
  9. package/dist/src/indexer/cli.js +22 -2
  10. package/dist/src/indexer/cli.js.map +1 -1
  11. package/dist/src/indexer/cochange.d.ts +97 -0
  12. package/dist/src/indexer/cochange.d.ts.map +1 -0
  13. package/dist/src/indexer/cochange.js +315 -0
  14. package/dist/src/indexer/cochange.js.map +1 -0
  15. package/dist/src/indexer/graph-metrics.d.ts +68 -0
  16. package/dist/src/indexer/graph-metrics.d.ts.map +1 -0
  17. package/dist/src/indexer/graph-metrics.js +239 -0
  18. package/dist/src/indexer/graph-metrics.js.map +1 -0
  19. package/dist/src/indexer/schema.d.ts +15 -0
  20. package/dist/src/indexer/schema.d.ts.map +1 -1
  21. package/dist/src/indexer/schema.js +86 -0
  22. package/dist/src/indexer/schema.js.map +1 -1
  23. package/dist/src/server/context.d.ts +2 -0
  24. package/dist/src/server/context.d.ts.map +1 -1
  25. package/dist/src/server/context.js.map +1 -1
  26. package/dist/src/server/handlers/snippets-get.d.ts +10 -0
  27. package/dist/src/server/handlers/snippets-get.d.ts.map +1 -1
  28. package/dist/src/server/handlers/snippets-get.js +40 -3
  29. package/dist/src/server/handlers/snippets-get.js.map +1 -1
  30. package/dist/src/server/handlers.d.ts +1 -1
  31. package/dist/src/server/handlers.d.ts.map +1 -1
  32. package/dist/src/server/handlers.js +208 -51
  33. package/dist/src/server/handlers.js.map +1 -1
  34. package/dist/src/server/idf-provider.d.ts +110 -0
  35. package/dist/src/server/idf-provider.d.ts.map +1 -0
  36. package/dist/src/server/idf-provider.js +233 -0
  37. package/dist/src/server/idf-provider.js.map +1 -0
  38. package/dist/src/server/rpc.d.ts.map +1 -1
  39. package/dist/src/server/rpc.js +21 -1
  40. package/dist/src/server/rpc.js.map +1 -1
  41. package/dist/src/server/scoring.d.ts +10 -0
  42. package/dist/src/server/scoring.d.ts.map +1 -1
  43. package/dist/src/server/scoring.js +73 -0
  44. package/dist/src/server/scoring.js.map +1 -1
  45. package/dist/src/server/services/index.d.ts +2 -0
  46. package/dist/src/server/services/index.d.ts.map +1 -1
  47. package/dist/src/server/services/index.js +3 -0
  48. package/dist/src/server/services/index.js.map +1 -1
  49. package/dist/src/server/stop-words.d.ts +106 -0
  50. package/dist/src/server/stop-words.d.ts.map +1 -0
  51. package/dist/src/server/stop-words.js +312 -0
  52. package/dist/src/server/stop-words.js.map +1 -0
  53. package/dist/src/shared/duckdb.d.ts +8 -2
  54. package/dist/src/shared/duckdb.d.ts.map +1 -1
  55. package/dist/src/shared/duckdb.js +37 -62
  56. package/dist/src/shared/duckdb.js.map +1 -1
  57. package/package.json +2 -2
@@ -8,8 +8,10 @@ import { expandAbbreviations } from "./abbreviations.js";
8
8
  import { getBoostProfile, } from "./boost-profiles.js";
9
9
  import { loadPathPenalties, mergePathPenaltyEntries } from "./config-loader.js";
10
10
  import { loadServerConfig } from "./config.js";
11
+ import { createIdfProvider } from "./idf-provider.js";
11
12
  import { coerceProfileName, loadScoringProfile } from "./scoring.js";
12
13
  import { createServerServices } from "./services/index.js";
14
+ import { loadStopWords } from "./stop-words.js";
13
15
  // Re-export extracted handlers for backward compatibility
14
16
  export { snippetsGet, } from "./handlers/snippets-get.js";
15
17
  // Configuration file patterns (v0.8.0+: consolidated to avoid duplication)
@@ -176,6 +178,8 @@ export async function checkTableAvailability(db) {
176
178
  "markdown_link",
177
179
  "hint_expansion",
178
180
  "hint_dictionary",
181
+ "graph_metrics",
182
+ "cochange",
179
183
  ];
180
184
  const checkTable = async (tableName) => {
181
185
  if (!ALLOWED_TABLES.includes(tableName)) {
@@ -199,6 +203,8 @@ export async function checkTableAvailability(db) {
199
203
  hasLinkTable: await checkTable("markdown_link"),
200
204
  hasHintLog: await checkTable("hint_expansion"),
201
205
  hasHintDictionary: await checkTable("hint_dictionary"),
206
+ hasGraphMetrics: await checkTable("graph_metrics"),
207
+ hasCochange: await checkTable("cochange"),
202
208
  };
203
209
  // 起動時警告: テーブルが存在しない場合に通知
204
210
  if (!result.hasMetadataTables) {
@@ -213,6 +219,12 @@ export async function checkTableAvailability(db) {
213
219
  if (!result.hasHintDictionary) {
214
220
  console.warn("hint_dictionary table is missing. Dictionary hints disabled. Run scripts/diag/build-hint-dictionary.ts after upgrading the schema.");
215
221
  }
222
+ if (!result.hasGraphMetrics) {
223
+ console.warn("graph_metrics table is missing. Graph layer scoring disabled. Run indexer with --full flag to create the table.");
224
+ }
225
+ if (!result.hasCochange) {
226
+ console.warn("cochange table is missing. Co-change scoring disabled. Run indexer with --full flag to create the table.");
227
+ }
216
228
  return result;
217
229
  }
218
230
  async function hasDirtyRepos(db) {
@@ -533,6 +545,8 @@ const CLAMP_SNIPPETS_ENABLED = serverConfig.features.clampSnippets;
533
545
  const FALLBACK_SNIPPET_WINDOW = serverConfig.features.snippetWindow;
534
546
  const MAX_RERANK_LIMIT = 50;
535
547
  const MAX_ARTIFACT_HINTS = 8;
548
+ /** Minimum confidence floor for co-change scoring to prevent zero-boost from low Jaccard scores */
549
+ const MIN_COCHANGE_CONFIDENCE_FLOOR = 0.2;
536
550
  const DOMAIN_PATH_HINT_LIMIT = MAX_ARTIFACT_HINTS;
537
551
  const SAFE_PATH_PATTERN = /^[a-zA-Z0-9_.\-/]+$/;
538
552
  const HINT_PRIORITY_TEXT_MULTIPLIER = serverConfig.hints.priority.textMultiplier;
@@ -590,15 +604,16 @@ const WHY_TAG_PRIORITY = {
590
604
  substring: 4, // Substring hint expansion
591
605
  "path-phrase": 5, // Path contains multi-word phrase
592
606
  structural: 6, // Semantic similarity
593
- "path-segment": 7, // Path component matches
594
- "path-keyword": 8, // Path keyword match
595
- dep: 9, // Dependency relationship
596
- near: 10, // Proximity to editing file
597
- boost: 11, // File type boost
598
- recent: 12, // Recently changed
599
- symbol: 13, // Symbol match
600
- penalty: 14, // Penalty explanations (keep for transparency)
601
- keyword: 15, // Generic keyword (deprecated, kept for compatibility)
607
+ cochange: 7, // Co-change history (files that change together)
608
+ "path-segment": 8, // Path component matches
609
+ "path-keyword": 9, // Path keyword match
610
+ dep: 10, // Dependency relationship
611
+ near: 11, // Proximity to editing file
612
+ boost: 12, // File type boost
613
+ recent: 13, // Recently changed
614
+ symbol: 14, // Symbol match
615
+ penalty: 15, // Penalty explanations (keep for transparency)
616
+ keyword: 16, // Generic keyword (deprecated, kept for compatibility)
602
617
  };
603
618
  // Reserve at least one slot for important structural tags
604
619
  const RESERVED_WHY_SLOTS = {
@@ -663,39 +678,18 @@ function selectWhyTags(reasons) {
663
678
  }
664
679
  return Array.from(selected);
665
680
  }
666
- const STOP_WORDS = new Set([
667
- "the",
668
- "and",
669
- "for",
670
- "with",
671
- "from",
672
- "this",
673
- "that",
674
- "have",
675
- "has",
676
- "will",
677
- "would",
678
- "into",
679
- "about",
680
- "there",
681
- "their",
682
- "your",
683
- "fix",
684
- "test",
685
- "tests",
686
- "issue",
687
- "error",
688
- "bug",
689
- "fail",
690
- "failing",
691
- "make",
692
- "when",
693
- "where",
694
- "should",
695
- "could",
696
- "need",
697
- "goal",
698
- ]);
681
/**
 * Lazily initialized stop-words service.
 *
 * Uses a module-level singleton cache; the word list is loaded from
 * config/stop-words.yml on first access.
 * @see Issue #48: Improve context_bundle stop word coverage and configurability
 */
let _stopWordsService = null;
function getStopWordsService() {
    // Load on first use only; subsequent calls reuse the cached instance.
    _stopWordsService ??= loadStopWords();
    return _stopWordsService;
}
699
693
  function prioritizeHintCandidates(rankedCandidates, hintPaths, limit) {
700
694
  if (rankedCandidates.length === 0) {
701
695
  return [];
@@ -822,7 +816,7 @@ function extractCompoundTerms(text) {
822
816
  const matches = Array.from(text.matchAll(compoundPattern)).map((m) => m[1]);
823
817
  return matches
824
818
  .map((term) => term.toLowerCase())
825
- .filter((term) => term.length >= 3 && !STOP_WORDS.has(term));
819
+ .filter((term) => term.length >= 3 && !getStopWordsService().has(term));
826
820
  }
827
821
  /**
828
822
  * パスライクな用語を抽出
@@ -837,7 +831,7 @@ function extractPathSegments(text) {
837
831
  for (const path of matches) {
838
832
  const parts = path.toLowerCase().split("/");
839
833
  for (const part of parts) {
840
- if (part.length >= 3 && !STOP_WORDS.has(part) && !segments.includes(part)) {
834
+ if (part.length >= 3 && !getStopWordsService().has(part) && !segments.includes(part)) {
841
835
  segments.push(part);
842
836
  }
843
837
  }
@@ -849,7 +843,7 @@ function extractPathSegments(text) {
849
843
  * 共有トークン化ユーティリティを使用
850
844
  */
851
845
  function extractRegularWords(text, strategy) {
852
- const words = tokenizeText(text, strategy).filter((word) => word.length >= 3 && !STOP_WORDS.has(word));
846
+ const words = tokenizeText(text, strategy).filter((word) => word.length >= 3 && !getStopWordsService().has(word));
853
847
  return words;
854
848
  }
855
849
  /**
@@ -879,7 +873,7 @@ function extractKeywords(text) {
879
873
  // ハイフンとアンダースコアの両方で分割
880
874
  const parts = term
881
875
  .split(/[-_]/)
882
- .filter((part) => part.length >= 3 && !STOP_WORDS.has(part));
876
+ .filter((part) => part.length >= 3 && !getStopWordsService().has(part));
883
877
  result.keywords.push(...parts);
884
878
  }
885
879
  }
@@ -904,7 +898,7 @@ function addKeywordDerivedPathSegments(result) {
904
898
  }
905
899
  const additional = [];
906
900
  for (const keyword of result.keywords) {
907
- if (keyword.length < 3 || STOP_WORDS.has(keyword)) {
901
+ if (keyword.length < 3 || getStopWordsService().has(keyword)) {
908
902
  continue;
909
903
  }
910
904
  if (result.pathSegments.includes(keyword) || additional.includes(keyword)) {
@@ -1348,6 +1342,144 @@ function applyStructuralScores(candidates, queryEmbedding, structuralWeight) {
1348
1342
  candidate.reasons.add(`structural:${similarity.toFixed(2)}`);
1349
1343
  }
1350
1344
  }
1345
/**
 * Graph Layer: apply graph-based scoring boosts (Phase 3.2).
 *
 * Reads precomputed metrics from the graph_metrics table:
 * - inbound_count: number of files importing this file (PageRank-like importance)
 * - importance_score: normalized PageRank score in [0, 1]
 *
 * Boosts are additive and scaled by the profile weights.
 *
 * @param db - DuckDB client (must expose async `all(sql, params)`)
 * @param repoId - Repository ID
 * @param candidates - Candidate files to score (mutated in place)
 * @param weights - Scoring weights (uses graphInbound / graphImportance)
 * @param hasGraphMetrics - whether the graph_metrics table exists
 */
async function applyGraphLayerScores(db, repoId, candidates, weights, hasGraphMetrics) {
    // Graceful degradation: without the graph_metrics table there is nothing to do.
    if (!hasGraphMetrics) {
        return;
    }
    // Both weights zero means the feature is disabled.
    if (weights.graphInbound <= 0 && weights.graphImportance <= 0) {
        return;
    }
    if (candidates.length === 0) {
        return;
    }
    // One parameterized query fetches metrics for every candidate path.
    const candidatePaths = candidates.map((candidate) => candidate.path);
    const placeholders = candidatePaths.map(() => "?").join(", ");
    const rows = await db.all(`
    SELECT path, inbound_count, importance_score
    FROM graph_metrics
    WHERE repo_id = ? AND path IN (${placeholders})
    `, [repoId, ...candidatePaths]);
    // Single pass: build the path → metrics lookup while tracking the maximum
    // inbound count, which anchors the log-scale normalization below.
    const metricsByPath = new Map();
    let maxInbound = 1;
    for (const row of rows) {
        metricsByPath.set(row.path, {
            inbound: row.inbound_count,
            importance: row.importance_score,
        });
        maxInbound = Math.max(maxInbound, row.inbound_count);
    }
    // Hoisted denominator: log(1 + max) is loop-invariant.
    const logMaxInbound = Math.log(1 + maxInbound);
    for (const candidate of candidates) {
        const metric = metricsByPath.get(candidate.path);
        if (!metric) {
            continue;
        }
        // Inbound dependency boost, log-scaled so heavily-imported files
        // don't dominate: log(1 + count) / log(1 + max).
        if (weights.graphInbound > 0 && metric.inbound > 0) {
            const normalizedInbound = Math.log(1 + metric.inbound) / logMaxInbound;
            candidate.score += weights.graphInbound * normalizedInbound;
            candidate.reasons.add(`graph:inbound:${metric.inbound}`);
        }
        // Importance boost; importance_score is already normalized to [0, 1].
        if (weights.graphImportance > 0 && metric.importance > 0) {
            candidate.score += weights.graphImportance * metric.importance;
            candidate.reasons.add(`graph:importance:${metric.importance.toFixed(2)}`);
        }
    }
}
1413
/**
 * Apply co-change scores derived from git history (Phase 4).
 *
 * Candidates that frequently changed together with editing_path get boosted.
 *
 * @param db - DuckDB client (must expose async `all(sql, params)`)
 * @param repoId - Repository ID
 * @param candidates - Candidate files to score (mutated in place)
 * @param weights - Scoring weights (uses the cochange weight)
 * @param editingPath - Currently edited file path (optional)
 * @param hasCochange - whether the cochange table exists
 */
async function applyCochangeScores(db, repoId, candidates, weights, editingPath, hasCochange) {
    // Graceful degradation: without the cochange table there is nothing to do.
    if (!hasCochange) {
        return;
    }
    // Weight of zero means the feature is disabled (the default).
    if (weights.cochange <= 0) {
        return;
    }
    // Co-change needs a reference file and at least one candidate.
    if (!editingPath || candidates.length === 0) {
        return;
    }
    // Edges store file pairs in canonical order, so editing_path may sit in
    // either column; normalize each edge to "the other file" (neighbor).
    const edges = await db.all(`
    SELECT
    CASE WHEN file1 = ? THEN file2 ELSE file1 END as neighbor,
    cochange_count,
    confidence
    FROM cochange
    WHERE repo_id = ? AND (file1 = ? OR file2 = ?)
    `, [editingPath, repoId, editingPath, editingPath]);
    if (edges.length === 0) {
        return;
    }
    // Single pass: build neighbor → {count, confidence} lookup while tracking
    // the maximum count, which anchors the log-scale normalization below.
    const statsByNeighbor = new Map();
    let maxCount = 1;
    for (const edge of edges) {
        statsByNeighbor.set(edge.neighbor, {
            count: edge.cochange_count,
            confidence: edge.confidence ?? 0,
        });
        maxCount = Math.max(maxCount, edge.cochange_count);
    }
    // Hoisted denominator: log(1 + max) is loop-invariant.
    const logMaxCount = Math.log(1 + maxCount);
    for (const candidate of candidates) {
        const stats = statsByNeighbor.get(candidate.path);
        if (!stats || stats.count <= 0) {
            continue;
        }
        // boost = weight * log-normalized count * confidence (Jaccard),
        // with a floor so low-confidence edges still contribute a little.
        const normalizedCount = Math.log(1 + stats.count) / logMaxCount;
        const confidenceFactor = Math.max(stats.confidence, MIN_COCHANGE_CONFIDENCE_FLOOR);
        candidate.score += weights.cochange * normalizedCount * confidenceFactor;
        candidate.reasons.add(`cochange:${stats.count}:${(stats.confidence * 100).toFixed(0)}%`);
    }
}
1351
1483
  async function fetchEmbeddingMap(db, repoId, paths) {
1352
1484
  const map = new Map();
1353
1485
  if (paths.length === 0) {
@@ -2829,12 +2961,26 @@ async function contextBundleImpl(context, params) {
2829
2961
  const pathSegments = artifacts.editing_path
2830
2962
  .split(/[/_.-]/)
2831
2963
  .map((segment) => segment.toLowerCase())
2832
- .filter((segment) => segment.length >= 3 && !STOP_WORDS.has(segment));
2964
+ .filter((segment) => segment.length >= 3 && !getStopWordsService().has(segment));
2833
2965
  extractedTerms.pathSegments.push(...pathSegments.slice(0, MAX_KEYWORDS));
2834
2966
  }
2835
2967
  const candidates = new Map();
2836
2968
  const stringMatchSeeds = new Set();
2837
2969
  const fileCache = new Map();
2970
+ // Phase 2: IDF重み付けプロバイダーの初期化
2971
+ // キーワードの文書頻度に基づいて重みを計算し、高頻度語を自動的に減衰
2972
+ const idfProvider = createIdfProvider(db, repoId);
2973
+ const idfWeights = new Map();
2974
+ // 抽出されたキーワードのIDF重みを事前計算(非同期バッチ処理)
2975
+ if (extractedTerms.keywords.length > 0) {
2976
+ const computedWeights = await idfProvider.computeIdfBatch(extractedTerms.keywords);
2977
+ for (const [term, weight] of computedWeights) {
2978
+ idfWeights.set(term, weight);
2979
+ }
2980
+ if (process.env.KIRI_TRACE_IDF === "1") {
2981
+ console.info("[idf-weights]", JSON.stringify(Object.fromEntries(Array.from(idfWeights.entries()).map(([k, v]) => [k, v.toFixed(3)]))));
2982
+ }
2983
+ }
2838
2984
  // ✅ Cache boost profile config to avoid redundant lookups in hot path
2839
2985
  const boostProfile = params.boost_profile ??
2840
2986
  (hasHintMetadataFilters ? "balanced" : hasStrictMetadataFilters ? "docs" : "default");
@@ -2961,10 +3107,17 @@ async function contextBundleImpl(context, params) {
2961
3107
  continue; // Should not happen, but defensive check
2962
3108
  }
2963
3109
  const candidate = ensureCandidate(candidates, row.path);
2964
- // 各マッチしたキーワードに対してスコアリング
3110
+ // 各マッチしたキーワードに対してスコアリング(Phase 2: IDF重み付け)
2965
3111
  for (const keyword of matchedKeywords) {
2966
- candidate.score += weights.textMatch;
2967
- candidate.reasons.add(`text:${keyword}`);
3112
+ // IDF重みを適用(事前計算済み、なければデフォルト1.0)
3113
+ // 減衰適用: 0.6 + 0.4 * idfWeight でファイル種別マルチプライヤとのバランスを維持
3114
+ // - 高頻度語: IDF=0 → 0.6 (40%減)
3115
+ // - 低頻度語: IDF=1 → 1.0 (減衰なし)
3116
+ const rawIdfWeight = idfWeights.get(keyword.toLowerCase()) ?? 1.0;
3117
+ const dampedIdfWeight = 0.6 + 0.4 * rawIdfWeight;
3118
+ const weightedScore = weights.textMatch * dampedIdfWeight;
3119
+ candidate.score += weightedScore;
3120
+ candidate.reasons.add(`text:${keyword}:idf=${rawIdfWeight.toFixed(2)}`);
2968
3121
  candidate.keywordHits.add(keyword);
2969
3122
  }
2970
3123
  // Apply boost profile once per file
@@ -3338,6 +3491,10 @@ async function contextBundleImpl(context, params) {
3338
3491
  }
3339
3492
  }
3340
3493
  applyStructuralScores(materializedCandidates, queryEmbedding, weights.structural);
3494
+ // Phase 3.2: Apply graph layer scoring (inbound dependencies, PageRank importance)
3495
+ await applyGraphLayerScores(db, repoId, materializedCandidates, weights, context.tableAvailability.hasGraphMetrics);
3496
+ // Phase 4: Apply co-change scores (files that change together with editing_path)
3497
+ await applyCochangeScores(db, repoId, materializedCandidates, weights, artifacts.editing_path, context.tableAvailability.hasCochange);
3341
3498
  // ✅ CRITICAL SAFETY: Apply multipliers AFTER all additive scoring (v0.7.0)
3342
3499
  // Only apply to positive scores to prevent negative score inversion
3343
3500
  for (const candidate of materializedCandidates) {