kiri-mcp-server 0.14.0 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/README.md +51 -11
  2. package/config/scoring-profiles.yml +78 -0
  3. package/config/stop-words.yml +307 -0
  4. package/dist/config/scoring-profiles.yml +78 -0
  5. package/dist/config/stop-words.yml +307 -0
  6. package/dist/package.json +2 -2
  7. package/dist/src/indexer/cli.d.ts +1 -0
  8. package/dist/src/indexer/cli.d.ts.map +1 -1
  9. package/dist/src/indexer/cli.js +22 -2
  10. package/dist/src/indexer/cli.js.map +1 -1
  11. package/dist/src/indexer/cochange.d.ts +97 -0
  12. package/dist/src/indexer/cochange.d.ts.map +1 -0
  13. package/dist/src/indexer/cochange.js +315 -0
  14. package/dist/src/indexer/cochange.js.map +1 -0
  15. package/dist/src/indexer/graph-metrics.d.ts +68 -0
  16. package/dist/src/indexer/graph-metrics.d.ts.map +1 -0
  17. package/dist/src/indexer/graph-metrics.js +239 -0
  18. package/dist/src/indexer/graph-metrics.js.map +1 -0
  19. package/dist/src/indexer/schema.d.ts +15 -0
  20. package/dist/src/indexer/schema.d.ts.map +1 -1
  21. package/dist/src/indexer/schema.js +86 -0
  22. package/dist/src/indexer/schema.js.map +1 -1
  23. package/dist/src/server/context.d.ts +2 -0
  24. package/dist/src/server/context.d.ts.map +1 -1
  25. package/dist/src/server/context.js.map +1 -1
  26. package/dist/src/server/handlers/snippets-get.d.ts +10 -0
  27. package/dist/src/server/handlers/snippets-get.d.ts.map +1 -1
  28. package/dist/src/server/handlers/snippets-get.js +40 -3
  29. package/dist/src/server/handlers/snippets-get.js.map +1 -1
  30. package/dist/src/server/handlers.d.ts +1 -1
  31. package/dist/src/server/handlers.d.ts.map +1 -1
  32. package/dist/src/server/handlers.js +208 -51
  33. package/dist/src/server/handlers.js.map +1 -1
  34. package/dist/src/server/idf-provider.d.ts +110 -0
  35. package/dist/src/server/idf-provider.d.ts.map +1 -0
  36. package/dist/src/server/idf-provider.js +233 -0
  37. package/dist/src/server/idf-provider.js.map +1 -0
  38. package/dist/src/server/rpc.d.ts.map +1 -1
  39. package/dist/src/server/rpc.js +21 -1
  40. package/dist/src/server/rpc.js.map +1 -1
  41. package/dist/src/server/scoring.d.ts +10 -0
  42. package/dist/src/server/scoring.d.ts.map +1 -1
  43. package/dist/src/server/scoring.js +73 -0
  44. package/dist/src/server/scoring.js.map +1 -1
  45. package/dist/src/server/services/index.d.ts +2 -0
  46. package/dist/src/server/services/index.d.ts.map +1 -1
  47. package/dist/src/server/services/index.js +3 -0
  48. package/dist/src/server/services/index.js.map +1 -1
  49. package/dist/src/server/stop-words.d.ts +106 -0
  50. package/dist/src/server/stop-words.d.ts.map +1 -0
  51. package/dist/src/server/stop-words.js +312 -0
  52. package/dist/src/server/stop-words.js.map +1 -0
  53. package/dist/src/shared/duckdb.d.ts +8 -2
  54. package/dist/src/shared/duckdb.d.ts.map +1 -1
  55. package/dist/src/shared/duckdb.js +37 -62
  56. package/dist/src/shared/duckdb.js.map +1 -1
  57. package/package.json +2 -2
@@ -8,8 +8,10 @@ import { expandAbbreviations } from "./abbreviations.js";
8
8
  import { getBoostProfile, } from "./boost-profiles.js";
9
9
  import { loadPathPenalties, mergePathPenaltyEntries } from "./config-loader.js";
10
10
  import { loadServerConfig } from "./config.js";
11
+ import { createIdfProvider } from "./idf-provider.js";
11
12
  import { coerceProfileName, loadScoringProfile } from "./scoring.js";
12
13
  import { createServerServices } from "./services/index.js";
14
+ import { loadStopWords } from "./stop-words.js";
13
15
  // Re-export extracted handlers for backward compatibility
14
16
  export { snippetsGet, } from "./handlers/snippets-get.js";
15
17
  // Configuration file patterns (v0.8.0+: consolidated to avoid duplication)
@@ -176,6 +178,8 @@ export async function checkTableAvailability(db) {
176
178
  "markdown_link",
177
179
  "hint_expansion",
178
180
  "hint_dictionary",
181
+ "graph_metrics",
182
+ "cochange",
179
183
  ];
180
184
  const checkTable = async (tableName) => {
181
185
  if (!ALLOWED_TABLES.includes(tableName)) {
@@ -199,6 +203,8 @@ export async function checkTableAvailability(db) {
199
203
  hasLinkTable: await checkTable("markdown_link"),
200
204
  hasHintLog: await checkTable("hint_expansion"),
201
205
  hasHintDictionary: await checkTable("hint_dictionary"),
206
+ hasGraphMetrics: await checkTable("graph_metrics"),
207
+ hasCochange: await checkTable("cochange"),
202
208
  };
203
209
  // 起動時警告: テーブルが存在しない場合に通知
204
210
  if (!result.hasMetadataTables) {
@@ -213,6 +219,12 @@ export async function checkTableAvailability(db) {
213
219
  if (!result.hasHintDictionary) {
214
220
  console.warn("hint_dictionary table is missing. Dictionary hints disabled. Run scripts/diag/build-hint-dictionary.ts after upgrading the schema.");
215
221
  }
222
+ if (!result.hasGraphMetrics) {
223
+ console.warn("graph_metrics table is missing. Graph layer scoring disabled. Run indexer with --full flag to create the table.");
224
+ }
225
+ if (!result.hasCochange) {
226
+ console.warn("cochange table is missing. Co-change scoring disabled. Run indexer with --full flag to create the table.");
227
+ }
216
228
  return result;
217
229
  }
218
230
  async function hasDirtyRepos(db) {
@@ -533,6 +545,8 @@ const CLAMP_SNIPPETS_ENABLED = serverConfig.features.clampSnippets;
533
545
  const FALLBACK_SNIPPET_WINDOW = serverConfig.features.snippetWindow;
534
546
  const MAX_RERANK_LIMIT = 50;
535
547
  const MAX_ARTIFACT_HINTS = 8;
548
+ /** Minimum confidence floor for co-change scoring to prevent zero-boost from low Jaccard scores */
549
+ const MIN_COCHANGE_CONFIDENCE_FLOOR = 0.2;
536
550
  const DOMAIN_PATH_HINT_LIMIT = MAX_ARTIFACT_HINTS;
537
551
  const SAFE_PATH_PATTERN = /^[a-zA-Z0-9_.\-/]+$/;
538
552
  const HINT_PRIORITY_TEXT_MULTIPLIER = serverConfig.hints.priority.textMultiplier;
@@ -590,15 +604,16 @@ const WHY_TAG_PRIORITY = {
590
604
  substring: 4, // Substring hint expansion
591
605
  "path-phrase": 5, // Path contains multi-word phrase
592
606
  structural: 6, // Semantic similarity
593
- "path-segment": 7, // Path component matches
594
- "path-keyword": 8, // Path keyword match
595
- dep: 9, // Dependency relationship
596
- near: 10, // Proximity to editing file
597
- boost: 11, // File type boost
598
- recent: 12, // Recently changed
599
- symbol: 13, // Symbol match
600
- penalty: 14, // Penalty explanations (keep for transparency)
601
- keyword: 15, // Generic keyword (deprecated, kept for compatibility)
607
+ cochange: 7, // Co-change history (files that change together)
608
+ "path-segment": 8, // Path component matches
609
+ "path-keyword": 9, // Path keyword match
610
+ dep: 10, // Dependency relationship
611
+ near: 11, // Proximity to editing file
612
+ boost: 12, // File type boost
613
+ recent: 13, // Recently changed
614
+ symbol: 14, // Symbol match
615
+ penalty: 15, // Penalty explanations (keep for transparency)
616
+ keyword: 16, // Generic keyword (deprecated, kept for compatibility)
602
617
  };
603
618
  // Reserve at least one slot for important structural tags
604
619
  const RESERVED_WHY_SLOTS = {
@@ -663,39 +678,18 @@ function selectWhyTags(reasons) {
663
678
  }
664
679
  return Array.from(selected);
665
680
  }
666
- const STOP_WORDS = new Set([
667
- "the",
668
- "and",
669
- "for",
670
- "with",
671
- "from",
672
- "this",
673
- "that",
674
- "have",
675
- "has",
676
- "will",
677
- "would",
678
- "into",
679
- "about",
680
- "there",
681
- "their",
682
- "your",
683
- "fix",
684
- "test",
685
- "tests",
686
- "issue",
687
- "error",
688
- "bug",
689
- "fail",
690
- "failing",
691
- "make",
692
- "when",
693
- "where",
694
- "should",
695
- "could",
696
- "need",
697
- "goal",
698
- ]);
681
/**
 * Lazily initialized stop-words service.
 *
 * Uses a module-level singleton cache; the word list is loaded from
 * config/stop-words.yml on first access.
 * @see Issue #48: Improve context_bundle stop word coverage and configurability
 */
let _stopWordsService = null;
function getStopWordsService() {
    // Load on first use only; subsequent calls reuse the cached instance.
    _stopWordsService ??= loadStopWords();
    return _stopWordsService;
}
699
693
  function prioritizeHintCandidates(rankedCandidates, hintPaths, limit) {
700
694
  if (rankedCandidates.length === 0) {
701
695
  return [];
@@ -822,7 +816,7 @@ function extractCompoundTerms(text) {
822
816
  const matches = Array.from(text.matchAll(compoundPattern)).map((m) => m[1]);
823
817
  return matches
824
818
  .map((term) => term.toLowerCase())
825
- .filter((term) => term.length >= 3 && !STOP_WORDS.has(term));
819
+ .filter((term) => term.length >= 3 && !getStopWordsService().has(term));
826
820
  }
827
821
  /**
828
822
  * パスライクな用語を抽出
@@ -837,7 +831,7 @@ function extractPathSegments(text) {
837
831
  for (const path of matches) {
838
832
  const parts = path.toLowerCase().split("/");
839
833
  for (const part of parts) {
840
- if (part.length >= 3 && !STOP_WORDS.has(part) && !segments.includes(part)) {
834
+ if (part.length >= 3 && !getStopWordsService().has(part) && !segments.includes(part)) {
841
835
  segments.push(part);
842
836
  }
843
837
  }
@@ -849,7 +843,7 @@ function extractPathSegments(text) {
849
843
  * 共有トークン化ユーティリティを使用
850
844
  */
851
845
  function extractRegularWords(text, strategy) {
852
- const words = tokenizeText(text, strategy).filter((word) => word.length >= 3 && !STOP_WORDS.has(word));
846
+ const words = tokenizeText(text, strategy).filter((word) => word.length >= 3 && !getStopWordsService().has(word));
853
847
  return words;
854
848
  }
855
849
  /**
@@ -879,7 +873,7 @@ function extractKeywords(text) {
879
873
  // ハイフンとアンダースコアの両方で分割
880
874
  const parts = term
881
875
  .split(/[-_]/)
882
- .filter((part) => part.length >= 3 && !STOP_WORDS.has(part));
876
+ .filter((part) => part.length >= 3 && !getStopWordsService().has(part));
883
877
  result.keywords.push(...parts);
884
878
  }
885
879
  }
@@ -904,7 +898,7 @@ function addKeywordDerivedPathSegments(result) {
904
898
  }
905
899
  const additional = [];
906
900
  for (const keyword of result.keywords) {
907
- if (keyword.length < 3 || STOP_WORDS.has(keyword)) {
901
+ if (keyword.length < 3 || getStopWordsService().has(keyword)) {
908
902
  continue;
909
903
  }
910
904
  if (result.pathSegments.includes(keyword) || additional.includes(keyword)) {
@@ -1348,6 +1342,144 @@ function applyStructuralScores(candidates, queryEmbedding, structuralWeight) {
1348
1342
  candidate.reasons.add(`structural:${similarity.toFixed(2)}`);
1349
1343
  }
1350
1344
  }
1345
/**
 * Graph Layer: apply graph-based scoring boosts (Phase 3.2).
 *
 * Reads precomputed metrics from the graph_metrics table:
 * - inbound_count: number of files importing this file (PageRank-like importance)
 * - importance_score: normalized PageRank score in [0, 1]
 *
 * Boosts are additive and scaled by the profile weights.
 *
 * @param db - DuckDB client (must expose async `all(sql, params)`)
 * @param repoId - Repository ID
 * @param candidates - Candidate files to score (mutated in place)
 * @param weights - Scoring weights (uses graphInbound / graphImportance)
 * @param hasGraphMetrics - whether the graph_metrics table exists
 */
async function applyGraphLayerScores(db, repoId, candidates, weights, hasGraphMetrics) {
    // Graceful degradation: without the graph_metrics table there is nothing to do.
    if (!hasGraphMetrics) {
        return;
    }
    // Both weights zero means the feature is disabled.
    if (weights.graphInbound <= 0 && weights.graphImportance <= 0) {
        return;
    }
    if (candidates.length === 0) {
        return;
    }
    // One parameterized query fetches metrics for every candidate path.
    const candidatePaths = candidates.map((candidate) => candidate.path);
    const placeholders = candidatePaths.map(() => "?").join(", ");
    const rows = await db.all(`
    SELECT path, inbound_count, importance_score
    FROM graph_metrics
    WHERE repo_id = ? AND path IN (${placeholders})
    `, [repoId, ...candidatePaths]);
    // Single pass: build the path → metrics lookup while tracking the maximum
    // inbound count, which anchors the log-scale normalization below.
    const metricsByPath = new Map();
    let maxInbound = 1;
    for (const row of rows) {
        metricsByPath.set(row.path, {
            inbound: row.inbound_count,
            importance: row.importance_score,
        });
        maxInbound = Math.max(maxInbound, row.inbound_count);
    }
    // Hoisted denominator: log(1 + max) is loop-invariant.
    const logMaxInbound = Math.log(1 + maxInbound);
    for (const candidate of candidates) {
        const metric = metricsByPath.get(candidate.path);
        if (!metric) {
            continue;
        }
        // Inbound dependency boost, log-scaled so heavily-imported files
        // don't dominate: log(1 + count) / log(1 + max).
        if (weights.graphInbound > 0 && metric.inbound > 0) {
            const normalizedInbound = Math.log(1 + metric.inbound) / logMaxInbound;
            candidate.score += weights.graphInbound * normalizedInbound;
            candidate.reasons.add(`graph:inbound:${metric.inbound}`);
        }
        // Importance boost; importance_score is already normalized to [0, 1].
        if (weights.graphImportance > 0 && metric.importance > 0) {
            candidate.score += weights.graphImportance * metric.importance;
            candidate.reasons.add(`graph:importance:${metric.importance.toFixed(2)}`);
        }
    }
}
1413
/**
 * Apply co-change scores derived from git history (Phase 4).
 *
 * Candidates that frequently changed together with editing_path get boosted.
 *
 * @param db - DuckDB client (must expose async `all(sql, params)`)
 * @param repoId - Repository ID
 * @param candidates - Candidate files to score (mutated in place)
 * @param weights - Scoring weights (uses the cochange weight)
 * @param editingPath - Currently edited file path (optional)
 * @param hasCochange - whether the cochange table exists
 */
async function applyCochangeScores(db, repoId, candidates, weights, editingPath, hasCochange) {
    // Graceful degradation: without the cochange table there is nothing to do.
    if (!hasCochange) {
        return;
    }
    // Weight of zero means the feature is disabled (the default).
    if (weights.cochange <= 0) {
        return;
    }
    // Co-change needs a reference file and at least one candidate.
    if (!editingPath || candidates.length === 0) {
        return;
    }
    // Edges store file pairs in canonical order, so editing_path may sit in
    // either column; normalize each edge to "the other file" (neighbor).
    const edges = await db.all(`
    SELECT
    CASE WHEN file1 = ? THEN file2 ELSE file1 END as neighbor,
    cochange_count,
    confidence
    FROM cochange
    WHERE repo_id = ? AND (file1 = ? OR file2 = ?)
    `, [editingPath, repoId, editingPath, editingPath]);
    if (edges.length === 0) {
        return;
    }
    // Single pass: build neighbor → {count, confidence} lookup while tracking
    // the maximum count, which anchors the log-scale normalization below.
    const statsByNeighbor = new Map();
    let maxCount = 1;
    for (const edge of edges) {
        statsByNeighbor.set(edge.neighbor, {
            count: edge.cochange_count,
            confidence: edge.confidence ?? 0,
        });
        maxCount = Math.max(maxCount, edge.cochange_count);
    }
    // Hoisted denominator: log(1 + max) is loop-invariant.
    const logMaxCount = Math.log(1 + maxCount);
    for (const candidate of candidates) {
        const stats = statsByNeighbor.get(candidate.path);
        if (!stats || stats.count <= 0) {
            continue;
        }
        // boost = weight * log-normalized count * confidence (Jaccard),
        // with a floor so low-confidence edges still contribute a little.
        const normalizedCount = Math.log(1 + stats.count) / logMaxCount;
        const confidenceFactor = Math.max(stats.confidence, MIN_COCHANGE_CONFIDENCE_FLOOR);
        candidate.score += weights.cochange * normalizedCount * confidenceFactor;
        candidate.reasons.add(`cochange:${stats.count}:${(stats.confidence * 100).toFixed(0)}%`);
    }
}
1351
1483
  async function fetchEmbeddingMap(db, repoId, paths) {
1352
1484
  const map = new Map();
1353
1485
  if (paths.length === 0) {
@@ -2829,12 +2961,26 @@ async function contextBundleImpl(context, params) {
2829
2961
  const pathSegments = artifacts.editing_path
2830
2962
  .split(/[/_.-]/)
2831
2963
  .map((segment) => segment.toLowerCase())
2832
- .filter((segment) => segment.length >= 3 && !STOP_WORDS.has(segment));
2964
+ .filter((segment) => segment.length >= 3 && !getStopWordsService().has(segment));
2833
2965
  extractedTerms.pathSegments.push(...pathSegments.slice(0, MAX_KEYWORDS));
2834
2966
  }
2835
2967
  const candidates = new Map();
2836
2968
  const stringMatchSeeds = new Set();
2837
2969
  const fileCache = new Map();
2970
+ // Phase 2: IDF重み付けプロバイダーの初期化
2971
+ // キーワードの文書頻度に基づいて重みを計算し、高頻度語を自動的に減衰
2972
+ const idfProvider = createIdfProvider(db, repoId);
2973
+ const idfWeights = new Map();
2974
+ // 抽出されたキーワードのIDF重みを事前計算(非同期バッチ処理)
2975
+ if (extractedTerms.keywords.length > 0) {
2976
+ const computedWeights = await idfProvider.computeIdfBatch(extractedTerms.keywords);
2977
+ for (const [term, weight] of computedWeights) {
2978
+ idfWeights.set(term, weight);
2979
+ }
2980
+ if (process.env.KIRI_TRACE_IDF === "1") {
2981
+ console.info("[idf-weights]", JSON.stringify(Object.fromEntries(Array.from(idfWeights.entries()).map(([k, v]) => [k, v.toFixed(3)]))));
2982
+ }
2983
+ }
2838
2984
  // ✅ Cache boost profile config to avoid redundant lookups in hot path
2839
2985
  const boostProfile = params.boost_profile ??
2840
2986
  (hasHintMetadataFilters ? "balanced" : hasStrictMetadataFilters ? "docs" : "default");
@@ -2961,10 +3107,17 @@ async function contextBundleImpl(context, params) {
2961
3107
  continue; // Should not happen, but defensive check
2962
3108
  }
2963
3109
  const candidate = ensureCandidate(candidates, row.path);
2964
- // 各マッチしたキーワードに対してスコアリング
3110
+ // 各マッチしたキーワードに対してスコアリング(Phase 2: IDF重み付け)
2965
3111
  for (const keyword of matchedKeywords) {
2966
- candidate.score += weights.textMatch;
2967
- candidate.reasons.add(`text:${keyword}`);
3112
+ // IDF重みを適用(事前計算済み、なければデフォルト1.0)
3113
+ // 減衰適用: 0.6 + 0.4 * idfWeight でファイル種別マルチプライヤとのバランスを維持
3114
+ // - 高頻度語: IDF=0 → 0.6 (40%減)
3115
+ // - 低頻度語: IDF=1 → 1.0 (減衰なし)
3116
+ const rawIdfWeight = idfWeights.get(keyword.toLowerCase()) ?? 1.0;
3117
+ const dampedIdfWeight = 0.6 + 0.4 * rawIdfWeight;
3118
+ const weightedScore = weights.textMatch * dampedIdfWeight;
3119
+ candidate.score += weightedScore;
3120
+ candidate.reasons.add(`text:${keyword}:idf=${rawIdfWeight.toFixed(2)}`);
2968
3121
  candidate.keywordHits.add(keyword);
2969
3122
  }
2970
3123
  // Apply boost profile once per file
@@ -3338,6 +3491,10 @@ async function contextBundleImpl(context, params) {
3338
3491
  }
3339
3492
  }
3340
3493
  applyStructuralScores(materializedCandidates, queryEmbedding, weights.structural);
3494
+ // Phase 3.2: Apply graph layer scoring (inbound dependencies, PageRank importance)
3495
+ await applyGraphLayerScores(db, repoId, materializedCandidates, weights, context.tableAvailability.hasGraphMetrics);
3496
+ // Phase 4: Apply co-change scores (files that change together with editing_path)
3497
+ await applyCochangeScores(db, repoId, materializedCandidates, weights, artifacts.editing_path, context.tableAvailability.hasCochange);
3341
3498
  // ✅ CRITICAL SAFETY: Apply multipliers AFTER all additive scoring (v0.7.0)
3342
3499
  // Only apply to positive scores to prevent negative score inversion
3343
3500
  for (const candidate of materializedCandidates) {