npm - kiri-mcp-server - Versions diffs - 0.11.0 → 0.12.0 - Mend

kiri-mcp-server 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

package/README.md +43 -10
package/config/kiri.yml +25 -0
package/dist/config/kiri.yml +25 -0
package/dist/package.json +1 -1
package/dist/src/server/boost-profiles.d.ts +6 -5
package/dist/src/server/boost-profiles.d.ts.map +1 -1
package/dist/src/server/boost-profiles.js +22 -0
package/dist/src/server/boost-profiles.js.map +1 -1
package/dist/src/server/config-loader.d.ts +9 -0
package/dist/src/server/config-loader.d.ts.map +1 -0
package/dist/src/server/config-loader.js +121 -0
package/dist/src/server/config-loader.js.map +1 -0
package/dist/src/server/config.d.ts +2 -0
package/dist/src/server/config.d.ts.map +1 -1
package/dist/src/server/config.js +11 -0
package/dist/src/server/config.js.map +1 -1
package/dist/src/server/handlers.d.ts +1 -0
package/dist/src/server/handlers.d.ts.map +1 -1
package/dist/src/server/handlers.js +224 -5
package/dist/src/server/handlers.js.map +1 -1
package/dist/src/server/rpc.d.ts.map +1 -1
package/dist/src/server/rpc.js +24 -4
package/dist/src/server/rpc.js.map +1 -1
package/dist/src/shared/tokenizer.d.ts +1 -1
package/dist/src/shared/tokenizer.d.ts.map +1 -1
package/dist/src/shared/tokenizer.js +97 -15
package/dist/src/shared/tokenizer.js.map +1 -1
package/package.json +28 -26

package/dist/src/server/handlers.js CHANGED

@@ -5,6 +5,7 @@ import { generateEmbedding, structuralSimilarity } from "../shared/embedding.js"
 import { encode as encodeGPT, tokenizeText } from "../shared/tokenizer.js";
 import { expandAbbreviations } from "./abbreviations.js";
 import { getBoostProfile, } from "./boost-profiles.js";
+import { loadPathPenalties, mergePathPenaltyEntries } from "./config-loader.js";
 import { loadServerConfig } from "./config.js";
 import { coerceProfileName, loadScoringProfile } from "./scoring.js";
 import { createServerServices } from "./services/index.js";
@@ -447,12 +448,14 @@ function createHintExpansionConfig(weights) {
 const DEFAULT_SEARCH_LIMIT = 50;
 const DEFAULT_BUNDLE_LIMIT = 7; // Reduced from 12 to optimize token usage
 const MAX_BUNDLE_LIMIT = 20;
+const TRACE_SEARCH = process.env.KIRI_TRACE_SEARCH === "1";
 const MAX_KEYWORDS = 12;
 const MAX_MATCHES_PER_KEYWORD = 40;
 const MAX_DEPENDENCY_SEEDS = 8;
 const MAX_DEPENDENCY_SEEDS_QUERY_LIMIT = 100; // SQL injection防御用の上限
 const NEARBY_LIMIT = 6;
 const serverConfig = loadServerConfig();
+const mergedPathMultiplierCache = new Map();
 const SUPPRESS_NON_CODE_ENABLED = serverConfig.features.suppressNonCode;
 const FINAL_RESULT_SUPPRESSION_ENABLED = serverConfig.features.suppressFinalResults;
 const CLAMP_SNIPPETS_ENABLED = serverConfig.features.clampSnippets;
@@ -463,6 +466,15 @@ const SAFE_PATH_PATTERN = /^[a-zA-Z0-9_.\-/]+$/;
 const HINT_PRIORITY_TEXT_MULTIPLIER = serverConfig.hints.priority.textMultiplier;
 const HINT_PRIORITY_PATH_MULTIPLIER = serverConfig.hints.priority.pathMultiplier;
 const HINT_PRIORITY_BASE_BONUS = serverConfig.hints.priority.baseBonus;
+const PATH_FALLBACK_LIMIT = 40;
+const PATH_FALLBACK_TERMS_LIMIT = 5;
+const PATH_FALLBACK_KEEP = 8;
+const AUTO_PATH_SEGMENT_LIMIT = 4;
+function traceSearch(message, ...args) {
+    if (TRACE_SEARCH) {
+        console.log(`[TRACE context_bundle] ${message}`, ...args);
+    }
+}
 const HINT_DIR_LIMIT = serverConfig.hints.directory.limit;
 const HINT_DIR_MAX_FILES = serverConfig.hints.directory.maxFiles;
 const HINT_DEP_OUT_LIMIT = serverConfig.hints.dependency.outLimit;
@@ -811,8 +823,30 @@ function extractKeywords(text) {
             }
         }
     }
+    addKeywordDerivedPathSegments(result);
     return result;
 }
+function addKeywordDerivedPathSegments(result) {
+    if (result.pathSegments.length >= AUTO_PATH_SEGMENT_LIMIT) {
+        return;
+    }
+    const additional = [];
+    for (const keyword of result.keywords) {
+        if (keyword.length < 3 || STOP_WORDS.has(keyword)) {
+            continue;
+        }
+        if (result.pathSegments.includes(keyword) || additional.includes(keyword)) {
+            continue;
+        }
+        additional.push(keyword);
+        if (result.pathSegments.length + additional.length >= AUTO_PATH_SEGMENT_LIMIT) {
+            break;
+        }
+    }
+    if (additional.length > 0) {
+        result.pathSegments.push(...additional);
+    }
+}
 function ensureCandidate(map, filePath) {
     let candidate = map.get(filePath);
     if (!candidate) {
@@ -829,6 +863,10 @@ function ensureCandidate(map, filePath) {
             embedding: null,
             semanticSimilarity: null,
             pathMatchHits: 0, // Issue #68: Track path match count
+            keywordHits: new Set(),
+            phraseHits: 0,
+            // pathFallbackReason は optional なので省略（exactOptionalPropertyTypes対応）
+            fallbackTextHits: 0,
             penalties: [], // Issue #68: Penalty log for telemetry
         };
         map.set(filePath, candidate);
@@ -1774,6 +1812,50 @@ function applyFileTypeBoost(path, baseScore, profileConfig, weights) {
     }
     return baseScore * multiplier;
 }
+function applyCoverageBoost(candidate, extractedTerms, weights) {
+    // Skip for pure path-fallback candidates without text evidence
+    if (candidate.reasons.has("fallback:path") &&
+        candidate.keywordHits.size === 0 &&
+        candidate.phraseHits === 0) {
+        return;
+    }
+    // Coverage boost is only meaningful for text/phrase evidence; skip if no text evidence at all
+    if (candidate.keywordHits.size === 0 && candidate.phraseHits === 0) {
+        return;
+    }
+    if (extractedTerms.keywords.length > 0 && candidate.keywordHits.size > 0) {
+        const coverage = candidate.keywordHits.size / extractedTerms.keywords.length;
+        const bonus = coverage * weights.textMatch * 0.4;
+        candidate.score += bonus;
+        candidate.reasons.add(`coverage:keywords:${coverage.toFixed(2)}`);
+    }
+    if (extractedTerms.phrases.length > 0 && candidate.phraseHits > 0) {
+        const phraseCoverage = Math.min(1, candidate.phraseHits / extractedTerms.phrases.length);
+        const bonus = phraseCoverage * weights.textMatch * 0.6;
+        candidate.score += bonus;
+        candidate.reasons.add(`coverage:phrases:${phraseCoverage.toFixed(2)}`);
+    }
+}
+async function fetchPathFallbackCandidates(db, repoId, terms, limit) {
+    if (terms.length === 0 || limit <= 0) {
+        return [];
+    }
+    const filters = terms.map(() => "f.path ILIKE ?").join(" OR ");
+    const params = [repoId, ...terms.map((term) => `%${term}%`), limit];
+    return await db.all(`
+      SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
+      FROM file f
+      JOIN blob b ON b.hash = f.blob_hash
+      LEFT JOIN file_embedding fe
+        ON fe.repo_id = f.repo_id
+       AND fe.path = f.path
+      WHERE f.repo_id = ?
+        AND f.is_binary = FALSE
+        AND (${filters})
+      ORDER BY f.path
+      LIMIT ?
+    `, params);
+}
 /**
  * パスベースのスコアリングを適用（加算的ブースト）
  * goalのキーワード/フレーズがファイルパスに含まれる場合にスコアを加算
@@ -1955,6 +2037,18 @@ function applyMultiplicativeFilePenalties(candidate, path, lowerPath, fileName,
 function applyFileTypeMultipliers(candidate, path, ext, profileConfig, weights) {
     const fileName = path.split("/").pop() ?? "";
     const lowerPath = path.toLowerCase();
+    // Very low value: schemas, fixtures, testdata, examples, baseline
+    const schemaJson = lowerPath.endsWith(".schema.json") || lowerPath.includes("/schemas/");
+    const isFixture = lowerPath.includes("/fixtures/") ||
+        lowerPath.includes("/fixture/") ||
+        lowerPath.includes("/testdata/");
+    const isExample = lowerPath.includes("/examples/") || lowerPath.includes("/example/");
+    const isBaseline = lowerPath.includes("baseline") || lowerPath.includes("golden");
+    if (schemaJson || isFixture || isExample || isBaseline) {
+        candidate.scoreMultiplier *= weights.configPenaltyMultiplier;
+        candidate.reasons.add("penalty:low-value-file");
+        return;
+    }
     // ✅ Step 1: Low-value files (v1.0.0: syntax/perf/legal/migration)
     // Apply configPenaltyMultiplier (strong penalty) to rarely useful file types
     const isSyntaxGrammar = path.includes("/syntaxes/") &&
@@ -2207,7 +2301,17 @@ export async function filesSearch(context, params) {
     const filterValueSet = new Set(metadataFilters.flatMap((filter) => filter.values.map((value) => value.toLowerCase())));
     const boostProfile = params.boost_profile ??
         (hasHintMetadataFilters ? "balanced" : hasStrictMetadataFilters ? "docs" : "default");
-    const profileConfig = getBoostProfile(boostProfile);
+    const baseProfileConfig = getBoostProfile(boostProfile);
+    const cachedMerged = mergedPathMultiplierCache.get(boostProfile);
+    const mergedPathMultipliers = cachedMerged ??
+        mergePathPenaltyEntries(baseProfileConfig.pathMultipliers, [], serverConfig.pathPenalties);
+    if (!cachedMerged) {
+        mergedPathMultiplierCache.set(boostProfile, mergedPathMultipliers);
+    }
+    const profileConfig = {
+        ...baseProfileConfig,
+        pathMultipliers: mergedPathMultipliers,
+    };
     const weights = loadScoringProfile(null);
     const options = parseOutputOptions(params);
     const previewQuery = hasTextQuery
@@ -2583,6 +2687,8 @@ async function contextBundleImpl(context, params) {
     const semanticSeed = keywordSources.join(" ");
     const queryEmbedding = generateEmbedding(semanticSeed)?.values ?? null;
     const extractedTerms = extractKeywords(semanticSeed);
+    const segmentPreview = extractedTerms.pathSegments.slice(0, AUTO_PATH_SEGMENT_LIMIT).join(",");
+    traceSearch(`terms repo=${repoId} id=${params.requestId ?? "n/a"} keywords=${extractedTerms.keywords.length} phrases=${extractedTerms.phrases.length} pathSegments=${extractedTerms.pathSegments.length} segs=[${segmentPreview}]`);
     // フォールバック: editing_pathからキーワードを抽出
     if (extractedTerms.phrases.length === 0 &&
         extractedTerms.keywords.length === 0 &&
@@ -2599,14 +2705,18 @@ async function contextBundleImpl(context, params) {
     // ✅ Cache boost profile config to avoid redundant lookups in hot path
     const boostProfile = params.boost_profile ??
         (hasHintMetadataFilters ? "balanced" : hasStrictMetadataFilters ? "docs" : "default");
-    const profileConfig = getBoostProfile(boostProfile);
+    const baseProfileConfig = getBoostProfile(boostProfile);
+    const profileConfig = {
+        ...baseProfileConfig,
+        pathMultipliers: loadPathPenalties(baseProfileConfig.pathMultipliers),
+    };
     // フレーズマッチング（高い重み: textMatch × 2）- 統合クエリでパフォーマンス改善
     if (extractedTerms.phrases.length > 0) {
         const phrasePlaceholders = extractedTerms.phrases
             .map(() => "b.content ILIKE '%' || ? || '%'")
             .join(" OR ");
         // DEBUG: Log SQL query parameters for troubleshooting
-        console.log(`[DEBUG contextBundle] Executing phrase match query with repo_id=${repoId}, phrases=${JSON.stringify(extractedTerms.phrases)}`);
+        traceSearch(`Executing phrase match query with repo_id=${repoId}, phrases=${JSON.stringify(extractedTerms.phrases)}`);
         const rows = await db.all(`
         SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
         FROM file f
@@ -2622,11 +2732,14 @@ async function contextBundleImpl(context, params) {
       `, [repoId, ...extractedTerms.phrases, MAX_MATCHES_PER_KEYWORD * extractedTerms.phrases.length]);
         // DEBUG: Log returned paths and verify they match expected repo_id
         if (rows.length > 0) {
-            console.log(`[DEBUG contextBundle] Phrase match returned ${rows.length} rows. Sample paths:`, rows.slice(0, 3).map((r) => r.path));
+            traceSearch(`Phrase match returned ${rows.length} rows. Sample paths: ${rows
+                .slice(0, 3)
+                .map((r) => r.path)
+                .join(", ")}`);
             // Verify repo_id of returned files
             const pathsToCheck = rows.slice(0, 3).map((r) => r.path);
             const verification = await db.all(`SELECT path, repo_id FROM file WHERE path IN (${pathsToCheck.map(() => "?").join(", ")}) LIMIT 3`, pathsToCheck);
-            console.log(`[DEBUG contextBundle] Repo ID verification:`, verification);
+            traceSearch(`Repo ID verification`, verification);
         }
         for (const row of rows) {
             if (row.content === null) {
@@ -2639,6 +2752,7 @@ async function contextBundleImpl(context, params) {
                 continue; // Should not happen, but defensive check
             }
             const candidate = ensureCandidate(candidates, row.path);
+            candidate.phraseHits += matchedPhrases.length;
             // 各マッチしたフレーズに対してスコアリング
             for (const phrase of matchedPhrases) {
                 // フレーズマッチは通常の2倍のスコア
@@ -2669,6 +2783,7 @@ async function contextBundleImpl(context, params) {
                 });
             }
         }
+        traceSearch(`phrase search produced ${rows.length} rows, candidates=${candidates.size}`);
     }
     // キーワードマッチング（通常の重み）- 統合クエリでパフォーマンス改善
     if (extractedTerms.keywords.length > 0) {
@@ -2703,6 +2818,7 @@ async function contextBundleImpl(context, params) {
             for (const keyword of matchedKeywords) {
                 candidate.score += weights.textMatch;
                 candidate.reasons.add(`text:${keyword}`);
+                candidate.keywordHits.add(keyword);
             }
             // Apply boost profile once per file
             if (boostProfile !== "none") {
@@ -2728,6 +2844,99 @@ async function contextBundleImpl(context, params) {
                 });
             }
         }
+        traceSearch(`keyword search produced ${rows.length} rows, candidates=${candidates.size}`);
+    }
+    const fallbackTerms = Array.from(new Set([...extractedTerms.phrases, ...extractedTerms.keywords, ...extractedTerms.pathSegments]
+        .map((term) => term.toLowerCase())
+        .filter((term) => term.length >= 3))).slice(0, PATH_FALLBACK_TERMS_LIMIT);
+    if (fallbackTerms.length > 0) {
+        const fallbackRows = await fetchPathFallbackCandidates(db, repoId, fallbackTerms, Math.min(limit * 2, PATH_FALLBACK_LIMIT));
+        const fallbackReason = stringMatchSeeds.size === 0
+            ? "no-string-match"
+            : candidates.size < limit
+                ? "low-candidates"
+                : "supplemental";
+        traceSearch(`path fallback triggered (${fallbackReason}) terms=${JSON.stringify(fallbackTerms)} rows=${fallbackRows.length}`);
+        const fallbackWeight = stringMatchSeeds.size === 0 ? weights.pathMatch * 0.75 : weights.pathMatch * 0.2;
+        for (const row of fallbackRows) {
+            const candidate = ensureCandidate(candidates, row.path);
+            candidate.pathFallbackReason = fallbackReason;
+            candidate.score += fallbackWeight;
+            candidate.reasons.add("fallback:path");
+            const contentLower = row.content?.toLowerCase() ?? "";
+            if (contentLower.length > 0) {
+                let textHits = 0;
+                for (const term of fallbackTerms) {
+                    if (contentLower.includes(term)) {
+                        textHits += 1;
+                        candidate.keywordHits.add(term);
+                    }
+                }
+                candidate.fallbackTextHits += textHits;
+                if (textHits > 0) {
+                    const textBoost = textHits * weights.textMatch * 0.15;
+                    candidate.score += textBoost;
+                    candidate.reasons.add(`fallback:content:${textHits}`);
+                }
+            }
+            candidate.matchLine ??= 1;
+            candidate.lang ??= row.lang;
+            candidate.ext ??= row.ext;
+            candidate.totalLines ??= row.content?.split(/\r?\n/).length ?? null;
+            candidate.content ??= row.content;
+            candidate.embedding ??= parseEmbedding(row.vector_json ?? null, row.vector_dims ?? null);
+            if (boostProfile !== "none") {
+                applyBoostProfile(candidate, row, profileConfig, weights, extractedTerms);
+            }
+            stringMatchSeeds.add(row.path);
+            if (!fileCache.has(row.path) && row.content) {
+                fileCache.set(row.path, {
+                    content: row.content,
+                    lang: row.lang,
+                    ext: row.ext,
+                    totalLines: candidate.totalLines ?? 0,
+                    embedding: candidate.embedding,
+                });
+            }
+        }
+        // Drop fallback-only candidates with zero text evidence before trimming
+        for (const [path, candidate] of Array.from(candidates.entries())) {
+            const isFallbackOnly = candidate.reasons.has("fallback:path") &&
+                candidate.keywordHits.size === 0 &&
+                candidate.phraseHits === 0;
+            const hasTextEvidence = candidate.fallbackTextHits > 0;
+            if (isFallbackOnly && !hasTextEvidence) {
+                candidates.delete(path);
+            }
+        }
+        // Demote fallback-only hits without text evidence
+        for (const candidate of candidates.values()) {
+            const isFallbackOnly = candidate.reasons.has("fallback:path") &&
+                candidate.keywordHits.size === 0 &&
+                candidate.phraseHits === 0;
+            const hasTextEvidence = candidate.fallbackTextHits > 0;
+            if (isFallbackOnly && !hasTextEvidence) {
+                candidate.scoreMultiplier *= 0.5;
+                candidate.reasons.add("penalty:fallback-no-text");
+            }
+        }
+        if (fallbackRows.length > PATH_FALLBACK_KEEP) {
+            const fallbackOnly = Array.from(candidates.entries())
+                .filter(([_, candidate]) => candidate.reasons.has("fallback:path") &&
+                candidate.keywordHits.size === 0 &&
+                candidate.phraseHits === 0)
+                .sort((a, b) => b[1].score - a[1].score);
+            const toDrop = fallbackOnly.slice(PATH_FALLBACK_KEEP);
+            for (const [path] of toDrop) {
+                candidates.delete(path);
+            }
+            traceSearch(`path fallback trimmed kept=${PATH_FALLBACK_KEEP} dropped=${toDrop.length} candidates=${candidates.size}`);
+        }
+    }
+    if (extractedTerms.keywords.length > 0 || extractedTerms.phrases.length > 0) {
+        for (const candidate of candidates.values()) {
+            applyCoverageBoost(candidate, extractedTerms, weights);
+        }
     }
     const artifactPathTargets = artifactPathHints.map((hintPath) => ({
         path: hintPath,
@@ -2891,9 +3100,11 @@ async function contextBundleImpl(context, params) {
         await addMetadataFallbackCandidates();
     }
     let materializedCandidates = await materializeCandidates();
+    traceSearch(`materialized candidates: ${materializedCandidates.length}`);
     if (materializedCandidates.length === 0 && hasAnyMetadataFilters) {
         await addMetadataFallbackCandidates();
         materializedCandidates = await materializeCandidates();
+        traceSearch(`materialized candidates after metadata fallback: ${materializedCandidates.length}`);
     }
     if (materializedCandidates.length === 0) {
         // Get warnings from WarningManager (includes breaking change notification if applicable)
@@ -3021,6 +3232,14 @@ async function contextBundleImpl(context, params) {
         }
         return b.score - a.score;
     });
+    if (TRACE_SEARCH) {
+        const sample = rankedCandidates.slice(0, 5).map((candidate) => ({
+            path: candidate.path,
+            score: Number(candidate.score.toFixed(3)),
+            reasons: Array.from(candidate.reasons).slice(0, 3),
+        }));
+        traceSearch(`ranked candidates=${rankedCandidates.length}`, sample);
+    }
     const prioritizedCandidates = prioritizeHintCandidates(rankedCandidates, resolvedPathHintTargets.map((target) => target.path), limit);
     if (prioritizedCandidates.length === 0) {
         const warnings = [...context.warningManager.responseWarnings];