kiri-mcp-server 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@ import { generateEmbedding, structuralSimilarity } from "../shared/embedding.js"
5
5
  import { encode as encodeGPT, tokenizeText } from "../shared/tokenizer.js";
6
6
  import { expandAbbreviations } from "./abbreviations.js";
7
7
  import { getBoostProfile, } from "./boost-profiles.js";
8
+ import { loadPathPenalties, mergePathPenaltyEntries } from "./config-loader.js";
8
9
  import { loadServerConfig } from "./config.js";
9
10
  import { coerceProfileName, loadScoringProfile } from "./scoring.js";
10
11
  import { createServerServices } from "./services/index.js";
@@ -447,12 +448,14 @@ function createHintExpansionConfig(weights) {
447
448
  const DEFAULT_SEARCH_LIMIT = 50;
448
449
  const DEFAULT_BUNDLE_LIMIT = 7; // Reduced from 12 to optimize token usage
449
450
  const MAX_BUNDLE_LIMIT = 20;
451
+ const TRACE_SEARCH = process.env.KIRI_TRACE_SEARCH === "1";
450
452
  const MAX_KEYWORDS = 12;
451
453
  const MAX_MATCHES_PER_KEYWORD = 40;
452
454
  const MAX_DEPENDENCY_SEEDS = 8;
453
455
  const MAX_DEPENDENCY_SEEDS_QUERY_LIMIT = 100; // SQL injection防御用の上限
454
456
  const NEARBY_LIMIT = 6;
455
457
  const serverConfig = loadServerConfig();
458
+ const mergedPathMultiplierCache = new Map();
456
459
  const SUPPRESS_NON_CODE_ENABLED = serverConfig.features.suppressNonCode;
457
460
  const FINAL_RESULT_SUPPRESSION_ENABLED = serverConfig.features.suppressFinalResults;
458
461
  const CLAMP_SNIPPETS_ENABLED = serverConfig.features.clampSnippets;
@@ -463,6 +466,15 @@ const SAFE_PATH_PATTERN = /^[a-zA-Z0-9_.\-/]+$/;
463
466
  const HINT_PRIORITY_TEXT_MULTIPLIER = serverConfig.hints.priority.textMultiplier;
464
467
  const HINT_PRIORITY_PATH_MULTIPLIER = serverConfig.hints.priority.pathMultiplier;
465
468
  const HINT_PRIORITY_BASE_BONUS = serverConfig.hints.priority.baseBonus;
469
+ const PATH_FALLBACK_LIMIT = 40;
470
+ const PATH_FALLBACK_TERMS_LIMIT = 5;
471
+ const PATH_FALLBACK_KEEP = 8;
472
+ const AUTO_PATH_SEGMENT_LIMIT = 4;
473
/**
 * Writes a search-trace line to the console when tracing is enabled.
 *
 * Nothing is emitted unless the module-level TRACE_SEARCH flag is truthy.
 *
 * @param {string} message - Text appended after the "[TRACE context_bundle]" tag.
 * @param {...unknown} args - Extra values forwarded unchanged to console.log.
 * @returns {void}
 */
function traceSearch(message, ...args) {
    if (!TRACE_SEARCH) {
        return;
    }
    console.log(`[TRACE context_bundle] ${message}`, ...args);
}
466
478
  const HINT_DIR_LIMIT = serverConfig.hints.directory.limit;
467
479
  const HINT_DIR_MAX_FILES = serverConfig.hints.directory.maxFiles;
468
480
  const HINT_DEP_OUT_LIMIT = serverConfig.hints.dependency.outLimit;
@@ -811,8 +823,30 @@ function extractKeywords(text) {
811
823
  }
812
824
  }
813
825
  }
826
+ addKeywordDerivedPathSegments(result);
814
827
  return result;
815
828
  }
829
/**
 * Tops up `result.pathSegments` with keywords until AUTO_PATH_SEGMENT_LIMIT
 * entries are present.
 *
 * Keywords shorter than three characters, keywords found in STOP_WORDS, and
 * keywords already present in `pathSegments` (or already picked during this
 * call) are skipped. `result.pathSegments` is mutated in place; nothing is
 * returned.
 *
 * @param {{ keywords: string[], pathSegments: string[] }} result - Extracted terms to extend.
 * @returns {void}
 */
function addKeywordDerivedPathSegments(result) {
    const segments = result.pathSegments;
    if (segments.length >= AUTO_PATH_SEGMENT_LIMIT) {
        return;
    }
    const derived = [];
    const isUsable = (word) => !(word.length < 3 || STOP_WORDS.has(word));
    const isKnown = (word) => segments.includes(word) || derived.includes(word);
    for (const word of result.keywords) {
        if (!isUsable(word) || isKnown(word)) {
            continue;
        }
        derived.push(word);
        // Stop as soon as existing + derived segments reach the cap.
        if (segments.length + derived.length >= AUTO_PATH_SEGMENT_LIMIT) {
            break;
        }
    }
    if (derived.length > 0) {
        segments.push(...derived);
    }
}
816
850
  function ensureCandidate(map, filePath) {
817
851
  let candidate = map.get(filePath);
818
852
  if (!candidate) {
@@ -829,6 +863,10 @@ function ensureCandidate(map, filePath) {
829
863
  embedding: null,
830
864
  semanticSimilarity: null,
831
865
  pathMatchHits: 0, // Issue #68: Track path match count
866
+ keywordHits: new Set(),
867
+ phraseHits: 0,
868
+ // pathFallbackReason は optional なので省略(exactOptionalPropertyTypes対応)
869
+ fallbackTextHits: 0,
832
870
  penalties: [], // Issue #68: Penalty log for telemetry
833
871
  };
834
872
  map.set(filePath, candidate);
@@ -1774,6 +1812,50 @@ function applyFileTypeBoost(path, baseScore, profileConfig, weights) {
1774
1812
  }
1775
1813
  return baseScore * multiplier;
1776
1814
  }
1815
/**
 * Adds a score bonus proportional to how much of the query a candidate covers.
 *
 * Two independent bonuses are applied, each scaled by `weights.textMatch`:
 *   - keyword coverage: up to 0.4 x textMatch
 *   - phrase coverage:  up to 0.6 x textMatch
 * For each bonus applied, a `coverage:*` entry is added to `candidate.reasons`.
 * Candidates with neither keyword hits nor phrase hits are left untouched,
 * which also covers path-fallback candidates that have no text evidence.
 *
 * @param {{ score: number, reasons: Set<string>, keywordHits: Set<string>, phraseHits: number }} candidate
 *   Candidate to update in place.
 * @param {{ keywords: string[], phrases: string[] }} extractedTerms - Terms extracted from the query.
 * @param {{ textMatch: number }} weights - Scoring weights.
 * @returns {void}
 */
function applyCoverageBoost(candidate, extractedTerms, weights) {
    const keywordHitCount = candidate.keywordHits.size;
    const phraseHitCount = candidate.phraseHits;
    // Coverage is only meaningful when there is text or phrase evidence.
    if (keywordHitCount === 0 && phraseHitCount === 0) {
        return;
    }
    const keywordTotal = extractedTerms.keywords.length;
    if (keywordTotal > 0 && keywordHitCount > 0) {
        // keywordHits may contain terms that are not in extractedTerms.keywords
        // (e.g. hits recorded for phrases or path segments), so the ratio can
        // exceed 1. Clamp it, matching the phrase branch, to keep the bonus
        // within its intended ceiling.
        const coverage = Math.min(1, keywordHitCount / keywordTotal);
        candidate.score += coverage * weights.textMatch * 0.4;
        candidate.reasons.add(`coverage:keywords:${coverage.toFixed(2)}`);
    }
    const phraseTotal = extractedTerms.phrases.length;
    if (phraseTotal > 0 && phraseHitCount > 0) {
        const phraseCoverage = Math.min(1, phraseHitCount / phraseTotal);
        candidate.score += phraseCoverage * weights.textMatch * 0.6;
        candidate.reasons.add(`coverage:phrases:${phraseCoverage.toFixed(2)}`);
    }
}
1839
/**
 * Looks up non-binary files whose path contains any of the given terms
 * (case-insensitive), ordered by path.
 *
 * Each term is matched as a literal substring: the LIKE metacharacters
 * `%`, `_` and the escape character `\` are escaped before the term is
 * wrapped in `%...%`, so a term such as "user_id" does not match "userXid".
 *
 * @param {{ all(sql: string, params: unknown[]): Promise<object[]> }} db - Database handle.
 * @param {unknown} repoId - Value bound to `f.repo_id`.
 * @param {string[]} terms - Substrings to look for in file paths.
 * @param {number} limit - Maximum number of rows to return.
 * @returns {Promise<object[]>} Rows with path, lang, ext, is_binary, content,
 *   vector_json and vector_dims; an empty array when `terms` is empty or
 *   `limit` is not positive (no query is issued in that case).
 */
async function fetchPathFallbackCandidates(db, repoId, terms, limit) {
    if (terms.length === 0 || limit <= 0) {
        return [];
    }
    // Prefix every LIKE metacharacter (and the escape char itself) with "\".
    const escapeLikeTerm = (term) => term.replace(/[\\%_]/g, "\\$&");
    // The explicit ESCAPE clause makes "\" the escape character for each filter.
    const filters = terms.map(() => "f.path ILIKE ? ESCAPE '\\'").join(" OR ");
    const params = [repoId, ...terms.map((term) => `%${escapeLikeTerm(term)}%`), limit];
    return await db.all(`
      SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
      FROM file f
      JOIN blob b ON b.hash = f.blob_hash
      LEFT JOIN file_embedding fe
        ON fe.repo_id = f.repo_id
       AND fe.path = f.path
      WHERE f.repo_id = ?
        AND f.is_binary = FALSE
        AND (${filters})
      ORDER BY f.path
      LIMIT ?
    `, params);
}
1777
1859
  /**
1778
1860
  * パスベースのスコアリングを適用(加算的ブースト)
1779
1861
  * goalのキーワード/フレーズがファイルパスに含まれる場合にスコアを加算
@@ -1955,6 +2037,18 @@ function applyMultiplicativeFilePenalties(candidate, path, lowerPath, fileName,
1955
2037
  function applyFileTypeMultipliers(candidate, path, ext, profileConfig, weights) {
1956
2038
  const fileName = path.split("/").pop() ?? "";
1957
2039
  const lowerPath = path.toLowerCase();
2040
+ // Very low value: schemas, fixtures, testdata, examples, baseline
2041
+ const schemaJson = lowerPath.endsWith(".schema.json") || lowerPath.includes("/schemas/");
2042
+ const isFixture = lowerPath.includes("/fixtures/") ||
2043
+ lowerPath.includes("/fixture/") ||
2044
+ lowerPath.includes("/testdata/");
2045
+ const isExample = lowerPath.includes("/examples/") || lowerPath.includes("/example/");
2046
+ const isBaseline = lowerPath.includes("baseline") || lowerPath.includes("golden");
2047
+ if (schemaJson || isFixture || isExample || isBaseline) {
2048
+ candidate.scoreMultiplier *= weights.configPenaltyMultiplier;
2049
+ candidate.reasons.add("penalty:low-value-file");
2050
+ return;
2051
+ }
1958
2052
  // ✅ Step 1: Low-value files (v1.0.0: syntax/perf/legal/migration)
1959
2053
  // Apply configPenaltyMultiplier (strong penalty) to rarely useful file types
1960
2054
  const isSyntaxGrammar = path.includes("/syntaxes/") &&
@@ -2207,7 +2301,17 @@ export async function filesSearch(context, params) {
2207
2301
  const filterValueSet = new Set(metadataFilters.flatMap((filter) => filter.values.map((value) => value.toLowerCase())));
2208
2302
  const boostProfile = params.boost_profile ??
2209
2303
  (hasHintMetadataFilters ? "balanced" : hasStrictMetadataFilters ? "docs" : "default");
2210
- const profileConfig = getBoostProfile(boostProfile);
2304
+ const baseProfileConfig = getBoostProfile(boostProfile);
2305
+ const cachedMerged = mergedPathMultiplierCache.get(boostProfile);
2306
+ const mergedPathMultipliers = cachedMerged ??
2307
+ mergePathPenaltyEntries(baseProfileConfig.pathMultipliers, [], serverConfig.pathPenalties);
2308
+ if (!cachedMerged) {
2309
+ mergedPathMultiplierCache.set(boostProfile, mergedPathMultipliers);
2310
+ }
2311
+ const profileConfig = {
2312
+ ...baseProfileConfig,
2313
+ pathMultipliers: mergedPathMultipliers,
2314
+ };
2211
2315
  const weights = loadScoringProfile(null);
2212
2316
  const options = parseOutputOptions(params);
2213
2317
  const previewQuery = hasTextQuery
@@ -2583,6 +2687,8 @@ async function contextBundleImpl(context, params) {
2583
2687
  const semanticSeed = keywordSources.join(" ");
2584
2688
  const queryEmbedding = generateEmbedding(semanticSeed)?.values ?? null;
2585
2689
  const extractedTerms = extractKeywords(semanticSeed);
2690
+ const segmentPreview = extractedTerms.pathSegments.slice(0, AUTO_PATH_SEGMENT_LIMIT).join(",");
2691
+ traceSearch(`terms repo=${repoId} id=${params.requestId ?? "n/a"} keywords=${extractedTerms.keywords.length} phrases=${extractedTerms.phrases.length} pathSegments=${extractedTerms.pathSegments.length} segs=[${segmentPreview}]`);
2586
2692
  // フォールバック: editing_pathからキーワードを抽出
2587
2693
  if (extractedTerms.phrases.length === 0 &&
2588
2694
  extractedTerms.keywords.length === 0 &&
@@ -2599,14 +2705,18 @@ async function contextBundleImpl(context, params) {
2599
2705
  // ✅ Cache boost profile config to avoid redundant lookups in hot path
2600
2706
  const boostProfile = params.boost_profile ??
2601
2707
  (hasHintMetadataFilters ? "balanced" : hasStrictMetadataFilters ? "docs" : "default");
2602
- const profileConfig = getBoostProfile(boostProfile);
2708
+ const baseProfileConfig = getBoostProfile(boostProfile);
2709
+ const profileConfig = {
2710
+ ...baseProfileConfig,
2711
+ pathMultipliers: loadPathPenalties(baseProfileConfig.pathMultipliers),
2712
+ };
2603
2713
  // フレーズマッチング(高い重み: textMatch × 2)- 統合クエリでパフォーマンス改善
2604
2714
  if (extractedTerms.phrases.length > 0) {
2605
2715
  const phrasePlaceholders = extractedTerms.phrases
2606
2716
  .map(() => "b.content ILIKE '%' || ? || '%'")
2607
2717
  .join(" OR ");
2608
2718
  // DEBUG: Log SQL query parameters for troubleshooting
2609
- console.log(`[DEBUG contextBundle] Executing phrase match query with repo_id=${repoId}, phrases=${JSON.stringify(extractedTerms.phrases)}`);
2719
+ traceSearch(`Executing phrase match query with repo_id=${repoId}, phrases=${JSON.stringify(extractedTerms.phrases)}`);
2610
2720
  const rows = await db.all(`
2611
2721
  SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
2612
2722
  FROM file f
@@ -2622,11 +2732,14 @@ async function contextBundleImpl(context, params) {
2622
2732
  `, [repoId, ...extractedTerms.phrases, MAX_MATCHES_PER_KEYWORD * extractedTerms.phrases.length]);
2623
2733
  // DEBUG: Log returned paths and verify they match expected repo_id
2624
2734
  if (rows.length > 0) {
2625
- console.log(`[DEBUG contextBundle] Phrase match returned ${rows.length} rows. Sample paths:`, rows.slice(0, 3).map((r) => r.path));
2735
+ traceSearch(`Phrase match returned ${rows.length} rows. Sample paths: ${rows
2736
+ .slice(0, 3)
2737
+ .map((r) => r.path)
2738
+ .join(", ")}`);
2626
2739
  // Verify repo_id of returned files
2627
2740
  const pathsToCheck = rows.slice(0, 3).map((r) => r.path);
2628
2741
  const verification = await db.all(`SELECT path, repo_id FROM file WHERE path IN (${pathsToCheck.map(() => "?").join(", ")}) LIMIT 3`, pathsToCheck);
2629
- console.log(`[DEBUG contextBundle] Repo ID verification:`, verification);
2742
+ traceSearch(`Repo ID verification`, verification);
2630
2743
  }
2631
2744
  for (const row of rows) {
2632
2745
  if (row.content === null) {
@@ -2639,6 +2752,7 @@ async function contextBundleImpl(context, params) {
2639
2752
  continue; // Should not happen, but defensive check
2640
2753
  }
2641
2754
  const candidate = ensureCandidate(candidates, row.path);
2755
+ candidate.phraseHits += matchedPhrases.length;
2642
2756
  // 各マッチしたフレーズに対してスコアリング
2643
2757
  for (const phrase of matchedPhrases) {
2644
2758
  // フレーズマッチは通常の2倍のスコア
@@ -2669,6 +2783,7 @@ async function contextBundleImpl(context, params) {
2669
2783
  });
2670
2784
  }
2671
2785
  }
2786
+ traceSearch(`phrase search produced ${rows.length} rows, candidates=${candidates.size}`);
2672
2787
  }
2673
2788
  // キーワードマッチング(通常の重み)- 統合クエリでパフォーマンス改善
2674
2789
  if (extractedTerms.keywords.length > 0) {
@@ -2703,6 +2818,7 @@ async function contextBundleImpl(context, params) {
2703
2818
  for (const keyword of matchedKeywords) {
2704
2819
  candidate.score += weights.textMatch;
2705
2820
  candidate.reasons.add(`text:${keyword}`);
2821
+ candidate.keywordHits.add(keyword);
2706
2822
  }
2707
2823
  // Apply boost profile once per file
2708
2824
  if (boostProfile !== "none") {
@@ -2728,6 +2844,99 @@ async function contextBundleImpl(context, params) {
2728
2844
  });
2729
2845
  }
2730
2846
  }
2847
+ traceSearch(`keyword search produced ${rows.length} rows, candidates=${candidates.size}`);
2848
+ }
2849
+ const fallbackTerms = Array.from(new Set([...extractedTerms.phrases, ...extractedTerms.keywords, ...extractedTerms.pathSegments]
2850
+ .map((term) => term.toLowerCase())
2851
+ .filter((term) => term.length >= 3))).slice(0, PATH_FALLBACK_TERMS_LIMIT);
2852
+ if (fallbackTerms.length > 0) {
2853
+ const fallbackRows = await fetchPathFallbackCandidates(db, repoId, fallbackTerms, Math.min(limit * 2, PATH_FALLBACK_LIMIT));
2854
+ const fallbackReason = stringMatchSeeds.size === 0
2855
+ ? "no-string-match"
2856
+ : candidates.size < limit
2857
+ ? "low-candidates"
2858
+ : "supplemental";
2859
+ traceSearch(`path fallback triggered (${fallbackReason}) terms=${JSON.stringify(fallbackTerms)} rows=${fallbackRows.length}`);
2860
+ const fallbackWeight = stringMatchSeeds.size === 0 ? weights.pathMatch * 0.75 : weights.pathMatch * 0.2;
2861
+ for (const row of fallbackRows) {
2862
+ const candidate = ensureCandidate(candidates, row.path);
2863
+ candidate.pathFallbackReason = fallbackReason;
2864
+ candidate.score += fallbackWeight;
2865
+ candidate.reasons.add("fallback:path");
2866
+ const contentLower = row.content?.toLowerCase() ?? "";
2867
+ if (contentLower.length > 0) {
2868
+ let textHits = 0;
2869
+ for (const term of fallbackTerms) {
2870
+ if (contentLower.includes(term)) {
2871
+ textHits += 1;
2872
+ candidate.keywordHits.add(term);
2873
+ }
2874
+ }
2875
+ candidate.fallbackTextHits += textHits;
2876
+ if (textHits > 0) {
2877
+ const textBoost = textHits * weights.textMatch * 0.15;
2878
+ candidate.score += textBoost;
2879
+ candidate.reasons.add(`fallback:content:${textHits}`);
2880
+ }
2881
+ }
2882
+ candidate.matchLine ??= 1;
2883
+ candidate.lang ??= row.lang;
2884
+ candidate.ext ??= row.ext;
2885
+ candidate.totalLines ??= row.content?.split(/\r?\n/).length ?? null;
2886
+ candidate.content ??= row.content;
2887
+ candidate.embedding ??= parseEmbedding(row.vector_json ?? null, row.vector_dims ?? null);
2888
+ if (boostProfile !== "none") {
2889
+ applyBoostProfile(candidate, row, profileConfig, weights, extractedTerms);
2890
+ }
2891
+ stringMatchSeeds.add(row.path);
2892
+ if (!fileCache.has(row.path) && row.content) {
2893
+ fileCache.set(row.path, {
2894
+ content: row.content,
2895
+ lang: row.lang,
2896
+ ext: row.ext,
2897
+ totalLines: candidate.totalLines ?? 0,
2898
+ embedding: candidate.embedding,
2899
+ });
2900
+ }
2901
+ }
2902
+ // Drop fallback-only candidates with zero text evidence before trimming
2903
+ for (const [path, candidate] of Array.from(candidates.entries())) {
2904
+ const isFallbackOnly = candidate.reasons.has("fallback:path") &&
2905
+ candidate.keywordHits.size === 0 &&
2906
+ candidate.phraseHits === 0;
2907
+ const hasTextEvidence = candidate.fallbackTextHits > 0;
2908
+ if (isFallbackOnly && !hasTextEvidence) {
2909
+ candidates.delete(path);
2910
+ }
2911
+ }
2912
+ // Demote fallback-only hits without text evidence
2913
+ for (const candidate of candidates.values()) {
2914
+ const isFallbackOnly = candidate.reasons.has("fallback:path") &&
2915
+ candidate.keywordHits.size === 0 &&
2916
+ candidate.phraseHits === 0;
2917
+ const hasTextEvidence = candidate.fallbackTextHits > 0;
2918
+ if (isFallbackOnly && !hasTextEvidence) {
2919
+ candidate.scoreMultiplier *= 0.5;
2920
+ candidate.reasons.add("penalty:fallback-no-text");
2921
+ }
2922
+ }
2923
+ if (fallbackRows.length > PATH_FALLBACK_KEEP) {
2924
+ const fallbackOnly = Array.from(candidates.entries())
2925
+ .filter(([_, candidate]) => candidate.reasons.has("fallback:path") &&
2926
+ candidate.keywordHits.size === 0 &&
2927
+ candidate.phraseHits === 0)
2928
+ .sort((a, b) => b[1].score - a[1].score);
2929
+ const toDrop = fallbackOnly.slice(PATH_FALLBACK_KEEP);
2930
+ for (const [path] of toDrop) {
2931
+ candidates.delete(path);
2932
+ }
2933
+ traceSearch(`path fallback trimmed kept=${PATH_FALLBACK_KEEP} dropped=${toDrop.length} candidates=${candidates.size}`);
2934
+ }
2935
+ }
2936
+ if (extractedTerms.keywords.length > 0 || extractedTerms.phrases.length > 0) {
2937
+ for (const candidate of candidates.values()) {
2938
+ applyCoverageBoost(candidate, extractedTerms, weights);
2939
+ }
2731
2940
  }
2732
2941
  const artifactPathTargets = artifactPathHints.map((hintPath) => ({
2733
2942
  path: hintPath,
@@ -2891,9 +3100,11 @@ async function contextBundleImpl(context, params) {
2891
3100
  await addMetadataFallbackCandidates();
2892
3101
  }
2893
3102
  let materializedCandidates = await materializeCandidates();
3103
+ traceSearch(`materialized candidates: ${materializedCandidates.length}`);
2894
3104
  if (materializedCandidates.length === 0 && hasAnyMetadataFilters) {
2895
3105
  await addMetadataFallbackCandidates();
2896
3106
  materializedCandidates = await materializeCandidates();
3107
+ traceSearch(`materialized candidates after metadata fallback: ${materializedCandidates.length}`);
2897
3108
  }
2898
3109
  if (materializedCandidates.length === 0) {
2899
3110
  // Get warnings from WarningManager (includes breaking change notification if applicable)
@@ -3021,6 +3232,14 @@ async function contextBundleImpl(context, params) {
3021
3232
  }
3022
3233
  return b.score - a.score;
3023
3234
  });
3235
+ if (TRACE_SEARCH) {
3236
+ const sample = rankedCandidates.slice(0, 5).map((candidate) => ({
3237
+ path: candidate.path,
3238
+ score: Number(candidate.score.toFixed(3)),
3239
+ reasons: Array.from(candidate.reasons).slice(0, 3),
3240
+ }));
3241
+ traceSearch(`ranked candidates=${rankedCandidates.length}`, sample);
3242
+ }
3024
3243
  const prioritizedCandidates = prioritizeHintCandidates(rankedCandidates, resolvedPathHintTargets.map((target) => target.path), limit);
3025
3244
  if (prioritizedCandidates.length === 0) {
3026
3245
  const warnings = [...context.warningManager.responseWarnings];