kiri-mcp-server 0.24.4 → 0.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -589,6 +589,7 @@ const HINT_SUBSTRING_BOOST = serverConfig.hints.substring.boost;
589
589
  const HINT_LOG_ENABLED = process.env.KIRI_HINT_LOG === "1";
590
590
  const HINT_DICTIONARY_ENABLED = process.env.KIRI_HINT_DICTIONARY !== "0";
591
591
  const HINT_DICTIONARY_LIMIT = Math.max(0, Number.parseInt(process.env.KIRI_HINT_DICTIONARY_LIMIT ?? "2", 10));
592
+ const DEFAULT_FILECACHE_MAX_BYTES = 64 * 1024 * 1024;
592
593
  // Issue #68: Path/Large File Penalty configuration (環境変数で上書き可能)
593
594
  const PATH_MISS_DELTA = serverConfig.penalties.pathMissDelta;
594
595
  const LARGE_FILE_DELTA = serverConfig.penalties.largeFileDelta;
@@ -1325,6 +1326,16 @@ function parseEmbedding(vectorJson, vectorDims) {
1325
1326
  return null;
1326
1327
  }
1327
1328
  }
1329
/**
 * Normalize a raw line-count value read from the database.
 *
 * Accepts plain numbers and BigInt (some DB drivers surface integer
 * columns as BigInt) and yields a finite, non-negative number, or null
 * when the value is absent or unusable.
 *
 * @param {number|bigint|null|undefined} value - raw line_count column value
 * @returns {number|null} normalized line count, or null when invalid
 */
function coerceLineCount(value) {
  if (value == null) {
    return null;
  }
  const asNumber = typeof value === "bigint" ? Number(value) : value;
  return Number.isFinite(asNumber) && asNumber >= 0 ? asNumber : null;
}
1328
1339
  function applyStructuralScores(candidates, queryEmbedding, structuralWeight) {
1329
1340
  if (!queryEmbedding || structuralWeight <= 0) {
1330
1341
  return;
@@ -1501,7 +1512,7 @@ async function fetchEmbeddingMap(db, repoId, paths) {
1501
1512
  }
1502
1513
  async function loadFileContent(db, repoId, filePath) {
1503
1514
  const rows = await db.all(`
1504
- SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
1515
+ SELECT f.path, f.lang, f.ext, f.is_binary, b.content, b.line_count, fe.vector_json, fe.dims AS vector_dims
1505
1516
  FROM file f
1506
1517
  JOIN blob b ON b.hash = f.blob_hash
1507
1518
  LEFT JOIN file_embedding fe
@@ -1514,7 +1525,8 @@ async function loadFileContent(db, repoId, filePath) {
1514
1525
  if (!row || row.is_binary || row.content === null) {
1515
1526
  return null;
1516
1527
  }
1517
- const totalLines = row.content.length === 0 ? 0 : row.content.split(/\r?\n/).length;
1528
+ const totalLines = coerceLineCount(row.line_count) ??
1529
+ (row.content.length === 0 ? 0 : row.content.split(/\r?\n/).length);
1518
1530
  return {
1519
1531
  content: row.content,
1520
1532
  lang: row.lang,
@@ -1523,6 +1535,28 @@ async function loadFileContent(db, repoId, filePath) {
1523
1535
  embedding: parseEmbedding(row.vector_json ?? null, row.vector_dims ?? null),
1524
1536
  };
1525
1537
  }
1538
/**
 * Load lightweight metadata for a single file — no blob content.
 *
 * Companion to loadFileContent: selects everything except b.content so
 * callers that only need lang/ext/line-count/embedding avoid pulling
 * large blobs into memory.
 *
 * @returns {Promise<{lang, ext, totalLines: (number|null), embedding}|null>}
 *   null when the file is unknown to the repo or is binary.
 */
async function loadFileMetadata(db, repoId, filePath) {
  const rows = await db.all(`
    SELECT f.path, f.lang, f.ext, f.is_binary, b.line_count, fe.vector_json, fe.dims AS vector_dims
    FROM file f
    JOIN blob b ON b.hash = f.blob_hash
    LEFT JOIN file_embedding fe
      ON fe.repo_id = f.repo_id
     AND fe.path = f.path
    WHERE f.repo_id = ? AND f.path = ?
    LIMIT 1
  `, [repoId, filePath]);
  const [row] = rows;
  if (!row || row.is_binary) {
    return null;
  }
  const { lang, ext } = row;
  return {
    lang,
    ext,
    totalLines: coerceLineCount(row.line_count),
    embedding: parseEmbedding(row.vector_json ?? null, row.vector_dims ?? null),
  };
}
1526
1560
  function selectSnippet(snippets, matchLine) {
1527
1561
  const firstSnippet = snippets[0];
1528
1562
  if (!firstSnippet) {
@@ -1814,7 +1848,7 @@ async function fetchMetadataOnlyCandidates(db, tableAvailability, repoId, filter
1814
1848
  params.push(...clause.params);
1815
1849
  }
1816
1850
  const sql = `
1817
- SELECT f.path, f.lang, f.ext, b.content
1851
+ SELECT f.path, f.lang, f.ext, b.content, b.line_count
1818
1852
  FROM file f
1819
1853
  JOIN blob b ON b.hash = f.blob_hash
1820
1854
  WHERE ${whereClauses.join(" AND ")}
@@ -2074,7 +2108,7 @@ async function fetchPathFallbackCandidates(db, repoId, terms, limit) {
2074
2108
  const filters = terms.map(() => "f.path ILIKE ?").join(" OR ");
2075
2109
  const params = [repoId, ...terms.map((term) => `%${term}%`), limit];
2076
2110
  return await db.all(`
2077
- SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
2111
+ SELECT f.path, f.lang, f.ext, f.is_binary, b.content, b.line_count, fe.vector_json, fe.dims AS vector_dims
2078
2112
  FROM file f
2079
2113
  JOIN blob b ON b.hash = f.blob_hash
2080
2114
  LEFT JOIN file_embedding fe
@@ -2607,6 +2641,40 @@ function readPenaltyFlags() {
2607
2641
  largeFilePenalty: process.env.KIRI_LARGE_FILE_PENALTY === "1",
2608
2642
  };
2609
2643
  }
2644
/**
 * Parse a human-readable byte size such as "64", "512k", "64mb" or "1g".
 *
 * Units are binary (k = 1024, m = 1024^2, g = 1024^3); an optional
 * trailing "b" and any letter case are accepted, and surrounding
 * whitespace is ignored. Returns null when the string does not match or
 * the digit run is too large to represent as a finite number.
 *
 * @param {string} raw - candidate size string
 * @returns {number|null} size in bytes, or null when unparseable
 */
function parseByteSize(raw) {
  const normalized = raw.trim().toLowerCase();
  const parts = /^(\d+)([kmg]?)(b)?$/.exec(normalized);
  if (parts === null) {
    return null;
  }
  const amount = Number.parseInt(parts[1] ?? "", 10);
  if (!Number.isFinite(amount)) {
    return null;
  }
  // Binary multipliers keyed by the (possibly empty) unit letter.
  const multipliers = { "": 1, k: 1024, m: 1024 * 1024, g: 1024 * 1024 * 1024 };
  return amount * multipliers[parts[2] ?? ""];
}
2667
/**
 * Resolve the file-content cache budget from KIRI_FILECACHE_MAX_BYTES.
 *
 * Falls back to DEFAULT_FILECACHE_MAX_BYTES when the variable is unset
 * or unparseable; a configured size of 0 disables content caching.
 *
 * @returns {number} cache budget in bytes (never negative)
 */
function readFileCacheMaxBytes() {
  const configured = process.env.KIRI_FILECACHE_MAX_BYTES;
  if (configured === undefined) {
    return DEFAULT_FILECACHE_MAX_BYTES;
  }
  const bytes = parseByteSize(configured);
  if (bytes === null || Number.isNaN(bytes)) {
    return DEFAULT_FILECACHE_MAX_BYTES;
  }
  return Math.max(0, bytes);
}
2610
2678
  /**
2611
2679
  * クエリ統計を計算(単語数と平均単語長)
2612
2680
  */
@@ -2970,6 +3038,67 @@ async function contextBundleImpl(context, params) {
2970
3038
  const candidates = new Map();
2971
3039
  const stringMatchSeeds = new Set();
2972
3040
  const fileCache = new Map();
3041
// --- Byte-bounded LRU accounting for fileCache. A Map iterates in
// insertion order, so deleting and re-inserting an entry moves it to the
// most-recently-used position; eviction walks from the front (LRU end).
const fileCacheMaxBytes = readFileCacheMaxBytes();
let fileCacheBytes = 0;
const estimateContentBytes = (content) => Buffer.byteLength(content, "utf8");
// Mark an entry as recently used and return it (undefined when absent).
const touchFileCache = (filePath) => {
  const hit = fileCache.get(filePath);
  if (hit) {
    fileCache.delete(filePath);
    fileCache.set(filePath, hit);
  }
  return hit;
};
// Evict least-recently-used contents until the byte budget is respected.
// Metadata (lang/ext/totalLines/embedding) is always kept; only the
// content payload and its byte accounting are dropped.
const pruneFileCache = () => {
  if (fileCacheMaxBytes <= 0) {
    // Caching disabled: drop every cached content blob in one pass.
    if (fileCacheBytes > 0) {
      for (const entry of fileCache.values()) {
        if (entry.contentBytes) {
          delete entry.content;
          delete entry.contentBytes;
        }
      }
      fileCacheBytes = 0;
    }
    return;
  }
  for (const entry of fileCache.values()) {
    if (fileCacheBytes <= fileCacheMaxBytes) {
      break;
    }
    if (!entry.contentBytes) {
      continue;
    }
    fileCacheBytes = Math.max(0, fileCacheBytes - entry.contentBytes);
    delete entry.content;
    delete entry.contentBytes;
  }
};
// Insert or merge a cache entry, re-counting its content bytes, bumping
// it to MRU position and pruning if the budget is now exceeded.
const setFileCacheEntry = (filePath, next) => {
  const previous = fileCache.get(filePath);
  if (previous?.contentBytes) {
    fileCacheBytes = Math.max(0, fileCacheBytes - previous.contentBytes);
  }
  const entry = { ...previous, ...next };
  if (fileCacheMaxBytes > 0 && entry.content) {
    entry.contentBytes = estimateContentBytes(entry.content);
    fileCacheBytes += entry.contentBytes;
  }
  else {
    // Budget disabled or no payload: keep metadata only.
    delete entry.content;
    delete entry.contentBytes;
  }
  fileCache.delete(filePath);
  fileCache.set(filePath, entry);
  pruneFileCache();
};
2973
3102
  // Phase 2: IDF重み付けプロバイダーの初期化
2974
3103
  // キーワードの文書頻度に基づいて重みを計算し、高頻度語を自動的に減衰
2975
3104
  const idfProvider = createIdfProvider(db, repoId);
@@ -3006,7 +3135,7 @@ async function contextBundleImpl(context, params) {
3006
3135
  phraseParams.push(`${pathPrefix}%`);
3007
3136
  }
3008
3137
  const rows = await db.all(`
3009
- SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
3138
+ SELECT f.path, f.lang, f.ext, f.is_binary, b.content, b.line_count, fe.vector_json, fe.dims AS vector_dims
3010
3139
  FROM file f
3011
3140
  JOIN blob b ON b.hash = f.blob_hash
3012
3141
  LEFT JOIN file_embedding fe
@@ -3054,18 +3183,18 @@ async function contextBundleImpl(context, params) {
3054
3183
  const { line } = buildPreview(row.content, matchedPhrases[0]);
3055
3184
  candidate.matchLine =
3056
3185
  candidate.matchLine === null ? line : Math.min(candidate.matchLine, line);
3057
- candidate.content ??= row.content;
3058
3186
  candidate.lang ??= row.lang;
3059
3187
  candidate.ext ??= row.ext;
3060
- candidate.totalLines ??= row.content.length === 0 ? 0 : row.content.split(/\r?\n/).length;
3188
+ const totalLines = coerceLineCount(row.line_count) ??
3189
+ (row.content.length === 0 ? 0 : row.content.split(/\r?\n/).length);
3190
+ candidate.totalLines ??= totalLines;
3061
3191
  candidate.embedding ??= parseEmbedding(row.vector_json ?? null, row.vector_dims ?? null);
3062
3192
  stringMatchSeeds.add(row.path);
3063
3193
  if (!fileCache.has(row.path)) {
3064
- fileCache.set(row.path, {
3065
- content: row.content,
3194
+ setFileCacheEntry(row.path, {
3066
3195
  lang: row.lang,
3067
3196
  ext: row.ext,
3068
- totalLines: candidate.totalLines ?? 0,
3197
+ totalLines: candidate.totalLines,
3069
3198
  embedding: candidate.embedding,
3070
3199
  });
3071
3200
  }
@@ -3088,7 +3217,7 @@ async function contextBundleImpl(context, params) {
3088
3217
  keywordParams.push(`${pathPrefix}%`);
3089
3218
  }
3090
3219
  const rows = await db.all(`
3091
- SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
3220
+ SELECT f.path, f.lang, f.ext, f.is_binary, b.content, b.line_count, fe.vector_json, fe.dims AS vector_dims
3092
3221
  FROM file f
3093
3222
  JOIN blob b ON b.hash = f.blob_hash
3094
3223
  LEFT JOIN file_embedding fe
@@ -3131,18 +3260,18 @@ async function contextBundleImpl(context, params) {
3131
3260
  const { line } = buildPreview(row.content, matchedKeywords[0]);
3132
3261
  candidate.matchLine =
3133
3262
  candidate.matchLine === null ? line : Math.min(candidate.matchLine, line);
3134
- candidate.content ??= row.content;
3135
3263
  candidate.lang ??= row.lang;
3136
3264
  candidate.ext ??= row.ext;
3137
- candidate.totalLines ??= row.content.length === 0 ? 0 : row.content.split(/\r?\n/).length;
3265
+ const totalLines = coerceLineCount(row.line_count) ??
3266
+ (row.content.length === 0 ? 0 : row.content.split(/\r?\n/).length);
3267
+ candidate.totalLines ??= totalLines;
3138
3268
  candidate.embedding ??= parseEmbedding(row.vector_json ?? null, row.vector_dims ?? null);
3139
3269
  stringMatchSeeds.add(row.path);
3140
3270
  if (!fileCache.has(row.path)) {
3141
- fileCache.set(row.path, {
3142
- content: row.content,
3271
+ setFileCacheEntry(row.path, {
3143
3272
  lang: row.lang,
3144
3273
  ext: row.ext,
3145
- totalLines: candidate.totalLines ?? 0,
3274
+ totalLines: candidate.totalLines,
3146
3275
  embedding: candidate.embedding,
3147
3276
  });
3148
3277
  }
@@ -3185,19 +3314,21 @@ async function contextBundleImpl(context, params) {
3185
3314
  candidate.matchLine ??= 1;
3186
3315
  candidate.lang ??= row.lang;
3187
3316
  candidate.ext ??= row.ext;
3188
- candidate.totalLines ??= row.content?.split(/\r?\n/).length ?? null;
3189
- candidate.content ??= row.content;
3317
+ const totalLines = coerceLineCount(row.line_count) ??
3318
+ (row.content ? (row.content.length === 0 ? 0 : row.content.split(/\r?\n/).length) : null);
3319
+ if (totalLines !== null) {
3320
+ candidate.totalLines ??= totalLines;
3321
+ }
3190
3322
  candidate.embedding ??= parseEmbedding(row.vector_json ?? null, row.vector_dims ?? null);
3191
3323
  if (boostProfile !== "none") {
3192
3324
  applyBoostProfile(candidate, row, profileConfig, weights, extractedTerms);
3193
3325
  }
3194
3326
  stringMatchSeeds.add(row.path);
3195
- if (!fileCache.has(row.path) && row.content) {
3196
- fileCache.set(row.path, {
3197
- content: row.content,
3327
+ if (!fileCache.has(row.path)) {
3328
+ setFileCacheEntry(row.path, {
3198
3329
  lang: row.lang,
3199
3330
  ext: row.ext,
3200
- totalLines: candidate.totalLines ?? 0,
3331
+ totalLines: candidate.totalLines,
3201
3332
  embedding: candidate.embedding,
3202
3333
  });
3203
3334
  }
@@ -3355,27 +3486,31 @@ async function contextBundleImpl(context, params) {
3355
3486
  if (isSuppressedPath(candidate.path)) {
3356
3487
  continue;
3357
3488
  }
3358
- if (!candidate.content) {
3359
- const cached = fileCache.get(candidate.path);
3360
- if (cached) {
3361
- candidate.content = cached.content;
3362
- candidate.lang = cached.lang;
3363
- candidate.ext = cached.ext;
3364
- candidate.totalLines = cached.totalLines;
3365
- candidate.embedding = cached.embedding;
3366
- }
3367
- else {
3368
- const loaded = await loadFileContent(db, repoId, candidate.path);
3369
- if (!loaded) {
3370
- continue;
3371
- }
3372
- candidate.content = loaded.content;
3373
- candidate.lang = loaded.lang;
3374
- candidate.ext = loaded.ext;
3375
- candidate.totalLines = loaded.totalLines;
3376
- candidate.embedding = loaded.embedding;
3377
- fileCache.set(candidate.path, loaded);
3489
+ const cached = touchFileCache(candidate.path);
3490
+ if (cached) {
3491
+ candidate.lang ??= cached.lang;
3492
+ candidate.ext ??= cached.ext;
3493
+ candidate.totalLines ??= cached.totalLines;
3494
+ candidate.embedding ??= cached.embedding;
3495
+ }
3496
+ const needsMetadata = candidate.lang === null || candidate.ext === null || candidate.totalLines === null;
3497
+ const hasTextEvidence = candidate.keywordHits.size > 0 ||
3498
+ candidate.phraseHits > 0 ||
3499
+ candidate.fallbackTextHits > 0;
3500
+ const needsEmbedding = candidate.embedding === null && !hasTextEvidence;
3501
+ if (needsMetadata || needsEmbedding) {
3502
+ const loaded = await loadFileMetadata(db, repoId, candidate.path);
3503
+ if (!loaded) {
3504
+ continue;
3378
3505
  }
3506
+ candidate.lang ??= loaded.lang;
3507
+ candidate.ext ??= loaded.ext;
3508
+ candidate.totalLines ??= loaded.totalLines;
3509
+ candidate.embedding ??= loaded.embedding;
3510
+ setFileCacheEntry(candidate.path, {
3511
+ ...(cached ?? {}),
3512
+ ...loaded,
3513
+ });
3379
3514
  }
3380
3515
  result.push(candidate);
3381
3516
  }
@@ -3391,11 +3526,13 @@ async function contextBundleImpl(context, params) {
3391
3526
  }
3392
3527
  for (const row of metadataRows) {
3393
3528
  const candidate = ensureCandidate(candidates, row.path);
3394
- if (row.content) {
3395
- candidate.content = row.content;
3396
- candidate.totalLines = row.content.split(/\r?\n/).length;
3397
- fileCache.set(row.path, {
3398
- content: row.content,
3529
+ const totalLines = coerceLineCount(row.line_count) ??
3530
+ (row.content ? (row.content.length === 0 ? 0 : row.content.split(/\r?\n/).length) : null);
3531
+ if (totalLines !== null) {
3532
+ candidate.totalLines ??= totalLines;
3533
+ }
3534
+ if (!fileCache.has(row.path)) {
3535
+ setFileCacheEntry(row.path, {
3399
3536
  lang: row.lang,
3400
3537
  ext: row.ext,
3401
3538
  totalLines: candidate.totalLines,
@@ -3566,9 +3703,37 @@ async function contextBundleImpl(context, params) {
3566
3703
  };
3567
3704
  }
3568
3705
  const maxScore = Math.max(...prioritizedCandidates.map((candidate) => candidate.score));
3706
+ const ensureCandidateContent = async (candidate) => {
3707
+ if (candidate.content) {
3708
+ return true;
3709
+ }
3710
+ const cached = touchFileCache(candidate.path);
3711
+ if (cached?.content) {
3712
+ candidate.content = cached.content;
3713
+ candidate.lang ??= cached.lang;
3714
+ candidate.ext ??= cached.ext;
3715
+ candidate.totalLines ??= cached.totalLines;
3716
+ candidate.embedding ??= cached.embedding;
3717
+ return true;
3718
+ }
3719
+ const loaded = await loadFileContent(db, repoId, candidate.path);
3720
+ if (!loaded) {
3721
+ return false;
3722
+ }
3723
+ candidate.content = loaded.content;
3724
+ candidate.lang ??= loaded.lang;
3725
+ candidate.ext ??= loaded.ext;
3726
+ candidate.totalLines ??= loaded.totalLines;
3727
+ candidate.embedding ??= loaded.embedding;
3728
+ setFileCacheEntry(candidate.path, {
3729
+ ...(cached ?? {}),
3730
+ ...loaded,
3731
+ });
3732
+ return true;
3733
+ };
3569
3734
  const results = [];
3570
3735
  for (const candidate of prioritizedCandidates) {
3571
- if (!candidate.content) {
3736
+ if (!(await ensureCandidateContent(candidate))) {
3572
3737
  continue;
3573
3738
  }
3574
3739
  const snippets = await db.all(`