kiri-mcp-server 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +144 -13
- package/dist/client/cli.js +68 -0
- package/dist/client/cli.js.map +1 -0
- package/dist/client/index.js +5 -0
- package/dist/client/index.js.map +1 -0
- package/dist/eval/metrics.js +47 -0
- package/dist/eval/metrics.js.map +1 -0
- package/dist/indexer/cli.js +362 -0
- package/dist/indexer/cli.js.map +1 -0
- package/dist/indexer/codeintel.js +182 -0
- package/dist/indexer/codeintel.js.map +1 -0
- package/dist/indexer/git.js +30 -0
- package/dist/indexer/git.js.map +1 -0
- package/dist/indexer/language.js +34 -0
- package/dist/indexer/language.js.map +1 -0
- package/dist/indexer/pipeline/filters/denylist.js +71 -0
- package/dist/indexer/pipeline/filters/denylist.js.map +1 -0
- package/dist/indexer/schema.js +101 -0
- package/dist/indexer/schema.js.map +1 -0
- package/dist/package.json +14 -1
- package/dist/server/bootstrap.js +19 -0
- package/dist/server/bootstrap.js.map +1 -0
- package/dist/server/context.js +1 -0
- package/dist/server/context.js.map +1 -0
- package/dist/server/fallbacks/degradeController.js +69 -0
- package/dist/server/fallbacks/degradeController.js.map +1 -0
- package/dist/server/handlers.js +1268 -0
- package/dist/server/handlers.js.map +1 -0
- package/dist/server/main.js +151 -0
- package/dist/server/main.js.map +1 -0
- package/dist/server/observability/metrics.js +56 -0
- package/dist/server/observability/metrics.js.map +1 -0
- package/dist/server/observability/tracing.js +58 -0
- package/dist/server/observability/tracing.js.map +1 -0
- package/dist/server/rpc.js +477 -0
- package/dist/server/rpc.js.map +1 -0
- package/dist/server/runtime.js +47 -0
- package/dist/server/runtime.js.map +1 -0
- package/dist/server/scoring.js +116 -0
- package/dist/server/scoring.js.map +1 -0
- package/dist/server/stdio.js +76 -0
- package/dist/server/stdio.js.map +1 -0
- package/dist/shared/duckdb.js +119 -0
- package/dist/shared/duckdb.js.map +1 -0
- package/dist/shared/embedding.js +98 -0
- package/dist/shared/embedding.js.map +1 -0
- package/dist/shared/index.js +9 -0
- package/dist/shared/index.js.map +1 -0
- package/dist/shared/security/config.js +64 -0
- package/dist/shared/security/config.js.map +1 -0
- package/dist/shared/security/masker.js +56 -0
- package/dist/shared/security/masker.js.map +1 -0
- package/dist/shared/tokenizer.js +4 -0
- package/dist/shared/tokenizer.js.map +1 -0
- package/dist/shared/utils/simpleYaml.js +89 -0
- package/dist/shared/utils/simpleYaml.js.map +1 -0
- package/dist/src/client/proxy.js +83 -13
- package/dist/src/client/proxy.js.map +1 -1
- package/dist/src/client/start-daemon.d.ts.map +1 -1
- package/dist/src/client/start-daemon.js +2 -1
- package/dist/src/client/start-daemon.js.map +1 -1
- package/dist/src/daemon/daemon.js +97 -18
- package/dist/src/daemon/daemon.js.map +1 -1
- package/dist/src/daemon/socket.d.ts +6 -4
- package/dist/src/daemon/socket.d.ts.map +1 -1
- package/dist/src/daemon/socket.js +62 -18
- package/dist/src/daemon/socket.js.map +1 -1
- package/dist/src/indexer/cli.d.ts +1 -0
- package/dist/src/indexer/cli.d.ts.map +1 -1
- package/dist/src/indexer/cli.js +503 -257
- package/dist/src/indexer/cli.js.map +1 -1
- package/dist/src/indexer/codeintel.d.ts +1 -1
- package/dist/src/indexer/codeintel.d.ts.map +1 -1
- package/dist/src/indexer/codeintel.js +296 -3
- package/dist/src/indexer/codeintel.js.map +1 -1
- package/dist/src/indexer/dart/analyze.d.ts +29 -0
- package/dist/src/indexer/dart/analyze.d.ts.map +1 -0
- package/dist/src/indexer/dart/analyze.js +452 -0
- package/dist/src/indexer/dart/analyze.js.map +1 -0
- package/dist/src/indexer/dart/client.d.ts +113 -0
- package/dist/src/indexer/dart/client.d.ts.map +1 -0
- package/dist/src/indexer/dart/client.js +444 -0
- package/dist/src/indexer/dart/client.js.map +1 -0
- package/dist/src/indexer/dart/config.d.ts +36 -0
- package/dist/src/indexer/dart/config.d.ts.map +1 -0
- package/dist/src/indexer/dart/config.js +62 -0
- package/dist/src/indexer/dart/config.js.map +1 -0
- package/dist/src/indexer/dart/dependencies.d.ts +17 -0
- package/dist/src/indexer/dart/dependencies.d.ts.map +1 -0
- package/dist/src/indexer/dart/dependencies.js +102 -0
- package/dist/src/indexer/dart/dependencies.js.map +1 -0
- package/dist/src/indexer/dart/pathKey.d.ts +40 -0
- package/dist/src/indexer/dart/pathKey.d.ts.map +1 -0
- package/dist/src/indexer/dart/pathKey.js +72 -0
- package/dist/src/indexer/dart/pathKey.js.map +1 -0
- package/dist/src/indexer/dart/poolGate.d.ts +57 -0
- package/dist/src/indexer/dart/poolGate.d.ts.map +1 -0
- package/dist/src/indexer/dart/poolGate.js +87 -0
- package/dist/src/indexer/dart/poolGate.js.map +1 -0
- package/dist/src/indexer/dart/sdk.d.ts +40 -0
- package/dist/src/indexer/dart/sdk.d.ts.map +1 -0
- package/dist/src/indexer/dart/sdk.js +167 -0
- package/dist/src/indexer/dart/sdk.js.map +1 -0
- package/dist/src/indexer/dart/transform.d.ts +17 -0
- package/dist/src/indexer/dart/transform.d.ts.map +1 -0
- package/dist/src/indexer/dart/transform.js +157 -0
- package/dist/src/indexer/dart/transform.js.map +1 -0
- package/dist/src/indexer/dart/types.d.ts +137 -0
- package/dist/src/indexer/dart/types.d.ts.map +1 -0
- package/dist/src/indexer/dart/types.js +5 -0
- package/dist/src/indexer/dart/types.js.map +1 -0
- package/dist/src/indexer/git.d.ts +1 -0
- package/dist/src/indexer/git.d.ts.map +1 -1
- package/dist/src/indexer/git.js +8 -0
- package/dist/src/indexer/git.js.map +1 -1
- package/dist/src/indexer/language.d.ts.map +1 -1
- package/dist/src/indexer/language.js +1 -0
- package/dist/src/indexer/language.js.map +1 -1
- package/dist/src/indexer/queue.d.ts +19 -0
- package/dist/src/indexer/queue.d.ts.map +1 -0
- package/dist/src/indexer/queue.js +50 -0
- package/dist/src/indexer/queue.js.map +1 -0
- package/dist/src/indexer/schema.d.ts +61 -1
- package/dist/src/indexer/schema.d.ts.map +1 -1
- package/dist/src/indexer/schema.js +253 -2
- package/dist/src/indexer/schema.js.map +1 -1
- package/dist/src/indexer/watch.d.ts +21 -0
- package/dist/src/indexer/watch.d.ts.map +1 -1
- package/dist/src/indexer/watch.js +189 -28
- package/dist/src/indexer/watch.js.map +1 -1
- package/dist/src/server/abbreviations.d.ts +47 -0
- package/dist/src/server/abbreviations.d.ts.map +1 -0
- package/dist/src/server/abbreviations.js +71 -0
- package/dist/src/server/abbreviations.js.map +1 -0
- package/dist/src/server/boost-profiles.d.ts +63 -0
- package/dist/src/server/boost-profiles.d.ts.map +1 -0
- package/dist/src/server/boost-profiles.js +86 -0
- package/dist/src/server/boost-profiles.js.map +1 -0
- package/dist/src/server/context.d.ts +7 -0
- package/dist/src/server/context.d.ts.map +1 -1
- package/dist/src/server/handlers.d.ts +3 -2
- package/dist/src/server/handlers.d.ts.map +1 -1
- package/dist/src/server/handlers.js +542 -96
- package/dist/src/server/handlers.js.map +1 -1
- package/dist/src/server/indexBootstrap.d.ts.map +1 -1
- package/dist/src/server/indexBootstrap.js +4 -1
- package/dist/src/server/indexBootstrap.js.map +1 -1
- package/dist/src/server/main.d.ts.map +1 -1
- package/dist/src/server/main.js +112 -30
- package/dist/src/server/main.js.map +1 -1
- package/dist/src/server/rpc.d.ts.map +1 -1
- package/dist/src/server/rpc.js +28 -9
- package/dist/src/server/rpc.js.map +1 -1
- package/dist/src/server/rrf.d.ts +86 -0
- package/dist/src/server/rrf.d.ts.map +1 -0
- package/dist/src/server/rrf.js +108 -0
- package/dist/src/server/rrf.js.map +1 -0
- package/dist/src/server/runtime.d.ts.map +1 -1
- package/dist/src/server/runtime.js +45 -6
- package/dist/src/server/runtime.js.map +1 -1
- package/dist/src/server/scoring.d.ts.map +1 -1
- package/dist/src/server/scoring.js +19 -0
- package/dist/src/server/scoring.js.map +1 -1
- package/dist/src/shared/cli/args.d.ts +70 -0
- package/dist/src/shared/cli/args.d.ts.map +1 -0
- package/dist/src/shared/cli/args.js +84 -0
- package/dist/src/shared/cli/args.js.map +1 -0
- package/dist/src/shared/duckdb.d.ts.map +1 -1
- package/dist/src/shared/duckdb.js +9 -0
- package/dist/src/shared/duckdb.js.map +1 -1
- package/dist/src/shared/embedding/engine.d.ts +38 -0
- package/dist/src/shared/embedding/engine.d.ts.map +1 -0
- package/dist/src/shared/embedding/engine.js +6 -0
- package/dist/src/shared/embedding/engine.js.map +1 -0
- package/dist/src/shared/embedding/lsh-engine.d.ts +11 -0
- package/dist/src/shared/embedding/lsh-engine.d.ts.map +1 -0
- package/dist/src/shared/embedding/lsh-engine.js +14 -0
- package/dist/src/shared/embedding/lsh-engine.js.map +1 -0
- package/dist/src/shared/embedding/registry.d.ts +25 -0
- package/dist/src/shared/embedding/registry.d.ts.map +1 -0
- package/dist/src/shared/embedding/registry.js +50 -0
- package/dist/src/shared/embedding/registry.js.map +1 -0
- package/dist/src/shared/embedding/semantic-engine.d.ts +14 -0
- package/dist/src/shared/embedding/semantic-engine.d.ts.map +1 -0
- package/dist/src/shared/embedding/semantic-engine.js +50 -0
- package/dist/src/shared/embedding/semantic-engine.js.map +1 -0
- package/dist/src/shared/models/model-manager.d.ts +38 -0
- package/dist/src/shared/models/model-manager.d.ts.map +1 -0
- package/dist/src/shared/models/model-manager.js +116 -0
- package/dist/src/shared/models/model-manager.js.map +1 -0
- package/dist/src/shared/models/model-manifest.d.ts +22 -0
- package/dist/src/shared/models/model-manifest.d.ts.map +1 -0
- package/dist/src/shared/models/model-manifest.js +24 -0
- package/dist/src/shared/models/model-manifest.js.map +1 -0
- package/dist/src/shared/utils/path.d.ts +46 -0
- package/dist/src/shared/utils/path.d.ts.map +1 -0
- package/dist/src/shared/utils/path.js +94 -0
- package/dist/src/shared/utils/path.js.map +1 -0
- package/dist/src/shared/utils/socket.d.ts +61 -0
- package/dist/src/shared/utils/socket.d.ts.map +1 -0
- package/dist/src/shared/utils/socket.js +156 -0
- package/dist/src/shared/utils/socket.js.map +1 -0
- package/dist/src/shared/utils/validation.d.ts +14 -0
- package/dist/src/shared/utils/validation.d.ts.map +1 -0
- package/dist/src/shared/utils/validation.js +22 -0
- package/dist/src/shared/utils/validation.js.map +1 -0
- package/package.json +14 -1
|
@@ -1,6 +1,11 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
1
2
|
import path from "node:path";
|
|
3
|
+
import { checkFTSSchemaExists } from "../indexer/schema.js";
|
|
2
4
|
import { generateEmbedding, structuralSimilarity } from "../shared/embedding.js";
|
|
3
5
|
import { encode as encodeGPT, tokenizeText } from "../shared/tokenizer.js";
|
|
6
|
+
import { getRepoPathCandidates, normalizeRepoPath } from "../shared/utils/path.js";
|
|
7
|
+
import { expandAbbreviations } from "./abbreviations.js";
|
|
8
|
+
import { getBoostProfile, } from "./boost-profiles.js";
|
|
4
9
|
import { coerceProfileName, loadScoringProfile } from "./scoring.js";
|
|
5
10
|
// Configuration file patterns (v0.8.0+: consolidated to avoid duplication)
|
|
6
11
|
// Comprehensive list covering multiple languages and tools
|
|
@@ -127,6 +132,67 @@ const CONFIG_PATTERNS = [
|
|
|
127
132
|
".circleci/config.yml",
|
|
128
133
|
".github/workflows",
|
|
129
134
|
];
|
|
135
|
+
const FTS_STATUS_CACHE_TTL_MS = 10_000;
|
|
136
|
+
async function hasDirtyRepos(db) {
|
|
137
|
+
const statusCheck = await db.all(`SELECT COUNT(*) as count FROM repo
|
|
138
|
+
WHERE fts_dirty = true OR fts_status IN ('dirty', 'rebuilding')`);
|
|
139
|
+
return (statusCheck[0]?.count ?? 0) > 0;
|
|
140
|
+
}
|
|
141
|
+
async function refreshFtsStatus(context) {
|
|
142
|
+
const previousReady = context.features?.fts ?? false;
|
|
143
|
+
const cache = {
|
|
144
|
+
ready: false,
|
|
145
|
+
schemaExists: false,
|
|
146
|
+
anyDirty: false,
|
|
147
|
+
lastChecked: Date.now(),
|
|
148
|
+
};
|
|
149
|
+
try {
|
|
150
|
+
cache.schemaExists = await checkFTSSchemaExists(context.db);
|
|
151
|
+
if (!cache.schemaExists) {
|
|
152
|
+
context.warningManager.warnForRequest("fts-schema-missing", "FTS schema not found, falling back to ILIKE");
|
|
153
|
+
}
|
|
154
|
+
else {
|
|
155
|
+
cache.anyDirty = await hasDirtyRepos(context.db);
|
|
156
|
+
if (cache.anyDirty) {
|
|
157
|
+
context.warningManager.warnForRequest("fts-stale", "FTS index is stale or rebuilding, using ILIKE fallback. Run indexer to update FTS.");
|
|
158
|
+
}
|
|
159
|
+
else {
|
|
160
|
+
await context.db.run("LOAD fts;");
|
|
161
|
+
cache.ready = true;
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
catch (error) {
|
|
166
|
+
cache.ready = false;
|
|
167
|
+
cache.schemaExists = false;
|
|
168
|
+
context.warningManager.warnForRequest("fts-check-failed", `FTS availability check failed: ${error}`);
|
|
169
|
+
}
|
|
170
|
+
if (!context.features) {
|
|
171
|
+
context.features = {};
|
|
172
|
+
}
|
|
173
|
+
context.features.fts = cache.ready;
|
|
174
|
+
context.ftsStatusCache = cache;
|
|
175
|
+
if (cache.ready && !previousReady) {
|
|
176
|
+
console.info("✅ FTS recovered and enabled");
|
|
177
|
+
}
|
|
178
|
+
else if (!cache.ready && previousReady) {
|
|
179
|
+
console.warn("⚠️ FTS became unavailable; falling back to ILIKE");
|
|
180
|
+
}
|
|
181
|
+
return cache;
|
|
182
|
+
}
|
|
183
|
+
async function getFreshFtsStatus(context) {
|
|
184
|
+
const cache = context.ftsStatusCache;
|
|
185
|
+
if (cache && Date.now() - cache.lastChecked < FTS_STATUS_CACHE_TTL_MS) {
|
|
186
|
+
if (cache.ready) {
|
|
187
|
+
const dirtyNow = await hasDirtyRepos(context.db);
|
|
188
|
+
if (dirtyNow) {
|
|
189
|
+
return refreshFtsStatus(context);
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
return cache;
|
|
193
|
+
}
|
|
194
|
+
return refreshFtsStatus(context);
|
|
195
|
+
}
|
|
130
196
|
/**
|
|
131
197
|
* Check if a file path represents a configuration file
|
|
132
198
|
* Supports multiple languages: JS/TS, Python, Ruby, Go, PHP, Java, Rust, C/C++, Docker, CI/CD
|
|
@@ -162,6 +228,9 @@ const MAX_DEPENDENCY_SEEDS_QUERY_LIMIT = 100; // SQL injection防御用の上限
|
|
|
162
228
|
const NEARBY_LIMIT = 6;
|
|
163
229
|
const FALLBACK_SNIPPET_WINDOW = 40; // Reduced from 120 to optimize token usage
|
|
164
230
|
const MAX_RERANK_LIMIT = 50;
|
|
231
|
+
// Issue #68: Path/Large File Penalty configuration (環境変数で上書き可能)
|
|
232
|
+
const PATH_MISS_DELTA = parseFloat(process.env.KIRI_PATH_MISS_DELTA || "-0.5");
|
|
233
|
+
const LARGE_FILE_DELTA = parseFloat(process.env.KIRI_LARGE_FILE_DELTA || "-0.8");
|
|
165
234
|
const MAX_WHY_TAGS = 10;
|
|
166
235
|
// 項目3: whyタグの優先度マップ(低い数値ほど高優先度)
|
|
167
236
|
// All actual tag prefixes used in the codebase
|
|
@@ -450,6 +519,8 @@ function ensureCandidate(map, filePath) {
|
|
|
450
519
|
ext: null,
|
|
451
520
|
embedding: null,
|
|
452
521
|
semanticSimilarity: null,
|
|
522
|
+
pathMatchHits: 0, // Issue #68: Track path match count
|
|
523
|
+
penalties: [], // Issue #68: Penalty log for telemetry
|
|
453
524
|
};
|
|
454
525
|
map.set(filePath, candidate);
|
|
455
526
|
}
|
|
@@ -614,6 +685,22 @@ function splitQueryWords(query) {
|
|
|
614
685
|
const words = query.split(/[\s/\-_]+/).filter((w) => w.length > 2);
|
|
615
686
|
return words.length > 0 ? words : [query]; // 全て除外された場合は元のクエリを使用
|
|
616
687
|
}
|
|
688
|
+
/**
|
|
689
|
+
* パス固有のマルチプライヤーを取得(最長プレフィックスマッチ)
|
|
690
|
+
* 配列の順序に依存せず、常に最長一致のプレフィックスを選択
|
|
691
|
+
* @param filePath - ファイルパス
|
|
692
|
+
* @param profileConfig - ブーストプロファイル設定
|
|
693
|
+
* @returns パス固有のマルチプライヤー(マッチなしの場合は1.0)
|
|
694
|
+
*/
|
|
695
|
+
function getPathMultiplier(filePath, profileConfig) {
|
|
696
|
+
let bestMatch = { prefix: "", multiplier: 1.0 };
|
|
697
|
+
for (const { prefix, multiplier } of profileConfig.pathMultipliers) {
|
|
698
|
+
if (filePath.startsWith(prefix) && prefix.length > bestMatch.prefix.length) {
|
|
699
|
+
bestMatch = { prefix, multiplier };
|
|
700
|
+
}
|
|
701
|
+
}
|
|
702
|
+
return bestMatch.multiplier;
|
|
703
|
+
}
|
|
617
704
|
/**
|
|
618
705
|
* files_search専用のファイルタイプブースト適用(v0.7.0+: 設定可能な乗算的ペナルティ)
|
|
619
706
|
* context_bundleと同じ乗算的ペナルティロジックを使用
|
|
@@ -623,7 +710,7 @@ function splitQueryWords(query) {
|
|
|
623
710
|
* @param weights - スコアリングウェイト設定(乗算的ペナルティに使用)
|
|
624
711
|
* @returns ブースト適用後のスコア
|
|
625
712
|
*/
|
|
626
|
-
function applyFileTypeBoost(path, baseScore,
|
|
713
|
+
function applyFileTypeBoost(path, baseScore, profileConfig, _weights) {
|
|
627
714
|
// Blacklisted directories that are almost always irrelevant for code context
|
|
628
715
|
const blacklistedDirs = [
|
|
629
716
|
".cursor/",
|
|
@@ -634,54 +721,41 @@ function applyFileTypeBoost(path, baseScore, profile = "default", weights) {
|
|
|
634
721
|
".git/",
|
|
635
722
|
"node_modules/",
|
|
636
723
|
];
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
const ext = path.includes(".") ? path.substring(path.lastIndexOf(".")) : null;
|
|
645
|
-
// ✅ UNIFIED LOGIC: Use same multiplicative penalties as context_bundle
|
|
646
|
-
if (profile === "docs") {
|
|
647
|
-
// Boost documentation files
|
|
648
|
-
if (path.endsWith(".md") || path.endsWith(".yaml") || path.endsWith(".yml")) {
|
|
649
|
-
return baseScore * 1.5; // 50% boost (same as context_bundle)
|
|
650
|
-
}
|
|
651
|
-
// Penalty for implementation files in docs mode
|
|
652
|
-
if (path.startsWith("src/") &&
|
|
653
|
-
(path.endsWith(".ts") || path.endsWith(".js") || path.endsWith(".tsx"))) {
|
|
654
|
-
return baseScore * 0.5; // 50% penalty
|
|
724
|
+
for (const dir of blacklistedDirs) {
|
|
725
|
+
if (path.startsWith(dir)) {
|
|
726
|
+
// ✅ Decoupled: Check denylist overrides from profile config
|
|
727
|
+
if (profileConfig.denylistOverrides.includes(dir)) {
|
|
728
|
+
continue;
|
|
729
|
+
}
|
|
730
|
+
return -100; // Effectively remove it
|
|
655
731
|
}
|
|
656
|
-
return baseScore;
|
|
657
732
|
}
|
|
658
|
-
// Default profile: Use configurable multiplicative penalties
|
|
659
|
-
let multiplier = 1.0;
|
|
660
733
|
const fileName = path.split("/").pop() ?? "";
|
|
661
|
-
|
|
734
|
+
const ext = path.includes(".") ? path.substring(path.lastIndexOf(".")) : null;
|
|
735
|
+
let multiplier = 1.0;
|
|
736
|
+
// ✅ Step 1: Config files
|
|
662
737
|
if (isConfigFile(path, fileName)) {
|
|
663
|
-
multiplier *=
|
|
738
|
+
multiplier *= profileConfig.fileTypeMultipliers.config;
|
|
664
739
|
return baseScore * multiplier;
|
|
665
740
|
}
|
|
666
|
-
// ✅ Step 2: Documentation files
|
|
741
|
+
// ✅ Step 2: Documentation files
|
|
667
742
|
const docExtensions = [".md", ".yaml", ".yml", ".mdc"];
|
|
668
743
|
if (docExtensions.some((docExt) => path.endsWith(docExt))) {
|
|
669
|
-
multiplier *=
|
|
744
|
+
multiplier *= profileConfig.fileTypeMultipliers.doc;
|
|
670
745
|
return baseScore * multiplier;
|
|
671
746
|
}
|
|
672
|
-
// ✅ Step 3: Implementation
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
multiplier *=
|
|
678
|
-
|
|
679
|
-
else if (path.startsWith("src/lib/")) {
|
|
680
|
-
multiplier *= weights.implBoostMultiplier * 1.2;
|
|
747
|
+
// ✅ Step 3: Implementation files with path-specific boosts
|
|
748
|
+
const implMultiplier = profileConfig.fileTypeMultipliers.impl;
|
|
749
|
+
// ✅ Use longest-prefix-match logic (order-independent)
|
|
750
|
+
const pathBoost = getPathMultiplier(path, profileConfig);
|
|
751
|
+
if (pathBoost !== 1.0) {
|
|
752
|
+
multiplier *= implMultiplier * pathBoost;
|
|
753
|
+
return baseScore * multiplier;
|
|
681
754
|
}
|
|
682
|
-
|
|
755
|
+
// Fallback for other src/ files
|
|
756
|
+
if (path.startsWith("src/")) {
|
|
683
757
|
if (ext === ".ts" || ext === ".tsx" || ext === ".js") {
|
|
684
|
-
multiplier *=
|
|
758
|
+
multiplier *= implMultiplier;
|
|
685
759
|
}
|
|
686
760
|
}
|
|
687
761
|
// Test files: additive penalty (keep strong for files_search)
|
|
@@ -698,29 +772,92 @@ function applyPathBasedScoring(candidate, lowerPath, weights, extractedTerms) {
|
|
|
698
772
|
if (!extractedTerms || weights.pathMatch <= 0) {
|
|
699
773
|
return;
|
|
700
774
|
}
|
|
775
|
+
// hasAddedScore gates additive boosts; pathMatchHits/reasons still track every hit for penalties/debugging.
|
|
776
|
+
let hasAddedScore = false;
|
|
701
777
|
// フレーズがパスに完全一致する場合(最高の重み)
|
|
702
778
|
for (const phrase of extractedTerms.phrases) {
|
|
703
779
|
if (lowerPath.includes(phrase)) {
|
|
704
|
-
|
|
780
|
+
if (!hasAddedScore) {
|
|
781
|
+
candidate.score += weights.pathMatch * 1.5; // 1.5倍のブースト
|
|
782
|
+
hasAddedScore = true;
|
|
783
|
+
}
|
|
705
784
|
candidate.reasons.add(`path-phrase:${phrase}`);
|
|
706
|
-
|
|
785
|
+
candidate.pathMatchHits++; // Issue #68: Track path match for penalty calculation
|
|
707
786
|
}
|
|
708
787
|
}
|
|
709
788
|
// パスセグメントがマッチする場合(中程度の重み)
|
|
710
789
|
const pathParts = lowerPath.split("/");
|
|
711
790
|
for (const segment of extractedTerms.pathSegments) {
|
|
712
791
|
if (pathParts.includes(segment)) {
|
|
713
|
-
|
|
792
|
+
if (!hasAddedScore) {
|
|
793
|
+
candidate.score += weights.pathMatch;
|
|
794
|
+
hasAddedScore = true;
|
|
795
|
+
}
|
|
714
796
|
candidate.reasons.add(`path-segment:${segment}`);
|
|
715
|
-
|
|
797
|
+
candidate.pathMatchHits++; // Issue #68: Track path match for penalty calculation
|
|
716
798
|
}
|
|
717
799
|
}
|
|
718
800
|
// 通常のキーワードがパスに含まれる場合(低い重み)
|
|
801
|
+
const matchedKeywords = new Set();
|
|
719
802
|
for (const keyword of extractedTerms.keywords) {
|
|
720
803
|
if (lowerPath.includes(keyword)) {
|
|
721
|
-
|
|
804
|
+
if (!hasAddedScore) {
|
|
805
|
+
candidate.score += weights.pathMatch * 0.5; // 0.5倍のブースト
|
|
806
|
+
hasAddedScore = true;
|
|
807
|
+
}
|
|
722
808
|
candidate.reasons.add(`path-keyword:${keyword}`);
|
|
723
|
-
|
|
809
|
+
candidate.pathMatchHits++; // Issue #68: Track path match for penalty calculation
|
|
810
|
+
matchedKeywords.add(keyword); // Track for abbreviation expansion
|
|
811
|
+
}
|
|
812
|
+
}
|
|
813
|
+
// ADR 003: Abbreviation expansion for keywords with zero exact matches
|
|
814
|
+
// Avoid double-counting by only expanding keywords that didn't match exactly
|
|
815
|
+
// Skip abbreviation expansion for files that will be heavily penalized (test/config/lock files)
|
|
816
|
+
const fileName = lowerPath.split("/").pop() ?? "";
|
|
817
|
+
const testPatterns = [".spec.ts", ".spec.js", ".test.ts", ".test.js", ".spec.tsx", ".test.tsx"];
|
|
818
|
+
const lockFiles = [
|
|
819
|
+
"package-lock.json",
|
|
820
|
+
"pnpm-lock.yaml",
|
|
821
|
+
"yarn.lock",
|
|
822
|
+
"bun.lockb",
|
|
823
|
+
"gemfile.lock",
|
|
824
|
+
"cargo.lock",
|
|
825
|
+
"poetry.lock",
|
|
826
|
+
];
|
|
827
|
+
const configPatterns = [
|
|
828
|
+
"tsconfig.json",
|
|
829
|
+
"vite.config",
|
|
830
|
+
"vitest.config",
|
|
831
|
+
"eslint.config",
|
|
832
|
+
"prettier.config",
|
|
833
|
+
"package.json",
|
|
834
|
+
".env",
|
|
835
|
+
"dockerfile",
|
|
836
|
+
];
|
|
837
|
+
const shouldSkipAbbreviation = testPatterns.some((pattern) => lowerPath.endsWith(pattern)) ||
|
|
838
|
+
lockFiles.some((lock) => fileName === lock) ||
|
|
839
|
+
configPatterns.some((cfg) => fileName.includes(cfg));
|
|
840
|
+
if (!shouldSkipAbbreviation) {
|
|
841
|
+
for (const keyword of extractedTerms.keywords) {
|
|
842
|
+
if (matchedKeywords.has(keyword)) {
|
|
843
|
+
continue; // Skip keywords that already matched exactly
|
|
844
|
+
}
|
|
845
|
+
const expandedTerms = expandAbbreviations(keyword);
|
|
846
|
+
// Try each expanded variant (except the original keyword itself)
|
|
847
|
+
for (const term of expandedTerms) {
|
|
848
|
+
if (term === keyword)
|
|
849
|
+
continue; // Skip original to avoid duplicate check
|
|
850
|
+
if (lowerPath.includes(term)) {
|
|
851
|
+
// Lower weight (0.4x) for abbreviation-expanded matches
|
|
852
|
+
if (!hasAddedScore) {
|
|
853
|
+
candidate.score += weights.pathMatch * 0.4;
|
|
854
|
+
hasAddedScore = true;
|
|
855
|
+
}
|
|
856
|
+
candidate.reasons.add(`abbr-path:${keyword}→${term}`);
|
|
857
|
+
candidate.pathMatchHits++; // Count for penalty calculation
|
|
858
|
+
break; // Only count first match per keyword to avoid over-boosting
|
|
859
|
+
}
|
|
860
|
+
}
|
|
724
861
|
}
|
|
725
862
|
}
|
|
726
863
|
}
|
|
@@ -730,7 +867,7 @@ function applyPathBasedScoring(candidate, lowerPath, weights, extractedTerms) {
|
|
|
730
867
|
* @param profile - boost_profile設定("docs"の場合はdocs/ディレクトリのブラックリストをスキップ)
|
|
731
868
|
* @returns true if penalty was applied and processing should stop
|
|
732
869
|
*/
|
|
733
|
-
function applyAdditiveFilePenalties(candidate, path, lowerPath, fileName,
|
|
870
|
+
function applyAdditiveFilePenalties(candidate, path, lowerPath, fileName, profileConfig) {
|
|
734
871
|
// Blacklisted directories - effectively remove
|
|
735
872
|
const blacklistedDirs = [
|
|
736
873
|
".cursor/",
|
|
@@ -756,10 +893,9 @@ function applyAdditiveFilePenalties(candidate, path, lowerPath, fileName, profil
|
|
|
756
893
|
];
|
|
757
894
|
for (const dir of blacklistedDirs) {
|
|
758
895
|
if (path.startsWith(dir)) {
|
|
759
|
-
// ✅
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
continue; // このブラックリストエントリをスキップ
|
|
896
|
+
// ✅ Decoupled: Check denylist overrides from profile config
|
|
897
|
+
if (profileConfig.denylistOverrides.includes(dir)) {
|
|
898
|
+
continue; // Skip this blacklisted directory
|
|
763
899
|
}
|
|
764
900
|
candidate.score = -100;
|
|
765
901
|
candidate.reasons.add("penalty:blacklisted-dir");
|
|
@@ -788,7 +924,7 @@ function applyAdditiveFilePenalties(candidate, path, lowerPath, fileName, profil
|
|
|
788
924
|
candidate.reasons.add("penalty:lock-file");
|
|
789
925
|
return true;
|
|
790
926
|
}
|
|
791
|
-
// Configuration files -
|
|
927
|
+
// Configuration files - penalty handling depends on profile
|
|
792
928
|
const configPatterns = [
|
|
793
929
|
".config.js",
|
|
794
930
|
".config.ts",
|
|
@@ -809,6 +945,12 @@ function applyAdditiveFilePenalties(candidate, path, lowerPath, fileName, profil
|
|
|
809
945
|
fileName === "Dockerfile" ||
|
|
810
946
|
fileName === "docker-compose.yml" ||
|
|
811
947
|
fileName === "docker-compose.yaml") {
|
|
948
|
+
// ✅ Use explicit flag instead of magic number (0.3) to determine behavior
|
|
949
|
+
// This decouples profile detection from multiplier values
|
|
950
|
+
if (profileConfig.skipConfigAdditivePenalty) {
|
|
951
|
+
return false; // Continue to multiplicative penalty only
|
|
952
|
+
}
|
|
953
|
+
// For other profiles, apply strong additive penalty
|
|
812
954
|
candidate.score -= 1.5;
|
|
813
955
|
candidate.reasons.add("penalty:config-file");
|
|
814
956
|
return true;
|
|
@@ -826,54 +968,50 @@ function applyAdditiveFilePenalties(candidate, path, lowerPath, fileName, profil
|
|
|
826
968
|
* profile="docs": ドキュメントファイルをブースト
|
|
827
969
|
* profile="default": ドキュメントファイルにペナルティ、実装ファイルをブースト
|
|
828
970
|
*/
|
|
829
|
-
function applyFileTypeMultipliers(candidate, path, ext,
|
|
830
|
-
|
|
831
|
-
|
|
971
|
+
function applyFileTypeMultipliers(candidate, path, ext, profileConfig, _weights) {
|
|
972
|
+
const fileName = path.split("/").pop() ?? "";
|
|
973
|
+
// ✅ Step 1: Config files
|
|
974
|
+
if (isConfigFile(path, fileName)) {
|
|
975
|
+
candidate.scoreMultiplier *= profileConfig.fileTypeMultipliers.config;
|
|
976
|
+
candidate.reasons.add("penalty:config-file");
|
|
977
|
+
return; // Don't apply impl boosts to config files
|
|
832
978
|
}
|
|
833
|
-
// ✅
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
979
|
+
// ✅ Step 2: Documentation files
|
|
980
|
+
const docExtensions = [".md", ".yaml", ".yml", ".mdc"];
|
|
981
|
+
if (docExtensions.some((docExt) => path.endsWith(docExt))) {
|
|
982
|
+
const docMultiplier = profileConfig.fileTypeMultipliers.doc;
|
|
983
|
+
candidate.scoreMultiplier *= docMultiplier;
|
|
984
|
+
if (docMultiplier > 1.0) {
|
|
838
985
|
candidate.reasons.add("boost:doc-file");
|
|
839
986
|
}
|
|
840
|
-
|
|
841
|
-
return;
|
|
842
|
-
}
|
|
843
|
-
// DEFAULT PROFILE: Use MULTIPLICATIVE penalties for config/docs, MULTIPLICATIVE boosts for impl files
|
|
844
|
-
if (profile === "default") {
|
|
845
|
-
const fileName = path.split("/").pop() ?? "";
|
|
846
|
-
// ✅ Step 1: Config files get strongest penalty (95% reduction)
|
|
847
|
-
if (isConfigFile(path, fileName)) {
|
|
848
|
-
candidate.scoreMultiplier *= weights.configPenaltyMultiplier; // 0.05 = 95% reduction
|
|
849
|
-
candidate.reasons.add("penalty:config-file");
|
|
850
|
-
return; // Don't apply impl boosts to config files
|
|
851
|
-
}
|
|
852
|
-
// ✅ Step 2: Documentation files get moderate penalty (50% reduction)
|
|
853
|
-
const docExtensions = [".md", ".yaml", ".yml", ".mdc"];
|
|
854
|
-
if (docExtensions.some((docExt) => path.endsWith(docExt))) {
|
|
855
|
-
candidate.scoreMultiplier *= weights.docPenaltyMultiplier; // 0.5 = 50% reduction
|
|
987
|
+
else if (docMultiplier < 1.0) {
|
|
856
988
|
candidate.reasons.add("penalty:doc-file");
|
|
857
|
-
return; // Don't apply impl boosts to docs
|
|
858
989
|
}
|
|
859
|
-
//
|
|
990
|
+
return; // Don't apply impl boosts to docs
|
|
991
|
+
}
|
|
992
|
+
// ✅ Step 3: Implementation files with path-specific boosts
|
|
993
|
+
const implMultiplier = profileConfig.fileTypeMultipliers.impl;
|
|
994
|
+
// ✅ Use longest-prefix-match logic (order-independent)
|
|
995
|
+
const pathBoost = getPathMultiplier(path, profileConfig);
|
|
996
|
+
if (pathBoost !== 1.0) {
|
|
997
|
+
candidate.scoreMultiplier *= implMultiplier * pathBoost;
|
|
998
|
+
// Add specific reason based on matched path
|
|
860
999
|
if (path.startsWith("src/app/")) {
|
|
861
|
-
candidate.scoreMultiplier *= weights.implBoostMultiplier * 1.4; // Extra boost for app files
|
|
862
1000
|
candidate.reasons.add("boost:app-file");
|
|
863
1001
|
}
|
|
864
1002
|
else if (path.startsWith("src/components/")) {
|
|
865
|
-
candidate.scoreMultiplier *= weights.implBoostMultiplier * 1.3;
|
|
866
1003
|
candidate.reasons.add("boost:component-file");
|
|
867
1004
|
}
|
|
868
1005
|
else if (path.startsWith("src/lib/")) {
|
|
869
|
-
candidate.scoreMultiplier *= weights.implBoostMultiplier * 1.2;
|
|
870
1006
|
candidate.reasons.add("boost:lib-file");
|
|
871
1007
|
}
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
1008
|
+
return;
|
|
1009
|
+
}
|
|
1010
|
+
// Fallback for other src/ files
|
|
1011
|
+
if (path.startsWith("src/")) {
|
|
1012
|
+
if (ext === ".ts" || ext === ".tsx" || ext === ".js") {
|
|
1013
|
+
candidate.scoreMultiplier *= implMultiplier;
|
|
1014
|
+
candidate.reasons.add("boost:impl-file");
|
|
877
1015
|
}
|
|
878
1016
|
}
|
|
879
1017
|
}
|
|
@@ -889,22 +1027,19 @@ function applyFileTypeMultipliers(candidate, path, ext, profile, weights) {
|
|
|
889
1027
|
* 2. profile="docs" skips documentation penalties (allows doc-focused queries)
|
|
890
1028
|
* 3. Blacklist/test/lock/config files keep additive penalties (already very strong)
|
|
891
1029
|
*/
|
|
892
|
-
function applyBoostProfile(candidate, row,
|
|
893
|
-
if (profile === "none") {
|
|
894
|
-
return;
|
|
895
|
-
}
|
|
1030
|
+
function applyBoostProfile(candidate, row, profileConfig, weights, extractedTerms) {
|
|
896
1031
|
const { path, ext } = row;
|
|
897
1032
|
const lowerPath = path.toLowerCase();
|
|
898
1033
|
const fileName = path.split("/").pop() ?? "";
|
|
899
1034
|
// Step 1: パスベースのスコアリング(加算的ブースト)
|
|
900
1035
|
applyPathBasedScoring(candidate, lowerPath, weights, extractedTerms);
|
|
901
1036
|
// Step 2: 加算的ペナルティ(ブラックリスト、テスト、lock、設定、マイグレーション)
|
|
902
|
-
const shouldStop = applyAdditiveFilePenalties(candidate, path, lowerPath, fileName,
|
|
1037
|
+
const shouldStop = applyAdditiveFilePenalties(candidate, path, lowerPath, fileName, profileConfig);
|
|
903
1038
|
if (shouldStop) {
|
|
904
1039
|
return; // ペナルティが適用された場合は処理終了
|
|
905
1040
|
}
|
|
906
1041
|
// Step 3: ファイルタイプ別の乗算的ペナルティ/ブースト
|
|
907
|
-
applyFileTypeMultipliers(candidate, path, ext,
|
|
1042
|
+
applyFileTypeMultipliers(candidate, path, ext, profileConfig, weights);
|
|
908
1043
|
}
|
|
909
1044
|
export async function filesSearch(context, params) {
|
|
910
1045
|
const { db, repoId } = context;
|
|
@@ -913,7 +1048,8 @@ export async function filesSearch(context, params) {
|
|
|
913
1048
|
throw new Error("files_search requires a non-empty query. Provide a search keyword to continue.");
|
|
914
1049
|
}
|
|
915
1050
|
const limit = normalizeLimit(params.limit);
|
|
916
|
-
const
|
|
1051
|
+
const ftsStatus = await getFreshFtsStatus(context);
|
|
1052
|
+
const hasFTS = ftsStatus.ready;
|
|
917
1053
|
let sql;
|
|
918
1054
|
let values;
|
|
919
1055
|
if (hasFTS) {
|
|
@@ -988,6 +1124,7 @@ export async function filesSearch(context, params) {
|
|
|
988
1124
|
}
|
|
989
1125
|
const rows = await db.all(sql, values);
|
|
990
1126
|
const boostProfile = params.boost_profile ?? "default";
|
|
1127
|
+
const profileConfig = getBoostProfile(boostProfile);
|
|
991
1128
|
// ✅ v0.7.0+: Load configurable scoring weights for unified boosting logic
|
|
992
1129
|
// Note: filesSearch doesn't have a separate profile parameter, uses default weights
|
|
993
1130
|
const weights = loadScoringProfile(null);
|
|
@@ -1007,7 +1144,9 @@ export async function filesSearch(context, params) {
|
|
|
1007
1144
|
matchLine = findFirstMatchLine(row.content ?? "", query);
|
|
1008
1145
|
}
|
|
1009
1146
|
const baseScore = row.score ?? 1.0; // FTS時はBM25スコア、ILIKE時は1.0
|
|
1010
|
-
const boostedScore =
|
|
1147
|
+
const boostedScore = boostProfile === "none"
|
|
1148
|
+
? baseScore
|
|
1149
|
+
: applyFileTypeBoost(row.path, baseScore, profileConfig, weights);
|
|
1011
1150
|
const result = {
|
|
1012
1151
|
path: row.path,
|
|
1013
1152
|
matchLine,
|
|
@@ -1107,6 +1246,265 @@ export async function snippetsGet(context, params) {
|
|
|
1107
1246
|
symbolKind,
|
|
1108
1247
|
};
|
|
1109
1248
|
}
|
|
1249
|
+
// ============================================================================
|
|
1250
|
+
// Issue #68: Path/Large File Penalty Helper Functions
|
|
1251
|
+
// ============================================================================
|
|
1252
|
+
/**
 * Reads the penalty feature flags from environment variables.
 *
 * A flag is enabled only when its variable is exactly the string "1";
 * any other value (or an unset variable) leaves it disabled.
 *
 * @returns {{ pathPenalty: boolean, largeFilePenalty: boolean }}
 */
function readPenaltyFlags() {
    const { KIRI_PATH_PENALTY, KIRI_LARGE_FILE_PENALTY } = process.env;
    const pathPenalty = KIRI_PATH_PENALTY === "1";
    const largeFilePenalty = KIRI_LARGE_FILE_PENALTY === "1";
    return { pathPenalty, largeFilePenalty };
}
|
|
1261
|
+
/**
 * Computes simple statistics for a query string: the number of
 * whitespace-separated words and their average length.
 *
 * @param goal Query text
 * @returns {{ wordCount: number, avgWordLength: number }}
 *          avgWordLength is 0 when the query contains no words
 */
function computeQueryStats(goal) {
    let wordCount = 0;
    let charTotal = 0;
    for (const token of goal.trim().split(/\s+/)) {
        if (token.length === 0) {
            // A blank query splits into a single empty token; it is not a word.
            continue;
        }
        wordCount += 1;
        charTotal += token.length;
    }
    const avgWordLength = wordCount > 0 ? charTotal / wordCount : 0;
    return { wordCount, avgWordLength };
}
|
|
1275
|
+
/**
 * Applies the legacy binary path-miss penalty to a candidate.
 *
 * The penalty fires only when all of the following hold:
 * the query has at least 2 words, the average word length is at least 4,
 * and the candidate has exactly zero path match hits. When it fires, the
 * candidate's score is adjusted by PATH_MISS_DELTA and a "path-miss"
 * penalty event is recorded on the candidate.
 *
 * @deprecated Use applyGraduatedPenalty() instead (ADR 002)
 */
function applyPathMissPenalty(candidate, queryStats) {
    const { wordCount, avgWordLength } = queryStats;
    const queryEligible = wordCount >= 2 && avgWordLength >= 4;
    if (!queryEligible || candidate.pathMatchHits !== 0) {
        return;
    }
    candidate.score += PATH_MISS_DELTA; // -0.5
    recordPenaltyEvent(candidate, "path-miss", PATH_MISS_DELTA, {
        wordCount,
        avgWordLength,
        pathMatchHits: candidate.pathMatchHits,
    });
}
|
|
1291
|
+
/**
 * Applies the graduated path penalty to a candidate (Issue #68, ADR 002).
 *
 * The penalty value comes from computeGraduatedPenalty(). When it is
 * non-zero, it is added to the candidate's score and a "path-miss" penalty
 * event is recorded together with the tier label derived from the
 * candidate's pathMatchHits.
 *
 * @param candidate Candidate to apply the penalty to (mutated in place)
 * @param queryStats Query statistics used for the eligibility check
 * @param config Graduated penalty configuration
 */
function applyGraduatedPenalty(candidate, queryStats, config) {
    const hits = candidate.pathMatchHits;
    const delta = computeGraduatedPenalty(hits, queryStats, config);
    if (delta === 0) {
        return;
    }
    let tier;
    switch (hits) {
        case 0:
            tier = "tier0";
            break;
        case 1:
            tier = "tier1";
            break;
        case 2:
            tier = "tier2";
            break;
        default:
            tier = "no-penalty";
    }
    candidate.score += delta;
    recordPenaltyEvent(candidate, "path-miss", delta, {
        wordCount: queryStats.wordCount,
        avgWordLength: queryStats.avgWordLength,
        pathMatchHits: hits,
        tier,
    });
}
|
|
1317
|
+
/**
 * Applies the large-file penalty to a candidate.
 *
 * The penalty fires when the file has more than 500 lines AND the match
 * is located past line 120. When it fires, the candidate's score is
 * adjusted by LARGE_FILE_DELTA and a "large-file" penalty event is recorded.
 *
 * TODO(Issue #68): Add "no symbol at match location" check after selectSnippet integration
 */
function applyLargeFilePenalty(candidate) {
    const { totalLines, matchLine } = candidate;
    const isLargeFile = totalLines !== null && totalLines > 500;
    const isDeepMatch = matchLine !== null && matchLine > 120;
    if (!isLargeFile || !isDeepMatch) {
        return;
    }
    candidate.score += LARGE_FILE_DELTA; // -0.8
    recordPenaltyEvent(candidate, "large-file", LARGE_FILE_DELTA, {
        totalLines,
        matchLine,
    });
}
|
|
1332
|
+
/**
 * Records a penalty event on a candidate (used for telemetry).
 *
 * Appends `{ kind, delta, details }` to `candidate.penalties` and adds a
 * `penalty:<kind>` tag to `candidate.reasons`.
 *
 * @param candidate Candidate to annotate (mutated in place)
 * @param kind Penalty kind, e.g. "path-miss" or "large-file"
 * @param delta Score delta that was applied
 * @param details Extra context stored alongside the event
 */
function recordPenaltyEvent(candidate, kind, delta, details) {
    const event = { kind, delta, details };
    const reasonTag = `penalty:${kind}`;
    candidate.penalties.push(event);
    candidate.reasons.add(reasonTag);
}
|
|
1339
|
+
/**
 * Computes the distribution of pathMatchHits across candidates
 * (Issue #68: telemetry).
 *
 * Candidates are bucketed by exact hit count 0, 1, 2 and 3; every other
 * value falls into the `fourPlus` bucket. The input is not modified.
 *
 * @param candidates Candidates to inspect
 * @returns {{ zero: number, one: number, two: number, three: number, fourPlus: number, total: number }}
 */
function computePathMatchDistribution(candidates) {
    const distribution = {
        zero: 0,
        one: 0,
        two: 0,
        three: 0,
        fourPlus: 0,
        total: candidates.length,
    };
    for (const { pathMatchHits } of candidates) {
        switch (pathMatchHits) {
            case 0:
                distribution.zero += 1;
                break;
            case 1:
                distribution.one += 1;
                break;
            case 2:
                distribution.two += 1;
                break;
            case 3:
                distribution.three += 1;
                break;
            default:
                distribution.fourPlus += 1;
        }
    }
    return distribution;
}
|
|
1371
|
+
/**
 * Computes summary statistics over candidate scores (Issue #68: telemetry).
 *
 * The scores are copied before sorting, so the input is not modified.
 * For an even number of candidates the median is the mean of the two
 * middle scores; for an odd number it is the middle score.
 *
 * @param candidates Candidates whose `score` is summarized
 * @returns {{ min: number, max: number, mean: number, median: number }}
 *          all zeros when there are no candidates
 */
function computeScoreStats(candidates) {
    if (candidates.length === 0) {
        return { min: 0, max: 0, mean: 0, median: 0 };
    }
    const scores = candidates.map((c) => c.score).sort((a, b) => a - b);
    const count = scores.length;
    const sum = scores.reduce((acc, s) => acc + s, 0);
    const mid = Math.floor(count / 2);
    // Even-sized samples have no single middle element: average the two central values.
    const median = count % 2 === 1
        ? (scores[mid] ?? 0)
        : ((scores[mid - 1] ?? 0) + (scores[mid] ?? 0)) / 2;
    return {
        min: scores[0] ?? 0,
        max: scores[count - 1] ?? 0,
        mean: sum / count,
        median,
    };
}
|
|
1390
|
+
/**
 * Summarizes how penalties were applied across candidates
 * (Issue #68: telemetry).
 *
 * Counts "path-miss" and "large-file" penalty events recorded on the
 * candidates and combines them with the pathMatchHits distribution and
 * score statistics. The candidates are only read, not modified.
 *
 * @param candidates Candidates carrying `penalties`, `pathMatchHits` and `score`
 * @returns Telemetry summary object
 */
function computePenaltyTelemetry(candidates) {
    let pathMissPenalties = 0;
    let largeFilePenalties = 0;
    for (const { penalties } of candidates) {
        for (const { kind } of penalties) {
            switch (kind) {
                case "path-miss":
                    pathMissPenalties += 1;
                    break;
                case "large-file":
                    largeFilePenalties += 1;
                    break;
                default:
                    // Other penalty kinds are not counted here.
                    break;
            }
        }
    }
    const pathMatchDistribution = computePathMatchDistribution(candidates);
    const scoreStats = computeScoreStats(candidates);
    return {
        pathMissPenalties,
        largeFilePenalties,
        totalCandidates: candidates.length,
        pathMatchDistribution,
        scoreStats,
    };
}
|
|
1413
|
+
/**
 * Appends one penalty-telemetry record to a debug log file (Issue #68).
 *
 * The record is written in JSON Lines format (one JSON object per line)
 * to /tmp/kiri-penalty-telemetry.jsonl. This function is the I/O half of
 * the telemetry path; the numbers it logs are computed by the caller.
 *
 * Edge cases:
 * - With zero candidates (`dist.total === 0`) every percentage is reported
 *   as "0.0" instead of "NaN".
 * - When the mean score is 0 the penalty ratio is undefined and is reported
 *   as "n/a" instead of "Infinity%".
 * - A failed write is logged to stderr and otherwise ignored, so debug
 *   telemetry cannot fail the request that triggered it.
 *
 * @param telemetry Aggregated telemetry; reads totalCandidates,
 *        pathMissPenalties, largeFilePenalties, pathMatchDistribution and scoreStats
 * @param queryStats Query statistics; reads wordCount and avgWordLength
 */
function logPenaltyTelemetry(telemetry, queryStats) {
    const dist = telemetry.pathMatchDistribution;
    const scores = telemetry.scoreStats;
    // Guard the 0/0 case so an empty candidate set does not serialize as "NaN".
    const toPercent = (count) => dist.total > 0 ? ((count / dist.total) * 100).toFixed(1) : "0.0";
    // Ratio of the maximum penalty (-0.8) to the mean score; undefined for a zero mean.
    const penaltyRatio = scores.mean !== 0
        ? ((0.8 / scores.mean) * 100).toFixed(1) + "%"
        : "n/a";
    // One telemetry record, serialized below as a single JSON line.
    const telemetryRecord = {
        timestamp: new Date().toISOString(),
        query: {
            wordCount: queryStats.wordCount,
            avgWordLength: queryStats.avgWordLength,
        },
        totalCandidates: telemetry.totalCandidates,
        pathMissPenalties: telemetry.pathMissPenalties,
        largeFilePenalties: telemetry.largeFilePenalties,
        pathMatchDistribution: {
            zero: dist.zero,
            one: dist.one,
            two: dist.two,
            three: dist.three,
            fourPlus: dist.fourPlus,
            total: dist.total,
            percentages: {
                zero: toPercent(dist.zero),
                one: toPercent(dist.one),
                two: toPercent(dist.two),
                three: toPercent(dist.three),
                fourPlus: toPercent(dist.fourPlus),
            },
        },
        scoreStats: {
            min: scores.min.toFixed(2),
            max: scores.max.toFixed(2),
            mean: scores.mean.toFixed(2),
            median: scores.median.toFixed(2),
            penaltyRatio,
        },
    };
    const telemetryFile = "/tmp/kiri-penalty-telemetry.jsonl";
    try {
        fs.appendFileSync(telemetryFile, JSON.stringify(telemetryRecord) + "\n");
    }
    catch (error) {
        // Telemetry is best-effort debugging output; never let it break the request.
        console.error(`[WARN] Failed to write penalty telemetry to ${telemetryFile}:`, error);
    }
}
|
|
1459
|
+
/**
 * Loads the graduated penalty configuration from environment variables
 * (Issue #68: Graduated Penalty).
 *
 * Variables and defaults:
 * - KIRI_GRADUATED_PENALTY: enabled only when exactly "1"
 * - KIRI_PENALTY_MIN_WORD_COUNT: default 2
 * - KIRI_PENALTY_MIN_AVG_WORD_LENGTH: default 4.0
 * - KIRI_PENALTY_TIER_0 / _TIER_1 / _TIER_2: defaults -0.8 / -0.4 / -0.2
 *
 * An unset, empty, or non-numeric value falls back to the default. Without
 * this, a malformed value would parse to NaN, which silently disables the
 * eligibility thresholds and turns penalized scores into NaN.
 *
 * Note: the sign of the tier deltas is not validated here.
 *
 * @returns {{ enabled: boolean, minWordCount: number, minAvgWordLength: number,
 *             tier0Delta: number, tier1Delta: number, tier2Delta: number }}
 */
function readGraduatedPenaltyConfig() {
    // Parse a numeric env var, falling back to the default for missing or invalid input.
    const readNumber = (name, fallback) => {
        const parsed = Number.parseFloat(process.env[name] || "");
        return Number.isFinite(parsed) ? parsed : fallback;
    };
    return {
        enabled: process.env.KIRI_GRADUATED_PENALTY === "1",
        minWordCount: readNumber("KIRI_PENALTY_MIN_WORD_COUNT", 2),
        minAvgWordLength: readNumber("KIRI_PENALTY_MIN_AVG_WORD_LENGTH", 4.0),
        tier0Delta: readNumber("KIRI_PENALTY_TIER_0", -0.8),
        tier1Delta: readNumber("KIRI_PENALTY_TIER_1", -0.4),
        tier2Delta: readNumber("KIRI_PENALTY_TIER_2", -0.2),
    };
}
|
|
1473
|
+
/**
 * Computes the graduated penalty value (Issue #68: Graduated Penalty).
 * Has no side effects: the result depends only on the arguments.
 *
 * ADR 002: Graduated Penalty System
 * - Tier 0 (pathMatchHits === 0): config.tier0Delta (no path evidence)
 * - Tier 1 (pathMatchHits === 1): config.tier1Delta (weak path evidence)
 * - Tier 2 (pathMatchHits === 2): config.tier2Delta (moderate path evidence)
 * - Any other hit count: 0 (no penalty)
 *
 * The query must meet the eligibility criteria first: a word count or
 * average word length below the configured minimum yields 0 regardless of
 * the hit count.
 *
 * The result is non-positive only as long as the configured tier deltas
 * are non-positive; this function returns them unchanged.
 *
 * @param pathMatchHits Number of path-based scoring matches
 * @param queryStats Query word count and average word length
 * @param config Graduated penalty configuration
 * @returns Penalty delta to add to the candidate score
 */
function computeGraduatedPenalty(pathMatchHits, queryStats, config) {
    const tooFewWords = queryStats.wordCount < config.minWordCount;
    const wordsTooShort = queryStats.avgWordLength < config.minAvgWordLength;
    if (tooFewWords || wordsTooShort) {
        return 0; // query is not eligible for path penalties
    }
    switch (pathMatchHits) {
        case 0:
            return config.tier0Delta;
        case 1:
            return config.tier1Delta;
        case 2:
            return config.tier2Delta;
        default:
            return 0;
    }
}
|
|
1110
1508
|
export async function contextBundle(context, params) {
|
|
1111
1509
|
context.warningManager.startRequest();
|
|
1112
1510
|
const { db, repoId } = context;
|
|
@@ -1154,6 +1552,9 @@ export async function contextBundle(context, params) {
|
|
|
1154
1552
|
const candidates = new Map();
|
|
1155
1553
|
const stringMatchSeeds = new Set();
|
|
1156
1554
|
const fileCache = new Map();
|
|
1555
|
+
// ✅ Cache boost profile config to avoid redundant lookups in hot path
|
|
1556
|
+
const boostProfile = params.boost_profile ?? "default";
|
|
1557
|
+
const profileConfig = getBoostProfile(boostProfile);
|
|
1157
1558
|
// フレーズマッチング(高い重み: textMatch × 2)- 統合クエリでパフォーマンス改善
|
|
1158
1559
|
if (extractedTerms.phrases.length > 0) {
|
|
1159
1560
|
const phrasePlaceholders = extractedTerms.phrases
|
|
@@ -1172,7 +1573,6 @@ export async function contextBundle(context, params) {
|
|
|
1172
1573
|
ORDER BY f.path
|
|
1173
1574
|
LIMIT ?
|
|
1174
1575
|
`, [repoId, ...extractedTerms.phrases, MAX_MATCHES_PER_KEYWORD * extractedTerms.phrases.length]);
|
|
1175
|
-
const boostProfile = params.boost_profile ?? "default";
|
|
1176
1576
|
for (const row of rows) {
|
|
1177
1577
|
if (row.content === null) {
|
|
1178
1578
|
continue;
|
|
@@ -1191,7 +1591,9 @@ export async function contextBundle(context, params) {
|
|
|
1191
1591
|
candidate.reasons.add(`phrase:${phrase}`);
|
|
1192
1592
|
}
|
|
1193
1593
|
// Apply boost profile once per file
|
|
1194
|
-
|
|
1594
|
+
if (boostProfile !== "none") {
|
|
1595
|
+
applyBoostProfile(candidate, row, profileConfig, weights, extractedTerms);
|
|
1596
|
+
}
|
|
1195
1597
|
// Use first matched phrase for preview (guaranteed to exist due to length check above)
|
|
1196
1598
|
const { line } = buildPreview(row.content, matchedPhrases[0]);
|
|
1197
1599
|
candidate.matchLine =
|
|
@@ -1231,7 +1633,6 @@ export async function contextBundle(context, params) {
|
|
|
1231
1633
|
ORDER BY f.path
|
|
1232
1634
|
LIMIT ?
|
|
1233
1635
|
`, [repoId, ...extractedTerms.keywords, MAX_MATCHES_PER_KEYWORD * extractedTerms.keywords.length]);
|
|
1234
|
-
const boostProfile = params.boost_profile ?? "default";
|
|
1235
1636
|
for (const row of rows) {
|
|
1236
1637
|
if (row.content === null) {
|
|
1237
1638
|
continue;
|
|
@@ -1249,7 +1650,9 @@ export async function contextBundle(context, params) {
|
|
|
1249
1650
|
candidate.reasons.add(`text:${keyword}`);
|
|
1250
1651
|
}
|
|
1251
1652
|
// Apply boost profile once per file
|
|
1252
|
-
|
|
1653
|
+
if (boostProfile !== "none") {
|
|
1654
|
+
applyBoostProfile(candidate, row, profileConfig, weights, extractedTerms);
|
|
1655
|
+
}
|
|
1253
1656
|
// Use first matched keyword for preview (guaranteed to exist due to length check above)
|
|
1254
1657
|
const { line } = buildPreview(row.content, matchedKeywords[0]);
|
|
1255
1658
|
candidate.matchLine =
|
|
@@ -1386,6 +1789,36 @@ export async function contextBundle(context, params) {
|
|
|
1386
1789
|
candidate.score *= candidate.scoreMultiplier;
|
|
1387
1790
|
}
|
|
1388
1791
|
}
|
|
1792
|
+
// Issue #68: Apply Path-Based Penalties (after multipliers, before sorting)
|
|
1793
|
+
const penaltyFlags = readPenaltyFlags();
|
|
1794
|
+
const queryStats = computeQueryStats(goal); // Always compute for telemetry
|
|
1795
|
+
const graduatedConfig = readGraduatedPenaltyConfig();
|
|
1796
|
+
// ADR 002: Use graduated penalty system if enabled, otherwise use legacy binary penalty
|
|
1797
|
+
if (graduatedConfig.enabled && penaltyFlags.pathPenalty) {
|
|
1798
|
+
for (const candidate of materializedCandidates) {
|
|
1799
|
+
applyGraduatedPenalty(candidate, queryStats, graduatedConfig);
|
|
1800
|
+
}
|
|
1801
|
+
}
|
|
1802
|
+
else if (penaltyFlags.pathPenalty) {
|
|
1803
|
+
// Legacy mode: Binary penalty (pathMatchHits === 0 only)
|
|
1804
|
+
for (const candidate of materializedCandidates) {
|
|
1805
|
+
applyPathMissPenalty(candidate, queryStats);
|
|
1806
|
+
}
|
|
1807
|
+
}
|
|
1808
|
+
// Issue #68: Apply Large File Penalty (after multipliers, before sorting)
|
|
1809
|
+
if (penaltyFlags.largeFilePenalty) {
|
|
1810
|
+
for (const candidate of materializedCandidates) {
|
|
1811
|
+
applyLargeFilePenalty(candidate);
|
|
1812
|
+
}
|
|
1813
|
+
}
|
|
1814
|
+
// Issue #68: Telemetry(デバッグ用、環境変数で制御)
|
|
1815
|
+
// LDE: 純粋関数(計算)と副作用(I/O)を分離
|
|
1816
|
+
const enableTelemetry = process.env.KIRI_PENALTY_TELEMETRY === "1";
|
|
1817
|
+
if (enableTelemetry) {
|
|
1818
|
+
console.error(`[DEBUG] Telemetry enabled. Flags: pathPenalty=${penaltyFlags.pathPenalty}, largeFilePenalty=${penaltyFlags.largeFilePenalty}`);
|
|
1819
|
+
const telemetry = computePenaltyTelemetry(materializedCandidates);
|
|
1820
|
+
logPenaltyTelemetry(telemetry, queryStats);
|
|
1821
|
+
}
|
|
1389
1822
|
const sortedCandidates = materializedCandidates
|
|
1390
1823
|
.filter((candidate) => candidate.score > 0) // Filter out candidates with negative or zero scores
|
|
1391
1824
|
.sort((a, b) => {
|
|
@@ -1669,14 +2102,27 @@ export async function depsClosure(context, params) {
|
|
|
1669
2102
|
}
|
|
1670
2103
|
export async function resolveRepoId(db, repoRoot) {
|
|
1671
2104
|
try {
|
|
1672
|
-
const
|
|
2105
|
+
const candidates = getRepoPathCandidates(repoRoot);
|
|
2106
|
+
const normalized = candidates[0];
|
|
2107
|
+
const placeholders = candidates.map(() => "?").join(", ");
|
|
2108
|
+
const rows = await db.all(`SELECT id, root FROM repo WHERE root IN (${placeholders}) LIMIT 1`, candidates);
|
|
1673
2109
|
if (rows.length === 0) {
|
|
2110
|
+
const existingRows = await db.all("SELECT id, root FROM repo");
|
|
2111
|
+
for (const candidate of existingRows) {
|
|
2112
|
+
if (normalizeRepoPath(candidate.root) === normalized) {
|
|
2113
|
+
await db.run("UPDATE repo SET root = ? WHERE id = ?", [normalized, candidate.id]);
|
|
2114
|
+
return candidate.id;
|
|
2115
|
+
}
|
|
2116
|
+
}
|
|
1674
2117
|
throw new Error("Target repository is missing from DuckDB. Run the indexer before starting the server.");
|
|
1675
2118
|
}
|
|
1676
2119
|
const row = rows[0];
|
|
1677
2120
|
if (!row) {
|
|
1678
2121
|
throw new Error("Failed to retrieve repository record. Database returned empty result.");
|
|
1679
2122
|
}
|
|
2123
|
+
if (row.root !== normalized) {
|
|
2124
|
+
await db.run("UPDATE repo SET root = ? WHERE id = ?", [normalized, row.id]);
|
|
2125
|
+
}
|
|
1680
2126
|
return row.id;
|
|
1681
2127
|
}
|
|
1682
2128
|
catch (error) {
|