kiri-mcp-server 0.9.9 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +60 -7
- package/config/scoring-profiles.yml +82 -35
- package/dist/config/scoring-profiles.yml +82 -35
- package/dist/package.json +12 -1
- package/dist/src/client/proxy.js +81 -12
- package/dist/src/client/proxy.js.map +1 -1
- package/dist/src/daemon/daemon.js +91 -14
- package/dist/src/daemon/daemon.js.map +1 -1
- package/dist/src/indexer/cli.d.ts.map +1 -1
- package/dist/src/indexer/cli.js +712 -98
- package/dist/src/indexer/cli.js.map +1 -1
- package/dist/src/indexer/git.d.ts.map +1 -1
- package/dist/src/indexer/git.js +41 -3
- package/dist/src/indexer/git.js.map +1 -1
- package/dist/src/indexer/migrations/repo-merger.d.ts +33 -0
- package/dist/src/indexer/migrations/repo-merger.d.ts.map +1 -0
- package/dist/src/indexer/migrations/repo-merger.js +67 -0
- package/dist/src/indexer/migrations/repo-merger.js.map +1 -0
- package/dist/src/indexer/schema.d.ts +66 -0
- package/dist/src/indexer/schema.d.ts.map +1 -1
- package/dist/src/indexer/schema.js +337 -0
- package/dist/src/indexer/schema.js.map +1 -1
- package/dist/src/server/abbreviations.d.ts +47 -0
- package/dist/src/server/abbreviations.d.ts.map +1 -0
- package/dist/src/server/abbreviations.js +71 -0
- package/dist/src/server/abbreviations.js.map +1 -0
- package/dist/src/server/boost-profiles.d.ts +63 -0
- package/dist/src/server/boost-profiles.d.ts.map +1 -0
- package/dist/src/server/boost-profiles.js +202 -0
- package/dist/src/server/boost-profiles.js.map +1 -0
- package/dist/src/server/config.d.ts +45 -0
- package/dist/src/server/config.d.ts.map +1 -0
- package/dist/src/server/config.js +146 -0
- package/dist/src/server/config.js.map +1 -0
- package/dist/src/server/context.d.ts +29 -0
- package/dist/src/server/context.d.ts.map +1 -1
- package/dist/src/server/context.js +26 -1
- package/dist/src/server/context.js.map +1 -1
- package/dist/src/server/handlers/snippets-get.d.ts +36 -0
- package/dist/src/server/handlers/snippets-get.d.ts.map +1 -0
- package/dist/src/server/handlers/snippets-get.js +120 -0
- package/dist/src/server/handlers/snippets-get.js.map +1 -0
- package/dist/src/server/handlers.d.ts +35 -22
- package/dist/src/server/handlers.d.ts.map +1 -1
- package/dist/src/server/handlers.js +1993 -414
- package/dist/src/server/handlers.js.map +1 -1
- package/dist/src/server/indexBootstrap.d.ts.map +1 -1
- package/dist/src/server/indexBootstrap.js +49 -2
- package/dist/src/server/indexBootstrap.js.map +1 -1
- package/dist/src/server/main.d.ts.map +1 -1
- package/dist/src/server/main.js +119 -30
- package/dist/src/server/main.js.map +1 -1
- package/dist/src/server/profile-selector.d.ts +33 -0
- package/dist/src/server/profile-selector.d.ts.map +1 -0
- package/dist/src/server/profile-selector.js +291 -0
- package/dist/src/server/profile-selector.js.map +1 -0
- package/dist/src/server/rpc.d.ts.map +1 -1
- package/dist/src/server/rpc.js +61 -12
- package/dist/src/server/rpc.js.map +1 -1
- package/dist/src/server/rrf.d.ts +86 -0
- package/dist/src/server/rrf.d.ts.map +1 -0
- package/dist/src/server/rrf.js +108 -0
- package/dist/src/server/rrf.js.map +1 -0
- package/dist/src/server/runtime.d.ts.map +1 -1
- package/dist/src/server/runtime.js +14 -4
- package/dist/src/server/runtime.js.map +1 -1
- package/dist/src/server/scoring.d.ts +7 -1
- package/dist/src/server/scoring.d.ts.map +1 -1
- package/dist/src/server/scoring.js +121 -2
- package/dist/src/server/scoring.js.map +1 -1
- package/dist/src/server/services/index.d.ts +24 -0
- package/dist/src/server/services/index.d.ts.map +1 -0
- package/dist/src/server/services/index.js +20 -0
- package/dist/src/server/services/index.js.map +1 -0
- package/dist/src/server/services/repo-repository.d.ts +61 -0
- package/dist/src/server/services/repo-repository.d.ts.map +1 -0
- package/dist/src/server/services/repo-repository.js +93 -0
- package/dist/src/server/services/repo-repository.js.map +1 -0
- package/dist/src/server/services/repo-resolver.d.ts +28 -0
- package/dist/src/server/services/repo-resolver.d.ts.map +1 -0
- package/dist/src/server/services/repo-resolver.js +62 -0
- package/dist/src/server/services/repo-resolver.js.map +1 -0
- package/dist/src/shared/cli/args.d.ts +70 -0
- package/dist/src/shared/cli/args.d.ts.map +1 -0
- package/dist/src/shared/cli/args.js +84 -0
- package/dist/src/shared/cli/args.js.map +1 -0
- package/dist/src/shared/duckdb.d.ts.map +1 -1
- package/dist/src/shared/duckdb.js +21 -1
- package/dist/src/shared/duckdb.js.map +1 -1
- package/dist/src/shared/embedding/engine.d.ts +38 -0
- package/dist/src/shared/embedding/engine.d.ts.map +1 -0
- package/dist/src/shared/embedding/engine.js +6 -0
- package/dist/src/shared/embedding/engine.js.map +1 -0
- package/dist/src/shared/embedding/lsh-engine.d.ts +11 -0
- package/dist/src/shared/embedding/lsh-engine.d.ts.map +1 -0
- package/dist/src/shared/embedding/lsh-engine.js +14 -0
- package/dist/src/shared/embedding/lsh-engine.js.map +1 -0
- package/dist/src/shared/embedding/registry.d.ts +25 -0
- package/dist/src/shared/embedding/registry.d.ts.map +1 -0
- package/dist/src/shared/embedding/registry.js +50 -0
- package/dist/src/shared/embedding/registry.js.map +1 -0
- package/dist/src/shared/embedding/semantic-engine.d.ts +14 -0
- package/dist/src/shared/embedding/semantic-engine.d.ts.map +1 -0
- package/dist/src/shared/embedding/semantic-engine.js +50 -0
- package/dist/src/shared/embedding/semantic-engine.js.map +1 -0
- package/dist/src/shared/fs/safePath.d.ts +7 -0
- package/dist/src/shared/fs/safePath.d.ts.map +1 -0
- package/dist/src/shared/fs/safePath.js +23 -0
- package/dist/src/shared/fs/safePath.js.map +1 -0
- package/dist/src/shared/models/model-manager.d.ts +38 -0
- package/dist/src/shared/models/model-manager.d.ts.map +1 -0
- package/dist/src/shared/models/model-manager.js +116 -0
- package/dist/src/shared/models/model-manager.js.map +1 -0
- package/dist/src/shared/models/model-manifest.d.ts +22 -0
- package/dist/src/shared/models/model-manifest.d.ts.map +1 -0
- package/dist/src/shared/models/model-manifest.js +24 -0
- package/dist/src/shared/models/model-manifest.js.map +1 -0
- package/dist/src/shared/utils/glob.d.ts +5 -0
- package/dist/src/shared/utils/glob.d.ts.map +1 -0
- package/dist/src/shared/utils/glob.js +22 -0
- package/dist/src/shared/utils/glob.js.map +1 -0
- package/dist/src/shared/utils/retry.d.ts +8 -0
- package/dist/src/shared/utils/retry.d.ts.map +1 -0
- package/dist/src/shared/utils/retry.js +20 -0
- package/dist/src/shared/utils/retry.js.map +1 -0
- package/dist/src/shared/utils/validation.d.ts +14 -0
- package/dist/src/shared/utils/validation.d.ts.map +1 -0
- package/dist/src/shared/utils/validation.js +22 -0
- package/dist/src/shared/utils/validation.js.map +1 -0
- package/package.json +29 -20
|
@@ -1,9 +1,15 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
1
2
|
import path from "node:path";
|
|
2
3
|
import { checkFTSSchemaExists } from "../indexer/schema.js";
|
|
3
4
|
import { generateEmbedding, structuralSimilarity } from "../shared/embedding.js";
|
|
4
5
|
import { encode as encodeGPT, tokenizeText } from "../shared/tokenizer.js";
|
|
5
|
-
import {
|
|
6
|
+
import { expandAbbreviations } from "./abbreviations.js";
|
|
7
|
+
import { getBoostProfile, } from "./boost-profiles.js";
|
|
8
|
+
import { loadServerConfig } from "./config.js";
|
|
6
9
|
import { coerceProfileName, loadScoringProfile } from "./scoring.js";
|
|
10
|
+
import { createServerServices } from "./services/index.js";
|
|
11
|
+
// Re-export extracted handlers for backward compatibility
|
|
12
|
+
export { snippetsGet, } from "./handlers/snippets-get.js";
|
|
7
13
|
// Configuration file patterns (v0.8.0+: consolidated to avoid duplication)
|
|
8
14
|
// Comprehensive list covering multiple languages and tools
|
|
9
15
|
const CONFIG_FILES = [
|
|
@@ -130,6 +136,83 @@ const CONFIG_PATTERNS = [
|
|
|
130
136
|
".github/workflows",
|
|
131
137
|
];
|
|
132
138
|
const FTS_STATUS_CACHE_TTL_MS = 10_000;
|
|
139
|
+
// Lookup from an accepted metadata alias to its key descriptor.
// "tag" is the only alias whose resolved key ("tags") differs from its own spelling.
const METADATA_ALIAS_MAP = new Map([
    ["tag", "tags"],
    ["tags", "tags"],
    ["category", "category"],
    ["title", "title"],
    ["service", "service"],
].map(([alias, key]) => [alias, { key }]));
|
|
146
|
+
// Recognised key prefixes for metadata lookups.
// Entries optionally carry `strict: true` or a `source` tag ("front_matter", "yaml", "json");
// how those fields are interpreted is decided by code outside this view.
const METADATA_KEY_PREFIXES = [
    // Generic prefixes
    { prefix: "meta." },
    { prefix: "metadata.", strict: true },
    { prefix: "docmeta.", strict: true },
    // Source-specific prefixes
    { prefix: "frontmatter.", source: "front_matter" },
    { prefix: "fm.", source: "front_matter" },
    { prefix: "yaml.", source: "yaml" },
    { prefix: "json.", source: "json" },
];
|
|
155
|
+
// Numeric tuning constants for metadata and inbound-link signals.
// NOTE(review): their consumers are outside this view — confirm usage before retuning.
const METADATA_MATCH_WEIGHT = 0.15;
const METADATA_FILTER_MATCH_WEIGHT = 0.1;
const METADATA_HINT_BONUS = 0.25;
const INBOUND_LINK_WEIGHT = 0.2;
|
|
159
|
+
/**
 * checkTableAvailability
 *
 * Checks at startup whether the optional tables exist and builds a
 * TableAvailability object from the results. Doing this once up front avoids
 * the race conditions that a global mutable flag would introduce.
 *
 * NOTE: the server must be restarted after a schema change (tables added).
 *
 * @param db - DuckDBClient instance
 * @returns TableAvailability object
 * @throws when an error other than "table missing" occurs (e.g. a connection
 *   error); the original error is attached as `cause`
 */
export async function checkTableAvailability(db) {
    const ALLOWED_TABLES = [
        "document_metadata_kv",
        "markdown_link",
        "hint_expansion",
        "hint_dictionary",
    ];
    const checkTable = async (tableName) => {
        // The table name is interpolated into SQL below, so only allow-listed names pass.
        if (!ALLOWED_TABLES.includes(tableName)) {
            throw new Error(`Invalid table name: ${tableName}`);
        }
        try {
            await db.all(`SELECT 1 FROM ${tableName} LIMIT 0`);
            return true;
        }
        catch (error) {
            // Only a "table missing" error is treated as "not available".
            if (isTableMissingError(error, tableName)) {
                return false;
            }
            // Anything else (connection errors, etc.) is rethrown; keep the
            // original error reachable through `cause` so its stack is not lost.
            throw new Error(`Failed to check table availability for ${tableName}: ${error instanceof Error ? error.message : String(error)}`, { cause: error });
        }
    };
    // Probes run one after another, in this fixed order.
    const result = {
        hasMetadataTables: await checkTable("document_metadata_kv"),
        hasLinkTable: await checkTable("markdown_link"),
        hasHintLog: await checkTable("hint_expansion"),
        hasHintDictionary: await checkTable("hint_dictionary"),
    };
    // Startup warnings: report each table that is missing.
    if (!result.hasMetadataTables) {
        console.warn("document_metadata_kv table is missing. Metadata filters and boosts disabled until database is upgraded.");
    }
    if (!result.hasLinkTable) {
        console.warn("markdown_link table is missing. Inbound link boosting disabled until database is upgraded.");
    }
    if (!result.hasHintLog) {
        console.warn("hint_expansion table is missing. Hint logging disabled. Enable the latest schema and rerun the indexer to capture hint logs.");
    }
    if (!result.hasHintDictionary) {
        console.warn("hint_dictionary table is missing. Dictionary hints disabled. Run scripts/diag/build-hint-dictionary.ts after upgrading the schema.");
    }
    return result;
}
|
|
133
216
|
async function hasDirtyRepos(db) {
|
|
134
217
|
const statusCheck = await db.all(`SELECT COUNT(*) as count FROM repo
|
|
135
218
|
WHERE fts_dirty = true OR fts_status IN ('dirty', 'rebuilding')`);
|
|
@@ -214,8 +297,154 @@ function isConfigFile(path, fileName) {
|
|
|
214
297
|
fileName.startsWith(".env") ||
|
|
215
298
|
isInConfigDirectory);
|
|
216
299
|
}
|
|
300
|
+
/**
 * Cleans a caller-supplied hint list: keeps only strings, trims them, drops
 * empty values and duplicates, and stops once MAX_ARTIFACT_HINTS entries have
 * been collected.
 *
 * @param hints - candidate hint list; anything that is not an array yields []
 * @returns trimmed, de-duplicated hints in first-seen order
 */
function normalizeArtifactHints(hints) {
    if (!Array.isArray(hints)) {
        return [];
    }
    // A Set preserves insertion order, so it doubles as the ordered result.
    const accepted = new Set();
    for (const entry of hints) {
        if (typeof entry !== "string") {
            continue;
        }
        const cleaned = entry.trim();
        if (cleaned === "" || accepted.has(cleaned)) {
            continue;
        }
        accepted.add(cleaned);
        if (accepted.size >= MAX_ARTIFACT_HINTS) {
            break;
        }
    }
    return Array.from(accepted);
}
|
|
322
|
+
/**
 * Splits hints into two buckets.
 *
 * A hint that contains "/" and matches SAFE_PATH_PATTERN is kept verbatim as a
 * path hint. Every other hint is trimmed and lower-cased, and kept as a
 * substring hint only when at least 3 characters remain.
 *
 * @param hints - iterable of hint strings
 * @returns `{ pathHints, substringHints }`
 */
function bucketArtifactHints(hints) {
    const pathHints = [];
    const substringHints = [];
    for (const rawHint of hints) {
        const looksLikePath = rawHint.includes("/") && SAFE_PATH_PATTERN.test(rawHint);
        if (looksLikePath) {
            pathHints.push(rawHint);
        }
        else {
            const lowered = rawHint.trim().toLowerCase();
            if (lowered.length >= 3) {
                substringHints.push(lowered);
            }
        }
    }
    return { pathHints, substringHints };
}
|
|
339
|
+
/**
 * Tells whether `error` is an Error whose message contains "Table with name"
 * (case-insensitive) and also mentions `table`.
 *
 * @param error - value caught from a failed query
 * @param table - table name expected to appear in the message
 * @returns true only when both conditions hold
 */
function isMissingTableError(error, table) {
    return (error instanceof Error &&
        /Table with name/i.test(error.message) &&
        error.message.includes(table));
}
|
|
345
|
+
/**
 * Inserts one row into `hint_expansion` describing a hint expansion.
 *
 * Does nothing unless HINT_LOG_ENABLED is set and `tableAvailability.hasHintLog`
 * is truthy. If the insert fails because the table is missing, a warning is
 * printed and the error is swallowed; any other error propagates.
 *
 * @param db - client exposing `run(sql, params)`
 * @param tableAvailability - object with a `hasHintLog` flag
 * @param entry - `{ repoId, hintValue, kind, targetPath?, payload? }`
 */
async function logHintExpansionEntry(db, tableAvailability, entry) {
    const loggingActive = HINT_LOG_ENABLED && tableAvailability.hasHintLog;
    if (!loggingActive) {
        return;
    }
    const { repoId, hintValue, kind, targetPath, payload } = entry;
    // A falsy payload is stored as NULL rather than serialized.
    const serializedPayload = payload ? JSON.stringify(payload) : null;
    try {
        await db.run(`
      INSERT INTO hint_expansion (repo_id, hint_value, expansion_kind, target_path, payload)
      VALUES (?, ?, ?, ?, ?)
    `, [repoId, hintValue, kind, targetPath ?? null, serializedPayload]);
    }
    catch (error) {
        if (!isMissingTableError(error, "hint_expansion")) {
            throw error;
        }
        console.warn("hint_expansion table is missing in the active database. Enable the latest schema and rerun the indexer to capture hint logs.");
    }
}
|
|
372
|
+
/**
 * Looks up target paths in `hint_dictionary` for each distinct hint.
 *
 * Returns [] immediately when the dictionary is disabled, `perHintLimit` is
 * not positive, no hints are given, or the table is flagged unavailable.
 * Queries run one hint at a time. If any query fails because the table is
 * missing, a warning is printed and [] is returned, discarding targets
 * gathered so far. Rows whose `target_path` is empty or fails
 * SAFE_PATH_PATTERN are skipped.
 *
 * @param db - client exposing `all(sql, params)`
 * @param tableAvailability - object with a `hasHintDictionary` flag
 * @param repoId - repository id bound to the query
 * @param hints - hint values to look up
 * @param perHintLimit - maximum rows requested per hint
 * @returns list of `{ path, sourceHint, origin: "dictionary" }`
 */
async function fetchDictionaryPathHints(db, tableAvailability, repoId, hints, perHintLimit) {
    const lookupDisabled = !HINT_DICTIONARY_ENABLED || perHintLimit <= 0 || hints.length === 0;
    if (lookupDisabled || !tableAvailability.hasHintDictionary) {
        return [];
    }
    const collected = [];
    for (const hint of new Set(hints)) {
        let rows;
        try {
            rows = await db.all(`
        SELECT target_path
        FROM hint_dictionary
        WHERE repo_id = ?
          AND hint_value = ?
        ORDER BY freq DESC, target_path
        LIMIT ?
      `, [repoId, hint, perHintLimit]);
        }
        catch (error) {
            if (!isMissingTableError(error, "hint_dictionary")) {
                throw error;
            }
            console.warn("hint_dictionary table is missing in the active database. Run scripts/diag/build-hint-dictionary.ts after upgrading the schema.");
            return [];
        }
        for (const { target_path: targetPath } of rows) {
            if (targetPath && SAFE_PATH_PATTERN.test(targetPath)) {
                collected.push({ path: targetPath, sourceHint: hint, origin: "dictionary" });
            }
        }
    }
    return collected;
}
|
|
409
|
+
/**
 * De-duplicates hint targets by path, keeping the first occurrence, and builds
 * a path → `{ sourceHint, origin }` lookup for the kept targets.
 *
 * @param targets - iterable of `{ path, sourceHint, origin }`
 * @returns `{ list, meta }` where `list` holds the kept targets in order and
 *   `meta` is the Map keyed by path
 */
function createHintSeedMeta(targets) {
    const meta = new Map();
    const list = [];
    for (const target of targets) {
        const { path: targetPath, sourceHint, origin } = target;
        if (!meta.has(targetPath)) {
            meta.set(targetPath, { sourceHint, origin });
            list.push(target);
        }
    }
    return { list, meta };
}
|
|
421
|
+
/**
 * Reads the seed metadata stored for `path`.
 *
 * @param seedMeta - Map-like object with `get`, or null/undefined
 * @param path - key to look up
 * @returns the stored entry, or undefined when `seedMeta` is nullish or has no entry
 */
function getHintSeedMeta(seedMeta, path) {
    if (seedMeta === null || seedMeta === undefined) {
        return undefined;
    }
    return seedMeta.get(path);
}
|
|
424
|
+
/**
 * Computes the score boost for a hinted file.
 *
 * The boost is the sum of `textMatch` and `pathMatch` (each scaled by its
 * HINT_PRIORITY_* multiplier), `editingPath` and `dependency`, but never less
 * than HINT_PRIORITY_BASE_BONUS.
 *
 * @param weights - object with `textMatch`, `pathMatch`, `editingPath`, `dependency`
 * @returns the boost value
 */
function computeHintPriorityBoost(weights) {
    const { textMatch, pathMatch, editingPath, dependency } = weights;
    const weightedSum = textMatch * HINT_PRIORITY_TEXT_MULTIPLIER +
        pathMatch * HINT_PRIORITY_PATH_MULTIPLIER +
        editingPath +
        dependency;
    return Math.max(HINT_PRIORITY_BASE_BONUS, weightedSum);
}
|
|
430
|
+
/**
 * Builds the hint-expansion settings object from the module-level HINT_*
 * constants and the given scoring weights.
 *
 * Limits are clamped to a minimum of 0, except `dirMaxFiles` and
 * `semDirCandidateLimit`, which are clamped to a minimum of 1.
 * `semThreshold` falls back to 0.65 when the configured value is not finite.
 *
 * @param weights - scoring weights; `dependency` is read here, the rest via
 *   computeHintPriorityBoost
 * @returns the expansion config object
 */
function createHintExpansionConfig(weights) {
    const atLeastZero = (value) => Math.max(0, value);
    const atLeastOne = (value) => Math.max(1, value);
    const semThreshold = Number.isFinite(HINT_SEM_THRESHOLD) ? HINT_SEM_THRESHOLD : 0.65;
    return {
        dirLimit: atLeastZero(HINT_DIR_LIMIT),
        dirMaxFiles: atLeastOne(HINT_DIR_MAX_FILES),
        depOutLimit: atLeastZero(HINT_DEP_OUT_LIMIT),
        depInLimit: atLeastZero(HINT_DEP_IN_LIMIT),
        semLimit: atLeastZero(HINT_SEM_LIMIT),
        semDirCandidateLimit: atLeastOne(HINT_SEM_DIR_CANDIDATE_LIMIT),
        semThreshold,
        perHintLimit: atLeastZero(HINT_PER_HINT_LIMIT),
        dbQueryBudget: atLeastZero(HINT_DB_QUERY_BUDGET),
        // Directory neighbours get 35% of the full hint boost.
        dirBoost: computeHintPriorityBoost(weights) * 0.35,
        // Dependency neighbours get 80% of the dependency weight.
        depBoost: weights.dependency * 0.8,
        substringLimit: atLeastZero(HINT_SUBSTRING_LIMIT),
        substringBoost: atLeastZero(HINT_SUBSTRING_BOOST),
    };
}
|
|
217
447
|
const DEFAULT_SEARCH_LIMIT = 50;
|
|
218
|
-
const DEFAULT_SNIPPET_WINDOW = 150;
|
|
219
448
|
const DEFAULT_BUNDLE_LIMIT = 7; // Reduced from 12 to optimize token usage
|
|
220
449
|
const MAX_BUNDLE_LIMIT = 20;
|
|
221
450
|
const MAX_KEYWORDS = 12;
|
|
@@ -223,32 +452,76 @@ const MAX_MATCHES_PER_KEYWORD = 40;
|
|
|
223
452
|
const MAX_DEPENDENCY_SEEDS = 8;
|
|
224
453
|
const MAX_DEPENDENCY_SEEDS_QUERY_LIMIT = 100; // SQL injection防御用の上限
|
|
225
454
|
const NEARBY_LIMIT = 6;
|
|
226
|
-
const
|
|
455
|
+
// Server configuration is loaded once, when this module is evaluated.
const serverConfig = loadServerConfig();

// Feature switches taken from serverConfig.features.
const SUPPRESS_NON_CODE_ENABLED = serverConfig.features.suppressNonCode;
const FINAL_RESULT_SUPPRESSION_ENABLED = serverConfig.features.suppressFinalResults;
const CLAMP_SNIPPETS_ENABLED = serverConfig.features.clampSnippets;
const FALLBACK_SNIPPET_WINDOW = serverConfig.features.snippetWindow;

const MAX_RERANK_LIMIT = 50;
// Upper bound on the number of hints kept by normalizeArtifactHints.
const MAX_ARTIFACT_HINTS = 8;
// Accepts only ASCII letters, digits, "_", ".", "-" and "/".
const SAFE_PATH_PATTERN = /^[a-zA-Z0-9_.\-/]+$/;

// Hint tuning values taken from serverConfig.hints.
const HINT_PRIORITY_TEXT_MULTIPLIER = serverConfig.hints.priority.textMultiplier;
const HINT_PRIORITY_PATH_MULTIPLIER = serverConfig.hints.priority.pathMultiplier;
const HINT_PRIORITY_BASE_BONUS = serverConfig.hints.priority.baseBonus;
const HINT_DIR_LIMIT = serverConfig.hints.directory.limit;
const HINT_DIR_MAX_FILES = serverConfig.hints.directory.maxFiles;
const HINT_DEP_OUT_LIMIT = serverConfig.hints.dependency.outLimit;
const HINT_DEP_IN_LIMIT = serverConfig.hints.dependency.inLimit;
const HINT_SEM_LIMIT = serverConfig.hints.semantic.limit;
const HINT_SEM_DIR_CANDIDATE_LIMIT = serverConfig.hints.semantic.dirCandidateLimit;
const HINT_SEM_THRESHOLD = serverConfig.hints.semantic.threshold;

// Fragments and file names consulted by isSuppressedPath.
const SUPPRESSED_PATH_PREFIXES = [".github/", ".git/", "ThirdPartyNotices", "node_modules/"];
const SUPPRESSED_FILE_NAMES = ["thirdpartynotices.txt", "thirdpartynotices.md", "cgmanifest.json"];
|
|
475
|
+
/**
 * Decides whether a path should be suppressed as non-code content.
 *
 * Always false when SUPPRESS_NON_CODE_ENABLED is off. Otherwise the path has
 * any leading "./" removed and is lower-cased; it is suppressed when it ends
 * with one of SUPPRESSED_FILE_NAMES or contains one of
 * SUPPRESSED_PATH_PREFIXES. Despite the constant's name, that second check is
 * a substring match anywhere in the path, not a prefix match.
 *
 * @param path - repository-relative file path
 * @returns true when the path is suppressed
 */
function isSuppressedPath(path) {
    if (!SUPPRESS_NON_CODE_ENABLED) {
        return false;
    }
    const withoutDotSlash = path.startsWith("./") ? path.replace(/^\.\/+/u, "") : path;
    const lowered = withoutDotSlash.toLowerCase();
    const hasSuppressedName = SUPPRESSED_FILE_NAMES.some((fileName) => lowered.endsWith(fileName));
    if (hasSuppressedName) {
        return true;
    }
    return SUPPRESSED_PATH_PREFIXES.some((fragment) => lowered.includes(fragment.toLowerCase()));
}
|
|
487
|
+
// Remaining hint tuning values taken from serverConfig.hints.
const HINT_PER_HINT_LIMIT = serverConfig.hints.perHintLimit;
const HINT_DB_QUERY_BUDGET = serverConfig.hints.dbQueryLimit;
const HINT_SUBSTRING_LIMIT = serverConfig.hints.substring.limit;
const HINT_SUBSTRING_BOOST = serverConfig.hints.substring.boost;
|
|
491
|
+
// Hint logging is opt-in: enabled only when KIRI_HINT_LOG is exactly "1".
const HINT_LOG_ENABLED = process.env.KIRI_HINT_LOG === "1";
// Dictionary hints are opt-out: enabled unless KIRI_HINT_DICTIONARY is exactly "0".
const HINT_DICTIONARY_ENABLED = process.env.KIRI_HINT_DICTIONARY !== "0";
/**
 * Parses the KIRI_HINT_DICTIONARY_LIMIT setting.
 *
 * A missing value defaults to 2 and negative values are clamped to 0. A value
 * that does not parse as an integer also falls back to 2: without this,
 * `Math.max(0, NaN)` would yield NaN, which slips past `<= 0` guards.
 *
 * @param raw - raw environment value, possibly undefined
 * @returns a non-negative integer limit
 */
function parseHintDictionaryLimit(raw) {
    const parsed = Number.parseInt(raw ?? "2", 10);
    return Number.isNaN(parsed) ? 2 : Math.max(0, parsed);
}
const HINT_DICTIONARY_LIMIT = parseHintDictionaryLimit(process.env.KIRI_HINT_DICTIONARY_LIMIT);
|
|
494
|
+
// Issue #68: path / large-file penalty configuration (can be overridden via environment variables).
// Both values come from serverConfig.penalties.
const PATH_MISS_DELTA = serverConfig.penalties.pathMissDelta;
const LARGE_FILE_DELTA = serverConfig.penalties.largeFileDelta;
|
|
228
497
|
const MAX_WHY_TAGS = 10;
// Item 3: priority map for "why" tags (a lower number means a higher priority).
// All actual tag prefixes used in the codebase
const WHY_TAG_PRIORITY = {
    // Hint-driven reasons
    artifact: 1, // User-provided hints (editing_path, failing_tests, hints)
    dictionary: 1, // Dictionary-provided hints
    // Content matches
    phrase: 2, // Multi-word literal matches (strongest signal)
    text: 3, // Single keyword matches
    metadata: 4, // Front matter / metadata filters & boosts
    substring: 4, // Substring hint expansion
    // Path and structure
    "path-phrase": 5, // Path contains multi-word phrase
    structural: 6, // Semantic similarity
    "path-segment": 7, // Path component matches
    "path-keyword": 8, // Path keyword match
    // Context signals
    dep: 9, // Dependency relationship
    near: 10, // Proximity to editing file
    boost: 11, // File type boost
    recent: 12, // Recently changed
    symbol: 13, // Symbol match
    penalty: 14, // Penalty explanations (keep for transparency)
    keyword: 15, // Generic keyword (deprecated, kept for compatibility)
};
|
|
247
519
|
// Reserve at least one slot for important structural tags.
// Keys are tag prefixes; values are the number of slots held back for each.
const RESERVED_WHY_SLOTS = {
    dep: 1, // Dependency relationships are critical
    symbol: 1, // Symbol boundaries help understand context
    near: 1, // Proximity explains file selection
    metadata: 1, // Preserve metadata reasons when filters/boosts are active
};
|
|
253
526
|
function parseOutputOptions(params) {
|
|
254
527
|
return {
|
|
@@ -271,6 +544,9 @@ function selectWhyTags(reasons) {
|
|
|
271
544
|
reasons = new Set(Array.from(reasons).slice(0, 1000));
|
|
272
545
|
}
|
|
273
546
|
const selected = new Set();
|
|
547
|
+
if (reasons.has("boost:links")) {
|
|
548
|
+
selected.add("boost:links");
|
|
549
|
+
}
|
|
274
550
|
const byCategory = new Map();
|
|
275
551
|
for (const reason of reasons) {
|
|
276
552
|
const prefix = reason.split(":")[0] ?? "";
|
|
@@ -336,6 +612,45 @@ const STOP_WORDS = new Set([
|
|
|
336
612
|
"need",
|
|
337
613
|
"goal",
|
|
338
614
|
]);
|
|
615
|
+
/**
 * Reorders ranked candidates so that hinted paths come first.
 *
 * Candidates whose path appears in `hintPaths` are emitted first, in hint
 * order, followed by the remaining candidates in their ranked order. Each
 * path is emitted at most once, using the first candidate seen for it. The
 * result holds at most `limit` entries, with `limit` clamped to the range
 * 1..rankedCandidates.length.
 *
 * @param rankedCandidates - candidates in ranked order, each with a `path`
 * @param hintPaths - paths to promote, in priority order
 * @param limit - requested maximum number of results
 * @returns the reordered, truncated candidate list
 */
function prioritizeHintCandidates(rankedCandidates, hintPaths, limit) {
    if (rankedCandidates.length === 0) {
        return [];
    }
    const cap = Math.max(1, Math.min(limit, rankedCandidates.length));
    // First candidate seen for each path wins.
    const firstByPath = new Map();
    for (const candidate of rankedCandidates) {
        if (!firstByPath.has(candidate.path)) {
            firstByPath.set(candidate.path, candidate);
        }
    }
    const ordered = [];
    const takenPaths = new Set();
    const take = (candidate) => {
        ordered.push(candidate);
        takenPaths.add(candidate.path);
    };
    // Pass 1: hinted paths, in hint order.
    for (const hintPath of hintPaths) {
        if (ordered.length >= cap) {
            break;
        }
        const hinted = firstByPath.get(hintPath);
        if (hinted && !takenPaths.has(hinted.path)) {
            take(hinted);
        }
    }
    // Pass 2: fill the remaining slots in ranked order.
    for (const candidate of rankedCandidates) {
        if (ordered.length >= cap) {
            break;
        }
        if (!takenPaths.has(candidate.path)) {
            take(candidate);
        }
    }
    return ordered;
}
|
|
339
654
|
function normalizeLimit(limit) {
|
|
340
655
|
if (!limit || Number.isNaN(limit)) {
|
|
341
656
|
return DEFAULT_SEARCH_LIMIT;
|
|
@@ -513,13 +828,358 @@ function ensureCandidate(map, filePath) {
|
|
|
513
828
|
ext: null,
|
|
514
829
|
embedding: null,
|
|
515
830
|
semanticSimilarity: null,
|
|
831
|
+
pathMatchHits: 0, // Issue #68: Track path match count
|
|
832
|
+
penalties: [], // Issue #68: Penalty log for telemetry
|
|
516
833
|
};
|
|
517
834
|
map.set(filePath, candidate);
|
|
518
835
|
}
|
|
519
836
|
return candidate;
|
|
520
837
|
}
|
|
838
|
+
/**
 * Expands the neighbourhood of each hinted path, one at a time, under a
 * database-query budget shared across all hints.
 *
 * Returns without doing anything when there are no hint paths or when
 * `config.perHintLimit` / `config.dbQueryBudget` is not positive. Stops
 * early once the shared budget reaches zero.
 *
 * @param params - expansion context; must include `hintPaths` and `config`.
 *   It is forwarded to expandSingleHintNeighborhood with `hintPath` and the
 *   shared `state` added.
 */
async function expandHintCandidatesForHints(params) {
    const { hintPaths, config } = params;
    const nothingToExpand = hintPaths.length === 0 ||
        config.perHintLimit <= 0 ||
        config.dbQueryBudget <= 0;
    if (nothingToExpand) {
        return;
    }
    // One mutable budget object is shared by every hint's expansion.
    const state = { remainingDbQueries: config.dbQueryBudget };
    for (const hintPath of hintPaths) {
        if (state.remainingDbQueries <= 0) {
            return;
        }
        await expandSingleHintNeighborhood({ ...params, hintPath, state });
    }
}
|
|
851
|
+
/**
 * Expands one hinted path in three stages — directory neighbours, dependency
 * neighbours, then semantic neighbours — spending at most
 * `config.perHintLimit` additions across the first two stages.
 *
 * Each stage is skipped when its limit is not positive, and the function
 * returns as soon as the per-hint allowance is used up. The count returned by
 * the semantic stage is not tracked because it runs last.
 *
 * @param args - expansion context including `config`; passed through unchanged
 *   to the stage helpers
 */
async function expandSingleHintNeighborhood(args) {
    const { perHintLimit, dirLimit, depOutLimit, depInLimit, semLimit } = args.config;
    let allowance = perHintLimit;
    if (allowance <= 0) {
        return;
    }
    if (dirLimit > 0) {
        const fromDirectory = await addHintDirectoryNeighbors(args, Math.min(dirLimit, allowance));
        allowance -= fromDirectory;
        if (allowance <= 0) {
            return;
        }
    }
    const wantsDependencies = depOutLimit > 0 || depInLimit > 0;
    if (wantsDependencies) {
        const fromDependencies = await addHintDependencyNeighbors(args, allowance);
        allowance -= fromDependencies;
        if (allowance <= 0) {
            return;
        }
    }
    if (semLimit > 0) {
        await addHintSemanticNeighbors(args, Math.min(semLimit, allowance));
    }
}
|
|
875
|
+
/**
 * Tries to spend `cost` units of the shared query budget.
 *
 * @param state - object holding the mutable `remainingDbQueries` counter
 * @param cost - units to spend (defaults to 1)
 * @returns true when the budget covered the cost and was decremented; false
 *   (leaving the budget untouched) when it was too small
 */
function useHintDbBudget(state, cost = 1) {
    const insufficient = state.remainingDbQueries < cost;
    if (!insufficient) {
        state.remainingDbQueries -= cost;
    }
    return !insufficient;
}
|
|
882
|
+
/**
 * Applies a one-time score boost to a candidate for the given reason.
 *
 * Nothing happens when `scoreDelta` is not positive or the candidate already
 * carries `reason`. Otherwise the score is raised, the reason recorded,
 * `pathMatchHits` lifted to at least 2, `matchLine` defaulted to 1 when
 * nullish, and `lang` / `ext` filled in if the candidate has none.
 *
 * @param candidate - mutable candidate with `score`, `reasons` (a Set),
 *   `pathMatchHits`, `matchLine`, `lang`, `ext`
 * @param reason - reason tag to record
 * @param scoreDelta - amount to add to the score
 * @param lang - optional language to fill in
 * @param ext - optional extension to fill in
 * @returns true when the boost was applied, false otherwise
 */
function applyHintReasonBoost(candidate, reason, scoreDelta, lang, ext) {
    const alreadyCredited = scoreDelta <= 0 || candidate.reasons.has(reason);
    if (alreadyCredited) {
        return false;
    }
    candidate.score += scoreDelta;
    candidate.reasons.add(reason);
    candidate.pathMatchHits = Math.max(candidate.pathMatchHits, 2);
    if (candidate.matchLine === null || candidate.matchLine === undefined) {
        candidate.matchLine = 1;
    }
    // Only fill in lang/ext; never overwrite values the candidate already has.
    if (!candidate.lang && lang) {
        candidate.lang = lang;
    }
    if (!candidate.ext && ext) {
        candidate.ext = ext;
    }
    return true;
}
|
|
898
|
+
/**
 * Promotes every hinted target path and then expands each one's neighbourhood.
 *
 * For each target, its candidate (created on demand) receives the hint
 * priority boost, a `dictionary:hint:<path>` or `artifact:hint:<path>` reason,
 * `pathMatchHits` of at least 3 and a default `matchLine` of 1; the promotion
 * is then logged. Unlike applyHintReasonBoost, the boost here is added
 * unconditionally for every target in the list. Returns immediately when
 * there are no targets.
 *
 * @param args - `{ db, tableAvailability, repoId, hintTargets, candidates,
 *   fileCache, weights, hintSeedMeta }`
 */
async function applyPathHintPromotions(args) {
    const { hintTargets, db, tableAvailability, repoId, candidates, weights } = args;
    if (hintTargets.length === 0) {
        return;
    }
    const hintBoost = computeHintPriorityBoost(weights);
    for (const { path: targetPath, origin, sourceHint } of hintTargets) {
        const fromDictionary = origin === "dictionary";
        const candidate = ensureCandidate(candidates, targetPath);
        candidate.score += hintBoost;
        candidate.reasons.add(`${fromDictionary ? "dictionary:hint" : "artifact:hint"}:${targetPath}`);
        candidate.pathMatchHits = Math.max(candidate.pathMatchHits, 3);
        candidate.matchLine ??= 1;
        await logHintExpansionEntry(db, tableAvailability, {
            repoId,
            hintValue: sourceHint,
            kind: fromDictionary ? "dictionary" : "path",
            targetPath,
            payload: {
                origin,
                source_hint: sourceHint,
            },
        });
    }
    await expandHintCandidatesForHints({
        db,
        tableAvailability,
        repoId,
        hintPaths: hintTargets.map((target) => target.path),
        candidates,
        fileCache: args.fileCache,
        weights,
        config: createHintExpansionConfig(weights),
        hintSeedMeta: args.hintSeedMeta,
    });
}
|
|
934
|
+
/**
 * Boosts non-binary files whose lower-cased path contains a substring hint.
 *
 * Returns immediately when `limitPerHint` or `boost` is not positive. One
 * query runs per hint, capped at `limitPerHint` rows in path order. Each
 * matching file is boosted once per hint (reason `substring:hint:<hint>`), and
 * each applied boost is logged.
 *
 * NOTE(review): the hint is bound into a LIKE pattern as-is, so "_" and "%"
 * inside a hint act as LIKE wildcards — confirm this is intended.
 *
 * @param db - client exposing `all(sql, params)`
 * @param tableAvailability - forwarded to logHintExpansionEntry
 * @param repoId - repository id bound to the query
 * @param hints - substring hints to search for
 * @param candidates - candidate map passed to ensureCandidate
 * @param limitPerHint - maximum rows fetched per hint
 * @param boost - score delta applied per matching file
 */
async function addHintSubstringMatches(db, tableAvailability, repoId, hints, candidates, limitPerHint, boost) {
    if (limitPerHint <= 0 || boost <= 0) {
        return;
    }
    for (const hint of hints) {
        // Unsafe characters are stripped before testing, so this only rejects
        // hints with no character from [a-zA-Z0-9_.-] at all.
        const safeCharsOnly = hint.replace(/[^a-zA-Z0-9_.-]/g, "");
        if (!SAFE_PATH_PATTERN.test(safeCharsOnly)) {
            continue;
        }
        const matches = await db.all(`
      SELECT path
      FROM file
      WHERE repo_id = ?
        AND is_binary = FALSE
        AND LOWER(path) LIKE '%' || ? || '%'
      ORDER BY path
      LIMIT ?
    `, [repoId, hint, limitPerHint]);
        for (const { path: matchedPath } of matches) {
            const candidate = ensureCandidate(candidates, matchedPath);
            const boosted = applyHintReasonBoost(candidate, `substring:hint:${hint}`, boost);
            if (!boosted) {
                continue;
            }
            await logHintExpansionEntry(db, tableAvailability, {
                repoId,
                hintValue: hint,
                kind: "substring",
                targetPath: matchedPath,
            });
        }
    }
}
|
|
965
|
+
/**
 * Boosts files that live under the same directory as the hinted path.
 *
 * Returns 0 without querying when `limit` is not positive, when the hint has
 * no usable parent directory ("", "." or "/"), or when the shared query
 * budget is exhausted. The query matches every path starting with `<dir>/`
 * and asks for one row more than `config.dirMaxFiles`, so a directory
 * exceeding that size is detected and skipped entirely. Rows are ordered by
 * hintNeighborRank, and boosting stops after `limit` additions.
 *
 * @param args - expansion context: `hintPath`, `state`, `db`, `repoId`,
 *   `config`, `candidates`, `hintSeedMeta`, `tableAvailability`
 * @param limit - maximum number of neighbours to add
 * @returns the number of candidates that received the directory boost
 */
async function addHintDirectoryNeighbors(args, limit) {
    if (limit <= 0) {
        return 0;
    }
    const parentDir = path.posix.dirname(args.hintPath);
    const hasUsableDir = Boolean(parentDir) && parentDir !== "." && parentDir !== "/";
    if (!hasUsableDir) {
        return 0;
    }
    if (!useHintDbBudget(args.state)) {
        return 0;
    }
    const { dirMaxFiles, dirBoost } = args.config;
    const siblings = await args.db.all(`
      SELECT path, lang, ext
      FROM file
      WHERE repo_id = ?
        AND is_binary = FALSE
        AND path LIKE ?
      ORDER BY path
      LIMIT ?
    `, [args.repoId, `${parentDir}/%`, dirMaxFiles + 1]);
    // Empty result, or more files than the cap allows: contribute nothing.
    if (siblings.length === 0 || siblings.length > dirMaxFiles) {
        return 0;
    }
    siblings.sort((left, right) => hintNeighborRank(left.path) - hintNeighborRank(right.path));
    let addedCount = 0;
    for (const sibling of siblings) {
        const isHintItself = sibling.path === args.hintPath;
        if (isHintItself || !SAFE_PATH_PATTERN.test(sibling.path)) {
            continue;
        }
        const candidate = ensureCandidate(args.candidates, sibling.path);
        const reason = `artifact:hint_dir:${args.hintPath}:${sibling.path}`;
        if (!applyHintReasonBoost(candidate, reason, dirBoost, sibling.lang, sibling.ext)) {
            continue;
        }
        addedCount += 1;
        const seedMeta = getHintSeedMeta(args.hintSeedMeta, args.hintPath);
        await logHintExpansionEntry(args.db, args.tableAvailability, {
            repoId: args.repoId,
            hintValue: seedMeta?.sourceHint ?? args.hintPath,
            kind: "directory",
            targetPath: sibling.path,
            payload: {
                origin: seedMeta?.origin ?? "artifact",
            },
        });
        if (addedCount >= limit) {
            break;
        }
    }
    return addedCount;
}
|
|
1018
|
+
/**
 * Boosts dependency neighbours of a hint file, outbound edges first and then
 * inbound edges, sharing a single per-hint allowance between both directions.
 *
 * @param args - Expansion context; `args.config.depOutLimit` / `depInLimit` cap each direction.
 * @param perHintRemaining - How many more candidates this hint may still add.
 * @returns Total number of candidates boosted across both directions.
 */
async function addHintDependencyNeighbors(args, perHintRemaining) {
    if (perHintRemaining <= 0) {
        return 0;
    }
    // Direction name paired with the config key holding its cap; order matters.
    const directions = [
        ["out", "depOutLimit"],
        ["in", "depInLimit"],
    ];
    let total = 0;
    for (const [direction, capKey] of directions) {
        const allowance = perHintRemaining - total;
        if (allowance <= 0) {
            return total;
        }
        const cap = args.config[capKey];
        if (cap > 0) {
            const directionLimit = Math.min(cap, allowance);
            if (directionLimit > 0) {
                total += await addHintDependencyDirection(args, directionLimit, direction);
            }
        }
    }
    return total;
}
|
|
1040
|
+
/**
 * Looks up path-kind dependency edges of the hint file in one direction and
 * hands the resulting paths to `applyDependencyRows`.
 *
 * For `"out"` the hint is the edge source and the destinations are returned;
 * for any other value the hint is the destination and the sources are returned.
 * One unit of the hint DB budget is consumed per call that reaches the query.
 *
 * @param args - Expansion context (db, repoId, hintPath, state, ...).
 * @param limit - Maximum number of candidates to boost.
 * @param direction - `"out"` or `"in"`.
 * @returns Number of candidates boosted.
 */
async function addHintDependencyDirection(args, limit, direction) {
    if (limit <= 0 || !useHintDbBudget(args.state)) {
        return 0;
    }
    // Over-fetch (4x, capped at 25) because rows may be dropped later by filtering.
    const fetchLimit = Math.min(limit * 4, 25);
    const bindings = [args.repoId, args.hintPath, fetchLimit];
    if (direction === "out") {
        const outgoing = await args.db.all(`
      SELECT dst
      FROM dependency
      WHERE repo_id = ?
        AND src_path = ?
        AND dst_kind = 'path'
      LIMIT ?
    `, bindings);
        const destinations = outgoing.map((edge) => edge.dst);
        return await applyDependencyRows(args, destinations, limit, direction);
    }
    const incoming = await args.db.all(`
      SELECT src_path
      FROM dependency
      WHERE repo_id = ?
        AND dst = ?
        AND dst_kind = 'path'
      LIMIT ?
    `, bindings);
    const sources = incoming.map((edge) => edge.src_path);
    return await applyDependencyRows(args, sources, limit, direction);
}
|
|
1069
|
+
/**
 * Applies the dependency boost to a list of neighbour paths.
 *
 * Paths are de-duplicated, filtered through `SAFE_PATH_PATTERN`, ordered by
 * `hintNeighborRank`, and boosted with `config.depBoost` until `limit`
 * candidates have been boosted. Each successful boost is logged.
 *
 * @param args - Expansion context (candidates, hintPath, config, db, ...).
 * @param paths - Raw neighbour paths read from the dependency table.
 * @param limit - Maximum number of candidates to boost.
 * @param direction - Edge direction recorded in the reason string and log payload.
 * @returns Number of candidates boosted.
 */
async function applyDependencyRows(args, paths, limit, direction) {
    if (paths.length === 0) {
        return 0;
    }
    const rankedPaths = [...new Set(paths)]
        .filter((entry) => entry && SAFE_PATH_PATTERN.test(entry))
        .sort((left, right) => hintNeighborRank(left) - hintNeighborRank(right));
    let boostedCount = 0;
    for (const neighborPath of rankedPaths) {
        if (neighborPath === args.hintPath) {
            // A file is never its own neighbour.
            continue;
        }
        const candidate = ensureCandidate(args.candidates, neighborPath);
        const reason = `artifact:hint_dep_${direction}:${args.hintPath}:${neighborPath}`;
        if (!applyHintReasonBoost(candidate, reason, args.config.depBoost)) {
            continue;
        }
        boostedCount += 1;
        const seedMeta = getHintSeedMeta(args.hintSeedMeta, args.hintPath);
        await logHintExpansionEntry(args.db, args.tableAvailability, {
            repoId: args.repoId,
            hintValue: seedMeta?.sourceHint ?? args.hintPath,
            kind: "dependency",
            targetPath: neighborPath,
            payload: {
                origin: seedMeta?.origin ?? "artifact",
                direction,
            },
        });
        if (boostedCount >= limit) {
            break;
        }
    }
    return boostedCount;
}
|
|
1102
|
+
/**
 * Boosts files under the hint's directory whose stored embedding is similar
 * to the hint file's embedding.
 *
 * Uses two units of the hint DB budget: one for listing candidate paths and
 * one for loading embeddings. A candidate is boosted by
 * `weights.structural * similarity` when its similarity is finite and at
 * least `config.semThreshold`.
 *
 * @param args - Expansion context (db, repoId, hintPath, config, weights, candidates, state).
 * @param limit - Maximum number of candidates to boost.
 * @returns Number of candidates boosted.
 */
async function addHintSemanticNeighbors(args, limit) {
    if (limit <= 0) {
        return 0;
    }
    const parentDir = path.posix.dirname(args.hintPath);
    const isUsableDir = Boolean(parentDir) && parentDir !== "." && parentDir !== "/";
    if (!isUsableDir) {
        return 0;
    }
    if (!useHintDbBudget(args.state)) {
        return 0;
    }
    const siblingRows = await args.db.all(`
      SELECT path
      FROM file
      WHERE repo_id = ?
        AND is_binary = FALSE
        AND path LIKE ?
      ORDER BY path
      LIMIT ?
    `, [args.repoId, `${parentDir}/%`, args.config.semDirCandidateLimit]);
    const siblingPaths = [];
    for (const row of siblingRows) {
        if (row.path !== args.hintPath) {
            siblingPaths.push(row.path);
        }
    }
    if (siblingPaths.length === 0) {
        return 0;
    }
    // The embedding lookup is a second query and is charged separately.
    if (!useHintDbBudget(args.state)) {
        return 0;
    }
    const embeddings = await fetchEmbeddingMap(args.db, args.repoId, [
        args.hintPath,
        ...siblingPaths,
    ]);
    const hintVector = embeddings.get(args.hintPath);
    if (!hintVector) {
        return 0;
    }
    let boostedCount = 0;
    for (const siblingPath of siblingPaths) {
        if (!SAFE_PATH_PATTERN.test(siblingPath)) {
            continue;
        }
        const siblingVector = embeddings.get(siblingPath);
        if (!siblingVector) {
            continue;
        }
        const similarity = structuralSimilarity(hintVector, siblingVector);
        const isSimilarEnough = Number.isFinite(similarity) && !(similarity < args.config.semThreshold);
        if (!isSimilarEnough) {
            continue;
        }
        const candidate = ensureCandidate(args.candidates, siblingPath);
        const reason = `artifact:hint_sem:${args.hintPath}:${siblingPath}`;
        if (!applyHintReasonBoost(candidate, reason, args.weights.structural * similarity)) {
            continue;
        }
        boostedCount += 1;
        if (boostedCount >= limit) {
            break;
        }
    }
    return boostedCount;
}
|
|
1161
|
+
/**
 * Sort key for hint neighbours; lower values are preferred.
 *
 * 0 = source roots, 1 = anything else, 2 = test-like paths, 3 = docs.
 * The source-root check runs first, so a test file under `src/` still ranks 0.
 *
 * @param filePath - Repository-relative path.
 * @returns Rank in the range 0..3.
 */
function hintNeighborRank(filePath) {
    const sourceRoots = ["src/", "external/assay-kit/src/"];
    if (sourceRoots.some((root) => filePath.startsWith(root))) {
        return 0;
    }
    if (isTestLikePath(filePath)) {
        return 2;
    }
    return filePath.startsWith("docs/") ? 3 : 1;
}
|
|
1173
|
+
/**
 * Reports whether a path looks like a test or fixture file.
 *
 * A path qualifies when it contains a `test/`, `tests/`, `__tests__/` or
 * `fixtures/` directory segment, or when the file name ends in
 * `.spec.*` / `.test.*` with a `ts`, `tsx`, `js` or `jsx` extension.
 * JavaScript suffixes are included so the result agrees with the test
 * suffix list used by path-based scoring elsewhere in this file.
 *
 * @param filePath - Repository-relative path using `/` separators.
 * @returns `true` when the path is test-like.
 */
function isTestLikePath(filePath) {
    const inTestDirectory = /(^|\/)(tests?|__tests__|fixtures)\//.test(filePath);
    if (inTestDirectory) {
        return true;
    }
    return /\.(spec|test)\.[jt]sx?$/.test(filePath);
}
|
|
521
1180
|
function parseEmbedding(vectorJson, vectorDims) {
|
|
522
|
-
|
|
1181
|
+
const dims = vectorDims === null ? null : typeof vectorDims === "bigint" ? Number(vectorDims) : vectorDims;
|
|
1182
|
+
if (!vectorJson || !dims || dims <= 0) {
|
|
523
1183
|
return null;
|
|
524
1184
|
}
|
|
525
1185
|
try {
|
|
@@ -528,7 +1188,7 @@ function parseEmbedding(vectorJson, vectorDims) {
|
|
|
528
1188
|
return null;
|
|
529
1189
|
}
|
|
530
1190
|
const values = [];
|
|
531
|
-
for (let i = 0; i < parsed.length && i <
|
|
1191
|
+
for (let i = 0; i < parsed.length && i < dims; i += 1) {
|
|
532
1192
|
const raw = parsed[i];
|
|
533
1193
|
const num = typeof raw === "number" ? raw : Number(raw);
|
|
534
1194
|
if (!Number.isFinite(num)) {
|
|
@@ -536,7 +1196,7 @@ function parseEmbedding(vectorJson, vectorDims) {
|
|
|
536
1196
|
}
|
|
537
1197
|
values.push(num);
|
|
538
1198
|
}
|
|
539
|
-
return values.length ===
|
|
1199
|
+
return values.length === dims ? values : null;
|
|
540
1200
|
}
|
|
541
1201
|
catch {
|
|
542
1202
|
return null;
|
|
@@ -630,18 +1290,6 @@ function buildSnippetPreview(content, startLine, endLine) {
|
|
|
630
1290
|
}
|
|
631
1291
|
return `${snippet.slice(0, 239)}…`;
|
|
632
1292
|
}
|
|
633
|
-
function prependLineNumbers(snippet, startLine) {
|
|
634
|
-
const lines = snippet.split(/\r?\n/);
|
|
635
|
-
if (lines.length === 0) {
|
|
636
|
-
return snippet;
|
|
637
|
-
}
|
|
638
|
-
// Calculate required width from the last line number (dynamic sizing)
|
|
639
|
-
const endLine = startLine + lines.length - 1;
|
|
640
|
-
const width = String(endLine).length;
|
|
641
|
-
return lines
|
|
642
|
-
.map((line, index) => `${String(startLine + index).padStart(width, " ")}→${line}`)
|
|
643
|
-
.join("\n");
|
|
644
|
-
}
|
|
645
1293
|
/**
|
|
646
1294
|
* トークン数を推定(コンテンツベース)
|
|
647
1295
|
* 実際のGPTトークナイザーを使用して正確にカウント
|
|
@@ -668,14 +1316,399 @@ function estimateTokensFromContent(content, startLine, endLine) {
|
|
|
668
1316
|
}
|
|
669
1317
|
}
|
|
670
1318
|
/**
|
|
671
|
-
* 複数単語クエリを単語分割してOR検索条件を構築
|
|
672
|
-
* @param query - 検索クエリ文字列
|
|
673
|
-
* @returns 単語配列(2文字以下を除外)
|
|
1319
|
+
* 複数単語クエリを単語分割してOR検索条件を構築
|
|
1320
|
+
* @param query - 検索クエリ文字列
|
|
1321
|
+
* @returns 単語配列(2文字以下を除外)
|
|
1322
|
+
*/
|
|
1323
|
+
function splitQueryWords(query) {
    // Break on runs of whitespace, slashes, hyphens and underscores,
    // keeping only pieces longer than two characters.
    const tokens = [];
    for (const piece of query.split(/[\s/\-_]+/)) {
        if (piece.length > 2) {
            tokens.push(piece);
        }
    }
    // When every piece was too short, fall back to the untouched query.
    return tokens.length === 0 ? [query] : tokens;
}
|
|
1328
|
+
/**
 * Resolves a user-supplied metadata filter key to `{ key, source, strict? }`.
 *
 * The lower-cased key is first looked up in `METADATA_ALIAS_MAP`; otherwise
 * the first entry of `METADATA_KEY_PREFIXES` whose prefix matches decides the
 * source, and the text after the prefix becomes the key.
 *
 * @param rawKey - Key as typed by the caller.
 * @returns A fresh descriptor object, or `null` when the key is empty,
 *          unrecognised, or consists of a prefix only.
 */
function normalizeMetadataFilterKey(rawKey) {
    if (!rawKey) {
        return null;
    }
    const lowered = rawKey.toLowerCase();
    const aliased = METADATA_ALIAS_MAP.get(lowered);
    if (aliased) {
        // Copy so callers cannot mutate the shared alias table entry.
        return { ...aliased };
    }
    const prefixRule = METADATA_KEY_PREFIXES.find((rule) => lowered.startsWith(rule.prefix));
    if (!prefixRule) {
        return null;
    }
    const remainder = lowered.slice(prefixRule.prefix.length);
    if (!remainder) {
        return null;
    }
    const descriptor = { key: remainder, source: prefixRule.source };
    if (prefixRule.strict !== undefined) {
        descriptor.strict = prefixRule.strict;
    }
    return descriptor;
}
|
|
1352
|
+
/**
 * Normalises a filter value into a list of trimmed, non-empty strings.
 *
 * Accepts a single string or an array; non-string array items and any other
 * input type are ignored.
 *
 * @param value - Raw filter value.
 * @returns A new array of trimmed strings (possibly empty).
 */
function normalizeFilterValues(value) {
    let rawItems;
    if (typeof value === "string") {
        rawItems = [value];
    }
    else if (Array.isArray(value)) {
        rawItems = value;
    }
    else {
        rawItems = [];
    }
    return rawItems
        .filter((item) => typeof item === "string")
        .map((item) => item.trim())
        .filter((item) => item.length > 0);
}
|
|
1371
|
+
/**
 * Converts a `{ key: value | value[] }` parameter object into a list of
 * metadata filters.
 *
 * Entries whose key is not recognised by `normalizeMetadataFilterKey`, or
 * whose value yields no usable strings, are dropped.
 *
 * @param input - Raw filter parameter; non-objects produce an empty list.
 * @returns Array of `{ key, values, source, strict? }` filters.
 */
function normalizeMetadataFiltersParam(input) {
    if (!input || typeof input !== "object") {
        return [];
    }
    return Object.entries(input).reduce((collected, [rawKey, rawValue]) => {
        const resolved = normalizeMetadataFilterKey(rawKey);
        if (!resolved) {
            return collected;
        }
        const values = normalizeFilterValues(rawValue);
        if (values.length === 0) {
            return collected;
        }
        const filter = { key: resolved.key, values, source: resolved.source };
        // `strict` is only carried over when the key descriptor defines it.
        if (resolved.strict !== undefined) {
            filter.strict = resolved.strict;
        }
        collected.push(filter);
        return collected;
    }, []);
}
|
|
1397
|
+
/**
 * Merges filters that share the same source, key and strictness.
 *
 * Values of later duplicates are appended to the first occurrence, skipping
 * values already present when compared case-insensitively. Filters without
 * values are ignored and the input filters are never mutated.
 *
 * @param filters - Filters to merge.
 * @returns New array of merged filters in first-seen order.
 */
function mergeMetadataFilters(filters) {
    const buckets = new Map();
    for (const { key, source, strict, values } of filters) {
        if (values.length === 0) {
            continue;
        }
        const bucketId = `${source ?? "*"}::${key}::${strict ? "strict" : "hint"}`;
        const bucket = buckets.get(bucketId);
        if (!bucket) {
            const created = { key, source, values: [...values] };
            if (strict !== undefined) {
                created.strict = strict;
            }
            buckets.set(bucketId, created);
            continue;
        }
        const seen = new Set(bucket.values.map((known) => known.toLowerCase()));
        for (const value of values) {
            const folded = value.toLowerCase();
            if (seen.has(folded)) {
                continue;
            }
            bucket.values.push(value);
            seen.add(folded);
        }
    }
    return [...buckets.values()];
}
|
|
1427
|
+
/**
 * Extracts inline `key:value` metadata filters from a free-text query.
 *
 * Values may be bare, double-quoted or single-quoted. Only tokens whose key
 * is accepted by `normalizeMetadataFilterKey` are removed from the query;
 * everything else stays in the cleaned text.
 *
 * @param query - Raw query string.
 * @returns `{ cleanedQuery, filters }`: the query without recognised filter
 *          tokens (whitespace collapsed and trimmed) plus the merged filters.
 */
function parseInlineMetadataFilters(query) {
    if (!query) {
        return { cleanedQuery: "", filters: [] };
    }
    const tokenPattern = /(\b[\w.]+):("[^"]+"|'[^']+'|[^\s]+)/g;
    const recognized = [];
    for (const hit of query.matchAll(tokenPattern)) {
        const resolved = normalizeMetadataFilterKey(hit[1] ?? "");
        if (!resolved) {
            continue;
        }
        const token = hit[2] ?? "";
        const isQuoted = (token.startsWith('"') && token.endsWith('"')) ||
            (token.startsWith("'") && token.endsWith("'"));
        const value = (isQuoted ? token.slice(1, -1) : token).trim();
        if (!value) {
            continue;
        }
        const filter = { key: resolved.key, source: resolved.source, values: [value] };
        if (resolved.strict !== undefined) {
            filter.strict = resolved.strict;
        }
        recognized.push({ start: hit.index, end: hit.index + hit[0].length, filter });
    }
    if (recognized.length === 0) {
        return { cleanedQuery: query.trim(), filters: [] };
    }
    // Stitch together the text between recognised tokens.
    const keptSegments = [];
    let cursor = 0;
    for (const { start, end } of recognized) {
        keptSegments.push(query.slice(cursor, start));
        cursor = end;
    }
    keptSegments.push(query.slice(cursor));
    const cleanedQuery = keptSegments.join("").replace(/\s{2,}/g, " ").trim();
    return {
        cleanedQuery,
        filters: mergeMetadataFilters(recognized.map((entry) => entry.filter)),
    };
}
|
|
1478
|
+
/**
 * Builds one correlated `EXISTS (...)` SQL condition per metadata filter.
 *
 * Each condition matches rows of `document_metadata_kv` for the same repo and
 * path as the outer table `alias`, restricted to the filter's key (and source
 * when given), where the value contains any of the filter values (ILIKE).
 *
 * @param filters - Filters to translate; those without key or values are skipped.
 * @param alias - Outer table alias interpolated into the SQL; must be "f" or "mk".
 * @returns Array of `{ sql, params }` clauses with positional parameters.
 * @throws Error when `alias` is not one of the allowed literals.
 */
function buildMetadataFilterConditions(filters, alias = "f") {
    // The alias is interpolated into SQL text, so only known literals are accepted.
    if (alias !== "f" && alias !== "mk") {
        throw new Error(`Invalid SQL alias: ${alias}`);
    }
    return filters
        .filter((filter) => filter.key && filter.values.length !== 0)
        .map(({ key, source, values }) => {
        const conditions = [`mk.repo_id = ${alias}.repo_id`, `mk.path = ${alias}.path`];
        const params = [];
        if (source) {
            conditions.push("mk.source = ?");
            params.push(source);
        }
        const valueMatch = values.map(() => "mk.value ILIKE ?").join(" OR ");
        conditions.push("mk.key = ?", `(${valueMatch})`);
        params.push(key, ...values.map((value) => `%${value}%`));
        return {
            sql: `EXISTS (SELECT 1 FROM document_metadata_kv mk WHERE ${conditions.join(" AND ")})`,
            params,
        };
    });
}
|
|
1504
|
+
/**
 * Decides whether an error thrown by the database means that `table` does
 * not exist (for example, an index created before the table was introduced).
 *
 * The message must name the table AND indicate absence. Merely mentioning the
 * table is not enough: errors that echo the SQL text or report a different
 * problem on that table must propagate instead of being treated as a
 * missing-table condition by callers.
 *
 * @param error - Value caught from a query; non-Error values never match.
 * @param table - Table name to look for.
 * @returns `true` when the error reports the table as missing.
 */
function isTableMissingError(error, table) {
    if (!(error instanceof Error) || !table) {
        return false;
    }
    const message = String(error.message ?? "");
    if (!message.includes(table)) {
        return false;
    }
    if (message.includes(`Table with name ${table}`)) {
        // Catalog-style wording, matched on the exact phrase the original check used.
        return true;
    }
    // Other common ways a driver reports an absent table or relation.
    return /does not exist|no such table|not found/i.test(message);
}
|
|
1510
|
+
/**
 * Runs a query against the metadata tables, degrading to an empty result
 * when they are unavailable.
 *
 * @param db - Database handle exposing `all(sql, params)`.
 * @param tableAvailability - Flags object; `hasMetadataTables` gates the query.
 * @param sql - SQL text.
 * @param params - Positional parameters.
 * @returns Query rows, or `[]` when the tables are flagged absent or the
 *          query fails with a missing-table error. Other errors are rethrown.
 */
async function safeMetadataQuery(db, tableAvailability, sql, params) {
    if (!tableAvailability.hasMetadataTables) {
        return [];
    }
    let rows;
    try {
        rows = await db.all(sql, params);
    }
    catch (error) {
        if (!isTableMissingError(error, "document_metadata_kv")) {
            throw error;
        }
        console.warn("Metadata tables not found; disabling metadata filters and boosts until database is upgraded.");
        rows = [];
    }
    return rows;
}
|
|
1525
|
+
/**
 * Runs a query against the markdown link table, degrading to an empty result
 * when it is unavailable.
 *
 * @param db - Database handle exposing `all(sql, params)`.
 * @param tableAvailability - Flags object; `hasLinkTable` gates the query.
 * @param sql - SQL text.
 * @param params - Positional parameters.
 * @returns Query rows, or `[]` when the table is flagged absent or the query
 *          fails with a missing-table error. Other errors are rethrown.
 */
async function safeLinkQuery(db, tableAvailability, sql, params) {
    if (!tableAvailability.hasLinkTable) {
        return [];
    }
    let rows;
    try {
        rows = await db.all(sql, params);
    }
    catch (error) {
        if (!isTableMissingError(error, "markdown_link")) {
            throw error;
        }
        console.warn("Markdown link table not found; inbound link boosting disabled until database is upgraded.");
        rows = [];
    }
    return rows;
}
|
|
1540
|
+
/**
 * Fetches files selected purely by metadata filters (no keyword matching).
 *
 * @param db - Database handle exposing `all(sql, params)`.
 * @param tableAvailability - Flags object; `hasMetadataTables` gates the query.
 * @param repoId - Repository identifier.
 * @param filters - Metadata filters; every filter must match.
 * @param limit - Maximum number of rows.
 * @returns Rows of `{ path, lang, ext, content }` ordered by path, or `[]`
 *          when there is nothing to query or the metadata table is missing.
 */
async function fetchMetadataOnlyCandidates(db, tableAvailability, repoId, filters, limit) {
    const nothingToQuery = !tableAvailability.hasMetadataTables || filters.length === 0 || limit <= 0;
    if (nothingToQuery) {
        return [];
    }
    const conditions = ["f.repo_id = ?"];
    const bindings = [repoId];
    buildMetadataFilterConditions(filters).forEach((clause) => {
        conditions.push(clause.sql);
        bindings.push(...clause.params);
    });
    // LIMIT is the last placeholder, so its value is bound last.
    bindings.push(limit);
    const statement = `
    SELECT f.path, f.lang, f.ext, b.content
    FROM file f
    JOIN blob b ON b.hash = f.blob_hash
    WHERE ${conditions.join(" AND ")}
    ORDER BY f.path
    LIMIT ?
  `;
    try {
        return await db.all(statement, bindings);
    }
    catch (error) {
        if (!isTableMissingError(error, "document_metadata_kv")) {
            throw error;
        }
        console.warn("Metadata tables not found; disabling metadata-only searches until database is upgraded.");
        return [];
    }
}
|
|
1571
|
+
/**
 * Finds files whose metadata values contain any of the given keywords.
 *
 * Rows are scored by the number of matching metadata entries, optionally
 * excluding a set of paths and requiring all metadata filters to match.
 *
 * @param db - Database handle.
 * @param tableAvailability - Flags object; `hasMetadataTables` gates the query.
 * @param repoId - Repository identifier.
 * @param keywords - Keywords matched with ILIKE `%keyword%`.
 * @param filters - Additional metadata filters.
 * @param limit - Maximum number of rows.
 * @param excludePaths - Set of paths to leave out.
 * @returns Rows of `{ path, lang, ext, content, score }` with a numeric score.
 */
async function fetchMetadataKeywordMatches(db, tableAvailability, repoId, keywords, filters, limit, excludePaths) {
    const nothingToQuery = !tableAvailability.hasMetadataTables || keywords.length === 0 || limit <= 0;
    if (nothingToQuery) {
        return [];
    }
    // Bindings are pushed in the same order the placeholders appear in the SQL.
    const keywordMatch = keywords.map(() => "mk.value ILIKE ?").join(" OR ");
    const conditions = ["mk.repo_id = ?", `(${keywordMatch})`];
    const bindings = [repoId, ...keywords.map((keyword) => `%${keyword}%`)];
    if (excludePaths.size > 0) {
        const slots = Array.from(excludePaths, () => "?").join(", ");
        conditions.push(`f.path NOT IN (${slots})`);
        bindings.push(...excludePaths);
    }
    for (const { sql: clauseSql, params: clauseParams } of buildMetadataFilterConditions(filters, "f")) {
        conditions.push(clauseSql);
        bindings.push(...clauseParams);
    }
    bindings.push(limit);
    const statement = `
    SELECT f.path, f.lang, f.ext, b.content, COUNT(*) AS score
    FROM document_metadata_kv mk
    JOIN file f ON f.repo_id = mk.repo_id AND f.path = mk.path
    JOIN blob b ON b.hash = f.blob_hash
    WHERE ${conditions.join(" AND ")}
    GROUP BY f.path, f.lang, f.ext, b.content
    ORDER BY score DESC, f.path
    LIMIT ?
  `;
    const matches = await safeMetadataQuery(db, tableAvailability, statement, bindings);
    // COUNT(*) may come back as a non-number (e.g. bigint); normalise it.
    return matches.map((match) => ({ ...match, score: Number(match.score ?? 1) }));
}
|
|
1604
|
+
/**
 * Loads all metadata key/value entries for a set of paths.
 *
 * @param db - Database handle.
 * @param tableAvailability - Flags object; `hasMetadataTables` gates the query.
 * @param repoId - Repository identifier.
 * @param paths - Paths to look up.
 * @returns Map from path to an array of `{ key, value, source }`; paths
 *          without metadata have no entry.
 */
async function loadMetadataForPaths(db, tableAvailability, repoId, paths) {
    const byPath = new Map();
    if (!tableAvailability.hasMetadataTables || paths.length === 0) {
        return byPath;
    }
    const slots = paths.map(() => "?").join(", ");
    const statement = `
    SELECT path, key, value, source
    FROM document_metadata_kv
    WHERE repo_id = ? AND path IN (${slots})
  `;
    const records = await safeMetadataQuery(db, tableAvailability, statement, [repoId, ...paths]);
    for (const record of records) {
        let bucket = byPath.get(record.path);
        if (!bucket) {
            bucket = [];
            byPath.set(record.path, bucket);
        }
        bucket.push({
            key: record.key,
            value: record.value,
            // A NULL source is exposed as undefined.
            source: record.source ?? undefined,
        });
    }
    return byPath;
}
|
|
1628
|
+
/**
 * Counts how many markdown links resolve to each of the given paths.
 *
 * @param db - Database handle.
 * @param tableAvailability - Flags object; `hasLinkTable` gates the query.
 * @param repoId - Repository identifier.
 * @param paths - Target paths to count inbound links for.
 * @returns Map from path to its inbound link count as a number; paths with
 *          no inbound links have no entry.
 */
async function loadInboundLinkCounts(db, tableAvailability, repoId, paths) {
    const inboundByPath = new Map();
    if (!tableAvailability.hasLinkTable || paths.length === 0) {
        return inboundByPath;
    }
    const slots = paths.map(() => "?").join(", ");
    const statement = `
    SELECT resolved_path AS path, COUNT(*) AS inbound
    FROM markdown_link
    WHERE repo_id = ? AND resolved_path IS NOT NULL AND resolved_path IN (${slots})
    GROUP BY resolved_path
  `;
    const tallies = await safeLinkQuery(db, tableAvailability, statement, [repoId, ...paths]);
    for (const { path: targetPath, inbound } of tallies) {
        // Number() accepts bigint as well, so one conversion covers both shapes.
        inboundByPath.set(targetPath, Number(inbound ?? 0));
    }
    return inboundByPath;
}
|
|
1647
|
+
/**
 * Computes a score boost from a file's metadata entries.
 *
 * Each entry contributes `METADATA_MATCH_WEIGHT` once if its lower-cased
 * value contains any keyword, plus `METADATA_FILTER_MATCH_WEIGHT` if the
 * lower-cased value is itself in `filterValueSet`. The total is capped at 1.5.
 *
 * @param entries - Metadata entries (`{ value, ... }`) or a nullish/empty list.
 * @param keywordSet - Iterable of keywords, compared against lower-cased values.
 * @param filterValueSet - Set of filter values, compared against lower-cased values.
 * @returns Boost in the range 0..1.5.
 */
function computeMetadataBoost(entries, keywordSet, filterValueSet) {
    if (!entries || entries.length === 0) {
        return 0;
    }
    const keywords = [...keywordSet];
    const total = entries.reduce((runningBoost, entry) => {
        const lowered = entry.value.toLowerCase();
        let next = runningBoost;
        // At most one keyword hit is counted per entry.
        if (keywords.some((keyword) => lowered.includes(keyword))) {
            next += METADATA_MATCH_WEIGHT;
        }
        if (filterValueSet.has(lowered)) {
            next += METADATA_FILTER_MATCH_WEIGHT;
        }
        return next;
    }, 0);
    return Math.min(total, 1.5);
}
|
|
1666
|
+
/**
 * Converts an inbound link count into a score boost.
 *
 * The boost is `log1p(count) * INBOUND_LINK_WEIGHT`, capped at 1.0.
 *
 * @param count - Inbound link count (number or bigint); missing, zero,
 *                negative and NaN counts yield no boost.
 * @returns Boost in the range 0..1.
 */
function computeInboundLinkBoost(count) {
    const links = typeof count === "bigint" ? Number(count) : count;
    if (!links || links <= 0) {
        return 0;
    }
    return Math.min(Math.log1p(links) * INBOUND_LINK_WEIGHT, 1.0);
}
|
|
1676
|
+
/**
 * Checks whether a file's metadata satisfies every filter.
 *
 * A filter is satisfied when some entry has the same key, the same source
 * (only checked when the filter specifies one), and a value that contains
 * any of the filter values, compared case-insensitively.
 *
 * @param entries - Metadata entries of the candidate (may be nullish).
 * @param filters - Filters that must all match.
 * @returns `true` when there are no filters or all of them are satisfied.
 */
function candidateMatchesMetadataFilters(entries, filters) {
    if (filters.length === 0) {
        return true;
    }
    if (!entries || entries.length === 0) {
        return false;
    }
    for (const { key, source, values } of filters) {
        const needles = values.map((value) => value.toLowerCase());
        const satisfied = entries.some((entry) => {
            const sameKey = entry.key === key;
            const sourceMismatch = Boolean(source) && entry.source !== source;
            if (!sameKey || sourceMismatch) {
                return false;
            }
            const haystack = entry.value.toLowerCase();
            return needles.some((needle) => haystack.includes(needle));
        });
        if (!satisfied) {
            return false;
        }
    }
    return true;
}
|
|
1697
|
+
/**
|
|
1698
|
+
* パス固有のマルチプライヤーを取得(最長プレフィックスマッチ)
|
|
1699
|
+
* 配列の順序に依存せず、常に最長一致のプレフィックスを選択
|
|
1700
|
+
* @param filePath - ファイルパス
|
|
1701
|
+
* @param profileConfig - ブーストプロファイル設定
|
|
1702
|
+
* @returns パス固有のマルチプライヤー(マッチなしの場合は1.0)
|
|
674
1703
|
*/
|
|
675
|
-
function
|
|
676
|
-
|
|
677
|
-
const
|
|
678
|
-
|
|
1704
|
+
function getPathMultiplier(filePath, profileConfig) {
    // Track the longest matching prefix seen so far; the result does not
    // depend on the order of `pathMultipliers`.
    let longestPrefix = "";
    let chosenMultiplier = 1.0;
    for (const rule of profileConfig.pathMultipliers) {
        const isLongerMatch = filePath.startsWith(rule.prefix) && rule.prefix.length > longestPrefix.length;
        if (isLongerMatch) {
            longestPrefix = rule.prefix;
            chosenMultiplier = rule.multiplier;
        }
    }
    // Stays at 1.0 when no prefix matched.
    return chosenMultiplier;
}
|
|
680
1713
|
/**
|
|
681
1714
|
* files_search専用のファイルタイプブースト適用(v0.7.0+: 設定可能な乗算的ペナルティ)
|
|
@@ -686,7 +1719,7 @@ function splitQueryWords(query) {
|
|
|
686
1719
|
* @param weights - スコアリングウェイト設定(乗算的ペナルティに使用)
|
|
687
1720
|
* @returns ブースト適用後のスコア
|
|
688
1721
|
*/
|
|
689
|
-
function applyFileTypeBoost(path, baseScore,
|
|
1722
|
+
function applyFileTypeBoost(path, baseScore, profileConfig, weights) {
|
|
690
1723
|
// Blacklisted directories that are almost always irrelevant for code context
|
|
691
1724
|
const blacklistedDirs = [
|
|
692
1725
|
".cursor/",
|
|
@@ -699,63 +1732,45 @@ function applyFileTypeBoost(path, baseScore, profile = "default", weights) {
|
|
|
699
1732
|
];
|
|
700
1733
|
for (const dir of blacklistedDirs) {
|
|
701
1734
|
if (path.startsWith(dir)) {
|
|
702
|
-
//
|
|
703
|
-
if (
|
|
1735
|
+
// ✅ Decoupled: Check denylist overrides from profile config
|
|
1736
|
+
if (profileConfig.denylistOverrides.includes(dir)) {
|
|
704
1737
|
continue;
|
|
705
1738
|
}
|
|
706
|
-
|
|
1739
|
+
// v1.0.0: Use multiplicative penalty instead of absolute -100
|
|
1740
|
+
return baseScore * weights.blacklistPenaltyMultiplier;
|
|
707
1741
|
}
|
|
708
1742
|
}
|
|
709
|
-
|
|
710
|
-
return baseScore;
|
|
711
|
-
}
|
|
712
|
-
// Extract file extension for type detection
|
|
1743
|
+
const fileName = path.split("/").pop() ?? "";
|
|
713
1744
|
const ext = path.includes(".") ? path.substring(path.lastIndexOf(".")) : null;
|
|
714
|
-
// ✅ UNIFIED LOGIC: Use same multiplicative penalties as context_bundle
|
|
715
|
-
if (profile === "docs") {
|
|
716
|
-
// Boost documentation files
|
|
717
|
-
if (path.endsWith(".md") || path.endsWith(".yaml") || path.endsWith(".yml")) {
|
|
718
|
-
return baseScore * 1.5; // 50% boost (same as context_bundle)
|
|
719
|
-
}
|
|
720
|
-
// Penalty for implementation files in docs mode
|
|
721
|
-
if (path.startsWith("src/") &&
|
|
722
|
-
(path.endsWith(".ts") || path.endsWith(".js") || path.endsWith(".tsx"))) {
|
|
723
|
-
return baseScore * 0.5; // 50% penalty
|
|
724
|
-
}
|
|
725
|
-
return baseScore;
|
|
726
|
-
}
|
|
727
|
-
// Default profile: Use configurable multiplicative penalties
|
|
728
1745
|
let multiplier = 1.0;
|
|
729
|
-
|
|
730
|
-
// ✅ Step 1: Config files get strongest penalty (95% reduction)
|
|
1746
|
+
// ✅ Step 1: Config files
|
|
731
1747
|
if (isConfigFile(path, fileName)) {
|
|
732
|
-
multiplier *=
|
|
1748
|
+
multiplier *= profileConfig.fileTypeMultipliers.config;
|
|
733
1749
|
return baseScore * multiplier;
|
|
734
1750
|
}
|
|
735
|
-
// ✅ Step 2: Documentation files
|
|
1751
|
+
// ✅ Step 2: Documentation files
|
|
736
1752
|
const docExtensions = [".md", ".yaml", ".yml", ".mdc"];
|
|
737
1753
|
if (docExtensions.some((docExt) => path.endsWith(docExt))) {
|
|
738
|
-
multiplier *=
|
|
1754
|
+
multiplier *= profileConfig.fileTypeMultipliers.doc;
|
|
739
1755
|
return baseScore * multiplier;
|
|
740
1756
|
}
|
|
741
|
-
// ✅ Step 3: Implementation
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
multiplier *=
|
|
747
|
-
|
|
748
|
-
else if (path.startsWith("src/lib/")) {
|
|
749
|
-
multiplier *= weights.implBoostMultiplier * 1.2;
|
|
1757
|
+
// ✅ Step 3: Implementation files with path-specific boosts
|
|
1758
|
+
const implMultiplier = profileConfig.fileTypeMultipliers.impl;
|
|
1759
|
+
// ✅ Use longest-prefix-match logic (order-independent)
|
|
1760
|
+
const pathBoost = getPathMultiplier(path, profileConfig);
|
|
1761
|
+
if (pathBoost !== 1.0) {
|
|
1762
|
+
multiplier *= implMultiplier * pathBoost;
|
|
1763
|
+
return baseScore * multiplier;
|
|
750
1764
|
}
|
|
751
|
-
|
|
1765
|
+
// Fallback for other src/ files
|
|
1766
|
+
if (path.startsWith("src/")) {
|
|
752
1767
|
if (ext === ".ts" || ext === ".tsx" || ext === ".js") {
|
|
753
|
-
multiplier *=
|
|
1768
|
+
multiplier *= implMultiplier;
|
|
754
1769
|
}
|
|
755
1770
|
}
|
|
756
|
-
// Test files:
|
|
1771
|
+
// Test files: multiplicative penalty (v1.0.0)
|
|
757
1772
|
if (path.startsWith("tests/") || path.startsWith("test/")) {
|
|
758
|
-
return baseScore *
|
|
1773
|
+
return baseScore * weights.testPenaltyMultiplier;
|
|
759
1774
|
}
|
|
760
1775
|
return baseScore * multiplier;
|
|
761
1776
|
}
|
|
@@ -767,49 +1782,115 @@ function applyPathBasedScoring(candidate, lowerPath, weights, extractedTerms) {
|
|
|
767
1782
|
if (!extractedTerms || weights.pathMatch <= 0) {
|
|
768
1783
|
return;
|
|
769
1784
|
}
|
|
1785
|
+
// hasAddedScore gates additive boosts; pathMatchHits/reasons still track every hit for penalties/debugging.
|
|
1786
|
+
let hasAddedScore = false;
|
|
770
1787
|
// フレーズがパスに完全一致する場合(最高の重み)
|
|
771
1788
|
for (const phrase of extractedTerms.phrases) {
|
|
772
1789
|
if (lowerPath.includes(phrase)) {
|
|
773
|
-
|
|
1790
|
+
if (!hasAddedScore) {
|
|
1791
|
+
candidate.score += weights.pathMatch * 1.5; // 1.5倍のブースト
|
|
1792
|
+
hasAddedScore = true;
|
|
1793
|
+
}
|
|
774
1794
|
candidate.reasons.add(`path-phrase:${phrase}`);
|
|
775
|
-
|
|
1795
|
+
candidate.pathMatchHits++; // Issue #68: Track path match for penalty calculation
|
|
776
1796
|
}
|
|
777
1797
|
}
|
|
778
1798
|
// パスセグメントがマッチする場合(中程度の重み)
|
|
779
1799
|
const pathParts = lowerPath.split("/");
|
|
780
1800
|
for (const segment of extractedTerms.pathSegments) {
|
|
781
1801
|
if (pathParts.includes(segment)) {
|
|
782
|
-
|
|
1802
|
+
if (!hasAddedScore) {
|
|
1803
|
+
candidate.score += weights.pathMatch;
|
|
1804
|
+
hasAddedScore = true;
|
|
1805
|
+
}
|
|
783
1806
|
candidate.reasons.add(`path-segment:${segment}`);
|
|
784
|
-
|
|
1807
|
+
candidate.pathMatchHits++; // Issue #68: Track path match for penalty calculation
|
|
785
1808
|
}
|
|
786
1809
|
}
|
|
787
1810
|
// 通常のキーワードがパスに含まれる場合(低い重み)
|
|
1811
|
+
const matchedKeywords = new Set();
|
|
788
1812
|
for (const keyword of extractedTerms.keywords) {
|
|
789
1813
|
if (lowerPath.includes(keyword)) {
|
|
790
|
-
|
|
1814
|
+
if (!hasAddedScore) {
|
|
1815
|
+
candidate.score += weights.pathMatch * 0.5; // 0.5倍のブースト
|
|
1816
|
+
hasAddedScore = true;
|
|
1817
|
+
}
|
|
791
1818
|
candidate.reasons.add(`path-keyword:${keyword}`);
|
|
792
|
-
|
|
1819
|
+
candidate.pathMatchHits++; // Issue #68: Track path match for penalty calculation
|
|
1820
|
+
matchedKeywords.add(keyword); // Track for abbreviation expansion
|
|
1821
|
+
}
|
|
1822
|
+
}
|
|
1823
|
+
// ADR 003: Abbreviation expansion for keywords with zero exact matches
|
|
1824
|
+
// Avoid double-counting by only expanding keywords that didn't match exactly
|
|
1825
|
+
// Skip abbreviation expansion for files that will be heavily penalized (test/config/lock files)
|
|
1826
|
+
const fileName = lowerPath.split("/").pop() ?? "";
|
|
1827
|
+
const testPatterns = [".spec.ts", ".spec.js", ".test.ts", ".test.js", ".spec.tsx", ".test.tsx"];
|
|
1828
|
+
const lockFiles = [
|
|
1829
|
+
"package-lock.json",
|
|
1830
|
+
"pnpm-lock.yaml",
|
|
1831
|
+
"yarn.lock",
|
|
1832
|
+
"bun.lockb",
|
|
1833
|
+
"gemfile.lock",
|
|
1834
|
+
"cargo.lock",
|
|
1835
|
+
"poetry.lock",
|
|
1836
|
+
];
|
|
1837
|
+
const configPatterns = [
|
|
1838
|
+
"tsconfig.json",
|
|
1839
|
+
"vite.config",
|
|
1840
|
+
"vitest.config",
|
|
1841
|
+
"eslint.config",
|
|
1842
|
+
"prettier.config",
|
|
1843
|
+
"package.json",
|
|
1844
|
+
".env",
|
|
1845
|
+
"dockerfile",
|
|
1846
|
+
];
|
|
1847
|
+
const shouldSkipAbbreviation = testPatterns.some((pattern) => lowerPath.endsWith(pattern)) ||
|
|
1848
|
+
lockFiles.some((lock) => fileName === lock) ||
|
|
1849
|
+
configPatterns.some((cfg) => fileName.includes(cfg));
|
|
1850
|
+
if (!shouldSkipAbbreviation) {
|
|
1851
|
+
for (const keyword of extractedTerms.keywords) {
|
|
1852
|
+
if (matchedKeywords.has(keyword)) {
|
|
1853
|
+
continue; // Skip keywords that already matched exactly
|
|
1854
|
+
}
|
|
1855
|
+
const expandedTerms = expandAbbreviations(keyword);
|
|
1856
|
+
// Try each expanded variant (except the original keyword itself)
|
|
1857
|
+
for (const term of expandedTerms) {
|
|
1858
|
+
if (term === keyword)
|
|
1859
|
+
continue; // Skip original to avoid duplicate check
|
|
1860
|
+
if (lowerPath.includes(term)) {
|
|
1861
|
+
// Lower weight (0.4x) for abbreviation-expanded matches
|
|
1862
|
+
if (!hasAddedScore) {
|
|
1863
|
+
candidate.score += weights.pathMatch * 0.4;
|
|
1864
|
+
hasAddedScore = true;
|
|
1865
|
+
}
|
|
1866
|
+
candidate.reasons.add(`abbr-path:${keyword}→${term}`);
|
|
1867
|
+
candidate.pathMatchHits++; // Count for penalty calculation
|
|
1868
|
+
break; // Only count first match per keyword to avoid over-boosting
|
|
1869
|
+
}
|
|
1870
|
+
}
|
|
793
1871
|
}
|
|
794
1872
|
}
|
|
795
1873
|
}
|
|
796
1874
|
/**
|
|
797
|
-
*
|
|
798
|
-
* ブラックリストディレクトリ、テストファイル、lock
|
|
799
|
-
*
|
|
800
|
-
* @
|
|
1875
|
+
* 乗算的ファイルペナルティを適用(v1.0.0+)
|
|
1876
|
+
* ブラックリストディレクトリ、テストファイル、lockファイルに乗算ペナルティ
|
|
1877
|
+
* v1.0.0: 絶対ペナルティ(-100)から乗算ペナルティ(×0.01など)に移行
|
|
1878
|
+
* @param weights - スコアリングウェイト設定(乗算ペナルティ係数を含む)
|
|
1879
|
+
* @param profile - boost_profile設定(denylistOverridesなど)
|
|
1880
|
+
* @returns true if severe penalty was applied (caller should skip further boosts)
|
|
801
1881
|
*/
|
|
802
|
-
function
|
|
803
|
-
//
|
|
1882
|
+
function applyMultiplicativeFilePenalties(candidate, path, lowerPath, fileName, weights, profileConfig) {
|
|
1883
|
+
// Returns true if a severe penalty was applied (should skip further boosts)
|
|
1884
|
+
// Blacklisted directories - apply strong multiplicative penalty (99% reduction)
|
|
1885
|
+
// v1.0.0: test/ and tests/ removed - handled by testPenaltyMultiplier instead
|
|
804
1886
|
const blacklistedDirs = [
|
|
805
1887
|
".cursor/",
|
|
806
1888
|
".devcontainer/",
|
|
807
1889
|
".serena/",
|
|
808
1890
|
"__mocks__/",
|
|
809
1891
|
"docs/",
|
|
810
|
-
"test/",
|
|
811
|
-
"tests/",
|
|
812
1892
|
".git/",
|
|
1893
|
+
".github/",
|
|
813
1894
|
"node_modules/",
|
|
814
1895
|
"db/migrate/",
|
|
815
1896
|
"db/migrations/",
|
|
@@ -825,24 +1906,30 @@ function applyAdditiveFilePenalties(candidate, path, lowerPath, fileName, profil
|
|
|
825
1906
|
];
|
|
826
1907
|
for (const dir of blacklistedDirs) {
|
|
827
1908
|
if (path.startsWith(dir)) {
|
|
828
|
-
// ✅
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
continue; // このブラックリストエントリをスキップ
|
|
1909
|
+
// ✅ Decoupled: Check denylist overrides from profile config
|
|
1910
|
+
if (profileConfig.denylistOverrides.includes(dir)) {
|
|
1911
|
+
continue; // Skip this blacklisted directory
|
|
832
1912
|
}
|
|
833
|
-
|
|
1913
|
+
// v1.0.0: Use multiplicative penalty instead of absolute -100
|
|
1914
|
+
candidate.scoreMultiplier *= weights.blacklistPenaltyMultiplier;
|
|
834
1915
|
candidate.reasons.add("penalty:blacklisted-dir");
|
|
835
|
-
return true;
|
|
1916
|
+
return true; // Signal to skip further boosts - this is the strongest penalty
|
|
836
1917
|
}
|
|
837
1918
|
}
|
|
838
|
-
|
|
1919
|
+
if (isSuppressedPath(path)) {
|
|
1920
|
+
// v1.0.0: Use multiplicative penalty instead of absolute -100
|
|
1921
|
+
candidate.scoreMultiplier *= weights.blacklistPenaltyMultiplier;
|
|
1922
|
+
candidate.reasons.add("penalty:suppressed");
|
|
1923
|
+
return true; // Signal to skip further boosts
|
|
1924
|
+
}
|
|
1925
|
+
// Test files - strong multiplicative penalty (95% reduction)
|
|
839
1926
|
const testPatterns = [".spec.ts", ".spec.js", ".test.ts", ".test.js", ".spec.tsx", ".test.tsx"];
|
|
840
1927
|
if (testPatterns.some((pattern) => lowerPath.endsWith(pattern))) {
|
|
841
|
-
candidate.
|
|
1928
|
+
candidate.scoreMultiplier *= weights.testPenaltyMultiplier;
|
|
842
1929
|
candidate.reasons.add("penalty:test-file");
|
|
843
|
-
return true;
|
|
1930
|
+
return true; // Signal to skip further boosts
|
|
844
1931
|
}
|
|
845
|
-
// Lock files - very strong penalty
|
|
1932
|
+
// Lock files - very strong multiplicative penalty (99% reduction)
|
|
846
1933
|
const lockFiles = [
|
|
847
1934
|
"package-lock.json",
|
|
848
1935
|
"pnpm-lock.yaml",
|
|
@@ -853,231 +1940,299 @@ function applyAdditiveFilePenalties(candidate, path, lowerPath, fileName, profil
|
|
|
853
1940
|
"poetry.lock",
|
|
854
1941
|
];
|
|
855
1942
|
if (lockFiles.some((lockFile) => fileName === lockFile)) {
|
|
856
|
-
candidate.
|
|
1943
|
+
candidate.scoreMultiplier *= weights.lockPenaltyMultiplier;
|
|
857
1944
|
candidate.reasons.add("penalty:lock-file");
|
|
858
|
-
return true;
|
|
859
|
-
}
|
|
860
|
-
// Configuration files - strong penalty
|
|
861
|
-
const configPatterns = [
|
|
862
|
-
".config.js",
|
|
863
|
-
".config.ts",
|
|
864
|
-
".config.mjs",
|
|
865
|
-
".config.cjs",
|
|
866
|
-
"tsconfig.json",
|
|
867
|
-
"jsconfig.json",
|
|
868
|
-
"package.json",
|
|
869
|
-
".eslintrc",
|
|
870
|
-
".prettierrc",
|
|
871
|
-
"jest.config",
|
|
872
|
-
"vite.config",
|
|
873
|
-
"vitest.config",
|
|
874
|
-
"webpack.config",
|
|
875
|
-
"rollup.config",
|
|
876
|
-
];
|
|
877
|
-
if (configPatterns.some((pattern) => lowerPath.endsWith(pattern) || fileName.startsWith(".env")) ||
|
|
878
|
-
fileName === "Dockerfile" ||
|
|
879
|
-
fileName === "docker-compose.yml" ||
|
|
880
|
-
fileName === "docker-compose.yaml") {
|
|
881
|
-
candidate.score -= 1.5;
|
|
882
|
-
candidate.reasons.add("penalty:config-file");
|
|
883
|
-
return true;
|
|
884
|
-
}
|
|
885
|
-
// Migration files - strong penalty
|
|
886
|
-
if (lowerPath.includes("migrate") || lowerPath.includes("migration")) {
|
|
887
|
-
candidate.score -= 2.0;
|
|
888
|
-
candidate.reasons.add("penalty:migration-file");
|
|
889
|
-
return true;
|
|
1945
|
+
return true; // Signal to skip further boosts
|
|
890
1946
|
}
|
|
891
|
-
|
|
1947
|
+
// v1.0.0: No penalty applied, allow further boosts/penalties
|
|
1948
|
+
return false;
|
|
892
1949
|
}
|
|
893
1950
|
/**
|
|
894
1951
|
* ファイルタイプ別の乗算的ペナルティ/ブーストを適用(v0.7.0+)
|
|
895
1952
|
* profile="docs": ドキュメントファイルをブースト
|
|
896
1953
|
* profile="default": ドキュメントファイルにペナルティ、実装ファイルをブースト
|
|
897
1954
|
*/
|
|
898
|
-
function applyFileTypeMultipliers(candidate, path, ext,
|
|
899
|
-
|
|
900
|
-
|
|
1955
|
+
function applyFileTypeMultipliers(candidate, path, ext, profileConfig, weights) {
|
|
1956
|
+
const fileName = path.split("/").pop() ?? "";
|
|
1957
|
+
const lowerPath = path.toLowerCase();
|
|
1958
|
+
// ✅ Step 1: Low-value files (v1.0.0: syntax/perf/legal/migration)
|
|
1959
|
+
// Apply configPenaltyMultiplier (strong penalty) to rarely useful file types
|
|
1960
|
+
const isSyntaxGrammar = path.includes("/syntaxes/") &&
|
|
1961
|
+
(lowerPath.endsWith(".tmlanguage") ||
|
|
1962
|
+
lowerPath.endsWith(".tmlanguage.json") ||
|
|
1963
|
+
lowerPath.endsWith(".tmtheme") ||
|
|
1964
|
+
lowerPath.endsWith(".plist"));
|
|
1965
|
+
const isPerfData = lowerPath.includes(".perf.data") ||
|
|
1966
|
+
lowerPath.includes(".perf-data") ||
|
|
1967
|
+
lowerPath.includes("-perf-data");
|
|
1968
|
+
const isLegalFile = fileName.toLowerCase().includes("thirdpartynotices") ||
|
|
1969
|
+
fileName.toLowerCase() === "cgmanifest.json";
|
|
1970
|
+
const isMigrationFile = lowerPath.includes("migrate") || lowerPath.includes("migration");
|
|
1971
|
+
if (isSyntaxGrammar || isPerfData || isLegalFile || isMigrationFile) {
|
|
1972
|
+
candidate.scoreMultiplier *= weights.configPenaltyMultiplier;
|
|
1973
|
+
candidate.reasons.add("penalty:low-value-file");
|
|
1974
|
+
return; // Don't apply impl boosts
|
|
1975
|
+
}
|
|
1976
|
+
// ✅ Step 2: Config files
|
|
1977
|
+
if (isConfigFile(path, fileName)) {
|
|
1978
|
+
candidate.scoreMultiplier *= profileConfig.fileTypeMultipliers.config;
|
|
1979
|
+
candidate.reasons.add("penalty:config-file");
|
|
1980
|
+
return; // Don't apply impl boosts to config files
|
|
901
1981
|
}
|
|
902
|
-
// ✅
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
1982
|
+
// ✅ Step 3: Documentation files
|
|
1983
|
+
const docExtensions = [".md", ".yaml", ".yml", ".mdc"];
|
|
1984
|
+
if (docExtensions.some((docExt) => path.endsWith(docExt))) {
|
|
1985
|
+
const docMultiplier = profileConfig.fileTypeMultipliers.doc;
|
|
1986
|
+
candidate.scoreMultiplier *= docMultiplier;
|
|
1987
|
+
if (docMultiplier > 1.0) {
|
|
907
1988
|
candidate.reasons.add("boost:doc-file");
|
|
908
1989
|
}
|
|
909
|
-
|
|
910
|
-
return;
|
|
911
|
-
}
|
|
912
|
-
// DEFAULT PROFILE: Use MULTIPLICATIVE penalties for config/docs, MULTIPLICATIVE boosts for impl files
|
|
913
|
-
if (profile === "default") {
|
|
914
|
-
const fileName = path.split("/").pop() ?? "";
|
|
915
|
-
// ✅ Step 1: Config files get strongest penalty (95% reduction)
|
|
916
|
-
if (isConfigFile(path, fileName)) {
|
|
917
|
-
candidate.scoreMultiplier *= weights.configPenaltyMultiplier; // 0.05 = 95% reduction
|
|
918
|
-
candidate.reasons.add("penalty:config-file");
|
|
919
|
-
return; // Don't apply impl boosts to config files
|
|
920
|
-
}
|
|
921
|
-
// ✅ Step 2: Documentation files get moderate penalty (50% reduction)
|
|
922
|
-
const docExtensions = [".md", ".yaml", ".yml", ".mdc"];
|
|
923
|
-
if (docExtensions.some((docExt) => path.endsWith(docExt))) {
|
|
924
|
-
candidate.scoreMultiplier *= weights.docPenaltyMultiplier; // 0.5 = 50% reduction
|
|
1990
|
+
else if (docMultiplier < 1.0) {
|
|
925
1991
|
candidate.reasons.add("penalty:doc-file");
|
|
926
|
-
return; // Don't apply impl boosts to docs
|
|
927
1992
|
}
|
|
928
|
-
//
|
|
1993
|
+
return; // Don't apply impl boosts to docs
|
|
1994
|
+
}
|
|
1995
|
+
// ✅ Step 4: Implementation files with path-specific boosts
|
|
1996
|
+
const implMultiplier = profileConfig.fileTypeMultipliers.impl;
|
|
1997
|
+
// ✅ Use longest-prefix-match logic (order-independent)
|
|
1998
|
+
const pathBoost = getPathMultiplier(path, profileConfig);
|
|
1999
|
+
if (pathBoost !== 1.0) {
|
|
2000
|
+
candidate.scoreMultiplier *= implMultiplier * pathBoost;
|
|
2001
|
+
// Add specific reason based on matched path
|
|
929
2002
|
if (path.startsWith("src/app/")) {
|
|
930
|
-
candidate.scoreMultiplier *= weights.implBoostMultiplier * 1.4; // Extra boost for app files
|
|
931
2003
|
candidate.reasons.add("boost:app-file");
|
|
932
2004
|
}
|
|
933
2005
|
else if (path.startsWith("src/components/")) {
|
|
934
|
-
candidate.scoreMultiplier *= weights.implBoostMultiplier * 1.3;
|
|
935
2006
|
candidate.reasons.add("boost:component-file");
|
|
936
2007
|
}
|
|
937
2008
|
else if (path.startsWith("src/lib/")) {
|
|
938
|
-
candidate.scoreMultiplier *= weights.implBoostMultiplier * 1.2;
|
|
939
2009
|
candidate.reasons.add("boost:lib-file");
|
|
940
2010
|
}
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
2011
|
+
return;
|
|
2012
|
+
}
|
|
2013
|
+
// Fallback for other src/ files
|
|
2014
|
+
if (path.startsWith("src/")) {
|
|
2015
|
+
if (ext === ".ts" || ext === ".tsx" || ext === ".js") {
|
|
2016
|
+
candidate.scoreMultiplier *= implMultiplier;
|
|
2017
|
+
candidate.reasons.add("boost:impl-file");
|
|
946
2018
|
}
|
|
947
2019
|
}
|
|
948
2020
|
}
|
|
949
2021
|
/**
|
|
950
|
-
* contextBundle専用のブーストプロファイル適用(
|
|
2022
|
+
* contextBundle専用のブーストプロファイル適用(v1.0.0: 乗算ペナルティモデル)
|
|
951
2023
|
* 複雑度を削減するために3つのヘルパー関数に分割:
|
|
952
2024
|
* 1. applyPathBasedScoring: パスベースの加算的スコアリング
|
|
953
|
-
* 2.
|
|
954
|
-
* 3. applyFileTypeMultipliers:
|
|
2025
|
+
* 2. applyMultiplicativeFilePenalties: 乗算的ペナルティ(blacklist/test/lock)
|
|
2026
|
+
* 3. applyFileTypeMultipliers: 乗算的ペナルティ/ブースト(doc/config/impl)
|
|
2027
|
+
*
|
|
2028
|
+
* v1.0.0 CHANGES:
|
|
2029
|
+
* - 絶対ペナルティ(-100)を乗算ペナルティ(×0.01など)に置き換え
|
|
2030
|
+
* - すべてのペナルティが組み合わせ可能に(boost_profileとの相互作用が予測可能)
|
|
2031
|
+
* - v0.9.0の特別ケース処理(if profile === "docs")が不要に
|
|
955
2032
|
*
|
|
956
|
-
*
|
|
957
|
-
* 1.
|
|
958
|
-
* 2.
|
|
959
|
-
* 3.
|
|
2033
|
+
* SCORING PHASES:
|
|
2034
|
+
* 1. Additive phase: テキストマッチ、パスマッチ、依存関係、近接性を加算
|
|
2035
|
+
* 2. Multiplicative phase: ペナルティとブーストを scoreMultiplier に蓄積
|
|
2036
|
+
* 3. Final application: score *= scoreMultiplier(最終段階で一度だけ適用)
|
|
960
2037
|
*/
|
|
961
|
-
function applyBoostProfile(candidate, row,
|
|
962
|
-
if (profile === "none") {
|
|
963
|
-
return;
|
|
964
|
-
}
|
|
2038
|
+
function applyBoostProfile(candidate, row, profileConfig, weights, extractedTerms) {
|
|
965
2039
|
const { path, ext } = row;
|
|
966
2040
|
const lowerPath = path.toLowerCase();
|
|
967
2041
|
const fileName = path.split("/").pop() ?? "";
|
|
968
2042
|
// Step 1: パスベースのスコアリング(加算的ブースト)
|
|
969
2043
|
applyPathBasedScoring(candidate, lowerPath, weights, extractedTerms);
|
|
970
|
-
// Step 2:
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
return; // ペナルティが適用された場合は処理終了
|
|
974
|
-
}
|
|
2044
|
+
// Step 2: 乗算的ペナルティ(ブラックリスト、テスト、lock)
|
|
2045
|
+
// v1.0.0: Returns true if severe penalty applied (should skip further boosts)
|
|
2046
|
+
const skipFurtherBoosts = applyMultiplicativeFilePenalties(candidate, path, lowerPath, fileName, weights, profileConfig);
|
|
975
2047
|
// Step 3: ファイルタイプ別の乗算的ペナルティ/ブースト
|
|
976
|
-
|
|
2048
|
+
// Skip if severe penalty was applied (blacklist/test/lock files shouldn't get impl boosts)
|
|
2049
|
+
if (!skipFurtherBoosts) {
|
|
2050
|
+
applyFileTypeMultipliers(candidate, path, ext, profileConfig, weights);
|
|
2051
|
+
}
|
|
977
2052
|
}
|
|
978
2053
|
export async function filesSearch(context, params) {
|
|
979
2054
|
const { db, repoId } = context;
|
|
980
|
-
const
|
|
981
|
-
|
|
982
|
-
|
|
2055
|
+
const rawQuery = params.query ?? "";
|
|
2056
|
+
const inlineMetadata = parseInlineMetadataFilters(rawQuery);
|
|
2057
|
+
const paramFilters = normalizeMetadataFiltersParam(params.metadata_filters);
|
|
2058
|
+
const metadataFilters = mergeMetadataFilters([...inlineMetadata.filters, ...paramFilters]);
|
|
2059
|
+
const strictMetadataFilters = metadataFilters.filter((filter) => filter.strict);
|
|
2060
|
+
const hintMetadataFilters = metadataFilters.filter((filter) => !filter.strict);
|
|
2061
|
+
const hasStrictMetadataFilters = strictMetadataFilters.length > 0;
|
|
2062
|
+
const hasHintMetadataFilters = hintMetadataFilters.length > 0;
|
|
2063
|
+
const hasAnyMetadataFilters = metadataFilters.length > 0;
|
|
2064
|
+
let cleanedQuery = inlineMetadata.cleanedQuery;
|
|
2065
|
+
let hasTextQuery = cleanedQuery.length > 0;
|
|
2066
|
+
if (!hasTextQuery && hasHintMetadataFilters) {
|
|
2067
|
+
cleanedQuery = hintMetadataFilters
|
|
2068
|
+
.flatMap((filter) => filter.values)
|
|
2069
|
+
.map((value) => value.trim())
|
|
2070
|
+
.filter((value) => value.length > 0)
|
|
2071
|
+
.join(" ");
|
|
2072
|
+
cleanedQuery = cleanedQuery.trim();
|
|
2073
|
+
hasTextQuery = cleanedQuery.length > 0;
|
|
2074
|
+
}
|
|
2075
|
+
const metadataValueSeed = metadataFilters
|
|
2076
|
+
.flatMap((filter) => filter.values)
|
|
2077
|
+
.map((value) => value.trim())
|
|
2078
|
+
.filter((value) => value.length > 0)
|
|
2079
|
+
.join(" ");
|
|
2080
|
+
if (metadataValueSeed.length > 0) {
|
|
2081
|
+
cleanedQuery = `${cleanedQuery} ${metadataValueSeed}`.trim();
|
|
2082
|
+
hasTextQuery = cleanedQuery.length > 0;
|
|
2083
|
+
}
|
|
2084
|
+
if (!hasTextQuery && !hasAnyMetadataFilters) {
|
|
2085
|
+
throw new Error("files_search requires a query or metadata_filters. Provide keywords or structured filters to continue.");
|
|
983
2086
|
}
|
|
984
2087
|
const limit = normalizeLimit(params.limit);
|
|
985
2088
|
const ftsStatus = await getFreshFtsStatus(context);
|
|
986
2089
|
const hasFTS = ftsStatus.ready;
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
if (
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
SELECT
|
|
1013
|
-
FROM
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
values = [repoId];
|
|
1027
|
-
const words = splitQueryWords(query);
|
|
1028
|
-
if (words.length === 1) {
|
|
1029
|
-
conditions.push("b.content ILIKE '%' || ? || '%'");
|
|
1030
|
-
values.push(query);
|
|
2090
|
+
const metadataClauses = buildMetadataFilterConditions(strictMetadataFilters);
|
|
2091
|
+
const candidateRows = [];
|
|
2092
|
+
if (hasTextQuery) {
|
|
2093
|
+
let sql;
|
|
2094
|
+
let values;
|
|
2095
|
+
if (hasFTS) {
|
|
2096
|
+
const conditions = ["f.repo_id = ?"];
|
|
2097
|
+
values = [repoId];
|
|
2098
|
+
if (params.lang) {
|
|
2099
|
+
conditions.push("COALESCE(f.lang, '') = ?");
|
|
2100
|
+
values.push(params.lang);
|
|
2101
|
+
}
|
|
2102
|
+
if (params.ext) {
|
|
2103
|
+
conditions.push("COALESCE(f.ext, '') = ?");
|
|
2104
|
+
values.push(params.ext);
|
|
2105
|
+
}
|
|
2106
|
+
if (params.path_prefix) {
|
|
2107
|
+
conditions.push("f.path LIKE ?");
|
|
2108
|
+
values.push(`${params.path_prefix}%`);
|
|
2109
|
+
}
|
|
2110
|
+
for (const clause of metadataClauses) {
|
|
2111
|
+
conditions.push(clause.sql);
|
|
2112
|
+
values.push(...clause.params);
|
|
2113
|
+
}
|
|
2114
|
+
sql = `
|
|
2115
|
+
SELECT f.path, f.lang, f.ext, b.content, fts.score
|
|
2116
|
+
FROM file f
|
|
2117
|
+
JOIN blob b ON b.hash = f.blob_hash
|
|
2118
|
+
JOIN (
|
|
2119
|
+
SELECT hash, fts_main_blob.match_bm25(hash, ?) AS score
|
|
2120
|
+
FROM blob
|
|
2121
|
+
WHERE score IS NOT NULL
|
|
2122
|
+
) fts ON fts.hash = b.hash
|
|
2123
|
+
WHERE ${conditions.join(" AND ")}
|
|
2124
|
+
ORDER BY fts.score DESC
|
|
2125
|
+
LIMIT ?
|
|
2126
|
+
`;
|
|
2127
|
+
values.unshift(cleanedQuery);
|
|
2128
|
+
values.push(limit);
|
|
1031
2129
|
}
|
|
1032
2130
|
else {
|
|
1033
|
-
const
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
2131
|
+
const conditions = ["f.repo_id = ?", "b.content IS NOT NULL"];
|
|
2132
|
+
values = [repoId];
|
|
2133
|
+
const words = splitQueryWords(cleanedQuery);
|
|
2134
|
+
if (words.length === 1) {
|
|
2135
|
+
conditions.push("b.content ILIKE '%' || ? || '%'");
|
|
2136
|
+
values.push(cleanedQuery);
|
|
2137
|
+
}
|
|
2138
|
+
else {
|
|
2139
|
+
const wordConditions = words.map(() => "b.content ILIKE '%' || ? || '%'");
|
|
2140
|
+
conditions.push(`(${wordConditions.join(" OR ")})`);
|
|
2141
|
+
values.push(...words);
|
|
2142
|
+
}
|
|
2143
|
+
if (params.lang) {
|
|
2144
|
+
conditions.push("COALESCE(f.lang, '') = ?");
|
|
2145
|
+
values.push(params.lang);
|
|
2146
|
+
}
|
|
2147
|
+
if (params.ext) {
|
|
2148
|
+
conditions.push("COALESCE(f.ext, '') = ?");
|
|
2149
|
+
values.push(params.ext);
|
|
2150
|
+
}
|
|
2151
|
+
if (params.path_prefix) {
|
|
2152
|
+
conditions.push("f.path LIKE ?");
|
|
2153
|
+
values.push(`${params.path_prefix}%`);
|
|
2154
|
+
}
|
|
2155
|
+
for (const clause of metadataClauses) {
|
|
2156
|
+
conditions.push(clause.sql);
|
|
2157
|
+
values.push(...clause.params);
|
|
2158
|
+
}
|
|
2159
|
+
sql = `
|
|
2160
|
+
SELECT f.path, f.lang, f.ext, b.content
|
|
2161
|
+
FROM file f
|
|
2162
|
+
JOIN blob b ON b.hash = f.blob_hash
|
|
2163
|
+
WHERE ${conditions.join(" AND ")}
|
|
2164
|
+
ORDER BY f.path
|
|
2165
|
+
LIMIT ?
|
|
2166
|
+
`;
|
|
2167
|
+
values.push(limit);
|
|
2168
|
+
}
|
|
2169
|
+
const textRows = await db.all(sql, values);
|
|
2170
|
+
candidateRows.push(...textRows);
|
|
2171
|
+
}
|
|
2172
|
+
if (!hasTextQuery && hasAnyMetadataFilters) {
|
|
2173
|
+
const metadataOnlyRows = await fetchMetadataOnlyCandidates(db, context.tableAvailability, repoId, metadataFilters, limit * 2);
|
|
2174
|
+
for (const row of metadataOnlyRows) {
|
|
2175
|
+
row.score = 1 + metadataFilters.length * 0.2;
|
|
2176
|
+
}
|
|
2177
|
+
candidateRows.push(...metadataOnlyRows);
|
|
2178
|
+
}
|
|
2179
|
+
if (hasTextQuery) {
|
|
2180
|
+
const metadataKeywords = splitQueryWords(cleanedQuery.toLowerCase()).map((kw) => kw.toLowerCase());
|
|
2181
|
+
if (metadataKeywords.length > 0) {
|
|
2182
|
+
const excludePaths = new Set(candidateRows.map((row) => row.path));
|
|
2183
|
+
const metadataRows = await fetchMetadataKeywordMatches(db, context.tableAvailability, repoId, metadataKeywords, metadataFilters, limit * 2, excludePaths);
|
|
2184
|
+
candidateRows.push(...metadataRows);
|
|
2185
|
+
}
|
|
2186
|
+
}
|
|
2187
|
+
if (candidateRows.length === 0) {
|
|
2188
|
+
return [];
|
|
2189
|
+
}
|
|
2190
|
+
const rowMap = new Map();
|
|
2191
|
+
for (const row of candidateRows) {
|
|
2192
|
+
const base = row.score ?? (hasTextQuery ? 1.0 : 0.8);
|
|
2193
|
+
const existing = rowMap.get(row.path);
|
|
2194
|
+
const existingScore = existing?.score ?? (hasTextQuery ? 1.0 : 0.8);
|
|
2195
|
+
if (!existing || base > existingScore) {
|
|
2196
|
+
rowMap.set(row.path, { ...row, score: base });
|
|
2197
|
+
}
|
|
2198
|
+
}
|
|
2199
|
+
const dedupedRows = Array.from(rowMap.values()).sort((a, b) => (b.score ?? 1) - (a.score ?? 1));
|
|
2200
|
+
const limitedRows = dedupedRows.slice(0, limit);
|
|
2201
|
+
const paths = limitedRows.map((row) => row.path);
|
|
2202
|
+
const metadataMap = await loadMetadataForPaths(db, context.tableAvailability, repoId, paths);
|
|
2203
|
+
const inboundCounts = await loadInboundLinkCounts(db, context.tableAvailability, repoId, paths);
|
|
2204
|
+
const metadataKeywordSet = hasTextQuery
|
|
2205
|
+
? new Set(splitQueryWords(cleanedQuery.toLowerCase()).map((kw) => kw.toLowerCase()))
|
|
2206
|
+
: new Set();
|
|
2207
|
+
const filterValueSet = new Set(metadataFilters.flatMap((filter) => filter.values.map((value) => value.toLowerCase())));
|
|
2208
|
+
const boostProfile = params.boost_profile ??
|
|
2209
|
+
(hasHintMetadataFilters ? "balanced" : hasStrictMetadataFilters ? "docs" : "default");
|
|
2210
|
+
const profileConfig = getBoostProfile(boostProfile);
|
|
1063
2211
|
const weights = loadScoringProfile(null);
|
|
1064
2212
|
const options = parseOutputOptions(params);
|
|
1065
|
-
|
|
2213
|
+
const previewQuery = hasTextQuery
|
|
2214
|
+
? cleanedQuery
|
|
2215
|
+
: (metadataFilters[0]?.values[0] ?? rawQuery.trim());
|
|
2216
|
+
return limitedRows
|
|
1066
2217
|
.map((row) => {
|
|
1067
2218
|
let preview;
|
|
1068
2219
|
let matchLine;
|
|
2220
|
+
const previewSource = previewQuery || row.path;
|
|
1069
2221
|
if (options.includePreview) {
|
|
1070
|
-
|
|
1071
|
-
const previewData = buildPreview(row.content ?? "", query);
|
|
2222
|
+
const previewData = buildPreview(row.content ?? "", previewSource);
|
|
1072
2223
|
preview = previewData.preview;
|
|
1073
2224
|
matchLine = previewData.line;
|
|
1074
2225
|
}
|
|
1075
2226
|
else {
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
const
|
|
1080
|
-
const
|
|
2227
|
+
matchLine = findFirstMatchLine(row.content ?? "", previewSource);
|
|
2228
|
+
}
|
|
2229
|
+
const metadataEntries = metadataMap.get(row.path);
|
|
2230
|
+
const metadataBoost = computeMetadataBoost(metadataEntries, metadataKeywordSet, filterValueSet);
|
|
2231
|
+
const inboundBoost = computeInboundLinkBoost(inboundCounts.get(row.path));
|
|
2232
|
+
const baseScore = (row.score ?? (hasTextQuery ? 1.0 : 0.8)) + metadataBoost + inboundBoost;
|
|
2233
|
+
const boostedScore = boostProfile === "none"
|
|
2234
|
+
? baseScore
|
|
2235
|
+
: applyFileTypeBoost(row.path, baseScore, profileConfig, weights);
|
|
1081
2236
|
const result = {
|
|
1082
2237
|
path: row.path,
|
|
1083
2238
|
matchLine,
|
|
@@ -1090,102 +2245,310 @@ export async function filesSearch(context, params) {
|
|
|
1090
2245
|
}
|
|
1091
2246
|
return result;
|
|
1092
2247
|
})
|
|
1093
|
-
.
|
|
2248
|
+
.filter((result) => result.score > SCORE_FILTER_THRESHOLD) // v1.0.0: Filter out extremely low-scored files (multiplicative penalties)
|
|
2249
|
+
.sort((a, b) => b.score - a.score);
|
|
1094
2250
|
}
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
2251
|
+
// snippetsGet has been extracted to ./handlers/snippets-get.ts and re-exported above
|
|
2252
|
+
// ============================================================================
|
|
2253
|
+
// Issue #68: Path/Large File Penalty Helper Functions
|
|
2254
|
+
// ============================================================================
|
|
2255
|
+
/**
|
|
2256
|
+
* v1.0.0: Score filtering threshold for multiplicative penalty model
|
|
2257
|
+
* Files with score < threshold are filtered out (unless they are hint paths)
|
|
2258
|
+
* Default: 0.05 removes files with >95% penalty while keeping relevant files
|
|
2259
|
+
* Can be overridden via KIRI_SCORE_THRESHOLD environment variable
|
|
2260
|
+
*/
|
|
2261
|
+
const SCORE_FILTER_THRESHOLD = parseFloat(process.env.KIRI_SCORE_THRESHOLD ?? "0.05");
|
|
2262
|
+
/**
|
|
2263
|
+
* 環境変数からペナルティ機能フラグを読み取る
|
|
2264
|
+
*/
|
|
2265
|
+
function readPenaltyFlags() {
|
|
2266
|
+
return {
|
|
2267
|
+
pathPenalty: process.env.KIRI_PATH_PENALTY === "1",
|
|
2268
|
+
largeFilePenalty: process.env.KIRI_LARGE_FILE_PENALTY === "1",
|
|
2269
|
+
};
|
|
2270
|
+
}
|
|
2271
|
+
/**
|
|
2272
|
+
* クエリ統計を計算(単語数と平均単語長)
|
|
2273
|
+
*/
|
|
2274
|
+
function computeQueryStats(goal) {
|
|
2275
|
+
const words = goal
|
|
2276
|
+
.trim()
|
|
2277
|
+
.split(/\s+/)
|
|
2278
|
+
.filter((w) => w.length > 0);
|
|
2279
|
+
const totalLength = words.reduce((sum, w) => sum + w.length, 0);
|
|
2280
|
+
return {
|
|
2281
|
+
wordCount: words.length,
|
|
2282
|
+
avgWordLength: words.length > 0 ? totalLength / words.length : 0,
|
|
2283
|
+
};
|
|
2284
|
+
}
|
|
2285
|
+
/**
|
|
2286
|
+
* Path Miss Penaltyをcandidateに適用(レガシー: Binary penalty)
|
|
2287
|
+
* 条件: wordCount >= 2 AND avgWordLength >= 4 AND pathMatchHits === 0
|
|
2288
|
+
*
|
|
2289
|
+
* @deprecated Use applyGraduatedPenalty() instead (ADR 002)
|
|
2290
|
+
*/
|
|
2291
|
+
function applyPathMissPenalty(candidate, queryStats) {
|
|
2292
|
+
if (queryStats.wordCount >= 2 && queryStats.avgWordLength >= 4 && candidate.pathMatchHits === 0) {
|
|
2293
|
+
candidate.score += PATH_MISS_DELTA; // -0.5
|
|
2294
|
+
recordPenaltyEvent(candidate, "path-miss", PATH_MISS_DELTA, {
|
|
2295
|
+
wordCount: queryStats.wordCount,
|
|
2296
|
+
avgWordLength: queryStats.avgWordLength,
|
|
2297
|
+
pathMatchHits: candidate.pathMatchHits,
|
|
2298
|
+
});
|
|
1109
2299
|
}
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
const
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
let snippetSelection = null;
|
|
1136
|
-
if (useSymbolSnippets) {
|
|
1137
|
-
snippetSelection =
|
|
1138
|
-
snippetRows.find((snippet) => requestedStart >= snippet.start_line && requestedStart <= snippet.end_line) ?? null;
|
|
1139
|
-
if (!snippetSelection) {
|
|
1140
|
-
const firstSnippet = snippetRows[0];
|
|
1141
|
-
if (firstSnippet && requestedStart < firstSnippet.start_line) {
|
|
1142
|
-
snippetSelection = firstSnippet;
|
|
1143
|
-
}
|
|
1144
|
-
else {
|
|
1145
|
-
snippetSelection = snippetRows[snippetRows.length - 1] ?? null;
|
|
1146
|
-
}
|
|
1147
|
-
}
|
|
2300
|
+
}
|
|
2301
|
+
/**
|
|
2302
|
+
* 段階的ペナルティをcandidateに適用(Issue #68: Graduated Penalty)
|
|
2303
|
+
* ADR 002: Graduated Penalty System
|
|
2304
|
+
*
|
|
2305
|
+
* @param candidate Candidate to apply penalty to
|
|
2306
|
+
* @param queryStats Query statistics for eligibility check
|
|
2307
|
+
* @param config Graduated penalty configuration
|
|
2308
|
+
*/
|
|
2309
|
+
function applyGraduatedPenalty(candidate, queryStats, config) {
|
|
2310
|
+
const penalty = computeGraduatedPenalty(candidate.pathMatchHits, queryStats, config);
|
|
2311
|
+
if (penalty !== 0) {
|
|
2312
|
+
candidate.score += penalty;
|
|
2313
|
+
recordPenaltyEvent(candidate, "path-miss", penalty, {
|
|
2314
|
+
wordCount: queryStats.wordCount,
|
|
2315
|
+
avgWordLength: queryStats.avgWordLength,
|
|
2316
|
+
pathMatchHits: candidate.pathMatchHits,
|
|
2317
|
+
tier: candidate.pathMatchHits === 0
|
|
2318
|
+
? "tier0"
|
|
2319
|
+
: candidate.pathMatchHits === 1
|
|
2320
|
+
? "tier1"
|
|
2321
|
+
: candidate.pathMatchHits === 2
|
|
2322
|
+
? "tier2"
|
|
2323
|
+
: "no-penalty",
|
|
2324
|
+
});
|
|
1148
2325
|
}
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
2326
|
+
}
|
|
2327
|
+
/**
|
|
2328
|
+
* Large File Penaltyをcandidateに適用
|
|
2329
|
+
* 条件: totalLines > 500 AND matchLine > 120
|
|
2330
|
+
* TODO(Issue #68): Add "no symbol at match location" check after selectSnippet integration
|
|
2331
|
+
*/
|
|
2332
|
+
function applyLargeFilePenalty(candidate) {
|
|
2333
|
+
const { totalLines, matchLine } = candidate;
|
|
2334
|
+
if (totalLines !== null && totalLines > 500 && matchLine !== null && matchLine > 120) {
|
|
2335
|
+
candidate.score += LARGE_FILE_DELTA; // -0.8
|
|
2336
|
+
recordPenaltyEvent(candidate, "large-file", LARGE_FILE_DELTA, {
|
|
2337
|
+
totalLines,
|
|
2338
|
+
matchLine,
|
|
2339
|
+
});
|
|
1158
2340
|
}
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
2341
|
+
}
|
|
2342
|
+
/**
|
|
2343
|
+
* ペナルティイベントを記録(テレメトリ用)
|
|
2344
|
+
*/
|
|
2345
|
+
function recordPenaltyEvent(candidate, kind, delta, details) {
|
|
2346
|
+
candidate.penalties.push({ kind, delta, details });
|
|
2347
|
+
candidate.reasons.add(`penalty:${kind}`);
|
|
2348
|
+
}
|
|
2349
|
+
/**
|
|
2350
|
+
* pathMatchHits分布を計算(Issue #68: Telemetry)
|
|
2351
|
+
* LDE: 純粋関数として実装(副作用なし、イミュータブル)
|
|
2352
|
+
*/
|
|
2353
|
+
function computePathMatchDistribution(candidates) {
|
|
2354
|
+
let zero = 0;
|
|
2355
|
+
let one = 0;
|
|
2356
|
+
let two = 0;
|
|
2357
|
+
let three = 0;
|
|
2358
|
+
let fourPlus = 0;
|
|
2359
|
+
for (const candidate of candidates) {
|
|
2360
|
+
const hits = candidate.pathMatchHits;
|
|
2361
|
+
if (hits === 0)
|
|
2362
|
+
zero++;
|
|
2363
|
+
else if (hits === 1)
|
|
2364
|
+
one++;
|
|
2365
|
+
else if (hits === 2)
|
|
2366
|
+
two++;
|
|
2367
|
+
else if (hits === 3)
|
|
2368
|
+
three++;
|
|
2369
|
+
else
|
|
2370
|
+
fourPlus++;
|
|
1162
2371
|
}
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
2372
|
+
return {
|
|
2373
|
+
zero,
|
|
2374
|
+
one,
|
|
2375
|
+
two,
|
|
2376
|
+
three,
|
|
2377
|
+
fourPlus,
|
|
2378
|
+
total: candidates.length,
|
|
2379
|
+
};
|
|
2380
|
+
}
|
|
2381
|
+
/**
 * Computes summary statistics over candidate scores (Issue #68: Telemetry).
 * LDE: implemented as a pure function (no side effects, inputs untouched).
 *
 * @param candidates Candidates exposing a numeric `score` property
 * @returns `{ min, max, mean, median }`; all zeros when `candidates` is empty
 */
function computeScoreStats(candidates) {
    if (candidates.length === 0) {
        return { min: 0, max: 0, mean: 0, median: 0 };
    }
    // `map` yields a fresh array, so sorting here never reorders the caller's data.
    const scores = candidates.map((c) => c.score).sort((a, b) => a - b);
    const count = scores.length;
    const sum = scores.reduce((acc, s) => acc + s, 0);
    const mean = sum / count;
    // Median: the middle element for odd counts, the average of the two middle
    // elements for even counts (taking only the upper one skews the statistic).
    const upperMid = Math.floor(count / 2);
    const median = count % 2 === 1
        ? (scores[upperMid] ?? 0)
        : ((scores[upperMid - 1] ?? 0) + (scores[upperMid] ?? 0)) / 2;
    return {
        min: scores[0] ?? 0,
        max: scores[count - 1] ?? 0,
        mean,
        median,
    };
}
|
|
2400
|
+
/**
 * Summarizes how penalties were applied across candidates
 * (Issue #68: Telemetry).
 * LDE: implemented as a pure function (no side effects, inputs untouched).
 *
 * @param candidates Candidates exposing a `penalties` collection of `{ kind }` events
 * @returns Counts of "path-miss" and "large-file" penalties, the candidate
 *          total, the pathMatchHits distribution and the score statistics
 */
function computePenaltyTelemetry(candidates) {
    let pathMissCount = 0;
    let largeFileCount = 0;
    for (const { penalties } of candidates) {
        for (const { kind } of penalties) {
            // Only these two kinds are tallied; any other kind is ignored.
            switch (kind) {
                case "path-miss":
                    pathMissCount += 1;
                    break;
                case "large-file":
                    largeFileCount += 1;
                    break;
                default:
                    break;
            }
        }
    }
    const pathMatchDistribution = computePathMatchDistribution(candidates);
    const scoreStats = computeScoreStats(candidates);
    return {
        pathMissPenalties: pathMissCount,
        largeFilePenalties: largeFileCount,
        totalCandidates: candidates.length,
        pathMatchDistribution,
        scoreStats,
    };
}
|
|
1180
|
-
|
|
2423
|
+
/**
 * Writes penalty telemetry to a file (Issue #68: Debug).
 * LDE: side effects (I/O) are isolated in this function.
 *
 * Appends one JSON Lines record to /tmp/kiri-penalty-telemetry.jsonl.
 * The write is best-effort: a failure is reported on stderr and never
 * propagates, so debug telemetry cannot fail the request that triggered it.
 *
 * @param telemetry Object with `totalCandidates`, `pathMissPenalties`,
 *                  `largeFilePenalties`, `pathMatchDistribution` and `scoreStats`
 * @param queryStats Query word count and average word length
 */
function logPenaltyTelemetry(telemetry, queryStats) {
    const dist = telemetry.pathMatchDistribution;
    const scores = telemetry.scoreStats;
    // With no candidates every share is reported as "0.0" instead of "NaN".
    const toPercent = (count) => dist.total > 0 ? ((count / dist.total) * 100).toFixed(1) : "0.0";
    // Ratio of the maximum penalty (-0.8) to the mean score; a zero or
    // non-numeric mean has no meaningful ratio, so report "n/a" rather than
    // "Infinity%" / "NaN%".
    const penaltyRatioValue = (0.8 / scores.mean) * 100;
    const penaltyRatio = Number.isFinite(penaltyRatioValue)
        ? penaltyRatioValue.toFixed(1) + "%"
        : "n/a";
    // Telemetry record, serialized as a single JSON Lines entry.
    const telemetryRecord = {
        timestamp: new Date().toISOString(),
        query: {
            wordCount: queryStats.wordCount,
            avgWordLength: queryStats.avgWordLength,
        },
        totalCandidates: telemetry.totalCandidates,
        pathMissPenalties: telemetry.pathMissPenalties,
        largeFilePenalties: telemetry.largeFilePenalties,
        pathMatchDistribution: {
            zero: dist.zero,
            one: dist.one,
            two: dist.two,
            three: dist.three,
            fourPlus: dist.fourPlus,
            total: dist.total,
            percentages: {
                zero: toPercent(dist.zero),
                one: toPercent(dist.one),
                two: toPercent(dist.two),
                three: toPercent(dist.three),
                fourPlus: toPercent(dist.fourPlus),
            },
        },
        scoreStats: {
            min: scores.min.toFixed(2),
            max: scores.max.toFixed(2),
            mean: scores.mean.toFixed(2),
            median: scores.median.toFixed(2),
            penaltyRatio,
        },
    };
    const telemetryFile = "/tmp/kiri-penalty-telemetry.jsonl";
    try {
        fs.appendFileSync(telemetryFile, JSON.stringify(telemetryRecord) + "\n");
    }
    catch (error) {
        // Reported on stderr only; the caller continues normally.
        console.error(`[penalty-telemetry] Failed to append to ${telemetryFile}:`, error);
    }
}
|
|
2469
|
+
/**
 * Reads the graduated-penalty configuration from environment variables
 * (Issue #68: Graduated Penalty).
 * LDE: keeps the environment access in one place so that the penalty
 * computation itself can stay pure and testable.
 *
 * Unset, empty, or non-numeric values fall back to the built-in defaults so a
 * malformed variable can never inject NaN into the scoring pipeline.
 *
 * @returns `{ enabled, minWordCount, minAvgWordLength, tier0Delta, tier1Delta, tier2Delta }`
 */
function readGraduatedPenaltyConfig() {
    const readNumber = (name, fallback) => {
        const raw = process.env[name];
        if (raw === undefined || raw === "") {
            return fallback;
        }
        const parsed = Number.parseFloat(raw);
        // NaN / Infinity would silently disable thresholds or poison scores.
        return Number.isFinite(parsed) ? parsed : fallback;
    };
    return {
        // Only the exact string "1" enables the graduated system.
        enabled: process.env.KIRI_GRADUATED_PENALTY === "1",
        minWordCount: readNumber("KIRI_PENALTY_MIN_WORD_COUNT", 2),
        minAvgWordLength: readNumber("KIRI_PENALTY_MIN_AVG_WORD_LENGTH", 4.0),
        tier0Delta: readNumber("KIRI_PENALTY_TIER_0", -0.8),
        tier1Delta: readNumber("KIRI_PENALTY_TIER_1", -0.4),
        tier2Delta: readNumber("KIRI_PENALTY_TIER_2", -0.2),
    };
}
|
|
2483
|
+
/**
 * Computes the graduated penalty value (Issue #68: Graduated Penalty).
 * LDE: pure function (no side effects, referentially transparent).
 *
 * ADR 002: Graduated Penalty System
 * - Tier 0 (pathMatchHits === 0): Strong penalty (no path evidence)
 * - Tier 1 (pathMatchHits === 1): Medium penalty (weak path evidence)
 * - Tier 2 (pathMatchHits === 2): Light penalty (moderate path evidence)
 * - Tier 3+ (pathMatchHits >= 3): No penalty (strong path evidence)
 *
 * Invariants:
 * - Result is always <= 0 (non-positive); a configured delta that is
 *   positive or not a finite number is treated as "no penalty" (0)
 * - More path hits → less penalty (monotonicity, assuming the configured
 *   tiers are ordered accordingly)
 * - Query must meet eligibility criteria
 *
 * @param pathMatchHits Number of path-based scoring matches
 * @param queryStats Query word count and average word length
 * @param config Graduated penalty configuration
 * @returns Penalty delta (always <= 0)
 */
function computeGraduatedPenalty(pathMatchHits, queryStats, config) {
    // Early return if query doesn't meet criteria
    if (queryStats.wordCount < config.minWordCount ||
        queryStats.avgWordLength < config.minAvgWordLength) {
        return 0;
    }
    // Graduated penalty tiers
    let delta;
    if (pathMatchHits === 0) {
        delta = config.tier0Delta;
    }
    else if (pathMatchHits === 1) {
        delta = config.tier1Delta;
    }
    else if (pathMatchHits === 2) {
        delta = config.tier2Delta;
    }
    else {
        return 0; // pathMatchHits >= 3: no penalty
    }
    // Enforce the non-positive invariant: a misconfigured tier must never
    // turn the penalty into a boost or propagate NaN into the score.
    return Number.isFinite(delta) && delta < 0 ? delta : 0;
}
|
|
2518
|
+
async function contextBundleImpl(context, params) {
|
|
1181
2519
|
context.warningManager.startRequest();
|
|
1182
2520
|
const { db, repoId } = context;
|
|
1183
|
-
const
|
|
1184
|
-
if (
|
|
2521
|
+
const rawGoal = params.goal?.trim() ?? "";
|
|
2522
|
+
if (rawGoal.length === 0) {
|
|
1185
2523
|
throw new Error("context_bundle requires a non-empty goal. Describe your objective to receive context.");
|
|
1186
2524
|
}
|
|
2525
|
+
if (process.env.KIRI_TRACE_METADATA === "1") {
|
|
2526
|
+
console.info(`[metadata-trace-env] goal=${rawGoal}`);
|
|
2527
|
+
}
|
|
2528
|
+
const inlineMetadata = parseInlineMetadataFilters(rawGoal);
|
|
2529
|
+
const paramFilters = normalizeMetadataFiltersParam(params.metadata_filters);
|
|
2530
|
+
const metadataFilters = mergeMetadataFilters([...inlineMetadata.filters, ...paramFilters]);
|
|
2531
|
+
const strictMetadataFilters = metadataFilters.filter((filter) => filter.strict);
|
|
2532
|
+
const hintMetadataFilters = metadataFilters.filter((filter) => !filter.strict);
|
|
2533
|
+
const hasStrictMetadataFilters = strictMetadataFilters.length > 0;
|
|
2534
|
+
const hasHintMetadataFilters = hintMetadataFilters.length > 0;
|
|
2535
|
+
const hasAnyMetadataFilters = metadataFilters.length > 0;
|
|
2536
|
+
const goal = inlineMetadata.cleanedQuery.length > 0 ? inlineMetadata.cleanedQuery : rawGoal;
|
|
2537
|
+
if (process.env.KIRI_TRACE_METADATA === "1") {
|
|
2538
|
+
console.info("[metadata-trace]", JSON.stringify({
|
|
2539
|
+
rawGoal,
|
|
2540
|
+
cleanedGoal: goal,
|
|
2541
|
+
inlineFilters: inlineMetadata.filters,
|
|
2542
|
+
paramFilters,
|
|
2543
|
+
mergedFilters: metadataFilters,
|
|
2544
|
+
}));
|
|
2545
|
+
}
|
|
1187
2546
|
const limit = normalizeBundleLimit(params.limit);
|
|
1188
2547
|
const artifacts = params.artifacts ?? {};
|
|
2548
|
+
const artifactHints = normalizeArtifactHints(artifacts.hints);
|
|
2549
|
+
const hintBuckets = bucketArtifactHints(artifactHints);
|
|
2550
|
+
const artifactPathHints = hintBuckets.pathHints;
|
|
2551
|
+
const substringHints = hintBuckets.substringHints;
|
|
1189
2552
|
const includeTokensEstimate = params.includeTokensEstimate === true;
|
|
1190
2553
|
const isCompact = params.compact === true;
|
|
1191
2554
|
// 項目2: トークンバジェット保護警告
|
|
@@ -1208,6 +2571,15 @@ export async function contextBundle(context, params) {
|
|
|
1208
2571
|
if (artifacts.editing_path) {
|
|
1209
2572
|
keywordSources.push(artifacts.editing_path);
|
|
1210
2573
|
}
|
|
2574
|
+
if (artifactHints.length > 0) {
|
|
2575
|
+
keywordSources.push(artifactHints.join(" "));
|
|
2576
|
+
}
|
|
2577
|
+
if (hasAnyMetadataFilters) {
|
|
2578
|
+
const filterSeed = metadataFilters
|
|
2579
|
+
.map((filter) => `${filter.source ?? "meta"}:${filter.key}=${filter.values.join(",")}`)
|
|
2580
|
+
.join(" ");
|
|
2581
|
+
keywordSources.push(filterSeed);
|
|
2582
|
+
}
|
|
1211
2583
|
const semanticSeed = keywordSources.join(" ");
|
|
1212
2584
|
const queryEmbedding = generateEmbedding(semanticSeed)?.values ?? null;
|
|
1213
2585
|
const extractedTerms = extractKeywords(semanticSeed);
|
|
@@ -1224,11 +2596,17 @@ export async function contextBundle(context, params) {
|
|
|
1224
2596
|
const candidates = new Map();
|
|
1225
2597
|
const stringMatchSeeds = new Set();
|
|
1226
2598
|
const fileCache = new Map();
|
|
2599
|
+
// ✅ Cache boost profile config to avoid redundant lookups in hot path
|
|
2600
|
+
const boostProfile = params.boost_profile ??
|
|
2601
|
+
(hasHintMetadataFilters ? "balanced" : hasStrictMetadataFilters ? "docs" : "default");
|
|
2602
|
+
const profileConfig = getBoostProfile(boostProfile);
|
|
1227
2603
|
// フレーズマッチング(高い重み: textMatch × 2)- 統合クエリでパフォーマンス改善
|
|
1228
2604
|
if (extractedTerms.phrases.length > 0) {
|
|
1229
2605
|
const phrasePlaceholders = extractedTerms.phrases
|
|
1230
2606
|
.map(() => "b.content ILIKE '%' || ? || '%'")
|
|
1231
2607
|
.join(" OR ");
|
|
2608
|
+
// DEBUG: Log SQL query parameters for troubleshooting
|
|
2609
|
+
console.log(`[DEBUG contextBundle] Executing phrase match query with repo_id=${repoId}, phrases=${JSON.stringify(extractedTerms.phrases)}`);
|
|
1232
2610
|
const rows = await db.all(`
|
|
1233
2611
|
SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
|
|
1234
2612
|
FROM file f
|
|
@@ -1242,7 +2620,14 @@ export async function contextBundle(context, params) {
|
|
|
1242
2620
|
ORDER BY f.path
|
|
1243
2621
|
LIMIT ?
|
|
1244
2622
|
`, [repoId, ...extractedTerms.phrases, MAX_MATCHES_PER_KEYWORD * extractedTerms.phrases.length]);
|
|
1245
|
-
|
|
2623
|
+
// DEBUG: Log returned paths and verify they match expected repo_id
|
|
2624
|
+
if (rows.length > 0) {
|
|
2625
|
+
console.log(`[DEBUG contextBundle] Phrase match returned ${rows.length} rows. Sample paths:`, rows.slice(0, 3).map((r) => r.path));
|
|
2626
|
+
// Verify repo_id of returned files
|
|
2627
|
+
const pathsToCheck = rows.slice(0, 3).map((r) => r.path);
|
|
2628
|
+
const verification = await db.all(`SELECT path, repo_id FROM file WHERE path IN (${pathsToCheck.map(() => "?").join(", ")}) LIMIT 3`, pathsToCheck);
|
|
2629
|
+
console.log(`[DEBUG contextBundle] Repo ID verification:`, verification);
|
|
2630
|
+
}
|
|
1246
2631
|
for (const row of rows) {
|
|
1247
2632
|
if (row.content === null) {
|
|
1248
2633
|
continue;
|
|
@@ -1261,7 +2646,9 @@ export async function contextBundle(context, params) {
|
|
|
1261
2646
|
candidate.reasons.add(`phrase:${phrase}`);
|
|
1262
2647
|
}
|
|
1263
2648
|
// Apply boost profile once per file
|
|
1264
|
-
|
|
2649
|
+
if (boostProfile !== "none") {
|
|
2650
|
+
applyBoostProfile(candidate, row, profileConfig, weights, extractedTerms);
|
|
2651
|
+
}
|
|
1265
2652
|
// Use first matched phrase for preview (guaranteed to exist due to length check above)
|
|
1266
2653
|
const { line } = buildPreview(row.content, matchedPhrases[0]);
|
|
1267
2654
|
candidate.matchLine =
|
|
@@ -1301,7 +2688,6 @@ export async function contextBundle(context, params) {
|
|
|
1301
2688
|
ORDER BY f.path
|
|
1302
2689
|
LIMIT ?
|
|
1303
2690
|
`, [repoId, ...extractedTerms.keywords, MAX_MATCHES_PER_KEYWORD * extractedTerms.keywords.length]);
|
|
1304
|
-
const boostProfile = params.boost_profile ?? "default";
|
|
1305
2691
|
for (const row of rows) {
|
|
1306
2692
|
if (row.content === null) {
|
|
1307
2693
|
continue;
|
|
@@ -1319,7 +2705,9 @@ export async function contextBundle(context, params) {
|
|
|
1319
2705
|
candidate.reasons.add(`text:${keyword}`);
|
|
1320
2706
|
}
|
|
1321
2707
|
// Apply boost profile once per file
|
|
1322
|
-
|
|
2708
|
+
if (boostProfile !== "none") {
|
|
2709
|
+
applyBoostProfile(candidate, row, profileConfig, weights, extractedTerms);
|
|
2710
|
+
}
|
|
1323
2711
|
// Use first matched keyword for preview (guaranteed to exist due to length check above)
|
|
1324
2712
|
const { line } = buildPreview(row.content, matchedKeywords[0]);
|
|
1325
2713
|
candidate.matchLine =
|
|
@@ -1341,6 +2729,31 @@ export async function contextBundle(context, params) {
|
|
|
1341
2729
|
}
|
|
1342
2730
|
}
|
|
1343
2731
|
}
|
|
2732
|
+
const artifactPathTargets = artifactPathHints.map((hintPath) => ({
|
|
2733
|
+
path: hintPath,
|
|
2734
|
+
sourceHint: hintPath,
|
|
2735
|
+
origin: "artifact",
|
|
2736
|
+
}));
|
|
2737
|
+
const dictionaryPathTargets = await fetchDictionaryPathHints(db, context.tableAvailability, repoId, substringHints, HINT_DICTIONARY_LIMIT);
|
|
2738
|
+
const { list: resolvedPathHintTargets, meta: hintSeedMeta } = createHintSeedMeta([
|
|
2739
|
+
...artifactPathTargets,
|
|
2740
|
+
...dictionaryPathTargets,
|
|
2741
|
+
]);
|
|
2742
|
+
if (resolvedPathHintTargets.length > 0) {
|
|
2743
|
+
await applyPathHintPromotions({
|
|
2744
|
+
db,
|
|
2745
|
+
tableAvailability: context.tableAvailability,
|
|
2746
|
+
repoId,
|
|
2747
|
+
hintTargets: resolvedPathHintTargets,
|
|
2748
|
+
candidates,
|
|
2749
|
+
fileCache,
|
|
2750
|
+
weights,
|
|
2751
|
+
hintSeedMeta,
|
|
2752
|
+
});
|
|
2753
|
+
}
|
|
2754
|
+
if (substringHints.length > 0) {
|
|
2755
|
+
await addHintSubstringMatches(db, context.tableAvailability, repoId, substringHints, candidates, HINT_SUBSTRING_LIMIT, HINT_SUBSTRING_BOOST);
|
|
2756
|
+
}
|
|
1344
2757
|
if (artifacts.editing_path) {
|
|
1345
2758
|
const editingCandidate = ensureCandidate(candidates, artifacts.editing_path);
|
|
1346
2759
|
editingCandidate.score += weights.editingPath;
|
|
@@ -1348,7 +2761,6 @@ export async function contextBundle(context, params) {
|
|
|
1348
2761
|
editingCandidate.matchLine ??= 1;
|
|
1349
2762
|
}
|
|
1350
2763
|
// SQL injection防御: ファイルパスの検証パターン
|
|
1351
|
-
const SAFE_PATH_PATTERN = /^[a-zA-Z0-9_.\-/]+$/;
|
|
1352
2764
|
const dependencySeeds = new Set();
|
|
1353
2765
|
for (const pathSeed of stringMatchSeeds) {
|
|
1354
2766
|
if (!SAFE_PATH_PATTERN.test(pathSeed)) {
|
|
@@ -1362,10 +2774,13 @@ export async function contextBundle(context, params) {
|
|
|
1362
2774
|
}
|
|
1363
2775
|
if (artifacts.editing_path) {
|
|
1364
2776
|
if (!SAFE_PATH_PATTERN.test(artifacts.editing_path)) {
|
|
1365
|
-
throw new Error(`Invalid editing_path format.
|
|
2777
|
+
throw new Error(`Invalid editing_path format: ${artifacts.editing_path}. Use only A-Z, 0-9, _, ., -, / characters.`);
|
|
1366
2778
|
}
|
|
1367
2779
|
dependencySeeds.add(artifacts.editing_path);
|
|
1368
2780
|
}
|
|
2781
|
+
for (const target of resolvedPathHintTargets) {
|
|
2782
|
+
dependencySeeds.add(target.path);
|
|
2783
|
+
}
|
|
1369
2784
|
if (dependencySeeds.size > 0) {
|
|
1370
2785
|
// SQL injection防御: プレースホルダー生成前にサイズを検証
|
|
1371
2786
|
if (dependencySeeds.size > MAX_DEPENDENCY_SEEDS_QUERY_LIMIT) {
|
|
@@ -1375,7 +2790,7 @@ export async function contextBundle(context, params) {
|
|
|
1375
2790
|
// 防御的チェック: プレースホルダーが正しい形式であることを確認
|
|
1376
2791
|
// 期待される形式: "?, ?, ..." (クエスチョンマーク、カンマ、スペースのみ)
|
|
1377
2792
|
if (!/^(\?)(,\s*\?)*$/.test(placeholders)) {
|
|
1378
|
-
throw new Error("Invalid placeholder
|
|
2793
|
+
throw new Error("Invalid dependency placeholder sequence detected. Remove unsafe dependency seeds and retry the request.");
|
|
1379
2794
|
}
|
|
1380
2795
|
const depRows = await db.all(`
|
|
1381
2796
|
SELECT src_path, dst_kind, dst, rel
|
|
@@ -1413,31 +2828,72 @@ export async function contextBundle(context, params) {
|
|
|
1413
2828
|
}
|
|
1414
2829
|
}
|
|
1415
2830
|
}
|
|
1416
|
-
const
|
|
1417
|
-
|
|
1418
|
-
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
candidate.content = cached.content;
|
|
1422
|
-
candidate.lang = cached.lang;
|
|
1423
|
-
candidate.ext = cached.ext;
|
|
1424
|
-
candidate.totalLines = cached.totalLines;
|
|
1425
|
-
candidate.embedding = cached.embedding;
|
|
2831
|
+
const materializeCandidates = async () => {
|
|
2832
|
+
const result = [];
|
|
2833
|
+
for (const candidate of candidates.values()) {
|
|
2834
|
+
if (isSuppressedPath(candidate.path)) {
|
|
2835
|
+
continue;
|
|
1426
2836
|
}
|
|
1427
|
-
|
|
1428
|
-
const
|
|
1429
|
-
if (
|
|
1430
|
-
|
|
2837
|
+
if (!candidate.content) {
|
|
2838
|
+
const cached = fileCache.get(candidate.path);
|
|
2839
|
+
if (cached) {
|
|
2840
|
+
candidate.content = cached.content;
|
|
2841
|
+
candidate.lang = cached.lang;
|
|
2842
|
+
candidate.ext = cached.ext;
|
|
2843
|
+
candidate.totalLines = cached.totalLines;
|
|
2844
|
+
candidate.embedding = cached.embedding;
|
|
1431
2845
|
}
|
|
1432
|
-
|
|
1433
|
-
|
|
1434
|
-
|
|
1435
|
-
|
|
1436
|
-
|
|
1437
|
-
|
|
2846
|
+
else {
|
|
2847
|
+
const loaded = await loadFileContent(db, repoId, candidate.path);
|
|
2848
|
+
if (!loaded) {
|
|
2849
|
+
continue;
|
|
2850
|
+
}
|
|
2851
|
+
candidate.content = loaded.content;
|
|
2852
|
+
candidate.lang = loaded.lang;
|
|
2853
|
+
candidate.ext = loaded.ext;
|
|
2854
|
+
candidate.totalLines = loaded.totalLines;
|
|
2855
|
+
candidate.embedding = loaded.embedding;
|
|
2856
|
+
fileCache.set(candidate.path, loaded);
|
|
2857
|
+
}
|
|
2858
|
+
}
|
|
2859
|
+
result.push(candidate);
|
|
2860
|
+
}
|
|
2861
|
+
return result;
|
|
2862
|
+
};
|
|
2863
|
+
const addMetadataFallbackCandidates = async () => {
|
|
2864
|
+
if (!hasAnyMetadataFilters) {
|
|
2865
|
+
return;
|
|
2866
|
+
}
|
|
2867
|
+
const metadataRows = await fetchMetadataOnlyCandidates(db, context.tableAvailability, repoId, metadataFilters, limit * 2);
|
|
2868
|
+
if (metadataRows.length === 0) {
|
|
2869
|
+
return;
|
|
2870
|
+
}
|
|
2871
|
+
for (const row of metadataRows) {
|
|
2872
|
+
const candidate = ensureCandidate(candidates, row.path);
|
|
2873
|
+
if (row.content) {
|
|
2874
|
+
candidate.content = row.content;
|
|
2875
|
+
candidate.totalLines = row.content.split(/\r?\n/).length;
|
|
2876
|
+
fileCache.set(row.path, {
|
|
2877
|
+
content: row.content,
|
|
2878
|
+
lang: row.lang,
|
|
2879
|
+
ext: row.ext,
|
|
2880
|
+
totalLines: candidate.totalLines,
|
|
2881
|
+
embedding: candidate.embedding,
|
|
2882
|
+
});
|
|
1438
2883
|
}
|
|
2884
|
+
candidate.lang ??= row.lang;
|
|
2885
|
+
candidate.ext ??= row.ext;
|
|
2886
|
+
candidate.matchLine ??= 1;
|
|
2887
|
+
candidate.score = Math.max(candidate.score, 1 + metadataFilters.length * 0.2);
|
|
1439
2888
|
}
|
|
1440
|
-
|
|
2889
|
+
};
|
|
2890
|
+
if (hasAnyMetadataFilters) {
|
|
2891
|
+
await addMetadataFallbackCandidates();
|
|
2892
|
+
}
|
|
2893
|
+
let materializedCandidates = await materializeCandidates();
|
|
2894
|
+
if (materializedCandidates.length === 0 && hasAnyMetadataFilters) {
|
|
2895
|
+
await addMetadataFallbackCandidates();
|
|
2896
|
+
materializedCandidates = await materializeCandidates();
|
|
1441
2897
|
}
|
|
1442
2898
|
if (materializedCandidates.length === 0) {
|
|
1443
2899
|
// Get warnings from WarningManager (includes breaking change notification if applicable)
|
|
@@ -1448,6 +2904,72 @@ export async function contextBundle(context, params) {
|
|
|
1448
2904
|
...(warnings.length > 0 && { warnings }),
|
|
1449
2905
|
};
|
|
1450
2906
|
}
|
|
2907
|
+
const metadataKeywordSet = new Set(extractedTerms.keywords.map((keyword) => keyword.toLowerCase()));
|
|
2908
|
+
const filterValueSet = new Set(metadataFilters.flatMap((filter) => filter.values.map((value) => value.toLowerCase())));
|
|
2909
|
+
let metadataEntriesMap;
|
|
2910
|
+
if (hasAnyMetadataFilters || metadataKeywordSet.size > 0 || filterValueSet.size > 0) {
|
|
2911
|
+
metadataEntriesMap = await loadMetadataForPaths(db, context.tableAvailability, repoId, materializedCandidates.map((candidate) => candidate.path));
|
|
2912
|
+
}
|
|
2913
|
+
if (hasStrictMetadataFilters) {
|
|
2914
|
+
metadataEntriesMap ??= new Map();
|
|
2915
|
+
for (let i = materializedCandidates.length - 1; i >= 0; i--) {
|
|
2916
|
+
const candidate = materializedCandidates[i];
|
|
2917
|
+
if (!candidate) {
|
|
2918
|
+
continue; // Skip undefined entries
|
|
2919
|
+
}
|
|
2920
|
+
const entries = metadataEntriesMap.get(candidate.path);
|
|
2921
|
+
const matchesFilters = candidateMatchesMetadataFilters(entries, strictMetadataFilters);
|
|
2922
|
+
if (!matchesFilters) {
|
|
2923
|
+
materializedCandidates.splice(i, 1);
|
|
2924
|
+
continue;
|
|
2925
|
+
}
|
|
2926
|
+
candidate.reasons.add("metadata:filter");
|
|
2927
|
+
if (process.env.KIRI_TRACE_METADATA === "1") {
|
|
2928
|
+
console.info(`[metadata-trace-match] path=${candidate.path}`);
|
|
2929
|
+
}
|
|
2930
|
+
}
|
|
2931
|
+
if (materializedCandidates.length === 0 && hasAnyMetadataFilters) {
|
|
2932
|
+
await addMetadataFallbackCandidates();
|
|
2933
|
+
materializedCandidates = await materializeCandidates();
|
|
2934
|
+
}
|
|
2935
|
+
if (materializedCandidates.length === 0) {
|
|
2936
|
+
const warnings = [...context.warningManager.responseWarnings];
|
|
2937
|
+
return {
|
|
2938
|
+
context: [],
|
|
2939
|
+
...(includeTokensEstimate && { tokens_estimate: 0 }),
|
|
2940
|
+
...(warnings.length > 0 && { warnings }),
|
|
2941
|
+
};
|
|
2942
|
+
}
|
|
2943
|
+
}
|
|
2944
|
+
if (hasHintMetadataFilters) {
|
|
2945
|
+
metadataEntriesMap ??= new Map();
|
|
2946
|
+
for (const candidate of materializedCandidates) {
|
|
2947
|
+
const entries = metadataEntriesMap.get(candidate.path);
|
|
2948
|
+
const matchesHints = candidateMatchesMetadataFilters(entries, hintMetadataFilters);
|
|
2949
|
+
if (matchesHints) {
|
|
2950
|
+
candidate.score += METADATA_HINT_BONUS;
|
|
2951
|
+
candidate.reasons.add("metadata:hint");
|
|
2952
|
+
}
|
|
2953
|
+
}
|
|
2954
|
+
}
|
|
2955
|
+
const inboundCounts = await loadInboundLinkCounts(db, context.tableAvailability, repoId, materializedCandidates.map((candidate) => candidate.path));
|
|
2956
|
+
if (metadataEntriesMap) {
|
|
2957
|
+
for (const candidate of materializedCandidates) {
|
|
2958
|
+
const entries = metadataEntriesMap.get(candidate.path);
|
|
2959
|
+
const metadataBoost = computeMetadataBoost(entries, metadataKeywordSet, filterValueSet);
|
|
2960
|
+
if (metadataBoost > 0) {
|
|
2961
|
+
candidate.score += metadataBoost;
|
|
2962
|
+
candidate.reasons.add("boost:metadata");
|
|
2963
|
+
}
|
|
2964
|
+
}
|
|
2965
|
+
}
|
|
2966
|
+
for (const candidate of materializedCandidates) {
|
|
2967
|
+
const linkBoost = computeInboundLinkBoost(inboundCounts.get(candidate.path));
|
|
2968
|
+
if (linkBoost > 0) {
|
|
2969
|
+
candidate.score += linkBoost;
|
|
2970
|
+
candidate.reasons.add("boost:links");
|
|
2971
|
+
}
|
|
2972
|
+
}
|
|
1451
2973
|
applyStructuralScores(materializedCandidates, queryEmbedding, weights.structural);
|
|
1452
2974
|
// ✅ CRITICAL SAFETY: Apply multipliers AFTER all additive scoring (v0.7.0)
|
|
1453
2975
|
// Only apply to positive scores to prevent negative score inversion
|
|
@@ -1456,18 +2978,61 @@ export async function contextBundle(context, params) {
|
|
|
1456
2978
|
candidate.score *= candidate.scoreMultiplier;
|
|
1457
2979
|
}
|
|
1458
2980
|
}
|
|
1459
|
-
|
|
1460
|
-
|
|
2981
|
+
// Issue #68: Apply Path-Based Penalties (after multipliers, before sorting)
|
|
2982
|
+
const penaltyFlags = readPenaltyFlags();
|
|
2983
|
+
const queryStats = computeQueryStats(goal); // Always compute for telemetry
|
|
2984
|
+
const graduatedConfig = readGraduatedPenaltyConfig();
|
|
2985
|
+
// ADR 002: Use graduated penalty system if enabled, otherwise use legacy binary penalty
|
|
2986
|
+
if (graduatedConfig.enabled && penaltyFlags.pathPenalty) {
|
|
2987
|
+
for (const candidate of materializedCandidates) {
|
|
2988
|
+
applyGraduatedPenalty(candidate, queryStats, graduatedConfig);
|
|
2989
|
+
}
|
|
2990
|
+
}
|
|
2991
|
+
else if (penaltyFlags.pathPenalty) {
|
|
2992
|
+
// Legacy mode: Binary penalty (pathMatchHits === 0 only)
|
|
2993
|
+
for (const candidate of materializedCandidates) {
|
|
2994
|
+
applyPathMissPenalty(candidate, queryStats);
|
|
2995
|
+
}
|
|
2996
|
+
}
|
|
2997
|
+
// Issue #68: Apply Large File Penalty (after multipliers, before sorting)
|
|
2998
|
+
if (penaltyFlags.largeFilePenalty) {
|
|
2999
|
+
for (const candidate of materializedCandidates) {
|
|
3000
|
+
applyLargeFilePenalty(candidate);
|
|
3001
|
+
}
|
|
3002
|
+
}
|
|
3003
|
+
// Issue #68: Telemetry(デバッグ用、環境変数で制御)
|
|
3004
|
+
// LDE: 純粋関数(計算)と副作用(I/O)を分離
|
|
3005
|
+
const enableTelemetry = process.env.KIRI_PENALTY_TELEMETRY === "1";
|
|
3006
|
+
if (enableTelemetry) {
|
|
3007
|
+
console.error(`[DEBUG] Telemetry enabled. Flags: pathPenalty=${penaltyFlags.pathPenalty}, largeFilePenalty=${penaltyFlags.largeFilePenalty}`);
|
|
3008
|
+
const telemetry = computePenaltyTelemetry(materializedCandidates);
|
|
3009
|
+
logPenaltyTelemetry(telemetry, queryStats);
|
|
3010
|
+
}
|
|
3011
|
+
// v1.0.0: Filter out extremely low-scored candidates (result of multiplicative penalties)
|
|
3012
|
+
// Threshold removes files with >95% penalty while keeping reasonably relevant files
|
|
3013
|
+
// Hint paths are exempt from this threshold (always included if score > 0)
|
|
3014
|
+
const hintPathSet = new Set(resolvedPathHintTargets.map((target) => target.path));
|
|
3015
|
+
const rankedCandidates = materializedCandidates
|
|
3016
|
+
.filter((candidate) => candidate.score > SCORE_FILTER_THRESHOLD ||
|
|
3017
|
+
(candidate.score > 0 && hintPathSet.has(candidate.path)))
|
|
1461
3018
|
.sort((a, b) => {
|
|
1462
3019
|
if (b.score === a.score) {
|
|
1463
3020
|
return a.path.localeCompare(b.path);
|
|
1464
3021
|
}
|
|
1465
3022
|
return b.score - a.score;
|
|
1466
|
-
})
|
|
1467
|
-
|
|
1468
|
-
|
|
3023
|
+
});
|
|
3024
|
+
const prioritizedCandidates = prioritizeHintCandidates(rankedCandidates, resolvedPathHintTargets.map((target) => target.path), limit);
|
|
3025
|
+
if (prioritizedCandidates.length === 0) {
|
|
3026
|
+
const warnings = [...context.warningManager.responseWarnings];
|
|
3027
|
+
return {
|
|
3028
|
+
context: [],
|
|
3029
|
+
...(includeTokensEstimate && { tokens_estimate: 0 }),
|
|
3030
|
+
...(warnings.length > 0 && { warnings }),
|
|
3031
|
+
};
|
|
3032
|
+
}
|
|
3033
|
+
const maxScore = Math.max(...prioritizedCandidates.map((candidate) => candidate.score));
|
|
1469
3034
|
const results = [];
|
|
1470
|
-
for (const candidate of
|
|
3035
|
+
for (const candidate of prioritizedCandidates) {
|
|
1471
3036
|
if (!candidate.content) {
|
|
1472
3037
|
continue;
|
|
1473
3038
|
}
|
|
@@ -1495,6 +3060,23 @@ export async function contextBundle(context, params) {
|
|
|
1495
3060
|
startLine = Math.max(1, matchLine - windowHalf);
|
|
1496
3061
|
endLine = Math.min(totalLines === 0 ? matchLine + windowHalf : totalLines, startLine + FALLBACK_SNIPPET_WINDOW - 1);
|
|
1497
3062
|
}
|
|
3063
|
+
if (CLAMP_SNIPPETS_ENABLED) {
|
|
3064
|
+
// Clamp snippet length to FALLBACK_SNIPPET_WINDOW even when symbol spans large regions
|
|
3065
|
+
const maxWindow = FALLBACK_SNIPPET_WINDOW;
|
|
3066
|
+
const selectedEnd = selected ? selected.end_line : endLine;
|
|
3067
|
+
const selectedStart = selected ? selected.start_line : startLine;
|
|
3068
|
+
if (endLine - startLine + 1 > maxWindow) {
|
|
3069
|
+
const anchor = candidate.matchLine ?? startLine;
|
|
3070
|
+
let clampedStart = Math.max(selectedStart, anchor - Math.floor(maxWindow / 2));
|
|
3071
|
+
let clampedEnd = clampedStart + maxWindow - 1;
|
|
3072
|
+
if (clampedEnd > selectedEnd) {
|
|
3073
|
+
clampedEnd = selectedEnd;
|
|
3074
|
+
clampedStart = Math.max(selectedStart, clampedEnd - maxWindow + 1);
|
|
3075
|
+
}
|
|
3076
|
+
startLine = clampedStart;
|
|
3077
|
+
endLine = Math.max(clampedStart, clampedEnd);
|
|
3078
|
+
}
|
|
3079
|
+
}
|
|
1498
3080
|
if (endLine < startLine) {
|
|
1499
3081
|
endLine = startLine;
|
|
1500
3082
|
}
|
|
@@ -1522,7 +3104,7 @@ export async function contextBundle(context, params) {
|
|
|
1522
3104
|
let tokensEstimate;
|
|
1523
3105
|
if (includeTokensEstimate) {
|
|
1524
3106
|
tokensEstimate = results.reduce((acc, item) => {
|
|
1525
|
-
const candidate =
|
|
3107
|
+
const candidate = prioritizedCandidates.find((c) => c.path === item.path);
|
|
1526
3108
|
if (candidate && candidate.content) {
|
|
1527
3109
|
return acc + estimateTokensFromContent(candidate.content, item.range[0], item.range[1]);
|
|
1528
3110
|
}
|
|
@@ -1533,8 +3115,13 @@ export async function contextBundle(context, params) {
|
|
|
1533
3115
|
}
|
|
1534
3116
|
// Get warnings from WarningManager (includes breaking change notification if applicable)
|
|
1535
3117
|
const warnings = [...context.warningManager.responseWarnings];
|
|
3118
|
+
const shouldFilterResults = FINAL_RESULT_SUPPRESSION_ENABLED && SUPPRESS_NON_CODE_ENABLED;
|
|
3119
|
+
const sanitizedResults = shouldFilterResults
|
|
3120
|
+
? results.filter((item) => !isSuppressedPath(item.path))
|
|
3121
|
+
: results;
|
|
3122
|
+
const finalResults = sanitizedResults.length > 0 ? sanitizedResults : results;
|
|
1536
3123
|
const payload = {
|
|
1537
|
-
context:
|
|
3124
|
+
context: finalResults,
|
|
1538
3125
|
...(warnings.length > 0 && { warnings }),
|
|
1539
3126
|
};
|
|
1540
3127
|
if (tokensEstimate !== undefined) {
|
|
@@ -1737,35 +3324,27 @@ export async function depsClosure(context, params) {
|
|
|
1737
3324
|
edges,
|
|
1738
3325
|
};
|
|
1739
3326
|
}
|
|
1740
|
-
|
|
3327
|
+
/**
 * Resolves a repository root path to its database ID.
 *
 * Kept for backward compatibility; internally it delegates to the
 * `repoResolver` exposed by the server services.
 *
 * @param db - DuckDB client
 * @param repoRoot - Root path of the repository
 * @param services - Optional ServerServices; when omitted (null/undefined) a
 *                   new instance is created from `db`
 * @returns The repository ID
 * @throws Error when the repository has not been indexed (as stated by the
 *         original documentation; raised by the resolver, not by this wrapper)
 */
export async function resolveRepoId(db, repoRoot, services) {
    const activeServices = services ?? createServerServices(db);
    const repoId = await activeServices.repoResolver.resolveId(repoRoot);
    return repoId;
}
|
|
3342
|
+
/**
 * Public entry point for the context bundle operation.
 *
 * Delegates to `contextBundleImpl`; any failure is logged via `console.error`
 * and then rethrown unchanged so callers still observe the original error.
 */
export async function contextBundle(context, params) {
    try {
        const bundle = await contextBundleImpl(context, params);
        return bundle;
    }
    catch (err) {
        console.error("context_bundle error:", err);
        throw err;
    }
}
|