kiri-mcp-server 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -1
- package/config/scoring-profiles.yml +82 -35
- package/dist/config/scoring-profiles.yml +82 -35
- package/dist/package.json +9 -1
- package/dist/src/indexer/cli.d.ts.map +1 -1
- package/dist/src/indexer/cli.js +712 -98
- package/dist/src/indexer/cli.js.map +1 -1
- package/dist/src/indexer/git.d.ts.map +1 -1
- package/dist/src/indexer/git.js +41 -3
- package/dist/src/indexer/git.js.map +1 -1
- package/dist/src/indexer/migrations/repo-merger.d.ts +33 -0
- package/dist/src/indexer/migrations/repo-merger.d.ts.map +1 -0
- package/dist/src/indexer/migrations/repo-merger.js +67 -0
- package/dist/src/indexer/migrations/repo-merger.js.map +1 -0
- package/dist/src/indexer/schema.d.ts +66 -0
- package/dist/src/indexer/schema.d.ts.map +1 -1
- package/dist/src/indexer/schema.js +337 -0
- package/dist/src/indexer/schema.js.map +1 -1
- package/dist/src/server/boost-profiles.d.ts +1 -1
- package/dist/src/server/boost-profiles.d.ts.map +1 -1
- package/dist/src/server/boost-profiles.js +116 -0
- package/dist/src/server/boost-profiles.js.map +1 -1
- package/dist/src/server/config.d.ts +45 -0
- package/dist/src/server/config.d.ts.map +1 -0
- package/dist/src/server/config.js +146 -0
- package/dist/src/server/config.js.map +1 -0
- package/dist/src/server/context.d.ts +29 -0
- package/dist/src/server/context.d.ts.map +1 -1
- package/dist/src/server/context.js +26 -1
- package/dist/src/server/context.js.map +1 -1
- package/dist/src/server/handlers/snippets-get.d.ts +36 -0
- package/dist/src/server/handlers/snippets-get.d.ts.map +1 -0
- package/dist/src/server/handlers/snippets-get.js +120 -0
- package/dist/src/server/handlers/snippets-get.js.map +1 -0
- package/dist/src/server/handlers.d.ts +32 -20
- package/dist/src/server/handlers.d.ts.map +1 -1
- package/dist/src/server/handlers.js +1554 -338
- package/dist/src/server/handlers.js.map +1 -1
- package/dist/src/server/indexBootstrap.d.ts.map +1 -1
- package/dist/src/server/indexBootstrap.js +49 -2
- package/dist/src/server/indexBootstrap.js.map +1 -1
- package/dist/src/server/main.d.ts.map +1 -1
- package/dist/src/server/main.js +7 -0
- package/dist/src/server/main.js.map +1 -1
- package/dist/src/server/profile-selector.d.ts +33 -0
- package/dist/src/server/profile-selector.d.ts.map +1 -0
- package/dist/src/server/profile-selector.js +291 -0
- package/dist/src/server/profile-selector.js.map +1 -0
- package/dist/src/server/rpc.d.ts.map +1 -1
- package/dist/src/server/rpc.js +36 -6
- package/dist/src/server/rpc.js.map +1 -1
- package/dist/src/server/runtime.d.ts.map +1 -1
- package/dist/src/server/runtime.js +14 -4
- package/dist/src/server/runtime.js.map +1 -1
- package/dist/src/server/scoring.d.ts +7 -1
- package/dist/src/server/scoring.d.ts.map +1 -1
- package/dist/src/server/scoring.js +121 -21
- package/dist/src/server/scoring.js.map +1 -1
- package/dist/src/server/services/index.d.ts +24 -0
- package/dist/src/server/services/index.d.ts.map +1 -0
- package/dist/src/server/services/index.js +20 -0
- package/dist/src/server/services/index.js.map +1 -0
- package/dist/src/server/services/repo-repository.d.ts +61 -0
- package/dist/src/server/services/repo-repository.d.ts.map +1 -0
- package/dist/src/server/services/repo-repository.js +93 -0
- package/dist/src/server/services/repo-repository.js.map +1 -0
- package/dist/src/server/services/repo-resolver.d.ts +28 -0
- package/dist/src/server/services/repo-resolver.d.ts.map +1 -0
- package/dist/src/server/services/repo-resolver.js +62 -0
- package/dist/src/server/services/repo-resolver.js.map +1 -0
- package/dist/src/shared/duckdb.d.ts.map +1 -1
- package/dist/src/shared/duckdb.js +21 -1
- package/dist/src/shared/duckdb.js.map +1 -1
- package/dist/src/shared/fs/safePath.d.ts +7 -0
- package/dist/src/shared/fs/safePath.d.ts.map +1 -0
- package/dist/src/shared/fs/safePath.js +23 -0
- package/dist/src/shared/fs/safePath.js.map +1 -0
- package/dist/src/shared/utils/glob.d.ts +5 -0
- package/dist/src/shared/utils/glob.d.ts.map +1 -0
- package/dist/src/shared/utils/glob.js +22 -0
- package/dist/src/shared/utils/glob.js.map +1 -0
- package/dist/src/shared/utils/retry.d.ts +8 -0
- package/dist/src/shared/utils/retry.d.ts.map +1 -0
- package/dist/src/shared/utils/retry.js +20 -0
- package/dist/src/shared/utils/retry.js.map +1 -0
- package/package.json +28 -22
|
@@ -3,10 +3,13 @@ import path from "node:path";
|
|
|
3
3
|
import { checkFTSSchemaExists } from "../indexer/schema.js";
|
|
4
4
|
import { generateEmbedding, structuralSimilarity } from "../shared/embedding.js";
|
|
5
5
|
import { encode as encodeGPT, tokenizeText } from "../shared/tokenizer.js";
|
|
6
|
-
import { getRepoPathCandidates, normalizeRepoPath } from "../shared/utils/path.js";
|
|
7
6
|
import { expandAbbreviations } from "./abbreviations.js";
|
|
8
7
|
import { getBoostProfile, } from "./boost-profiles.js";
|
|
8
|
+
import { loadServerConfig } from "./config.js";
|
|
9
9
|
import { coerceProfileName, loadScoringProfile } from "./scoring.js";
|
|
10
|
+
import { createServerServices } from "./services/index.js";
|
|
11
|
+
// Re-export extracted handlers for backward compatibility
|
|
12
|
+
export { snippetsGet, } from "./handlers/snippets-get.js";
|
|
10
13
|
// Configuration file patterns (v0.8.0+: consolidated to avoid duplication)
|
|
11
14
|
// Comprehensive list covering multiple languages and tools
|
|
12
15
|
const CONFIG_FILES = [
|
|
@@ -133,6 +136,83 @@ const CONFIG_PATTERNS = [
|
|
|
133
136
|
".github/workflows",
|
|
134
137
|
];
|
|
135
138
|
const FTS_STATUS_CACHE_TTL_MS = 10_000;
|
|
139
|
+
// Maps a user-facing metadata alias to the canonical key it stands for
// (e.g. both "tag" and "tags" resolve to the "tags" key).
const METADATA_ALIAS_MAP = new Map(Object.entries({
    tag: { key: "tags" },
    tags: { key: "tags" },
    category: { key: "category" },
    title: { key: "title" },
    service: { key: "service" },
}));
// Recognised key prefixes. Some entries carry a `strict` flag or a `source` name;
// NOTE(review): how consumers interpret `strict` / `source` is not visible here.
const METADATA_KEY_PREFIXES = [
    { prefix: "meta." },
    { prefix: "metadata.", strict: true },
    { prefix: "docmeta.", strict: true },
    { prefix: "frontmatter.", source: "front_matter" },
    { prefix: "fm.", source: "front_matter" },
    { prefix: "yaml.", source: "yaml" },
    { prefix: "json.", source: "json" },
];
// Scoring constants for metadata / link signals (consumers are outside this block).
const METADATA_MATCH_WEIGHT = 0.15;
const METADATA_FILTER_MATCH_WEIGHT = 0.1;
const METADATA_HINT_BONUS = 0.25;
const INBOUND_LINK_WEIGHT = 0.2;
|
|
159
|
+
/**
 * checkTableAvailability
 *
 * Checks at startup whether the optional tables exist and builds a
 * TableAvailability object from the results. Passing this object around
 * avoids the race conditions that global mutable flags would introduce.
 *
 * NOTE: after a schema change (tables added) the server must be restarted.
 *
 * @param db - DuckDBClient instance
 * @returns TableAvailability object (one boolean per probed table)
 * @throws when an error other than "table is missing" occurs (e.g. a
 *         connection error); the original error is attached as `cause`
 */
export async function checkTableAvailability(db) {
    // Table names are interpolated into SQL below, so only these literals are accepted.
    const ALLOWED_TABLES = [
        "document_metadata_kv",
        "markdown_link",
        "hint_expansion",
        "hint_dictionary",
    ];
    const checkTable = async (tableName) => {
        if (!ALLOWED_TABLES.includes(tableName)) {
            throw new Error(`Invalid table name: ${tableName}`);
        }
        try {
            // LIMIT 0 probes for existence without reading any rows.
            await db.all(`SELECT 1 FROM ${tableName} LIMIT 0`);
            return true;
        }
        catch (error) {
            // Only a missing-table error is treated as "not available".
            // NOTE(review): isTableMissingError is not defined in the visible part of
            // this file (which defines isMissingTableError) — confirm it exists.
            if (isTableMissingError(error, tableName)) {
                return false;
            }
            // Anything else (connection errors etc.) is rethrown, keeping the
            // original error reachable through `cause` for diagnostics.
            throw new Error(`Failed to check table availability for ${tableName}: ${error instanceof Error ? error.message : String(error)}`, { cause: error });
        }
    };
    const result = {
        hasMetadataTables: await checkTable("document_metadata_kv"),
        hasLinkTable: await checkTable("markdown_link"),
        hasHintLog: await checkTable("hint_expansion"),
        hasHintDictionary: await checkTable("hint_dictionary"),
    };
    // Startup warnings: report every table that is missing.
    if (!result.hasMetadataTables) {
        console.warn("document_metadata_kv table is missing. Metadata filters and boosts disabled until database is upgraded.");
    }
    if (!result.hasLinkTable) {
        console.warn("markdown_link table is missing. Inbound link boosting disabled until database is upgraded.");
    }
    if (!result.hasHintLog) {
        console.warn("hint_expansion table is missing. Hint logging disabled. Enable the latest schema and rerun the indexer to capture hint logs.");
    }
    if (!result.hasHintDictionary) {
        console.warn("hint_dictionary table is missing. Dictionary hints disabled. Run scripts/diag/build-hint-dictionary.ts after upgrading the schema.");
    }
    return result;
}
|
|
136
216
|
async function hasDirtyRepos(db) {
|
|
137
217
|
const statusCheck = await db.all(`SELECT COUNT(*) as count FROM repo
|
|
138
218
|
WHERE fts_dirty = true OR fts_status IN ('dirty', 'rebuilding')`);
|
|
@@ -217,8 +297,154 @@ function isConfigFile(path, fileName) {
|
|
|
217
297
|
fileName.startsWith(".env") ||
|
|
218
298
|
isInConfigDirectory);
|
|
219
299
|
}
|
|
300
|
+
/**
 * Cleans a caller-supplied hint list: keeps only non-empty trimmed strings,
 * removes duplicates (first occurrence wins) and stops once
 * MAX_ARTIFACT_HINTS entries have been collected.
 *
 * @param hints - arbitrary input; anything that is not an array yields []
 * @returns array of unique, trimmed hint strings in input order
 */
function normalizeArtifactHints(hints) {
    // A Set keeps insertion order and gives de-duplication for free.
    const unique = new Set();
    if (Array.isArray(hints)) {
        for (const entry of hints) {
            const cleaned = typeof entry === "string" ? entry.trim() : "";
            if (cleaned === "") {
                continue;
            }
            unique.add(cleaned);
            if (unique.size >= MAX_ARTIFACT_HINTS) {
                break;
            }
        }
    }
    return [...unique];
}
|
|
322
|
+
/**
 * Splits hints into two groups:
 *  - pathHints: hints that contain "/" and match SAFE_PATH_PATTERN (kept verbatim)
 *  - substringHints: every other hint, trimmed and lower-cased, kept only when
 *    at least 3 characters long
 *
 * @param hints - iterable of hint strings
 * @returns `{ pathHints, substringHints }`
 */
function bucketArtifactHints(hints) {
    const pathHints = [];
    const substringHints = [];
    for (const hint of hints) {
        const looksLikePath = hint.includes("/") && SAFE_PATH_PATTERN.test(hint);
        if (looksLikePath) {
            pathHints.push(hint);
        }
        else {
            const lowered = hint.trim().toLowerCase();
            if (lowered.length >= 3) {
                substringHints.push(lowered);
            }
        }
    }
    return { pathHints, substringHints };
}
|
|
339
|
+
/**
 * Tells whether `error` is an Error whose message contains "Table with name"
 * (case-insensitive) and also mentions `table`.
 *
 * @param error - value caught from a database call
 * @param table - table name expected to appear in the message
 * @returns true only when both conditions hold
 */
function isMissingTableError(error, table) {
    if (error instanceof Error) {
        const { message } = error;
        if (!/Table with name/i.test(message)) {
            return false;
        }
        return message.includes(table);
    }
    return false;
}
|
|
345
|
+
/**
 * Inserts one row into hint_expansion describing a hint expansion.
 * Does nothing unless HINT_LOG_ENABLED is set and the availability object
 * reports the hint_expansion table as present.
 *
 * @param db - client exposing `run(sql, params)`
 * @param tableAvailability - object whose `hasHintLog` flag gates logging
 * @param entry - `{ repoId, hintValue, kind, targetPath?, payload? }`
 * @throws any database error other than "hint_expansion table is missing"
 */
async function logHintExpansionEntry(db, tableAvailability, entry) {
    const loggingActive = HINT_LOG_ENABLED && tableAvailability.hasHintLog;
    if (!loggingActive) {
        return;
    }
    try {
        const { repoId, hintValue, kind, targetPath, payload } = entry;
        // payload is stored as JSON text; absent values are written as NULL.
        const values = [
            repoId,
            hintValue,
            kind,
            targetPath ?? null,
            payload ? JSON.stringify(payload) : null,
        ];
        await db.run(`
      INSERT INTO hint_expansion (repo_id, hint_value, expansion_kind, target_path, payload)
      VALUES (?, ?, ?, ?, ?)
    `, values);
    }
    catch (error) {
        if (!isMissingTableError(error, "hint_expansion")) {
            throw error;
        }
        // The table disappeared (or never existed) in the active database: warn and carry on.
        console.warn("hint_expansion table is missing in the active database. Enable the latest schema and rerun the indexer to capture hint logs.");
    }
}
|
|
372
|
+
/**
 * Looks up each distinct hint in hint_dictionary and returns the target paths
 * found, tagged with the hint they came from.
 *
 * Returns [] without querying when the dictionary feature is disabled,
 * `perHintLimit` is not positive, there are no hints, or the availability
 * object reports the table as absent. If the table turns out to be missing at
 * query time a warning is printed and [] is returned (results gathered so far
 * are discarded).
 *
 * @param db - client exposing `all(sql, params)`
 * @param tableAvailability - object whose `hasHintDictionary` flag gates lookup
 * @param repoId - repository id used in the query
 * @param hints - hint strings (duplicates are queried once)
 * @param perHintLimit - maximum rows fetched per hint
 * @returns array of `{ path, sourceHint, origin: "dictionary" }`
 * @throws any database error other than "hint_dictionary table is missing"
 */
async function fetchDictionaryPathHints(db, tableAvailability, repoId, hints, perHintLimit) {
    const lookupDisabled = !HINT_DICTIONARY_ENABLED || perHintLimit <= 0 || hints.length === 0;
    if (lookupDisabled || !tableAvailability.hasHintDictionary) {
        return [];
    }
    const collected = [];
    for (const hint of new Set(hints)) {
        let rows;
        try {
            rows = await db.all(`
        SELECT target_path
        FROM hint_dictionary
        WHERE repo_id = ?
          AND hint_value = ?
        ORDER BY freq DESC, target_path
        LIMIT ?
      `, [repoId, hint, perHintLimit]);
        }
        catch (error) {
            if (!isMissingTableError(error, "hint_dictionary")) {
                throw error;
            }
            console.warn("hint_dictionary table is missing in the active database. Run scripts/diag/build-hint-dictionary.ts after upgrading the schema.");
            return [];
        }
        // Rows with an empty path or a path outside SAFE_PATH_PATTERN are dropped.
        const usable = rows.filter((row) => row.target_path && SAFE_PATH_PATTERN.test(row.target_path));
        for (const { target_path: targetPath } of usable) {
            collected.push({ path: targetPath, sourceHint: hint, origin: "dictionary" });
        }
    }
    return collected;
}
|
|
409
|
+
/**
 * De-duplicates hint targets by path (first occurrence wins) and records, per
 * path, which hint produced it and where it originated.
 *
 * @param targets - iterable of `{ path, sourceHint, origin }`
 * @returns `{ list, meta }` — the de-duplicated targets and a Map from path to
 *          `{ sourceHint, origin }`
 */
function createHintSeedMeta(targets) {
    const meta = new Map();
    const list = [];
    for (const target of targets) {
        const { path: targetPath, sourceHint, origin } = target;
        if (!meta.has(targetPath)) {
            meta.set(targetPath, { sourceHint, origin });
            list.push(target);
        }
    }
    return { list, meta };
}
|
|
421
|
+
/**
 * Null-safe lookup of the seed metadata recorded for `path`.
 *
 * @param seedMeta - Map from path to metadata, or null/undefined
 * @param path - path to look up
 * @returns the stored metadata, or undefined when there is no map or no entry
 */
function getHintSeedMeta(seedMeta, path) {
    if (seedMeta === null || seedMeta === undefined) {
        return undefined;
    }
    return seedMeta.get(path);
}
|
|
424
|
+
/**
 * Computes the score boost given to hinted paths: the text and path weights
 * scaled by their configured multipliers, plus the editing-path and dependency
 * weights, but never less than HINT_PRIORITY_BASE_BONUS.
 *
 * @param weights - object with numeric `textMatch`, `pathMatch`, `editingPath`, `dependency`
 * @returns the boost value
 */
function computeHintPriorityBoost(weights) {
    const { textMatch, pathMatch, editingPath, dependency } = weights;
    const scaledText = textMatch * HINT_PRIORITY_TEXT_MULTIPLIER;
    const scaledPath = pathMatch * HINT_PRIORITY_PATH_MULTIPLIER;
    return Math.max(HINT_PRIORITY_BASE_BONUS, scaledText + scaledPath + editingPath + dependency);
}
|
|
430
|
+
/**
 * Builds the per-request hint expansion settings from the module-level HINT_*
 * values, clamping each limit to its minimum (0 or 1) and deriving the boost
 * values from `weights`.
 *
 * @param weights - scoring weights; `dependency` is read here and the whole
 *                  object is passed to computeHintPriorityBoost
 * @returns plain config object consumed by the hint expansion helpers
 */
function createHintExpansionConfig(weights) {
    const nonNegative = (value) => Math.max(0, value);
    const atLeastOne = (value) => Math.max(1, value);
    // Falls back to 0.65 when the configured threshold is not a finite number.
    const semThreshold = Number.isFinite(HINT_SEM_THRESHOLD) ? HINT_SEM_THRESHOLD : 0.65;
    return {
        dirLimit: nonNegative(HINT_DIR_LIMIT),
        dirMaxFiles: atLeastOne(HINT_DIR_MAX_FILES),
        depOutLimit: nonNegative(HINT_DEP_OUT_LIMIT),
        depInLimit: nonNegative(HINT_DEP_IN_LIMIT),
        semLimit: nonNegative(HINT_SEM_LIMIT),
        semDirCandidateLimit: atLeastOne(HINT_SEM_DIR_CANDIDATE_LIMIT),
        semThreshold,
        perHintLimit: nonNegative(HINT_PER_HINT_LIMIT),
        dbQueryBudget: nonNegative(HINT_DB_QUERY_BUDGET),
        dirBoost: computeHintPriorityBoost(weights) * 0.35,
        depBoost: weights.dependency * 0.8,
        substringLimit: nonNegative(HINT_SUBSTRING_LIMIT),
        substringBoost: nonNegative(HINT_SUBSTRING_BOOST),
    };
}
|
|
220
447
|
const DEFAULT_SEARCH_LIMIT = 50;
|
|
221
|
-
const DEFAULT_SNIPPET_WINDOW = 150;
|
|
222
448
|
const DEFAULT_BUNDLE_LIMIT = 7; // Reduced from 12 to optimize token usage
|
|
223
449
|
const MAX_BUNDLE_LIMIT = 20;
|
|
224
450
|
const MAX_KEYWORDS = 12;
|
|
@@ -226,35 +452,76 @@ const MAX_MATCHES_PER_KEYWORD = 40;
|
|
|
226
452
|
const MAX_DEPENDENCY_SEEDS = 8;
|
|
227
453
|
const MAX_DEPENDENCY_SEEDS_QUERY_LIMIT = 100; // SQL injection防御用の上限
|
|
228
454
|
const NEARBY_LIMIT = 6;
|
|
229
|
-
const
|
|
455
|
+
const serverConfig = loadServerConfig();
|
|
456
|
+
const SUPPRESS_NON_CODE_ENABLED = serverConfig.features.suppressNonCode;
|
|
457
|
+
const FINAL_RESULT_SUPPRESSION_ENABLED = serverConfig.features.suppressFinalResults;
|
|
458
|
+
const CLAMP_SNIPPETS_ENABLED = serverConfig.features.clampSnippets;
|
|
459
|
+
const FALLBACK_SNIPPET_WINDOW = serverConfig.features.snippetWindow;
|
|
230
460
|
const MAX_RERANK_LIMIT = 50;
|
|
461
|
+
const MAX_ARTIFACT_HINTS = 8;
|
|
462
|
+
const SAFE_PATH_PATTERN = /^[a-zA-Z0-9_.\-/]+$/;
|
|
463
|
+
const HINT_PRIORITY_TEXT_MULTIPLIER = serverConfig.hints.priority.textMultiplier;
|
|
464
|
+
const HINT_PRIORITY_PATH_MULTIPLIER = serverConfig.hints.priority.pathMultiplier;
|
|
465
|
+
const HINT_PRIORITY_BASE_BONUS = serverConfig.hints.priority.baseBonus;
|
|
466
|
+
const HINT_DIR_LIMIT = serverConfig.hints.directory.limit;
|
|
467
|
+
const HINT_DIR_MAX_FILES = serverConfig.hints.directory.maxFiles;
|
|
468
|
+
const HINT_DEP_OUT_LIMIT = serverConfig.hints.dependency.outLimit;
|
|
469
|
+
const HINT_DEP_IN_LIMIT = serverConfig.hints.dependency.inLimit;
|
|
470
|
+
const HINT_SEM_LIMIT = serverConfig.hints.semantic.limit;
|
|
471
|
+
const HINT_SEM_DIR_CANDIDATE_LIMIT = serverConfig.hints.semantic.dirCandidateLimit;
|
|
472
|
+
const HINT_SEM_THRESHOLD = serverConfig.hints.semantic.threshold;
|
|
473
|
+
const SUPPRESSED_PATH_PREFIXES = [".github/", ".git/", "ThirdPartyNotices", "node_modules/"];
|
|
474
|
+
const SUPPRESSED_FILE_NAMES = ["thirdpartynotices.txt", "thirdpartynotices.md", "cgmanifest.json"];
|
|
475
|
+
/**
 * Decides whether a path should be suppressed as non-code content.
 * Always false when SUPPRESS_NON_CODE_ENABLED is off. Otherwise the path (with
 * any leading "./" removed, compared case-insensitively) is suppressed when it
 * ends with one of SUPPRESSED_FILE_NAMES or contains one of
 * SUPPRESSED_PATH_PREFIXES anywhere — the "prefix" entries are matched as
 * substrings, not only at the start.
 *
 * @param path - repository-relative path
 * @returns true when the path is suppressed
 */
function isSuppressedPath(path) {
    if (!SUPPRESS_NON_CODE_ENABLED) {
        return false;
    }
    const haystack = path.replace(/^\.\/+/u, "").toLowerCase();
    const hasSuppressedName = SUPPRESSED_FILE_NAMES.some((name) => haystack.endsWith(name));
    if (hasSuppressedName) {
        return true;
    }
    return SUPPRESSED_PATH_PREFIXES.some((prefix) => haystack.includes(prefix.toLowerCase()));
}
|
|
487
|
+
const HINT_PER_HINT_LIMIT = serverConfig.hints.perHintLimit;
|
|
488
|
+
const HINT_DB_QUERY_BUDGET = serverConfig.hints.dbQueryLimit;
|
|
489
|
+
const HINT_SUBSTRING_LIMIT = serverConfig.hints.substring.limit;
|
|
490
|
+
const HINT_SUBSTRING_BOOST = serverConfig.hints.substring.boost;
|
|
491
|
+
const HINT_LOG_ENABLED = process.env.KIRI_HINT_LOG === "1";
|
|
492
|
+
const HINT_DICTIONARY_ENABLED = process.env.KIRI_HINT_DICTIONARY !== "0";
|
|
493
|
+
// Parsed from KIRI_HINT_DICTIONARY_LIMIT (default 2) and clamped to >= 0.
// A value that does not parse as an integer (e.g. "" or "abc") falls back to the
// default: Math.max(0, NaN) is NaN, which would slip past `<= 0` guards and end
// up as a SQL LIMIT parameter.
const HINT_DICTIONARY_LIMIT = (() => {
    const DEFAULT_LIMIT = 2;
    const parsed = Number.parseInt(process.env.KIRI_HINT_DICTIONARY_LIMIT ?? String(DEFAULT_LIMIT), 10);
    return Number.isNaN(parsed) ? DEFAULT_LIMIT : Math.max(0, parsed);
})();
|
|
231
494
|
// Issue #68: Path/Large File Penalty configuration (環境変数で上書き可能)
|
|
232
|
-
const PATH_MISS_DELTA =
|
|
233
|
-
const LARGE_FILE_DELTA =
|
|
495
|
+
const PATH_MISS_DELTA = serverConfig.penalties.pathMissDelta;
|
|
496
|
+
const LARGE_FILE_DELTA = serverConfig.penalties.largeFileDelta;
|
|
234
497
|
const MAX_WHY_TAGS = 10;
|
|
235
498
|
// 項目3: whyタグの優先度マップ(低い数値ほど高優先度)
|
|
236
499
|
// All actual tag prefixes used in the codebase
|
|
237
500
|
const WHY_TAG_PRIORITY = {
|
|
238
|
-
artifact: 1, // User-provided hints (editing_path, failing_tests)
|
|
501
|
+
artifact: 1, // User-provided hints (editing_path, failing_tests, hints)
|
|
502
|
+
dictionary: 1, // Dictionary-provided hints
|
|
239
503
|
phrase: 2, // Multi-word literal matches (strongest signal)
|
|
240
504
|
text: 3, // Single keyword matches
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
"path-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
505
|
+
metadata: 4, // Front matter / metadata filters & boosts
|
|
506
|
+
substring: 4, // Substring hint expansion
|
|
507
|
+
"path-phrase": 5, // Path contains multi-word phrase
|
|
508
|
+
structural: 6, // Semantic similarity
|
|
509
|
+
"path-segment": 7, // Path component matches
|
|
510
|
+
"path-keyword": 8, // Path keyword match
|
|
511
|
+
dep: 9, // Dependency relationship
|
|
512
|
+
near: 10, // Proximity to editing file
|
|
513
|
+
boost: 11, // File type boost
|
|
514
|
+
recent: 12, // Recently changed
|
|
515
|
+
symbol: 13, // Symbol match
|
|
516
|
+
penalty: 14, // Penalty explanations (keep for transparency)
|
|
517
|
+
keyword: 15, // Generic keyword (deprecated, kept for compatibility)
|
|
252
518
|
};
|
|
253
519
|
// Reserve at least one slot for important structural tags
|
|
254
520
|
const RESERVED_WHY_SLOTS = {
|
|
255
521
|
dep: 1, // Dependency relationships are critical
|
|
256
522
|
symbol: 1, // Symbol boundaries help understand context
|
|
257
523
|
near: 1, // Proximity explains file selection
|
|
524
|
+
metadata: 1, // Preserve metadata reasons when filters/boosts are active
|
|
258
525
|
};
|
|
259
526
|
function parseOutputOptions(params) {
|
|
260
527
|
return {
|
|
@@ -277,6 +544,9 @@ function selectWhyTags(reasons) {
|
|
|
277
544
|
reasons = new Set(Array.from(reasons).slice(0, 1000));
|
|
278
545
|
}
|
|
279
546
|
const selected = new Set();
|
|
547
|
+
if (reasons.has("boost:links")) {
|
|
548
|
+
selected.add("boost:links");
|
|
549
|
+
}
|
|
280
550
|
const byCategory = new Map();
|
|
281
551
|
for (const reason of reasons) {
|
|
282
552
|
const prefix = reason.split(":")[0] ?? "";
|
|
@@ -342,6 +612,45 @@ const STOP_WORDS = new Set([
|
|
|
342
612
|
"need",
|
|
343
613
|
"goal",
|
|
344
614
|
]);
|
|
615
|
+
/**
 * Produces the final candidate list with hinted paths first.
 * Candidates whose path appears in `hintPaths` are taken in hint order, then
 * the remaining slots are filled from `rankedCandidates` in their existing
 * order. Each path appears at most once (the first ranked candidate for a path
 * is the one used). The result holds at most `limit` entries, with the limit
 * clamped to the range 1..rankedCandidates.length.
 *
 * @param rankedCandidates - candidates already sorted by rank, each with a `path`
 * @param hintPaths - paths that should be promoted to the front
 * @param limit - requested maximum number of results
 * @returns new array of selected candidates
 */
function prioritizeHintCandidates(rankedCandidates, hintPaths, limit) {
    if (rankedCandidates.length === 0) {
        return [];
    }
    const cap = Math.max(1, Math.min(limit, rankedCandidates.length));
    const firstByPath = new Map();
    for (const candidate of rankedCandidates) {
        if (!firstByPath.has(candidate.path)) {
            firstByPath.set(candidate.path, candidate);
        }
    }
    // Insertion-ordered map from path to chosen candidate; its order is the output order.
    const picked = new Map();
    const take = (candidate) => {
        if (candidate && !picked.has(candidate.path)) {
            picked.set(candidate.path, candidate);
        }
    };
    for (const hintPath of hintPaths) {
        if (picked.size >= cap) {
            break;
        }
        take(firstByPath.get(hintPath));
    }
    for (const candidate of rankedCandidates) {
        if (picked.size >= cap) {
            break;
        }
        take(candidate);
    }
    return [...picked.values()];
}
|
|
345
654
|
function normalizeLimit(limit) {
|
|
346
655
|
if (!limit || Number.isNaN(limit)) {
|
|
347
656
|
return DEFAULT_SEARCH_LIMIT;
|
|
@@ -526,8 +835,351 @@ function ensureCandidate(map, filePath) {
|
|
|
526
835
|
}
|
|
527
836
|
return candidate;
|
|
528
837
|
}
|
|
838
|
+
/**
 * Runs neighbourhood expansion for each hint path in turn, sharing a single
 * database-query budget (`config.dbQueryBudget`) across all of them. Stops as
 * soon as the budget is used up. Does nothing when there are no hint paths or
 * when the per-hint limit or the query budget is not positive.
 *
 * @param params - `{ hintPaths, config, ... }`; every property is forwarded to
 *                 expandSingleHintNeighborhood together with `hintPath` and the
 *                 shared `state`
 */
async function expandHintCandidatesForHints(params) {
    const { hintPaths, config } = params;
    const nothingToDo = hintPaths.length === 0 || config.perHintLimit <= 0 || config.dbQueryBudget <= 0;
    if (nothingToDo) {
        return;
    }
    // Mutable budget counter shared by every hint processed in this call.
    const state = { remainingDbQueries: config.dbQueryBudget };
    for (let index = 0; index < hintPaths.length && !(state.remainingDbQueries <= 0); index += 1) {
        await expandSingleHintNeighborhood({ ...params, hintPath: hintPaths[index], state });
    }
}
|
|
851
|
+
/**
 * Expands one hint path in three stages — directory neighbours, dependency
 * neighbours, then semantic neighbours — spending at most
 * `config.perHintLimit` additions overall. A stage is skipped when its limit
 * is not positive, and expansion stops once the per-hint allowance is used up.
 * Additions made by the semantic stage are not counted (it runs last).
 *
 * @param args - `{ config, ... }`, forwarded unchanged to each stage helper
 */
async function expandSingleHintNeighborhood(args) {
    const { perHintLimit, dirLimit, depOutLimit, depInLimit, semLimit } = args.config;
    let allowance = perHintLimit;
    if (allowance <= 0) {
        return;
    }
    if (dirLimit > 0) {
        const fromDirectory = await addHintDirectoryNeighbors(args, Math.min(dirLimit, allowance));
        allowance -= fromDirectory;
        if (allowance <= 0) {
            return;
        }
    }
    const wantsDependencies = depOutLimit > 0 || depInLimit > 0;
    if (wantsDependencies) {
        const fromDependencies = await addHintDependencyNeighbors(args, allowance);
        allowance -= fromDependencies;
        if (allowance <= 0) {
            return;
        }
    }
    if (semLimit > 0) {
        await addHintSemanticNeighbors(args, Math.min(semLimit, allowance));
    }
}
|
|
875
|
+
/**
 * Tries to spend `cost` queries from the shared budget.
 *
 * @param state - object with a mutable `remainingDbQueries` counter
 * @param cost - number of queries to reserve (default 1)
 * @returns true when the budget covered the cost (and was reduced),
 *          false when it did not (budget left untouched)
 */
function useHintDbBudget(state, cost = 1) {
    const affordable = !(state.remainingDbQueries < cost);
    if (affordable) {
        state.remainingDbQueries -= cost;
    }
    return affordable;
}
|
|
882
|
+
/**
 * Adds a hint-derived score boost to a candidate, once per reason.
 * When applied it also records the reason, raises `pathMatchHits` to at least
 * 2, defaults `matchLine` to 1, and fills in `lang` / `ext` if the candidate
 * has none and a value was supplied.
 *
 * @param candidate - mutable candidate with `score`, `reasons` (Set), `pathMatchHits`
 * @param reason - reason tag identifying this boost
 * @param scoreDelta - amount to add; non-positive values are ignored
 * @param lang - optional language to record
 * @param ext - optional extension to record
 * @returns true when the boost was applied, false when skipped
 */
function applyHintReasonBoost(candidate, reason, scoreDelta, lang, ext) {
    const skip = scoreDelta <= 0 || candidate.reasons.has(reason);
    if (skip) {
        return false;
    }
    candidate.score += scoreDelta;
    candidate.reasons.add(reason);
    candidate.pathMatchHits = Math.max(candidate.pathMatchHits, 2);
    if (candidate.matchLine === undefined || candidate.matchLine === null) {
        candidate.matchLine = 1;
    }
    // Only fill in file attributes that are supplied and not already known.
    for (const [field, value] of [["lang", lang], ["ext", ext]]) {
        if (value && !candidate[field]) {
            candidate[field] = value;
        }
    }
    return true;
}
|
|
898
|
+
/**
 * Promotes every hint target to a strongly boosted candidate, logs each
 * promotion, and then expands the neighbourhood of all hinted paths.
 *
 * For each target the candidate receives the hint priority boost, a reason tag
 * ("dictionary:hint:<path>" or "artifact:hint:<path>" depending on origin), a
 * `pathMatchHits` of at least 3 and a default `matchLine` of 1.
 *
 * @param args - `{ db, tableAvailability, repoId, hintTargets, candidates,
 *               fileCache, weights, hintSeedMeta }`
 */
async function applyPathHintPromotions(args) {
    const { hintTargets, db, tableAvailability, repoId, candidates, weights } = args;
    if (hintTargets.length === 0) {
        return;
    }
    const boost = computeHintPriorityBoost(weights);
    for (const { path: targetPath, origin, sourceHint } of hintTargets) {
        const fromDictionary = origin === "dictionary";
        const candidate = ensureCandidate(candidates, targetPath);
        candidate.score += boost;
        candidate.reasons.add(`${fromDictionary ? "dictionary:hint" : "artifact:hint"}:${targetPath}`);
        candidate.pathMatchHits = Math.max(candidate.pathMatchHits, 3);
        candidate.matchLine ??= 1;
        await logHintExpansionEntry(db, tableAvailability, {
            repoId,
            hintValue: sourceHint,
            kind: fromDictionary ? "dictionary" : "path",
            targetPath,
            payload: {
                origin,
                source_hint: sourceHint,
            },
        });
    }
    await expandHintCandidatesForHints({
        db,
        tableAvailability,
        repoId,
        hintPaths: hintTargets.map((target) => target.path),
        candidates,
        fileCache: args.fileCache,
        weights,
        config: createHintExpansionConfig(weights),
        hintSeedMeta: args.hintSeedMeta,
    });
}
|
|
934
|
+
/**
 * Boosts files whose lower-cased path contains one of the substring hints.
 *
 * For each hint up to `limitPerHint` non-binary files of the repository are
 * fetched (ordered by path); every matching candidate gets the reason
 * "substring:hint:<hint>" and `boost` added once, and each applied boost is
 * logged as a "substring" expansion.
 *
 * The hint is matched literally with contains() rather than LIKE, so `_` and
 * `%` inside a hint are ordinary characters instead of wildcards (with LIKE,
 * "user_id" would also match "user-id").
 *
 * @param db - client exposing `all(sql, params)`
 * @param tableAvailability - forwarded to logHintExpansionEntry
 * @param repoId - repository id used in the query
 * @param hints - substring hints; compared against LOWER(path), so they are
 *                expected to be lower-case already
 * @param candidates - candidate map passed to ensureCandidate
 * @param limitPerHint - maximum rows fetched per hint; non-positive disables the step
 * @param boost - score added per match; non-positive disables the step
 */
async function addHintSubstringMatches(db, tableAvailability, repoId, hints, candidates, limitPerHint, boost) {
    if (limitPerHint <= 0 || boost <= 0) {
        return;
    }
    for (const hint of hints) {
        // Skip hints that contain no character from the safe set at all
        // (the stripped copy is empty, so the pattern cannot match).
        if (!SAFE_PATH_PATTERN.test(hint.replace(/[^a-zA-Z0-9_.-]/g, ""))) {
            continue;
        }
        const rows = await db.all(`
      SELECT path
      FROM file
      WHERE repo_id = ?
        AND is_binary = FALSE
        AND contains(LOWER(path), ?)
      ORDER BY path
      LIMIT ?
    `, [repoId, hint, limitPerHint]);
        const reason = `substring:hint:${hint}`;
        for (const row of rows) {
            const candidate = ensureCandidate(candidates, row.path);
            if (applyHintReasonBoost(candidate, reason, boost)) {
                await logHintExpansionEntry(db, tableAvailability, {
                    repoId,
                    hintValue: hint,
                    kind: "substring",
                    targetPath: row.path,
                });
            }
        }
    }
}
|
|
965
|
+
/**
 * Boosts files located under the same directory as the hinted path.
 *
 * Consumes one query from the shared budget. The directory is skipped when the
 * hint has no real parent directory, when no files are found, or when more
 * than `config.dirMaxFiles` files live under it (one extra row is fetched to
 * detect that). Remaining rows are ordered by hintNeighborRank and boosted
 * with `config.dirBoost` until `limit` candidates have been added; each
 * addition is logged as a "directory" expansion.
 *
 * The directory prefix is matched with starts_with() rather than LIKE so that
 * `_` in a directory name is a literal character (with LIKE, "src/my_dir/%"
 * would also match files under "src/my-dir/").
 *
 * @param args - `{ db, tableAvailability, repoId, hintPath, candidates, config, state, hintSeedMeta }`
 * @param limit - maximum number of candidates to add
 * @returns number of candidates that received the boost
 */
async function addHintDirectoryNeighbors(args, limit) {
    if (limit <= 0) {
        return 0;
    }
    const dir = path.posix.dirname(args.hintPath);
    if (!dir || dir === "." || dir === "/") {
        return 0;
    }
    if (!useHintDbBudget(args.state)) {
        return 0;
    }
    const rows = await args.db.all(`
      SELECT path, lang, ext
      FROM file
      WHERE repo_id = ?
        AND is_binary = FALSE
        AND starts_with(path, ?)
      ORDER BY path
      LIMIT ?
    `, [args.repoId, `${dir}/`, args.config.dirMaxFiles + 1]);
    // More rows than dirMaxFiles means the directory is too large to be a useful signal.
    if (rows.length === 0 || rows.length > args.config.dirMaxFiles) {
        return 0;
    }
    rows.sort((a, b) => hintNeighborRank(a.path) - hintNeighborRank(b.path));
    let added = 0;
    for (const row of rows) {
        if (row.path === args.hintPath) {
            continue;
        }
        if (!SAFE_PATH_PATTERN.test(row.path)) {
            continue;
        }
        const candidate = ensureCandidate(args.candidates, row.path);
        const reason = `artifact:hint_dir:${args.hintPath}:${row.path}`;
        if (applyHintReasonBoost(candidate, reason, args.config.dirBoost, row.lang, row.ext)) {
            added += 1;
            const seedMeta = getHintSeedMeta(args.hintSeedMeta, args.hintPath);
            await logHintExpansionEntry(args.db, args.tableAvailability, {
                repoId: args.repoId,
                hintValue: seedMeta?.sourceHint ?? args.hintPath,
                kind: "directory",
                targetPath: row.path,
                payload: {
                    origin: seedMeta?.origin ?? "artifact",
                },
            });
            if (added >= limit) {
                break;
            }
        }
    }
    return added;
}
|
|
1018
|
+
/**
 * Adds dependency neighbours of the hinted path, outgoing edges first and then
 * incoming ones, without exceeding `perHintRemaining` additions in total. Each
 * direction is further capped by `config.depOutLimit` / `config.depInLimit`
 * and skipped when that limit is not positive.
 *
 * @param args - expansion arguments, forwarded to addHintDependencyDirection
 * @param perHintRemaining - how many additions this hint may still make
 * @returns total number of candidates added
 */
async function addHintDependencyNeighbors(args, perHintRemaining) {
    if (perHintRemaining <= 0) {
        return 0;
    }
    const { depOutLimit, depInLimit } = args.config;
    let total = 0;
    for (const [direction, directionLimit] of [["out", depOutLimit], ["in", depInLimit]]) {
        const room = perHintRemaining - total;
        if (room <= 0) {
            return total;
        }
        if (directionLimit > 0) {
            const quota = Math.min(directionLimit, room);
            if (quota > 0) {
                const gained = await addHintDependencyDirection(args, quota, direction);
                total += gained;
            }
        }
    }
    return total;
}
|
|
1040
|
+
/**
 * Fetches dependency edges of the hinted path in one direction and boosts the
 * files on the other end.
 *
 *  - "out": rows where the hinted path is `src_path`; the `dst` paths are used
 *  - any other value: rows where the hinted path is `dst`; the `src_path`
 *    values are used
 *
 * Consumes one query from the shared budget. Up to min(limit * 4, 25) rows are
 * fetched so that filtering in applyDependencyRows still leaves enough
 * candidates. Rows are ordered before LIMIT is applied so the fetched subset
 * is deterministic across runs (a bare LIMIT returns an unspecified subset).
 *
 * @param args - `{ db, repoId, hintPath, state, ... }`, forwarded to applyDependencyRows
 * @param limit - maximum number of candidates to add
 * @param direction - "out" or "in"
 * @returns number of candidates added
 */
async function addHintDependencyDirection(args, limit, direction) {
    if (limit <= 0) {
        return 0;
    }
    if (!useHintDbBudget(args.state)) {
        return 0;
    }
    const fetchLimit = Math.min(limit * 4, 25);
    if (direction === "out") {
        const rows = await args.db.all(`
        SELECT dst
        FROM dependency
        WHERE repo_id = ?
          AND src_path = ?
          AND dst_kind = 'path'
        ORDER BY dst
        LIMIT ?
      `, [args.repoId, args.hintPath, fetchLimit]);
        return await applyDependencyRows(args, rows.map((row) => row.dst), limit, direction);
    }
    const rows = await args.db.all(`
      SELECT src_path
      FROM dependency
      WHERE repo_id = ?
        AND dst = ?
        AND dst_kind = 'path'
      ORDER BY src_path
      LIMIT ?
    `, [args.repoId, args.hintPath, fetchLimit]);
    return await applyDependencyRows(args, rows.map((row) => row.src_path), limit, direction);
}
|
|
1069
|
+
/**
 * Applies the dependency boost to neighbor paths of a hint and logs each
 * successful expansion.
 *
 * Paths are de-duplicated, filtered through `SAFE_PATH_PATTERN`, ordered by
 * `hintNeighborRank` (lower rank first), and then boosted one by one until
 * `limit` boosts have been applied. The hint path itself is never boosted.
 *
 * @param {object} args - Expansion context (`hintPath`, `candidates`, `config.depBoost`,
 *   `hintSeedMeta`, `db`, `tableAvailability`, `repoId` are read here).
 * @param {string[]} paths - Raw neighbor paths; may contain duplicates or empty values.
 * @param {number} limit - Maximum number of boosts to apply.
 * @param {string} direction - Direction label embedded in the reason string and log payload.
 * @returns {Promise<number>} Number of candidates for which `applyHintReasonBoost` returned truthy.
 */
async function applyDependencyRows(args, paths, limit, direction) {
    if (paths.length === 0) {
        return 0;
    }
    const ranked = [...new Set(paths)]
        .filter((entry) => entry && SAFE_PATH_PATTERN.test(entry))
        .sort((left, right) => hintNeighborRank(left) - hintNeighborRank(right));
    let boosted = 0;
    for (const dependencyPath of ranked) {
        if (dependencyPath === args.hintPath) {
            continue;
        }
        const candidate = ensureCandidate(args.candidates, dependencyPath);
        const reason = `artifact:hint_dep_${direction}:${args.hintPath}:${dependencyPath}`;
        if (!applyHintReasonBoost(candidate, reason, args.config.depBoost)) {
            // Boost was not applied, so this path does not count against the limit.
            continue;
        }
        boosted += 1;
        const seedMeta = getHintSeedMeta(args.hintSeedMeta, args.hintPath);
        await logHintExpansionEntry(args.db, args.tableAvailability, {
            repoId: args.repoId,
            hintValue: seedMeta?.sourceHint ?? args.hintPath,
            kind: "dependency",
            targetPath: dependencyPath,
            payload: {
                origin: seedMeta?.origin ?? "artifact",
                direction,
            },
        });
        if (boosted >= limit) {
            break;
        }
    }
    return boosted;
}
|
|
1102
|
+
/**
 * Boosts files under the hint's directory prefix whose stored embedding is
 * similar to the hint file's embedding.
 *
 * Two budgeted lookups happen in sequence: a listing of non-binary files
 * whose path starts with `<dir>/`, then an embedding fetch for the hint plus
 * those files. `useHintDbBudget` is consulted before each lookup.
 *
 * @param {object} args - Expansion context (`hintPath`, `state`, `db`, `repoId`,
 *   `config.semDirCandidateLimit`, `config.semThreshold`, `weights.structural`,
 *   `candidates` are read here).
 * @param {number} limit - Maximum number of neighbors to boost.
 * @returns {Promise<number>} Number of candidates for which the boost was applied.
 */
async function addHintSemanticNeighbors(args, limit) {
    if (limit <= 0) {
        return 0;
    }
    const parentDir = path.posix.dirname(args.hintPath);
    // Hints without a real parent directory have no prefix to search under.
    const lacksParentDir = !parentDir || parentDir === "." || parentDir === "/";
    if (lacksParentDir || !useHintDbBudget(args.state)) {
        return 0;
    }
    const listing = await args.db.all(`
      SELECT path
      FROM file
      WHERE repo_id = ?
        AND is_binary = FALSE
        AND path LIKE ?
      ORDER BY path
      LIMIT ?
    `, [args.repoId, `${parentDir}/%`, args.config.semDirCandidateLimit]);
    const siblingPaths = listing
        .map((record) => record.path)
        .filter((entry) => entry !== args.hintPath);
    // The second budget unit is only spent when there is something to compare.
    if (siblingPaths.length === 0 || !useHintDbBudget(args.state)) {
        return 0;
    }
    const embeddingMap = await fetchEmbeddingMap(args.db, args.repoId, [
        args.hintPath,
        ...siblingPaths,
    ]);
    const hintEmbedding = embeddingMap.get(args.hintPath);
    if (!hintEmbedding) {
        return 0;
    }
    let boosted = 0;
    for (const siblingPath of siblingPaths) {
        if (!SAFE_PATH_PATTERN.test(siblingPath)) {
            continue;
        }
        const siblingEmbedding = embeddingMap.get(siblingPath);
        if (!siblingEmbedding) {
            continue;
        }
        const similarity = structuralSimilarity(hintEmbedding, siblingEmbedding);
        const similarEnough = Number.isFinite(similarity) && similarity >= args.config.semThreshold;
        if (!similarEnough) {
            continue;
        }
        const candidate = ensureCandidate(args.candidates, siblingPath);
        const reason = `artifact:hint_sem:${args.hintPath}:${siblingPath}`;
        if (!applyHintReasonBoost(candidate, reason, args.weights.structural * similarity)) {
            continue;
        }
        boosted += 1;
        if (boosted >= limit) {
            break;
        }
    }
    return boosted;
}
|
|
1161
|
+
/**
 * Sort key for hint neighbors; lower values sort first.
 *
 * 0 = under `src/` or `external/assay-kit/src/`, 1 = anything else,
 * 2 = test-like path (per `isTestLikePath`), 3 = under `docs/`.
 * The source-root check wins over the test-like check.
 *
 * @param {string} filePath - Repository-relative path.
 * @returns {number} Rank in the range 0..3.
 */
function hintNeighborRank(filePath) {
    const sourceRoots = ["src/", "external/assay-kit/src/"];
    if (sourceRoots.some((root) => filePath.startsWith(root))) {
        return 0;
    }
    if (isTestLikePath(filePath)) {
        return 2;
    }
    return filePath.startsWith("docs/") ? 3 : 1;
}
|
|
1173
|
+
/**
 * Reports whether a path looks like test code or test data.
 *
 * A path qualifies when it contains a `test/`, `tests/`, `__tests__/` or
 * `fixtures/` directory segment, or when the file name ends in
 * `.spec.*` / `.test.*` with a `ts`, `tsx` or `js` extension.
 *
 * The `.js` suffixes are included so this helper agrees with the test-file
 * suffix list used by the score-penalty code in this file, which already
 * treats `.spec.js` and `.test.js` as test files.
 *
 * @param {string} filePath - Repository-relative path using `/` separators.
 * @returns {boolean} True when the path is test-like.
 */
function isTestLikePath(filePath) {
    const inTestDirectory = /(^|\/)(tests?|__tests__|fixtures)\//.test(filePath);
    if (inTestDirectory) {
        return true;
    }
    return /\.(?:spec|test)\.(?:tsx?|js)$/.test(filePath);
}
|
|
529
1180
|
function parseEmbedding(vectorJson, vectorDims) {
|
|
530
|
-
|
|
1181
|
+
const dims = vectorDims === null ? null : typeof vectorDims === "bigint" ? Number(vectorDims) : vectorDims;
|
|
1182
|
+
if (!vectorJson || !dims || dims <= 0) {
|
|
531
1183
|
return null;
|
|
532
1184
|
}
|
|
533
1185
|
try {
|
|
@@ -536,7 +1188,7 @@ function parseEmbedding(vectorJson, vectorDims) {
|
|
|
536
1188
|
return null;
|
|
537
1189
|
}
|
|
538
1190
|
const values = [];
|
|
539
|
-
for (let i = 0; i < parsed.length && i <
|
|
1191
|
+
for (let i = 0; i < parsed.length && i < dims; i += 1) {
|
|
540
1192
|
const raw = parsed[i];
|
|
541
1193
|
const num = typeof raw === "number" ? raw : Number(raw);
|
|
542
1194
|
if (!Number.isFinite(num)) {
|
|
@@ -544,7 +1196,7 @@ function parseEmbedding(vectorJson, vectorDims) {
|
|
|
544
1196
|
}
|
|
545
1197
|
values.push(num);
|
|
546
1198
|
}
|
|
547
|
-
return values.length ===
|
|
1199
|
+
return values.length === dims ? values : null;
|
|
548
1200
|
}
|
|
549
1201
|
catch {
|
|
550
1202
|
return null;
|
|
@@ -638,18 +1290,6 @@ function buildSnippetPreview(content, startLine, endLine) {
|
|
|
638
1290
|
}
|
|
639
1291
|
return `${snippet.slice(0, 239)}…`;
|
|
640
1292
|
}
|
|
641
|
-
function prependLineNumbers(snippet, startLine) {
|
|
642
|
-
const lines = snippet.split(/\r?\n/);
|
|
643
|
-
if (lines.length === 0) {
|
|
644
|
-
return snippet;
|
|
645
|
-
}
|
|
646
|
-
// Calculate required width from the last line number (dynamic sizing)
|
|
647
|
-
const endLine = startLine + lines.length - 1;
|
|
648
|
-
const width = String(endLine).length;
|
|
649
|
-
return lines
|
|
650
|
-
.map((line, index) => `${String(startLine + index).padStart(width, " ")}→${line}`)
|
|
651
|
-
.join("\n");
|
|
652
|
-
}
|
|
653
1293
|
/**
|
|
654
1294
|
* トークン数を推定(コンテンツベース)
|
|
655
1295
|
* 実際のGPTトークナイザーを使用して正確にカウント
|
|
@@ -685,6 +1325,375 @@ function splitQueryWords(query) {
|
|
|
685
1325
|
const words = query.split(/[\s/\-_]+/).filter((w) => w.length > 2);
|
|
686
1326
|
return words.length > 0 ? words : [query]; // 全て除外された場合は元のクエリを使用
|
|
687
1327
|
}
|
|
1328
|
+
/**
 * Resolves a user-supplied metadata filter key to a `{ key, source, strict? }`
 * descriptor.
 *
 * The key is lower-cased, then looked up in `METADATA_ALIAS_MAP`; a hit is
 * returned as a shallow copy. Otherwise the first entry of
 * `METADATA_KEY_PREFIXES` whose prefix matches decides the result: the text
 * after the prefix becomes the key, and an empty remainder yields `null`.
 *
 * @param {string} rawKey - Key as typed by the caller.
 * @returns {{key: string, source: *, strict?: boolean} | null} Descriptor, or
 *   `null` when the key is empty or unrecognized.
 */
function normalizeMetadataFilterKey(rawKey) {
    if (!rawKey) {
        return null;
    }
    const lowered = rawKey.toLowerCase();
    const aliased = METADATA_ALIAS_MAP.get(lowered);
    if (aliased) {
        // Copy so callers cannot mutate the shared alias table entry.
        return { ...aliased };
    }
    for (const prefixEntry of METADATA_KEY_PREFIXES) {
        if (!lowered.startsWith(prefixEntry.prefix)) {
            continue;
        }
        const remainder = lowered.slice(prefixEntry.prefix.length);
        if (!remainder) {
            return null;
        }
        const descriptor = { key: remainder, source: prefixEntry.source };
        if (prefixEntry.strict !== undefined) {
            descriptor.strict = prefixEntry.strict;
        }
        return descriptor;
    }
    return null;
}
|
|
1352
|
+
/**
 * Normalizes a filter value into a list of trimmed, non-empty strings.
 *
 * Accepts a single string or an array; non-string array items and strings
 * that are blank after trimming are dropped. Any other input yields `[]`.
 *
 * @param {*} value - Raw filter value.
 * @returns {string[]} Trimmed values in their original order.
 */
function normalizeFilterValues(value) {
    let rawItems;
    if (typeof value === "string") {
        rawItems = [value];
    }
    else if (Array.isArray(value)) {
        rawItems = value;
    }
    else {
        return [];
    }
    return rawItems
        .filter((item) => typeof item === "string")
        .map((item) => item.trim())
        .filter((item) => item !== "");
}
|
|
1371
|
+
/**
 * Converts a `metadata_filters` parameter object into a list of filter records.
 *
 * Each own enumerable entry is kept only when its key resolves through
 * `normalizeMetadataFilterKey` and its value yields at least one string via
 * `normalizeFilterValues`. `strict` is copied onto the record only when the
 * resolved key defines it.
 *
 * @param {*} input - Caller-supplied mapping of key -> string | string[].
 * @returns {Array<{key: string, values: string[], source: *, strict?: boolean}>}
 *   Filters in entry order; `[]` for falsy or non-object input.
 */
function normalizeMetadataFiltersParam(input) {
    if (!input || typeof input !== "object") {
        return [];
    }
    return Object.entries(input).flatMap(([rawKey, rawValue]) => {
        const descriptor = normalizeMetadataFilterKey(rawKey);
        if (!descriptor) {
            return [];
        }
        const values = normalizeFilterValues(rawValue);
        if (values.length === 0) {
            return [];
        }
        const { key, source, strict } = descriptor;
        const record = { key, values, source };
        if (strict !== undefined) {
            record.strict = strict;
        }
        return [record];
    });
}
|
|
1397
|
+
/**
 * Merges filters that target the same (source, key, strict-or-hint) bucket.
 *
 * The first filter of a bucket is copied as-is (its own values are not
 * de-duplicated); values from later filters in the same bucket are appended
 * only when not already present, compared case-insensitively. Filters with
 * no values are ignored. Input filter objects are not mutated.
 *
 * @param {Array<{key: string, source?: *, strict?: boolean, values: string[]}>} filters
 * @returns {Array<{key: string, source: *, values: string[], strict?: boolean}>}
 *   One entry per bucket, in order of first appearance.
 */
function mergeMetadataFilters(filters) {
    const buckets = new Map();
    for (const { key, source, strict, values } of filters) {
        if (values.length === 0) {
            continue;
        }
        // A missing source is bucketed under "*"; falsy strict counts as "hint".
        const bucketId = `${source ?? "*"}::${key}::${strict ? "strict" : "hint"}`;
        const bucket = buckets.get(bucketId);
        if (!bucket) {
            const created = { key, source, values: [...values] };
            if (strict !== undefined) {
                created.strict = strict;
            }
            buckets.set(bucketId, created);
            continue;
        }
        const known = new Set(bucket.values.map((existing) => existing.toLowerCase()));
        for (const value of values) {
            const folded = value.toLowerCase();
            if (known.has(folded)) {
                continue;
            }
            bucket.values.push(value);
            known.add(folded);
        }
    }
    return [...buckets.values()];
}
|
|
1427
|
+
/**
 * Extracts inline `key:value` metadata filters from a query string.
 *
 * A token is `key:value` where the value may be double-quoted, single-quoted,
 * or a run of non-whitespace characters. Tokens whose key is not recognized
 * by `normalizeMetadataFilterKey`, or whose value is blank after unquoting
 * and trimming, are left in the query untouched. Recognized tokens are
 * removed and the remaining whitespace runs are collapsed.
 *
 * @param {string} query - Raw search query.
 * @returns {{cleanedQuery: string, filters: Array<object>}} The query without
 *   recognized tokens, and the extracted filters after `mergeMetadataFilters`.
 */
function parseInlineMetadataFilters(query) {
    if (!query) {
        return { cleanedQuery: "", filters: [] };
    }
    const tokenPattern = /(\b[\w.]+):("[^"]+"|'[^']+'|[^\s]+)/g;
    const recognized = [];
    for (const token of query.matchAll(tokenPattern)) {
        const descriptor = normalizeMetadataFilterKey(token[1] ?? "");
        if (!descriptor) {
            continue;
        }
        let rawValue = token[2] ?? "";
        const doubleQuoted = rawValue.startsWith('"') && rawValue.endsWith('"');
        const singleQuoted = rawValue.startsWith("'") && rawValue.endsWith("'");
        if (doubleQuoted || singleQuoted) {
            rawValue = rawValue.slice(1, -1);
        }
        const value = rawValue.trim();
        if (!value) {
            continue;
        }
        const filter = {
            key: descriptor.key,
            source: descriptor.source,
            values: [value],
        };
        if (descriptor.strict !== undefined) {
            filter.strict = descriptor.strict;
        }
        recognized.push({
            start: token.index,
            end: token.index + token[0].length,
            filter,
        });
    }
    if (recognized.length === 0) {
        return { cleanedQuery: query.trim(), filters: [] };
    }
    // Stitch together the text between recognized tokens.
    const kept = [];
    let cursor = 0;
    for (const { start, end } of recognized) {
        kept.push(query.slice(cursor, start));
        cursor = end;
    }
    kept.push(query.slice(cursor));
    return {
        cleanedQuery: kept.join("").replace(/\s{2,}/g, " ").trim(),
        filters: mergeMetadataFilters(recognized.map((entry) => entry.filter)),
    };
}
|
|
1478
|
+
/**
 * Builds one correlated `EXISTS` clause per metadata filter.
 *
 * Each clause matches rows of `document_metadata_kv` (aliased `mk`) that
 * belong to the outer row's repo and path, have the filter's key (and source,
 * when set), and whose value matches any of the filter values as a
 * case-insensitive substring (`ILIKE '%value%'`).
 *
 * @param {Array<{key: string, source?: *, values: string[]}>} filters - Filters
 *   to translate; entries without a key or without values are skipped.
 * @param {string} [alias="f"] - Alias of the outer table; only `"f"` and `"mk"`
 *   are accepted because it is interpolated directly into the SQL text.
 * @returns {Array<{sql: string, params: Array}>} Clause text with its bound parameters.
 * @throws {Error} When `alias` is not one of the allowed values.
 */
function buildMetadataFilterConditions(filters, alias = "f") {
    // SQL injection guard: the alias is spliced into the statement, so allow-list it.
    if (alias !== "f" && alias !== "mk") {
        throw new Error(`Invalid SQL alias: ${alias}`);
    }
    return filters
        .filter((filter) => filter.key && filter.values.length !== 0)
        .map((filter) => {
            const predicates = [`mk.repo_id = ${alias}.repo_id`, `mk.path = ${alias}.path`];
            const params = [];
            if (filter.source) {
                predicates.push("mk.source = ?");
                params.push(filter.source);
            }
            predicates.push("mk.key = ?");
            params.push(filter.key);
            const valueMatch = filter.values.map(() => "mk.value ILIKE ?").join(" OR ");
            predicates.push(`(${valueMatch})`);
            for (const value of filter.values) {
                params.push(`%${value}%`);
            }
            return {
                sql: `EXISTS (SELECT 1 FROM document_metadata_kv mk WHERE ${predicates.join(" AND ")})`,
                params,
            };
        });
}
|
|
1504
|
+
/**
 * Reports whether an error indicates that `table` does not exist in the database.
 *
 * Callers use this to downgrade a failed query to an empty result, so the
 * check must not match unrelated failures. The message has to mention the
 * table name AND contain missing-object wording ("does not exist" or
 * "no such table"). Matching on the table name alone would also swallow
 * syntax, conversion, or constraint errors that merely mention the table.
 *
 * @param {unknown} error - Value caught from a failed query.
 * @param {string} table - Table name to look for in the message.
 * @returns {boolean} True only for `Error` instances describing a missing table.
 */
function isTableMissingError(error, table) {
    if (!(error instanceof Error) || !table) {
        return false;
    }
    const message = String(error.message);
    if (!message.includes(table)) {
        return false;
    }
    return /does not exist|no such table/i.test(message);
}
|
|
1510
|
+
/**
 * Runs a metadata query, degrading to an empty result when the metadata
 * tables are unavailable.
 *
 * The query is skipped when `tableAvailability.hasMetadataTables` is falsy.
 * If the query fails with an error that `isTableMissingError` attributes to
 * `document_metadata_kv`, a warning is logged and `[]` is returned; every
 * other error is rethrown.
 *
 * @param {{all: Function}} db - Database handle exposing `all(sql, params)`.
 * @param {{hasMetadataTables: boolean}} tableAvailability - Feature flags for optional tables.
 * @param {string} sql - Statement to execute.
 * @param {Array} params - Bound parameters.
 * @returns {Promise<Array>} Query rows, or `[]` when metadata is unavailable.
 */
async function safeMetadataQuery(db, tableAvailability, sql, params) {
    if (!tableAvailability.hasMetadataTables) {
        return [];
    }
    try {
        const rows = await db.all(sql, params);
        return rows;
    }
    catch (failure) {
        if (!isTableMissingError(failure, "document_metadata_kv")) {
            throw failure;
        }
        console.warn("Metadata tables not found; disabling metadata filters and boosts until database is upgraded.");
        return [];
    }
}
|
|
1525
|
+
/**
 * Runs a markdown-link query, degrading to an empty result when the link
 * table is unavailable.
 *
 * The query is skipped when `tableAvailability.hasLinkTable` is falsy. If the
 * query fails with an error that `isTableMissingError` attributes to
 * `markdown_link`, a warning is logged and `[]` is returned; every other
 * error is rethrown.
 *
 * @param {{all: Function}} db - Database handle exposing `all(sql, params)`.
 * @param {{hasLinkTable: boolean}} tableAvailability - Feature flags for optional tables.
 * @param {string} sql - Statement to execute.
 * @param {Array} params - Bound parameters.
 * @returns {Promise<Array>} Query rows, or `[]` when the link table is unavailable.
 */
async function safeLinkQuery(db, tableAvailability, sql, params) {
    if (!tableAvailability.hasLinkTable) {
        return [];
    }
    try {
        const rows = await db.all(sql, params);
        return rows;
    }
    catch (failure) {
        if (!isTableMissingError(failure, "markdown_link")) {
            throw failure;
        }
        console.warn("Markdown link table not found; inbound link boosting disabled until database is upgraded.");
        return [];
    }
}
|
|
1540
|
+
/**
 * Fetches files selected purely by metadata filters (no keyword matching).
 *
 * Every filter becomes an `EXISTS` condition (via
 * `buildMetadataFilterConditions`) ANDed onto a `file`/`blob` join; results
 * are ordered by path and capped at `limit`. A failure that
 * `isTableMissingError` attributes to `document_metadata_kv` is logged and
 * turned into `[]`; other errors propagate.
 *
 * @param {{all: Function}} db - Database handle exposing `all(sql, params)`.
 * @param {{hasMetadataTables: boolean}} tableAvailability - Feature flags for optional tables.
 * @param {*} repoId - Repository identifier bound to `f.repo_id`.
 * @param {Array<object>} filters - Metadata filters to apply.
 * @param {number} limit - Maximum rows to return.
 * @returns {Promise<Array<{path: string, lang: *, ext: *, content: *}>>} Matching rows,
 *   or `[]` when metadata is unavailable, no filters were given, or `limit <= 0`.
 */
async function fetchMetadataOnlyCandidates(db, tableAvailability, repoId, filters, limit) {
    const nothingToDo = !tableAvailability.hasMetadataTables || filters.length === 0 || limit <= 0;
    if (nothingToDo) {
        return [];
    }
    const conditions = buildMetadataFilterConditions(filters);
    const predicates = ["f.repo_id = ?", ...conditions.map((condition) => condition.sql)];
    // Bind order mirrors placeholder order: repo id, filter params, then LIMIT.
    const bindings = [repoId];
    for (const condition of conditions) {
        bindings.push(...condition.params);
    }
    bindings.push(limit);
    const statement = `
    SELECT f.path, f.lang, f.ext, b.content
    FROM file f
    JOIN blob b ON b.hash = f.blob_hash
    WHERE ${predicates.join(" AND ")}
    ORDER BY f.path
    LIMIT ?
  `;
    try {
        const rows = await db.all(statement, bindings);
        return rows;
    }
    catch (failure) {
        if (!isTableMissingError(failure, "document_metadata_kv")) {
            throw failure;
        }
        console.warn("Metadata tables not found; disabling metadata-only searches until database is upgraded.");
        return [];
    }
}
|
|
1571
|
+
/**
 * Finds files whose metadata values contain any of the given keywords.
 *
 * Each keyword is matched as a case-insensitive substring against
 * `document_metadata_kv.value`; the number of matching metadata rows per file
 * becomes its `score`. Paths in `excludePaths` are left out, and every
 * metadata filter is ANDed on as an `EXISTS` condition. The query runs
 * through `safeMetadataQuery`.
 *
 * @param {{all: Function}} db - Database handle.
 * @param {{hasMetadataTables: boolean}} tableAvailability - Feature flags for optional tables.
 * @param {*} repoId - Repository identifier.
 * @param {string[]} keywords - Keywords to look for.
 * @param {Array<object>} filters - Metadata filters to apply in addition.
 * @param {number} limit - Maximum rows to return.
 * @param {Set<string>} excludePaths - Paths that must not be returned.
 * @returns {Promise<Array<object>>} Rows with a numeric `score` (1 when the
 *   database returned none); `[]` when metadata is unavailable, there are no
 *   keywords, or `limit <= 0`.
 */
async function fetchMetadataKeywordMatches(db, tableAvailability, repoId, keywords, filters, limit, excludePaths) {
    const nothingToDo = !tableAvailability.hasMetadataTables || keywords.length === 0 || limit <= 0;
    if (nothingToDo) {
        return [];
    }
    const keywordMatch = keywords.map(() => "mk.value ILIKE ?").join(" OR ");
    const predicates = ["mk.repo_id = ?", `(${keywordMatch})`];
    // Bindings are appended in the same order their placeholders appear in the SQL.
    const bindings = [repoId, ...keywords.map((keyword) => `%${keyword}%`)];
    if (excludePaths.size > 0) {
        const excluded = Array.from(excludePaths);
        predicates.push(`f.path NOT IN (${excluded.map(() => "?").join(", ")})`);
        bindings.push(...excluded);
    }
    for (const condition of buildMetadataFilterConditions(filters, "f")) {
        predicates.push(condition.sql);
        bindings.push(...condition.params);
    }
    bindings.push(limit);
    const statement = `
    SELECT f.path, f.lang, f.ext, b.content, COUNT(*) AS score
    FROM document_metadata_kv mk
    JOIN file f ON f.repo_id = mk.repo_id AND f.path = mk.path
    JOIN blob b ON b.hash = f.blob_hash
    WHERE ${predicates.join(" AND ")}
    GROUP BY f.path, f.lang, f.ext, b.content
    ORDER BY score DESC, f.path
    LIMIT ?
  `;
    const matches = await safeMetadataQuery(db, tableAvailability, statement, bindings);
    // COUNT(*) may arrive as a bigint; normalize it to a plain number.
    return matches.map((match) => ({ ...match, score: Number(match.score ?? 1) }));
}
|
|
1604
|
+
/**
 * Loads all metadata key/value entries for the given paths, grouped by path.
 *
 * The query runs through `safeMetadataQuery`. Paths without any metadata row
 * do not appear in the result. A `null`/`undefined` source is reported as
 * `undefined`.
 *
 * @param {{all: Function}} db - Database handle.
 * @param {{hasMetadataTables: boolean}} tableAvailability - Feature flags for optional tables.
 * @param {*} repoId - Repository identifier.
 * @param {string[]} paths - Paths to look up.
 * @returns {Promise<Map<string, Array<{key: *, value: *, source: *}>>>} Entries
 *   per path; empty when metadata is unavailable or `paths` is empty.
 */
async function loadMetadataForPaths(db, tableAvailability, repoId, paths) {
    const grouped = new Map();
    if (!tableAvailability.hasMetadataTables || paths.length === 0) {
        return grouped;
    }
    const slots = paths.map(() => "?").join(", ");
    const statement = `
    SELECT path, key, value, source
    FROM document_metadata_kv
    WHERE repo_id = ? AND path IN (${slots})
  `;
    const records = await safeMetadataQuery(db, tableAvailability, statement, [repoId, ...paths]);
    for (const { path: filePath, key, value, source } of records) {
        let bucket = grouped.get(filePath);
        if (!grouped.has(filePath)) {
            bucket = [];
            grouped.set(filePath, bucket);
        }
        bucket.push({ key, value, source: source ?? undefined });
    }
    return grouped;
}
|
|
1628
|
+
/**
 * Counts inbound markdown links for each of the given paths.
 *
 * Rows of `markdown_link` whose `resolved_path` is one of `paths` are grouped
 * and counted; the query runs through `safeLinkQuery`. Paths with no inbound
 * link are absent from the result.
 *
 * @param {{all: Function}} db - Database handle.
 * @param {{hasLinkTable: boolean}} tableAvailability - Feature flags for optional tables.
 * @param {*} repoId - Repository identifier.
 * @param {string[]} paths - Link targets to count.
 * @returns {Promise<Map<string, number>>} Inbound count per path; empty when the
 *   link table is unavailable or `paths` is empty.
 */
async function loadInboundLinkCounts(db, tableAvailability, repoId, paths) {
    const inboundByPath = new Map();
    if (!tableAvailability.hasLinkTable || paths.length === 0) {
        return inboundByPath;
    }
    const slots = paths.map(() => "?").join(", ");
    const statement = `
    SELECT resolved_path AS path, COUNT(*) AS inbound
    FROM markdown_link
    WHERE repo_id = ? AND resolved_path IS NOT NULL AND resolved_path IN (${slots})
    GROUP BY resolved_path
  `;
    const records = await safeLinkQuery(db, tableAvailability, statement, [repoId, ...paths]);
    for (const { path: targetPath, inbound } of records) {
        // Number() handles bigint counts as well; a missing count becomes 0.
        inboundByPath.set(targetPath, Number(inbound ?? 0));
    }
    return inboundByPath;
}
|
|
1647
|
+
/**
 * Scores how well a file's metadata entries match the query.
 *
 * For every entry, `METADATA_MATCH_WEIGHT` is added once if its lower-cased
 * value contains any keyword, and `METADATA_FILTER_MATCH_WEIGHT` is added if
 * the lower-cased value is exactly one of the filter values. The total is
 * capped at 1.5.
 *
 * @param {Array<{value: string}>|null|undefined} entries - Metadata entries of one file.
 * @param {Iterable<string>} keywordSet - Keywords, compared against lower-cased values as given.
 * @param {{has: Function}} filterValueSet - Set-like collection of filter values.
 * @returns {number} Boost in the range 0..1.5.
 */
function computeMetadataBoost(entries, keywordSet, filterValueSet) {
    if (!entries || entries.length === 0) {
        return 0;
    }
    let total = 0;
    for (const { value } of entries) {
        const folded = value.toLowerCase();
        let keywordHit = false;
        for (const keyword of keywordSet) {
            if (folded.includes(keyword)) {
                keywordHit = true;
                break;
            }
        }
        // At most one keyword bonus per entry, no matter how many keywords match.
        if (keywordHit) {
            total += METADATA_MATCH_WEIGHT;
        }
        if (filterValueSet.has(folded)) {
            total += METADATA_FILTER_MATCH_WEIGHT;
        }
    }
    return Math.min(total, 1.5);
}
|
|
1666
|
+
/**
 * Converts an inbound-link count into a score boost.
 *
 * The boost is `log1p(count) * INBOUND_LINK_WEIGHT`, capped at 1.0. Missing,
 * zero, negative, or NaN counts yield 0. Bigint counts are converted to
 * numbers first.
 *
 * @param {number|bigint|null|undefined} count - Number of inbound links.
 * @returns {number} Boost in the range 0..1.
 */
function computeInboundLinkBoost(count) {
    const links = typeof count === "bigint" ? Number(count) : count;
    const hasLinks = Boolean(links) && !(links <= 0);
    if (!hasLinks) {
        return 0;
    }
    const scaled = Math.log1p(links) * INBOUND_LINK_WEIGHT;
    return Math.min(scaled, 1.0);
}
|
|
1676
|
+
/**
 * Checks whether a file's metadata entries satisfy every filter.
 *
 * A filter is satisfied when some entry has the same key, the same source
 * (only checked when the filter specifies one), and a value that contains
 * any of the filter's values as a case-insensitive substring. With no
 * filters the result is `true`; with filters but no entries it is `false`.
 *
 * @param {Array<{key: string, value: string, source?: *}>|null|undefined} entries - Metadata of one file.
 * @param {Array<{key: string, source?: *, values: string[]}>} filters - Filters that must all match.
 * @returns {boolean} True when all filters are satisfied.
 */
function candidateMatchesMetadataFilters(entries, filters) {
    if (filters.length === 0) {
        return true;
    }
    if (!entries || entries.length === 0) {
        return false;
    }
    for (const filter of filters) {
        const needles = filter.values.map((needle) => needle.toLowerCase());
        const satisfied = entries.some((entry) => {
            const keyMatches = entry.key === filter.key;
            const sourceMatches = !filter.source || entry.source === filter.source;
            if (!keyMatches || !sourceMatches) {
                return false;
            }
            const haystack = entry.value.toLowerCase();
            return needles.some((needle) => haystack.includes(needle));
        });
        if (!satisfied) {
            return false;
        }
    }
    return true;
}
|
|
688
1697
|
/**
|
|
689
1698
|
* パス固有のマルチプライヤーを取得(最長プレフィックスマッチ)
|
|
690
1699
|
* 配列の順序に依存せず、常に最長一致のプレフィックスを選択
|
|
@@ -710,7 +1719,7 @@ function getPathMultiplier(filePath, profileConfig) {
|
|
|
710
1719
|
* @param weights - スコアリングウェイト設定(乗算的ペナルティに使用)
|
|
711
1720
|
* @returns ブースト適用後のスコア
|
|
712
1721
|
*/
|
|
713
|
-
function applyFileTypeBoost(path, baseScore, profileConfig,
|
|
1722
|
+
function applyFileTypeBoost(path, baseScore, profileConfig, weights) {
|
|
714
1723
|
// Blacklisted directories that are almost always irrelevant for code context
|
|
715
1724
|
const blacklistedDirs = [
|
|
716
1725
|
".cursor/",
|
|
@@ -727,7 +1736,8 @@ function applyFileTypeBoost(path, baseScore, profileConfig, _weights) {
|
|
|
727
1736
|
if (profileConfig.denylistOverrides.includes(dir)) {
|
|
728
1737
|
continue;
|
|
729
1738
|
}
|
|
730
|
-
|
|
1739
|
+
// v1.0.0: Use multiplicative penalty instead of absolute -100
|
|
1740
|
+
return baseScore * weights.blacklistPenaltyMultiplier;
|
|
731
1741
|
}
|
|
732
1742
|
}
|
|
733
1743
|
const fileName = path.split("/").pop() ?? "";
|
|
@@ -758,9 +1768,9 @@ function applyFileTypeBoost(path, baseScore, profileConfig, _weights) {
|
|
|
758
1768
|
multiplier *= implMultiplier;
|
|
759
1769
|
}
|
|
760
1770
|
}
|
|
761
|
-
// Test files:
|
|
1771
|
+
// Test files: multiplicative penalty (v1.0.0)
|
|
762
1772
|
if (path.startsWith("tests/") || path.startsWith("test/")) {
|
|
763
|
-
return baseScore *
|
|
1773
|
+
return baseScore * weights.testPenaltyMultiplier;
|
|
764
1774
|
}
|
|
765
1775
|
return baseScore * multiplier;
|
|
766
1776
|
}
|
|
@@ -862,22 +1872,25 @@ function applyPathBasedScoring(candidate, lowerPath, weights, extractedTerms) {
|
|
|
862
1872
|
}
|
|
863
1873
|
}
|
|
864
1874
|
/**
|
|
865
|
-
*
|
|
866
|
-
* ブラックリストディレクトリ、テストファイル、lock
|
|
867
|
-
*
|
|
868
|
-
* @
|
|
1875
|
+
* 乗算的ファイルペナルティを適用(v1.0.0+)
|
|
1876
|
+
* ブラックリストディレクトリ、テストファイル、lockファイルに乗算ペナルティ
|
|
1877
|
+
* v1.0.0: 絶対ペナルティ(-100)から乗算ペナルティ(×0.01など)に移行
|
|
1878
|
+
* @param weights - スコアリングウェイト設定(乗算ペナルティ係数を含む)
|
|
1879
|
+
* @param profile - boost_profile設定(denylistOverridesなど)
|
|
1880
|
+
* @returns true if severe penalty was applied (caller should skip further boosts)
|
|
869
1881
|
*/
|
|
870
|
-
function
|
|
871
|
-
//
|
|
1882
|
+
function applyMultiplicativeFilePenalties(candidate, path, lowerPath, fileName, weights, profileConfig) {
|
|
1883
|
+
// Returns true if a severe penalty was applied (should skip further boosts)
|
|
1884
|
+
// Blacklisted directories - apply strong multiplicative penalty (99% reduction)
|
|
1885
|
+
// v1.0.0: test/ and tests/ removed - handled by testPenaltyMultiplier instead
|
|
872
1886
|
const blacklistedDirs = [
|
|
873
1887
|
".cursor/",
|
|
874
1888
|
".devcontainer/",
|
|
875
1889
|
".serena/",
|
|
876
1890
|
"__mocks__/",
|
|
877
1891
|
"docs/",
|
|
878
|
-
"test/",
|
|
879
|
-
"tests/",
|
|
880
1892
|
".git/",
|
|
1893
|
+
".github/",
|
|
881
1894
|
"node_modules/",
|
|
882
1895
|
"db/migrate/",
|
|
883
1896
|
"db/migrations/",
|
|
@@ -897,19 +1910,26 @@ function applyAdditiveFilePenalties(candidate, path, lowerPath, fileName, profil
|
|
|
897
1910
|
if (profileConfig.denylistOverrides.includes(dir)) {
|
|
898
1911
|
continue; // Skip this blacklisted directory
|
|
899
1912
|
}
|
|
900
|
-
|
|
1913
|
+
// v1.0.0: Use multiplicative penalty instead of absolute -100
|
|
1914
|
+
candidate.scoreMultiplier *= weights.blacklistPenaltyMultiplier;
|
|
901
1915
|
candidate.reasons.add("penalty:blacklisted-dir");
|
|
902
|
-
return true;
|
|
1916
|
+
return true; // Signal to skip further boosts - this is the strongest penalty
|
|
903
1917
|
}
|
|
904
1918
|
}
|
|
905
|
-
|
|
1919
|
+
if (isSuppressedPath(path)) {
|
|
1920
|
+
// v1.0.0: Use multiplicative penalty instead of absolute -100
|
|
1921
|
+
candidate.scoreMultiplier *= weights.blacklistPenaltyMultiplier;
|
|
1922
|
+
candidate.reasons.add("penalty:suppressed");
|
|
1923
|
+
return true; // Signal to skip further boosts
|
|
1924
|
+
}
|
|
1925
|
+
// Test files - strong multiplicative penalty (95% reduction)
|
|
906
1926
|
const testPatterns = [".spec.ts", ".spec.js", ".test.ts", ".test.js", ".spec.tsx", ".test.tsx"];
|
|
907
1927
|
if (testPatterns.some((pattern) => lowerPath.endsWith(pattern))) {
|
|
908
|
-
candidate.
|
|
1928
|
+
candidate.scoreMultiplier *= weights.testPenaltyMultiplier;
|
|
909
1929
|
candidate.reasons.add("penalty:test-file");
|
|
910
|
-
return true;
|
|
1930
|
+
return true; // Signal to skip further boosts
|
|
911
1931
|
}
|
|
912
|
-
// Lock files - very strong penalty
|
|
1932
|
+
// Lock files - very strong multiplicative penalty (99% reduction)
|
|
913
1933
|
const lockFiles = [
|
|
914
1934
|
"package-lock.json",
|
|
915
1935
|
"pnpm-lock.yaml",
|
|
@@ -920,63 +1940,46 @@ function applyAdditiveFilePenalties(candidate, path, lowerPath, fileName, profil
|
|
|
920
1940
|
"poetry.lock",
|
|
921
1941
|
];
|
|
922
1942
|
if (lockFiles.some((lockFile) => fileName === lockFile)) {
|
|
923
|
-
candidate.
|
|
1943
|
+
candidate.scoreMultiplier *= weights.lockPenaltyMultiplier;
|
|
924
1944
|
candidate.reasons.add("penalty:lock-file");
|
|
925
|
-
return true;
|
|
1945
|
+
return true; // Signal to skip further boosts
|
|
926
1946
|
}
|
|
927
|
-
//
|
|
928
|
-
|
|
929
|
-
".config.js",
|
|
930
|
-
".config.ts",
|
|
931
|
-
".config.mjs",
|
|
932
|
-
".config.cjs",
|
|
933
|
-
"tsconfig.json",
|
|
934
|
-
"jsconfig.json",
|
|
935
|
-
"package.json",
|
|
936
|
-
".eslintrc",
|
|
937
|
-
".prettierrc",
|
|
938
|
-
"jest.config",
|
|
939
|
-
"vite.config",
|
|
940
|
-
"vitest.config",
|
|
941
|
-
"webpack.config",
|
|
942
|
-
"rollup.config",
|
|
943
|
-
];
|
|
944
|
-
if (configPatterns.some((pattern) => lowerPath.endsWith(pattern) || fileName.startsWith(".env")) ||
|
|
945
|
-
fileName === "Dockerfile" ||
|
|
946
|
-
fileName === "docker-compose.yml" ||
|
|
947
|
-
fileName === "docker-compose.yaml") {
|
|
948
|
-
// ✅ Use explicit flag instead of magic number (0.3) to determine behavior
|
|
949
|
-
// This decouples profile detection from multiplier values
|
|
950
|
-
if (profileConfig.skipConfigAdditivePenalty) {
|
|
951
|
-
return false; // Continue to multiplicative penalty only
|
|
952
|
-
}
|
|
953
|
-
// For other profiles, apply strong additive penalty
|
|
954
|
-
candidate.score -= 1.5;
|
|
955
|
-
candidate.reasons.add("penalty:config-file");
|
|
956
|
-
return true;
|
|
957
|
-
}
|
|
958
|
-
// Migration files - strong penalty
|
|
959
|
-
if (lowerPath.includes("migrate") || lowerPath.includes("migration")) {
|
|
960
|
-
candidate.score -= 2.0;
|
|
961
|
-
candidate.reasons.add("penalty:migration-file");
|
|
962
|
-
return true;
|
|
963
|
-
}
|
|
964
|
-
return false; // No penalty applied, continue processing
|
|
1947
|
+
// v1.0.0: No penalty applied, allow further boosts/penalties
|
|
1948
|
+
return false;
|
|
965
1949
|
}
|
|
966
1950
|
/**
|
|
967
1951
|
* ファイルタイプ別の乗算的ペナルティ/ブーストを適用(v0.7.0+)
|
|
968
1952
|
* profile="docs": ドキュメントファイルをブースト
|
|
969
1953
|
* profile="default": ドキュメントファイルにペナルティ、実装ファイルをブースト
|
|
970
1954
|
*/
|
|
971
|
-
function applyFileTypeMultipliers(candidate, path, ext, profileConfig,
|
|
1955
|
+
function applyFileTypeMultipliers(candidate, path, ext, profileConfig, weights) {
|
|
972
1956
|
const fileName = path.split("/").pop() ?? "";
|
|
973
|
-
|
|
1957
|
+
const lowerPath = path.toLowerCase();
|
|
1958
|
+
// ✅ Step 1: Low-value files (v1.0.0: syntax/perf/legal/migration)
|
|
1959
|
+
// Apply configPenaltyMultiplier (strong penalty) to rarely useful file types
|
|
1960
|
+
const isSyntaxGrammar = path.includes("/syntaxes/") &&
|
|
1961
|
+
(lowerPath.endsWith(".tmlanguage") ||
|
|
1962
|
+
lowerPath.endsWith(".tmlanguage.json") ||
|
|
1963
|
+
lowerPath.endsWith(".tmtheme") ||
|
|
1964
|
+
lowerPath.endsWith(".plist"));
|
|
1965
|
+
const isPerfData = lowerPath.includes(".perf.data") ||
|
|
1966
|
+
lowerPath.includes(".perf-data") ||
|
|
1967
|
+
lowerPath.includes("-perf-data");
|
|
1968
|
+
const isLegalFile = fileName.toLowerCase().includes("thirdpartynotices") ||
|
|
1969
|
+
fileName.toLowerCase() === "cgmanifest.json";
|
|
1970
|
+
const isMigrationFile = lowerPath.includes("migrate") || lowerPath.includes("migration");
|
|
1971
|
+
if (isSyntaxGrammar || isPerfData || isLegalFile || isMigrationFile) {
|
|
1972
|
+
candidate.scoreMultiplier *= weights.configPenaltyMultiplier;
|
|
1973
|
+
candidate.reasons.add("penalty:low-value-file");
|
|
1974
|
+
return; // Don't apply impl boosts
|
|
1975
|
+
}
|
|
1976
|
+
// ✅ Step 2: Config files
|
|
974
1977
|
if (isConfigFile(path, fileName)) {
|
|
975
1978
|
candidate.scoreMultiplier *= profileConfig.fileTypeMultipliers.config;
|
|
976
1979
|
candidate.reasons.add("penalty:config-file");
|
|
977
1980
|
return; // Don't apply impl boosts to config files
|
|
978
1981
|
}
|
|
979
|
-
// ✅ Step
|
|
1982
|
+
// ✅ Step 3: Documentation files
|
|
980
1983
|
const docExtensions = [".md", ".yaml", ".yml", ".mdc"];
|
|
981
1984
|
if (docExtensions.some((docExt) => path.endsWith(docExt))) {
|
|
982
1985
|
const docMultiplier = profileConfig.fileTypeMultipliers.doc;
|
|
@@ -989,7 +1992,7 @@ function applyFileTypeMultipliers(candidate, path, ext, profileConfig, _weights)
|
|
|
989
1992
|
}
|
|
990
1993
|
return; // Don't apply impl boosts to docs
|
|
991
1994
|
}
|
|
992
|
-
// ✅ Step
|
|
1995
|
+
// ✅ Step 4: Implementation files with path-specific boosts
|
|
993
1996
|
const implMultiplier = profileConfig.fileTypeMultipliers.impl;
|
|
994
1997
|
// ✅ Use longest-prefix-match logic (order-independent)
|
|
995
1998
|
const pathBoost = getPathMultiplier(path, profileConfig);
|
|
@@ -1016,16 +2019,21 @@ function applyFileTypeMultipliers(candidate, path, ext, profileConfig, _weights)
|
|
|
1016
2019
|
}
|
|
1017
2020
|
}
|
|
1018
2021
|
/**
|
|
1019
|
-
* contextBundle専用のブーストプロファイル適用(
|
|
2022
|
+
* contextBundle専用のブーストプロファイル適用(v1.0.0: 乗算ペナルティモデル)
|
|
1020
2023
|
* 複雑度を削減するために3つのヘルパー関数に分割:
|
|
1021
2024
|
* 1. applyPathBasedScoring: パスベースの加算的スコアリング
|
|
1022
|
-
* 2.
|
|
1023
|
-
* 3. applyFileTypeMultipliers:
|
|
2025
|
+
* 2. applyMultiplicativeFilePenalties: 乗算的ペナルティ(blacklist/test/lock)
|
|
2026
|
+
* 3. applyFileTypeMultipliers: 乗算的ペナルティ/ブースト(doc/config/impl)
|
|
1024
2027
|
*
|
|
1025
|
-
*
|
|
1026
|
-
*
|
|
1027
|
-
*
|
|
1028
|
-
*
|
|
2028
|
+
* v1.0.0 CHANGES:
|
|
2029
|
+
* - 絶対ペナルティ(-100)を乗算ペナルティ(×0.01など)に置き換え
|
|
2030
|
+
* - すべてのペナルティが組み合わせ可能に(boost_profileとの相互作用が予測可能)
|
|
2031
|
+
* - v0.9.0の特別ケース処理(if profile === "docs")が不要に
|
|
2032
|
+
*
|
|
2033
|
+
* SCORING PHASES:
|
|
2034
|
+
* 1. Additive phase: テキストマッチ、パスマッチ、依存関係、近接性を加算
|
|
2035
|
+
* 2. Multiplicative phase: ペナルティとブーストを scoreMultiplier に蓄積
|
|
2036
|
+
* 3. Final application: score *= scoreMultiplier(最終段階で一度だけ適用)
|
|
1029
2037
|
*/
|
|
1030
2038
|
function applyBoostProfile(candidate, row, profileConfig, weights, extractedTerms) {
|
|
1031
2039
|
const { path, ext } = row;
|
|
@@ -1033,117 +2041,195 @@ function applyBoostProfile(candidate, row, profileConfig, weights, extractedTerm
|
|
|
1033
2041
|
const fileName = path.split("/").pop() ?? "";
|
|
1034
2042
|
// Step 1: パスベースのスコアリング(加算的ブースト)
|
|
1035
2043
|
applyPathBasedScoring(candidate, lowerPath, weights, extractedTerms);
|
|
1036
|
-
// Step 2:
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
return; // ペナルティが適用された場合は処理終了
|
|
1040
|
-
}
|
|
2044
|
+
// Step 2: 乗算的ペナルティ(ブラックリスト、テスト、lock)
|
|
2045
|
+
// v1.0.0: Returns true if severe penalty applied (should skip further boosts)
|
|
2046
|
+
const skipFurtherBoosts = applyMultiplicativeFilePenalties(candidate, path, lowerPath, fileName, weights, profileConfig);
|
|
1041
2047
|
// Step 3: ファイルタイプ別の乗算的ペナルティ/ブースト
|
|
1042
|
-
|
|
2048
|
+
// Skip if severe penalty was applied (blacklist/test/lock files shouldn't get impl boosts)
|
|
2049
|
+
if (!skipFurtherBoosts) {
|
|
2050
|
+
applyFileTypeMultipliers(candidate, path, ext, profileConfig, weights);
|
|
2051
|
+
}
|
|
1043
2052
|
}
|
|
1044
2053
|
export async function filesSearch(context, params) {
|
|
1045
2054
|
const { db, repoId } = context;
|
|
1046
|
-
const
|
|
1047
|
-
|
|
1048
|
-
|
|
2055
|
+
const rawQuery = params.query ?? "";
|
|
2056
|
+
const inlineMetadata = parseInlineMetadataFilters(rawQuery);
|
|
2057
|
+
const paramFilters = normalizeMetadataFiltersParam(params.metadata_filters);
|
|
2058
|
+
const metadataFilters = mergeMetadataFilters([...inlineMetadata.filters, ...paramFilters]);
|
|
2059
|
+
const strictMetadataFilters = metadataFilters.filter((filter) => filter.strict);
|
|
2060
|
+
const hintMetadataFilters = metadataFilters.filter((filter) => !filter.strict);
|
|
2061
|
+
const hasStrictMetadataFilters = strictMetadataFilters.length > 0;
|
|
2062
|
+
const hasHintMetadataFilters = hintMetadataFilters.length > 0;
|
|
2063
|
+
const hasAnyMetadataFilters = metadataFilters.length > 0;
|
|
2064
|
+
let cleanedQuery = inlineMetadata.cleanedQuery;
|
|
2065
|
+
let hasTextQuery = cleanedQuery.length > 0;
|
|
2066
|
+
if (!hasTextQuery && hasHintMetadataFilters) {
|
|
2067
|
+
cleanedQuery = hintMetadataFilters
|
|
2068
|
+
.flatMap((filter) => filter.values)
|
|
2069
|
+
.map((value) => value.trim())
|
|
2070
|
+
.filter((value) => value.length > 0)
|
|
2071
|
+
.join(" ");
|
|
2072
|
+
cleanedQuery = cleanedQuery.trim();
|
|
2073
|
+
hasTextQuery = cleanedQuery.length > 0;
|
|
2074
|
+
}
|
|
2075
|
+
const metadataValueSeed = metadataFilters
|
|
2076
|
+
.flatMap((filter) => filter.values)
|
|
2077
|
+
.map((value) => value.trim())
|
|
2078
|
+
.filter((value) => value.length > 0)
|
|
2079
|
+
.join(" ");
|
|
2080
|
+
if (metadataValueSeed.length > 0) {
|
|
2081
|
+
cleanedQuery = `${cleanedQuery} ${metadataValueSeed}`.trim();
|
|
2082
|
+
hasTextQuery = cleanedQuery.length > 0;
|
|
2083
|
+
}
|
|
2084
|
+
if (!hasTextQuery && !hasAnyMetadataFilters) {
|
|
2085
|
+
throw new Error("files_search requires a query or metadata_filters. Provide keywords or structured filters to continue.");
|
|
1049
2086
|
}
|
|
1050
2087
|
const limit = normalizeLimit(params.limit);
|
|
1051
2088
|
const ftsStatus = await getFreshFtsStatus(context);
|
|
1052
2089
|
const hasFTS = ftsStatus.ready;
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
if (
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
SELECT
|
|
1079
|
-
FROM
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
values = [repoId];
|
|
1093
|
-
const words = splitQueryWords(query);
|
|
1094
|
-
if (words.length === 1) {
|
|
1095
|
-
conditions.push("b.content ILIKE '%' || ? || '%'");
|
|
1096
|
-
values.push(query);
|
|
2090
|
+
const metadataClauses = buildMetadataFilterConditions(strictMetadataFilters);
|
|
2091
|
+
const candidateRows = [];
|
|
2092
|
+
if (hasTextQuery) {
|
|
2093
|
+
let sql;
|
|
2094
|
+
let values;
|
|
2095
|
+
if (hasFTS) {
|
|
2096
|
+
const conditions = ["f.repo_id = ?"];
|
|
2097
|
+
values = [repoId];
|
|
2098
|
+
if (params.lang) {
|
|
2099
|
+
conditions.push("COALESCE(f.lang, '') = ?");
|
|
2100
|
+
values.push(params.lang);
|
|
2101
|
+
}
|
|
2102
|
+
if (params.ext) {
|
|
2103
|
+
conditions.push("COALESCE(f.ext, '') = ?");
|
|
2104
|
+
values.push(params.ext);
|
|
2105
|
+
}
|
|
2106
|
+
if (params.path_prefix) {
|
|
2107
|
+
conditions.push("f.path LIKE ?");
|
|
2108
|
+
values.push(`${params.path_prefix}%`);
|
|
2109
|
+
}
|
|
2110
|
+
for (const clause of metadataClauses) {
|
|
2111
|
+
conditions.push(clause.sql);
|
|
2112
|
+
values.push(...clause.params);
|
|
2113
|
+
}
|
|
2114
|
+
sql = `
|
|
2115
|
+
SELECT f.path, f.lang, f.ext, b.content, fts.score
|
|
2116
|
+
FROM file f
|
|
2117
|
+
JOIN blob b ON b.hash = f.blob_hash
|
|
2118
|
+
JOIN (
|
|
2119
|
+
SELECT hash, fts_main_blob.match_bm25(hash, ?) AS score
|
|
2120
|
+
FROM blob
|
|
2121
|
+
WHERE score IS NOT NULL
|
|
2122
|
+
) fts ON fts.hash = b.hash
|
|
2123
|
+
WHERE ${conditions.join(" AND ")}
|
|
2124
|
+
ORDER BY fts.score DESC
|
|
2125
|
+
LIMIT ?
|
|
2126
|
+
`;
|
|
2127
|
+
values.unshift(cleanedQuery);
|
|
2128
|
+
values.push(limit);
|
|
1097
2129
|
}
|
|
1098
2130
|
else {
|
|
1099
|
-
const
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
2131
|
+
const conditions = ["f.repo_id = ?", "b.content IS NOT NULL"];
|
|
2132
|
+
values = [repoId];
|
|
2133
|
+
const words = splitQueryWords(cleanedQuery);
|
|
2134
|
+
if (words.length === 1) {
|
|
2135
|
+
conditions.push("b.content ILIKE '%' || ? || '%'");
|
|
2136
|
+
values.push(cleanedQuery);
|
|
2137
|
+
}
|
|
2138
|
+
else {
|
|
2139
|
+
const wordConditions = words.map(() => "b.content ILIKE '%' || ? || '%'");
|
|
2140
|
+
conditions.push(`(${wordConditions.join(" OR ")})`);
|
|
2141
|
+
values.push(...words);
|
|
2142
|
+
}
|
|
2143
|
+
if (params.lang) {
|
|
2144
|
+
conditions.push("COALESCE(f.lang, '') = ?");
|
|
2145
|
+
values.push(params.lang);
|
|
2146
|
+
}
|
|
2147
|
+
if (params.ext) {
|
|
2148
|
+
conditions.push("COALESCE(f.ext, '') = ?");
|
|
2149
|
+
values.push(params.ext);
|
|
2150
|
+
}
|
|
2151
|
+
if (params.path_prefix) {
|
|
2152
|
+
conditions.push("f.path LIKE ?");
|
|
2153
|
+
values.push(`${params.path_prefix}%`);
|
|
2154
|
+
}
|
|
2155
|
+
for (const clause of metadataClauses) {
|
|
2156
|
+
conditions.push(clause.sql);
|
|
2157
|
+
values.push(...clause.params);
|
|
2158
|
+
}
|
|
2159
|
+
sql = `
|
|
2160
|
+
SELECT f.path, f.lang, f.ext, b.content
|
|
2161
|
+
FROM file f
|
|
2162
|
+
JOIN blob b ON b.hash = f.blob_hash
|
|
2163
|
+
WHERE ${conditions.join(" AND ")}
|
|
2164
|
+
ORDER BY f.path
|
|
2165
|
+
LIMIT ?
|
|
2166
|
+
`;
|
|
2167
|
+
values.push(limit);
|
|
2168
|
+
}
|
|
2169
|
+
const textRows = await db.all(sql, values);
|
|
2170
|
+
candidateRows.push(...textRows);
|
|
2171
|
+
}
|
|
2172
|
+
if (!hasTextQuery && hasAnyMetadataFilters) {
|
|
2173
|
+
const metadataOnlyRows = await fetchMetadataOnlyCandidates(db, context.tableAvailability, repoId, metadataFilters, limit * 2);
|
|
2174
|
+
for (const row of metadataOnlyRows) {
|
|
2175
|
+
row.score = 1 + metadataFilters.length * 0.2;
|
|
2176
|
+
}
|
|
2177
|
+
candidateRows.push(...metadataOnlyRows);
|
|
2178
|
+
}
|
|
2179
|
+
if (hasTextQuery) {
|
|
2180
|
+
const metadataKeywords = splitQueryWords(cleanedQuery.toLowerCase()).map((kw) => kw.toLowerCase());
|
|
2181
|
+
if (metadataKeywords.length > 0) {
|
|
2182
|
+
const excludePaths = new Set(candidateRows.map((row) => row.path));
|
|
2183
|
+
const metadataRows = await fetchMetadataKeywordMatches(db, context.tableAvailability, repoId, metadataKeywords, metadataFilters, limit * 2, excludePaths);
|
|
2184
|
+
candidateRows.push(...metadataRows);
|
|
2185
|
+
}
|
|
2186
|
+
}
|
|
2187
|
+
if (candidateRows.length === 0) {
|
|
2188
|
+
return [];
|
|
2189
|
+
}
|
|
2190
|
+
const rowMap = new Map();
|
|
2191
|
+
for (const row of candidateRows) {
|
|
2192
|
+
const base = row.score ?? (hasTextQuery ? 1.0 : 0.8);
|
|
2193
|
+
const existing = rowMap.get(row.path);
|
|
2194
|
+
const existingScore = existing?.score ?? (hasTextQuery ? 1.0 : 0.8);
|
|
2195
|
+
if (!existing || base > existingScore) {
|
|
2196
|
+
rowMap.set(row.path, { ...row, score: base });
|
|
2197
|
+
}
|
|
2198
|
+
}
|
|
2199
|
+
const dedupedRows = Array.from(rowMap.values()).sort((a, b) => (b.score ?? 1) - (a.score ?? 1));
|
|
2200
|
+
const limitedRows = dedupedRows.slice(0, limit);
|
|
2201
|
+
const paths = limitedRows.map((row) => row.path);
|
|
2202
|
+
const metadataMap = await loadMetadataForPaths(db, context.tableAvailability, repoId, paths);
|
|
2203
|
+
const inboundCounts = await loadInboundLinkCounts(db, context.tableAvailability, repoId, paths);
|
|
2204
|
+
const metadataKeywordSet = hasTextQuery
|
|
2205
|
+
? new Set(splitQueryWords(cleanedQuery.toLowerCase()).map((kw) => kw.toLowerCase()))
|
|
2206
|
+
: new Set();
|
|
2207
|
+
const filterValueSet = new Set(metadataFilters.flatMap((filter) => filter.values.map((value) => value.toLowerCase())));
|
|
2208
|
+
const boostProfile = params.boost_profile ??
|
|
2209
|
+
(hasHintMetadataFilters ? "balanced" : hasStrictMetadataFilters ? "docs" : "default");
|
|
1127
2210
|
const profileConfig = getBoostProfile(boostProfile);
|
|
1128
|
-
// ✅ v0.7.0+: Load configurable scoring weights for unified boosting logic
|
|
1129
|
-
// Note: filesSearch doesn't have a separate profile parameter, uses default weights
|
|
1130
2211
|
const weights = loadScoringProfile(null);
|
|
1131
2212
|
const options = parseOutputOptions(params);
|
|
1132
|
-
|
|
2213
|
+
const previewQuery = hasTextQuery
|
|
2214
|
+
? cleanedQuery
|
|
2215
|
+
: (metadataFilters[0]?.values[0] ?? rawQuery.trim());
|
|
2216
|
+
return limitedRows
|
|
1133
2217
|
.map((row) => {
|
|
1134
2218
|
let preview;
|
|
1135
2219
|
let matchLine;
|
|
2220
|
+
const previewSource = previewQuery || row.path;
|
|
1136
2221
|
if (options.includePreview) {
|
|
1137
|
-
|
|
1138
|
-
const previewData = buildPreview(row.content ?? "", query);
|
|
2222
|
+
const previewData = buildPreview(row.content ?? "", previewSource);
|
|
1139
2223
|
preview = previewData.preview;
|
|
1140
2224
|
matchLine = previewData.line;
|
|
1141
2225
|
}
|
|
1142
2226
|
else {
|
|
1143
|
-
|
|
1144
|
-
matchLine = findFirstMatchLine(row.content ?? "", query);
|
|
2227
|
+
matchLine = findFirstMatchLine(row.content ?? "", previewSource);
|
|
1145
2228
|
}
|
|
1146
|
-
const
|
|
2229
|
+
const metadataEntries = metadataMap.get(row.path);
|
|
2230
|
+
const metadataBoost = computeMetadataBoost(metadataEntries, metadataKeywordSet, filterValueSet);
|
|
2231
|
+
const inboundBoost = computeInboundLinkBoost(inboundCounts.get(row.path));
|
|
2232
|
+
const baseScore = (row.score ?? (hasTextQuery ? 1.0 : 0.8)) + metadataBoost + inboundBoost;
|
|
1147
2233
|
const boostedScore = boostProfile === "none"
|
|
1148
2234
|
? baseScore
|
|
1149
2235
|
: applyFileTypeBoost(row.path, baseScore, profileConfig, weights);
|
|
@@ -1159,96 +2245,20 @@ export async function filesSearch(context, params) {
|
|
|
1159
2245
|
}
|
|
1160
2246
|
return result;
|
|
1161
2247
|
})
|
|
1162
|
-
.
|
|
1163
|
-
|
|
1164
|
-
export async function snippetsGet(context, params) {
|
|
1165
|
-
const { db, repoId } = context;
|
|
1166
|
-
if (!params.path) {
|
|
1167
|
-
throw new Error("snippets_get requires a file path. Specify a tracked text file path to continue.");
|
|
1168
|
-
}
|
|
1169
|
-
const rows = await db.all(`
|
|
1170
|
-
SELECT f.path, f.lang, f.ext, f.is_binary, b.content
|
|
1171
|
-
FROM file f
|
|
1172
|
-
JOIN blob b ON b.hash = f.blob_hash
|
|
1173
|
-
WHERE f.repo_id = ? AND f.path = ?
|
|
1174
|
-
LIMIT 1
|
|
1175
|
-
`, [repoId, params.path]);
|
|
1176
|
-
if (rows.length === 0) {
|
|
1177
|
-
throw new Error("Requested snippet file was not indexed. Re-run the indexer or choose another path.");
|
|
1178
|
-
}
|
|
1179
|
-
const row = rows[0];
|
|
1180
|
-
if (!row) {
|
|
1181
|
-
throw new Error("Requested snippet file was not indexed. Re-run the indexer or choose another path.");
|
|
1182
|
-
}
|
|
1183
|
-
if (row.is_binary) {
|
|
1184
|
-
throw new Error("Binary snippets are not supported. Choose a text file to preview its content.");
|
|
1185
|
-
}
|
|
1186
|
-
if (row.content === null) {
|
|
1187
|
-
throw new Error("Snippet content is unavailable. Re-run the indexer to refresh DuckDB state.");
|
|
1188
|
-
}
|
|
1189
|
-
const lines = row.content.split(/\r?\n/);
|
|
1190
|
-
const totalLines = lines.length;
|
|
1191
|
-
const snippetRows = await db.all(`
|
|
1192
|
-
SELECT s.snippet_id, s.start_line, s.end_line, s.symbol_id, sym.name AS symbol_name, sym.kind AS symbol_kind
|
|
1193
|
-
FROM snippet s
|
|
1194
|
-
LEFT JOIN symbol sym
|
|
1195
|
-
ON sym.repo_id = s.repo_id
|
|
1196
|
-
AND sym.path = s.path
|
|
1197
|
-
AND sym.symbol_id = s.symbol_id
|
|
1198
|
-
WHERE s.repo_id = ? AND s.path = ?
|
|
1199
|
-
ORDER BY s.start_line
|
|
1200
|
-
`, [repoId, params.path]);
|
|
1201
|
-
const requestedStart = params.start_line ?? 1;
|
|
1202
|
-
const requestedEnd = params.end_line ?? Math.min(totalLines, requestedStart + DEFAULT_SNIPPET_WINDOW - 1);
|
|
1203
|
-
const useSymbolSnippets = snippetRows.length > 0 && params.end_line === undefined;
|
|
1204
|
-
let snippetSelection = null;
|
|
1205
|
-
if (useSymbolSnippets) {
|
|
1206
|
-
snippetSelection =
|
|
1207
|
-
snippetRows.find((snippet) => requestedStart >= snippet.start_line && requestedStart <= snippet.end_line) ?? null;
|
|
1208
|
-
if (!snippetSelection) {
|
|
1209
|
-
const firstSnippet = snippetRows[0];
|
|
1210
|
-
if (firstSnippet && requestedStart < firstSnippet.start_line) {
|
|
1211
|
-
snippetSelection = firstSnippet;
|
|
1212
|
-
}
|
|
1213
|
-
else {
|
|
1214
|
-
snippetSelection = snippetRows[snippetRows.length - 1] ?? null;
|
|
1215
|
-
}
|
|
1216
|
-
}
|
|
1217
|
-
}
|
|
1218
|
-
let startLine;
|
|
1219
|
-
let endLine;
|
|
1220
|
-
let symbolName = null;
|
|
1221
|
-
let symbolKind = null;
|
|
1222
|
-
if (snippetSelection) {
|
|
1223
|
-
startLine = snippetSelection.start_line;
|
|
1224
|
-
endLine = snippetSelection.end_line;
|
|
1225
|
-
symbolName = snippetSelection.symbol_name;
|
|
1226
|
-
symbolKind = snippetSelection.symbol_kind;
|
|
1227
|
-
}
|
|
1228
|
-
else {
|
|
1229
|
-
startLine = Math.max(1, Math.min(totalLines, requestedStart));
|
|
1230
|
-
endLine = Math.max(startLine, Math.min(totalLines, requestedEnd));
|
|
1231
|
-
}
|
|
1232
|
-
const isCompact = params.compact === true;
|
|
1233
|
-
const addLineNumbers = params.includeLineNumbers === true && !isCompact;
|
|
1234
|
-
let content;
|
|
1235
|
-
if (!isCompact) {
|
|
1236
|
-
const snippetContent = lines.slice(startLine - 1, endLine).join("\n");
|
|
1237
|
-
content = addLineNumbers ? prependLineNumbers(snippetContent, startLine) : snippetContent;
|
|
1238
|
-
}
|
|
1239
|
-
return {
|
|
1240
|
-
path: row.path,
|
|
1241
|
-
startLine,
|
|
1242
|
-
endLine,
|
|
1243
|
-
...(content !== undefined && { content }),
|
|
1244
|
-
totalLines,
|
|
1245
|
-
symbolName,
|
|
1246
|
-
symbolKind,
|
|
1247
|
-
};
|
|
2248
|
+
.filter((result) => result.score > SCORE_FILTER_THRESHOLD) // v1.0.0: Filter out extremely low-scored files (multiplicative penalties)
|
|
2249
|
+
.sort((a, b) => b.score - a.score);
|
|
1248
2250
|
}
|
|
2251
|
+
// snippetsGet has been extracted to ./handlers/snippets-get.ts and re-exported above
|
|
1249
2252
|
// ============================================================================
|
|
1250
2253
|
// Issue #68: Path/Large File Penalty Helper Functions
|
|
1251
2254
|
// ============================================================================
|
|
2255
|
+
/**
|
|
2256
|
+
* v1.0.0: Score filtering threshold for multiplicative penalty model
|
|
2257
|
+
* Files with score < threshold are filtered out (unless they are hint paths)
|
|
2258
|
+
* Default: 0.05 removes files with >95% penalty while keeping relevant files
|
|
2259
|
+
* Can be overridden via KIRI_SCORE_THRESHOLD environment variable
|
|
2260
|
+
*/
|
|
2261
|
+
const SCORE_FILTER_THRESHOLD = parseFloat(process.env.KIRI_SCORE_THRESHOLD ?? "0.05");
|
|
1252
2262
|
/**
|
|
1253
2263
|
* 環境変数からペナルティ機能フラグを読み取る
|
|
1254
2264
|
*/
|
|
@@ -1505,15 +2515,40 @@ function computeGraduatedPenalty(pathMatchHits, queryStats, config) {
|
|
|
1505
2515
|
return config.tier2Delta;
|
|
1506
2516
|
return 0; // pathMatchHits >= 3: no penalty
|
|
1507
2517
|
}
|
|
1508
|
-
|
|
2518
|
+
async function contextBundleImpl(context, params) {
|
|
1509
2519
|
context.warningManager.startRequest();
|
|
1510
2520
|
const { db, repoId } = context;
|
|
1511
|
-
const
|
|
1512
|
-
if (
|
|
2521
|
+
const rawGoal = params.goal?.trim() ?? "";
|
|
2522
|
+
if (rawGoal.length === 0) {
|
|
1513
2523
|
throw new Error("context_bundle requires a non-empty goal. Describe your objective to receive context.");
|
|
1514
2524
|
}
|
|
2525
|
+
if (process.env.KIRI_TRACE_METADATA === "1") {
|
|
2526
|
+
console.info(`[metadata-trace-env] goal=${rawGoal}`);
|
|
2527
|
+
}
|
|
2528
|
+
const inlineMetadata = parseInlineMetadataFilters(rawGoal);
|
|
2529
|
+
const paramFilters = normalizeMetadataFiltersParam(params.metadata_filters);
|
|
2530
|
+
const metadataFilters = mergeMetadataFilters([...inlineMetadata.filters, ...paramFilters]);
|
|
2531
|
+
const strictMetadataFilters = metadataFilters.filter((filter) => filter.strict);
|
|
2532
|
+
const hintMetadataFilters = metadataFilters.filter((filter) => !filter.strict);
|
|
2533
|
+
const hasStrictMetadataFilters = strictMetadataFilters.length > 0;
|
|
2534
|
+
const hasHintMetadataFilters = hintMetadataFilters.length > 0;
|
|
2535
|
+
const hasAnyMetadataFilters = metadataFilters.length > 0;
|
|
2536
|
+
const goal = inlineMetadata.cleanedQuery.length > 0 ? inlineMetadata.cleanedQuery : rawGoal;
|
|
2537
|
+
if (process.env.KIRI_TRACE_METADATA === "1") {
|
|
2538
|
+
console.info("[metadata-trace]", JSON.stringify({
|
|
2539
|
+
rawGoal,
|
|
2540
|
+
cleanedGoal: goal,
|
|
2541
|
+
inlineFilters: inlineMetadata.filters,
|
|
2542
|
+
paramFilters,
|
|
2543
|
+
mergedFilters: metadataFilters,
|
|
2544
|
+
}));
|
|
2545
|
+
}
|
|
1515
2546
|
const limit = normalizeBundleLimit(params.limit);
|
|
1516
2547
|
const artifacts = params.artifacts ?? {};
|
|
2548
|
+
const artifactHints = normalizeArtifactHints(artifacts.hints);
|
|
2549
|
+
const hintBuckets = bucketArtifactHints(artifactHints);
|
|
2550
|
+
const artifactPathHints = hintBuckets.pathHints;
|
|
2551
|
+
const substringHints = hintBuckets.substringHints;
|
|
1517
2552
|
const includeTokensEstimate = params.includeTokensEstimate === true;
|
|
1518
2553
|
const isCompact = params.compact === true;
|
|
1519
2554
|
// 項目2: トークンバジェット保護警告
|
|
@@ -1536,6 +2571,15 @@ export async function contextBundle(context, params) {
|
|
|
1536
2571
|
if (artifacts.editing_path) {
|
|
1537
2572
|
keywordSources.push(artifacts.editing_path);
|
|
1538
2573
|
}
|
|
2574
|
+
if (artifactHints.length > 0) {
|
|
2575
|
+
keywordSources.push(artifactHints.join(" "));
|
|
2576
|
+
}
|
|
2577
|
+
if (hasAnyMetadataFilters) {
|
|
2578
|
+
const filterSeed = metadataFilters
|
|
2579
|
+
.map((filter) => `${filter.source ?? "meta"}:${filter.key}=${filter.values.join(",")}`)
|
|
2580
|
+
.join(" ");
|
|
2581
|
+
keywordSources.push(filterSeed);
|
|
2582
|
+
}
|
|
1539
2583
|
const semanticSeed = keywordSources.join(" ");
|
|
1540
2584
|
const queryEmbedding = generateEmbedding(semanticSeed)?.values ?? null;
|
|
1541
2585
|
const extractedTerms = extractKeywords(semanticSeed);
|
|
@@ -1553,13 +2597,16 @@ export async function contextBundle(context, params) {
|
|
|
1553
2597
|
const stringMatchSeeds = new Set();
|
|
1554
2598
|
const fileCache = new Map();
|
|
1555
2599
|
// ✅ Cache boost profile config to avoid redundant lookups in hot path
|
|
1556
|
-
const boostProfile = params.boost_profile ??
|
|
2600
|
+
const boostProfile = params.boost_profile ??
|
|
2601
|
+
(hasHintMetadataFilters ? "balanced" : hasStrictMetadataFilters ? "docs" : "default");
|
|
1557
2602
|
const profileConfig = getBoostProfile(boostProfile);
|
|
1558
2603
|
// フレーズマッチング(高い重み: textMatch × 2)- 統合クエリでパフォーマンス改善
|
|
1559
2604
|
if (extractedTerms.phrases.length > 0) {
|
|
1560
2605
|
const phrasePlaceholders = extractedTerms.phrases
|
|
1561
2606
|
.map(() => "b.content ILIKE '%' || ? || '%'")
|
|
1562
2607
|
.join(" OR ");
|
|
2608
|
+
// DEBUG: Log SQL query parameters for troubleshooting
|
|
2609
|
+
console.log(`[DEBUG contextBundle] Executing phrase match query with repo_id=${repoId}, phrases=${JSON.stringify(extractedTerms.phrases)}`);
|
|
1563
2610
|
const rows = await db.all(`
|
|
1564
2611
|
SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
|
|
1565
2612
|
FROM file f
|
|
@@ -1573,6 +2620,14 @@ export async function contextBundle(context, params) {
|
|
|
1573
2620
|
ORDER BY f.path
|
|
1574
2621
|
LIMIT ?
|
|
1575
2622
|
`, [repoId, ...extractedTerms.phrases, MAX_MATCHES_PER_KEYWORD * extractedTerms.phrases.length]);
|
|
2623
|
+
// DEBUG: Log returned paths and verify they match expected repo_id
|
|
2624
|
+
if (rows.length > 0) {
|
|
2625
|
+
console.log(`[DEBUG contextBundle] Phrase match returned ${rows.length} rows. Sample paths:`, rows.slice(0, 3).map((r) => r.path));
|
|
2626
|
+
// Verify repo_id of returned files
|
|
2627
|
+
const pathsToCheck = rows.slice(0, 3).map((r) => r.path);
|
|
2628
|
+
const verification = await db.all(`SELECT path, repo_id FROM file WHERE path IN (${pathsToCheck.map(() => "?").join(", ")}) LIMIT 3`, pathsToCheck);
|
|
2629
|
+
console.log(`[DEBUG contextBundle] Repo ID verification:`, verification);
|
|
2630
|
+
}
|
|
1576
2631
|
for (const row of rows) {
|
|
1577
2632
|
if (row.content === null) {
|
|
1578
2633
|
continue;
|
|
@@ -1674,6 +2729,31 @@ export async function contextBundle(context, params) {
|
|
|
1674
2729
|
}
|
|
1675
2730
|
}
|
|
1676
2731
|
}
|
|
2732
|
+
const artifactPathTargets = artifactPathHints.map((hintPath) => ({
|
|
2733
|
+
path: hintPath,
|
|
2734
|
+
sourceHint: hintPath,
|
|
2735
|
+
origin: "artifact",
|
|
2736
|
+
}));
|
|
2737
|
+
const dictionaryPathTargets = await fetchDictionaryPathHints(db, context.tableAvailability, repoId, substringHints, HINT_DICTIONARY_LIMIT);
|
|
2738
|
+
const { list: resolvedPathHintTargets, meta: hintSeedMeta } = createHintSeedMeta([
|
|
2739
|
+
...artifactPathTargets,
|
|
2740
|
+
...dictionaryPathTargets,
|
|
2741
|
+
]);
|
|
2742
|
+
if (resolvedPathHintTargets.length > 0) {
|
|
2743
|
+
await applyPathHintPromotions({
|
|
2744
|
+
db,
|
|
2745
|
+
tableAvailability: context.tableAvailability,
|
|
2746
|
+
repoId,
|
|
2747
|
+
hintTargets: resolvedPathHintTargets,
|
|
2748
|
+
candidates,
|
|
2749
|
+
fileCache,
|
|
2750
|
+
weights,
|
|
2751
|
+
hintSeedMeta,
|
|
2752
|
+
});
|
|
2753
|
+
}
|
|
2754
|
+
if (substringHints.length > 0) {
|
|
2755
|
+
await addHintSubstringMatches(db, context.tableAvailability, repoId, substringHints, candidates, HINT_SUBSTRING_LIMIT, HINT_SUBSTRING_BOOST);
|
|
2756
|
+
}
|
|
1677
2757
|
if (artifacts.editing_path) {
|
|
1678
2758
|
const editingCandidate = ensureCandidate(candidates, artifacts.editing_path);
|
|
1679
2759
|
editingCandidate.score += weights.editingPath;
|
|
@@ -1681,7 +2761,6 @@ export async function contextBundle(context, params) {
|
|
|
1681
2761
|
editingCandidate.matchLine ??= 1;
|
|
1682
2762
|
}
|
|
1683
2763
|
// SQL injection防御: ファイルパスの検証パターン
|
|
1684
|
-
const SAFE_PATH_PATTERN = /^[a-zA-Z0-9_.\-/]+$/;
|
|
1685
2764
|
const dependencySeeds = new Set();
|
|
1686
2765
|
for (const pathSeed of stringMatchSeeds) {
|
|
1687
2766
|
if (!SAFE_PATH_PATTERN.test(pathSeed)) {
|
|
@@ -1695,10 +2774,13 @@ export async function contextBundle(context, params) {
|
|
|
1695
2774
|
}
|
|
1696
2775
|
if (artifacts.editing_path) {
|
|
1697
2776
|
if (!SAFE_PATH_PATTERN.test(artifacts.editing_path)) {
|
|
1698
|
-
throw new Error(`Invalid editing_path format.
|
|
2777
|
+
throw new Error(`Invalid editing_path format: ${artifacts.editing_path}. Use only A-Z, 0-9, _, ., -, / characters.`);
|
|
1699
2778
|
}
|
|
1700
2779
|
dependencySeeds.add(artifacts.editing_path);
|
|
1701
2780
|
}
|
|
2781
|
+
for (const target of resolvedPathHintTargets) {
|
|
2782
|
+
dependencySeeds.add(target.path);
|
|
2783
|
+
}
|
|
1702
2784
|
if (dependencySeeds.size > 0) {
|
|
1703
2785
|
// SQL injection防御: プレースホルダー生成前にサイズを検証
|
|
1704
2786
|
if (dependencySeeds.size > MAX_DEPENDENCY_SEEDS_QUERY_LIMIT) {
|
|
@@ -1708,7 +2790,7 @@ export async function contextBundle(context, params) {
|
|
|
1708
2790
|
// 防御的チェック: プレースホルダーが正しい形式であることを確認
|
|
1709
2791
|
// 期待される形式: "?, ?, ..." (クエスチョンマーク、カンマ、スペースのみ)
|
|
1710
2792
|
if (!/^(\?)(,\s*\?)*$/.test(placeholders)) {
|
|
1711
|
-
throw new Error("Invalid placeholder
|
|
2793
|
+
throw new Error("Invalid dependency placeholder sequence detected. Remove unsafe dependency seeds and retry the request.");
|
|
1712
2794
|
}
|
|
1713
2795
|
const depRows = await db.all(`
|
|
1714
2796
|
SELECT src_path, dst_kind, dst, rel
|
|
@@ -1746,31 +2828,72 @@ export async function contextBundle(context, params) {
|
|
|
1746
2828
|
}
|
|
1747
2829
|
}
|
|
1748
2830
|
}
|
|
1749
|
-
const
|
|
1750
|
-
|
|
1751
|
-
|
|
1752
|
-
|
|
1753
|
-
|
|
1754
|
-
candidate.content = cached.content;
|
|
1755
|
-
candidate.lang = cached.lang;
|
|
1756
|
-
candidate.ext = cached.ext;
|
|
1757
|
-
candidate.totalLines = cached.totalLines;
|
|
1758
|
-
candidate.embedding = cached.embedding;
|
|
2831
|
+
const materializeCandidates = async () => {
|
|
2832
|
+
const result = [];
|
|
2833
|
+
for (const candidate of candidates.values()) {
|
|
2834
|
+
if (isSuppressedPath(candidate.path)) {
|
|
2835
|
+
continue;
|
|
1759
2836
|
}
|
|
1760
|
-
|
|
1761
|
-
const
|
|
1762
|
-
if (
|
|
1763
|
-
|
|
2837
|
+
if (!candidate.content) {
|
|
2838
|
+
const cached = fileCache.get(candidate.path);
|
|
2839
|
+
if (cached) {
|
|
2840
|
+
candidate.content = cached.content;
|
|
2841
|
+
candidate.lang = cached.lang;
|
|
2842
|
+
candidate.ext = cached.ext;
|
|
2843
|
+
candidate.totalLines = cached.totalLines;
|
|
2844
|
+
candidate.embedding = cached.embedding;
|
|
1764
2845
|
}
|
|
1765
|
-
|
|
1766
|
-
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1770
|
-
|
|
2846
|
+
else {
|
|
2847
|
+
const loaded = await loadFileContent(db, repoId, candidate.path);
|
|
2848
|
+
if (!loaded) {
|
|
2849
|
+
continue;
|
|
2850
|
+
}
|
|
2851
|
+
candidate.content = loaded.content;
|
|
2852
|
+
candidate.lang = loaded.lang;
|
|
2853
|
+
candidate.ext = loaded.ext;
|
|
2854
|
+
candidate.totalLines = loaded.totalLines;
|
|
2855
|
+
candidate.embedding = loaded.embedding;
|
|
2856
|
+
fileCache.set(candidate.path, loaded);
|
|
2857
|
+
}
|
|
2858
|
+
}
|
|
2859
|
+
result.push(candidate);
|
|
2860
|
+
}
|
|
2861
|
+
return result;
|
|
2862
|
+
};
|
|
2863
|
+
const addMetadataFallbackCandidates = async () => {
|
|
2864
|
+
if (!hasAnyMetadataFilters) {
|
|
2865
|
+
return;
|
|
2866
|
+
}
|
|
2867
|
+
const metadataRows = await fetchMetadataOnlyCandidates(db, context.tableAvailability, repoId, metadataFilters, limit * 2);
|
|
2868
|
+
if (metadataRows.length === 0) {
|
|
2869
|
+
return;
|
|
2870
|
+
}
|
|
2871
|
+
for (const row of metadataRows) {
|
|
2872
|
+
const candidate = ensureCandidate(candidates, row.path);
|
|
2873
|
+
if (row.content) {
|
|
2874
|
+
candidate.content = row.content;
|
|
2875
|
+
candidate.totalLines = row.content.split(/\r?\n/).length;
|
|
2876
|
+
fileCache.set(row.path, {
|
|
2877
|
+
content: row.content,
|
|
2878
|
+
lang: row.lang,
|
|
2879
|
+
ext: row.ext,
|
|
2880
|
+
totalLines: candidate.totalLines,
|
|
2881
|
+
embedding: candidate.embedding,
|
|
2882
|
+
});
|
|
1771
2883
|
}
|
|
2884
|
+
candidate.lang ??= row.lang;
|
|
2885
|
+
candidate.ext ??= row.ext;
|
|
2886
|
+
candidate.matchLine ??= 1;
|
|
2887
|
+
candidate.score = Math.max(candidate.score, 1 + metadataFilters.length * 0.2);
|
|
1772
2888
|
}
|
|
1773
|
-
|
|
2889
|
+
};
|
|
2890
|
+
if (hasAnyMetadataFilters) {
|
|
2891
|
+
await addMetadataFallbackCandidates();
|
|
2892
|
+
}
|
|
2893
|
+
let materializedCandidates = await materializeCandidates();
|
|
2894
|
+
if (materializedCandidates.length === 0 && hasAnyMetadataFilters) {
|
|
2895
|
+
await addMetadataFallbackCandidates();
|
|
2896
|
+
materializedCandidates = await materializeCandidates();
|
|
1774
2897
|
}
|
|
1775
2898
|
if (materializedCandidates.length === 0) {
|
|
1776
2899
|
// Get warnings from WarningManager (includes breaking change notification if applicable)
|
|
@@ -1781,6 +2904,72 @@ export async function contextBundle(context, params) {
|
|
|
1781
2904
|
...(warnings.length > 0 && { warnings }),
|
|
1782
2905
|
};
|
|
1783
2906
|
}
|
|
2907
|
+
const metadataKeywordSet = new Set(extractedTerms.keywords.map((keyword) => keyword.toLowerCase()));
|
|
2908
|
+
const filterValueSet = new Set(metadataFilters.flatMap((filter) => filter.values.map((value) => value.toLowerCase())));
|
|
2909
|
+
let metadataEntriesMap;
|
|
2910
|
+
if (hasAnyMetadataFilters || metadataKeywordSet.size > 0 || filterValueSet.size > 0) {
|
|
2911
|
+
metadataEntriesMap = await loadMetadataForPaths(db, context.tableAvailability, repoId, materializedCandidates.map((candidate) => candidate.path));
|
|
2912
|
+
}
|
|
2913
|
+
if (hasStrictMetadataFilters) {
|
|
2914
|
+
metadataEntriesMap ??= new Map();
|
|
2915
|
+
for (let i = materializedCandidates.length - 1; i >= 0; i--) {
|
|
2916
|
+
const candidate = materializedCandidates[i];
|
|
2917
|
+
if (!candidate) {
|
|
2918
|
+
continue; // Skip undefined entries
|
|
2919
|
+
}
|
|
2920
|
+
const entries = metadataEntriesMap.get(candidate.path);
|
|
2921
|
+
const matchesFilters = candidateMatchesMetadataFilters(entries, strictMetadataFilters);
|
|
2922
|
+
if (!matchesFilters) {
|
|
2923
|
+
materializedCandidates.splice(i, 1);
|
|
2924
|
+
continue;
|
|
2925
|
+
}
|
|
2926
|
+
candidate.reasons.add("metadata:filter");
|
|
2927
|
+
if (process.env.KIRI_TRACE_METADATA === "1") {
|
|
2928
|
+
console.info(`[metadata-trace-match] path=${candidate.path}`);
|
|
2929
|
+
}
|
|
2930
|
+
}
|
|
2931
|
+
if (materializedCandidates.length === 0 && hasAnyMetadataFilters) {
|
|
2932
|
+
await addMetadataFallbackCandidates();
|
|
2933
|
+
materializedCandidates = await materializeCandidates();
|
|
2934
|
+
}
|
|
2935
|
+
if (materializedCandidates.length === 0) {
|
|
2936
|
+
const warnings = [...context.warningManager.responseWarnings];
|
|
2937
|
+
return {
|
|
2938
|
+
context: [],
|
|
2939
|
+
...(includeTokensEstimate && { tokens_estimate: 0 }),
|
|
2940
|
+
...(warnings.length > 0 && { warnings }),
|
|
2941
|
+
};
|
|
2942
|
+
}
|
|
2943
|
+
}
|
|
2944
|
+
if (hasHintMetadataFilters) {
|
|
2945
|
+
metadataEntriesMap ??= new Map();
|
|
2946
|
+
for (const candidate of materializedCandidates) {
|
|
2947
|
+
const entries = metadataEntriesMap.get(candidate.path);
|
|
2948
|
+
const matchesHints = candidateMatchesMetadataFilters(entries, hintMetadataFilters);
|
|
2949
|
+
if (matchesHints) {
|
|
2950
|
+
candidate.score += METADATA_HINT_BONUS;
|
|
2951
|
+
candidate.reasons.add("metadata:hint");
|
|
2952
|
+
}
|
|
2953
|
+
}
|
|
2954
|
+
}
|
|
2955
|
+
const inboundCounts = await loadInboundLinkCounts(db, context.tableAvailability, repoId, materializedCandidates.map((candidate) => candidate.path));
|
|
2956
|
+
if (metadataEntriesMap) {
|
|
2957
|
+
for (const candidate of materializedCandidates) {
|
|
2958
|
+
const entries = metadataEntriesMap.get(candidate.path);
|
|
2959
|
+
const metadataBoost = computeMetadataBoost(entries, metadataKeywordSet, filterValueSet);
|
|
2960
|
+
if (metadataBoost > 0) {
|
|
2961
|
+
candidate.score += metadataBoost;
|
|
2962
|
+
candidate.reasons.add("boost:metadata");
|
|
2963
|
+
}
|
|
2964
|
+
}
|
|
2965
|
+
}
|
|
2966
|
+
for (const candidate of materializedCandidates) {
|
|
2967
|
+
const linkBoost = computeInboundLinkBoost(inboundCounts.get(candidate.path));
|
|
2968
|
+
if (linkBoost > 0) {
|
|
2969
|
+
candidate.score += linkBoost;
|
|
2970
|
+
candidate.reasons.add("boost:links");
|
|
2971
|
+
}
|
|
2972
|
+
}
|
|
1784
2973
|
applyStructuralScores(materializedCandidates, queryEmbedding, weights.structural);
|
|
1785
2974
|
// ✅ CRITICAL SAFETY: Apply multipliers AFTER all additive scoring (v0.7.0)
|
|
1786
2975
|
// Only apply to positive scores to prevent negative score inversion
|
|
@@ -1819,18 +3008,31 @@ export async function contextBundle(context, params) {
|
|
|
1819
3008
|
const telemetry = computePenaltyTelemetry(materializedCandidates);
|
|
1820
3009
|
logPenaltyTelemetry(telemetry, queryStats);
|
|
1821
3010
|
}
|
|
1822
|
-
|
|
1823
|
-
|
|
3011
|
+
// v1.0.0: Filter out extremely low-scored candidates (result of multiplicative penalties)
|
|
3012
|
+
// Threshold removes files with >95% penalty while keeping reasonably relevant files
|
|
3013
|
+
// Hint paths are exempt from this threshold (always included if score > 0)
|
|
3014
|
+
const hintPathSet = new Set(resolvedPathHintTargets.map((target) => target.path));
|
|
3015
|
+
const rankedCandidates = materializedCandidates
|
|
3016
|
+
.filter((candidate) => candidate.score > SCORE_FILTER_THRESHOLD ||
|
|
3017
|
+
(candidate.score > 0 && hintPathSet.has(candidate.path)))
|
|
1824
3018
|
.sort((a, b) => {
|
|
1825
3019
|
if (b.score === a.score) {
|
|
1826
3020
|
return a.path.localeCompare(b.path);
|
|
1827
3021
|
}
|
|
1828
3022
|
return b.score - a.score;
|
|
1829
|
-
})
|
|
1830
|
-
|
|
1831
|
-
|
|
3023
|
+
});
|
|
3024
|
+
const prioritizedCandidates = prioritizeHintCandidates(rankedCandidates, resolvedPathHintTargets.map((target) => target.path), limit);
|
|
3025
|
+
if (prioritizedCandidates.length === 0) {
|
|
3026
|
+
const warnings = [...context.warningManager.responseWarnings];
|
|
3027
|
+
return {
|
|
3028
|
+
context: [],
|
|
3029
|
+
...(includeTokensEstimate && { tokens_estimate: 0 }),
|
|
3030
|
+
...(warnings.length > 0 && { warnings }),
|
|
3031
|
+
};
|
|
3032
|
+
}
|
|
3033
|
+
const maxScore = Math.max(...prioritizedCandidates.map((candidate) => candidate.score));
|
|
1832
3034
|
const results = [];
|
|
1833
|
-
for (const candidate of
|
|
3035
|
+
for (const candidate of prioritizedCandidates) {
|
|
1834
3036
|
if (!candidate.content) {
|
|
1835
3037
|
continue;
|
|
1836
3038
|
}
|
|
@@ -1858,6 +3060,23 @@ export async function contextBundle(context, params) {
|
|
|
1858
3060
|
startLine = Math.max(1, matchLine - windowHalf);
|
|
1859
3061
|
endLine = Math.min(totalLines === 0 ? matchLine + windowHalf : totalLines, startLine + FALLBACK_SNIPPET_WINDOW - 1);
|
|
1860
3062
|
}
|
|
3063
|
+
if (CLAMP_SNIPPETS_ENABLED) {
|
|
3064
|
+
// Clamp snippet length to FALLBACK_SNIPPET_WINDOW even when symbol spans large regions
|
|
3065
|
+
const maxWindow = FALLBACK_SNIPPET_WINDOW;
|
|
3066
|
+
const selectedEnd = selected ? selected.end_line : endLine;
|
|
3067
|
+
const selectedStart = selected ? selected.start_line : startLine;
|
|
3068
|
+
if (endLine - startLine + 1 > maxWindow) {
|
|
3069
|
+
const anchor = candidate.matchLine ?? startLine;
|
|
3070
|
+
let clampedStart = Math.max(selectedStart, anchor - Math.floor(maxWindow / 2));
|
|
3071
|
+
let clampedEnd = clampedStart + maxWindow - 1;
|
|
3072
|
+
if (clampedEnd > selectedEnd) {
|
|
3073
|
+
clampedEnd = selectedEnd;
|
|
3074
|
+
clampedStart = Math.max(selectedStart, clampedEnd - maxWindow + 1);
|
|
3075
|
+
}
|
|
3076
|
+
startLine = clampedStart;
|
|
3077
|
+
endLine = Math.max(clampedStart, clampedEnd);
|
|
3078
|
+
}
|
|
3079
|
+
}
|
|
1861
3080
|
if (endLine < startLine) {
|
|
1862
3081
|
endLine = startLine;
|
|
1863
3082
|
}
|
|
@@ -1885,7 +3104,7 @@ export async function contextBundle(context, params) {
|
|
|
1885
3104
|
let tokensEstimate;
|
|
1886
3105
|
if (includeTokensEstimate) {
|
|
1887
3106
|
tokensEstimate = results.reduce((acc, item) => {
|
|
1888
|
-
const candidate =
|
|
3107
|
+
const candidate = prioritizedCandidates.find((c) => c.path === item.path);
|
|
1889
3108
|
if (candidate && candidate.content) {
|
|
1890
3109
|
return acc + estimateTokensFromContent(candidate.content, item.range[0], item.range[1]);
|
|
1891
3110
|
}
|
|
@@ -1896,8 +3115,13 @@ export async function contextBundle(context, params) {
|
|
|
1896
3115
|
}
|
|
1897
3116
|
// Get warnings from WarningManager (includes breaking change notification if applicable)
|
|
1898
3117
|
const warnings = [...context.warningManager.responseWarnings];
|
|
3118
|
+
const shouldFilterResults = FINAL_RESULT_SUPPRESSION_ENABLED && SUPPRESS_NON_CODE_ENABLED;
|
|
3119
|
+
const sanitizedResults = shouldFilterResults
|
|
3120
|
+
? results.filter((item) => !isSuppressedPath(item.path))
|
|
3121
|
+
: results;
|
|
3122
|
+
const finalResults = sanitizedResults.length > 0 ? sanitizedResults : results;
|
|
1899
3123
|
const payload = {
|
|
1900
|
-
context:
|
|
3124
|
+
context: finalResults,
|
|
1901
3125
|
...(warnings.length > 0 && { warnings }),
|
|
1902
3126
|
};
|
|
1903
3127
|
if (tokensEstimate !== undefined) {
|
|
@@ -2100,35 +3324,27 @@ export async function depsClosure(context, params) {
|
|
|
2100
3324
|
edges,
|
|
2101
3325
|
};
|
|
2102
3326
|
}
|
|
2103
|
-
|
|
3327
|
+
/**
|
|
3328
|
+
* リポジトリのrootパスをデータベースIDに解決する。
|
|
3329
|
+
*
|
|
3330
|
+
* この関数は下位互換性のために保持されているが、内部的には新しいRepoResolverを使用する。
|
|
3331
|
+
*
|
|
3332
|
+
* @param db - DuckDBクライアント
|
|
3333
|
+
* @param repoRoot - リポジトリのrootパス
|
|
3334
|
+
* @param services - オプショナルなServerServices(指定がなければ新規作成される)
|
|
3335
|
+
* @returns リポジトリID
|
|
3336
|
+
* @throws Error リポジトリがインデックスされていない場合
|
|
3337
|
+
*/
|
|
3338
|
+
export async function resolveRepoId(db, repoRoot, services) {
|
|
3339
|
+
const svc = services ?? createServerServices(db);
|
|
3340
|
+
return await svc.repoResolver.resolveId(repoRoot);
|
|
3341
|
+
}
|
|
3342
|
+
export async function contextBundle(context, params) {
|
|
2104
3343
|
try {
|
|
2105
|
-
|
|
2106
|
-
const normalized = candidates[0];
|
|
2107
|
-
const placeholders = candidates.map(() => "?").join(", ");
|
|
2108
|
-
const rows = await db.all(`SELECT id, root FROM repo WHERE root IN (${placeholders}) LIMIT 1`, candidates);
|
|
2109
|
-
if (rows.length === 0) {
|
|
2110
|
-
const existingRows = await db.all("SELECT id, root FROM repo");
|
|
2111
|
-
for (const candidate of existingRows) {
|
|
2112
|
-
if (normalizeRepoPath(candidate.root) === normalized) {
|
|
2113
|
-
await db.run("UPDATE repo SET root = ? WHERE id = ?", [normalized, candidate.id]);
|
|
2114
|
-
return candidate.id;
|
|
2115
|
-
}
|
|
2116
|
-
}
|
|
2117
|
-
throw new Error("Target repository is missing from DuckDB. Run the indexer before starting the server.");
|
|
2118
|
-
}
|
|
2119
|
-
const row = rows[0];
|
|
2120
|
-
if (!row) {
|
|
2121
|
-
throw new Error("Failed to retrieve repository record. Database returned empty result.");
|
|
2122
|
-
}
|
|
2123
|
-
if (row.root !== normalized) {
|
|
2124
|
-
await db.run("UPDATE repo SET root = ? WHERE id = ?", [normalized, row.id]);
|
|
2125
|
-
}
|
|
2126
|
-
return row.id;
|
|
3344
|
+
return await contextBundleImpl(context, params);
|
|
2127
3345
|
}
|
|
2128
3346
|
catch (error) {
|
|
2129
|
-
|
|
2130
|
-
throw new Error("Target repository is missing from DuckDB. Run the indexer before starting the server.");
|
|
2131
|
-
}
|
|
3347
|
+
console.error("context_bundle error:", error);
|
|
2132
3348
|
throw error;
|
|
2133
3349
|
}
|
|
2134
3350
|
}
|