kiri-mcp-server 0.10.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -10
- package/config/kiri.yml +25 -0
- package/config/scoring-profiles.yml +82 -35
- package/dist/config/kiri.yml +25 -0
- package/dist/config/scoring-profiles.yml +82 -35
- package/dist/package.json +9 -1
- package/dist/src/indexer/cli.d.ts.map +1 -1
- package/dist/src/indexer/cli.js +712 -98
- package/dist/src/indexer/cli.js.map +1 -1
- package/dist/src/indexer/git.d.ts.map +1 -1
- package/dist/src/indexer/git.js +41 -3
- package/dist/src/indexer/git.js.map +1 -1
- package/dist/src/indexer/migrations/repo-merger.d.ts +33 -0
- package/dist/src/indexer/migrations/repo-merger.d.ts.map +1 -0
- package/dist/src/indexer/migrations/repo-merger.js +67 -0
- package/dist/src/indexer/migrations/repo-merger.js.map +1 -0
- package/dist/src/indexer/schema.d.ts +66 -0
- package/dist/src/indexer/schema.d.ts.map +1 -1
- package/dist/src/indexer/schema.js +337 -0
- package/dist/src/indexer/schema.js.map +1 -1
- package/dist/src/server/boost-profiles.d.ts +6 -5
- package/dist/src/server/boost-profiles.d.ts.map +1 -1
- package/dist/src/server/boost-profiles.js +138 -0
- package/dist/src/server/boost-profiles.js.map +1 -1
- package/dist/src/server/config-loader.d.ts +9 -0
- package/dist/src/server/config-loader.d.ts.map +1 -0
- package/dist/src/server/config-loader.js +121 -0
- package/dist/src/server/config-loader.js.map +1 -0
- package/dist/src/server/config.d.ts +47 -0
- package/dist/src/server/config.d.ts.map +1 -0
- package/dist/src/server/config.js +157 -0
- package/dist/src/server/config.js.map +1 -0
- package/dist/src/server/context.d.ts +29 -0
- package/dist/src/server/context.d.ts.map +1 -1
- package/dist/src/server/context.js +26 -1
- package/dist/src/server/context.js.map +1 -1
- package/dist/src/server/handlers/snippets-get.d.ts +36 -0
- package/dist/src/server/handlers/snippets-get.d.ts.map +1 -0
- package/dist/src/server/handlers/snippets-get.js +120 -0
- package/dist/src/server/handlers/snippets-get.js.map +1 -0
- package/dist/src/server/handlers.d.ts +33 -20
- package/dist/src/server/handlers.d.ts.map +1 -1
- package/dist/src/server/handlers.js +1805 -370
- package/dist/src/server/handlers.js.map +1 -1
- package/dist/src/server/indexBootstrap.d.ts.map +1 -1
- package/dist/src/server/indexBootstrap.js +49 -2
- package/dist/src/server/indexBootstrap.js.map +1 -1
- package/dist/src/server/main.d.ts.map +1 -1
- package/dist/src/server/main.js +7 -0
- package/dist/src/server/main.js.map +1 -1
- package/dist/src/server/profile-selector.d.ts +33 -0
- package/dist/src/server/profile-selector.d.ts.map +1 -0
- package/dist/src/server/profile-selector.js +291 -0
- package/dist/src/server/profile-selector.js.map +1 -0
- package/dist/src/server/rpc.d.ts.map +1 -1
- package/dist/src/server/rpc.js +60 -10
- package/dist/src/server/rpc.js.map +1 -1
- package/dist/src/server/runtime.d.ts.map +1 -1
- package/dist/src/server/runtime.js +14 -4
- package/dist/src/server/runtime.js.map +1 -1
- package/dist/src/server/scoring.d.ts +7 -1
- package/dist/src/server/scoring.d.ts.map +1 -1
- package/dist/src/server/scoring.js +121 -21
- package/dist/src/server/scoring.js.map +1 -1
- package/dist/src/server/services/index.d.ts +24 -0
- package/dist/src/server/services/index.d.ts.map +1 -0
- package/dist/src/server/services/index.js +20 -0
- package/dist/src/server/services/index.js.map +1 -0
- package/dist/src/server/services/repo-repository.d.ts +61 -0
- package/dist/src/server/services/repo-repository.d.ts.map +1 -0
- package/dist/src/server/services/repo-repository.js +93 -0
- package/dist/src/server/services/repo-repository.js.map +1 -0
- package/dist/src/server/services/repo-resolver.d.ts +28 -0
- package/dist/src/server/services/repo-resolver.d.ts.map +1 -0
- package/dist/src/server/services/repo-resolver.js +62 -0
- package/dist/src/server/services/repo-resolver.js.map +1 -0
- package/dist/src/shared/duckdb.d.ts.map +1 -1
- package/dist/src/shared/duckdb.js +21 -1
- package/dist/src/shared/duckdb.js.map +1 -1
- package/dist/src/shared/fs/safePath.d.ts +7 -0
- package/dist/src/shared/fs/safePath.d.ts.map +1 -0
- package/dist/src/shared/fs/safePath.js +23 -0
- package/dist/src/shared/fs/safePath.js.map +1 -0
- package/dist/src/shared/tokenizer.d.ts +1 -1
- package/dist/src/shared/tokenizer.d.ts.map +1 -1
- package/dist/src/shared/tokenizer.js +97 -15
- package/dist/src/shared/tokenizer.js.map +1 -1
- package/dist/src/shared/utils/glob.d.ts +5 -0
- package/dist/src/shared/utils/glob.d.ts.map +1 -0
- package/dist/src/shared/utils/glob.js +22 -0
- package/dist/src/shared/utils/glob.js.map +1 -0
- package/dist/src/shared/utils/retry.d.ts +8 -0
- package/dist/src/shared/utils/retry.d.ts.map +1 -0
- package/dist/src/shared/utils/retry.js +20 -0
- package/dist/src/shared/utils/retry.js.map +1 -0
- package/package.json +9 -1
|
@@ -3,10 +3,14 @@ import path from "node:path";
|
|
|
3
3
|
import { checkFTSSchemaExists } from "../indexer/schema.js";
|
|
4
4
|
import { generateEmbedding, structuralSimilarity } from "../shared/embedding.js";
|
|
5
5
|
import { encode as encodeGPT, tokenizeText } from "../shared/tokenizer.js";
|
|
6
|
-
import { getRepoPathCandidates, normalizeRepoPath } from "../shared/utils/path.js";
|
|
7
6
|
import { expandAbbreviations } from "./abbreviations.js";
|
|
8
7
|
import { getBoostProfile, } from "./boost-profiles.js";
|
|
8
|
+
import { loadPathPenalties, mergePathPenaltyEntries } from "./config-loader.js";
|
|
9
|
+
import { loadServerConfig } from "./config.js";
|
|
9
10
|
import { coerceProfileName, loadScoringProfile } from "./scoring.js";
|
|
11
|
+
import { createServerServices } from "./services/index.js";
|
|
12
|
+
// Re-export extracted handlers for backward compatibility
|
|
13
|
+
export { snippetsGet, } from "./handlers/snippets-get.js";
|
|
10
14
|
// Configuration file patterns (v0.8.0+: consolidated to avoid duplication)
|
|
11
15
|
// Comprehensive list covering multiple languages and tools
|
|
12
16
|
const CONFIG_FILES = [
|
|
@@ -133,6 +137,83 @@ const CONFIG_PATTERNS = [
|
|
|
133
137
|
".github/workflows",
|
|
134
138
|
];
|
|
135
139
|
const FTS_STATUS_CACHE_TTL_MS = 10_000;
|
|
140
|
+
const METADATA_ALIAS_MAP = new Map([
|
|
141
|
+
["tag", { key: "tags" }],
|
|
142
|
+
["tags", { key: "tags" }],
|
|
143
|
+
["category", { key: "category" }],
|
|
144
|
+
["title", { key: "title" }],
|
|
145
|
+
["service", { key: "service" }],
|
|
146
|
+
]);
|
|
147
|
+
const METADATA_KEY_PREFIXES = [
|
|
148
|
+
{ prefix: "meta." },
|
|
149
|
+
{ prefix: "metadata.", strict: true },
|
|
150
|
+
{ prefix: "docmeta.", strict: true },
|
|
151
|
+
{ prefix: "frontmatter.", source: "front_matter" },
|
|
152
|
+
{ prefix: "fm.", source: "front_matter" },
|
|
153
|
+
{ prefix: "yaml.", source: "yaml" },
|
|
154
|
+
{ prefix: "json.", source: "json" },
|
|
155
|
+
];
|
|
156
|
+
const METADATA_MATCH_WEIGHT = 0.15;
|
|
157
|
+
const METADATA_FILTER_MATCH_WEIGHT = 0.1;
|
|
158
|
+
const METADATA_HINT_BONUS = 0.25;
|
|
159
|
+
const INBOUND_LINK_WEIGHT = 0.2;
|
|
160
|
+
/**
 * checkTableAvailability
 *
 * Probes the database for each optional table and returns the outcome as a
 * TableAvailability object. Per the original note, this is meant to be done
 * at startup so that race conditions caused by global mutable variables are
 * avoided.
 *
 * NOTE: after a schema change (tables added) the server must be restarted.
 *
 * @param db - DuckDBClient instance; only `db.all(sql)` is used here
 * @returns TableAvailability object with the flags `hasMetadataTables`,
 *   `hasLinkTable`, `hasHintLog` and `hasHintDictionary`
 * @throws Error when a probe fails for a reason other than the table being
 *   missing (e.g. a connection error); the underlying error is attached as
 *   `cause` so its stack is not lost
 */
export async function checkTableAvailability(db) {
    const ALLOWED_TABLES = [
        "document_metadata_kv",
        "markdown_link",
        "hint_expansion",
        "hint_dictionary",
    ];
    const checkTable = async (tableName) => {
        // The table name is interpolated into the SQL text, so only the
        // identifiers listed above are accepted.
        if (!ALLOWED_TABLES.includes(tableName)) {
            throw new Error(`Invalid table name: ${tableName}`);
        }
        try {
            await db.all(`SELECT 1 FROM ${tableName} LIMIT 0`);
            return true;
        }
        catch (error) {
            // Only a "table is missing" failure is treated as "not available".
            if (isTableMissingError(error, tableName)) {
                return false;
            }
            // Anything else (connection errors, etc.) is rethrown with context,
            // keeping the original error reachable through `cause`.
            const detail = error instanceof Error ? error.message : String(error);
            throw new Error(`Failed to check table availability for ${tableName}: ${detail}`, { cause: error });
        }
    };
    const result = {
        hasMetadataTables: await checkTable("document_metadata_kv"),
        hasLinkTable: await checkTable("markdown_link"),
        hasHintLog: await checkTable("hint_expansion"),
        hasHintDictionary: await checkTable("hint_dictionary"),
    };
    // Warn once per missing table so the operator knows which features are off.
    if (!result.hasMetadataTables) {
        console.warn("document_metadata_kv table is missing. Metadata filters and boosts disabled until database is upgraded.");
    }
    if (!result.hasLinkTable) {
        console.warn("markdown_link table is missing. Inbound link boosting disabled until database is upgraded.");
    }
    if (!result.hasHintLog) {
        console.warn("hint_expansion table is missing. Hint logging disabled. Enable the latest schema and rerun the indexer to capture hint logs.");
    }
    if (!result.hasHintDictionary) {
        console.warn("hint_dictionary table is missing. Dictionary hints disabled. Run scripts/diag/build-hint-dictionary.ts after upgrading the schema.");
    }
    return result;
}
|
|
136
217
|
async function hasDirtyRepos(db) {
|
|
137
218
|
const statusCheck = await db.all(`SELECT COUNT(*) as count FROM repo
|
|
138
219
|
WHERE fts_dirty = true OR fts_status IN ('dirty', 'rebuilding')`);
|
|
@@ -217,44 +298,242 @@ function isConfigFile(path, fileName) {
|
|
|
217
298
|
fileName.startsWith(".env") ||
|
|
218
299
|
isInConfigDirectory);
|
|
219
300
|
}
|
|
301
|
+
/**
 * Cleans a caller-supplied hint list.
 *
 * Non-array input yields an empty list. Non-string entries are ignored, each
 * string is trimmed, blanks and duplicates are dropped, and collection stops
 * once MAX_ARTIFACT_HINTS entries have been gathered. First-seen order is kept.
 *
 * @param hints - value expected to be an array of strings
 * @returns array of unique, trimmed, non-empty hint strings
 */
function normalizeArtifactHints(hints) {
    const unique = new Set();
    if (!Array.isArray(hints)) {
        return [];
    }
    for (const entry of hints) {
        const value = typeof entry === "string" ? entry.trim() : "";
        if (value === "" || unique.has(value)) {
            continue;
        }
        unique.add(value);
        // The cap is checked after insertion, so at least one hint is kept.
        if (unique.size >= MAX_ARTIFACT_HINTS) {
            break;
        }
    }
    return Array.from(unique);
}
|
|
323
|
+
/**
 * Splits hints into path hints and substring hints.
 *
 * A hint that contains "/" and matches SAFE_PATH_PATTERN is kept verbatim as
 * a path hint. Every other hint is trimmed and lower-cased, and kept as a
 * substring hint only when the result has at least 3 characters.
 *
 * @param hints - iterable of hint strings
 * @returns object with `pathHints` and `substringHints` arrays
 */
function bucketArtifactHints(hints) {
    const pathHints = [];
    const substringHints = [];
    for (const hint of hints) {
        const looksLikePath = hint.includes("/") && SAFE_PATH_PATTERN.test(hint);
        if (looksLikePath) {
            pathHints.push(hint);
        }
        else {
            const lowered = hint.trim().toLowerCase();
            if (lowered.length >= 3) {
                substringHints.push(lowered);
            }
        }
    }
    return { pathHints, substringHints };
}
|
|
340
|
+
/**
 * Tells whether `error` looks like a "table does not exist" failure for `table`.
 *
 * The check is textual: the value must be an Error whose message contains
 * "Table with name" (case-insensitive) and also contains the table name.
 *
 * @param error - caught value of any type
 * @param table - table name expected to appear in the message
 * @returns true only when both textual conditions hold
 */
function isMissingTableError(error, table) {
    if (error instanceof Error) {
        const { message } = error;
        return /Table with name/i.test(message) && message.includes(table);
    }
    return false;
}
|
|
346
|
+
/**
 * Inserts one row into `hint_expansion` describing a hint expansion.
 *
 * Does nothing unless HINT_LOG_ENABLED is set and
 * `tableAvailability.hasHintLog` is truthy. If the insert fails because the
 * table is missing, a warning is printed and the error is swallowed; any
 * other error is rethrown unchanged.
 *
 * @param db - client exposing `run(sql, params)`
 * @param tableAvailability - object carrying the `hasHintLog` flag
 * @param entry - `{ repoId, hintValue, kind, targetPath?, payload? }`;
 *   a truthy `payload` is stored as JSON text, otherwise NULL is stored
 */
async function logHintExpansionEntry(db, tableAvailability, entry) {
    const loggingActive = HINT_LOG_ENABLED && tableAvailability.hasHintLog;
    if (!loggingActive) {
        return;
    }
    try {
        const values = [
            entry.repoId,
            entry.hintValue,
            entry.kind,
            entry.targetPath ?? null,
            entry.payload ? JSON.stringify(entry.payload) : null,
        ];
        await db.run(`
        INSERT INTO hint_expansion (repo_id, hint_value, expansion_kind, target_path, payload)
        VALUES (?, ?, ?, ?, ?)
      `, values);
    }
    catch (error) {
        if (!isMissingTableError(error, "hint_expansion")) {
            throw error;
        }
        console.warn("hint_expansion table is missing in the active database. Enable the latest schema and rerun the indexer to capture hint logs.");
    }
}
|
|
373
|
+
/**
 * Looks up `hint_dictionary` target paths for each distinct hint.
 *
 * Returns an empty list when HINT_DICTIONARY_ENABLED is off, when
 * `perHintLimit` is not positive, when there are no hints, or when
 * `tableAvailability.hasHintDictionary` is falsy. One query is issued per
 * distinct hint; rows whose `target_path` is empty or does not match
 * SAFE_PATH_PATTERN are dropped. If a query fails because the table is
 * missing, a warning is printed and an empty list is returned (results
 * gathered so far are discarded); other errors are rethrown.
 *
 * @param db - client exposing `all(sql, params)`
 * @param tableAvailability - object carrying the `hasHintDictionary` flag
 * @param repoId - value bound to `repo_id`
 * @param hints - array of hint strings
 * @param perHintLimit - value bound to LIMIT for each hint
 * @returns array of `{ path, sourceHint, origin: "dictionary" }`
 */
async function fetchDictionaryPathHints(db, tableAvailability, repoId, hints, perHintLimit) {
    const disabled = !HINT_DICTIONARY_ENABLED || perHintLimit <= 0 || hints.length === 0;
    if (disabled || !tableAvailability.hasHintDictionary) {
        return [];
    }
    const collected = [];
    for (const hint of new Set(hints)) {
        let rows;
        try {
            rows = await db.all(`
        SELECT target_path
        FROM hint_dictionary
        WHERE repo_id = ?
          AND hint_value = ?
        ORDER BY freq DESC, target_path
        LIMIT ?
      `, [repoId, hint, perHintLimit]);
        }
        catch (error) {
            if (!isMissingTableError(error, "hint_dictionary")) {
                throw error;
            }
            console.warn("hint_dictionary table is missing in the active database. Run scripts/diag/build-hint-dictionary.ts after upgrading the schema.");
            return [];
        }
        for (const row of rows) {
            const targetPath = row.target_path;
            if (targetPath && SAFE_PATH_PATTERN.test(targetPath)) {
                collected.push({ path: targetPath, sourceHint: hint, origin: "dictionary" });
            }
        }
    }
    return collected;
}
|
|
410
|
+
/**
 * De-duplicates hint targets by path and indexes their provenance.
 *
 * The first target seen for a path wins; later targets with the same path
 * are dropped.
 *
 * @param targets - iterable of `{ path, sourceHint, origin }`
 * @returns `{ list, meta }` where `list` holds the surviving targets in
 *   input order and `meta` maps path -> `{ sourceHint, origin }`
 */
function createHintSeedMeta(targets) {
    const meta = new Map();
    const list = [];
    for (const target of targets) {
        const { path, sourceHint, origin } = target;
        if (!meta.has(path)) {
            meta.set(path, { sourceHint, origin });
            list.push(target);
        }
    }
    return { list, meta };
}
|
|
422
|
+
/**
 * Null-safe lookup of the seed metadata recorded for `path`.
 *
 * @param seedMeta - Map keyed by path, or null/undefined
 * @param path - key to look up
 * @returns the stored entry, or undefined when there is no map or no entry
 */
function getHintSeedMeta(seedMeta, path) {
    if (seedMeta === null || seedMeta === undefined) {
        return undefined;
    }
    return seedMeta.get(path);
}
|
|
425
|
+
/**
 * Computes the score bonus for a hinted path.
 *
 * The bonus is textMatch * HINT_PRIORITY_TEXT_MULTIPLIER
 * + pathMatch * HINT_PRIORITY_PATH_MULTIPLIER + editingPath + dependency,
 * but never less than HINT_PRIORITY_BASE_BONUS.
 *
 * @param weights - object with numeric `textMatch`, `pathMatch`,
 *   `editingPath` and `dependency`
 * @returns the larger of the base bonus and the weighted sum
 */
function computeHintPriorityBoost(weights) {
    const { textMatch, pathMatch, editingPath, dependency } = weights;
    const weighted = textMatch * HINT_PRIORITY_TEXT_MULTIPLIER + pathMatch * HINT_PRIORITY_PATH_MULTIPLIER;
    const total = weighted + editingPath + dependency;
    return Math.max(HINT_PRIORITY_BASE_BONUS, total);
}
|
|
431
|
+
/**
 * Builds the settings object used by hint-neighborhood expansion.
 *
 * Limits come from the module-level HINT_* constants, clamped to be
 * non-negative (or at least 1 for `dirMaxFiles` and `semDirCandidateLimit`).
 * `semThreshold` falls back to 0.65 when the configured value is not finite.
 * `dirBoost` is 35% of the hint priority boost and `depBoost` is 80% of
 * `weights.dependency`.
 *
 * @param weights - scoring weights; passed to computeHintPriorityBoost and
 *   read for `dependency`
 * @returns plain object of limits and boosts
 */
function createHintExpansionConfig(weights) {
    const nonNegative = (value) => Math.max(0, value);
    const atLeastOne = (value) => Math.max(1, value);
    const semThreshold = Number.isFinite(HINT_SEM_THRESHOLD) ? HINT_SEM_THRESHOLD : 0.65;
    const priorityBoost = computeHintPriorityBoost(weights);
    return {
        dirLimit: nonNegative(HINT_DIR_LIMIT),
        dirMaxFiles: atLeastOne(HINT_DIR_MAX_FILES),
        depOutLimit: nonNegative(HINT_DEP_OUT_LIMIT),
        depInLimit: nonNegative(HINT_DEP_IN_LIMIT),
        semLimit: nonNegative(HINT_SEM_LIMIT),
        semDirCandidateLimit: atLeastOne(HINT_SEM_DIR_CANDIDATE_LIMIT),
        semThreshold,
        perHintLimit: nonNegative(HINT_PER_HINT_LIMIT),
        dbQueryBudget: nonNegative(HINT_DB_QUERY_BUDGET),
        dirBoost: priorityBoost * 0.35,
        depBoost: weights.dependency * 0.8,
        substringLimit: nonNegative(HINT_SUBSTRING_LIMIT),
        substringBoost: nonNegative(HINT_SUBSTRING_BOOST),
    };
}
|
|
220
448
|
const DEFAULT_SEARCH_LIMIT = 50;
|
|
221
|
-
const DEFAULT_SNIPPET_WINDOW = 150;
|
|
222
449
|
const DEFAULT_BUNDLE_LIMIT = 7; // Reduced from 12 to optimize token usage
|
|
223
450
|
const MAX_BUNDLE_LIMIT = 20;
|
|
451
|
+
const TRACE_SEARCH = process.env.KIRI_TRACE_SEARCH === "1";
|
|
224
452
|
const MAX_KEYWORDS = 12;
|
|
225
453
|
const MAX_MATCHES_PER_KEYWORD = 40;
|
|
226
454
|
const MAX_DEPENDENCY_SEEDS = 8;
|
|
227
455
|
const MAX_DEPENDENCY_SEEDS_QUERY_LIMIT = 100; // SQL injection防御用の上限
|
|
228
456
|
const NEARBY_LIMIT = 6;
|
|
229
|
-
const
|
|
457
|
+
const serverConfig = loadServerConfig();
|
|
458
|
+
const mergedPathMultiplierCache = new Map();
|
|
459
|
+
const SUPPRESS_NON_CODE_ENABLED = serverConfig.features.suppressNonCode;
|
|
460
|
+
const FINAL_RESULT_SUPPRESSION_ENABLED = serverConfig.features.suppressFinalResults;
|
|
461
|
+
const CLAMP_SNIPPETS_ENABLED = serverConfig.features.clampSnippets;
|
|
462
|
+
const FALLBACK_SNIPPET_WINDOW = serverConfig.features.snippetWindow;
|
|
230
463
|
const MAX_RERANK_LIMIT = 50;
|
|
464
|
+
const MAX_ARTIFACT_HINTS = 8;
|
|
465
|
+
const SAFE_PATH_PATTERN = /^[a-zA-Z0-9_.\-/]+$/;
|
|
466
|
+
const HINT_PRIORITY_TEXT_MULTIPLIER = serverConfig.hints.priority.textMultiplier;
|
|
467
|
+
const HINT_PRIORITY_PATH_MULTIPLIER = serverConfig.hints.priority.pathMultiplier;
|
|
468
|
+
const HINT_PRIORITY_BASE_BONUS = serverConfig.hints.priority.baseBonus;
|
|
469
|
+
const PATH_FALLBACK_LIMIT = 40;
|
|
470
|
+
const PATH_FALLBACK_TERMS_LIMIT = 5;
|
|
471
|
+
const PATH_FALLBACK_KEEP = 8;
|
|
472
|
+
const AUTO_PATH_SEGMENT_LIMIT = 4;
|
|
473
|
+
/**
 * Prints a search trace line via console.log when TRACE_SEARCH is on.
 *
 * @param message - text appended after the "[TRACE context_bundle] " prefix
 * @param args - extra values forwarded to console.log unchanged
 */
function traceSearch(message, ...args) {
    if (!TRACE_SEARCH) {
        return;
    }
    console.log(`[TRACE context_bundle] ${message}`, ...args);
}
|
|
478
|
+
const HINT_DIR_LIMIT = serverConfig.hints.directory.limit;
|
|
479
|
+
const HINT_DIR_MAX_FILES = serverConfig.hints.directory.maxFiles;
|
|
480
|
+
const HINT_DEP_OUT_LIMIT = serverConfig.hints.dependency.outLimit;
|
|
481
|
+
const HINT_DEP_IN_LIMIT = serverConfig.hints.dependency.inLimit;
|
|
482
|
+
const HINT_SEM_LIMIT = serverConfig.hints.semantic.limit;
|
|
483
|
+
const HINT_SEM_DIR_CANDIDATE_LIMIT = serverConfig.hints.semantic.dirCandidateLimit;
|
|
484
|
+
const HINT_SEM_THRESHOLD = serverConfig.hints.semantic.threshold;
|
|
485
|
+
const SUPPRESSED_PATH_PREFIXES = [".github/", ".git/", "ThirdPartyNotices", "node_modules/"];
|
|
486
|
+
const SUPPRESSED_FILE_NAMES = ["thirdpartynotices.txt", "thirdpartynotices.md", "cgmanifest.json"];
|
|
487
|
+
/**
 * Decides whether a path is on the suppression list.
 *
 * Always false when SUPPRESS_NON_CODE_ENABLED is off. Otherwise a leading
 * "./" (with any extra slashes) is stripped, the path is lower-cased, and
 * the result is suppressed when it ends with one of SUPPRESSED_FILE_NAMES
 * or contains one of SUPPRESSED_PATH_PREFIXES (lower-cased) anywhere.
 *
 * @param path - repository-relative path string
 * @returns true when the path should be suppressed
 */
function isSuppressedPath(path) {
    if (!SUPPRESS_NON_CODE_ENABLED) {
        return false;
    }
    // A no-op for paths that do not start with "./".
    const lowered = path.replace(/^\.\/+/u, "").toLowerCase();
    const matchesFileName = SUPPRESSED_FILE_NAMES.some((name) => lowered.endsWith(name));
    if (matchesFileName) {
        return true;
    }
    // Despite the constant's name this is a containment test, not a prefix test.
    return SUPPRESSED_PATH_PREFIXES.some((prefix) => lowered.includes(prefix.toLowerCase()));
}
|
|
499
|
+
const HINT_PER_HINT_LIMIT = serverConfig.hints.perHintLimit;
|
|
500
|
+
const HINT_DB_QUERY_BUDGET = serverConfig.hints.dbQueryLimit;
|
|
501
|
+
const HINT_SUBSTRING_LIMIT = serverConfig.hints.substring.limit;
|
|
502
|
+
const HINT_SUBSTRING_BOOST = serverConfig.hints.substring.boost;
|
|
503
|
+
const HINT_LOG_ENABLED = process.env.KIRI_HINT_LOG === "1";
|
|
504
|
+
const HINT_DICTIONARY_ENABLED = process.env.KIRI_HINT_DICTIONARY !== "0";
|
|
505
|
+
const HINT_DICTIONARY_LIMIT = Math.max(0, Number.parseInt(process.env.KIRI_HINT_DICTIONARY_LIMIT ?? "2", 10));
|
|
231
506
|
// Issue #68: Path/Large File Penalty configuration (can be overridden via environment variables)
|
|
232
|
-
const PATH_MISS_DELTA =
|
|
233
|
-
const LARGE_FILE_DELTA =
|
|
507
|
+
const PATH_MISS_DELTA = serverConfig.penalties.pathMissDelta;
|
|
508
|
+
const LARGE_FILE_DELTA = serverConfig.penalties.largeFileDelta;
|
|
234
509
|
const MAX_WHY_TAGS = 10;
|
|
235
510
|
// Item 3: priority map for "why" tags (lower number = higher priority)
|
|
236
511
|
// All actual tag prefixes used in the codebase
|
|
237
512
|
const WHY_TAG_PRIORITY = {
|
|
238
|
-
artifact: 1, // User-provided hints (editing_path, failing_tests)
|
|
513
|
+
artifact: 1, // User-provided hints (editing_path, failing_tests, hints)
|
|
514
|
+
dictionary: 1, // Dictionary-provided hints
|
|
239
515
|
phrase: 2, // Multi-word literal matches (strongest signal)
|
|
240
516
|
text: 3, // Single keyword matches
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
"path-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
517
|
+
metadata: 4, // Front matter / metadata filters & boosts
|
|
518
|
+
substring: 4, // Substring hint expansion
|
|
519
|
+
"path-phrase": 5, // Path contains multi-word phrase
|
|
520
|
+
structural: 6, // Semantic similarity
|
|
521
|
+
"path-segment": 7, // Path component matches
|
|
522
|
+
"path-keyword": 8, // Path keyword match
|
|
523
|
+
dep: 9, // Dependency relationship
|
|
524
|
+
near: 10, // Proximity to editing file
|
|
525
|
+
boost: 11, // File type boost
|
|
526
|
+
recent: 12, // Recently changed
|
|
527
|
+
symbol: 13, // Symbol match
|
|
528
|
+
penalty: 14, // Penalty explanations (keep for transparency)
|
|
529
|
+
keyword: 15, // Generic keyword (deprecated, kept for compatibility)
|
|
252
530
|
};
|
|
253
531
|
// Reserve at least one slot for important structural tags
|
|
254
532
|
const RESERVED_WHY_SLOTS = {
|
|
255
533
|
dep: 1, // Dependency relationships are critical
|
|
256
534
|
symbol: 1, // Symbol boundaries help understand context
|
|
257
535
|
near: 1, // Proximity explains file selection
|
|
536
|
+
metadata: 1, // Preserve metadata reasons when filters/boosts are active
|
|
258
537
|
};
|
|
259
538
|
function parseOutputOptions(params) {
|
|
260
539
|
return {
|
|
@@ -277,6 +556,9 @@ function selectWhyTags(reasons) {
|
|
|
277
556
|
reasons = new Set(Array.from(reasons).slice(0, 1000));
|
|
278
557
|
}
|
|
279
558
|
const selected = new Set();
|
|
559
|
+
if (reasons.has("boost:links")) {
|
|
560
|
+
selected.add("boost:links");
|
|
561
|
+
}
|
|
280
562
|
const byCategory = new Map();
|
|
281
563
|
for (const reason of reasons) {
|
|
282
564
|
const prefix = reason.split(":")[0] ?? "";
|
|
@@ -342,6 +624,45 @@ const STOP_WORDS = new Set([
|
|
|
342
624
|
"need",
|
|
343
625
|
"goal",
|
|
344
626
|
]);
|
|
627
|
+
/**
 * Reorders ranked candidates so hinted paths come first.
 *
 * Candidates whose path appears in `hintPaths` are emitted first, in hint
 * order; the remaining slots are filled from `rankedCandidates` in their
 * existing order. Each path is emitted at most once. The output length is
 * capped at `limit`, clamped to between 1 and the number of ranked candidates.
 *
 * @param rankedCandidates - array of objects with a `path` property
 * @param hintPaths - iterable of hinted paths
 * @param limit - requested maximum number of results
 * @returns new array of candidates; empty when there are no ranked candidates
 */
function prioritizeHintCandidates(rankedCandidates, hintPaths, limit) {
    if (rankedCandidates.length === 0) {
        return [];
    }
    const cap = Math.max(1, Math.min(limit, rankedCandidates.length));
    // First candidate per path, so a hint resolves to its best-ranked entry.
    const firstByPath = new Map();
    for (const candidate of rankedCandidates) {
        if (!firstByPath.has(candidate.path)) {
            firstByPath.set(candidate.path, candidate);
        }
    }
    // Hinted candidates (possibly undefined) followed by the full ranking.
    const queue = [
        ...Array.from(hintPaths, (hintPath) => firstByPath.get(hintPath)),
        ...rankedCandidates,
    ];
    const ordered = [];
    const taken = new Set();
    for (const candidate of queue) {
        if (ordered.length >= cap) {
            break;
        }
        if (candidate && !taken.has(candidate.path)) {
            taken.add(candidate.path);
            ordered.push(candidate);
        }
    }
    return ordered;
}
|
|
345
666
|
function normalizeLimit(limit) {
|
|
346
667
|
if (!limit || Number.isNaN(limit)) {
|
|
347
668
|
return DEFAULT_SEARCH_LIMIT;
|
|
@@ -502,8 +823,30 @@ function extractKeywords(text) {
|
|
|
502
823
|
}
|
|
503
824
|
}
|
|
504
825
|
}
|
|
826
|
+
addKeywordDerivedPathSegments(result);
|
|
505
827
|
return result;
|
|
506
828
|
}
|
|
829
|
+
/**
 * Tops up `result.pathSegments` with keywords, in place.
 *
 * Does nothing when there are already AUTO_PATH_SEGMENT_LIMIT segments.
 * Otherwise keywords of at least 3 characters that are not stop words and
 * not already present are appended, in keyword order, until the segment
 * count reaches AUTO_PATH_SEGMENT_LIMIT.
 *
 * @param result - object with `keywords` and `pathSegments` arrays;
 *   `pathSegments` is mutated
 */
function addKeywordDerivedPathSegments(result) {
    const segments = result.pathSegments;
    if (segments.length >= AUTO_PATH_SEGMENT_LIMIT) {
        return;
    }
    const known = new Set(segments);
    const extras = [];
    for (const keyword of result.keywords) {
        const usable = keyword.length >= 3 && !STOP_WORDS.has(keyword);
        if (!usable || known.has(keyword)) {
            continue;
        }
        known.add(keyword);
        extras.push(keyword);
        if (segments.length + extras.length >= AUTO_PATH_SEGMENT_LIMIT) {
            break;
        }
    }
    if (extras.length > 0) {
        segments.push(...extras);
    }
}
|
|
507
850
|
function ensureCandidate(map, filePath) {
|
|
508
851
|
let candidate = map.get(filePath);
|
|
509
852
|
if (!candidate) {
|
|
@@ -520,14 +863,361 @@ function ensureCandidate(map, filePath) {
|
|
|
520
863
|
embedding: null,
|
|
521
864
|
semanticSimilarity: null,
|
|
522
865
|
pathMatchHits: 0, // Issue #68: Track path match count
|
|
866
|
+
keywordHits: new Set(),
|
|
867
|
+
phraseHits: 0,
|
|
868
|
+
// pathFallbackReason は optional なので省略(exactOptionalPropertyTypes対応)
|
|
869
|
+
fallbackTextHits: 0,
|
|
523
870
|
penalties: [], // Issue #68: Penalty log for telemetry
|
|
524
871
|
};
|
|
525
872
|
map.set(filePath, candidate);
|
|
526
873
|
}
|
|
527
874
|
return candidate;
|
|
528
875
|
}
|
|
876
|
+
/**
 * Expands the neighborhood of each hint path under a shared query budget.
 *
 * Returns immediately when there are no hint paths or when
 * `config.perHintLimit` / `config.dbQueryBudget` is not positive. Otherwise
 * a single `state` object seeded with `config.dbQueryBudget` is shared across
 * all hints, and hints are processed one at a time until the budget reaches
 * zero.
 *
 * @param params - object with `hintPaths`, `config` and whatever else
 *   expandSingleHintNeighborhood reads; it is spread into each call together
 *   with `hintPath` and `state`
 */
async function expandHintCandidatesForHints(params) {
    const { hintPaths, config } = params;
    const nothingToDo = hintPaths.length === 0 || config.perHintLimit <= 0 || config.dbQueryBudget <= 0;
    if (nothingToDo) {
        return;
    }
    const state = { remainingDbQueries: config.dbQueryBudget };
    for (const hintPath of hintPaths) {
        const budgetSpent = state.remainingDbQueries <= 0;
        if (budgetSpent) {
            break;
        }
        await expandSingleHintNeighborhood({ ...params, hintPath, state });
    }
}
|
|
889
|
+
/**
 * Expands one hint path through up to three stages, sharing one per-hint quota.
 *
 * Starting from `config.perHintLimit`, the stages run in this order:
 * directory neighbors (when `dirLimit > 0`), dependency neighbors (when
 * either dependency limit is positive), then semantic neighbors (when
 * `semLimit > 0`). The count returned by the first two stages is subtracted
 * from the quota, and the function stops as soon as the quota is used up.
 *
 * @param args - expansion arguments; `args.config` is read here and the
 *   whole object is forwarded to each stage
 */
async function expandSingleHintNeighborhood(args) {
    const { perHintLimit, dirLimit, depOutLimit, depInLimit, semLimit } = args.config;
    let budget = perHintLimit;
    if (budget <= 0) {
        return;
    }
    if (dirLimit > 0) {
        const fromDirectory = await addHintDirectoryNeighbors(args, Math.min(dirLimit, budget));
        budget -= fromDirectory;
        if (budget <= 0) {
            return;
        }
    }
    const wantsDependencies = depOutLimit > 0 || depInLimit > 0;
    if (wantsDependencies) {
        const fromDependencies = await addHintDependencyNeighbors(args, budget);
        budget -= fromDependencies;
        if (budget <= 0) {
            return;
        }
    }
    if (semLimit > 0) {
        await addHintSemanticNeighbors(args, Math.min(semLimit, budget));
    }
}
|
|
913
|
+
/**
 * Tries to spend `cost` units of the shared DB-query budget.
 *
 * @param state - object with a numeric `remainingDbQueries`; decremented
 *   in place on success
 * @param cost - units to spend (default 1)
 * @returns false, leaving the budget untouched, when less than `cost`
 *   remains; true otherwise
 */
function useHintDbBudget(state, cost = 1) {
    const insufficient = state.remainingDbQueries < cost;
    if (!insufficient) {
        state.remainingDbQueries -= cost;
        return true;
    }
    return false;
}
|
|
920
|
+
/**
 * Applies a one-time, reason-tagged score boost to a candidate.
 *
 * Nothing happens when `scoreDelta` is not positive or the candidate already
 * carries `reason`. Otherwise the score is increased, the reason is
 * recorded, `pathMatchHits` is raised to at least 2, `matchLine` defaults to
 * 1 when unset, and `lang` / `ext` are filled in only when the candidate has
 * none and a truthy value was supplied.
 *
 * @param candidate - mutable candidate with `score`, `reasons` (Set),
 *   `pathMatchHits`, `matchLine`, `lang`, `ext`
 * @param reason - reason tag guarding against double application
 * @param scoreDelta - amount added to the score
 * @param lang - optional language to backfill
 * @param ext - optional extension to backfill
 * @returns true when the boost was applied, false otherwise
 */
function applyHintReasonBoost(candidate, reason, scoreDelta, lang, ext) {
    const skip = scoreDelta <= 0 || candidate.reasons.has(reason);
    if (skip) {
        return false;
    }
    candidate.score += scoreDelta;
    candidate.reasons.add(reason);
    candidate.pathMatchHits = Math.max(candidate.pathMatchHits, 2);
    if (candidate.matchLine === null || candidate.matchLine === undefined) {
        candidate.matchLine = 1;
    }
    const backfill = [["lang", lang], ["ext", ext]];
    for (const [field, value] of backfill) {
        if (value && !candidate[field]) {
            candidate[field] = value;
        }
    }
    return true;
}
|
|
936
|
+
/**
 * Promotes every hinted path and then expands around the hints.
 *
 * For each target the candidate is created if needed, its score is raised by
 * the hint priority boost, a "dictionary:hint:<path>" or
 * "artifact:hint:<path>" reason is added depending on the target's origin,
 * `pathMatchHits` is raised to at least 3, `matchLine` defaults to 1, and a
 * hint-expansion log entry is written. Afterwards
 * expandHintCandidatesForHints is run over all target paths.
 *
 * @param args - object with `hintTargets`, `db`, `tableAvailability`,
 *   `repoId`, `candidates`, `fileCache`, `weights` and `hintSeedMeta`
 */
async function applyPathHintPromotions(args) {
    const { hintTargets, db, tableAvailability, repoId, candidates, weights } = args;
    if (hintTargets.length === 0) {
        return;
    }
    const boost = computeHintPriorityBoost(weights);
    for (const { path: targetPath, sourceHint, origin } of hintTargets) {
        const fromDictionary = origin === "dictionary";
        const candidate = ensureCandidate(candidates, targetPath);
        candidate.score += boost;
        candidate.reasons.add(`${fromDictionary ? "dictionary:hint" : "artifact:hint"}:${targetPath}`);
        candidate.pathMatchHits = Math.max(candidate.pathMatchHits, 3);
        candidate.matchLine ??= 1;
        await logHintExpansionEntry(db, tableAvailability, {
            repoId,
            hintValue: sourceHint,
            kind: fromDictionary ? "dictionary" : "path",
            targetPath,
            payload: {
                origin,
                source_hint: sourceHint,
            },
        });
    }
    await expandHintCandidatesForHints({
        db,
        tableAvailability,
        repoId,
        hintPaths: hintTargets.map((target) => target.path),
        candidates,
        fileCache: args.fileCache,
        weights,
        config: createHintExpansionConfig(weights),
        hintSeedMeta: args.hintSeedMeta,
    });
}
|
|
972
|
+
/**
 * Boosts files whose path contains a hint as a case-insensitive substring.
 *
 * For each hint, up to `limitPerHint` non-binary files of the repository are
 * fetched (ordered by path) and given a one-time `substring:hint:<hint>`
 * boost; every applied boost is also written to the hint-expansion log.
 *
 * The hint is matched literally: it is lower-cased here (the query compares
 * against LOWER(path)) and the LIKE wildcards `%` and `_` as well as the
 * escape character `\` are escaped, so a hint such as "user_service" no
 * longer matches "userXservice".
 *
 * @param db - client exposing `all(sql, params)`
 * @param tableAvailability - forwarded to logHintExpansionEntry
 * @param repoId - value bound to `repo_id`
 * @param hints - iterable of hint strings
 * @param candidates - candidate map handed to ensureCandidate
 * @param limitPerHint - maximum rows fetched per hint; nothing is done when <= 0
 * @param boost - score added per matching file; nothing is done when <= 0
 */
async function addHintSubstringMatches(db, tableAvailability, repoId, hints, candidates, limitPerHint, boost) {
    if (limitPerHint <= 0 || boost <= 0) {
        return;
    }
    for (const hint of hints) {
        // Skip hints without a single path-safe character (same hints the
        // previous strip-then-test check rejected).
        if (!/[a-zA-Z0-9_.-]/.test(hint)) {
            continue;
        }
        // Escape LIKE metacharacters so the bound value is a literal needle.
        const needle = hint.toLowerCase().replace(/[\\%_]/g, "\\$&");
        const rows = await db.all(`
      SELECT path
      FROM file
      WHERE repo_id = ?
        AND is_binary = FALSE
        AND LOWER(path) LIKE '%' || ? || '%' ESCAPE '\\'
      ORDER BY path
      LIMIT ?
    `, [repoId, needle, limitPerHint]);
        const reason = `substring:hint:${hint}`;
        for (const row of rows) {
            const candidate = ensureCandidate(candidates, row.path);
            if (applyHintReasonBoost(candidate, reason, boost)) {
                await logHintExpansionEntry(db, tableAvailability, {
                    repoId,
                    hintValue: hint,
                    kind: "substring",
                    targetPath: row.path,
                });
            }
        }
    }
}
|
|
1003
|
+
/**
 * Boosts files that live under the same directory as the hint path.
 *
 * The directory is `path.posix.dirname(args.hintPath)`; hints at the
 * repository root are skipped. One unit of the shared DB-query budget is
 * spent. Non-binary files whose path starts with "<dir>/" are fetched, and
 * if there are none, or more than `config.dirMaxFiles`, nothing is boosted.
 * The rest are ordered by hintNeighborRank and boosted with `config.dirBoost`
 * under the reason `artifact:hint_dir:<hintPath>:<path>` until `limit` files
 * have been added. Each applied boost is written to the hint-expansion log.
 *
 * The directory is matched literally: LIKE wildcards (`%`, `_`) and the
 * escape character `\` in the directory name are escaped, so a directory
 * such as "__tests__" does not also match "xxtestsxx".
 *
 * @param args - expansion arguments: `hintPath`, `db`, `repoId`, `config`,
 *   `state`, `candidates`, `tableAvailability`, `hintSeedMeta`
 * @param limit - maximum number of neighbors to add
 * @returns number of candidates that actually received the boost
 */
async function addHintDirectoryNeighbors(args, limit) {
    if (limit <= 0) {
        return 0;
    }
    const dir = path.posix.dirname(args.hintPath);
    if (!dir || dir === "." || dir === "/") {
        return 0;
    }
    if (!useHintDbBudget(args.state)) {
        return 0;
    }
    // Escape LIKE metacharacters so only the trailing "%" acts as a wildcard.
    const dirPattern = `${dir.replace(/[\\%_]/g, "\\$&")}/%`;
    // Fetch one row more than allowed to detect oversized directories.
    const rows = await args.db.all(`
      SELECT path, lang, ext
      FROM file
      WHERE repo_id = ?
        AND is_binary = FALSE
        AND path LIKE ? ESCAPE '\\'
      ORDER BY path
      LIMIT ?
    `, [args.repoId, dirPattern, args.config.dirMaxFiles + 1]);
    if (rows.length === 0 || rows.length > args.config.dirMaxFiles) {
        return 0;
    }
    rows.sort((a, b) => hintNeighborRank(a.path) - hintNeighborRank(b.path));
    // The seed metadata depends only on the hint path, so look it up once.
    const seedMeta = getHintSeedMeta(args.hintSeedMeta, args.hintPath);
    let added = 0;
    for (const row of rows) {
        if (row.path === args.hintPath) {
            continue;
        }
        if (!SAFE_PATH_PATTERN.test(row.path)) {
            continue;
        }
        const candidate = ensureCandidate(args.candidates, row.path);
        const reason = `artifact:hint_dir:${args.hintPath}:${row.path}`;
        if (applyHintReasonBoost(candidate, reason, args.config.dirBoost, row.lang, row.ext)) {
            added += 1;
            await logHintExpansionEntry(args.db, args.tableAvailability, {
                repoId: args.repoId,
                hintValue: seedMeta?.sourceHint ?? args.hintPath,
                kind: "directory",
                targetPath: row.path,
                payload: {
                    origin: seedMeta?.origin ?? "artifact",
                },
            });
            if (added >= limit) {
                break;
            }
        }
    }
    return added;
}
|
|
1056
|
+
/**
 * Expands a hint along its dependency edges, outbound first and then inbound.
 *
 * Each direction is capped by its own config limit (`depOutLimit`,
 * `depInLimit`) and by whatever is left of `perHintRemaining`.
 *
 * @param args - Hint expansion context; `args.config` supplies the per-direction limits.
 * @param perHintRemaining - Boosts still allowed for this hint.
 * @returns Total number of dependency neighbours boosted.
 */
async function addHintDependencyNeighbors(args, perHintRemaining) {
    let total = 0;
    for (const direction of ["out", "in"]) {
        const remaining = perHintRemaining - total;
        if (remaining <= 0) {
            break;
        }
        const configured = direction === "out" ? args.config.depOutLimit : args.config.depInLimit;
        if (!(configured > 0)) {
            continue;
        }
        const budget = Math.min(configured, remaining);
        if (budget > 0) {
            total += await addHintDependencyDirection(args, budget, direction);
        }
    }
    return total;
}
|
|
1078
|
+
/**
 * Looks up dependency edges of the hint file in one direction and boosts them.
 *
 * For `"out"` the hint is the edge source and the `dst` column is collected;
 * for any other direction the hint is the edge destination and `src_path` is
 * collected. Only edges with `dst_kind = 'path'` are considered.
 *
 * @param args - Hint expansion context; reads `state`, `db`, `repoId` and `hintPath`.
 * @param limit - Maximum number of neighbours to boost.
 * @param direction - `"out"` for outbound edges, otherwise inbound.
 * @returns Number of neighbours boosted, as reported by `applyDependencyRows`.
 */
async function addHintDependencyDirection(args, limit, direction) {
    if (limit <= 0) {
        return 0;
    }
    if (!useHintDbBudget(args.state)) {
        return 0;
    }
    // Over-fetch (capped at 25) because rows may be dropped later by
    // de-duplication, the safe-path filter, or a rejected boost.
    const fetchLimit = Math.min(limit * 4, 25);
    const isOutbound = direction === "out";
    // Column names come from these fixed literals only, never from input.
    const neighborColumn = isOutbound ? "dst" : "src_path";
    const anchorColumn = isOutbound ? "src_path" : "dst";
    // ORDER BY keeps the LIMIT-ed subset stable between runs; without it the
    // database may return an arbitrary subset of the matching edges.
    const rows = await args.db.all(`
      SELECT ${neighborColumn}
      FROM dependency
      WHERE repo_id = ?
        AND ${anchorColumn} = ?
        AND dst_kind = 'path'
      ORDER BY ${neighborColumn}
      LIMIT ?
    `, [args.repoId, args.hintPath, fetchLimit]);
    return await applyDependencyRows(args, rows.map((row) => row[neighborColumn]), limit, direction);
}
|
|
1107
|
+
/**
 * Applies the dependency boost to a list of neighbour paths.
 *
 * Paths are de-duplicated, filtered through `SAFE_PATH_PATTERN`, ordered by
 * `hintNeighborRank`, and boosted with `args.config.depBoost` until `limit`
 * boosts have been accepted. The hint path itself is never boosted, and each
 * accepted boost is logged with `logHintExpansionEntry`.
 *
 * @param args - Hint expansion context.
 * @param paths - Neighbour paths read from the dependency table.
 * @param limit - Maximum number of boosts to apply.
 * @param direction - Edge direction, embedded in the reason string and log payload.
 * @returns Number of boosts accepted.
 */
async function applyDependencyRows(args, paths, limit, direction) {
    if (paths.length === 0) {
        return 0;
    }
    const ranked = [...new Set(paths)].filter((entry) => entry && SAFE_PATH_PATTERN.test(entry));
    ranked.sort((left, right) => hintNeighborRank(left) - hintNeighborRank(right));
    let boosted = 0;
    for (const neighborPath of ranked) {
        if (neighborPath === args.hintPath) {
            continue;
        }
        const target = ensureCandidate(args.candidates, neighborPath);
        const label = `artifact:hint_dep_${direction}:${args.hintPath}:${neighborPath}`;
        const accepted = applyHintReasonBoost(target, label, args.config.depBoost);
        if (!accepted) {
            continue;
        }
        boosted += 1;
        const seedMeta = getHintSeedMeta(args.hintSeedMeta, args.hintPath);
        await logHintExpansionEntry(args.db, args.tableAvailability, {
            repoId: args.repoId,
            hintValue: seedMeta?.sourceHint ?? args.hintPath,
            kind: "dependency",
            targetPath: neighborPath,
            payload: {
                origin: seedMeta?.origin ?? "artifact",
                direction,
            },
        });
        if (boosted >= limit) {
            break;
        }
    }
    return boosted;
}
|
|
1140
|
+
/**
 * Boosts files under the hint's directory whose embedding is similar to the hint's.
 *
 * Spends up to two units of hint DB budget: one to list non-binary files under
 * the hint's directory prefix and one to load embeddings. A neighbour is
 * boosted by `args.weights.structural * similarity` when its similarity is
 * finite and at least `args.config.semThreshold`.
 *
 * @param args - Hint expansion context; reads `hintPath`, `state`, `db`,
 *   `repoId`, `config`, `weights` and `candidates`.
 * @param limit - Maximum number of neighbours to boost.
 * @returns Number of neighbours whose boost was accepted.
 */
async function addHintSemanticNeighbors(args, limit) {
    if (limit <= 0) {
        return 0;
    }
    const dir = path.posix.dirname(args.hintPath);
    // Root-level hints have no meaningful directory to expand.
    if (!dir || dir === "." || dir === "/") {
        return 0;
    }
    if (!useHintDbBudget(args.state)) {
        return 0;
    }
    // "_" and "%" are LIKE wildcards; escape them so the prefix matches literally.
    const likePrefix = `${dir.replace(/[\\%_]/g, "\\$&")}/%`;
    const rows = await args.db.all(`
      SELECT path
      FROM file
      WHERE repo_id = ?
        AND is_binary = FALSE
        AND path LIKE ? ESCAPE '\\'
      ORDER BY path
      LIMIT ?
    `, [args.repoId, likePrefix, args.config.semDirCandidateLimit]);
    const candidatePaths = rows.map((row) => row.path).filter((p) => p !== args.hintPath);
    if (candidatePaths.length === 0) {
        return 0;
    }
    // The embedding lookup is a second query and needs its own budget unit.
    if (!useHintDbBudget(args.state)) {
        return 0;
    }
    const embeddingMap = await fetchEmbeddingMap(args.db, args.repoId, [
        args.hintPath,
        ...candidatePaths,
    ]);
    const hintEmbedding = embeddingMap.get(args.hintPath);
    // Without an embedding for the hint itself there is nothing to compare against.
    if (!hintEmbedding) {
        return 0;
    }
    let added = 0;
    for (const candidatePath of candidatePaths) {
        if (!SAFE_PATH_PATTERN.test(candidatePath)) {
            continue;
        }
        const embedding = embeddingMap.get(candidatePath);
        if (!embedding) {
            continue;
        }
        const similarity = structuralSimilarity(hintEmbedding, embedding);
        if (!Number.isFinite(similarity) || similarity < args.config.semThreshold) {
            continue;
        }
        const candidate = ensureCandidate(args.candidates, candidatePath);
        const reason = `artifact:hint_sem:${args.hintPath}:${candidatePath}`;
        if (applyHintReasonBoost(candidate, reason, args.weights.structural * similarity)) {
            added += 1;
            if (added >= limit) {
                break;
            }
        }
    }
    return added;
}
|
|
1199
|
+
/**
 * Sort key for hint neighbours; lower values are processed first.
 *
 * 0 = under `src/` or `external/assay-kit/src/`, 1 = anything else,
 * 2 = test-like path, 3 = under `docs/`.
 *
 * @param filePath - Repository-relative path.
 * @returns Rank between 0 and 3.
 */
function hintNeighborRank(filePath) {
    const sourcePrefixes = ["src/", "external/assay-kit/src/"];
    if (sourcePrefixes.some((prefix) => filePath.startsWith(prefix))) {
        return 0;
    }
    if (isTestLikePath(filePath)) {
        return 2;
    }
    return filePath.startsWith("docs/") ? 3 : 1;
}
|
|
1211
|
+
/**
 * Reports whether a path looks like a test or fixture file.
 *
 * A path qualifies when it contains a `test/`, `tests/`, `__tests__/` or
 * `fixtures/` directory segment, or ends with a `.spec`/`.test` suffix for
 * `.ts`, `.tsx` or `.js`. The `.js` variants are included so this check agrees
 * with the test-file suffix list used by the file-penalty logic in this module.
 *
 * @param filePath - Repository-relative path using `/` separators.
 * @returns `true` when the path is test-like.
 */
function isTestLikePath(filePath) {
    if (/(^|\/)(tests?|__tests__|fixtures)\//.test(filePath)) {
        return true;
    }
    return /\.(spec|test)\.(ts|tsx|js)$/.test(filePath);
}
|
|
529
1218
|
function parseEmbedding(vectorJson, vectorDims) {
|
|
530
|
-
|
|
1219
|
+
const dims = vectorDims === null ? null : typeof vectorDims === "bigint" ? Number(vectorDims) : vectorDims;
|
|
1220
|
+
if (!vectorJson || !dims || dims <= 0) {
|
|
531
1221
|
return null;
|
|
532
1222
|
}
|
|
533
1223
|
try {
|
|
@@ -536,7 +1226,7 @@ function parseEmbedding(vectorJson, vectorDims) {
|
|
|
536
1226
|
return null;
|
|
537
1227
|
}
|
|
538
1228
|
const values = [];
|
|
539
|
-
for (let i = 0; i < parsed.length && i <
|
|
1229
|
+
for (let i = 0; i < parsed.length && i < dims; i += 1) {
|
|
540
1230
|
const raw = parsed[i];
|
|
541
1231
|
const num = typeof raw === "number" ? raw : Number(raw);
|
|
542
1232
|
if (!Number.isFinite(num)) {
|
|
@@ -544,7 +1234,7 @@ function parseEmbedding(vectorJson, vectorDims) {
|
|
|
544
1234
|
}
|
|
545
1235
|
values.push(num);
|
|
546
1236
|
}
|
|
547
|
-
return values.length ===
|
|
1237
|
+
return values.length === dims ? values : null;
|
|
548
1238
|
}
|
|
549
1239
|
catch {
|
|
550
1240
|
return null;
|
|
@@ -636,54 +1326,411 @@ function buildSnippetPreview(content, startLine, endLine) {
|
|
|
636
1326
|
if (snippet.length <= 240) {
|
|
637
1327
|
return snippet;
|
|
638
1328
|
}
|
|
639
|
-
return `${snippet.slice(0, 239)}…`;
|
|
1329
|
+
return `${snippet.slice(0, 239)}…`;
|
|
1330
|
+
}
|
|
1331
|
+
/**
 * Counts the tokens in a line range of a file's content.
 *
 * Uses `encodeGPT` for the count; if encoding throws, a warning is logged and
 * the count falls back to roughly one token per four characters (minimum 1).
 *
 * @param content - Full file content
 * @param startLine - First line of the range (1-indexed)
 * @param endLine - Last line of the range (1-indexed, inclusive)
 * @returns Token count for the selected lines
 */
function estimateTokensFromContent(content, startLine, endLine) {
    const allLines = content.split(/\r?\n/);
    const from = Math.max(0, startLine - 1);
    const to = Math.min(endLine, allLines.length);
    const excerpt = allLines.slice(from, to).join("\n");
    try {
        return encodeGPT(excerpt).length;
    }
    catch (error) {
        // Fallback: about 4 characters per token for average English text.
        console.warn("Token encoding failed, using character-based fallback", error);
        return Math.max(1, Math.ceil(excerpt.length / 4));
    }
}
|
|
1356
|
+
/**
 * Splits a multi-word query into the words used to build OR search conditions.
 *
 * @param query - Search query string
 * @returns Words longer than two characters; the original query as a
 *   single-element array when every word was filtered out
 */
function splitQueryWords(query) {
    const kept = [];
    // Separators: whitespace, slash, hyphen and underscore.
    for (const piece of query.split(/[\s/\-_]+/)) {
        if (piece.length > 2) {
            kept.push(piece);
        }
    }
    if (kept.length === 0) {
        return [query];
    }
    return kept;
}
|
|
1366
|
+
/**
 * Resolves a raw metadata filter key to its canonical descriptor.
 *
 * The lower-cased key is first looked up in `METADATA_ALIAS_MAP` (a copy of
 * the alias entry is returned). Otherwise the first entry of
 * `METADATA_KEY_PREFIXES` whose prefix matches decides the result: the text
 * after the prefix becomes the key, or `null` is returned when nothing follows.
 *
 * @param rawKey - Key as typed by the caller.
 * @returns `{ key, source, strict? }`, or `null` when the key is not recognised.
 */
function normalizeMetadataFilterKey(rawKey) {
    if (!rawKey) {
        return null;
    }
    const lowered = rawKey.toLowerCase();
    const aliased = METADATA_ALIAS_MAP.get(lowered);
    if (aliased) {
        return { ...aliased };
    }
    for (const prefixEntry of METADATA_KEY_PREFIXES) {
        if (!lowered.startsWith(prefixEntry.prefix)) {
            continue;
        }
        const suffix = lowered.slice(prefixEntry.prefix.length);
        if (!suffix) {
            return null;
        }
        const resolved = { key: suffix, source: prefixEntry.source };
        if (prefixEntry.strict !== undefined) {
            resolved.strict = prefixEntry.strict;
        }
        return resolved;
    }
    return null;
}
|
|
1390
|
+
/**
 * Normalises a filter value into a list of trimmed, non-empty strings.
 *
 * Accepts a single string or an array; non-string array items and blank
 * strings are dropped. Any other input yields an empty list.
 *
 * @param value - Raw filter value.
 * @returns Trimmed, non-empty strings in their original order.
 */
function normalizeFilterValues(value) {
    let rawItems = [];
    if (typeof value === "string") {
        rawItems = [value];
    }
    else if (Array.isArray(value)) {
        rawItems = value;
    }
    return rawItems
        .filter((item) => typeof item === "string")
        .map((item) => item.trim())
        .filter((item) => item.length > 0);
}
|
|
1409
|
+
/**
 * Converts a `{ key: value }` filter object into normalised filter descriptors.
 *
 * Entries whose key is not recognised by `normalizeMetadataFilterKey`, or
 * whose value yields no usable strings, are dropped.
 *
 * @param input - Filter object supplied by the caller; non-objects yield `[]`.
 * @returns List of `{ key, values, source, strict? }` filters.
 */
function normalizeMetadataFiltersParam(input) {
    if (!input || typeof input !== "object") {
        return [];
    }
    return Object.entries(input).flatMap(([rawKey, rawValue]) => {
        const resolvedKey = normalizeMetadataFilterKey(rawKey);
        if (!resolvedKey) {
            return [];
        }
        const values = normalizeFilterValues(rawValue);
        if (values.length === 0) {
            return [];
        }
        const descriptor = { key: resolvedKey.key, values, source: resolvedKey.source };
        if (resolvedKey.strict !== undefined) {
            descriptor.strict = resolvedKey.strict;
        }
        return [descriptor];
    });
}
|
|
1435
|
+
/**
 * Merges filters that share the same source, key and strictness.
 *
 * Values appended to an existing group are de-duplicated case-insensitively;
 * the first spelling seen is kept. Filters without values are ignored, and
 * the input filters are not mutated.
 *
 * @param filters - Filters to merge.
 * @returns One filter per (source, key, strict/hint) group, in first-seen order.
 */
function mergeMetadataFilters(filters) {
    const grouped = new Map();
    for (const filter of filters) {
        if (filter.values.length === 0) {
            continue;
        }
        const mode = filter.strict ? "strict" : "hint";
        const groupId = `${filter.source ?? "*"}::${filter.key}::${mode}`;
        const target = grouped.get(groupId);
        if (!target) {
            const created = {
                key: filter.key,
                source: filter.source,
                values: [...filter.values],
            };
            if (filter.strict !== undefined) {
                created.strict = filter.strict;
            }
            grouped.set(groupId, created);
            continue;
        }
        const seen = new Set(target.values.map((val) => val.toLowerCase()));
        for (const value of filter.values) {
            const folded = value.toLowerCase();
            if (seen.has(folded)) {
                continue;
            }
            target.values.push(value);
            seen.add(folded);
        }
    }
    return Array.from(grouped.values());
}
|
|
1465
|
+
/**
 * Extracts inline `key:value` metadata filters from a free-text query.
 *
 * Values may be bare, double-quoted or single-quoted. Tokens whose key is not
 * recognised by `normalizeMetadataFilterKey`, or whose value is empty, are
 * left in the query. Recognised tokens are removed, and the remaining text
 * has runs of whitespace collapsed.
 *
 * @param query - Raw query string.
 * @returns `{ cleanedQuery, filters }`, with `filters` merged by `mergeMetadataFilters`.
 */
function parseInlineMetadataFilters(query) {
    if (!query) {
        return { cleanedQuery: "", filters: [] };
    }
    const tokenPattern = /(\b[\w.]+):("[^"]+"|'[^']+'|[^\s]+)/g;
    const extracted = [];
    const keptText = [];
    let cursor = 0;
    for (const hit of query.matchAll(tokenPattern)) {
        const resolvedKey = normalizeMetadataFilterKey(hit[1] ?? "");
        if (!resolvedKey) {
            continue;
        }
        const token = hit[2] ?? "";
        const doubleQuoted = token.startsWith('"') && token.endsWith('"');
        const singleQuoted = token.startsWith("'") && token.endsWith("'");
        const value = (doubleQuoted || singleQuoted ? token.slice(1, -1) : token).trim();
        if (!value) {
            continue;
        }
        const filter = {
            key: resolvedKey.key,
            source: resolvedKey.source,
            values: [value],
        };
        if (resolvedKey.strict !== undefined) {
            filter.strict = resolvedKey.strict;
        }
        extracted.push(filter);
        // Keep the text between the previous accepted token and this one.
        keptText.push(query.slice(cursor, hit.index));
        cursor = hit.index + hit[0].length;
    }
    if (extracted.length === 0) {
        return { cleanedQuery: query.trim(), filters: [] };
    }
    keptText.push(query.slice(cursor));
    return {
        cleanedQuery: keptText.join("").replace(/\s{2,}/g, " ").trim(),
        filters: mergeMetadataFilters(extracted),
    };
}
|
|
1516
|
+
/**
 * Builds one `EXISTS (...)` SQL clause per metadata filter.
 *
 * Each clause checks `document_metadata_kv` for a row that belongs to the same
 * repo/path as the outer `alias` row, has the filter's key (and source, when
 * given), and whose value contains any of the filter's values
 * (case-insensitive substring match).
 *
 * @param filters - Normalised metadata filters (`{ key, values, source? }`).
 * @param alias - Alias of the outer table; only `"f"` and `"mk"` are accepted.
 * @returns List of `{ sql, params }` clauses to AND into a WHERE clause.
 * @throws Error when `alias` is not one of the allowed aliases.
 */
function buildMetadataFilterConditions(filters, alias = "f") {
    // SQL injection guard: the alias is interpolated into the SQL text, so it
    // is restricted to a fixed allow-list.
    if (!["f", "mk"].includes(alias)) {
        throw new Error(`Invalid SQL alias: ${alias}`);
    }
    // "%" and "_" inside a value must match literally (the in-memory matcher
    // in this module uses plain substring checks), so escape LIKE wildcards.
    const escapeLike = (value) => String(value).replace(/[\\%_]/g, "\\$&");
    const clauses = [];
    for (const filter of filters) {
        if (!filter.key || filter.values.length === 0) {
            continue;
        }
        const likeClauses = filter.values.map(() => "mk.value ILIKE ? ESCAPE '\\'").join(" OR ");
        const whereParts = [`mk.repo_id = ${alias}.repo_id`, `mk.path = ${alias}.path`];
        const params = [];
        if (filter.source) {
            whereParts.push("mk.source = ?");
            params.push(filter.source);
        }
        whereParts.push("mk.key = ?");
        params.push(filter.key);
        whereParts.push(`(${likeClauses})`);
        params.push(...filter.values.map((value) => `%${escapeLike(value)}%`));
        const sql = `EXISTS (SELECT 1 FROM document_metadata_kv mk WHERE ${whereParts.join(" AND ")})`;
        clauses.push({ sql, params });
    }
    return clauses;
}
|
|
1542
|
+
/**
 * Heuristically decides whether an error refers to a missing table.
 *
 * NOTE(review): the second check matches any error message that merely
 * mentions the table name, so errors unrelated to a missing table are also
 * reported as "missing" — confirm this breadth is intended.
 *
 * @param error - Value caught from a failed query.
 * @param table - Table name to look for in the error message.
 * @returns `true` when `error` is an `Error` whose message mentions the table.
 */
function isTableMissingError(error, table) {
    if (error instanceof Error) {
        const { message } = error;
        return message.includes(`Table with name ${table}`) || message.includes(table);
    }
    return false;
}
|
|
1548
|
+
/**
 * Runs a metadata query, tolerating a missing `document_metadata_kv` table.
 *
 * Returns `[]` without querying when `tableAvailability.hasMetadataTables` is
 * falsy. When the query fails with an error that `isTableMissingError`
 * attributes to `document_metadata_kv`, a warning is logged and `[]` is
 * returned; any other error is rethrown. The availability flag itself is not
 * modified here.
 *
 * @param db - Database client exposing `all(sql, params)`.
 * @param tableAvailability - Flags describing which optional tables exist.
 * @param sql - SQL text to run.
 * @param params - Bound parameters.
 * @returns Query rows, or `[]` when metadata tables are unavailable.
 */
async function safeMetadataQuery(db, tableAvailability, sql, params) {
    if (tableAvailability.hasMetadataTables) {
        try {
            return await db.all(sql, params);
        }
        catch (queryError) {
            if (!isTableMissingError(queryError, "document_metadata_kv")) {
                throw queryError;
            }
            console.warn("Metadata tables not found; disabling metadata filters and boosts until database is upgraded.");
        }
    }
    return [];
}
|
|
1563
|
+
/**
 * Runs a markdown-link query, tolerating a missing `markdown_link` table.
 *
 * Returns `[]` without querying when `tableAvailability.hasLinkTable` is
 * falsy. When the query fails with an error that `isTableMissingError`
 * attributes to `markdown_link`, a warning is logged and `[]` is returned;
 * any other error is rethrown.
 *
 * @param db - Database client exposing `all(sql, params)`.
 * @param tableAvailability - Flags describing which optional tables exist.
 * @param sql - SQL text to run.
 * @param params - Bound parameters.
 * @returns Query rows, or `[]` when the link table is unavailable.
 */
async function safeLinkQuery(db, tableAvailability, sql, params) {
    if (tableAvailability.hasLinkTable) {
        try {
            return await db.all(sql, params);
        }
        catch (queryError) {
            if (!isTableMissingError(queryError, "markdown_link")) {
                throw queryError;
            }
            console.warn("Markdown link table not found; inbound link boosting disabled until database is upgraded.");
        }
    }
    return [];
}
|
|
1578
|
+
/**
 * Fetches files selected purely by metadata filters.
 *
 * Joins `file` with `blob` and ANDs in one `EXISTS` clause per filter from
 * `buildMetadataFilterConditions`. A failure attributed to a missing
 * `document_metadata_kv` table is logged and treated as "no results"; other
 * errors are rethrown.
 *
 * @param db - Database client exposing `all(sql, params)`.
 * @param tableAvailability - Flags describing which optional tables exist.
 * @param repoId - Repository to search.
 * @param filters - Normalised metadata filters.
 * @param limit - Maximum number of rows.
 * @returns Rows with `path`, `lang`, `ext` and `content`, ordered by path.
 */
async function fetchMetadataOnlyCandidates(db, tableAvailability, repoId, filters, limit) {
    const nothingToDo = !tableAvailability.hasMetadataTables || filters.length === 0 || limit <= 0;
    if (nothingToDo) {
        return [];
    }
    const conditions = buildMetadataFilterConditions(filters);
    const predicates = ["f.repo_id = ?", ...conditions.map((condition) => condition.sql)];
    const bindings = [repoId, ...conditions.flatMap((condition) => condition.params), limit];
    const statement = `
    SELECT f.path, f.lang, f.ext, b.content
    FROM file f
    JOIN blob b ON b.hash = f.blob_hash
    WHERE ${predicates.join(" AND ")}
    ORDER BY f.path
    LIMIT ?
  `;
    try {
        return await db.all(statement, bindings);
    }
    catch (queryError) {
        if (!isTableMissingError(queryError, "document_metadata_kv")) {
            throw queryError;
        }
        console.warn("Metadata tables not found; disabling metadata-only searches until database is upgraded.");
        return [];
    }
}
|
|
1609
|
+
/**
 * Finds files whose metadata values contain any of the given keywords.
 *
 * Rows are scored by the number of matching `document_metadata_kv` rows and
 * returned highest score first, then by path. Paths in `excludePaths` are
 * left out, and any metadata filters are ANDed in as `EXISTS` clauses.
 *
 * @param db - Database client.
 * @param tableAvailability - Flags describing which optional tables exist.
 * @param repoId - Repository to search.
 * @param keywords - Keywords matched as case-insensitive substrings.
 * @param filters - Normalised metadata filters.
 * @param limit - Maximum number of rows.
 * @param excludePaths - Set of paths to exclude from the result.
 * @returns Rows with `path`, `lang`, `ext`, `content` and a numeric `score`.
 */
async function fetchMetadataKeywordMatches(db, tableAvailability, repoId, keywords, filters, limit, excludePaths) {
    if (!tableAvailability.hasMetadataTables || keywords.length === 0 || limit <= 0) {
        return [];
    }
    const keywordPredicate = keywords.map(() => "mk.value ILIKE ?").join(" OR ");
    const predicates = ["mk.repo_id = ?", `(${keywordPredicate})`];
    const bindings = [repoId, ...keywords.map((kw) => `%${kw}%`)];
    if (excludePaths.size > 0) {
        const excluded = [...excludePaths];
        predicates.push(`f.path NOT IN (${excluded.map(() => "?").join(", ")})`);
        bindings.push(...excluded);
    }
    for (const { sql: filterSql, params: filterParams } of buildMetadataFilterConditions(filters, "f")) {
        predicates.push(filterSql);
        bindings.push(...filterParams);
    }
    bindings.push(limit);
    const statement = `
    SELECT f.path, f.lang, f.ext, b.content, COUNT(*) AS score
    FROM document_metadata_kv mk
    JOIN file f ON f.repo_id = mk.repo_id AND f.path = mk.path
    JOIN blob b ON b.hash = f.blob_hash
    WHERE ${predicates.join(" AND ")}
    GROUP BY f.path, f.lang, f.ext, b.content
    ORDER BY score DESC, f.path
    LIMIT ?
  `;
    const matches = await safeMetadataQuery(db, tableAvailability, statement, bindings);
    // COUNT(*) may arrive as a bigint; normalise it to a plain number.
    return matches.map((match) => ({ ...match, score: Number(match.score ?? 1) }));
}
|
|
1642
|
+
/**
 * Loads all metadata key/value rows for a set of paths.
 *
 * @param db - Database client.
 * @param tableAvailability - Flags describing which optional tables exist.
 * @param repoId - Repository the paths belong to.
 * @param paths - Paths to load metadata for.
 * @returns Map from path to its `{ key, value, source }` entries; paths
 *   without metadata rows are absent from the map.
 */
async function loadMetadataForPaths(db, tableAvailability, repoId, paths) {
    const byPath = new Map();
    if (!tableAvailability.hasMetadataTables || paths.length === 0) {
        return byPath;
    }
    const slots = paths.map(() => "?").join(", ");
    const statement = `
    SELECT path, key, value, source
    FROM document_metadata_kv
    WHERE repo_id = ? AND path IN (${slots})
  `;
    const records = await safeMetadataQuery(db, tableAvailability, statement, [repoId, ...paths]);
    for (const { path: recordPath, key, value, source } of records) {
        let bucket = byPath.get(recordPath);
        if (!byPath.has(recordPath)) {
            bucket = [];
            byPath.set(recordPath, bucket);
        }
        // A null source is reported as undefined.
        bucket.push({ key, value, source: source ?? undefined });
    }
    return byPath;
}
|
|
641
|
-
function
|
|
642
|
-
const
|
|
643
|
-
if (
|
|
644
|
-
return
|
|
1666
|
+
/**
 * Counts resolved markdown links pointing at each of the given paths.
 *
 * @param db - Database client.
 * @param tableAvailability - Flags describing which optional tables exist.
 * @param repoId - Repository the paths belong to.
 * @param paths - Link targets to count inbound links for.
 * @returns Map from path to inbound link count; paths with no rows are absent.
 */
async function loadInboundLinkCounts(db, tableAvailability, repoId, paths) {
    const inboundByPath = new Map();
    if (!tableAvailability.hasLinkTable || paths.length === 0) {
        return inboundByPath;
    }
    const slots = paths.map(() => "?").join(", ");
    const statement = `
    SELECT resolved_path AS path, COUNT(*) AS inbound
    FROM markdown_link
    WHERE repo_id = ? AND resolved_path IS NOT NULL AND resolved_path IN (${slots})
    GROUP BY resolved_path
  `;
    const records = await safeLinkQuery(db, tableAvailability, statement, [repoId, ...paths]);
    for (const record of records) {
        // COUNT(*) may arrive as a bigint; a missing count is treated as 0.
        inboundByPath.set(record.path, Number(record.inbound ?? 0));
    }
    return inboundByPath;
}
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
*
|
|
657
|
-
* @param content - ファイル全体のコンテンツ
|
|
658
|
-
* @param startLine - 開始行(1-indexed)
|
|
659
|
-
* @param endLine - 終了行(1-indexed)
|
|
660
|
-
* @returns 推定トークン数
|
|
661
|
-
*/
|
|
662
|
-
function estimateTokensFromContent(content, startLine, endLine) {
|
|
663
|
-
const lines = content.split(/\r?\n/);
|
|
664
|
-
const startIndex = Math.max(0, startLine - 1);
|
|
665
|
-
const endIndex = Math.min(endLine, lines.length);
|
|
666
|
-
const selectedLines = lines.slice(startIndex, endIndex);
|
|
667
|
-
const text = selectedLines.join("\n");
|
|
668
|
-
try {
|
|
669
|
-
// 実際のGPTトークナイザーを使用
|
|
670
|
-
return encodeGPT(text).length;
|
|
1685
|
+
/**
 * Computes a score boost from a file's metadata entries.
 *
 * Each entry adds `METADATA_MATCH_WEIGHT` when its lower-cased value contains
 * at least one keyword, and `METADATA_FILTER_MATCH_WEIGHT` when the lower-cased
 * value is itself a member of `filterValueSet`. The total is capped at 1.5.
 *
 * @param entries - Metadata entries for one file (`{ value, ... }`), possibly empty or missing.
 * @param keywordSet - Keywords; presumably already lower-cased — TODO confirm against callers.
 * @param filterValueSet - Set of filter values compared against lower-cased entry values.
 * @returns Boost between 0 and 1.5.
 */
function computeMetadataBoost(entries, keywordSet, filterValueSet) {
    if (!entries || entries.length === 0) {
        return 0;
    }
    const keywords = [...keywordSet];
    let total = 0;
    for (const { value } of entries) {
        const folded = value.toLowerCase();
        // At most one keyword bonus per entry, however many keywords match.
        if (keywords.some((keyword) => folded.includes(keyword))) {
            total += METADATA_MATCH_WEIGHT;
        }
        if (filterValueSet.has(folded)) {
            total += METADATA_FILTER_MATCH_WEIGHT;
        }
    }
    return Math.min(total, 1.5);
}
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
return
|
|
1704
|
+
/**
 * Converts an inbound link count into a logarithmic score boost.
 *
 * @param count - Inbound link count (number or bigint); falsy or non-positive counts give no boost.
 * @returns `log1p(count) * INBOUND_LINK_WEIGHT`, capped at 1.0.
 */
function computeInboundLinkBoost(count) {
    const linkCount = typeof count === "bigint" ? Number(count) : count;
    if (!linkCount || linkCount <= 0) {
        return 0;
    }
    const scaled = Math.log1p(linkCount) * INBOUND_LINK_WEIGHT;
    return Math.min(scaled, 1.0);
}
|
|
1714
|
+
/**
 * Checks whether a file's metadata entries satisfy every metadata filter.
 *
 * A filter is satisfied when some entry has the same key, the same source
 * (only checked when the filter specifies one), and a value that contains any
 * of the filter's values as a case-insensitive substring.
 *
 * @param entries - Metadata entries for the candidate, possibly empty or missing.
 * @param filters - Normalised metadata filters.
 * @returns `true` when there are no filters or all of them are satisfied.
 */
function candidateMatchesMetadataFilters(entries, filters) {
    if (filters.length === 0) {
        return true;
    }
    if (!entries || entries.length === 0) {
        return false;
    }
    return filters.every((filter) => {
        const needles = filter.values.map((value) => value.toLowerCase());
        const entrySatisfies = (entry) => {
            const sameKey = entry.key === filter.key;
            const sourceOk = !filter.source || entry.source === filter.source;
            if (!sameKey || !sourceOk) {
                return false;
            }
            const haystack = entry.value.toLowerCase();
            return needles.some((needle) => haystack.includes(needle));
        };
        return entries.some(entrySatisfies);
    });
}
|
|
688
1735
|
/**
|
|
689
1736
|
* パス固有のマルチプライヤーを取得(最長プレフィックスマッチ)
|
|
@@ -710,7 +1757,7 @@ function getPathMultiplier(filePath, profileConfig) {
|
|
|
710
1757
|
* @param weights - スコアリングウェイト設定(乗算的ペナルティに使用)
|
|
711
1758
|
* @returns ブースト適用後のスコア
|
|
712
1759
|
*/
|
|
713
|
-
function applyFileTypeBoost(path, baseScore, profileConfig,
|
|
1760
|
+
function applyFileTypeBoost(path, baseScore, profileConfig, weights) {
|
|
714
1761
|
// Blacklisted directories that are almost always irrelevant for code context
|
|
715
1762
|
const blacklistedDirs = [
|
|
716
1763
|
".cursor/",
|
|
@@ -727,7 +1774,8 @@ function applyFileTypeBoost(path, baseScore, profileConfig, _weights) {
|
|
|
727
1774
|
if (profileConfig.denylistOverrides.includes(dir)) {
|
|
728
1775
|
continue;
|
|
729
1776
|
}
|
|
730
|
-
|
|
1777
|
+
// v1.0.0: Use multiplicative penalty instead of absolute -100
|
|
1778
|
+
return baseScore * weights.blacklistPenaltyMultiplier;
|
|
731
1779
|
}
|
|
732
1780
|
}
|
|
733
1781
|
const fileName = path.split("/").pop() ?? "";
|
|
@@ -758,12 +1806,56 @@ function applyFileTypeBoost(path, baseScore, profileConfig, _weights) {
|
|
|
758
1806
|
multiplier *= implMultiplier;
|
|
759
1807
|
}
|
|
760
1808
|
}
|
|
761
|
-
// Test files:
|
|
1809
|
+
// Test files: multiplicative penalty (v1.0.0)
|
|
762
1810
|
if (path.startsWith("tests/") || path.startsWith("test/")) {
|
|
763
|
-
return baseScore *
|
|
1811
|
+
return baseScore * weights.testPenaltyMultiplier;
|
|
764
1812
|
}
|
|
765
1813
|
return baseScore * multiplier;
|
|
766
1814
|
}
|
|
1815
|
+
/**
 * Adds a score bonus proportional to how many query terms a candidate covers.
 *
 * Keyword coverage contributes up to `weights.textMatch * 0.4` and phrase
 * coverage up to `weights.textMatch * 0.6`. Both ratios are clamped to 1 so a
 * candidate can never earn more than the full bonus. Candidates with no
 * keyword or phrase hits (including pure path-fallback candidates) are left
 * untouched. Mutates `candidate.score` and `candidate.reasons`.
 *
 * @param candidate - Candidate with `score`, `reasons` (Set), `keywordHits` (Set) and `phraseHits` (number).
 * @param extractedTerms - Query terms with `keywords` and `phrases` arrays.
 * @param weights - Scoring weights; `textMatch` scales the bonus.
 */
function applyCoverageBoost(candidate, extractedTerms, weights) {
    // Coverage only makes sense with text evidence. This single guard also
    // covers "fallback:path" candidates that have no hits.
    if (candidate.keywordHits.size === 0 && candidate.phraseHits === 0) {
        return;
    }
    if (extractedTerms.keywords.length > 0 && candidate.keywordHits.size > 0) {
        const coverage = Math.min(1, candidate.keywordHits.size / extractedTerms.keywords.length);
        const bonus = coverage * weights.textMatch * 0.4;
        candidate.score += bonus;
        candidate.reasons.add(`coverage:keywords:${coverage.toFixed(2)}`);
    }
    if (extractedTerms.phrases.length > 0 && candidate.phraseHits > 0) {
        const phraseCoverage = Math.min(1, candidate.phraseHits / extractedTerms.phrases.length);
        const bonus = phraseCoverage * weights.textMatch * 0.6;
        candidate.score += bonus;
        candidate.reasons.add(`coverage:phrases:${phraseCoverage.toFixed(2)}`);
    }
}
|
|
1839
|
+
/**
 * Fetches non-binary files whose path contains any of the given terms.
 *
 * Matching is a case-insensitive substring match on `file.path`; each row
 * also carries the blob content and, when present, the file's embedding.
 *
 * @param db - Database client exposing `all(sql, params)`.
 * @param repoId - Repository to search.
 * @param terms - Terms to look for in file paths.
 * @param limit - Maximum number of rows.
 * @returns Rows ordered by path, or `[]` when there is nothing to search for.
 */
async function fetchPathFallbackCandidates(db, repoId, terms, limit) {
    if (terms.length === 0 || limit <= 0) {
        return [];
    }
    const pathPredicate = terms.map(() => "f.path ILIKE ?").join(" OR ");
    const patterns = terms.map((term) => `%${term}%`);
    const statement = `
    SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
    FROM file f
    JOIN blob b ON b.hash = f.blob_hash
    LEFT JOIN file_embedding fe
      ON fe.repo_id = f.repo_id
     AND fe.path = f.path
    WHERE f.repo_id = ?
      AND f.is_binary = FALSE
      AND (${pathPredicate})
    ORDER BY f.path
    LIMIT ?
  `;
    return await db.all(statement, [repoId, ...patterns, limit]);
}
|
|
767
1859
|
/**
|
|
768
1860
|
* パスベースのスコアリングを適用(加算的ブースト)
|
|
769
1861
|
* goalのキーワード/フレーズがファイルパスに含まれる場合にスコアを加算
|
|
@@ -862,22 +1954,25 @@ function applyPathBasedScoring(candidate, lowerPath, weights, extractedTerms) {
|
|
|
862
1954
|
}
|
|
863
1955
|
}
|
|
864
1956
|
/**
|
|
865
|
-
*
|
|
866
|
-
* ブラックリストディレクトリ、テストファイル、lock
|
|
867
|
-
*
|
|
868
|
-
* @
|
|
1957
|
+
* 乗算的ファイルペナルティを適用(v1.0.0+)
|
|
1958
|
+
* ブラックリストディレクトリ、テストファイル、lockファイルに乗算ペナルティ
|
|
1959
|
+
* v1.0.0: 絶対ペナルティ(-100)から乗算ペナルティ(×0.01など)に移行
|
|
1960
|
+
* @param weights - スコアリングウェイト設定(乗算ペナルティ係数を含む)
|
|
1961
|
+
* @param profile - boost_profile設定(denylistOverridesなど)
|
|
1962
|
+
* @returns true if severe penalty was applied (caller should skip further boosts)
|
|
869
1963
|
*/
|
|
870
|
-
function
|
|
871
|
-
//
|
|
1964
|
+
function applyMultiplicativeFilePenalties(candidate, path, lowerPath, fileName, weights, profileConfig) {
|
|
1965
|
+
// Returns true if a severe penalty was applied (should skip further boosts)
|
|
1966
|
+
// Blacklisted directories - apply strong multiplicative penalty (99% reduction)
|
|
1967
|
+
// v1.0.0: test/ and tests/ removed - handled by testPenaltyMultiplier instead
|
|
872
1968
|
const blacklistedDirs = [
|
|
873
1969
|
".cursor/",
|
|
874
1970
|
".devcontainer/",
|
|
875
1971
|
".serena/",
|
|
876
1972
|
"__mocks__/",
|
|
877
1973
|
"docs/",
|
|
878
|
-
"test/",
|
|
879
|
-
"tests/",
|
|
880
1974
|
".git/",
|
|
1975
|
+
".github/",
|
|
881
1976
|
"node_modules/",
|
|
882
1977
|
"db/migrate/",
|
|
883
1978
|
"db/migrations/",
|
|
@@ -897,19 +1992,26 @@ function applyAdditiveFilePenalties(candidate, path, lowerPath, fileName, profil
|
|
|
897
1992
|
if (profileConfig.denylistOverrides.includes(dir)) {
|
|
898
1993
|
continue; // Skip this blacklisted directory
|
|
899
1994
|
}
|
|
900
|
-
|
|
1995
|
+
// v1.0.0: Use multiplicative penalty instead of absolute -100
|
|
1996
|
+
candidate.scoreMultiplier *= weights.blacklistPenaltyMultiplier;
|
|
901
1997
|
candidate.reasons.add("penalty:blacklisted-dir");
|
|
902
|
-
return true;
|
|
1998
|
+
return true; // Signal to skip further boosts - this is the strongest penalty
|
|
903
1999
|
}
|
|
904
2000
|
}
|
|
905
|
-
|
|
2001
|
+
if (isSuppressedPath(path)) {
|
|
2002
|
+
// v1.0.0: Use multiplicative penalty instead of absolute -100
|
|
2003
|
+
candidate.scoreMultiplier *= weights.blacklistPenaltyMultiplier;
|
|
2004
|
+
candidate.reasons.add("penalty:suppressed");
|
|
2005
|
+
return true; // Signal to skip further boosts
|
|
2006
|
+
}
|
|
2007
|
+
// Test files - strong multiplicative penalty (95% reduction)
|
|
906
2008
|
const testPatterns = [".spec.ts", ".spec.js", ".test.ts", ".test.js", ".spec.tsx", ".test.tsx"];
|
|
907
2009
|
if (testPatterns.some((pattern) => lowerPath.endsWith(pattern))) {
|
|
908
|
-
candidate.
|
|
2010
|
+
candidate.scoreMultiplier *= weights.testPenaltyMultiplier;
|
|
909
2011
|
candidate.reasons.add("penalty:test-file");
|
|
910
|
-
return true;
|
|
2012
|
+
return true; // Signal to skip further boosts
|
|
911
2013
|
}
|
|
912
|
-
// Lock files - very strong penalty
|
|
2014
|
+
// Lock files - very strong multiplicative penalty (99% reduction)
|
|
913
2015
|
const lockFiles = [
|
|
914
2016
|
"package-lock.json",
|
|
915
2017
|
"pnpm-lock.yaml",
|
|
@@ -920,63 +2022,58 @@ function applyAdditiveFilePenalties(candidate, path, lowerPath, fileName, profil
|
|
|
920
2022
|
"poetry.lock",
|
|
921
2023
|
];
|
|
922
2024
|
if (lockFiles.some((lockFile) => fileName === lockFile)) {
|
|
923
|
-
candidate.
|
|
2025
|
+
candidate.scoreMultiplier *= weights.lockPenaltyMultiplier;
|
|
924
2026
|
candidate.reasons.add("penalty:lock-file");
|
|
925
|
-
return true;
|
|
926
|
-
}
|
|
927
|
-
// Configuration files - penalty handling depends on profile
|
|
928
|
-
const configPatterns = [
|
|
929
|
-
".config.js",
|
|
930
|
-
".config.ts",
|
|
931
|
-
".config.mjs",
|
|
932
|
-
".config.cjs",
|
|
933
|
-
"tsconfig.json",
|
|
934
|
-
"jsconfig.json",
|
|
935
|
-
"package.json",
|
|
936
|
-
".eslintrc",
|
|
937
|
-
".prettierrc",
|
|
938
|
-
"jest.config",
|
|
939
|
-
"vite.config",
|
|
940
|
-
"vitest.config",
|
|
941
|
-
"webpack.config",
|
|
942
|
-
"rollup.config",
|
|
943
|
-
];
|
|
944
|
-
if (configPatterns.some((pattern) => lowerPath.endsWith(pattern) || fileName.startsWith(".env")) ||
|
|
945
|
-
fileName === "Dockerfile" ||
|
|
946
|
-
fileName === "docker-compose.yml" ||
|
|
947
|
-
fileName === "docker-compose.yaml") {
|
|
948
|
-
// ✅ Use explicit flag instead of magic number (0.3) to determine behavior
|
|
949
|
-
// This decouples profile detection from multiplier values
|
|
950
|
-
if (profileConfig.skipConfigAdditivePenalty) {
|
|
951
|
-
return false; // Continue to multiplicative penalty only
|
|
952
|
-
}
|
|
953
|
-
// For other profiles, apply strong additive penalty
|
|
954
|
-
candidate.score -= 1.5;
|
|
955
|
-
candidate.reasons.add("penalty:config-file");
|
|
956
|
-
return true;
|
|
2027
|
+
return true; // Signal to skip further boosts
|
|
957
2028
|
}
|
|
958
|
-
//
|
|
959
|
-
|
|
960
|
-
candidate.score -= 2.0;
|
|
961
|
-
candidate.reasons.add("penalty:migration-file");
|
|
962
|
-
return true;
|
|
963
|
-
}
|
|
964
|
-
return false; // No penalty applied, continue processing
|
|
2029
|
+
// v1.0.0: No penalty applied, allow further boosts/penalties
|
|
2030
|
+
return false;
|
|
965
2031
|
}
|
|
966
2032
|
/**
|
|
967
2033
|
* ファイルタイプ別の乗算的ペナルティ/ブーストを適用(v0.7.0+)
|
|
968
2034
|
* profile="docs": ドキュメントファイルをブースト
|
|
969
2035
|
* profile="default": ドキュメントファイルにペナルティ、実装ファイルをブースト
|
|
970
2036
|
*/
|
|
971
|
-
function applyFileTypeMultipliers(candidate, path, ext, profileConfig,
|
|
2037
|
+
function applyFileTypeMultipliers(candidate, path, ext, profileConfig, weights) {
|
|
972
2038
|
const fileName = path.split("/").pop() ?? "";
|
|
973
|
-
|
|
2039
|
+
const lowerPath = path.toLowerCase();
|
|
2040
|
+
// Very low value: schemas, fixtures, testdata, examples, baseline
|
|
2041
|
+
const schemaJson = lowerPath.endsWith(".schema.json") || lowerPath.includes("/schemas/");
|
|
2042
|
+
const isFixture = lowerPath.includes("/fixtures/") ||
|
|
2043
|
+
lowerPath.includes("/fixture/") ||
|
|
2044
|
+
lowerPath.includes("/testdata/");
|
|
2045
|
+
const isExample = lowerPath.includes("/examples/") || lowerPath.includes("/example/");
|
|
2046
|
+
const isBaseline = lowerPath.includes("baseline") || lowerPath.includes("golden");
|
|
2047
|
+
if (schemaJson || isFixture || isExample || isBaseline) {
|
|
2048
|
+
candidate.scoreMultiplier *= weights.configPenaltyMultiplier;
|
|
2049
|
+
candidate.reasons.add("penalty:low-value-file");
|
|
2050
|
+
return;
|
|
2051
|
+
}
|
|
2052
|
+
// ✅ Step 1: Low-value files (v1.0.0: syntax/perf/legal/migration)
|
|
2053
|
+
// Apply configPenaltyMultiplier (strong penalty) to rarely useful file types
|
|
2054
|
+
const isSyntaxGrammar = path.includes("/syntaxes/") &&
|
|
2055
|
+
(lowerPath.endsWith(".tmlanguage") ||
|
|
2056
|
+
lowerPath.endsWith(".tmlanguage.json") ||
|
|
2057
|
+
lowerPath.endsWith(".tmtheme") ||
|
|
2058
|
+
lowerPath.endsWith(".plist"));
|
|
2059
|
+
const isPerfData = lowerPath.includes(".perf.data") ||
|
|
2060
|
+
lowerPath.includes(".perf-data") ||
|
|
2061
|
+
lowerPath.includes("-perf-data");
|
|
2062
|
+
const isLegalFile = fileName.toLowerCase().includes("thirdpartynotices") ||
|
|
2063
|
+
fileName.toLowerCase() === "cgmanifest.json";
|
|
2064
|
+
const isMigrationFile = lowerPath.includes("migrate") || lowerPath.includes("migration");
|
|
2065
|
+
if (isSyntaxGrammar || isPerfData || isLegalFile || isMigrationFile) {
|
|
2066
|
+
candidate.scoreMultiplier *= weights.configPenaltyMultiplier;
|
|
2067
|
+
candidate.reasons.add("penalty:low-value-file");
|
|
2068
|
+
return; // Don't apply impl boosts
|
|
2069
|
+
}
|
|
2070
|
+
// ✅ Step 2: Config files
|
|
974
2071
|
if (isConfigFile(path, fileName)) {
|
|
975
2072
|
candidate.scoreMultiplier *= profileConfig.fileTypeMultipliers.config;
|
|
976
2073
|
candidate.reasons.add("penalty:config-file");
|
|
977
2074
|
return; // Don't apply impl boosts to config files
|
|
978
2075
|
}
|
|
979
|
-
// ✅ Step
|
|
2076
|
+
// ✅ Step 3: Documentation files
|
|
980
2077
|
const docExtensions = [".md", ".yaml", ".yml", ".mdc"];
|
|
981
2078
|
if (docExtensions.some((docExt) => path.endsWith(docExt))) {
|
|
982
2079
|
const docMultiplier = profileConfig.fileTypeMultipliers.doc;
|
|
@@ -989,7 +2086,7 @@ function applyFileTypeMultipliers(candidate, path, ext, profileConfig, _weights)
|
|
|
989
2086
|
}
|
|
990
2087
|
return; // Don't apply impl boosts to docs
|
|
991
2088
|
}
|
|
992
|
-
// ✅ Step
|
|
2089
|
+
// ✅ Step 4: Implementation files with path-specific boosts
|
|
993
2090
|
const implMultiplier = profileConfig.fileTypeMultipliers.impl;
|
|
994
2091
|
// ✅ Use longest-prefix-match logic (order-independent)
|
|
995
2092
|
const pathBoost = getPathMultiplier(path, profileConfig);
|
|
@@ -1016,16 +2113,21 @@ function applyFileTypeMultipliers(candidate, path, ext, profileConfig, _weights)
|
|
|
1016
2113
|
}
|
|
1017
2114
|
}
|
|
1018
2115
|
/**
|
|
1019
|
-
* contextBundle専用のブーストプロファイル適用(
|
|
2116
|
+
* contextBundle専用のブーストプロファイル適用(v1.0.0: 乗算ペナルティモデル)
|
|
1020
2117
|
* 複雑度を削減するために3つのヘルパー関数に分割:
|
|
1021
2118
|
* 1. applyPathBasedScoring: パスベースの加算的スコアリング
|
|
1022
|
-
* 2.
|
|
1023
|
-
* 3. applyFileTypeMultipliers:
|
|
2119
|
+
* 2. applyMultiplicativeFilePenalties: 乗算的ペナルティ(blacklist/test/lock)
|
|
2120
|
+
* 3. applyFileTypeMultipliers: 乗算的ペナルティ/ブースト(doc/config/impl)
|
|
1024
2121
|
*
|
|
1025
|
-
*
|
|
1026
|
-
*
|
|
1027
|
-
*
|
|
1028
|
-
*
|
|
2122
|
+
* v1.0.0 CHANGES:
|
|
2123
|
+
* - 絶対ペナルティ(-100)を乗算ペナルティ(×0.01など)に置き換え
|
|
2124
|
+
* - すべてのペナルティが組み合わせ可能に(boost_profileとの相互作用が予測可能)
|
|
2125
|
+
* - v0.9.0の特別ケース処理(if profile === "docs")が不要に
|
|
2126
|
+
*
|
|
2127
|
+
* SCORING PHASES:
|
|
2128
|
+
* 1. Additive phase: テキストマッチ、パスマッチ、依存関係、近接性を加算
|
|
2129
|
+
* 2. Multiplicative phase: ペナルティとブーストを scoreMultiplier に蓄積
|
|
2130
|
+
* 3. Final application: score *= scoreMultiplier(最終段階で一度だけ適用)
|
|
1029
2131
|
*/
|
|
1030
2132
|
function applyBoostProfile(candidate, row, profileConfig, weights, extractedTerms) {
|
|
1031
2133
|
const { path, ext } = row;
|
|
@@ -1033,117 +2135,205 @@ function applyBoostProfile(candidate, row, profileConfig, weights, extractedTerm
|
|
|
1033
2135
|
const fileName = path.split("/").pop() ?? "";
|
|
1034
2136
|
// Step 1: パスベースのスコアリング(加算的ブースト)
|
|
1035
2137
|
applyPathBasedScoring(candidate, lowerPath, weights, extractedTerms);
|
|
1036
|
-
// Step 2:
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
return; // ペナルティが適用された場合は処理終了
|
|
1040
|
-
}
|
|
2138
|
+
// Step 2: 乗算的ペナルティ(ブラックリスト、テスト、lock)
|
|
2139
|
+
// v1.0.0: Returns true if severe penalty applied (should skip further boosts)
|
|
2140
|
+
const skipFurtherBoosts = applyMultiplicativeFilePenalties(candidate, path, lowerPath, fileName, weights, profileConfig);
|
|
1041
2141
|
// Step 3: ファイルタイプ別の乗算的ペナルティ/ブースト
|
|
1042
|
-
|
|
2142
|
+
// Skip if severe penalty was applied (blacklist/test/lock files shouldn't get impl boosts)
|
|
2143
|
+
if (!skipFurtherBoosts) {
|
|
2144
|
+
applyFileTypeMultipliers(candidate, path, ext, profileConfig, weights);
|
|
2145
|
+
}
|
|
1043
2146
|
}
|
|
1044
2147
|
export async function filesSearch(context, params) {
|
|
1045
2148
|
const { db, repoId } = context;
|
|
1046
|
-
const
|
|
1047
|
-
|
|
1048
|
-
|
|
2149
|
+
const rawQuery = params.query ?? "";
|
|
2150
|
+
const inlineMetadata = parseInlineMetadataFilters(rawQuery);
|
|
2151
|
+
const paramFilters = normalizeMetadataFiltersParam(params.metadata_filters);
|
|
2152
|
+
const metadataFilters = mergeMetadataFilters([...inlineMetadata.filters, ...paramFilters]);
|
|
2153
|
+
const strictMetadataFilters = metadataFilters.filter((filter) => filter.strict);
|
|
2154
|
+
const hintMetadataFilters = metadataFilters.filter((filter) => !filter.strict);
|
|
2155
|
+
const hasStrictMetadataFilters = strictMetadataFilters.length > 0;
|
|
2156
|
+
const hasHintMetadataFilters = hintMetadataFilters.length > 0;
|
|
2157
|
+
const hasAnyMetadataFilters = metadataFilters.length > 0;
|
|
2158
|
+
let cleanedQuery = inlineMetadata.cleanedQuery;
|
|
2159
|
+
let hasTextQuery = cleanedQuery.length > 0;
|
|
2160
|
+
if (!hasTextQuery && hasHintMetadataFilters) {
|
|
2161
|
+
cleanedQuery = hintMetadataFilters
|
|
2162
|
+
.flatMap((filter) => filter.values)
|
|
2163
|
+
.map((value) => value.trim())
|
|
2164
|
+
.filter((value) => value.length > 0)
|
|
2165
|
+
.join(" ");
|
|
2166
|
+
cleanedQuery = cleanedQuery.trim();
|
|
2167
|
+
hasTextQuery = cleanedQuery.length > 0;
|
|
2168
|
+
}
|
|
2169
|
+
const metadataValueSeed = metadataFilters
|
|
2170
|
+
.flatMap((filter) => filter.values)
|
|
2171
|
+
.map((value) => value.trim())
|
|
2172
|
+
.filter((value) => value.length > 0)
|
|
2173
|
+
.join(" ");
|
|
2174
|
+
if (metadataValueSeed.length > 0) {
|
|
2175
|
+
cleanedQuery = `${cleanedQuery} ${metadataValueSeed}`.trim();
|
|
2176
|
+
hasTextQuery = cleanedQuery.length > 0;
|
|
2177
|
+
}
|
|
2178
|
+
if (!hasTextQuery && !hasAnyMetadataFilters) {
|
|
2179
|
+
throw new Error("files_search requires a query or metadata_filters. Provide keywords or structured filters to continue.");
|
|
1049
2180
|
}
|
|
1050
2181
|
const limit = normalizeLimit(params.limit);
|
|
1051
2182
|
const ftsStatus = await getFreshFtsStatus(context);
|
|
1052
2183
|
const hasFTS = ftsStatus.ready;
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
if (
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
SELECT
|
|
1079
|
-
FROM
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
values = [repoId];
|
|
1093
|
-
const words = splitQueryWords(query);
|
|
1094
|
-
if (words.length === 1) {
|
|
1095
|
-
conditions.push("b.content ILIKE '%' || ? || '%'");
|
|
1096
|
-
values.push(query);
|
|
2184
|
+
const metadataClauses = buildMetadataFilterConditions(strictMetadataFilters);
|
|
2185
|
+
const candidateRows = [];
|
|
2186
|
+
if (hasTextQuery) {
|
|
2187
|
+
let sql;
|
|
2188
|
+
let values;
|
|
2189
|
+
if (hasFTS) {
|
|
2190
|
+
const conditions = ["f.repo_id = ?"];
|
|
2191
|
+
values = [repoId];
|
|
2192
|
+
if (params.lang) {
|
|
2193
|
+
conditions.push("COALESCE(f.lang, '') = ?");
|
|
2194
|
+
values.push(params.lang);
|
|
2195
|
+
}
|
|
2196
|
+
if (params.ext) {
|
|
2197
|
+
conditions.push("COALESCE(f.ext, '') = ?");
|
|
2198
|
+
values.push(params.ext);
|
|
2199
|
+
}
|
|
2200
|
+
if (params.path_prefix) {
|
|
2201
|
+
conditions.push("f.path LIKE ?");
|
|
2202
|
+
values.push(`${params.path_prefix}%`);
|
|
2203
|
+
}
|
|
2204
|
+
for (const clause of metadataClauses) {
|
|
2205
|
+
conditions.push(clause.sql);
|
|
2206
|
+
values.push(...clause.params);
|
|
2207
|
+
}
|
|
2208
|
+
sql = `
|
|
2209
|
+
SELECT f.path, f.lang, f.ext, b.content, fts.score
|
|
2210
|
+
FROM file f
|
|
2211
|
+
JOIN blob b ON b.hash = f.blob_hash
|
|
2212
|
+
JOIN (
|
|
2213
|
+
SELECT hash, fts_main_blob.match_bm25(hash, ?) AS score
|
|
2214
|
+
FROM blob
|
|
2215
|
+
WHERE score IS NOT NULL
|
|
2216
|
+
) fts ON fts.hash = b.hash
|
|
2217
|
+
WHERE ${conditions.join(" AND ")}
|
|
2218
|
+
ORDER BY fts.score DESC
|
|
2219
|
+
LIMIT ?
|
|
2220
|
+
`;
|
|
2221
|
+
values.unshift(cleanedQuery);
|
|
2222
|
+
values.push(limit);
|
|
1097
2223
|
}
|
|
1098
2224
|
else {
|
|
1099
|
-
const
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
2225
|
+
const conditions = ["f.repo_id = ?", "b.content IS NOT NULL"];
|
|
2226
|
+
values = [repoId];
|
|
2227
|
+
const words = splitQueryWords(cleanedQuery);
|
|
2228
|
+
if (words.length === 1) {
|
|
2229
|
+
conditions.push("b.content ILIKE '%' || ? || '%'");
|
|
2230
|
+
values.push(cleanedQuery);
|
|
2231
|
+
}
|
|
2232
|
+
else {
|
|
2233
|
+
const wordConditions = words.map(() => "b.content ILIKE '%' || ? || '%'");
|
|
2234
|
+
conditions.push(`(${wordConditions.join(" OR ")})`);
|
|
2235
|
+
values.push(...words);
|
|
2236
|
+
}
|
|
2237
|
+
if (params.lang) {
|
|
2238
|
+
conditions.push("COALESCE(f.lang, '') = ?");
|
|
2239
|
+
values.push(params.lang);
|
|
2240
|
+
}
|
|
2241
|
+
if (params.ext) {
|
|
2242
|
+
conditions.push("COALESCE(f.ext, '') = ?");
|
|
2243
|
+
values.push(params.ext);
|
|
2244
|
+
}
|
|
2245
|
+
if (params.path_prefix) {
|
|
2246
|
+
conditions.push("f.path LIKE ?");
|
|
2247
|
+
values.push(`${params.path_prefix}%`);
|
|
2248
|
+
}
|
|
2249
|
+
for (const clause of metadataClauses) {
|
|
2250
|
+
conditions.push(clause.sql);
|
|
2251
|
+
values.push(...clause.params);
|
|
2252
|
+
}
|
|
2253
|
+
sql = `
|
|
2254
|
+
SELECT f.path, f.lang, f.ext, b.content
|
|
2255
|
+
FROM file f
|
|
2256
|
+
JOIN blob b ON b.hash = f.blob_hash
|
|
2257
|
+
WHERE ${conditions.join(" AND ")}
|
|
2258
|
+
ORDER BY f.path
|
|
2259
|
+
LIMIT ?
|
|
2260
|
+
`;
|
|
2261
|
+
values.push(limit);
|
|
2262
|
+
}
|
|
2263
|
+
const textRows = await db.all(sql, values);
|
|
2264
|
+
candidateRows.push(...textRows);
|
|
2265
|
+
}
|
|
2266
|
+
if (!hasTextQuery && hasAnyMetadataFilters) {
|
|
2267
|
+
const metadataOnlyRows = await fetchMetadataOnlyCandidates(db, context.tableAvailability, repoId, metadataFilters, limit * 2);
|
|
2268
|
+
for (const row of metadataOnlyRows) {
|
|
2269
|
+
row.score = 1 + metadataFilters.length * 0.2;
|
|
2270
|
+
}
|
|
2271
|
+
candidateRows.push(...metadataOnlyRows);
|
|
2272
|
+
}
|
|
2273
|
+
if (hasTextQuery) {
|
|
2274
|
+
const metadataKeywords = splitQueryWords(cleanedQuery.toLowerCase()).map((kw) => kw.toLowerCase());
|
|
2275
|
+
if (metadataKeywords.length > 0) {
|
|
2276
|
+
const excludePaths = new Set(candidateRows.map((row) => row.path));
|
|
2277
|
+
const metadataRows = await fetchMetadataKeywordMatches(db, context.tableAvailability, repoId, metadataKeywords, metadataFilters, limit * 2, excludePaths);
|
|
2278
|
+
candidateRows.push(...metadataRows);
|
|
2279
|
+
}
|
|
2280
|
+
}
|
|
2281
|
+
if (candidateRows.length === 0) {
|
|
2282
|
+
return [];
|
|
2283
|
+
}
|
|
2284
|
+
const rowMap = new Map();
|
|
2285
|
+
for (const row of candidateRows) {
|
|
2286
|
+
const base = row.score ?? (hasTextQuery ? 1.0 : 0.8);
|
|
2287
|
+
const existing = rowMap.get(row.path);
|
|
2288
|
+
const existingScore = existing?.score ?? (hasTextQuery ? 1.0 : 0.8);
|
|
2289
|
+
if (!existing || base > existingScore) {
|
|
2290
|
+
rowMap.set(row.path, { ...row, score: base });
|
|
2291
|
+
}
|
|
2292
|
+
}
|
|
2293
|
+
const dedupedRows = Array.from(rowMap.values()).sort((a, b) => (b.score ?? 1) - (a.score ?? 1));
|
|
2294
|
+
const limitedRows = dedupedRows.slice(0, limit);
|
|
2295
|
+
const paths = limitedRows.map((row) => row.path);
|
|
2296
|
+
const metadataMap = await loadMetadataForPaths(db, context.tableAvailability, repoId, paths);
|
|
2297
|
+
const inboundCounts = await loadInboundLinkCounts(db, context.tableAvailability, repoId, paths);
|
|
2298
|
+
const metadataKeywordSet = hasTextQuery
|
|
2299
|
+
? new Set(splitQueryWords(cleanedQuery.toLowerCase()).map((kw) => kw.toLowerCase()))
|
|
2300
|
+
: new Set();
|
|
2301
|
+
const filterValueSet = new Set(metadataFilters.flatMap((filter) => filter.values.map((value) => value.toLowerCase())));
|
|
2302
|
+
const boostProfile = params.boost_profile ??
|
|
2303
|
+
(hasHintMetadataFilters ? "balanced" : hasStrictMetadataFilters ? "docs" : "default");
|
|
2304
|
+
const baseProfileConfig = getBoostProfile(boostProfile);
|
|
2305
|
+
const cachedMerged = mergedPathMultiplierCache.get(boostProfile);
|
|
2306
|
+
const mergedPathMultipliers = cachedMerged ??
|
|
2307
|
+
mergePathPenaltyEntries(baseProfileConfig.pathMultipliers, [], serverConfig.pathPenalties);
|
|
2308
|
+
if (!cachedMerged) {
|
|
2309
|
+
mergedPathMultiplierCache.set(boostProfile, mergedPathMultipliers);
|
|
2310
|
+
}
|
|
2311
|
+
const profileConfig = {
|
|
2312
|
+
...baseProfileConfig,
|
|
2313
|
+
pathMultipliers: mergedPathMultipliers,
|
|
2314
|
+
};
|
|
1130
2315
|
const weights = loadScoringProfile(null);
|
|
1131
2316
|
const options = parseOutputOptions(params);
|
|
1132
|
-
|
|
2317
|
+
const previewQuery = hasTextQuery
|
|
2318
|
+
? cleanedQuery
|
|
2319
|
+
: (metadataFilters[0]?.values[0] ?? rawQuery.trim());
|
|
2320
|
+
return limitedRows
|
|
1133
2321
|
.map((row) => {
|
|
1134
2322
|
let preview;
|
|
1135
2323
|
let matchLine;
|
|
2324
|
+
const previewSource = previewQuery || row.path;
|
|
1136
2325
|
if (options.includePreview) {
|
|
1137
|
-
|
|
1138
|
-
const previewData = buildPreview(row.content ?? "", query);
|
|
2326
|
+
const previewData = buildPreview(row.content ?? "", previewSource);
|
|
1139
2327
|
preview = previewData.preview;
|
|
1140
2328
|
matchLine = previewData.line;
|
|
1141
2329
|
}
|
|
1142
2330
|
else {
|
|
1143
|
-
|
|
1144
|
-
matchLine = findFirstMatchLine(row.content ?? "", query);
|
|
2331
|
+
matchLine = findFirstMatchLine(row.content ?? "", previewSource);
|
|
1145
2332
|
}
|
|
1146
|
-
const
|
|
2333
|
+
const metadataEntries = metadataMap.get(row.path);
|
|
2334
|
+
const metadataBoost = computeMetadataBoost(metadataEntries, metadataKeywordSet, filterValueSet);
|
|
2335
|
+
const inboundBoost = computeInboundLinkBoost(inboundCounts.get(row.path));
|
|
2336
|
+
const baseScore = (row.score ?? (hasTextQuery ? 1.0 : 0.8)) + metadataBoost + inboundBoost;
|
|
1147
2337
|
const boostedScore = boostProfile === "none"
|
|
1148
2338
|
? baseScore
|
|
1149
2339
|
: applyFileTypeBoost(row.path, baseScore, profileConfig, weights);
|
|
@@ -1159,96 +2349,20 @@ export async function filesSearch(context, params) {
|
|
|
1159
2349
|
}
|
|
1160
2350
|
return result;
|
|
1161
2351
|
})
|
|
1162
|
-
.
|
|
1163
|
-
|
|
1164
|
-
export async function snippetsGet(context, params) {
|
|
1165
|
-
const { db, repoId } = context;
|
|
1166
|
-
if (!params.path) {
|
|
1167
|
-
throw new Error("snippets_get requires a file path. Specify a tracked text file path to continue.");
|
|
1168
|
-
}
|
|
1169
|
-
const rows = await db.all(`
|
|
1170
|
-
SELECT f.path, f.lang, f.ext, f.is_binary, b.content
|
|
1171
|
-
FROM file f
|
|
1172
|
-
JOIN blob b ON b.hash = f.blob_hash
|
|
1173
|
-
WHERE f.repo_id = ? AND f.path = ?
|
|
1174
|
-
LIMIT 1
|
|
1175
|
-
`, [repoId, params.path]);
|
|
1176
|
-
if (rows.length === 0) {
|
|
1177
|
-
throw new Error("Requested snippet file was not indexed. Re-run the indexer or choose another path.");
|
|
1178
|
-
}
|
|
1179
|
-
const row = rows[0];
|
|
1180
|
-
if (!row) {
|
|
1181
|
-
throw new Error("Requested snippet file was not indexed. Re-run the indexer or choose another path.");
|
|
1182
|
-
}
|
|
1183
|
-
if (row.is_binary) {
|
|
1184
|
-
throw new Error("Binary snippets are not supported. Choose a text file to preview its content.");
|
|
1185
|
-
}
|
|
1186
|
-
if (row.content === null) {
|
|
1187
|
-
throw new Error("Snippet content is unavailable. Re-run the indexer to refresh DuckDB state.");
|
|
1188
|
-
}
|
|
1189
|
-
const lines = row.content.split(/\r?\n/);
|
|
1190
|
-
const totalLines = lines.length;
|
|
1191
|
-
const snippetRows = await db.all(`
|
|
1192
|
-
SELECT s.snippet_id, s.start_line, s.end_line, s.symbol_id, sym.name AS symbol_name, sym.kind AS symbol_kind
|
|
1193
|
-
FROM snippet s
|
|
1194
|
-
LEFT JOIN symbol sym
|
|
1195
|
-
ON sym.repo_id = s.repo_id
|
|
1196
|
-
AND sym.path = s.path
|
|
1197
|
-
AND sym.symbol_id = s.symbol_id
|
|
1198
|
-
WHERE s.repo_id = ? AND s.path = ?
|
|
1199
|
-
ORDER BY s.start_line
|
|
1200
|
-
`, [repoId, params.path]);
|
|
1201
|
-
const requestedStart = params.start_line ?? 1;
|
|
1202
|
-
const requestedEnd = params.end_line ?? Math.min(totalLines, requestedStart + DEFAULT_SNIPPET_WINDOW - 1);
|
|
1203
|
-
const useSymbolSnippets = snippetRows.length > 0 && params.end_line === undefined;
|
|
1204
|
-
let snippetSelection = null;
|
|
1205
|
-
if (useSymbolSnippets) {
|
|
1206
|
-
snippetSelection =
|
|
1207
|
-
snippetRows.find((snippet) => requestedStart >= snippet.start_line && requestedStart <= snippet.end_line) ?? null;
|
|
1208
|
-
if (!snippetSelection) {
|
|
1209
|
-
const firstSnippet = snippetRows[0];
|
|
1210
|
-
if (firstSnippet && requestedStart < firstSnippet.start_line) {
|
|
1211
|
-
snippetSelection = firstSnippet;
|
|
1212
|
-
}
|
|
1213
|
-
else {
|
|
1214
|
-
snippetSelection = snippetRows[snippetRows.length - 1] ?? null;
|
|
1215
|
-
}
|
|
1216
|
-
}
|
|
1217
|
-
}
|
|
1218
|
-
let startLine;
|
|
1219
|
-
let endLine;
|
|
1220
|
-
let symbolName = null;
|
|
1221
|
-
let symbolKind = null;
|
|
1222
|
-
if (snippetSelection) {
|
|
1223
|
-
startLine = snippetSelection.start_line;
|
|
1224
|
-
endLine = snippetSelection.end_line;
|
|
1225
|
-
symbolName = snippetSelection.symbol_name;
|
|
1226
|
-
symbolKind = snippetSelection.symbol_kind;
|
|
1227
|
-
}
|
|
1228
|
-
else {
|
|
1229
|
-
startLine = Math.max(1, Math.min(totalLines, requestedStart));
|
|
1230
|
-
endLine = Math.max(startLine, Math.min(totalLines, requestedEnd));
|
|
1231
|
-
}
|
|
1232
|
-
const isCompact = params.compact === true;
|
|
1233
|
-
const addLineNumbers = params.includeLineNumbers === true && !isCompact;
|
|
1234
|
-
let content;
|
|
1235
|
-
if (!isCompact) {
|
|
1236
|
-
const snippetContent = lines.slice(startLine - 1, endLine).join("\n");
|
|
1237
|
-
content = addLineNumbers ? prependLineNumbers(snippetContent, startLine) : snippetContent;
|
|
1238
|
-
}
|
|
1239
|
-
return {
|
|
1240
|
-
path: row.path,
|
|
1241
|
-
startLine,
|
|
1242
|
-
endLine,
|
|
1243
|
-
...(content !== undefined && { content }),
|
|
1244
|
-
totalLines,
|
|
1245
|
-
symbolName,
|
|
1246
|
-
symbolKind,
|
|
1247
|
-
};
|
|
2352
|
+
.filter((result) => result.score > SCORE_FILTER_THRESHOLD) // v1.0.0: Filter out extremely low-scored files (multiplicative penalties)
|
|
2353
|
+
.sort((a, b) => b.score - a.score);
|
|
1248
2354
|
}
|
|
2355
|
+
// snippetsGet has been extracted to ./handlers/snippets-get.ts and re-exported above
|
|
1249
2356
|
// ============================================================================
|
|
1250
2357
|
// Issue #68: Path/Large File Penalty Helper Functions
|
|
1251
2358
|
// ============================================================================
|
|
2359
|
+
/**
|
|
2360
|
+
* v1.0.0: Score filtering threshold for multiplicative penalty model
|
|
2361
|
+
* Files with score < threshold are filtered out (unless they are hint paths)
|
|
2362
|
+
* Default: 0.05 removes files with >95% penalty while keeping relevant files
|
|
2363
|
+
* Can be overridden via KIRI_SCORE_THRESHOLD environment variable
|
|
2364
|
+
*/
|
|
2365
|
+
/**
 * Converts a raw KIRI_SCORE_THRESHOLD value into a numeric score threshold.
 *
 * An unset, empty, or unparseable value would otherwise become NaN, and any
 * `score > NaN` comparison is false, which would silently drop every result.
 * In that case the fallback is used instead.
 *
 * @param {string | undefined} rawValue - Raw environment variable value.
 * @param {number} [fallback=0.05] - Threshold used when rawValue is not a number.
 * @returns {number} The parsed threshold, or the fallback when parsing yields NaN.
 */
function parseScoreFilterThreshold(rawValue, fallback = 0.05) {
    const parsed = Number.parseFloat(rawValue ?? "");
    return Number.isNaN(parsed) ? fallback : parsed;
}
// Resolved once at module load; a malformed override falls back to the default 0.05.
const SCORE_FILTER_THRESHOLD = parseScoreFilterThreshold(process.env.KIRI_SCORE_THRESHOLD);
|
|
1252
2366
|
/**
|
|
1253
2367
|
* 環境変数からペナルティ機能フラグを読み取る
|
|
1254
2368
|
*/
|
|
@@ -1505,15 +2619,40 @@ function computeGraduatedPenalty(pathMatchHits, queryStats, config) {
|
|
|
1505
2619
|
return config.tier2Delta;
|
|
1506
2620
|
return 0; // pathMatchHits >= 3: no penalty
|
|
1507
2621
|
}
|
|
1508
|
-
|
|
2622
|
+
async function contextBundleImpl(context, params) {
|
|
1509
2623
|
context.warningManager.startRequest();
|
|
1510
2624
|
const { db, repoId } = context;
|
|
1511
|
-
const
|
|
1512
|
-
if (
|
|
2625
|
+
const rawGoal = params.goal?.trim() ?? "";
|
|
2626
|
+
if (rawGoal.length === 0) {
|
|
1513
2627
|
throw new Error("context_bundle requires a non-empty goal. Describe your objective to receive context.");
|
|
1514
2628
|
}
|
|
2629
|
+
if (process.env.KIRI_TRACE_METADATA === "1") {
|
|
2630
|
+
console.info(`[metadata-trace-env] goal=${rawGoal}`);
|
|
2631
|
+
}
|
|
2632
|
+
const inlineMetadata = parseInlineMetadataFilters(rawGoal);
|
|
2633
|
+
const paramFilters = normalizeMetadataFiltersParam(params.metadata_filters);
|
|
2634
|
+
const metadataFilters = mergeMetadataFilters([...inlineMetadata.filters, ...paramFilters]);
|
|
2635
|
+
const strictMetadataFilters = metadataFilters.filter((filter) => filter.strict);
|
|
2636
|
+
const hintMetadataFilters = metadataFilters.filter((filter) => !filter.strict);
|
|
2637
|
+
const hasStrictMetadataFilters = strictMetadataFilters.length > 0;
|
|
2638
|
+
const hasHintMetadataFilters = hintMetadataFilters.length > 0;
|
|
2639
|
+
const hasAnyMetadataFilters = metadataFilters.length > 0;
|
|
2640
|
+
const goal = inlineMetadata.cleanedQuery.length > 0 ? inlineMetadata.cleanedQuery : rawGoal;
|
|
2641
|
+
if (process.env.KIRI_TRACE_METADATA === "1") {
|
|
2642
|
+
console.info("[metadata-trace]", JSON.stringify({
|
|
2643
|
+
rawGoal,
|
|
2644
|
+
cleanedGoal: goal,
|
|
2645
|
+
inlineFilters: inlineMetadata.filters,
|
|
2646
|
+
paramFilters,
|
|
2647
|
+
mergedFilters: metadataFilters,
|
|
2648
|
+
}));
|
|
2649
|
+
}
|
|
1515
2650
|
const limit = normalizeBundleLimit(params.limit);
|
|
1516
2651
|
const artifacts = params.artifacts ?? {};
|
|
2652
|
+
const artifactHints = normalizeArtifactHints(artifacts.hints);
|
|
2653
|
+
const hintBuckets = bucketArtifactHints(artifactHints);
|
|
2654
|
+
const artifactPathHints = hintBuckets.pathHints;
|
|
2655
|
+
const substringHints = hintBuckets.substringHints;
|
|
1517
2656
|
const includeTokensEstimate = params.includeTokensEstimate === true;
|
|
1518
2657
|
const isCompact = params.compact === true;
|
|
1519
2658
|
// 項目2: トークンバジェット保護警告
|
|
@@ -1536,9 +2675,20 @@ export async function contextBundle(context, params) {
|
|
|
1536
2675
|
if (artifacts.editing_path) {
|
|
1537
2676
|
keywordSources.push(artifacts.editing_path);
|
|
1538
2677
|
}
|
|
2678
|
+
if (artifactHints.length > 0) {
|
|
2679
|
+
keywordSources.push(artifactHints.join(" "));
|
|
2680
|
+
}
|
|
2681
|
+
if (hasAnyMetadataFilters) {
|
|
2682
|
+
const filterSeed = metadataFilters
|
|
2683
|
+
.map((filter) => `${filter.source ?? "meta"}:${filter.key}=${filter.values.join(",")}`)
|
|
2684
|
+
.join(" ");
|
|
2685
|
+
keywordSources.push(filterSeed);
|
|
2686
|
+
}
|
|
1539
2687
|
const semanticSeed = keywordSources.join(" ");
|
|
1540
2688
|
const queryEmbedding = generateEmbedding(semanticSeed)?.values ?? null;
|
|
1541
2689
|
const extractedTerms = extractKeywords(semanticSeed);
|
|
2690
|
+
const segmentPreview = extractedTerms.pathSegments.slice(0, AUTO_PATH_SEGMENT_LIMIT).join(",");
|
|
2691
|
+
traceSearch(`terms repo=${repoId} id=${params.requestId ?? "n/a"} keywords=${extractedTerms.keywords.length} phrases=${extractedTerms.phrases.length} pathSegments=${extractedTerms.pathSegments.length} segs=[${segmentPreview}]`);
|
|
1542
2692
|
// フォールバック: editing_pathからキーワードを抽出
|
|
1543
2693
|
if (extractedTerms.phrases.length === 0 &&
|
|
1544
2694
|
extractedTerms.keywords.length === 0 &&
|
|
@@ -1553,13 +2703,20 @@ export async function contextBundle(context, params) {
|
|
|
1553
2703
|
const stringMatchSeeds = new Set();
|
|
1554
2704
|
const fileCache = new Map();
|
|
1555
2705
|
// ✅ Cache boost profile config to avoid redundant lookups in hot path
|
|
1556
|
-
const boostProfile = params.boost_profile ??
|
|
1557
|
-
|
|
2706
|
+
const boostProfile = params.boost_profile ??
|
|
2707
|
+
(hasHintMetadataFilters ? "balanced" : hasStrictMetadataFilters ? "docs" : "default");
|
|
2708
|
+
const baseProfileConfig = getBoostProfile(boostProfile);
|
|
2709
|
+
const profileConfig = {
|
|
2710
|
+
...baseProfileConfig,
|
|
2711
|
+
pathMultipliers: loadPathPenalties(baseProfileConfig.pathMultipliers),
|
|
2712
|
+
};
|
|
1558
2713
|
// フレーズマッチング(高い重み: textMatch × 2)- 統合クエリでパフォーマンス改善
|
|
1559
2714
|
if (extractedTerms.phrases.length > 0) {
|
|
1560
2715
|
const phrasePlaceholders = extractedTerms.phrases
|
|
1561
2716
|
.map(() => "b.content ILIKE '%' || ? || '%'")
|
|
1562
2717
|
.join(" OR ");
|
|
2718
|
+
// DEBUG: Log SQL query parameters for troubleshooting
|
|
2719
|
+
traceSearch(`Executing phrase match query with repo_id=${repoId}, phrases=${JSON.stringify(extractedTerms.phrases)}`);
|
|
1563
2720
|
const rows = await db.all(`
|
|
1564
2721
|
SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
|
|
1565
2722
|
FROM file f
|
|
@@ -1573,6 +2730,17 @@ export async function contextBundle(context, params) {
|
|
|
1573
2730
|
ORDER BY f.path
|
|
1574
2731
|
LIMIT ?
|
|
1575
2732
|
`, [repoId, ...extractedTerms.phrases, MAX_MATCHES_PER_KEYWORD * extractedTerms.phrases.length]);
|
|
2733
|
+
// DEBUG: Log returned paths and verify they match expected repo_id
|
|
2734
|
+
if (rows.length > 0) {
|
|
2735
|
+
traceSearch(`Phrase match returned ${rows.length} rows. Sample paths: ${rows
|
|
2736
|
+
.slice(0, 3)
|
|
2737
|
+
.map((r) => r.path)
|
|
2738
|
+
.join(", ")}`);
|
|
2739
|
+
// Verify repo_id of returned files
|
|
2740
|
+
const pathsToCheck = rows.slice(0, 3).map((r) => r.path);
|
|
2741
|
+
const verification = await db.all(`SELECT path, repo_id FROM file WHERE path IN (${pathsToCheck.map(() => "?").join(", ")}) LIMIT 3`, pathsToCheck);
|
|
2742
|
+
traceSearch(`Repo ID verification`, verification);
|
|
2743
|
+
}
|
|
1576
2744
|
for (const row of rows) {
|
|
1577
2745
|
if (row.content === null) {
|
|
1578
2746
|
continue;
|
|
@@ -1584,6 +2752,7 @@ export async function contextBundle(context, params) {
|
|
|
1584
2752
|
continue; // Should not happen, but defensive check
|
|
1585
2753
|
}
|
|
1586
2754
|
const candidate = ensureCandidate(candidates, row.path);
|
|
2755
|
+
candidate.phraseHits += matchedPhrases.length;
|
|
1587
2756
|
// 各マッチしたフレーズに対してスコアリング
|
|
1588
2757
|
for (const phrase of matchedPhrases) {
|
|
1589
2758
|
// フレーズマッチは通常の2倍のスコア
|
|
@@ -1614,6 +2783,7 @@ export async function contextBundle(context, params) {
|
|
|
1614
2783
|
});
|
|
1615
2784
|
}
|
|
1616
2785
|
}
|
|
2786
|
+
traceSearch(`phrase search produced ${rows.length} rows, candidates=${candidates.size}`);
|
|
1617
2787
|
}
|
|
1618
2788
|
// キーワードマッチング(通常の重み)- 統合クエリでパフォーマンス改善
|
|
1619
2789
|
if (extractedTerms.keywords.length > 0) {
|
|
@@ -1648,6 +2818,7 @@ export async function contextBundle(context, params) {
|
|
|
1648
2818
|
for (const keyword of matchedKeywords) {
|
|
1649
2819
|
candidate.score += weights.textMatch;
|
|
1650
2820
|
candidate.reasons.add(`text:${keyword}`);
|
|
2821
|
+
candidate.keywordHits.add(keyword);
|
|
1651
2822
|
}
|
|
1652
2823
|
// Apply boost profile once per file
|
|
1653
2824
|
if (boostProfile !== "none") {
|
|
@@ -1673,6 +2844,124 @@ export async function contextBundle(context, params) {
|
|
|
1673
2844
|
});
|
|
1674
2845
|
}
|
|
1675
2846
|
}
|
|
2847
|
+
traceSearch(`keyword search produced ${rows.length} rows, candidates=${candidates.size}`);
|
|
2848
|
+
}
|
|
2849
|
+
const fallbackTerms = Array.from(new Set([...extractedTerms.phrases, ...extractedTerms.keywords, ...extractedTerms.pathSegments]
|
|
2850
|
+
.map((term) => term.toLowerCase())
|
|
2851
|
+
.filter((term) => term.length >= 3))).slice(0, PATH_FALLBACK_TERMS_LIMIT);
|
|
2852
|
+
if (fallbackTerms.length > 0) {
|
|
2853
|
+
const fallbackRows = await fetchPathFallbackCandidates(db, repoId, fallbackTerms, Math.min(limit * 2, PATH_FALLBACK_LIMIT));
|
|
2854
|
+
const fallbackReason = stringMatchSeeds.size === 0
|
|
2855
|
+
? "no-string-match"
|
|
2856
|
+
: candidates.size < limit
|
|
2857
|
+
? "low-candidates"
|
|
2858
|
+
: "supplemental";
|
|
2859
|
+
traceSearch(`path fallback triggered (${fallbackReason}) terms=${JSON.stringify(fallbackTerms)} rows=${fallbackRows.length}`);
|
|
2860
|
+
const fallbackWeight = stringMatchSeeds.size === 0 ? weights.pathMatch * 0.75 : weights.pathMatch * 0.2;
|
|
2861
|
+
for (const row of fallbackRows) {
|
|
2862
|
+
const candidate = ensureCandidate(candidates, row.path);
|
|
2863
|
+
candidate.pathFallbackReason = fallbackReason;
|
|
2864
|
+
candidate.score += fallbackWeight;
|
|
2865
|
+
candidate.reasons.add("fallback:path");
|
|
2866
|
+
const contentLower = row.content?.toLowerCase() ?? "";
|
|
2867
|
+
if (contentLower.length > 0) {
|
|
2868
|
+
let textHits = 0;
|
|
2869
|
+
for (const term of fallbackTerms) {
|
|
2870
|
+
if (contentLower.includes(term)) {
|
|
2871
|
+
textHits += 1;
|
|
2872
|
+
candidate.keywordHits.add(term);
|
|
2873
|
+
}
|
|
2874
|
+
}
|
|
2875
|
+
candidate.fallbackTextHits += textHits;
|
|
2876
|
+
if (textHits > 0) {
|
|
2877
|
+
const textBoost = textHits * weights.textMatch * 0.15;
|
|
2878
|
+
candidate.score += textBoost;
|
|
2879
|
+
candidate.reasons.add(`fallback:content:${textHits}`);
|
|
2880
|
+
}
|
|
2881
|
+
}
|
|
2882
|
+
candidate.matchLine ??= 1;
|
|
2883
|
+
candidate.lang ??= row.lang;
|
|
2884
|
+
candidate.ext ??= row.ext;
|
|
2885
|
+
candidate.totalLines ??= row.content?.split(/\r?\n/).length ?? null;
|
|
2886
|
+
candidate.content ??= row.content;
|
|
2887
|
+
candidate.embedding ??= parseEmbedding(row.vector_json ?? null, row.vector_dims ?? null);
|
|
2888
|
+
if (boostProfile !== "none") {
|
|
2889
|
+
applyBoostProfile(candidate, row, profileConfig, weights, extractedTerms);
|
|
2890
|
+
}
|
|
2891
|
+
stringMatchSeeds.add(row.path);
|
|
2892
|
+
if (!fileCache.has(row.path) && row.content) {
|
|
2893
|
+
fileCache.set(row.path, {
|
|
2894
|
+
content: row.content,
|
|
2895
|
+
lang: row.lang,
|
|
2896
|
+
ext: row.ext,
|
|
2897
|
+
totalLines: candidate.totalLines ?? 0,
|
|
2898
|
+
embedding: candidate.embedding,
|
|
2899
|
+
});
|
|
2900
|
+
}
|
|
2901
|
+
}
|
|
2902
|
+
// Drop fallback-only candidates with zero text evidence before trimming
|
|
2903
|
+
for (const [path, candidate] of Array.from(candidates.entries())) {
|
|
2904
|
+
const isFallbackOnly = candidate.reasons.has("fallback:path") &&
|
|
2905
|
+
candidate.keywordHits.size === 0 &&
|
|
2906
|
+
candidate.phraseHits === 0;
|
|
2907
|
+
const hasTextEvidence = candidate.fallbackTextHits > 0;
|
|
2908
|
+
if (isFallbackOnly && !hasTextEvidence) {
|
|
2909
|
+
candidates.delete(path);
|
|
2910
|
+
}
|
|
2911
|
+
}
|
|
2912
|
+
// Demote fallback-only hits without text evidence
|
|
2913
|
+
for (const candidate of candidates.values()) {
|
|
2914
|
+
const isFallbackOnly = candidate.reasons.has("fallback:path") &&
|
|
2915
|
+
candidate.keywordHits.size === 0 &&
|
|
2916
|
+
candidate.phraseHits === 0;
|
|
2917
|
+
const hasTextEvidence = candidate.fallbackTextHits > 0;
|
|
2918
|
+
if (isFallbackOnly && !hasTextEvidence) {
|
|
2919
|
+
candidate.scoreMultiplier *= 0.5;
|
|
2920
|
+
candidate.reasons.add("penalty:fallback-no-text");
|
|
2921
|
+
}
|
|
2922
|
+
}
|
|
2923
|
+
if (fallbackRows.length > PATH_FALLBACK_KEEP) {
|
|
2924
|
+
const fallbackOnly = Array.from(candidates.entries())
|
|
2925
|
+
.filter(([_, candidate]) => candidate.reasons.has("fallback:path") &&
|
|
2926
|
+
candidate.keywordHits.size === 0 &&
|
|
2927
|
+
candidate.phraseHits === 0)
|
|
2928
|
+
.sort((a, b) => b[1].score - a[1].score);
|
|
2929
|
+
const toDrop = fallbackOnly.slice(PATH_FALLBACK_KEEP);
|
|
2930
|
+
for (const [path] of toDrop) {
|
|
2931
|
+
candidates.delete(path);
|
|
2932
|
+
}
|
|
2933
|
+
traceSearch(`path fallback trimmed kept=${PATH_FALLBACK_KEEP} dropped=${toDrop.length} candidates=${candidates.size}`);
|
|
2934
|
+
}
|
|
2935
|
+
}
|
|
2936
|
+
if (extractedTerms.keywords.length > 0 || extractedTerms.phrases.length > 0) {
|
|
2937
|
+
for (const candidate of candidates.values()) {
|
|
2938
|
+
applyCoverageBoost(candidate, extractedTerms, weights);
|
|
2939
|
+
}
|
|
2940
|
+
}
|
|
2941
|
+
const artifactPathTargets = artifactPathHints.map((hintPath) => ({
|
|
2942
|
+
path: hintPath,
|
|
2943
|
+
sourceHint: hintPath,
|
|
2944
|
+
origin: "artifact",
|
|
2945
|
+
}));
|
|
2946
|
+
const dictionaryPathTargets = await fetchDictionaryPathHints(db, context.tableAvailability, repoId, substringHints, HINT_DICTIONARY_LIMIT);
|
|
2947
|
+
const { list: resolvedPathHintTargets, meta: hintSeedMeta } = createHintSeedMeta([
|
|
2948
|
+
...artifactPathTargets,
|
|
2949
|
+
...dictionaryPathTargets,
|
|
2950
|
+
]);
|
|
2951
|
+
if (resolvedPathHintTargets.length > 0) {
|
|
2952
|
+
await applyPathHintPromotions({
|
|
2953
|
+
db,
|
|
2954
|
+
tableAvailability: context.tableAvailability,
|
|
2955
|
+
repoId,
|
|
2956
|
+
hintTargets: resolvedPathHintTargets,
|
|
2957
|
+
candidates,
|
|
2958
|
+
fileCache,
|
|
2959
|
+
weights,
|
|
2960
|
+
hintSeedMeta,
|
|
2961
|
+
});
|
|
2962
|
+
}
|
|
2963
|
+
if (substringHints.length > 0) {
|
|
2964
|
+
await addHintSubstringMatches(db, context.tableAvailability, repoId, substringHints, candidates, HINT_SUBSTRING_LIMIT, HINT_SUBSTRING_BOOST);
|
|
1676
2965
|
}
|
|
1677
2966
|
if (artifacts.editing_path) {
|
|
1678
2967
|
const editingCandidate = ensureCandidate(candidates, artifacts.editing_path);
|
|
@@ -1681,7 +2970,6 @@ export async function contextBundle(context, params) {
|
|
|
1681
2970
|
editingCandidate.matchLine ??= 1;
|
|
1682
2971
|
}
|
|
1683
2972
|
// SQL injection defense: validation pattern for file paths
|
|
1684
|
-
const SAFE_PATH_PATTERN = /^[a-zA-Z0-9_.\-/]+$/;
|
|
1685
2973
|
const dependencySeeds = new Set();
|
|
1686
2974
|
for (const pathSeed of stringMatchSeeds) {
|
|
1687
2975
|
if (!SAFE_PATH_PATTERN.test(pathSeed)) {
|
|
@@ -1695,10 +2983,13 @@ export async function contextBundle(context, params) {
|
|
|
1695
2983
|
}
|
|
1696
2984
|
if (artifacts.editing_path) {
|
|
1697
2985
|
if (!SAFE_PATH_PATTERN.test(artifacts.editing_path)) {
|
|
1698
|
-
throw new Error(`Invalid editing_path format.
|
|
2986
|
+
throw new Error(`Invalid editing_path format: ${artifacts.editing_path}. Use only A-Z, 0-9, _, ., -, / characters.`);
|
|
1699
2987
|
}
|
|
1700
2988
|
dependencySeeds.add(artifacts.editing_path);
|
|
1701
2989
|
}
|
|
2990
|
+
for (const target of resolvedPathHintTargets) {
|
|
2991
|
+
dependencySeeds.add(target.path);
|
|
2992
|
+
}
|
|
1702
2993
|
if (dependencySeeds.size > 0) {
|
|
1703
2994
|
// SQL injection defense: validate the seed count before generating placeholders
|
|
1704
2995
|
if (dependencySeeds.size > MAX_DEPENDENCY_SEEDS_QUERY_LIMIT) {
|
|
@@ -1708,7 +2999,7 @@ export async function contextBundle(context, params) {
|
|
|
1708
2999
|
// Defensive check: confirm the placeholders have the expected format
|
|
1709
3000
|
// Expected format: "?, ?, ..." (question marks, commas, and spaces only)
|
|
1710
3001
|
if (!/^(\?)(,\s*\?)*$/.test(placeholders)) {
|
|
1711
|
-
throw new Error("Invalid placeholder
|
|
3002
|
+
throw new Error("Invalid dependency placeholder sequence detected. Remove unsafe dependency seeds and retry the request.");
|
|
1712
3003
|
}
|
|
1713
3004
|
const depRows = await db.all(`
|
|
1714
3005
|
SELECT src_path, dst_kind, dst, rel
|
|
@@ -1746,31 +3037,74 @@ export async function contextBundle(context, params) {
|
|
|
1746
3037
|
}
|
|
1747
3038
|
}
|
|
1748
3039
|
}
|
|
1749
|
-
const
|
|
1750
|
-
|
|
1751
|
-
|
|
1752
|
-
|
|
1753
|
-
|
|
1754
|
-
candidate.content = cached.content;
|
|
1755
|
-
candidate.lang = cached.lang;
|
|
1756
|
-
candidate.ext = cached.ext;
|
|
1757
|
-
candidate.totalLines = cached.totalLines;
|
|
1758
|
-
candidate.embedding = cached.embedding;
|
|
3040
|
+
// Builds the array of candidates that have file content attached.
// - Candidates whose path is rejected by isSuppressedPath are left out.
// - A candidate without content is filled from fileCache when an entry exists;
//   otherwise loadFileContent is awaited and its result is stored in fileCache.
// - A candidate whose content cannot be loaded is left out of the result.
// Returns the candidates in the iteration order of candidates.values().
const materializeCandidates = async () => {
|
|
3041
|
+
const result = [];
|
|
3042
|
+
for (const candidate of candidates.values()) {
|
|
3043
|
+
if (isSuppressedPath(candidate.path)) {
|
|
3044
|
+
continue;
|
|
1759
3045
|
}
|
|
1760
|
-
|
|
1761
|
-
const
|
|
|
1762
|
-
if (
|
|
|
1763
|
-
|
|
3046
|
+
if (!candidate.content) {
|
|
3047
|
+
const cached = fileCache.get(candidate.path);
|
|
3048
|
+
if (cached) {
|
|
3049
|
+
candidate.content = cached.content;
|
|
3050
|
+
candidate.lang = cached.lang;
|
|
3051
|
+
candidate.ext = cached.ext;
|
|
3052
|
+
candidate.totalLines = cached.totalLines;
|
|
3053
|
+
candidate.embedding = cached.embedding;
|
|
1764
3054
|
}
|
|
1765
|
-
|
|
1766
|
-
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1770
|
-
|
|
3055
|
+
else {
|
|
3056
|
+
const loaded = await loadFileContent(db, repoId, candidate.path);
|
|
3057
|
+
// Nothing could be loaded for this path: omit the candidate from the result.
if (!loaded) {
|
|
3058
|
+
continue;
|
|
3059
|
+
}
|
|
3060
|
+
candidate.content = loaded.content;
|
|
3061
|
+
candidate.lang = loaded.lang;
|
|
3062
|
+
candidate.ext = loaded.ext;
|
|
3063
|
+
candidate.totalLines = loaded.totalLines;
|
|
3064
|
+
candidate.embedding = loaded.embedding;
|
|
3065
|
+
fileCache.set(candidate.path, loaded);
|
|
3066
|
+
}
|
|
3067
|
+
}
|
|
3068
|
+
result.push(candidate);
|
|
3069
|
+
}
|
|
3070
|
+
return result;
|
|
3071
|
+
};
|
|
3072
|
+
// Adds candidates that come from metadata filters alone.
// Does nothing when hasAnyMetadataFilters is false or when
// fetchMetadataOnlyCandidates returns no rows (it is asked for up to limit * 2).
// For each returned row the candidate is created if missing; when the row has
// content, the candidate's content/totalLines are overwritten and fileCache is
// updated; lang, ext and matchLine are only filled in when still unset.
const addMetadataFallbackCandidates = async () => {
|
|
3073
|
+
if (!hasAnyMetadataFilters) {
|
|
3074
|
+
return;
|
|
3075
|
+
}
|
|
3076
|
+
const metadataRows = await fetchMetadataOnlyCandidates(db, context.tableAvailability, repoId, metadataFilters, limit * 2);
|
|
3077
|
+
if (metadataRows.length === 0) {
|
|
3078
|
+
return;
|
|
3079
|
+
}
|
|
3080
|
+
for (const row of metadataRows) {
|
|
3081
|
+
const candidate = ensureCandidate(candidates, row.path);
|
|
3082
|
+
if (row.content) {
|
|
3083
|
+
candidate.content = row.content;
|
|
3084
|
+
candidate.totalLines = row.content.split(/\r?\n/).length;
|
|
3085
|
+
fileCache.set(row.path, {
|
|
3086
|
+
content: row.content,
|
|
3087
|
+
lang: row.lang,
|
|
3088
|
+
ext: row.ext,
|
|
3089
|
+
totalLines: candidate.totalLines,
|
|
3090
|
+
embedding: candidate.embedding,
|
|
3091
|
+
});
|
|
1771
3092
|
}
|
|
3093
|
+
candidate.lang ??= row.lang;
|
|
3094
|
+
candidate.ext ??= row.ext;
|
|
3095
|
+
candidate.matchLine ??= 1;
|
|
3096
|
+
// Raise the score to a floor of 1 + 0.2 per metadata filter; a higher existing score is kept.
candidate.score = Math.max(candidate.score, 1 + metadataFilters.length * 0.2);
|
|
1772
3097
|
}
|
|
1773
|
-
|
|
3098
|
+
};
|
|
3099
|
+
if (hasAnyMetadataFilters) {
|
|
3100
|
+
await addMetadataFallbackCandidates();
|
|
3101
|
+
}
|
|
3102
|
+
let materializedCandidates = await materializeCandidates();
|
|
3103
|
+
traceSearch(`materialized candidates: ${materializedCandidates.length}`);
|
|
3104
|
+
if (materializedCandidates.length === 0 && hasAnyMetadataFilters) {
|
|
3105
|
+
await addMetadataFallbackCandidates();
|
|
3106
|
+
materializedCandidates = await materializeCandidates();
|
|
3107
|
+
traceSearch(`materialized candidates after metadata fallback: ${materializedCandidates.length}`);
|
|
1774
3108
|
}
|
|
1775
3109
|
if (materializedCandidates.length === 0) {
|
|
1776
3110
|
// Get warnings from WarningManager (includes breaking change notification if applicable)
|
|
@@ -1781,6 +3115,72 @@ export async function contextBundle(context, params) {
|
|
|
1781
3115
|
...(warnings.length > 0 && { warnings }),
|
|
1782
3116
|
};
|
|
1783
3117
|
}
|
|
3118
|
+
const metadataKeywordSet = new Set(extractedTerms.keywords.map((keyword) => keyword.toLowerCase()));
|
|
3119
|
+
const filterValueSet = new Set(metadataFilters.flatMap((filter) => filter.values.map((value) => value.toLowerCase())));
|
|
3120
|
+
let metadataEntriesMap;
|
|
3121
|
+
if (hasAnyMetadataFilters || metadataKeywordSet.size > 0 || filterValueSet.size > 0) {
|
|
3122
|
+
metadataEntriesMap = await loadMetadataForPaths(db, context.tableAvailability, repoId, materializedCandidates.map((candidate) => candidate.path));
|
|
3123
|
+
}
|
|
3124
|
+
if (hasStrictMetadataFilters) {
|
|
3125
|
+
metadataEntriesMap ??= new Map();
|
|
3126
|
+
for (let i = materializedCandidates.length - 1; i >= 0; i--) {
|
|
3127
|
+
const candidate = materializedCandidates[i];
|
|
3128
|
+
if (!candidate) {
|
|
3129
|
+
continue; // Skip undefined entries
|
|
3130
|
+
}
|
|
3131
|
+
const entries = metadataEntriesMap.get(candidate.path);
|
|
3132
|
+
const matchesFilters = candidateMatchesMetadataFilters(entries, strictMetadataFilters);
|
|
3133
|
+
if (!matchesFilters) {
|
|
3134
|
+
materializedCandidates.splice(i, 1);
|
|
3135
|
+
continue;
|
|
3136
|
+
}
|
|
3137
|
+
candidate.reasons.add("metadata:filter");
|
|
3138
|
+
if (process.env.KIRI_TRACE_METADATA === "1") {
|
|
3139
|
+
console.info(`[metadata-trace-match] path=${candidate.path}`);
|
|
3140
|
+
}
|
|
3141
|
+
}
|
|
3142
|
+
if (materializedCandidates.length === 0 && hasAnyMetadataFilters) {
|
|
3143
|
+
await addMetadataFallbackCandidates();
|
|
3144
|
+
materializedCandidates = await materializeCandidates();
|
|
3145
|
+
}
|
|
3146
|
+
if (materializedCandidates.length === 0) {
|
|
3147
|
+
const warnings = [...context.warningManager.responseWarnings];
|
|
3148
|
+
return {
|
|
3149
|
+
context: [],
|
|
3150
|
+
...(includeTokensEstimate && { tokens_estimate: 0 }),
|
|
3151
|
+
...(warnings.length > 0 && { warnings }),
|
|
3152
|
+
};
|
|
3153
|
+
}
|
|
3154
|
+
}
|
|
3155
|
+
if (hasHintMetadataFilters) {
|
|
3156
|
+
metadataEntriesMap ??= new Map();
|
|
3157
|
+
for (const candidate of materializedCandidates) {
|
|
3158
|
+
const entries = metadataEntriesMap.get(candidate.path);
|
|
3159
|
+
const matchesHints = candidateMatchesMetadataFilters(entries, hintMetadataFilters);
|
|
3160
|
+
if (matchesHints) {
|
|
3161
|
+
candidate.score += METADATA_HINT_BONUS;
|
|
3162
|
+
candidate.reasons.add("metadata:hint");
|
|
3163
|
+
}
|
|
3164
|
+
}
|
|
3165
|
+
}
|
|
3166
|
+
const inboundCounts = await loadInboundLinkCounts(db, context.tableAvailability, repoId, materializedCandidates.map((candidate) => candidate.path));
|
|
3167
|
+
if (metadataEntriesMap) {
|
|
3168
|
+
for (const candidate of materializedCandidates) {
|
|
3169
|
+
const entries = metadataEntriesMap.get(candidate.path);
|
|
3170
|
+
const metadataBoost = computeMetadataBoost(entries, metadataKeywordSet, filterValueSet);
|
|
3171
|
+
if (metadataBoost > 0) {
|
|
3172
|
+
candidate.score += metadataBoost;
|
|
3173
|
+
candidate.reasons.add("boost:metadata");
|
|
3174
|
+
}
|
|
3175
|
+
}
|
|
3176
|
+
}
|
|
3177
|
+
for (const candidate of materializedCandidates) {
|
|
3178
|
+
const linkBoost = computeInboundLinkBoost(inboundCounts.get(candidate.path));
|
|
3179
|
+
if (linkBoost > 0) {
|
|
3180
|
+
candidate.score += linkBoost;
|
|
3181
|
+
candidate.reasons.add("boost:links");
|
|
3182
|
+
}
|
|
3183
|
+
}
|
|
1784
3184
|
applyStructuralScores(materializedCandidates, queryEmbedding, weights.structural);
|
|
1785
3185
|
// ✅ CRITICAL SAFETY: Apply multipliers AFTER all additive scoring (v0.7.0)
|
|
1786
3186
|
// Only apply to positive scores to prevent negative score inversion
|
|
@@ -1819,18 +3219,39 @@ export async function contextBundle(context, params) {
|
|
|
1819
3219
|
const telemetry = computePenaltyTelemetry(materializedCandidates);
|
|
1820
3220
|
logPenaltyTelemetry(telemetry, queryStats);
|
|
1821
3221
|
}
|
|
1822
|
-
|
|
1823
|
-
|
|
3222
|
+
// v1.0.0: Filter out extremely low-scored candidates (result of multiplicative penalties)
|
|
3223
|
+
// Threshold removes files with >95% penalty while keeping reasonably relevant files
|
|
3224
|
+
// Hint paths are exempt from this threshold (always included if score > 0)
|
|
3225
|
+
const hintPathSet = new Set(resolvedPathHintTargets.map((target) => target.path));
|
|
3226
|
+
const rankedCandidates = materializedCandidates
|
|
3227
|
+
.filter((candidate) => candidate.score > SCORE_FILTER_THRESHOLD ||
|
|
3228
|
+
(candidate.score > 0 && hintPathSet.has(candidate.path)))
|
|
1824
3229
|
.sort((a, b) => {
|
|
1825
3230
|
if (b.score === a.score) {
|
|
1826
3231
|
return a.path.localeCompare(b.path);
|
|
1827
3232
|
}
|
|
1828
3233
|
return b.score - a.score;
|
|
1829
|
-
})
|
|
1830
|
-
|
|
1831
|
-
|
|
3234
|
+
});
|
|
3235
|
+
if (TRACE_SEARCH) {
|
|
3236
|
+
const sample = rankedCandidates.slice(0, 5).map((candidate) => ({
|
|
3237
|
+
path: candidate.path,
|
|
3238
|
+
score: Number(candidate.score.toFixed(3)),
|
|
3239
|
+
reasons: Array.from(candidate.reasons).slice(0, 3),
|
|
3240
|
+
}));
|
|
3241
|
+
traceSearch(`ranked candidates=${rankedCandidates.length}`, sample);
|
|
3242
|
+
}
|
|
3243
|
+
const prioritizedCandidates = prioritizeHintCandidates(rankedCandidates, resolvedPathHintTargets.map((target) => target.path), limit);
|
|
3244
|
+
if (prioritizedCandidates.length === 0) {
|
|
3245
|
+
const warnings = [...context.warningManager.responseWarnings];
|
|
3246
|
+
return {
|
|
3247
|
+
context: [],
|
|
3248
|
+
...(includeTokensEstimate && { tokens_estimate: 0 }),
|
|
3249
|
+
...(warnings.length > 0 && { warnings }),
|
|
3250
|
+
};
|
|
3251
|
+
}
|
|
3252
|
+
const maxScore = Math.max(...prioritizedCandidates.map((candidate) => candidate.score));
|
|
1832
3253
|
const results = [];
|
|
1833
|
-
for (const candidate of
|
|
3254
|
+
for (const candidate of prioritizedCandidates) {
|
|
1834
3255
|
if (!candidate.content) {
|
|
1835
3256
|
continue;
|
|
1836
3257
|
}
|
|
@@ -1858,6 +3279,23 @@ export async function contextBundle(context, params) {
|
|
|
1858
3279
|
startLine = Math.max(1, matchLine - windowHalf);
|
|
1859
3280
|
endLine = Math.min(totalLines === 0 ? matchLine + windowHalf : totalLines, startLine + FALLBACK_SNIPPET_WINDOW - 1);
|
|
1860
3281
|
}
|
|
3282
|
+
if (CLAMP_SNIPPETS_ENABLED) {
|
|
3283
|
+
// Clamp snippet length to FALLBACK_SNIPPET_WINDOW even when symbol spans large regions
|
|
3284
|
+
const maxWindow = FALLBACK_SNIPPET_WINDOW;
|
|
3285
|
+
const selectedEnd = selected ? selected.end_line : endLine;
|
|
3286
|
+
const selectedStart = selected ? selected.start_line : startLine;
|
|
3287
|
+
if (endLine - startLine + 1 > maxWindow) {
|
|
3288
|
+
const anchor = candidate.matchLine ?? startLine;
|
|
3289
|
+
let clampedStart = Math.max(selectedStart, anchor - Math.floor(maxWindow / 2));
|
|
3290
|
+
let clampedEnd = clampedStart + maxWindow - 1;
|
|
3291
|
+
if (clampedEnd > selectedEnd) {
|
|
3292
|
+
clampedEnd = selectedEnd;
|
|
3293
|
+
clampedStart = Math.max(selectedStart, clampedEnd - maxWindow + 1);
|
|
3294
|
+
}
|
|
3295
|
+
startLine = clampedStart;
|
|
3296
|
+
endLine = Math.max(clampedStart, clampedEnd);
|
|
3297
|
+
}
|
|
3298
|
+
}
|
|
1861
3299
|
if (endLine < startLine) {
|
|
1862
3300
|
endLine = startLine;
|
|
1863
3301
|
}
|
|
@@ -1885,7 +3323,7 @@ export async function contextBundle(context, params) {
|
|
|
1885
3323
|
let tokensEstimate;
|
|
1886
3324
|
if (includeTokensEstimate) {
|
|
1887
3325
|
tokensEstimate = results.reduce((acc, item) => {
|
|
1888
|
-
const candidate =
|
|
3326
|
+
const candidate = prioritizedCandidates.find((c) => c.path === item.path);
|
|
1889
3327
|
if (candidate && candidate.content) {
|
|
1890
3328
|
return acc + estimateTokensFromContent(candidate.content, item.range[0], item.range[1]);
|
|
1891
3329
|
}
|
|
@@ -1896,8 +3334,13 @@ export async function contextBundle(context, params) {
|
|
|
1896
3334
|
}
|
|
1897
3335
|
// Get warnings from WarningManager (includes breaking change notification if applicable)
|
|
1898
3336
|
const warnings = [...context.warningManager.responseWarnings];
|
|
3337
|
+
const shouldFilterResults = FINAL_RESULT_SUPPRESSION_ENABLED && SUPPRESS_NON_CODE_ENABLED;
|
|
3338
|
+
const sanitizedResults = shouldFilterResults
|
|
3339
|
+
? results.filter((item) => !isSuppressedPath(item.path))
|
|
3340
|
+
: results;
|
|
3341
|
+
const finalResults = sanitizedResults.length > 0 ? sanitizedResults : results;
|
|
1899
3342
|
const payload = {
|
|
1900
|
-
context:
|
|
3343
|
+
context: finalResults,
|
|
1901
3344
|
...(warnings.length > 0 && { warnings }),
|
|
1902
3345
|
};
|
|
1903
3346
|
if (tokensEstimate !== undefined) {
|
|
@@ -2100,35 +3543,27 @@ export async function depsClosure(context, params) {
|
|
|
2100
3543
|
edges,
|
|
2101
3544
|
};
|
|
2102
3545
|
}
|
|
2103
|
-
|
|
3546
|
+
/**
|
|
3547
|
+
* Resolves a repository root path to its database ID.
|
|
3548
|
+
*
|
|
3549
|
+
* This function is kept for backward compatibility; internally it delegates to the RepoResolver exposed by the services object.
|
|
3550
|
+
*
|
|
3551
|
+
* @param db - DuckDB client
|
|
3552
|
+
* @param repoRoot - root path of the repository
|
|
3553
|
+
* @param services - optional ServerServices (created via createServerServices(db) when null or undefined)
|
|
3554
|
+
* @returns the repository ID
|
|
3555
|
+
* @throws Error when the repository has not been indexed (presumably raised by repoResolver.resolveId; not visible here)
|
|
3556
|
+
*/
|
|
3557
|
+
export async function resolveRepoId(db, repoRoot, services) {
|
|
3558
|
+
const svc = services ?? createServerServices(db);
|
|
3559
|
+
return await svc.repoResolver.resolveId(repoRoot);
|
|
3560
|
+
}
|
|
3561
|
+
/**
 * Exported wrapper around contextBundleImpl.
 *
 * Awaits contextBundleImpl with the same arguments and returns its result.
 * Any error is logged with console.error (prefixed "context_bundle error:")
 * and then rethrown unchanged, so callers still observe the original error.
 *
 * @param context - passed through to contextBundleImpl
 * @param params - passed through to contextBundleImpl
 * @returns the value contextBundleImpl resolves to
 * @throws rethrows whatever contextBundleImpl throws
 */
export async function contextBundle(context, params) {
|
|
2104
3562
|
try {
|
|
2105
|
-
|
|
2106
|
-
const normalized = candidates[0];
|
|
2107
|
-
const placeholders = candidates.map(() => "?").join(", ");
|
|
2108
|
-
const rows = await db.all(`SELECT id, root FROM repo WHERE root IN (${placeholders}) LIMIT 1`, candidates);
|
|
2109
|
-
if (rows.length === 0) {
|
|
2110
|
-
const existingRows = await db.all("SELECT id, root FROM repo");
|
|
2111
|
-
for (const candidate of existingRows) {
|
|
2112
|
-
if (normalizeRepoPath(candidate.root) === normalized) {
|
|
2113
|
-
await db.run("UPDATE repo SET root = ? WHERE id = ?", [normalized, candidate.id]);
|
|
2114
|
-
return candidate.id;
|
|
2115
|
-
}
|
|
2116
|
-
}
|
|
2117
|
-
throw new Error("Target repository is missing from DuckDB. Run the indexer before starting the server.");
|
|
2118
|
-
}
|
|
2119
|
-
const row = rows[0];
|
|
2120
|
-
if (!row) {
|
|
2121
|
-
throw new Error("Failed to retrieve repository record. Database returned empty result.");
|
|
2122
|
-
}
|
|
2123
|
-
if (row.root !== normalized) {
|
|
2124
|
-
await db.run("UPDATE repo SET root = ? WHERE id = ?", [normalized, row.id]);
|
|
2125
|
-
}
|
|
2126
|
-
return row.id;
|
|
3563
|
+
return await contextBundleImpl(context, params);
|
|
2127
3564
|
}
|
|
2128
3565
|
catch (error) {
|
|
2129
|
-
|
|
2130
|
-
throw new Error("Target repository is missing from DuckDB. Run the indexer before starting the server.");
|
|
2131
|
-
}
|
|
3566
|
+
console.error("context_bundle error:", error);
|
|
2132
3567
|
throw error;
|
|
2133
3568
|
}
|
|
2134
3569
|
}
|