kiri-mcp-server 0.10.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/README.md +52 -10
  2. package/config/kiri.yml +25 -0
  3. package/config/scoring-profiles.yml +82 -35
  4. package/dist/config/kiri.yml +25 -0
  5. package/dist/config/scoring-profiles.yml +82 -35
  6. package/dist/package.json +9 -1
  7. package/dist/src/indexer/cli.d.ts.map +1 -1
  8. package/dist/src/indexer/cli.js +712 -98
  9. package/dist/src/indexer/cli.js.map +1 -1
  10. package/dist/src/indexer/git.d.ts.map +1 -1
  11. package/dist/src/indexer/git.js +41 -3
  12. package/dist/src/indexer/git.js.map +1 -1
  13. package/dist/src/indexer/migrations/repo-merger.d.ts +33 -0
  14. package/dist/src/indexer/migrations/repo-merger.d.ts.map +1 -0
  15. package/dist/src/indexer/migrations/repo-merger.js +67 -0
  16. package/dist/src/indexer/migrations/repo-merger.js.map +1 -0
  17. package/dist/src/indexer/schema.d.ts +66 -0
  18. package/dist/src/indexer/schema.d.ts.map +1 -1
  19. package/dist/src/indexer/schema.js +337 -0
  20. package/dist/src/indexer/schema.js.map +1 -1
  21. package/dist/src/server/boost-profiles.d.ts +6 -5
  22. package/dist/src/server/boost-profiles.d.ts.map +1 -1
  23. package/dist/src/server/boost-profiles.js +138 -0
  24. package/dist/src/server/boost-profiles.js.map +1 -1
  25. package/dist/src/server/config-loader.d.ts +9 -0
  26. package/dist/src/server/config-loader.d.ts.map +1 -0
  27. package/dist/src/server/config-loader.js +121 -0
  28. package/dist/src/server/config-loader.js.map +1 -0
  29. package/dist/src/server/config.d.ts +47 -0
  30. package/dist/src/server/config.d.ts.map +1 -0
  31. package/dist/src/server/config.js +157 -0
  32. package/dist/src/server/config.js.map +1 -0
  33. package/dist/src/server/context.d.ts +29 -0
  34. package/dist/src/server/context.d.ts.map +1 -1
  35. package/dist/src/server/context.js +26 -1
  36. package/dist/src/server/context.js.map +1 -1
  37. package/dist/src/server/handlers/snippets-get.d.ts +36 -0
  38. package/dist/src/server/handlers/snippets-get.d.ts.map +1 -0
  39. package/dist/src/server/handlers/snippets-get.js +120 -0
  40. package/dist/src/server/handlers/snippets-get.js.map +1 -0
  41. package/dist/src/server/handlers.d.ts +33 -20
  42. package/dist/src/server/handlers.d.ts.map +1 -1
  43. package/dist/src/server/handlers.js +1805 -370
  44. package/dist/src/server/handlers.js.map +1 -1
  45. package/dist/src/server/indexBootstrap.d.ts.map +1 -1
  46. package/dist/src/server/indexBootstrap.js +49 -2
  47. package/dist/src/server/indexBootstrap.js.map +1 -1
  48. package/dist/src/server/main.d.ts.map +1 -1
  49. package/dist/src/server/main.js +7 -0
  50. package/dist/src/server/main.js.map +1 -1
  51. package/dist/src/server/profile-selector.d.ts +33 -0
  52. package/dist/src/server/profile-selector.d.ts.map +1 -0
  53. package/dist/src/server/profile-selector.js +291 -0
  54. package/dist/src/server/profile-selector.js.map +1 -0
  55. package/dist/src/server/rpc.d.ts.map +1 -1
  56. package/dist/src/server/rpc.js +60 -10
  57. package/dist/src/server/rpc.js.map +1 -1
  58. package/dist/src/server/runtime.d.ts.map +1 -1
  59. package/dist/src/server/runtime.js +14 -4
  60. package/dist/src/server/runtime.js.map +1 -1
  61. package/dist/src/server/scoring.d.ts +7 -1
  62. package/dist/src/server/scoring.d.ts.map +1 -1
  63. package/dist/src/server/scoring.js +121 -21
  64. package/dist/src/server/scoring.js.map +1 -1
  65. package/dist/src/server/services/index.d.ts +24 -0
  66. package/dist/src/server/services/index.d.ts.map +1 -0
  67. package/dist/src/server/services/index.js +20 -0
  68. package/dist/src/server/services/index.js.map +1 -0
  69. package/dist/src/server/services/repo-repository.d.ts +61 -0
  70. package/dist/src/server/services/repo-repository.d.ts.map +1 -0
  71. package/dist/src/server/services/repo-repository.js +93 -0
  72. package/dist/src/server/services/repo-repository.js.map +1 -0
  73. package/dist/src/server/services/repo-resolver.d.ts +28 -0
  74. package/dist/src/server/services/repo-resolver.d.ts.map +1 -0
  75. package/dist/src/server/services/repo-resolver.js +62 -0
  76. package/dist/src/server/services/repo-resolver.js.map +1 -0
  77. package/dist/src/shared/duckdb.d.ts.map +1 -1
  78. package/dist/src/shared/duckdb.js +21 -1
  79. package/dist/src/shared/duckdb.js.map +1 -1
  80. package/dist/src/shared/fs/safePath.d.ts +7 -0
  81. package/dist/src/shared/fs/safePath.d.ts.map +1 -0
  82. package/dist/src/shared/fs/safePath.js +23 -0
  83. package/dist/src/shared/fs/safePath.js.map +1 -0
  84. package/dist/src/shared/tokenizer.d.ts +1 -1
  85. package/dist/src/shared/tokenizer.d.ts.map +1 -1
  86. package/dist/src/shared/tokenizer.js +97 -15
  87. package/dist/src/shared/tokenizer.js.map +1 -1
  88. package/dist/src/shared/utils/glob.d.ts +5 -0
  89. package/dist/src/shared/utils/glob.d.ts.map +1 -0
  90. package/dist/src/shared/utils/glob.js +22 -0
  91. package/dist/src/shared/utils/glob.js.map +1 -0
  92. package/dist/src/shared/utils/retry.d.ts +8 -0
  93. package/dist/src/shared/utils/retry.d.ts.map +1 -0
  94. package/dist/src/shared/utils/retry.js +20 -0
  95. package/dist/src/shared/utils/retry.js.map +1 -0
  96. package/package.json +9 -1
@@ -3,10 +3,14 @@ import path from "node:path";
3
3
  import { checkFTSSchemaExists } from "../indexer/schema.js";
4
4
  import { generateEmbedding, structuralSimilarity } from "../shared/embedding.js";
5
5
  import { encode as encodeGPT, tokenizeText } from "../shared/tokenizer.js";
6
- import { getRepoPathCandidates, normalizeRepoPath } from "../shared/utils/path.js";
7
6
  import { expandAbbreviations } from "./abbreviations.js";
8
7
  import { getBoostProfile, } from "./boost-profiles.js";
8
+ import { loadPathPenalties, mergePathPenaltyEntries } from "./config-loader.js";
9
+ import { loadServerConfig } from "./config.js";
9
10
  import { coerceProfileName, loadScoringProfile } from "./scoring.js";
11
+ import { createServerServices } from "./services/index.js";
12
+ // Re-export extracted handlers for backward compatibility
13
+ export { snippetsGet, } from "./handlers/snippets-get.js";
10
14
  // Configuration file patterns (v0.8.0+: consolidated to avoid duplication)
11
15
  // Comprehensive list covering multiple languages and tools
12
16
  const CONFIG_FILES = [
@@ -133,6 +137,83 @@ const CONFIG_PATTERNS = [
133
137
  ".github/workflows",
134
138
  ];
135
139
  const FTS_STATUS_CACHE_TTL_MS = 10_000;
140
+ const METADATA_ALIAS_MAP = new Map([
141
+ ["tag", { key: "tags" }],
142
+ ["tags", { key: "tags" }],
143
+ ["category", { key: "category" }],
144
+ ["title", { key: "title" }],
145
+ ["service", { key: "service" }],
146
+ ]);
147
+ const METADATA_KEY_PREFIXES = [
148
+ { prefix: "meta." },
149
+ { prefix: "metadata.", strict: true },
150
+ { prefix: "docmeta.", strict: true },
151
+ { prefix: "frontmatter.", source: "front_matter" },
152
+ { prefix: "fm.", source: "front_matter" },
153
+ { prefix: "yaml.", source: "yaml" },
154
+ { prefix: "json.", source: "json" },
155
+ ];
156
+ const METADATA_MATCH_WEIGHT = 0.15;
157
+ const METADATA_FILTER_MATCH_WEIGHT = 0.1;
158
+ const METADATA_HINT_BONUS = 0.25;
159
+ const INBOUND_LINK_WEIGHT = 0.2;
160
+ /**
161
+ * checkTableAvailability
162
+ *
163
+ * 起動時にテーブルの存在を確認し、TableAvailabilityオブジェクトを生成する。
164
+ * これにより、グローバルミュータブル変数による競合状態を回避する。
165
+ *
166
+ * NOTE: スキーマ変更(テーブル追加)後はサーバーの再起動が必要です。
167
+ *
168
+ * @param db - DuckDBClient インスタンス
169
+ * @returns TableAvailability オブジェクト
170
+ * @throws データベース接続エラー等、テーブル不在以外のエラーが発生した場合
171
+ */
172
+ export async function checkTableAvailability(db) {
173
+ const ALLOWED_TABLES = [
174
+ "document_metadata_kv",
175
+ "markdown_link",
176
+ "hint_expansion",
177
+ "hint_dictionary",
178
+ ];
179
+ const checkTable = async (tableName) => {
180
+ if (!ALLOWED_TABLES.includes(tableName)) {
181
+ throw new Error(`Invalid table name: ${tableName}`);
182
+ }
183
+ try {
184
+ await db.all(`SELECT 1 FROM ${tableName} LIMIT 0`);
185
+ return true;
186
+ }
187
+ catch (error) {
188
+ // テーブル不在エラーのみキャッチ
189
+ if (isTableMissingError(error, tableName)) {
190
+ return false;
191
+ }
192
+ // その他のエラー(接続エラー等)は再スロー
193
+ throw new Error(`Failed to check table availability for ${tableName}: ${error instanceof Error ? error.message : String(error)}`);
194
+ }
195
+ };
196
+ const result = {
197
+ hasMetadataTables: await checkTable("document_metadata_kv"),
198
+ hasLinkTable: await checkTable("markdown_link"),
199
+ hasHintLog: await checkTable("hint_expansion"),
200
+ hasHintDictionary: await checkTable("hint_dictionary"),
201
+ };
202
+ // 起動時警告: テーブルが存在しない場合に通知
203
+ if (!result.hasMetadataTables) {
204
+ console.warn("document_metadata_kv table is missing. Metadata filters and boosts disabled until database is upgraded.");
205
+ }
206
+ if (!result.hasLinkTable) {
207
+ console.warn("markdown_link table is missing. Inbound link boosting disabled until database is upgraded.");
208
+ }
209
+ if (!result.hasHintLog) {
210
+ console.warn("hint_expansion table is missing. Hint logging disabled. Enable the latest schema and rerun the indexer to capture hint logs.");
211
+ }
212
+ if (!result.hasHintDictionary) {
213
+ console.warn("hint_dictionary table is missing. Dictionary hints disabled. Run scripts/diag/build-hint-dictionary.ts after upgrading the schema.");
214
+ }
215
+ return result;
216
+ }
136
217
  async function hasDirtyRepos(db) {
137
218
  const statusCheck = await db.all(`SELECT COUNT(*) as count FROM repo
138
219
  WHERE fts_dirty = true OR fts_status IN ('dirty', 'rebuilding')`);
@@ -217,44 +298,242 @@ function isConfigFile(path, fileName) {
217
298
  fileName.startsWith(".env") ||
218
299
  isInConfigDirectory);
219
300
  }
301
+ function normalizeArtifactHints(hints) {
302
+ if (!Array.isArray(hints)) {
303
+ return [];
304
+ }
305
+ const normalized = [];
306
+ const seen = new Set();
307
+ for (const rawHint of hints) {
308
+ if (typeof rawHint !== "string") {
309
+ continue;
310
+ }
311
+ const trimmed = rawHint.trim();
312
+ if (!trimmed || seen.has(trimmed)) {
313
+ continue;
314
+ }
315
+ normalized.push(trimmed);
316
+ seen.add(trimmed);
317
+ if (normalized.length >= MAX_ARTIFACT_HINTS) {
318
+ break;
319
+ }
320
+ }
321
+ return normalized;
322
+ }
323
+ function bucketArtifactHints(hints) {
324
+ const buckets = {
325
+ pathHints: [],
326
+ substringHints: [],
327
+ };
328
+ for (const hint of hints) {
329
+ if (hint.includes("/") && SAFE_PATH_PATTERN.test(hint)) {
330
+ buckets.pathHints.push(hint);
331
+ continue;
332
+ }
333
+ const normalized = hint.trim().toLowerCase();
334
+ if (normalized.length >= 3) {
335
+ buckets.substringHints.push(normalized);
336
+ }
337
+ }
338
+ return buckets;
339
+ }
340
+ function isMissingTableError(error, table) {
341
+ if (!(error instanceof Error)) {
342
+ return false;
343
+ }
344
+ return /Table with name/i.test(error.message) && error.message.includes(table);
345
+ }
346
+ async function logHintExpansionEntry(db, tableAvailability, entry) {
347
+ if (!HINT_LOG_ENABLED) {
348
+ return;
349
+ }
350
+ if (!tableAvailability.hasHintLog) {
351
+ return;
352
+ }
353
+ try {
354
+ await db.run(`
355
+ INSERT INTO hint_expansion (repo_id, hint_value, expansion_kind, target_path, payload)
356
+ VALUES (?, ?, ?, ?, ?)
357
+ `, [
358
+ entry.repoId,
359
+ entry.hintValue,
360
+ entry.kind,
361
+ entry.targetPath ?? null,
362
+ entry.payload ? JSON.stringify(entry.payload) : null,
363
+ ]);
364
+ }
365
+ catch (error) {
366
+ if (isMissingTableError(error, "hint_expansion")) {
367
+ console.warn("hint_expansion table is missing in the active database. Enable the latest schema and rerun the indexer to capture hint logs.");
368
+ return;
369
+ }
370
+ throw error;
371
+ }
372
+ }
373
+ async function fetchDictionaryPathHints(db, tableAvailability, repoId, hints, perHintLimit) {
374
+ if (!HINT_DICTIONARY_ENABLED || perHintLimit <= 0 || hints.length === 0) {
375
+ return [];
376
+ }
377
+ if (!tableAvailability.hasHintDictionary) {
378
+ return [];
379
+ }
380
+ const uniqueHints = Array.from(new Set(hints));
381
+ const targets = [];
382
+ for (const hint of uniqueHints) {
383
+ let rows = [];
384
+ try {
385
+ rows = await db.all(`
386
+ SELECT target_path
387
+ FROM hint_dictionary
388
+ WHERE repo_id = ?
389
+ AND hint_value = ?
390
+ ORDER BY freq DESC, target_path
391
+ LIMIT ?
392
+ `, [repoId, hint, perHintLimit]);
393
+ }
394
+ catch (error) {
395
+ if (isMissingTableError(error, "hint_dictionary")) {
396
+ console.warn("hint_dictionary table is missing in the active database. Run scripts/diag/build-hint-dictionary.ts after upgrading the schema.");
397
+ return [];
398
+ }
399
+ throw error;
400
+ }
401
+ for (const row of rows) {
402
+ if (!row.target_path || !SAFE_PATH_PATTERN.test(row.target_path)) {
403
+ continue;
404
+ }
405
+ targets.push({ path: row.target_path, sourceHint: hint, origin: "dictionary" });
406
+ }
407
+ }
408
+ return targets;
409
+ }
410
+ function createHintSeedMeta(targets) {
411
+ const meta = new Map();
412
+ const deduped = [];
413
+ for (const target of targets) {
414
+ if (meta.has(target.path)) {
415
+ continue;
416
+ }
417
+ meta.set(target.path, { sourceHint: target.sourceHint, origin: target.origin });
418
+ deduped.push(target);
419
+ }
420
+ return { list: deduped, meta };
421
+ }
422
+ function getHintSeedMeta(seedMeta, path) {
423
+ return seedMeta?.get(path);
424
+ }
425
+ function computeHintPriorityBoost(weights) {
426
+ const textComponent = weights.textMatch * HINT_PRIORITY_TEXT_MULTIPLIER;
427
+ const pathComponent = weights.pathMatch * HINT_PRIORITY_PATH_MULTIPLIER;
428
+ const aggregate = textComponent + pathComponent + weights.editingPath + weights.dependency;
429
+ return Math.max(HINT_PRIORITY_BASE_BONUS, aggregate);
430
+ }
431
+ function createHintExpansionConfig(weights) {
432
+ return {
433
+ dirLimit: Math.max(0, HINT_DIR_LIMIT),
434
+ dirMaxFiles: Math.max(1, HINT_DIR_MAX_FILES),
435
+ depOutLimit: Math.max(0, HINT_DEP_OUT_LIMIT),
436
+ depInLimit: Math.max(0, HINT_DEP_IN_LIMIT),
437
+ semLimit: Math.max(0, HINT_SEM_LIMIT),
438
+ semDirCandidateLimit: Math.max(1, HINT_SEM_DIR_CANDIDATE_LIMIT),
439
+ semThreshold: Number.isFinite(HINT_SEM_THRESHOLD) ? HINT_SEM_THRESHOLD : 0.65,
440
+ perHintLimit: Math.max(0, HINT_PER_HINT_LIMIT),
441
+ dbQueryBudget: Math.max(0, HINT_DB_QUERY_BUDGET),
442
+ dirBoost: computeHintPriorityBoost(weights) * 0.35,
443
+ depBoost: weights.dependency * 0.8,
444
+ substringLimit: Math.max(0, HINT_SUBSTRING_LIMIT),
445
+ substringBoost: Math.max(0, HINT_SUBSTRING_BOOST),
446
+ };
447
+ }
220
448
  const DEFAULT_SEARCH_LIMIT = 50;
221
- const DEFAULT_SNIPPET_WINDOW = 150;
222
449
  const DEFAULT_BUNDLE_LIMIT = 7; // Reduced from 12 to optimize token usage
223
450
  const MAX_BUNDLE_LIMIT = 20;
451
+ const TRACE_SEARCH = process.env.KIRI_TRACE_SEARCH === "1";
224
452
  const MAX_KEYWORDS = 12;
225
453
  const MAX_MATCHES_PER_KEYWORD = 40;
226
454
  const MAX_DEPENDENCY_SEEDS = 8;
227
455
  const MAX_DEPENDENCY_SEEDS_QUERY_LIMIT = 100; // SQL injection防御用の上限
228
456
  const NEARBY_LIMIT = 6;
229
- const FALLBACK_SNIPPET_WINDOW = 40; // Reduced from 120 to optimize token usage
457
+ const serverConfig = loadServerConfig();
458
+ const mergedPathMultiplierCache = new Map();
459
+ const SUPPRESS_NON_CODE_ENABLED = serverConfig.features.suppressNonCode;
460
+ const FINAL_RESULT_SUPPRESSION_ENABLED = serverConfig.features.suppressFinalResults;
461
+ const CLAMP_SNIPPETS_ENABLED = serverConfig.features.clampSnippets;
462
+ const FALLBACK_SNIPPET_WINDOW = serverConfig.features.snippetWindow;
230
463
  const MAX_RERANK_LIMIT = 50;
464
+ const MAX_ARTIFACT_HINTS = 8;
465
+ const SAFE_PATH_PATTERN = /^[a-zA-Z0-9_.\-/]+$/;
466
+ const HINT_PRIORITY_TEXT_MULTIPLIER = serverConfig.hints.priority.textMultiplier;
467
+ const HINT_PRIORITY_PATH_MULTIPLIER = serverConfig.hints.priority.pathMultiplier;
468
+ const HINT_PRIORITY_BASE_BONUS = serverConfig.hints.priority.baseBonus;
469
+ const PATH_FALLBACK_LIMIT = 40;
470
+ const PATH_FALLBACK_TERMS_LIMIT = 5;
471
+ const PATH_FALLBACK_KEEP = 8;
472
+ const AUTO_PATH_SEGMENT_LIMIT = 4;
473
+ function traceSearch(message, ...args) {
474
+ if (TRACE_SEARCH) {
475
+ console.log(`[TRACE context_bundle] ${message}`, ...args);
476
+ }
477
+ }
478
+ const HINT_DIR_LIMIT = serverConfig.hints.directory.limit;
479
+ const HINT_DIR_MAX_FILES = serverConfig.hints.directory.maxFiles;
480
+ const HINT_DEP_OUT_LIMIT = serverConfig.hints.dependency.outLimit;
481
+ const HINT_DEP_IN_LIMIT = serverConfig.hints.dependency.inLimit;
482
+ const HINT_SEM_LIMIT = serverConfig.hints.semantic.limit;
483
+ const HINT_SEM_DIR_CANDIDATE_LIMIT = serverConfig.hints.semantic.dirCandidateLimit;
484
+ const HINT_SEM_THRESHOLD = serverConfig.hints.semantic.threshold;
485
+ const SUPPRESSED_PATH_PREFIXES = [".github/", ".git/", "ThirdPartyNotices", "node_modules/"];
486
+ const SUPPRESSED_FILE_NAMES = ["thirdpartynotices.txt", "thirdpartynotices.md", "cgmanifest.json"];
487
+ function isSuppressedPath(path) {
488
+ if (!SUPPRESS_NON_CODE_ENABLED) {
489
+ return false;
490
+ }
491
+ const normalized = path.startsWith("./") ? path.replace(/^\.\/+/u, "") : path;
492
+ const lower = normalized.toLowerCase();
493
+ if (SUPPRESSED_FILE_NAMES.some((name) => lower.endsWith(name))) {
494
+ return true;
495
+ }
496
+ const lowerPrefixMatches = SUPPRESSED_PATH_PREFIXES.map((prefix) => prefix.toLowerCase());
497
+ return lowerPrefixMatches.some((prefix) => lower.includes(prefix));
498
+ }
499
+ const HINT_PER_HINT_LIMIT = serverConfig.hints.perHintLimit;
500
+ const HINT_DB_QUERY_BUDGET = serverConfig.hints.dbQueryLimit;
501
+ const HINT_SUBSTRING_LIMIT = serverConfig.hints.substring.limit;
502
+ const HINT_SUBSTRING_BOOST = serverConfig.hints.substring.boost;
503
+ const HINT_LOG_ENABLED = process.env.KIRI_HINT_LOG === "1";
504
+ const HINT_DICTIONARY_ENABLED = process.env.KIRI_HINT_DICTIONARY !== "0";
505
+ const HINT_DICTIONARY_LIMIT = Math.max(0, Number.parseInt(process.env.KIRI_HINT_DICTIONARY_LIMIT ?? "2", 10));
231
506
  // Issue #68: Path/Large File Penalty configuration (環境変数で上書き可能)
232
- const PATH_MISS_DELTA = parseFloat(process.env.KIRI_PATH_MISS_DELTA || "-0.5");
233
- const LARGE_FILE_DELTA = parseFloat(process.env.KIRI_LARGE_FILE_DELTA || "-0.8");
507
+ const PATH_MISS_DELTA = serverConfig.penalties.pathMissDelta;
508
+ const LARGE_FILE_DELTA = serverConfig.penalties.largeFileDelta;
234
509
  const MAX_WHY_TAGS = 10;
235
510
  // 項目3: whyタグの優先度マップ(低い数値ほど高優先度)
236
511
  // All actual tag prefixes used in the codebase
237
512
  const WHY_TAG_PRIORITY = {
238
- artifact: 1, // User-provided hints (editing_path, failing_tests)
513
+ artifact: 1, // User-provided hints (editing_path, failing_tests, hints)
514
+ dictionary: 1, // Dictionary-provided hints
239
515
  phrase: 2, // Multi-word literal matches (strongest signal)
240
516
  text: 3, // Single keyword matches
241
- "path-phrase": 4, // Path contains multi-word phrase
242
- structural: 5, // Semantic similarity
243
- "path-segment": 6, // Path component matches
244
- "path-keyword": 7, // Path keyword match
245
- dep: 8, // Dependency relationship
246
- near: 9, // Proximity to editing file
247
- boost: 10, // File type boost
248
- recent: 11, // Recently changed
249
- symbol: 12, // Symbol match
250
- penalty: 13, // Penalty explanations (keep for transparency)
251
- keyword: 14, // Generic keyword (deprecated, kept for compatibility)
517
+ metadata: 4, // Front matter / metadata filters & boosts
518
+ substring: 4, // Substring hint expansion
519
+ "path-phrase": 5, // Path contains multi-word phrase
520
+ structural: 6, // Semantic similarity
521
+ "path-segment": 7, // Path component matches
522
+ "path-keyword": 8, // Path keyword match
523
+ dep: 9, // Dependency relationship
524
+ near: 10, // Proximity to editing file
525
+ boost: 11, // File type boost
526
+ recent: 12, // Recently changed
527
+ symbol: 13, // Symbol match
528
+ penalty: 14, // Penalty explanations (keep for transparency)
529
+ keyword: 15, // Generic keyword (deprecated, kept for compatibility)
252
530
  };
253
531
  // Reserve at least one slot for important structural tags
254
532
  const RESERVED_WHY_SLOTS = {
255
533
  dep: 1, // Dependency relationships are critical
256
534
  symbol: 1, // Symbol boundaries help understand context
257
535
  near: 1, // Proximity explains file selection
536
+ metadata: 1, // Preserve metadata reasons when filters/boosts are active
258
537
  };
259
538
  function parseOutputOptions(params) {
260
539
  return {
@@ -277,6 +556,9 @@ function selectWhyTags(reasons) {
277
556
  reasons = new Set(Array.from(reasons).slice(0, 1000));
278
557
  }
279
558
  const selected = new Set();
559
+ if (reasons.has("boost:links")) {
560
+ selected.add("boost:links");
561
+ }
280
562
  const byCategory = new Map();
281
563
  for (const reason of reasons) {
282
564
  const prefix = reason.split(":")[0] ?? "";
@@ -342,6 +624,45 @@ const STOP_WORDS = new Set([
342
624
  "need",
343
625
  "goal",
344
626
  ]);
627
+ function prioritizeHintCandidates(rankedCandidates, hintPaths, limit) {
628
+ if (rankedCandidates.length === 0) {
629
+ return [];
630
+ }
631
+ const sanitizedLimit = Math.max(1, Math.min(limit, rankedCandidates.length));
632
+ const candidateByPath = new Map();
633
+ for (const candidate of rankedCandidates) {
634
+ if (!candidateByPath.has(candidate.path)) {
635
+ candidateByPath.set(candidate.path, candidate);
636
+ }
637
+ }
638
+ const final = [];
639
+ const seen = new Set();
640
+ for (const hintPath of hintPaths) {
641
+ if (final.length >= sanitizedLimit) {
642
+ break;
643
+ }
644
+ const candidate = candidateByPath.get(hintPath);
645
+ if (!candidate || seen.has(candidate.path)) {
646
+ continue;
647
+ }
648
+ final.push(candidate);
649
+ seen.add(candidate.path);
650
+ }
651
+ if (final.length >= sanitizedLimit) {
652
+ return final;
653
+ }
654
+ for (const candidate of rankedCandidates) {
655
+ if (final.length >= sanitizedLimit) {
656
+ break;
657
+ }
658
+ if (seen.has(candidate.path)) {
659
+ continue;
660
+ }
661
+ final.push(candidate);
662
+ seen.add(candidate.path);
663
+ }
664
+ return final;
665
+ }
345
666
  function normalizeLimit(limit) {
346
667
  if (!limit || Number.isNaN(limit)) {
347
668
  return DEFAULT_SEARCH_LIMIT;
@@ -502,8 +823,30 @@ function extractKeywords(text) {
502
823
  }
503
824
  }
504
825
  }
826
+ addKeywordDerivedPathSegments(result);
505
827
  return result;
506
828
  }
829
+ function addKeywordDerivedPathSegments(result) {
830
+ if (result.pathSegments.length >= AUTO_PATH_SEGMENT_LIMIT) {
831
+ return;
832
+ }
833
+ const additional = [];
834
+ for (const keyword of result.keywords) {
835
+ if (keyword.length < 3 || STOP_WORDS.has(keyword)) {
836
+ continue;
837
+ }
838
+ if (result.pathSegments.includes(keyword) || additional.includes(keyword)) {
839
+ continue;
840
+ }
841
+ additional.push(keyword);
842
+ if (result.pathSegments.length + additional.length >= AUTO_PATH_SEGMENT_LIMIT) {
843
+ break;
844
+ }
845
+ }
846
+ if (additional.length > 0) {
847
+ result.pathSegments.push(...additional);
848
+ }
849
+ }
507
850
  function ensureCandidate(map, filePath) {
508
851
  let candidate = map.get(filePath);
509
852
  if (!candidate) {
@@ -520,14 +863,361 @@ function ensureCandidate(map, filePath) {
520
863
  embedding: null,
521
864
  semanticSimilarity: null,
522
865
  pathMatchHits: 0, // Issue #68: Track path match count
866
+ keywordHits: new Set(),
867
+ phraseHits: 0,
868
+ // pathFallbackReason は optional なので省略(exactOptionalPropertyTypes対応)
869
+ fallbackTextHits: 0,
523
870
  penalties: [], // Issue #68: Penalty log for telemetry
524
871
  };
525
872
  map.set(filePath, candidate);
526
873
  }
527
874
  return candidate;
528
875
  }
876
+ async function expandHintCandidatesForHints(params) {
877
+ const { hintPaths, config } = params;
878
+ if (hintPaths.length === 0 || config.perHintLimit <= 0 || config.dbQueryBudget <= 0) {
879
+ return;
880
+ }
881
+ const state = { remainingDbQueries: config.dbQueryBudget };
882
+ for (const hintPath of hintPaths) {
883
+ if (state.remainingDbQueries <= 0) {
884
+ break;
885
+ }
886
+ await expandSingleHintNeighborhood({ ...params, hintPath, state });
887
+ }
888
+ }
889
+ async function expandSingleHintNeighborhood(args) {
890
+ const { config } = args;
891
+ let remaining = config.perHintLimit;
892
+ if (remaining <= 0) {
893
+ return;
894
+ }
895
+ if (config.dirLimit > 0) {
896
+ const added = await addHintDirectoryNeighbors(args, Math.min(config.dirLimit, remaining));
897
+ remaining -= added;
898
+ if (remaining <= 0) {
899
+ return;
900
+ }
901
+ }
902
+ if (config.depOutLimit > 0 || config.depInLimit > 0) {
903
+ const added = await addHintDependencyNeighbors(args, remaining);
904
+ remaining -= added;
905
+ if (remaining <= 0) {
906
+ return;
907
+ }
908
+ }
909
+ if (config.semLimit > 0) {
910
+ await addHintSemanticNeighbors(args, Math.min(config.semLimit, remaining));
911
+ }
912
+ }
913
+ function useHintDbBudget(state, cost = 1) {
914
+ if (state.remainingDbQueries < cost) {
915
+ return false;
916
+ }
917
+ state.remainingDbQueries -= cost;
918
+ return true;
919
+ }
920
+ function applyHintReasonBoost(candidate, reason, scoreDelta, lang, ext) {
921
+ if (scoreDelta <= 0 || candidate.reasons.has(reason)) {
922
+ return false;
923
+ }
924
+ candidate.score += scoreDelta;
925
+ candidate.reasons.add(reason);
926
+ candidate.pathMatchHits = Math.max(candidate.pathMatchHits, 2);
927
+ candidate.matchLine ??= 1;
928
+ if (lang && !candidate.lang) {
929
+ candidate.lang = lang;
930
+ }
931
+ if (ext && !candidate.ext) {
932
+ candidate.ext = ext;
933
+ }
934
+ return true;
935
+ }
936
+ async function applyPathHintPromotions(args) {
937
+ const { hintTargets } = args;
938
+ if (hintTargets.length === 0) {
939
+ return;
940
+ }
941
+ const hintBoost = computeHintPriorityBoost(args.weights);
942
+ for (const target of hintTargets) {
943
+ const candidate = ensureCandidate(args.candidates, target.path);
944
+ const reasonPrefix = target.origin === "dictionary" ? "dictionary:hint" : "artifact:hint";
945
+ candidate.score += hintBoost;
946
+ candidate.reasons.add(`${reasonPrefix}:${target.path}`);
947
+ candidate.pathMatchHits = Math.max(candidate.pathMatchHits, 3);
948
+ candidate.matchLine ??= 1;
949
+ await logHintExpansionEntry(args.db, args.tableAvailability, {
950
+ repoId: args.repoId,
951
+ hintValue: target.sourceHint,
952
+ kind: target.origin === "dictionary" ? "dictionary" : "path",
953
+ targetPath: target.path,
954
+ payload: {
955
+ origin: target.origin,
956
+ source_hint: target.sourceHint,
957
+ },
958
+ });
959
+ }
960
+ await expandHintCandidatesForHints({
961
+ db: args.db,
962
+ tableAvailability: args.tableAvailability,
963
+ repoId: args.repoId,
964
+ hintPaths: hintTargets.map((target) => target.path),
965
+ candidates: args.candidates,
966
+ fileCache: args.fileCache,
967
+ weights: args.weights,
968
+ config: createHintExpansionConfig(args.weights),
969
+ hintSeedMeta: args.hintSeedMeta,
970
+ });
971
+ }
972
+ async function addHintSubstringMatches(db, tableAvailability, repoId, hints, candidates, limitPerHint, boost) {
973
+ if (limitPerHint <= 0 || boost <= 0) {
974
+ return;
975
+ }
976
+ for (const hint of hints) {
977
+ if (!SAFE_PATH_PATTERN.test(hint.replace(/[^a-zA-Z0-9_.-]/g, ""))) {
978
+ continue;
979
+ }
980
+ const rows = await db.all(`
981
+ SELECT path
982
+ FROM file
983
+ WHERE repo_id = ?
984
+ AND is_binary = FALSE
985
+ AND LOWER(path) LIKE '%' || ? || '%'
986
+ ORDER BY path
987
+ LIMIT ?
988
+ `, [repoId, hint, limitPerHint]);
989
+ for (const row of rows) {
990
+ const candidate = ensureCandidate(candidates, row.path);
991
+ const reason = `substring:hint:${hint}`;
992
+ if (applyHintReasonBoost(candidate, reason, boost)) {
993
+ await logHintExpansionEntry(db, tableAvailability, {
994
+ repoId,
995
+ hintValue: hint,
996
+ kind: "substring",
997
+ targetPath: row.path,
998
+ });
999
+ }
1000
+ }
1001
+ }
1002
+ }
1003
+ async function addHintDirectoryNeighbors(args, limit) {
1004
+ if (limit <= 0) {
1005
+ return 0;
1006
+ }
1007
+ const dir = path.posix.dirname(args.hintPath);
1008
+ if (!dir || dir === "." || dir === "/") {
1009
+ return 0;
1010
+ }
1011
+ if (!useHintDbBudget(args.state)) {
1012
+ return 0;
1013
+ }
1014
+ const rows = await args.db.all(`
1015
+ SELECT path, lang, ext
1016
+ FROM file
1017
+ WHERE repo_id = ?
1018
+ AND is_binary = FALSE
1019
+ AND path LIKE ?
1020
+ ORDER BY path
1021
+ LIMIT ?
1022
+ `, [args.repoId, `${dir}/%`, args.config.dirMaxFiles + 1]);
1023
+ if (rows.length === 0 || rows.length > args.config.dirMaxFiles) {
1024
+ return 0;
1025
+ }
1026
+ rows.sort((a, b) => hintNeighborRank(a.path) - hintNeighborRank(b.path));
1027
+ let added = 0;
1028
+ for (const row of rows) {
1029
+ if (row.path === args.hintPath) {
1030
+ continue;
1031
+ }
1032
+ if (!SAFE_PATH_PATTERN.test(row.path)) {
1033
+ continue;
1034
+ }
1035
+ const candidate = ensureCandidate(args.candidates, row.path);
1036
+ const reason = `artifact:hint_dir:${args.hintPath}:${row.path}`;
1037
+ if (applyHintReasonBoost(candidate, reason, args.config.dirBoost, row.lang, row.ext)) {
1038
+ added += 1;
1039
+ const seedMeta = getHintSeedMeta(args.hintSeedMeta, args.hintPath);
1040
+ await logHintExpansionEntry(args.db, args.tableAvailability, {
1041
+ repoId: args.repoId,
1042
+ hintValue: seedMeta?.sourceHint ?? args.hintPath,
1043
+ kind: "directory",
1044
+ targetPath: row.path,
1045
+ payload: {
1046
+ origin: seedMeta?.origin ?? "artifact",
1047
+ },
1048
+ });
1049
+ if (added >= limit) {
1050
+ break;
1051
+ }
1052
+ }
1053
+ }
1054
+ return added;
1055
+ }
1056
+ async function addHintDependencyNeighbors(args, perHintRemaining) {
1057
+ if (perHintRemaining <= 0) {
1058
+ return 0;
1059
+ }
1060
+ let added = 0;
1061
+ if (args.config.depOutLimit > 0) {
1062
+ const outLimit = Math.min(args.config.depOutLimit, perHintRemaining - added);
1063
+ if (outLimit > 0) {
1064
+ added += await addHintDependencyDirection(args, outLimit, "out");
1065
+ }
1066
+ }
1067
+ if (perHintRemaining - added <= 0) {
1068
+ return added;
1069
+ }
1070
+ if (args.config.depInLimit > 0) {
1071
+ const inLimit = Math.min(args.config.depInLimit, perHintRemaining - added);
1072
+ if (inLimit > 0) {
1073
+ added += await addHintDependencyDirection(args, inLimit, "in");
1074
+ }
1075
+ }
1076
+ return added;
1077
+ }
1078
+ async function addHintDependencyDirection(args, limit, direction) {
1079
+ if (limit <= 0) {
1080
+ return 0;
1081
+ }
1082
+ if (!useHintDbBudget(args.state)) {
1083
+ return 0;
1084
+ }
1085
+ const fetchLimit = Math.min(limit * 4, 25);
1086
+ if (direction === "out") {
1087
+ const rows = await args.db.all(`
1088
+ SELECT dst
1089
+ FROM dependency
1090
+ WHERE repo_id = ?
1091
+ AND src_path = ?
1092
+ AND dst_kind = 'path'
1093
+ LIMIT ?
1094
+ `, [args.repoId, args.hintPath, fetchLimit]);
1095
+ return await applyDependencyRows(args, rows.map((row) => row.dst), limit, direction);
1096
+ }
1097
+ const rows = await args.db.all(`
1098
+ SELECT src_path
1099
+ FROM dependency
1100
+ WHERE repo_id = ?
1101
+ AND dst = ?
1102
+ AND dst_kind = 'path'
1103
+ LIMIT ?
1104
+ `, [args.repoId, args.hintPath, fetchLimit]);
1105
+ return await applyDependencyRows(args, rows.map((row) => row.src_path), limit, direction);
1106
+ }
1107
+ async function applyDependencyRows(args, paths, limit, direction) {
1108
+ if (paths.length === 0) {
1109
+ return 0;
1110
+ }
1111
+ const uniquePaths = Array.from(new Set(paths)).filter((p) => p && SAFE_PATH_PATTERN.test(p));
1112
+ uniquePaths.sort((a, b) => hintNeighborRank(a) - hintNeighborRank(b));
1113
+ let added = 0;
1114
+ for (const dependencyPath of uniquePaths) {
1115
+ if (dependencyPath === args.hintPath) {
1116
+ continue;
1117
+ }
1118
+ const candidate = ensureCandidate(args.candidates, dependencyPath);
1119
+ const reason = `artifact:hint_dep_${direction}:${args.hintPath}:${dependencyPath}`;
1120
+ if (applyHintReasonBoost(candidate, reason, args.config.depBoost)) {
1121
+ added += 1;
1122
+ const seedMeta = getHintSeedMeta(args.hintSeedMeta, args.hintPath);
1123
+ await logHintExpansionEntry(args.db, args.tableAvailability, {
1124
+ repoId: args.repoId,
1125
+ hintValue: seedMeta?.sourceHint ?? args.hintPath,
1126
+ kind: "dependency",
1127
+ targetPath: dependencyPath,
1128
+ payload: {
1129
+ origin: seedMeta?.origin ?? "artifact",
1130
+ direction,
1131
+ },
1132
+ });
1133
+ if (added >= limit) {
1134
+ break;
1135
+ }
1136
+ }
1137
+ }
1138
+ return added;
1139
+ }
1140
+ async function addHintSemanticNeighbors(args, limit) {
1141
+ if (limit <= 0) {
1142
+ return 0;
1143
+ }
1144
+ const dir = path.posix.dirname(args.hintPath);
1145
+ if (!dir || dir === "." || dir === "/") {
1146
+ return 0;
1147
+ }
1148
+ if (!useHintDbBudget(args.state)) {
1149
+ return 0;
1150
+ }
1151
+ const rows = await args.db.all(`
1152
+ SELECT path
1153
+ FROM file
1154
+ WHERE repo_id = ?
1155
+ AND is_binary = FALSE
1156
+ AND path LIKE ?
1157
+ ORDER BY path
1158
+ LIMIT ?
1159
+ `, [args.repoId, `${dir}/%`, args.config.semDirCandidateLimit]);
1160
+ const candidatePaths = rows.map((row) => row.path).filter((p) => p !== args.hintPath);
1161
+ if (candidatePaths.length === 0) {
1162
+ return 0;
1163
+ }
1164
+ if (!useHintDbBudget(args.state)) {
1165
+ return 0;
1166
+ }
1167
+ const embeddingMap = await fetchEmbeddingMap(args.db, args.repoId, [
1168
+ args.hintPath,
1169
+ ...candidatePaths,
1170
+ ]);
1171
+ const hintEmbedding = embeddingMap.get(args.hintPath);
1172
+ if (!hintEmbedding) {
1173
+ return 0;
1174
+ }
1175
+ let added = 0;
1176
+ for (const candidatePath of candidatePaths) {
1177
+ if (!SAFE_PATH_PATTERN.test(candidatePath)) {
1178
+ continue;
1179
+ }
1180
+ const embedding = embeddingMap.get(candidatePath);
1181
+ if (!embedding) {
1182
+ continue;
1183
+ }
1184
+ const similarity = structuralSimilarity(hintEmbedding, embedding);
1185
+ if (!Number.isFinite(similarity) || similarity < args.config.semThreshold) {
1186
+ continue;
1187
+ }
1188
+ const candidate = ensureCandidate(args.candidates, candidatePath);
1189
+ const reason = `artifact:hint_sem:${args.hintPath}:${candidatePath}`;
1190
+ if (applyHintReasonBoost(candidate, reason, args.weights.structural * similarity)) {
1191
+ added += 1;
1192
+ if (added >= limit) {
1193
+ break;
1194
+ }
1195
+ }
1196
+ }
1197
+ return added;
1198
+ }
1199
+ function hintNeighborRank(filePath) {
1200
+ if (filePath.startsWith("src/") || filePath.startsWith("external/assay-kit/src/")) {
1201
+ return 0;
1202
+ }
1203
+ if (isTestLikePath(filePath)) {
1204
+ return 2;
1205
+ }
1206
+ if (filePath.startsWith("docs/")) {
1207
+ return 3;
1208
+ }
1209
+ return 1;
1210
+ }
1211
+ function isTestLikePath(filePath) {
1212
+ return (/(^|\/)(tests?|__tests__|fixtures)\//.test(filePath) ||
1213
+ filePath.endsWith(".spec.ts") ||
1214
+ filePath.endsWith(".spec.tsx") ||
1215
+ filePath.endsWith(".test.ts") ||
1216
+ filePath.endsWith(".test.tsx"));
1217
+ }
529
1218
  function parseEmbedding(vectorJson, vectorDims) {
530
- if (!vectorJson || !vectorDims || vectorDims <= 0) {
1219
+ const dims = vectorDims === null ? null : typeof vectorDims === "bigint" ? Number(vectorDims) : vectorDims;
1220
+ if (!vectorJson || !dims || dims <= 0) {
531
1221
  return null;
532
1222
  }
533
1223
  try {
@@ -536,7 +1226,7 @@ function parseEmbedding(vectorJson, vectorDims) {
536
1226
  return null;
537
1227
  }
538
1228
  const values = [];
539
- for (let i = 0; i < parsed.length && i < vectorDims; i += 1) {
1229
+ for (let i = 0; i < parsed.length && i < dims; i += 1) {
540
1230
  const raw = parsed[i];
541
1231
  const num = typeof raw === "number" ? raw : Number(raw);
542
1232
  if (!Number.isFinite(num)) {
@@ -544,7 +1234,7 @@ function parseEmbedding(vectorJson, vectorDims) {
544
1234
  }
545
1235
  values.push(num);
546
1236
  }
547
- return values.length === vectorDims ? values : null;
1237
+ return values.length === dims ? values : null;
548
1238
  }
549
1239
  catch {
550
1240
  return null;
@@ -636,54 +1326,411 @@ function buildSnippetPreview(content, startLine, endLine) {
636
1326
  if (snippet.length <= 240) {
637
1327
  return snippet;
638
1328
  }
639
- return `${snippet.slice(0, 239)}…`;
1329
+ return `${snippet.slice(0, 239)}…`;
1330
+ }
1331
/**
 * Estimate the token count for a line range of a file.
 * Uses the real GPT tokenizer when available and falls back to a
 * character-count heuristic (~4 characters per token) when encoding fails.
 *
 * @param {string} content - Full file content.
 * @param {number} startLine - Start line (1-indexed, inclusive).
 * @param {number} endLine - End line (1-indexed, inclusive).
 * @returns {number} Estimated token count (at least 1 on the fallback path).
 */
function estimateTokensFromContent(content, startLine, endLine) {
    const lines = content.split(/\r?\n/);
    // Clamp the 1-indexed range into valid slice bounds.
    const startIndex = Math.max(0, startLine - 1);
    const endIndex = Math.min(endLine, lines.length);
    const text = lines.slice(startIndex, endIndex).join("\n");
    try {
        // Accurate count via the GPT tokenizer.
        return encodeGPT(text).length;
    }
    catch (error) {
        // Fallback: average English text is roughly 4 characters per token.
        console.warn("Token encoding failed, using character-based fallback", error);
        return Math.max(1, Math.ceil(text.length / 4));
    }
}
1356
/**
 * Split a multi-word query into OR-searchable words.
 * Splits on whitespace, slashes, hyphens, and underscores and drops
 * words of 2 characters or fewer.
 *
 * @param {string} query - Search query string.
 * @returns {string[]} Word list; falls back to [query] when every word
 *   was filtered out.
 */
function splitQueryWords(query) {
    const words = query.split(/[\s/\-_]+/).filter((word) => word.length > 2);
    return words.length > 0 ? words : [query];
}
1366
/**
 * Normalize a raw metadata filter key into a {key, source, strict?} descriptor.
 * Resolution order: exact alias lookup in METADATA_ALIAS_MAP, then the first
 * matching prefix in METADATA_KEY_PREFIXES (the remainder becomes the key).
 *
 * @param {string} rawKey - User-supplied key (matched case-insensitively).
 * @returns {{key: string, source?: string, strict?: boolean} | null}
 *   Normalized descriptor, or null when the key is empty, unknown, or a
 *   prefix match leaves an empty remainder.
 */
function normalizeMetadataFilterKey(rawKey) {
    if (!rawKey) {
        return null;
    }
    const normalized = rawKey.toLowerCase();
    const alias = METADATA_ALIAS_MAP.get(normalized);
    if (alias) {
        // Copy so callers cannot mutate the shared alias table entry.
        return { ...alias };
    }
    for (const entry of METADATA_KEY_PREFIXES) {
        if (!normalized.startsWith(entry.prefix)) {
            continue;
        }
        const remainder = normalized.slice(entry.prefix.length);
        if (!remainder) {
            return null;
        }
        return {
            key: remainder,
            source: entry.source,
            // Only materialize `strict` when the prefix entry defines it.
            ...(entry.strict !== undefined && { strict: entry.strict }),
        };
    }
    return null;
}
1390
/**
 * Coerce a filter value (string or string[]) into a clean string array.
 * Entries are trimmed; empty strings and non-string items are dropped.
 * Any other input type yields an empty array.
 *
 * @param {unknown} value - Raw filter value.
 * @returns {string[]} Normalized, non-empty, trimmed values.
 */
function normalizeFilterValues(value) {
    if (typeof value === "string") {
        const trimmed = value.trim();
        return trimmed ? [trimmed] : [];
    }
    if (Array.isArray(value)) {
        return value
            .filter((item) => typeof item === "string")
            .map((item) => item.trim())
            .filter((item) => item.length > 0);
    }
    return [];
}
1409
/**
 * Normalize the `metadata_filters` request parameter into filter objects.
 * Unknown keys and empty value lists are silently skipped; non-object
 * input yields an empty list.
 *
 * @param {unknown} input - Raw parameter, expected Record<string, string | string[]>.
 * @returns {Array<{key: string, values: string[], source?: string, strict?: boolean}>}
 */
function normalizeMetadataFiltersParam(input) {
    if (!input || typeof input !== "object") {
        return [];
    }
    const filters = [];
    for (const [rawKey, rawValue] of Object.entries(input)) {
        const normalizedKey = normalizeMetadataFilterKey(rawKey);
        if (!normalizedKey) {
            continue;
        }
        const values = normalizeFilterValues(rawValue);
        if (values.length === 0) {
            continue;
        }
        const filter = {
            key: normalizedKey.key,
            values,
            source: normalizedKey.source,
        };
        // Preserve the tri-state: absent `strict` is distinct from false.
        if (normalizedKey.strict !== undefined) {
            filter.strict = normalizedKey.strict;
        }
        filters.push(filter);
    }
    return filters;
}
1435
/**
 * Merge duplicate metadata filters, deduplicating values case-insensitively.
 * Filters are grouped by (source, key, strict) so that e.g. a strict and a
 * hint filter on the same key stay separate entries.
 *
 * @param {Array<{key: string, values: string[], source?: string, strict?: boolean}>} filters
 * @returns {Array<{key: string, values: string[], source?: string, strict?: boolean}>}
 */
function mergeMetadataFilters(filters) {
    const merged = new Map();
    for (const filter of filters) {
        if (filter.values.length === 0) {
            continue;
        }
        const mapKey = `${filter.source ?? "*"}::${filter.key}::${filter.strict ? "strict" : "hint"}`;
        const existing = merged.get(mapKey);
        if (!existing) {
            const entry = {
                key: filter.key,
                source: filter.source,
                values: [...filter.values],
            };
            if (filter.strict !== undefined) {
                entry.strict = filter.strict;
            }
            merged.set(mapKey, entry);
            continue;
        }
        // Append only values not already present (case-insensitive compare,
        // but the original casing of the first occurrence is kept).
        const seen = new Set(existing.values.map((val) => val.toLowerCase()));
        for (const value of filter.values) {
            const lower = value.toLowerCase();
            if (!seen.has(lower)) {
                existing.values.push(value);
                seen.add(lower);
            }
        }
    }
    return Array.from(merged.values());
}
1465
/**
 * Extract inline `key:value` metadata filters from a query string.
 * Supports quoted values (key:"foo bar" / key:'foo bar'); matched tokens
 * are stripped from the query and returned separately as merged filters.
 * Tokens whose key does not normalize to a known metadata key are left
 * in the query untouched.
 *
 * @param {string} query - Raw user query (may be empty).
 * @returns {{cleanedQuery: string, filters: Array<{key: string, values: string[], source?: string, strict?: boolean}>}}
 */
function parseInlineMetadataFilters(query) {
    if (!query) {
        return { cleanedQuery: "", filters: [] };
    }
    const matches = [];
    // Fresh literal per call: a /g regex keeps lastIndex state across exec().
    const pattern = /(\b[\w.]+):("[^"]+"|'[^']+'|[^\s]+)/g;
    let match;
    while ((match = pattern.exec(query)) !== null) {
        const normalizedKey = normalizeMetadataFilterKey(match[1] ?? "");
        if (!normalizedKey) {
            continue;
        }
        let rawValue = match[2] ?? "";
        const isQuoted = (rawValue.startsWith('"') && rawValue.endsWith('"')) ||
            (rawValue.startsWith("'") && rawValue.endsWith("'"));
        if (isQuoted) {
            rawValue = rawValue.slice(1, -1);
        }
        const value = rawValue.trim();
        if (!value) {
            continue;
        }
        const filter = {
            key: normalizedKey.key,
            source: normalizedKey.source,
            values: [value],
        };
        if (normalizedKey.strict !== undefined) {
            filter.strict = normalizedKey.strict;
        }
        matches.push({ start: match.index, end: pattern.lastIndex, filter });
    }
    if (matches.length === 0) {
        return { cleanedQuery: query.trim(), filters: [] };
    }
    // Rebuild the query text with the matched filter tokens removed.
    let cleaned = "";
    let lastIndex = 0;
    for (const info of matches) {
        cleaned += query.slice(lastIndex, info.start);
        lastIndex = info.end;
    }
    cleaned += query.slice(lastIndex);
    return {
        cleanedQuery: cleaned.replace(/\s{2,}/g, " ").trim(),
        filters: mergeMetadataFilters(matches.map((m) => m.filter)),
    };
}
1516
/**
 * Build parameterized EXISTS sub-query clauses for metadata filters
 * against document_metadata_kv. Values are matched with ILIKE
 * (case-insensitive substring).
 *
 * @param {Array<{key: string, values: string[], source?: string}>} filters
 * @param {"f"|"mk"} [alias="f"] - Outer table alias; interpolated into SQL,
 *   so it is whitelisted as a defence against SQL injection.
 * @returns {Array<{sql: string, params: string[]}>} One clause per usable filter.
 * @throws {Error} When alias is not whitelisted.
 */
function buildMetadataFilterConditions(filters, alias = "f") {
    if (!["f", "mk"].includes(alias)) {
        throw new Error(`Invalid SQL alias: ${alias}`);
    }
    const clauses = [];
    for (const filter of filters) {
        // Filters without a key or without values produce no clause.
        if (!filter.key || filter.values.length === 0) {
            continue;
        }
        const likeClauses = filter.values.map(() => "mk.value ILIKE ?").join(" OR ");
        const whereParts = [`mk.repo_id = ${alias}.repo_id`, `mk.path = ${alias}.path`];
        const params = [];
        if (filter.source) {
            whereParts.push("mk.source = ?");
            params.push(filter.source);
        }
        whereParts.push("mk.key = ?");
        params.push(filter.key);
        whereParts.push(`(${likeClauses})`);
        params.push(...filter.values.map((value) => `%${value}%`));
        clauses.push({
            sql: `EXISTS (SELECT 1 FROM document_metadata_kv mk WHERE ${whereParts.join(" AND ")})`,
            params,
        });
    }
    return clauses;
}
1542
/**
 * Heuristically detect a "table does not exist" database error.
 * NOTE: matching the bare table name is intentionally broad — any Error
 * whose message mentions the table counts as "missing".
 *
 * @param {unknown} error - Caught value; non-Error values never match.
 * @param {string} table - Table name to look for in the message.
 * @returns {boolean}
 */
function isTableMissingError(error, table) {
    if (!(error instanceof Error)) {
        return false;
    }
    // The previous extra check for `Table with name ${table}` was redundant:
    // that string always contains the bare table name, so this single
    // substring test is exactly equivalent.
    return error.message.includes(table);
}
1548
/**
 * Run a metadata query, degrading gracefully when the metadata tables are
 * absent (older database files): returns [] instead of throwing.
 * Unrelated errors are rethrown.
 *
 * @param {object} db - Database handle exposing `all(sql, params)`.
 * @param {{hasMetadataTables: boolean}} tableAvailability - Cached availability flags.
 * @param {string} sql
 * @param {unknown[]} params
 * @returns {Promise<object[]>} Result rows, or [] when unavailable.
 */
async function safeMetadataQuery(db, tableAvailability, sql, params) {
    if (!tableAvailability.hasMetadataTables) {
        return [];
    }
    try {
        return await db.all(sql, params);
    }
    catch (error) {
        // Only swallow the specific "table missing" case.
        if (!isTableMissingError(error, "document_metadata_kv")) {
            throw error;
        }
        console.warn("Metadata tables not found; disabling metadata filters and boosts until database is upgraded.");
        return [];
    }
}
1563
/**
 * Run a markdown-link query, degrading gracefully when the link table is
 * absent (older database files): returns [] instead of throwing.
 * Unrelated errors are rethrown.
 *
 * @param {object} db - Database handle exposing `all(sql, params)`.
 * @param {{hasLinkTable: boolean}} tableAvailability - Cached availability flags.
 * @param {string} sql
 * @param {unknown[]} params
 * @returns {Promise<object[]>} Result rows, or [] when unavailable.
 */
async function safeLinkQuery(db, tableAvailability, sql, params) {
    if (!tableAvailability.hasLinkTable) {
        return [];
    }
    try {
        return await db.all(sql, params);
    }
    catch (error) {
        // Only swallow the specific "table missing" case.
        if (!isTableMissingError(error, "markdown_link")) {
            throw error;
        }
        console.warn("Markdown link table not found; inbound link boosting disabled until database is upgraded.");
        return [];
    }
}
1578
/**
 * Fetch candidate files selected purely by metadata filters (no text query).
 * Returns [] when metadata tables are unavailable, no filters were given,
 * or the limit is non-positive.
 *
 * @param {object} db - Database handle exposing `all(sql, params)`.
 * @param {{hasMetadataTables: boolean}} tableAvailability
 * @param {number|string} repoId
 * @param {Array<{key: string, values: string[], source?: string}>} filters
 * @param {number} limit - Maximum number of rows.
 * @returns {Promise<Array<{path: string, lang: string, ext: string, content: string}>>}
 */
async function fetchMetadataOnlyCandidates(db, tableAvailability, repoId, filters, limit) {
    if (!tableAvailability.hasMetadataTables || filters.length === 0 || limit <= 0) {
        return [];
    }
    const whereClauses = ["f.repo_id = ?"];
    const params = [repoId];
    for (const clause of buildMetadataFilterConditions(filters)) {
        whereClauses.push(clause.sql);
        params.push(...clause.params);
    }
    const sql = `
    SELECT f.path, f.lang, f.ext, b.content
    FROM file f
    JOIN blob b ON b.hash = f.blob_hash
    WHERE ${whereClauses.join(" AND ")}
    ORDER BY f.path
    LIMIT ?
  `;
    params.push(limit);
    try {
        return await db.all(sql, params);
    }
    catch (error) {
        if (isTableMissingError(error, "document_metadata_kv")) {
            console.warn("Metadata tables not found; disabling metadata-only searches until database is upgraded.");
            return [];
        }
        throw error;
    }
}
1609
/**
 * Find files whose metadata values contain any of the query keywords,
 * scored by the number of matching metadata rows per file.
 *
 * @param {object} db - Database handle exposing `all(sql, params)`.
 * @param {{hasMetadataTables: boolean}} tableAvailability
 * @param {number|string} repoId
 * @param {string[]} keywords - Matched via ILIKE substring against mk.value.
 * @param {Array<object>} filters - Extra metadata filters (applied with alias "f").
 * @param {number} limit - Maximum number of rows; <= 0 short-circuits to [].
 * @param {Set<string>} excludePaths - Paths already selected elsewhere.
 * @returns {Promise<Array<object>>} Rows of {path, lang, ext, content, score: number}.
 */
async function fetchMetadataKeywordMatches(db, tableAvailability, repoId, keywords, filters, limit, excludePaths) {
    if (!tableAvailability.hasMetadataTables || keywords.length === 0 || limit <= 0) {
        return [];
    }
    const keywordClauses = keywords.map(() => "mk.value ILIKE ?").join(" OR ");
    const params = [repoId, ...keywords.map((kw) => `%${kw}%`)];
    const whereClauses = ["mk.repo_id = ?", `(${keywordClauses})`];
    if (excludePaths.size > 0) {
        const placeholders = Array.from(excludePaths)
            .map(() => "?")
            .join(", ");
        whereClauses.push(`f.path NOT IN (${placeholders})`);
        params.push(...excludePaths);
    }
    for (const clause of buildMetadataFilterConditions(filters, "f")) {
        whereClauses.push(clause.sql);
        params.push(...clause.params);
    }
    params.push(limit);
    const sql = `
    SELECT f.path, f.lang, f.ext, b.content, COUNT(*) AS score
    FROM document_metadata_kv mk
    JOIN file f ON f.repo_id = mk.repo_id AND f.path = mk.path
    JOIN blob b ON b.hash = f.blob_hash
    WHERE ${whereClauses.join(" AND ")}
    GROUP BY f.path, f.lang, f.ext, b.content
    ORDER BY score DESC, f.path
    LIMIT ?
  `;
    const rows = await safeMetadataQuery(db, tableAvailability, sql, params);
    // COUNT(*) may surface as a non-number driver value; coerce to Number.
    return rows.map((row) => ({ ...row, score: Number(row.score ?? 1) }));
}
1642
/**
 * Load metadata key/value entries for a set of paths, grouped by path.
 * Returns an empty Map when metadata tables are unavailable or no paths
 * were given.
 *
 * @param {object} db - Database handle exposing `all(sql, params)`.
 * @param {{hasMetadataTables: boolean}} tableAvailability
 * @param {number|string} repoId
 * @param {string[]} paths
 * @returns {Promise<Map<string, Array<{key: string, value: string, source?: string}>>>}
 */
async function loadMetadataForPaths(db, tableAvailability, repoId, paths) {
    const result = new Map();
    if (!tableAvailability.hasMetadataTables || paths.length === 0) {
        return result;
    }
    const placeholders = paths.map(() => "?").join(", ");
    const sql = `
    SELECT path, key, value, source
    FROM document_metadata_kv
    WHERE repo_id = ? AND path IN (${placeholders})
  `;
    const rows = await safeMetadataQuery(db, tableAvailability, sql, [repoId, ...paths]);
    for (const row of rows) {
        let entries = result.get(row.path);
        if (!entries) {
            entries = [];
            result.set(row.path, entries);
        }
        entries.push({
            key: row.key,
            value: row.value,
            // Normalize SQL NULL to undefined for the in-memory shape.
            source: row.source ?? undefined,
        });
    }
    return result;
}
641
- function prependLineNumbers(snippet, startLine) {
642
- const lines = snippet.split(/\r?\n/);
643
- if (lines.length === 0) {
644
- return snippet;
1666
/**
 * Count inbound markdown links (resolved links pointing at each path).
 * Returns an empty Map when the link table is unavailable or no paths
 * were given.
 *
 * @param {object} db - Database handle exposing `all(sql, params)`.
 * @param {{hasLinkTable: boolean}} tableAvailability
 * @param {number|string} repoId
 * @param {string[]} paths
 * @returns {Promise<Map<string, number>>} path -> inbound link count.
 */
async function loadInboundLinkCounts(db, tableAvailability, repoId, paths) {
    const counts = new Map();
    if (!tableAvailability.hasLinkTable || paths.length === 0) {
        return counts;
    }
    const placeholders = paths.map(() => "?").join(", ");
    const sql = `
    SELECT resolved_path AS path, COUNT(*) AS inbound
    FROM markdown_link
    WHERE repo_id = ? AND resolved_path IS NOT NULL AND resolved_path IN (${placeholders})
    GROUP BY resolved_path
  `;
    const rows = await safeLinkQuery(db, tableAvailability, sql, [repoId, ...paths]);
    for (const row of rows) {
        // COUNT(*) may come back as a BigInt driver value; normalize to Number.
        const inbound = typeof row.inbound === "bigint" ? Number(row.inbound) : Number(row.inbound ?? 0);
        counts.set(row.path, inbound);
    }
    return counts;
}
653
- /**
654
- * トークン数を推定(コンテンツベース)
655
- * 実際のGPTトークナイザーを使用して正確にカウント
656
- *
657
- * @param content - ファイル全体のコンテンツ
658
- * @param startLine - 開始行(1-indexed)
659
- * @param endLine - 終了行(1-indexed)
660
- * @returns 推定トークン数
661
- */
662
- function estimateTokensFromContent(content, startLine, endLine) {
663
- const lines = content.split(/\r?\n/);
664
- const startIndex = Math.max(0, startLine - 1);
665
- const endIndex = Math.min(endLine, lines.length);
666
- const selectedLines = lines.slice(startIndex, endIndex);
667
- const text = selectedLines.join("\n");
668
- try {
669
- // 実際のGPTトークナイザーを使用
670
- return encodeGPT(text).length;
1685
/**
 * Compute a score boost from a file's metadata entries.
 * Each entry contributes METADATA_MATCH_WEIGHT at most once for keyword
 * substring matches, plus METADATA_FILTER_MATCH_WEIGHT when its value
 * exactly equals a filter value (lowercased). The total is capped at 1.5.
 *
 * @param {Array<{key: string, value: string, source?: string}>|undefined} entries
 * @param {Iterable<string>} keywordSet - Lowercased query keywords.
 * @param {Set<string>} filterValueSet - Lowercased filter values.
 * @returns {number} Boost in [0, 1.5].
 */
function computeMetadataBoost(entries, keywordSet, filterValueSet) {
    if (!entries || entries.length === 0) {
        return 0;
    }
    let boost = 0;
    for (const entry of entries) {
        const valueLower = entry.value.toLowerCase();
        for (const keyword of keywordSet) {
            if (valueLower.includes(keyword)) {
                boost += METADATA_MATCH_WEIGHT;
                break; // At most one keyword bonus per entry.
            }
        }
        if (filterValueSet.has(valueLower)) {
            boost += METADATA_FILTER_MATCH_WEIGHT;
        }
    }
    return Math.min(boost, 1.5);
}
678
- /**
679
- * 複数単語クエリを単語分割してOR検索条件を構築
680
- * @param query - 検索クエリ文字列
681
- * @returns 単語配列(2文字以下を除外)
682
- */
683
- function splitQueryWords(query) {
684
- // 空白、スラッシュ、ハイフン、アンダースコアで分割
685
- const words = query.split(/[\s/\-_]+/).filter((w) => w.length > 2);
686
- return words.length > 0 ? words : [query]; // 全て除外された場合は元のクエリを使用
1704
/**
 * Convert an inbound markdown-link count into a score boost.
 * Logarithmic (log1p) so heavily-linked files do not dominate; capped at 1.0.
 *
 * @param {number|bigint|null|undefined} count - Inbound link count.
 * @returns {number} Boost in [0, 1.0]; 0 for missing/non-positive counts.
 */
function computeInboundLinkBoost(count) {
    // COUNT(*) results may arrive as BigInt; normalize before the math.
    const numericCount = typeof count === "bigint" ? Number(count) : count;
    if (!numericCount || numericCount <= 0) {
        return 0;
    }
    return Math.min(Math.log1p(numericCount) * INBOUND_LINK_WEIGHT, 1.0);
}
1714
/**
 * Check whether a candidate's metadata entries satisfy every filter.
 * A filter matches when some entry has the same key (and the same source,
 * when the filter pins one) and the entry value contains one of the filter
 * values as a case-insensitive substring.
 *
 * @param {Array<{key: string, value: string, source?: string}>|undefined} entries
 * @param {Array<{key: string, values: string[], source?: string}>} filters
 * @returns {boolean} true when filters is empty, or every filter matches.
 */
function candidateMatchesMetadataFilters(entries, filters) {
    if (filters.length === 0) {
        return true; // No filters: everything passes.
    }
    if (!entries || entries.length === 0) {
        return false; // Filters present but no metadata: cannot match.
    }
    return filters.every((filter) => {
        const expectedValues = filter.values.map((value) => value.toLowerCase());
        return entries.some((entry) => {
            if (entry.key !== filter.key) {
                return false;
            }
            if (filter.source && entry.source !== filter.source) {
                return false;
            }
            const lowerValue = entry.value.toLowerCase();
            return expectedValues.some((value) => lowerValue.includes(value));
        });
    });
}
688
1735
  /**
689
1736
  * パス固有のマルチプライヤーを取得(最長プレフィックスマッチ)
@@ -710,7 +1757,7 @@ function getPathMultiplier(filePath, profileConfig) {
710
1757
  * @param weights - スコアリングウェイト設定(乗算的ペナルティに使用)
711
1758
  * @returns ブースト適用後のスコア
712
1759
  */
713
- function applyFileTypeBoost(path, baseScore, profileConfig, _weights) {
1760
+ function applyFileTypeBoost(path, baseScore, profileConfig, weights) {
714
1761
  // Blacklisted directories that are almost always irrelevant for code context
715
1762
  const blacklistedDirs = [
716
1763
  ".cursor/",
@@ -727,7 +1774,8 @@ function applyFileTypeBoost(path, baseScore, profileConfig, _weights) {
727
1774
  if (profileConfig.denylistOverrides.includes(dir)) {
728
1775
  continue;
729
1776
  }
730
- return -100; // Effectively remove it
1777
+ // v1.0.0: Use multiplicative penalty instead of absolute -100
1778
+ return baseScore * weights.blacklistPenaltyMultiplier;
731
1779
  }
732
1780
  }
733
1781
  const fileName = path.split("/").pop() ?? "";
@@ -758,12 +1806,56 @@ function applyFileTypeBoost(path, baseScore, profileConfig, _weights) {
758
1806
  multiplier *= implMultiplier;
759
1807
  }
760
1808
  }
761
- // Test files: additive penalty (keep strong for files_search)
1809
+ // Test files: multiplicative penalty (v1.0.0)
762
1810
  if (path.startsWith("tests/") || path.startsWith("test/")) {
763
- return baseScore * 0.2; // Strong penalty for tests
1811
+ return baseScore * weights.testPenaltyMultiplier;
764
1812
  }
765
1813
  return baseScore * multiplier;
766
1814
  }
1815
/**
 * Add coverage bonuses for keyword/phrase evidence to a candidate's score.
 * Skipped entirely for candidates with no text evidence (which includes
 * pure path-fallback candidates — the original extra check for
 * "fallback:path" was subsumed by the no-evidence check and is removed).
 *
 * Bonus = coverage ratio × weights.textMatch × 0.4 (keywords) / 0.6 (phrases).
 *
 * @param {{score: number, reasons: Set<string>, keywordHits: Set<string>, phraseHits: number}} candidate - Mutated in place.
 * @param {{keywords: string[], phrases: string[]}} extractedTerms - Terms extracted from the goal/query.
 * @param {{textMatch: number}} weights - Scoring weights.
 */
function applyCoverageBoost(candidate, extractedTerms, weights) {
    // No keyword and no phrase evidence: neither branch below could fire.
    if (candidate.keywordHits.size === 0 && candidate.phraseHits === 0) {
        return;
    }
    if (extractedTerms.keywords.length > 0 && candidate.keywordHits.size > 0) {
        const coverage = candidate.keywordHits.size / extractedTerms.keywords.length;
        candidate.score += coverage * weights.textMatch * 0.4;
        candidate.reasons.add(`coverage:keywords:${coverage.toFixed(2)}`);
    }
    if (extractedTerms.phrases.length > 0 && candidate.phraseHits > 0) {
        // Clamp: a candidate can hit a phrase more than once.
        const phraseCoverage = Math.min(1, candidate.phraseHits / extractedTerms.phrases.length);
        candidate.score += phraseCoverage * weights.textMatch * 0.6;
        candidate.reasons.add(`coverage:phrases:${phraseCoverage.toFixed(2)}`);
    }
}
+ async function fetchPathFallbackCandidates(db, repoId, terms, limit) {
1840
+ if (terms.length === 0 || limit <= 0) {
1841
+ return [];
1842
+ }
1843
+ const filters = terms.map(() => "f.path ILIKE ?").join(" OR ");
1844
+ const params = [repoId, ...terms.map((term) => `%${term}%`), limit];
1845
+ return await db.all(`
1846
+ SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
1847
+ FROM file f
1848
+ JOIN blob b ON b.hash = f.blob_hash
1849
+ LEFT JOIN file_embedding fe
1850
+ ON fe.repo_id = f.repo_id
1851
+ AND fe.path = f.path
1852
+ WHERE f.repo_id = ?
1853
+ AND f.is_binary = FALSE
1854
+ AND (${filters})
1855
+ ORDER BY f.path
1856
+ LIMIT ?
1857
+ `, params);
1858
+ }
767
1859
  /**
768
1860
  * パスベースのスコアリングを適用(加算的ブースト)
769
1861
  * goalのキーワード/フレーズがファイルパスに含まれる場合にスコアを加算
@@ -862,22 +1954,25 @@ function applyPathBasedScoring(candidate, lowerPath, weights, extractedTerms) {
862
1954
  }
863
1955
  }
864
1956
  /**
865
- * 加算的ファイルペナルティを適用
866
- * ブラックリストディレクトリ、テストファイル、lockファイル、設定ファイル、マイグレーションファイルに強いペナルティ
867
- * @param profile - boost_profile設定("docs"の場合はdocs/ディレクトリのブラックリストをスキップ)
868
- * @returns true if penalty was applied and processing should stop
1957
+ * 乗算的ファイルペナルティを適用(v1.0.0+)
1958
+ * ブラックリストディレクトリ、テストファイル、lockファイルに乗算ペナルティ
1959
+ * v1.0.0: 絶対ペナルティ(-100)から乗算ペナルティ(×0.01など)に移行
1960
+ * @param weights - スコアリングウェイト設定(乗算ペナルティ係数を含む)
1961
+ * @param profile - boost_profile設定(denylistOverridesなど)
1962
+ * @returns true if severe penalty was applied (caller should skip further boosts)
869
1963
  */
870
- function applyAdditiveFilePenalties(candidate, path, lowerPath, fileName, profileConfig) {
871
- // Blacklisted directories - effectively remove
1964
+ function applyMultiplicativeFilePenalties(candidate, path, lowerPath, fileName, weights, profileConfig) {
1965
+ // Returns true if a severe penalty was applied (should skip further boosts)
1966
+ // Blacklisted directories - apply strong multiplicative penalty (99% reduction)
1967
+ // v1.0.0: test/ and tests/ removed - handled by testPenaltyMultiplier instead
872
1968
  const blacklistedDirs = [
873
1969
  ".cursor/",
874
1970
  ".devcontainer/",
875
1971
  ".serena/",
876
1972
  "__mocks__/",
877
1973
  "docs/",
878
- "test/",
879
- "tests/",
880
1974
  ".git/",
1975
+ ".github/",
881
1976
  "node_modules/",
882
1977
  "db/migrate/",
883
1978
  "db/migrations/",
@@ -897,19 +1992,26 @@ function applyAdditiveFilePenalties(candidate, path, lowerPath, fileName, profil
897
1992
  if (profileConfig.denylistOverrides.includes(dir)) {
898
1993
  continue; // Skip this blacklisted directory
899
1994
  }
900
- candidate.score = -100;
1995
+ // v1.0.0: Use multiplicative penalty instead of absolute -100
1996
+ candidate.scoreMultiplier *= weights.blacklistPenaltyMultiplier;
901
1997
  candidate.reasons.add("penalty:blacklisted-dir");
902
- return true;
1998
+ return true; // Signal to skip further boosts - this is the strongest penalty
903
1999
  }
904
2000
  }
905
- // Test files - strong penalty
2001
+ if (isSuppressedPath(path)) {
2002
+ // v1.0.0: Use multiplicative penalty instead of absolute -100
2003
+ candidate.scoreMultiplier *= weights.blacklistPenaltyMultiplier;
2004
+ candidate.reasons.add("penalty:suppressed");
2005
+ return true; // Signal to skip further boosts
2006
+ }
2007
+ // Test files - strong multiplicative penalty (95% reduction)
906
2008
  const testPatterns = [".spec.ts", ".spec.js", ".test.ts", ".test.js", ".spec.tsx", ".test.tsx"];
907
2009
  if (testPatterns.some((pattern) => lowerPath.endsWith(pattern))) {
908
- candidate.score -= 2.0;
2010
+ candidate.scoreMultiplier *= weights.testPenaltyMultiplier;
909
2011
  candidate.reasons.add("penalty:test-file");
910
- return true;
2012
+ return true; // Signal to skip further boosts
911
2013
  }
912
- // Lock files - very strong penalty
2014
+ // Lock files - very strong multiplicative penalty (99% reduction)
913
2015
  const lockFiles = [
914
2016
  "package-lock.json",
915
2017
  "pnpm-lock.yaml",
@@ -920,63 +2022,58 @@ function applyAdditiveFilePenalties(candidate, path, lowerPath, fileName, profil
920
2022
  "poetry.lock",
921
2023
  ];
922
2024
  if (lockFiles.some((lockFile) => fileName === lockFile)) {
923
- candidate.score -= 3.0;
2025
+ candidate.scoreMultiplier *= weights.lockPenaltyMultiplier;
924
2026
  candidate.reasons.add("penalty:lock-file");
925
- return true;
926
- }
927
- // Configuration files - penalty handling depends on profile
928
- const configPatterns = [
929
- ".config.js",
930
- ".config.ts",
931
- ".config.mjs",
932
- ".config.cjs",
933
- "tsconfig.json",
934
- "jsconfig.json",
935
- "package.json",
936
- ".eslintrc",
937
- ".prettierrc",
938
- "jest.config",
939
- "vite.config",
940
- "vitest.config",
941
- "webpack.config",
942
- "rollup.config",
943
- ];
944
- if (configPatterns.some((pattern) => lowerPath.endsWith(pattern) || fileName.startsWith(".env")) ||
945
- fileName === "Dockerfile" ||
946
- fileName === "docker-compose.yml" ||
947
- fileName === "docker-compose.yaml") {
948
- // ✅ Use explicit flag instead of magic number (0.3) to determine behavior
949
- // This decouples profile detection from multiplier values
950
- if (profileConfig.skipConfigAdditivePenalty) {
951
- return false; // Continue to multiplicative penalty only
952
- }
953
- // For other profiles, apply strong additive penalty
954
- candidate.score -= 1.5;
955
- candidate.reasons.add("penalty:config-file");
956
- return true;
2027
+ return true; // Signal to skip further boosts
957
2028
  }
958
- // Migration files - strong penalty
959
- if (lowerPath.includes("migrate") || lowerPath.includes("migration")) {
960
- candidate.score -= 2.0;
961
- candidate.reasons.add("penalty:migration-file");
962
- return true;
963
- }
964
- return false; // No penalty applied, continue processing
2029
+ // v1.0.0: No penalty applied, allow further boosts/penalties
2030
+ return false;
965
2031
  }
966
2032
  /**
967
2033
  * ファイルタイプ別の乗算的ペナルティ/ブーストを適用(v0.7.0+)
968
2034
  * profile="docs": ドキュメントファイルをブースト
969
2035
  * profile="default": ドキュメントファイルにペナルティ、実装ファイルをブースト
970
2036
  */
971
- function applyFileTypeMultipliers(candidate, path, ext, profileConfig, _weights) {
2037
+ function applyFileTypeMultipliers(candidate, path, ext, profileConfig, weights) {
972
2038
  const fileName = path.split("/").pop() ?? "";
973
- // Step 1: Config files
2039
+ const lowerPath = path.toLowerCase();
2040
+ // Very low value: schemas, fixtures, testdata, examples, baseline
2041
+ const schemaJson = lowerPath.endsWith(".schema.json") || lowerPath.includes("/schemas/");
2042
+ const isFixture = lowerPath.includes("/fixtures/") ||
2043
+ lowerPath.includes("/fixture/") ||
2044
+ lowerPath.includes("/testdata/");
2045
+ const isExample = lowerPath.includes("/examples/") || lowerPath.includes("/example/");
2046
+ const isBaseline = lowerPath.includes("baseline") || lowerPath.includes("golden");
2047
+ if (schemaJson || isFixture || isExample || isBaseline) {
2048
+ candidate.scoreMultiplier *= weights.configPenaltyMultiplier;
2049
+ candidate.reasons.add("penalty:low-value-file");
2050
+ return;
2051
+ }
2052
+ // ✅ Step 1: Low-value files (v1.0.0: syntax/perf/legal/migration)
2053
+ // Apply configPenaltyMultiplier (strong penalty) to rarely useful file types
2054
+ const isSyntaxGrammar = path.includes("/syntaxes/") &&
2055
+ (lowerPath.endsWith(".tmlanguage") ||
2056
+ lowerPath.endsWith(".tmlanguage.json") ||
2057
+ lowerPath.endsWith(".tmtheme") ||
2058
+ lowerPath.endsWith(".plist"));
2059
+ const isPerfData = lowerPath.includes(".perf.data") ||
2060
+ lowerPath.includes(".perf-data") ||
2061
+ lowerPath.includes("-perf-data");
2062
+ const isLegalFile = fileName.toLowerCase().includes("thirdpartynotices") ||
2063
+ fileName.toLowerCase() === "cgmanifest.json";
2064
+ const isMigrationFile = lowerPath.includes("migrate") || lowerPath.includes("migration");
2065
+ if (isSyntaxGrammar || isPerfData || isLegalFile || isMigrationFile) {
2066
+ candidate.scoreMultiplier *= weights.configPenaltyMultiplier;
2067
+ candidate.reasons.add("penalty:low-value-file");
2068
+ return; // Don't apply impl boosts
2069
+ }
2070
+ // ✅ Step 2: Config files
974
2071
  if (isConfigFile(path, fileName)) {
975
2072
  candidate.scoreMultiplier *= profileConfig.fileTypeMultipliers.config;
976
2073
  candidate.reasons.add("penalty:config-file");
977
2074
  return; // Don't apply impl boosts to config files
978
2075
  }
979
- // ✅ Step 2: Documentation files
2076
+ // ✅ Step 3: Documentation files
980
2077
  const docExtensions = [".md", ".yaml", ".yml", ".mdc"];
981
2078
  if (docExtensions.some((docExt) => path.endsWith(docExt))) {
982
2079
  const docMultiplier = profileConfig.fileTypeMultipliers.doc;
@@ -989,7 +2086,7 @@ function applyFileTypeMultipliers(candidate, path, ext, profileConfig, _weights)
989
2086
  }
990
2087
  return; // Don't apply impl boosts to docs
991
2088
  }
992
- // ✅ Step 3: Implementation files with path-specific boosts
2089
+ // ✅ Step 4: Implementation files with path-specific boosts
993
2090
  const implMultiplier = profileConfig.fileTypeMultipliers.impl;
994
2091
  // ✅ Use longest-prefix-match logic (order-independent)
995
2092
  const pathBoost = getPathMultiplier(path, profileConfig);
@@ -1016,16 +2113,21 @@ function applyFileTypeMultipliers(candidate, path, ext, profileConfig, _weights)
1016
2113
  }
1017
2114
  }
1018
2115
  /**
1019
- * contextBundle専用のブーストプロファイル適用(v0.7.0+: リファクタリング版)
2116
+ * contextBundle専用のブーストプロファイル適用(v1.0.0: 乗算ペナルティモデル)
1020
2117
  * 複雑度を削減するために3つのヘルパー関数に分割:
1021
2118
  * 1. applyPathBasedScoring: パスベースの加算的スコアリング
1022
- * 2. applyAdditiveFilePenalties: 強力な加算的ペナルティ
1023
- * 3. applyFileTypeMultipliers: 乗算的ペナルティ/ブースト
2119
+ * 2. applyMultiplicativeFilePenalties: 乗算的ペナルティ(blacklist/test/lock)
2120
+ * 3. applyFileTypeMultipliers: 乗算的ペナルティ/ブースト(doc/config/impl)
1024
2121
  *
1025
- * CRITICAL SAFETY RULES:
1026
- * 1. Multipliers are stored in candidate.scoreMultiplier, applied AFTER all additive scoring
1027
- * 2. profile="docs" skips documentation penalties (allows doc-focused queries)
1028
- * 3. Blacklist/test/lock/config files keep additive penalties (already very strong)
2122
+ * v1.0.0 CHANGES:
2123
+ * - 絶対ペナルティ(-100)を乗算ペナルティ(×0.01など)に置き換え
2124
+ * - すべてのペナルティが組み合わせ可能に(boost_profileとの相互作用が予測可能)
2125
+ * - v0.9.0の特別ケース処理(if profile === "docs")が不要に
2126
+ *
2127
+ * SCORING PHASES:
2128
+ * 1. Additive phase: テキストマッチ、パスマッチ、依存関係、近接性を加算
2129
+ * 2. Multiplicative phase: ペナルティとブーストを scoreMultiplier に蓄積
2130
+ * 3. Final application: score *= scoreMultiplier(最終段階で一度だけ適用)
1029
2131
  */
1030
2132
  function applyBoostProfile(candidate, row, profileConfig, weights, extractedTerms) {
1031
2133
  const { path, ext } = row;
@@ -1033,117 +2135,205 @@ function applyBoostProfile(candidate, row, profileConfig, weights, extractedTerm
1033
2135
  const fileName = path.split("/").pop() ?? "";
1034
2136
  // Step 1: パスベースのスコアリング(加算的ブースト)
1035
2137
  applyPathBasedScoring(candidate, lowerPath, weights, extractedTerms);
1036
- // Step 2: 加算的ペナルティ(ブラックリスト、テスト、lock、設定、マイグレーション)
1037
- const shouldStop = applyAdditiveFilePenalties(candidate, path, lowerPath, fileName, profileConfig);
1038
- if (shouldStop) {
1039
- return; // ペナルティが適用された場合は処理終了
1040
- }
2138
+ // Step 2: 乗算的ペナルティ(ブラックリスト、テスト、lock
2139
+ // v1.0.0: Returns true if severe penalty applied (should skip further boosts)
2140
+ const skipFurtherBoosts = applyMultiplicativeFilePenalties(candidate, path, lowerPath, fileName, weights, profileConfig);
1041
2141
  // Step 3: ファイルタイプ別の乗算的ペナルティ/ブースト
1042
- applyFileTypeMultipliers(candidate, path, ext, profileConfig, weights);
2142
+ // Skip if severe penalty was applied (blacklist/test/lock files shouldn't get impl boosts)
2143
+ if (!skipFurtherBoosts) {
2144
+ applyFileTypeMultipliers(candidate, path, ext, profileConfig, weights);
2145
+ }
1043
2146
  }
1044
2147
  export async function filesSearch(context, params) {
1045
2148
  const { db, repoId } = context;
1046
- const { query } = params;
1047
- if (!query || query.trim().length === 0) {
1048
- throw new Error("files_search requires a non-empty query. Provide a search keyword to continue.");
2149
+ const rawQuery = params.query ?? "";
2150
+ const inlineMetadata = parseInlineMetadataFilters(rawQuery);
2151
+ const paramFilters = normalizeMetadataFiltersParam(params.metadata_filters);
2152
+ const metadataFilters = mergeMetadataFilters([...inlineMetadata.filters, ...paramFilters]);
2153
+ const strictMetadataFilters = metadataFilters.filter((filter) => filter.strict);
2154
+ const hintMetadataFilters = metadataFilters.filter((filter) => !filter.strict);
2155
+ const hasStrictMetadataFilters = strictMetadataFilters.length > 0;
2156
+ const hasHintMetadataFilters = hintMetadataFilters.length > 0;
2157
+ const hasAnyMetadataFilters = metadataFilters.length > 0;
2158
+ let cleanedQuery = inlineMetadata.cleanedQuery;
2159
+ let hasTextQuery = cleanedQuery.length > 0;
2160
+ if (!hasTextQuery && hasHintMetadataFilters) {
2161
+ cleanedQuery = hintMetadataFilters
2162
+ .flatMap((filter) => filter.values)
2163
+ .map((value) => value.trim())
2164
+ .filter((value) => value.length > 0)
2165
+ .join(" ");
2166
+ cleanedQuery = cleanedQuery.trim();
2167
+ hasTextQuery = cleanedQuery.length > 0;
2168
+ }
2169
+ const metadataValueSeed = metadataFilters
2170
+ .flatMap((filter) => filter.values)
2171
+ .map((value) => value.trim())
2172
+ .filter((value) => value.length > 0)
2173
+ .join(" ");
2174
+ if (metadataValueSeed.length > 0) {
2175
+ cleanedQuery = `${cleanedQuery} ${metadataValueSeed}`.trim();
2176
+ hasTextQuery = cleanedQuery.length > 0;
2177
+ }
2178
+ if (!hasTextQuery && !hasAnyMetadataFilters) {
2179
+ throw new Error("files_search requires a query or metadata_filters. Provide keywords or structured filters to continue.");
1049
2180
  }
1050
2181
  const limit = normalizeLimit(params.limit);
1051
2182
  const ftsStatus = await getFreshFtsStatus(context);
1052
2183
  const hasFTS = ftsStatus.ready;
1053
- let sql;
1054
- let values;
1055
- if (hasFTS) {
1056
- // FTS拡張利用可能: fts_main_blob.match_bm25 を使用
1057
- const conditions = ["f.repo_id = ?"];
1058
- values = [repoId];
1059
- // 言語・拡張子フィルタ
1060
- if (params.lang) {
1061
- conditions.push("COALESCE(f.lang, '') = ?");
1062
- values.push(params.lang);
1063
- }
1064
- if (params.ext) {
1065
- conditions.push("COALESCE(f.ext, '') = ?");
1066
- values.push(params.ext);
1067
- }
1068
- if (params.path_prefix) {
1069
- conditions.push("f.path LIKE ?");
1070
- values.push(`${params.path_prefix}%`);
1071
- }
1072
- // FTS検索(BM25スコアリング)
1073
- sql = `
1074
- SELECT f.path, f.lang, f.ext, b.content, fts.score
1075
- FROM file f
1076
- JOIN blob b ON b.hash = f.blob_hash
1077
- JOIN (
1078
- SELECT hash, fts_main_blob.match_bm25(hash, ?) AS score
1079
- FROM blob
1080
- WHERE score IS NOT NULL
1081
- ) fts ON fts.hash = b.hash
1082
- WHERE ${conditions.join(" AND ")}
1083
- ORDER BY fts.score DESC
1084
- LIMIT ?
1085
- `;
1086
- values.unshift(query); // FTSクエリを先頭に追加
1087
- values.push(limit);
1088
- }
1089
- else {
1090
- // FTS拡張利用不可: ILIKE検索(Phase 1の単語分割ロジック)
1091
- const conditions = ["f.repo_id = ?", "b.content IS NOT NULL"];
1092
- values = [repoId];
1093
- const words = splitQueryWords(query);
1094
- if (words.length === 1) {
1095
- conditions.push("b.content ILIKE '%' || ? || '%'");
1096
- values.push(query);
2184
+ const metadataClauses = buildMetadataFilterConditions(strictMetadataFilters);
2185
+ const candidateRows = [];
2186
+ if (hasTextQuery) {
2187
+ let sql;
2188
+ let values;
2189
+ if (hasFTS) {
2190
+ const conditions = ["f.repo_id = ?"];
2191
+ values = [repoId];
2192
+ if (params.lang) {
2193
+ conditions.push("COALESCE(f.lang, '') = ?");
2194
+ values.push(params.lang);
2195
+ }
2196
+ if (params.ext) {
2197
+ conditions.push("COALESCE(f.ext, '') = ?");
2198
+ values.push(params.ext);
2199
+ }
2200
+ if (params.path_prefix) {
2201
+ conditions.push("f.path LIKE ?");
2202
+ values.push(`${params.path_prefix}%`);
2203
+ }
2204
+ for (const clause of metadataClauses) {
2205
+ conditions.push(clause.sql);
2206
+ values.push(...clause.params);
2207
+ }
2208
+ sql = `
2209
+ SELECT f.path, f.lang, f.ext, b.content, fts.score
2210
+ FROM file f
2211
+ JOIN blob b ON b.hash = f.blob_hash
2212
+ JOIN (
2213
+ SELECT hash, fts_main_blob.match_bm25(hash, ?) AS score
2214
+ FROM blob
2215
+ WHERE score IS NOT NULL
2216
+ ) fts ON fts.hash = b.hash
2217
+ WHERE ${conditions.join(" AND ")}
2218
+ ORDER BY fts.score DESC
2219
+ LIMIT ?
2220
+ `;
2221
+ values.unshift(cleanedQuery);
2222
+ values.push(limit);
1097
2223
  }
1098
2224
  else {
1099
- const wordConditions = words.map(() => "b.content ILIKE '%' || ? || '%'");
1100
- conditions.push(`(${wordConditions.join(" OR ")})`);
1101
- values.push(...words);
1102
- }
1103
- if (params.lang) {
1104
- conditions.push("COALESCE(f.lang, '') = ?");
1105
- values.push(params.lang);
1106
- }
1107
- if (params.ext) {
1108
- conditions.push("COALESCE(f.ext, '') = ?");
1109
- values.push(params.ext);
1110
- }
1111
- if (params.path_prefix) {
1112
- conditions.push("f.path LIKE ?");
1113
- values.push(`${params.path_prefix}%`);
1114
- }
1115
- sql = `
1116
- SELECT f.path, f.lang, f.ext, b.content
1117
- FROM file f
1118
- JOIN blob b ON b.hash = f.blob_hash
1119
- WHERE ${conditions.join(" AND ")}
1120
- ORDER BY f.path
1121
- LIMIT ?
1122
- `;
1123
- values.push(limit);
1124
- }
1125
- const rows = await db.all(sql, values);
1126
- const boostProfile = params.boost_profile ?? "default";
1127
- const profileConfig = getBoostProfile(boostProfile);
1128
- // v0.7.0+: Load configurable scoring weights for unified boosting logic
1129
- // Note: filesSearch doesn't have a separate profile parameter, uses default weights
2225
+ const conditions = ["f.repo_id = ?", "b.content IS NOT NULL"];
2226
+ values = [repoId];
2227
+ const words = splitQueryWords(cleanedQuery);
2228
+ if (words.length === 1) {
2229
+ conditions.push("b.content ILIKE '%' || ? || '%'");
2230
+ values.push(cleanedQuery);
2231
+ }
2232
+ else {
2233
+ const wordConditions = words.map(() => "b.content ILIKE '%' || ? || '%'");
2234
+ conditions.push(`(${wordConditions.join(" OR ")})`);
2235
+ values.push(...words);
2236
+ }
2237
+ if (params.lang) {
2238
+ conditions.push("COALESCE(f.lang, '') = ?");
2239
+ values.push(params.lang);
2240
+ }
2241
+ if (params.ext) {
2242
+ conditions.push("COALESCE(f.ext, '') = ?");
2243
+ values.push(params.ext);
2244
+ }
2245
+ if (params.path_prefix) {
2246
+ conditions.push("f.path LIKE ?");
2247
+ values.push(`${params.path_prefix}%`);
2248
+ }
2249
+ for (const clause of metadataClauses) {
2250
+ conditions.push(clause.sql);
2251
+ values.push(...clause.params);
2252
+ }
2253
+ sql = `
2254
+ SELECT f.path, f.lang, f.ext, b.content
2255
+ FROM file f
2256
+ JOIN blob b ON b.hash = f.blob_hash
2257
+ WHERE ${conditions.join(" AND ")}
2258
+ ORDER BY f.path
2259
+ LIMIT ?
2260
+ `;
2261
+ values.push(limit);
2262
+ }
2263
+ const textRows = await db.all(sql, values);
2264
+ candidateRows.push(...textRows);
2265
+ }
2266
+ if (!hasTextQuery && hasAnyMetadataFilters) {
2267
+ const metadataOnlyRows = await fetchMetadataOnlyCandidates(db, context.tableAvailability, repoId, metadataFilters, limit * 2);
2268
+ for (const row of metadataOnlyRows) {
2269
+ row.score = 1 + metadataFilters.length * 0.2;
2270
+ }
2271
+ candidateRows.push(...metadataOnlyRows);
2272
+ }
2273
+ if (hasTextQuery) {
2274
+ const metadataKeywords = splitQueryWords(cleanedQuery.toLowerCase()).map((kw) => kw.toLowerCase());
2275
+ if (metadataKeywords.length > 0) {
2276
+ const excludePaths = new Set(candidateRows.map((row) => row.path));
2277
+ const metadataRows = await fetchMetadataKeywordMatches(db, context.tableAvailability, repoId, metadataKeywords, metadataFilters, limit * 2, excludePaths);
2278
+ candidateRows.push(...metadataRows);
2279
+ }
2280
+ }
2281
+ if (candidateRows.length === 0) {
2282
+ return [];
2283
+ }
2284
+ const rowMap = new Map();
2285
+ for (const row of candidateRows) {
2286
+ const base = row.score ?? (hasTextQuery ? 1.0 : 0.8);
2287
+ const existing = rowMap.get(row.path);
2288
+ const existingScore = existing?.score ?? (hasTextQuery ? 1.0 : 0.8);
2289
+ if (!existing || base > existingScore) {
2290
+ rowMap.set(row.path, { ...row, score: base });
2291
+ }
2292
+ }
2293
+ const dedupedRows = Array.from(rowMap.values()).sort((a, b) => (b.score ?? 1) - (a.score ?? 1));
2294
+ const limitedRows = dedupedRows.slice(0, limit);
2295
+ const paths = limitedRows.map((row) => row.path);
2296
+ const metadataMap = await loadMetadataForPaths(db, context.tableAvailability, repoId, paths);
2297
+ const inboundCounts = await loadInboundLinkCounts(db, context.tableAvailability, repoId, paths);
2298
+ const metadataKeywordSet = hasTextQuery
2299
+ ? new Set(splitQueryWords(cleanedQuery.toLowerCase()).map((kw) => kw.toLowerCase()))
2300
+ : new Set();
2301
+ const filterValueSet = new Set(metadataFilters.flatMap((filter) => filter.values.map((value) => value.toLowerCase())));
2302
+ const boostProfile = params.boost_profile ??
2303
+ (hasHintMetadataFilters ? "balanced" : hasStrictMetadataFilters ? "docs" : "default");
2304
+ const baseProfileConfig = getBoostProfile(boostProfile);
2305
+ const cachedMerged = mergedPathMultiplierCache.get(boostProfile);
2306
+ const mergedPathMultipliers = cachedMerged ??
2307
+ mergePathPenaltyEntries(baseProfileConfig.pathMultipliers, [], serverConfig.pathPenalties);
2308
+ if (!cachedMerged) {
2309
+ mergedPathMultiplierCache.set(boostProfile, mergedPathMultipliers);
2310
+ }
2311
+ const profileConfig = {
2312
+ ...baseProfileConfig,
2313
+ pathMultipliers: mergedPathMultipliers,
2314
+ };
1130
2315
  const weights = loadScoringProfile(null);
1131
2316
  const options = parseOutputOptions(params);
1132
- return rows
2317
+ const previewQuery = hasTextQuery
2318
+ ? cleanedQuery
2319
+ : (metadataFilters[0]?.values[0] ?? rawQuery.trim());
2320
+ return limitedRows
1133
2321
  .map((row) => {
1134
2322
  let preview;
1135
2323
  let matchLine;
2324
+ const previewSource = previewQuery || row.path;
1136
2325
  if (options.includePreview) {
1137
- // Full preview generation for non-compact mode
1138
- const previewData = buildPreview(row.content ?? "", query);
2326
+ const previewData = buildPreview(row.content ?? "", previewSource);
1139
2327
  preview = previewData.preview;
1140
2328
  matchLine = previewData.line;
1141
2329
  }
1142
2330
  else {
1143
- // Lightweight: extract only line number without preview
1144
- matchLine = findFirstMatchLine(row.content ?? "", query);
2331
+ matchLine = findFirstMatchLine(row.content ?? "", previewSource);
1145
2332
  }
1146
- const baseScore = row.score ?? 1.0; // FTS時はBM25スコア、ILIKE時は1.0
2333
+ const metadataEntries = metadataMap.get(row.path);
2334
+ const metadataBoost = computeMetadataBoost(metadataEntries, metadataKeywordSet, filterValueSet);
2335
+ const inboundBoost = computeInboundLinkBoost(inboundCounts.get(row.path));
2336
+ const baseScore = (row.score ?? (hasTextQuery ? 1.0 : 0.8)) + metadataBoost + inboundBoost;
1147
2337
  const boostedScore = boostProfile === "none"
1148
2338
  ? baseScore
1149
2339
  : applyFileTypeBoost(row.path, baseScore, profileConfig, weights);
@@ -1159,96 +2349,20 @@ export async function filesSearch(context, params) {
1159
2349
  }
1160
2350
  return result;
1161
2351
  })
1162
- .sort((a, b) => b.score - a.score); // スコアの高い順に再ソート
1163
- }
1164
- export async function snippetsGet(context, params) {
1165
- const { db, repoId } = context;
1166
- if (!params.path) {
1167
- throw new Error("snippets_get requires a file path. Specify a tracked text file path to continue.");
1168
- }
1169
- const rows = await db.all(`
1170
- SELECT f.path, f.lang, f.ext, f.is_binary, b.content
1171
- FROM file f
1172
- JOIN blob b ON b.hash = f.blob_hash
1173
- WHERE f.repo_id = ? AND f.path = ?
1174
- LIMIT 1
1175
- `, [repoId, params.path]);
1176
- if (rows.length === 0) {
1177
- throw new Error("Requested snippet file was not indexed. Re-run the indexer or choose another path.");
1178
- }
1179
- const row = rows[0];
1180
- if (!row) {
1181
- throw new Error("Requested snippet file was not indexed. Re-run the indexer or choose another path.");
1182
- }
1183
- if (row.is_binary) {
1184
- throw new Error("Binary snippets are not supported. Choose a text file to preview its content.");
1185
- }
1186
- if (row.content === null) {
1187
- throw new Error("Snippet content is unavailable. Re-run the indexer to refresh DuckDB state.");
1188
- }
1189
- const lines = row.content.split(/\r?\n/);
1190
- const totalLines = lines.length;
1191
- const snippetRows = await db.all(`
1192
- SELECT s.snippet_id, s.start_line, s.end_line, s.symbol_id, sym.name AS symbol_name, sym.kind AS symbol_kind
1193
- FROM snippet s
1194
- LEFT JOIN symbol sym
1195
- ON sym.repo_id = s.repo_id
1196
- AND sym.path = s.path
1197
- AND sym.symbol_id = s.symbol_id
1198
- WHERE s.repo_id = ? AND s.path = ?
1199
- ORDER BY s.start_line
1200
- `, [repoId, params.path]);
1201
- const requestedStart = params.start_line ?? 1;
1202
- const requestedEnd = params.end_line ?? Math.min(totalLines, requestedStart + DEFAULT_SNIPPET_WINDOW - 1);
1203
- const useSymbolSnippets = snippetRows.length > 0 && params.end_line === undefined;
1204
- let snippetSelection = null;
1205
- if (useSymbolSnippets) {
1206
- snippetSelection =
1207
- snippetRows.find((snippet) => requestedStart >= snippet.start_line && requestedStart <= snippet.end_line) ?? null;
1208
- if (!snippetSelection) {
1209
- const firstSnippet = snippetRows[0];
1210
- if (firstSnippet && requestedStart < firstSnippet.start_line) {
1211
- snippetSelection = firstSnippet;
1212
- }
1213
- else {
1214
- snippetSelection = snippetRows[snippetRows.length - 1] ?? null;
1215
- }
1216
- }
1217
- }
1218
- let startLine;
1219
- let endLine;
1220
- let symbolName = null;
1221
- let symbolKind = null;
1222
- if (snippetSelection) {
1223
- startLine = snippetSelection.start_line;
1224
- endLine = snippetSelection.end_line;
1225
- symbolName = snippetSelection.symbol_name;
1226
- symbolKind = snippetSelection.symbol_kind;
1227
- }
1228
- else {
1229
- startLine = Math.max(1, Math.min(totalLines, requestedStart));
1230
- endLine = Math.max(startLine, Math.min(totalLines, requestedEnd));
1231
- }
1232
- const isCompact = params.compact === true;
1233
- const addLineNumbers = params.includeLineNumbers === true && !isCompact;
1234
- let content;
1235
- if (!isCompact) {
1236
- const snippetContent = lines.slice(startLine - 1, endLine).join("\n");
1237
- content = addLineNumbers ? prependLineNumbers(snippetContent, startLine) : snippetContent;
1238
- }
1239
- return {
1240
- path: row.path,
1241
- startLine,
1242
- endLine,
1243
- ...(content !== undefined && { content }),
1244
- totalLines,
1245
- symbolName,
1246
- symbolKind,
1247
- };
2352
+ .filter((result) => result.score > SCORE_FILTER_THRESHOLD) // v1.0.0: Filter out extremely low-scored files (multiplicative penalties)
2353
+ .sort((a, b) => b.score - a.score);
1248
2354
  }
2355
+ // snippetsGet has been extracted to ./handlers/snippets-get.ts and re-exported above
1249
2356
  // ============================================================================
1250
2357
  // Issue #68: Path/Large File Penalty Helper Functions
1251
2358
  // ============================================================================
2359
+ /**
2360
+ * v1.0.0: Score filtering threshold for multiplicative penalty model
2361
+ * Files with score < threshold are filtered out (unless they are hint paths)
2362
+ * Default: 0.05 removes files with >95% penalty while keeping relevant files
2363
+ * Can be overridden via KIRI_SCORE_THRESHOLD environment variable
2364
+ */
2365
+ const SCORE_FILTER_THRESHOLD = parseFloat(process.env.KIRI_SCORE_THRESHOLD ?? "0.05");
1252
2366
  /**
1253
2367
  * 環境変数からペナルティ機能フラグを読み取る
1254
2368
  */
@@ -1505,15 +2619,40 @@ function computeGraduatedPenalty(pathMatchHits, queryStats, config) {
1505
2619
  return config.tier2Delta;
1506
2620
  return 0; // pathMatchHits >= 3: no penalty
1507
2621
  }
1508
- export async function contextBundle(context, params) {
2622
+ async function contextBundleImpl(context, params) {
1509
2623
  context.warningManager.startRequest();
1510
2624
  const { db, repoId } = context;
1511
- const goal = params.goal?.trim() ?? "";
1512
- if (goal.length === 0) {
2625
+ const rawGoal = params.goal?.trim() ?? "";
2626
+ if (rawGoal.length === 0) {
1513
2627
  throw new Error("context_bundle requires a non-empty goal. Describe your objective to receive context.");
1514
2628
  }
2629
+ if (process.env.KIRI_TRACE_METADATA === "1") {
2630
+ console.info(`[metadata-trace-env] goal=${rawGoal}`);
2631
+ }
2632
+ const inlineMetadata = parseInlineMetadataFilters(rawGoal);
2633
+ const paramFilters = normalizeMetadataFiltersParam(params.metadata_filters);
2634
+ const metadataFilters = mergeMetadataFilters([...inlineMetadata.filters, ...paramFilters]);
2635
+ const strictMetadataFilters = metadataFilters.filter((filter) => filter.strict);
2636
+ const hintMetadataFilters = metadataFilters.filter((filter) => !filter.strict);
2637
+ const hasStrictMetadataFilters = strictMetadataFilters.length > 0;
2638
+ const hasHintMetadataFilters = hintMetadataFilters.length > 0;
2639
+ const hasAnyMetadataFilters = metadataFilters.length > 0;
2640
+ const goal = inlineMetadata.cleanedQuery.length > 0 ? inlineMetadata.cleanedQuery : rawGoal;
2641
+ if (process.env.KIRI_TRACE_METADATA === "1") {
2642
+ console.info("[metadata-trace]", JSON.stringify({
2643
+ rawGoal,
2644
+ cleanedGoal: goal,
2645
+ inlineFilters: inlineMetadata.filters,
2646
+ paramFilters,
2647
+ mergedFilters: metadataFilters,
2648
+ }));
2649
+ }
1515
2650
  const limit = normalizeBundleLimit(params.limit);
1516
2651
  const artifacts = params.artifacts ?? {};
2652
+ const artifactHints = normalizeArtifactHints(artifacts.hints);
2653
+ const hintBuckets = bucketArtifactHints(artifactHints);
2654
+ const artifactPathHints = hintBuckets.pathHints;
2655
+ const substringHints = hintBuckets.substringHints;
1517
2656
  const includeTokensEstimate = params.includeTokensEstimate === true;
1518
2657
  const isCompact = params.compact === true;
1519
2658
  // 項目2: トークンバジェット保護警告
@@ -1536,9 +2675,20 @@ export async function contextBundle(context, params) {
1536
2675
  if (artifacts.editing_path) {
1537
2676
  keywordSources.push(artifacts.editing_path);
1538
2677
  }
2678
+ if (artifactHints.length > 0) {
2679
+ keywordSources.push(artifactHints.join(" "));
2680
+ }
2681
+ if (hasAnyMetadataFilters) {
2682
+ const filterSeed = metadataFilters
2683
+ .map((filter) => `${filter.source ?? "meta"}:${filter.key}=${filter.values.join(",")}`)
2684
+ .join(" ");
2685
+ keywordSources.push(filterSeed);
2686
+ }
1539
2687
  const semanticSeed = keywordSources.join(" ");
1540
2688
  const queryEmbedding = generateEmbedding(semanticSeed)?.values ?? null;
1541
2689
  const extractedTerms = extractKeywords(semanticSeed);
2690
+ const segmentPreview = extractedTerms.pathSegments.slice(0, AUTO_PATH_SEGMENT_LIMIT).join(",");
2691
+ traceSearch(`terms repo=${repoId} id=${params.requestId ?? "n/a"} keywords=${extractedTerms.keywords.length} phrases=${extractedTerms.phrases.length} pathSegments=${extractedTerms.pathSegments.length} segs=[${segmentPreview}]`);
1542
2692
  // フォールバック: editing_pathからキーワードを抽出
1543
2693
  if (extractedTerms.phrases.length === 0 &&
1544
2694
  extractedTerms.keywords.length === 0 &&
@@ -1553,13 +2703,20 @@ export async function contextBundle(context, params) {
1553
2703
  const stringMatchSeeds = new Set();
1554
2704
  const fileCache = new Map();
1555
2705
  // ✅ Cache boost profile config to avoid redundant lookups in hot path
1556
- const boostProfile = params.boost_profile ?? "default";
1557
- const profileConfig = getBoostProfile(boostProfile);
2706
+ const boostProfile = params.boost_profile ??
2707
+ (hasHintMetadataFilters ? "balanced" : hasStrictMetadataFilters ? "docs" : "default");
2708
+ const baseProfileConfig = getBoostProfile(boostProfile);
2709
+ const profileConfig = {
2710
+ ...baseProfileConfig,
2711
+ pathMultipliers: loadPathPenalties(baseProfileConfig.pathMultipliers),
2712
+ };
1558
2713
  // フレーズマッチング(高い重み: textMatch × 2)- 統合クエリでパフォーマンス改善
1559
2714
  if (extractedTerms.phrases.length > 0) {
1560
2715
  const phrasePlaceholders = extractedTerms.phrases
1561
2716
  .map(() => "b.content ILIKE '%' || ? || '%'")
1562
2717
  .join(" OR ");
2718
+ // DEBUG: Log SQL query parameters for troubleshooting
2719
+ traceSearch(`Executing phrase match query with repo_id=${repoId}, phrases=${JSON.stringify(extractedTerms.phrases)}`);
1563
2720
  const rows = await db.all(`
1564
2721
  SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
1565
2722
  FROM file f
@@ -1573,6 +2730,17 @@ export async function contextBundle(context, params) {
1573
2730
  ORDER BY f.path
1574
2731
  LIMIT ?
1575
2732
  `, [repoId, ...extractedTerms.phrases, MAX_MATCHES_PER_KEYWORD * extractedTerms.phrases.length]);
2733
+ // DEBUG: Log returned paths and verify they match expected repo_id
2734
+ if (rows.length > 0) {
2735
+ traceSearch(`Phrase match returned ${rows.length} rows. Sample paths: ${rows
2736
+ .slice(0, 3)
2737
+ .map((r) => r.path)
2738
+ .join(", ")}`);
2739
+ // Verify repo_id of returned files
2740
+ const pathsToCheck = rows.slice(0, 3).map((r) => r.path);
2741
+ const verification = await db.all(`SELECT path, repo_id FROM file WHERE path IN (${pathsToCheck.map(() => "?").join(", ")}) LIMIT 3`, pathsToCheck);
2742
+ traceSearch(`Repo ID verification`, verification);
2743
+ }
1576
2744
  for (const row of rows) {
1577
2745
  if (row.content === null) {
1578
2746
  continue;
@@ -1584,6 +2752,7 @@ export async function contextBundle(context, params) {
1584
2752
  continue; // Should not happen, but defensive check
1585
2753
  }
1586
2754
  const candidate = ensureCandidate(candidates, row.path);
2755
+ candidate.phraseHits += matchedPhrases.length;
1587
2756
  // 各マッチしたフレーズに対してスコアリング
1588
2757
  for (const phrase of matchedPhrases) {
1589
2758
  // フレーズマッチは通常の2倍のスコア
@@ -1614,6 +2783,7 @@ export async function contextBundle(context, params) {
1614
2783
  });
1615
2784
  }
1616
2785
  }
2786
+ traceSearch(`phrase search produced ${rows.length} rows, candidates=${candidates.size}`);
1617
2787
  }
1618
2788
  // キーワードマッチング(通常の重み)- 統合クエリでパフォーマンス改善
1619
2789
  if (extractedTerms.keywords.length > 0) {
@@ -1648,6 +2818,7 @@ export async function contextBundle(context, params) {
1648
2818
  for (const keyword of matchedKeywords) {
1649
2819
  candidate.score += weights.textMatch;
1650
2820
  candidate.reasons.add(`text:${keyword}`);
2821
+ candidate.keywordHits.add(keyword);
1651
2822
  }
1652
2823
  // Apply boost profile once per file
1653
2824
  if (boostProfile !== "none") {
@@ -1673,6 +2844,124 @@ export async function contextBundle(context, params) {
1673
2844
  });
1674
2845
  }
1675
2846
  }
2847
+ traceSearch(`keyword search produced ${rows.length} rows, candidates=${candidates.size}`);
2848
+ }
2849
+ const fallbackTerms = Array.from(new Set([...extractedTerms.phrases, ...extractedTerms.keywords, ...extractedTerms.pathSegments]
2850
+ .map((term) => term.toLowerCase())
2851
+ .filter((term) => term.length >= 3))).slice(0, PATH_FALLBACK_TERMS_LIMIT);
2852
+ if (fallbackTerms.length > 0) {
2853
+ const fallbackRows = await fetchPathFallbackCandidates(db, repoId, fallbackTerms, Math.min(limit * 2, PATH_FALLBACK_LIMIT));
2854
+ const fallbackReason = stringMatchSeeds.size === 0
2855
+ ? "no-string-match"
2856
+ : candidates.size < limit
2857
+ ? "low-candidates"
2858
+ : "supplemental";
2859
+ traceSearch(`path fallback triggered (${fallbackReason}) terms=${JSON.stringify(fallbackTerms)} rows=${fallbackRows.length}`);
2860
+ const fallbackWeight = stringMatchSeeds.size === 0 ? weights.pathMatch * 0.75 : weights.pathMatch * 0.2;
2861
+ for (const row of fallbackRows) {
2862
+ const candidate = ensureCandidate(candidates, row.path);
2863
+ candidate.pathFallbackReason = fallbackReason;
2864
+ candidate.score += fallbackWeight;
2865
+ candidate.reasons.add("fallback:path");
2866
+ const contentLower = row.content?.toLowerCase() ?? "";
2867
+ if (contentLower.length > 0) {
2868
+ let textHits = 0;
2869
+ for (const term of fallbackTerms) {
2870
+ if (contentLower.includes(term)) {
2871
+ textHits += 1;
2872
+ candidate.keywordHits.add(term);
2873
+ }
2874
+ }
2875
+ candidate.fallbackTextHits += textHits;
2876
+ if (textHits > 0) {
2877
+ const textBoost = textHits * weights.textMatch * 0.15;
2878
+ candidate.score += textBoost;
2879
+ candidate.reasons.add(`fallback:content:${textHits}`);
2880
+ }
2881
+ }
2882
+ candidate.matchLine ??= 1;
2883
+ candidate.lang ??= row.lang;
2884
+ candidate.ext ??= row.ext;
2885
+ candidate.totalLines ??= row.content?.split(/\r?\n/).length ?? null;
2886
+ candidate.content ??= row.content;
2887
+ candidate.embedding ??= parseEmbedding(row.vector_json ?? null, row.vector_dims ?? null);
2888
+ if (boostProfile !== "none") {
2889
+ applyBoostProfile(candidate, row, profileConfig, weights, extractedTerms);
2890
+ }
2891
+ stringMatchSeeds.add(row.path);
2892
+ if (!fileCache.has(row.path) && row.content) {
2893
+ fileCache.set(row.path, {
2894
+ content: row.content,
2895
+ lang: row.lang,
2896
+ ext: row.ext,
2897
+ totalLines: candidate.totalLines ?? 0,
2898
+ embedding: candidate.embedding,
2899
+ });
2900
+ }
2901
+ }
2902
+ // Drop fallback-only candidates with zero text evidence before trimming
2903
+ for (const [path, candidate] of Array.from(candidates.entries())) {
2904
+ const isFallbackOnly = candidate.reasons.has("fallback:path") &&
2905
+ candidate.keywordHits.size === 0 &&
2906
+ candidate.phraseHits === 0;
2907
+ const hasTextEvidence = candidate.fallbackTextHits > 0;
2908
+ if (isFallbackOnly && !hasTextEvidence) {
2909
+ candidates.delete(path);
2910
+ }
2911
+ }
2912
+ // Demote fallback-only hits without text evidence
2913
+ for (const candidate of candidates.values()) {
2914
+ const isFallbackOnly = candidate.reasons.has("fallback:path") &&
2915
+ candidate.keywordHits.size === 0 &&
2916
+ candidate.phraseHits === 0;
2917
+ const hasTextEvidence = candidate.fallbackTextHits > 0;
2918
+ if (isFallbackOnly && !hasTextEvidence) {
2919
+ candidate.scoreMultiplier *= 0.5;
2920
+ candidate.reasons.add("penalty:fallback-no-text");
2921
+ }
2922
+ }
2923
+ if (fallbackRows.length > PATH_FALLBACK_KEEP) {
2924
+ const fallbackOnly = Array.from(candidates.entries())
2925
+ .filter(([_, candidate]) => candidate.reasons.has("fallback:path") &&
2926
+ candidate.keywordHits.size === 0 &&
2927
+ candidate.phraseHits === 0)
2928
+ .sort((a, b) => b[1].score - a[1].score);
2929
+ const toDrop = fallbackOnly.slice(PATH_FALLBACK_KEEP);
2930
+ for (const [path] of toDrop) {
2931
+ candidates.delete(path);
2932
+ }
2933
+ traceSearch(`path fallback trimmed kept=${PATH_FALLBACK_KEEP} dropped=${toDrop.length} candidates=${candidates.size}`);
2934
+ }
2935
+ }
2936
+ if (extractedTerms.keywords.length > 0 || extractedTerms.phrases.length > 0) {
2937
+ for (const candidate of candidates.values()) {
2938
+ applyCoverageBoost(candidate, extractedTerms, weights);
2939
+ }
2940
+ }
2941
+ const artifactPathTargets = artifactPathHints.map((hintPath) => ({
2942
+ path: hintPath,
2943
+ sourceHint: hintPath,
2944
+ origin: "artifact",
2945
+ }));
2946
+ const dictionaryPathTargets = await fetchDictionaryPathHints(db, context.tableAvailability, repoId, substringHints, HINT_DICTIONARY_LIMIT);
2947
+ const { list: resolvedPathHintTargets, meta: hintSeedMeta } = createHintSeedMeta([
2948
+ ...artifactPathTargets,
2949
+ ...dictionaryPathTargets,
2950
+ ]);
2951
+ if (resolvedPathHintTargets.length > 0) {
2952
+ await applyPathHintPromotions({
2953
+ db,
2954
+ tableAvailability: context.tableAvailability,
2955
+ repoId,
2956
+ hintTargets: resolvedPathHintTargets,
2957
+ candidates,
2958
+ fileCache,
2959
+ weights,
2960
+ hintSeedMeta,
2961
+ });
2962
+ }
2963
+ if (substringHints.length > 0) {
2964
+ await addHintSubstringMatches(db, context.tableAvailability, repoId, substringHints, candidates, HINT_SUBSTRING_LIMIT, HINT_SUBSTRING_BOOST);
1676
2965
  }
1677
2966
  if (artifacts.editing_path) {
1678
2967
  const editingCandidate = ensureCandidate(candidates, artifacts.editing_path);
@@ -1681,7 +2970,6 @@ export async function contextBundle(context, params) {
1681
2970
  editingCandidate.matchLine ??= 1;
1682
2971
  }
1683
2972
  // SQL injection防御: ファイルパスの検証パターン
1684
- const SAFE_PATH_PATTERN = /^[a-zA-Z0-9_.\-/]+$/;
1685
2973
  const dependencySeeds = new Set();
1686
2974
  for (const pathSeed of stringMatchSeeds) {
1687
2975
  if (!SAFE_PATH_PATTERN.test(pathSeed)) {
@@ -1695,10 +2983,13 @@ export async function contextBundle(context, params) {
1695
2983
  }
1696
2984
  if (artifacts.editing_path) {
1697
2985
  if (!SAFE_PATH_PATTERN.test(artifacts.editing_path)) {
1698
- throw new Error(`Invalid editing_path format. Path must contain only alphanumeric characters, underscores, dots, hyphens, and forward slashes.`);
2986
+ throw new Error(`Invalid editing_path format: ${artifacts.editing_path}. Use only A-Z, 0-9, _, ., -, / characters.`);
1699
2987
  }
1700
2988
  dependencySeeds.add(artifacts.editing_path);
1701
2989
  }
2990
+ for (const target of resolvedPathHintTargets) {
2991
+ dependencySeeds.add(target.path);
2992
+ }
1702
2993
  if (dependencySeeds.size > 0) {
1703
2994
  // SQL injection防御: プレースホルダー生成前にサイズを検証
1704
2995
  if (dependencySeeds.size > MAX_DEPENDENCY_SEEDS_QUERY_LIMIT) {
@@ -1708,7 +2999,7 @@ export async function contextBundle(context, params) {
1708
2999
  // 防御的チェック: プレースホルダーが正しい形式であることを確認
1709
3000
  // 期待される形式: "?, ?, ..." (クエスチョンマーク、カンマ、スペースのみ)
1710
3001
  if (!/^(\?)(,\s*\?)*$/.test(placeholders)) {
1711
- throw new Error("Invalid placeholder generation detected. Operation aborted for safety.");
3002
+ throw new Error("Invalid dependency placeholder sequence detected. Remove unsafe dependency seeds and retry the request.");
1712
3003
  }
1713
3004
  const depRows = await db.all(`
1714
3005
  SELECT src_path, dst_kind, dst, rel
@@ -1746,31 +3037,74 @@ export async function contextBundle(context, params) {
1746
3037
  }
1747
3038
  }
1748
3039
  }
1749
- const materializedCandidates = [];
1750
- for (const candidate of candidates.values()) {
1751
- if (!candidate.content) {
1752
- const cached = fileCache.get(candidate.path);
1753
- if (cached) {
1754
- candidate.content = cached.content;
1755
- candidate.lang = cached.lang;
1756
- candidate.ext = cached.ext;
1757
- candidate.totalLines = cached.totalLines;
1758
- candidate.embedding = cached.embedding;
3040
+ const materializeCandidates = async () => {
3041
+ const result = [];
3042
+ for (const candidate of candidates.values()) {
3043
+ if (isSuppressedPath(candidate.path)) {
3044
+ continue;
1759
3045
  }
1760
- else {
1761
- const loaded = await loadFileContent(db, repoId, candidate.path);
1762
- if (!loaded) {
1763
- continue;
3046
+ if (!candidate.content) {
3047
+ const cached = fileCache.get(candidate.path);
3048
+ if (cached) {
3049
+ candidate.content = cached.content;
3050
+ candidate.lang = cached.lang;
3051
+ candidate.ext = cached.ext;
3052
+ candidate.totalLines = cached.totalLines;
3053
+ candidate.embedding = cached.embedding;
1764
3054
  }
1765
- candidate.content = loaded.content;
1766
- candidate.lang = loaded.lang;
1767
- candidate.ext = loaded.ext;
1768
- candidate.totalLines = loaded.totalLines;
1769
- candidate.embedding = loaded.embedding;
1770
- fileCache.set(candidate.path, loaded);
3055
+ else {
3056
+ const loaded = await loadFileContent(db, repoId, candidate.path);
3057
+ if (!loaded) {
3058
+ continue;
3059
+ }
3060
+ candidate.content = loaded.content;
3061
+ candidate.lang = loaded.lang;
3062
+ candidate.ext = loaded.ext;
3063
+ candidate.totalLines = loaded.totalLines;
3064
+ candidate.embedding = loaded.embedding;
3065
+ fileCache.set(candidate.path, loaded);
3066
+ }
3067
+ }
3068
+ result.push(candidate);
3069
+ }
3070
+ return result;
3071
+ };
3072
+ const addMetadataFallbackCandidates = async () => {
3073
+ if (!hasAnyMetadataFilters) {
3074
+ return;
3075
+ }
3076
+ const metadataRows = await fetchMetadataOnlyCandidates(db, context.tableAvailability, repoId, metadataFilters, limit * 2);
3077
+ if (metadataRows.length === 0) {
3078
+ return;
3079
+ }
3080
+ for (const row of metadataRows) {
3081
+ const candidate = ensureCandidate(candidates, row.path);
3082
+ if (row.content) {
3083
+ candidate.content = row.content;
3084
+ candidate.totalLines = row.content.split(/\r?\n/).length;
3085
+ fileCache.set(row.path, {
3086
+ content: row.content,
3087
+ lang: row.lang,
3088
+ ext: row.ext,
3089
+ totalLines: candidate.totalLines,
3090
+ embedding: candidate.embedding,
3091
+ });
1771
3092
  }
3093
+ candidate.lang ??= row.lang;
3094
+ candidate.ext ??= row.ext;
3095
+ candidate.matchLine ??= 1;
3096
+ candidate.score = Math.max(candidate.score, 1 + metadataFilters.length * 0.2);
1772
3097
  }
1773
- materializedCandidates.push(candidate);
3098
+ };
3099
+ if (hasAnyMetadataFilters) {
3100
+ await addMetadataFallbackCandidates();
3101
+ }
3102
+ let materializedCandidates = await materializeCandidates();
3103
+ traceSearch(`materialized candidates: ${materializedCandidates.length}`);
3104
+ if (materializedCandidates.length === 0 && hasAnyMetadataFilters) {
3105
+ await addMetadataFallbackCandidates();
3106
+ materializedCandidates = await materializeCandidates();
3107
+ traceSearch(`materialized candidates after metadata fallback: ${materializedCandidates.length}`);
1774
3108
  }
1775
3109
  if (materializedCandidates.length === 0) {
1776
3110
  // Get warnings from WarningManager (includes breaking change notification if applicable)
@@ -1781,6 +3115,72 @@ export async function contextBundle(context, params) {
1781
3115
  ...(warnings.length > 0 && { warnings }),
1782
3116
  };
1783
3117
  }
3118
+ const metadataKeywordSet = new Set(extractedTerms.keywords.map((keyword) => keyword.toLowerCase()));
3119
+ const filterValueSet = new Set(metadataFilters.flatMap((filter) => filter.values.map((value) => value.toLowerCase())));
3120
+ let metadataEntriesMap;
3121
+ if (hasAnyMetadataFilters || metadataKeywordSet.size > 0 || filterValueSet.size > 0) {
3122
+ metadataEntriesMap = await loadMetadataForPaths(db, context.tableAvailability, repoId, materializedCandidates.map((candidate) => candidate.path));
3123
+ }
3124
+ if (hasStrictMetadataFilters) {
3125
+ metadataEntriesMap ??= new Map();
3126
+ for (let i = materializedCandidates.length - 1; i >= 0; i--) {
3127
+ const candidate = materializedCandidates[i];
3128
+ if (!candidate) {
3129
+ continue; // Skip undefined entries
3130
+ }
3131
+ const entries = metadataEntriesMap.get(candidate.path);
3132
+ const matchesFilters = candidateMatchesMetadataFilters(entries, strictMetadataFilters);
3133
+ if (!matchesFilters) {
3134
+ materializedCandidates.splice(i, 1);
3135
+ continue;
3136
+ }
3137
+ candidate.reasons.add("metadata:filter");
3138
+ if (process.env.KIRI_TRACE_METADATA === "1") {
3139
+ console.info(`[metadata-trace-match] path=${candidate.path}`);
3140
+ }
3141
+ }
3142
+ if (materializedCandidates.length === 0 && hasAnyMetadataFilters) {
3143
+ await addMetadataFallbackCandidates();
3144
+ materializedCandidates = await materializeCandidates();
3145
+ }
3146
+ if (materializedCandidates.length === 0) {
3147
+ const warnings = [...context.warningManager.responseWarnings];
3148
+ return {
3149
+ context: [],
3150
+ ...(includeTokensEstimate && { tokens_estimate: 0 }),
3151
+ ...(warnings.length > 0 && { warnings }),
3152
+ };
3153
+ }
3154
+ }
3155
+ if (hasHintMetadataFilters) {
3156
+ metadataEntriesMap ??= new Map();
3157
+ for (const candidate of materializedCandidates) {
3158
+ const entries = metadataEntriesMap.get(candidate.path);
3159
+ const matchesHints = candidateMatchesMetadataFilters(entries, hintMetadataFilters);
3160
+ if (matchesHints) {
3161
+ candidate.score += METADATA_HINT_BONUS;
3162
+ candidate.reasons.add("metadata:hint");
3163
+ }
3164
+ }
3165
+ }
3166
+ const inboundCounts = await loadInboundLinkCounts(db, context.tableAvailability, repoId, materializedCandidates.map((candidate) => candidate.path));
3167
+ if (metadataEntriesMap) {
3168
+ for (const candidate of materializedCandidates) {
3169
+ const entries = metadataEntriesMap.get(candidate.path);
3170
+ const metadataBoost = computeMetadataBoost(entries, metadataKeywordSet, filterValueSet);
3171
+ if (metadataBoost > 0) {
3172
+ candidate.score += metadataBoost;
3173
+ candidate.reasons.add("boost:metadata");
3174
+ }
3175
+ }
3176
+ }
3177
+ for (const candidate of materializedCandidates) {
3178
+ const linkBoost = computeInboundLinkBoost(inboundCounts.get(candidate.path));
3179
+ if (linkBoost > 0) {
3180
+ candidate.score += linkBoost;
3181
+ candidate.reasons.add("boost:links");
3182
+ }
3183
+ }
1784
3184
  applyStructuralScores(materializedCandidates, queryEmbedding, weights.structural);
1785
3185
  // ✅ CRITICAL SAFETY: Apply multipliers AFTER all additive scoring (v0.7.0)
1786
3186
  // Only apply to positive scores to prevent negative score inversion
@@ -1819,18 +3219,39 @@ export async function contextBundle(context, params) {
1819
3219
  const telemetry = computePenaltyTelemetry(materializedCandidates);
1820
3220
  logPenaltyTelemetry(telemetry, queryStats);
1821
3221
  }
1822
- const sortedCandidates = materializedCandidates
1823
- .filter((candidate) => candidate.score > 0) // Filter out candidates with negative or zero scores
3222
+ // v1.0.0: Filter out extremely low-scored candidates (result of multiplicative penalties)
3223
+ // Threshold removes files with >95% penalty while keeping reasonably relevant files
3224
+ // Hint paths are exempt from this threshold (always included if score > 0)
3225
+ const hintPathSet = new Set(resolvedPathHintTargets.map((target) => target.path));
3226
+ const rankedCandidates = materializedCandidates
3227
+ .filter((candidate) => candidate.score > SCORE_FILTER_THRESHOLD ||
3228
+ (candidate.score > 0 && hintPathSet.has(candidate.path)))
1824
3229
  .sort((a, b) => {
1825
3230
  if (b.score === a.score) {
1826
3231
  return a.path.localeCompare(b.path);
1827
3232
  }
1828
3233
  return b.score - a.score;
1829
- })
1830
- .slice(0, limit);
1831
- const maxScore = Math.max(...sortedCandidates.map((candidate) => candidate.score));
3234
+ });
3235
+ if (TRACE_SEARCH) {
3236
+ const sample = rankedCandidates.slice(0, 5).map((candidate) => ({
3237
+ path: candidate.path,
3238
+ score: Number(candidate.score.toFixed(3)),
3239
+ reasons: Array.from(candidate.reasons).slice(0, 3),
3240
+ }));
3241
+ traceSearch(`ranked candidates=${rankedCandidates.length}`, sample);
3242
+ }
3243
+ const prioritizedCandidates = prioritizeHintCandidates(rankedCandidates, resolvedPathHintTargets.map((target) => target.path), limit);
3244
+ if (prioritizedCandidates.length === 0) {
3245
+ const warnings = [...context.warningManager.responseWarnings];
3246
+ return {
3247
+ context: [],
3248
+ ...(includeTokensEstimate && { tokens_estimate: 0 }),
3249
+ ...(warnings.length > 0 && { warnings }),
3250
+ };
3251
+ }
3252
+ const maxScore = Math.max(...prioritizedCandidates.map((candidate) => candidate.score));
1832
3253
  const results = [];
1833
- for (const candidate of sortedCandidates) {
3254
+ for (const candidate of prioritizedCandidates) {
1834
3255
  if (!candidate.content) {
1835
3256
  continue;
1836
3257
  }
@@ -1858,6 +3279,23 @@ export async function contextBundle(context, params) {
1858
3279
  startLine = Math.max(1, matchLine - windowHalf);
1859
3280
  endLine = Math.min(totalLines === 0 ? matchLine + windowHalf : totalLines, startLine + FALLBACK_SNIPPET_WINDOW - 1);
1860
3281
  }
3282
+ if (CLAMP_SNIPPETS_ENABLED) {
3283
+ // Clamp snippet length to FALLBACK_SNIPPET_WINDOW even when symbol spans large regions
3284
+ const maxWindow = FALLBACK_SNIPPET_WINDOW;
3285
+ const selectedEnd = selected ? selected.end_line : endLine;
3286
+ const selectedStart = selected ? selected.start_line : startLine;
3287
+ if (endLine - startLine + 1 > maxWindow) {
3288
+ const anchor = candidate.matchLine ?? startLine;
3289
+ let clampedStart = Math.max(selectedStart, anchor - Math.floor(maxWindow / 2));
3290
+ let clampedEnd = clampedStart + maxWindow - 1;
3291
+ if (clampedEnd > selectedEnd) {
3292
+ clampedEnd = selectedEnd;
3293
+ clampedStart = Math.max(selectedStart, clampedEnd - maxWindow + 1);
3294
+ }
3295
+ startLine = clampedStart;
3296
+ endLine = Math.max(clampedStart, clampedEnd);
3297
+ }
3298
+ }
1861
3299
  if (endLine < startLine) {
1862
3300
  endLine = startLine;
1863
3301
  }
@@ -1885,7 +3323,7 @@ export async function contextBundle(context, params) {
1885
3323
  let tokensEstimate;
1886
3324
  if (includeTokensEstimate) {
1887
3325
  tokensEstimate = results.reduce((acc, item) => {
1888
- const candidate = sortedCandidates.find((c) => c.path === item.path);
3326
+ const candidate = prioritizedCandidates.find((c) => c.path === item.path);
1889
3327
  if (candidate && candidate.content) {
1890
3328
  return acc + estimateTokensFromContent(candidate.content, item.range[0], item.range[1]);
1891
3329
  }
@@ -1896,8 +3334,13 @@ export async function contextBundle(context, params) {
1896
3334
  }
1897
3335
  // Get warnings from WarningManager (includes breaking change notification if applicable)
1898
3336
  const warnings = [...context.warningManager.responseWarnings];
3337
+ const shouldFilterResults = FINAL_RESULT_SUPPRESSION_ENABLED && SUPPRESS_NON_CODE_ENABLED;
3338
+ const sanitizedResults = shouldFilterResults
3339
+ ? results.filter((item) => !isSuppressedPath(item.path))
3340
+ : results;
3341
+ const finalResults = sanitizedResults.length > 0 ? sanitizedResults : results;
1899
3342
  const payload = {
1900
- context: results,
3343
+ context: finalResults,
1901
3344
  ...(warnings.length > 0 && { warnings }),
1902
3345
  };
1903
3346
  if (tokensEstimate !== undefined) {
@@ -2100,35 +3543,27 @@ export async function depsClosure(context, params) {
2100
3543
  edges,
2101
3544
  };
2102
3545
  }
2103
- export async function resolveRepoId(db, repoRoot) {
3546
+ /**
3547
+ * リポジトリのrootパスをデータベースIDに解決する。
3548
+ *
3549
+ * この関数は下位互換性のために保持されているが、内部的には新しいRepoResolverを使用する。
3550
+ *
3551
+ * @param db - DuckDBクライアント
3552
+ * @param repoRoot - リポジトリのrootパス
3553
+ * @param services - オプショナルなServerServices(指定がなければ新規作成される)
3554
+ * @returns リポジトリID
3555
+ * @throws Error リポジトリがインデックスされていない場合
3556
+ */
3557
+ export async function resolveRepoId(db, repoRoot, services) {
3558
+ const svc = services ?? createServerServices(db);
3559
+ return await svc.repoResolver.resolveId(repoRoot);
3560
+ }
3561
+ export async function contextBundle(context, params) {
2104
3562
  try {
2105
- const candidates = getRepoPathCandidates(repoRoot);
2106
- const normalized = candidates[0];
2107
- const placeholders = candidates.map(() => "?").join(", ");
2108
- const rows = await db.all(`SELECT id, root FROM repo WHERE root IN (${placeholders}) LIMIT 1`, candidates);
2109
- if (rows.length === 0) {
2110
- const existingRows = await db.all("SELECT id, root FROM repo");
2111
- for (const candidate of existingRows) {
2112
- if (normalizeRepoPath(candidate.root) === normalized) {
2113
- await db.run("UPDATE repo SET root = ? WHERE id = ?", [normalized, candidate.id]);
2114
- return candidate.id;
2115
- }
2116
- }
2117
- throw new Error("Target repository is missing from DuckDB. Run the indexer before starting the server.");
2118
- }
2119
- const row = rows[0];
2120
- if (!row) {
2121
- throw new Error("Failed to retrieve repository record. Database returned empty result.");
2122
- }
2123
- if (row.root !== normalized) {
2124
- await db.run("UPDATE repo SET root = ? WHERE id = ?", [normalized, row.id]);
2125
- }
2126
- return row.id;
3563
+ return await contextBundleImpl(context, params);
2127
3564
  }
2128
3565
  catch (error) {
2129
- if (error instanceof Error && error.message.includes("Table with name repo")) {
2130
- throw new Error("Target repository is missing from DuckDB. Run the indexer before starting the server.");
2131
- }
3566
+ console.error("context_bundle error:", error);
2132
3567
  throw error;
2133
3568
  }
2134
3569
  }