kiri-mcp-server 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. package/README.md +10 -1
  2. package/config/scoring-profiles.yml +82 -35
  3. package/dist/config/scoring-profiles.yml +82 -35
  4. package/dist/package.json +9 -1
  5. package/dist/src/indexer/cli.d.ts.map +1 -1
  6. package/dist/src/indexer/cli.js +712 -98
  7. package/dist/src/indexer/cli.js.map +1 -1
  8. package/dist/src/indexer/git.d.ts.map +1 -1
  9. package/dist/src/indexer/git.js +41 -3
  10. package/dist/src/indexer/git.js.map +1 -1
  11. package/dist/src/indexer/migrations/repo-merger.d.ts +33 -0
  12. package/dist/src/indexer/migrations/repo-merger.d.ts.map +1 -0
  13. package/dist/src/indexer/migrations/repo-merger.js +67 -0
  14. package/dist/src/indexer/migrations/repo-merger.js.map +1 -0
  15. package/dist/src/indexer/schema.d.ts +66 -0
  16. package/dist/src/indexer/schema.d.ts.map +1 -1
  17. package/dist/src/indexer/schema.js +337 -0
  18. package/dist/src/indexer/schema.js.map +1 -1
  19. package/dist/src/server/boost-profiles.d.ts +1 -1
  20. package/dist/src/server/boost-profiles.d.ts.map +1 -1
  21. package/dist/src/server/boost-profiles.js +116 -0
  22. package/dist/src/server/boost-profiles.js.map +1 -1
  23. package/dist/src/server/config.d.ts +45 -0
  24. package/dist/src/server/config.d.ts.map +1 -0
  25. package/dist/src/server/config.js +146 -0
  26. package/dist/src/server/config.js.map +1 -0
  27. package/dist/src/server/context.d.ts +29 -0
  28. package/dist/src/server/context.d.ts.map +1 -1
  29. package/dist/src/server/context.js +26 -1
  30. package/dist/src/server/context.js.map +1 -1
  31. package/dist/src/server/handlers/snippets-get.d.ts +36 -0
  32. package/dist/src/server/handlers/snippets-get.d.ts.map +1 -0
  33. package/dist/src/server/handlers/snippets-get.js +120 -0
  34. package/dist/src/server/handlers/snippets-get.js.map +1 -0
  35. package/dist/src/server/handlers.d.ts +32 -20
  36. package/dist/src/server/handlers.d.ts.map +1 -1
  37. package/dist/src/server/handlers.js +1554 -338
  38. package/dist/src/server/handlers.js.map +1 -1
  39. package/dist/src/server/indexBootstrap.d.ts.map +1 -1
  40. package/dist/src/server/indexBootstrap.js +49 -2
  41. package/dist/src/server/indexBootstrap.js.map +1 -1
  42. package/dist/src/server/main.d.ts.map +1 -1
  43. package/dist/src/server/main.js +7 -0
  44. package/dist/src/server/main.js.map +1 -1
  45. package/dist/src/server/profile-selector.d.ts +33 -0
  46. package/dist/src/server/profile-selector.d.ts.map +1 -0
  47. package/dist/src/server/profile-selector.js +291 -0
  48. package/dist/src/server/profile-selector.js.map +1 -0
  49. package/dist/src/server/rpc.d.ts.map +1 -1
  50. package/dist/src/server/rpc.js +36 -6
  51. package/dist/src/server/rpc.js.map +1 -1
  52. package/dist/src/server/runtime.d.ts.map +1 -1
  53. package/dist/src/server/runtime.js +14 -4
  54. package/dist/src/server/runtime.js.map +1 -1
  55. package/dist/src/server/scoring.d.ts +7 -1
  56. package/dist/src/server/scoring.d.ts.map +1 -1
  57. package/dist/src/server/scoring.js +121 -21
  58. package/dist/src/server/scoring.js.map +1 -1
  59. package/dist/src/server/services/index.d.ts +24 -0
  60. package/dist/src/server/services/index.d.ts.map +1 -0
  61. package/dist/src/server/services/index.js +20 -0
  62. package/dist/src/server/services/index.js.map +1 -0
  63. package/dist/src/server/services/repo-repository.d.ts +61 -0
  64. package/dist/src/server/services/repo-repository.d.ts.map +1 -0
  65. package/dist/src/server/services/repo-repository.js +93 -0
  66. package/dist/src/server/services/repo-repository.js.map +1 -0
  67. package/dist/src/server/services/repo-resolver.d.ts +28 -0
  68. package/dist/src/server/services/repo-resolver.d.ts.map +1 -0
  69. package/dist/src/server/services/repo-resolver.js +62 -0
  70. package/dist/src/server/services/repo-resolver.js.map +1 -0
  71. package/dist/src/shared/duckdb.d.ts.map +1 -1
  72. package/dist/src/shared/duckdb.js +21 -1
  73. package/dist/src/shared/duckdb.js.map +1 -1
  74. package/dist/src/shared/fs/safePath.d.ts +7 -0
  75. package/dist/src/shared/fs/safePath.d.ts.map +1 -0
  76. package/dist/src/shared/fs/safePath.js +23 -0
  77. package/dist/src/shared/fs/safePath.js.map +1 -0
  78. package/dist/src/shared/utils/glob.d.ts +5 -0
  79. package/dist/src/shared/utils/glob.d.ts.map +1 -0
  80. package/dist/src/shared/utils/glob.js +22 -0
  81. package/dist/src/shared/utils/glob.js.map +1 -0
  82. package/dist/src/shared/utils/retry.d.ts +8 -0
  83. package/dist/src/shared/utils/retry.d.ts.map +1 -0
  84. package/dist/src/shared/utils/retry.js +20 -0
  85. package/dist/src/shared/utils/retry.js.map +1 -0
  86. package/package.json +28 -22
@@ -3,10 +3,13 @@ import path from "node:path";
3
3
  import { checkFTSSchemaExists } from "../indexer/schema.js";
4
4
  import { generateEmbedding, structuralSimilarity } from "../shared/embedding.js";
5
5
  import { encode as encodeGPT, tokenizeText } from "../shared/tokenizer.js";
6
- import { getRepoPathCandidates, normalizeRepoPath } from "../shared/utils/path.js";
7
6
  import { expandAbbreviations } from "./abbreviations.js";
8
7
  import { getBoostProfile, } from "./boost-profiles.js";
8
+ import { loadServerConfig } from "./config.js";
9
9
  import { coerceProfileName, loadScoringProfile } from "./scoring.js";
10
+ import { createServerServices } from "./services/index.js";
11
+ // Re-export extracted handlers for backward compatibility
12
+ export { snippetsGet, } from "./handlers/snippets-get.js";
10
13
  // Configuration file patterns (v0.8.0+: consolidated to avoid duplication)
11
14
  // Comprehensive list covering multiple languages and tools
12
15
  const CONFIG_FILES = [
@@ -133,6 +136,83 @@ const CONFIG_PATTERNS = [
133
136
  ".github/workflows",
134
137
  ];
135
138
  const FTS_STATUS_CACHE_TTL_MS = 10_000;
139
+ const METADATA_ALIAS_MAP = new Map([
140
+ ["tag", { key: "tags" }],
141
+ ["tags", { key: "tags" }],
142
+ ["category", { key: "category" }],
143
+ ["title", { key: "title" }],
144
+ ["service", { key: "service" }],
145
+ ]);
146
+ const METADATA_KEY_PREFIXES = [
147
+ { prefix: "meta." },
148
+ { prefix: "metadata.", strict: true },
149
+ { prefix: "docmeta.", strict: true },
150
+ { prefix: "frontmatter.", source: "front_matter" },
151
+ { prefix: "fm.", source: "front_matter" },
152
+ { prefix: "yaml.", source: "yaml" },
153
+ { prefix: "json.", source: "json" },
154
+ ];
155
+ const METADATA_MATCH_WEIGHT = 0.15;
156
+ const METADATA_FILTER_MATCH_WEIGHT = 0.1;
157
+ const METADATA_HINT_BONUS = 0.25;
158
+ const INBOUND_LINK_WEIGHT = 0.2;
159
/**
 * checkTableAvailability
 *
 * Probes the optional tables once at startup and returns a TableAvailability
 * object, so later code can branch on plain booleans instead of relying on
 * global mutable flags (which were prone to race conditions).
 *
 * NOTE: after a schema change (tables added), the server must be restarted,
 * because availability is only checked here.
 *
 * @param db - DuckDBClient instance
 * @returns TableAvailability object
 * @throws when a non-"table missing" error occurs (e.g. connection failure)
 */
export async function checkTableAvailability(db) {
    // Allow-list guards the string-interpolated table name below against injection.
    const ALLOWED_TABLES = [
        "document_metadata_kv",
        "markdown_link",
        "hint_expansion",
        "hint_dictionary",
    ];
    const checkTable = async (tableName) => {
        if (!ALLOWED_TABLES.includes(tableName)) {
            throw new Error(`Invalid table name: ${tableName}`);
        }
        try {
            // LIMIT 0 fetches no rows; success only proves the table exists.
            await db.all(`SELECT 1 FROM ${tableName} LIMIT 0`);
            return true;
        }
        catch (error) {
            // Only a "table missing" error maps to `false`.
            // NOTE(review): `isTableMissingError` (used here) and the sibling
            // `isMissingTableError` look like near-duplicates — confirm and unify.
            if (isTableMissingError(error, tableName)) {
                return false;
            }
            // Any other error (connection failure etc.) is re-thrown with context.
            throw new Error(`Failed to check table availability for ${tableName}: ${error instanceof Error ? error.message : String(error)}`);
        }
    };
    const result = {
        hasMetadataTables: await checkTable("document_metadata_kv"),
        hasLinkTable: await checkTable("markdown_link"),
        hasHintLog: await checkTable("hint_expansion"),
        hasHintDictionary: await checkTable("hint_dictionary"),
    };
    // Startup warnings: tell the operator which features are degraded and why.
    if (!result.hasMetadataTables) {
        console.warn("document_metadata_kv table is missing. Metadata filters and boosts disabled until database is upgraded.");
    }
    if (!result.hasLinkTable) {
        console.warn("markdown_link table is missing. Inbound link boosting disabled until database is upgraded.");
    }
    if (!result.hasHintLog) {
        console.warn("hint_expansion table is missing. Hint logging disabled. Enable the latest schema and rerun the indexer to capture hint logs.");
    }
    if (!result.hasHintDictionary) {
        console.warn("hint_dictionary table is missing. Dictionary hints disabled. Run scripts/diag/build-hint-dictionary.ts after upgrading the schema.");
    }
    return result;
}
136
216
  async function hasDirtyRepos(db) {
137
217
  const statusCheck = await db.all(`SELECT COUNT(*) as count FROM repo
138
218
  WHERE fts_dirty = true OR fts_status IN ('dirty', 'rebuilding')`);
@@ -217,8 +297,154 @@ function isConfigFile(path, fileName) {
217
297
  fileName.startsWith(".env") ||
218
298
  isInConfigDirectory);
219
299
  }
300
/**
 * Sanitizes user-supplied artifact hints: keeps only non-empty strings,
 * trims whitespace, removes duplicates (first occurrence wins), and caps
 * the result at MAX_ARTIFACT_HINTS entries.
 */
function normalizeArtifactHints(hints) {
    if (!Array.isArray(hints)) {
        return [];
    }
    // A Set preserves insertion order, giving dedupe + ordering in one pass.
    const unique = new Set();
    for (const value of hints) {
        if (typeof value !== "string") {
            continue;
        }
        const hint = value.trim();
        if (hint.length === 0 || unique.has(hint)) {
            continue;
        }
        unique.add(hint);
        if (unique.size >= MAX_ARTIFACT_HINTS) {
            break;
        }
    }
    return [...unique];
}
322
/**
 * Splits normalized hints into two buckets:
 * - pathHints: hints containing "/" that match SAFE_PATH_PATTERN (used verbatim),
 * - substringHints: everything else, lowercased, if at least 3 chars long.
 */
function bucketArtifactHints(hints) {
    const pathHints = [];
    const substringHints = [];
    for (const hint of hints) {
        const looksLikePath = hint.includes("/") && SAFE_PATH_PATTERN.test(hint);
        if (looksLikePath) {
            pathHints.push(hint);
        } else {
            const lowered = hint.trim().toLowerCase();
            // Very short substrings would match too many files; skip them.
            if (lowered.length >= 3) {
                substringHints.push(lowered);
            }
        }
    }
    return { pathHints, substringHints };
}
339
/**
 * Returns true when `error` is a "table does not exist" error for the given
 * table name (message contains "Table with name" and the table identifier).
 */
function isMissingTableError(error, table) {
    if (error instanceof Error) {
        const { message } = error;
        return /Table with name/i.test(message) && message.includes(table);
    }
    return false;
}
345
/**
 * Best-effort insert of a hint-expansion event into the hint_expansion table.
 *
 * No-ops unless the KIRI_HINT_LOG env flag is set (HINT_LOG_ENABLED) and the
 * startup availability check found the table. If the table disappeared after
 * startup, logs a warning instead of failing the request; any other DB error
 * is re-thrown.
 *
 * @param db - DuckDBClient instance
 * @param tableAvailability - startup table availability flags
 * @param entry - { repoId, hintValue, kind, targetPath?, payload? }
 */
async function logHintExpansionEntry(db, tableAvailability, entry) {
    if (!HINT_LOG_ENABLED) {
        return;
    }
    if (!tableAvailability.hasHintLog) {
        return;
    }
    try {
        await db.run(`
      INSERT INTO hint_expansion (repo_id, hint_value, expansion_kind, target_path, payload)
      VALUES (?, ?, ?, ?, ?)
    `, [
            entry.repoId,
            entry.hintValue,
            entry.kind,
            // Optional fields are stored as NULL; payload is serialized JSON.
            entry.targetPath ?? null,
            entry.payload ? JSON.stringify(entry.payload) : null,
        ]);
    }
    catch (error) {
        // Table vanished after the startup probe: degrade gracefully.
        if (isMissingTableError(error, "hint_expansion")) {
            console.warn("hint_expansion table is missing in the active database. Enable the latest schema and rerun the indexer to capture hint logs.");
            return;
        }
        throw error;
    }
}
372
/**
 * Looks up learned path targets for each hint in the hint_dictionary table.
 *
 * For every unique hint, fetches up to `perHintLimit` target paths ordered by
 * frequency, filters them through SAFE_PATH_PATTERN, and tags each result
 * with origin "dictionary". Returns [] when the feature is disabled
 * (KIRI_HINT_DICTIONARY=0), the limit is non-positive, there are no hints,
 * or the table is unavailable. If the table goes missing mid-loop, the
 * partial work is discarded ([] is returned) — other DB errors propagate.
 *
 * @returns Array of { path, sourceHint, origin: "dictionary" }
 */
async function fetchDictionaryPathHints(db, tableAvailability, repoId, hints, perHintLimit) {
    if (!HINT_DICTIONARY_ENABLED || perHintLimit <= 0 || hints.length === 0) {
        return [];
    }
    if (!tableAvailability.hasHintDictionary) {
        return [];
    }
    // Dedupe so the same hint is not queried twice.
    const uniqueHints = Array.from(new Set(hints));
    const targets = [];
    for (const hint of uniqueHints) {
        let rows = [];
        try {
            rows = await db.all(`
        SELECT target_path
        FROM hint_dictionary
        WHERE repo_id = ?
          AND hint_value = ?
        ORDER BY freq DESC, target_path
        LIMIT ?
      `, [repoId, hint, perHintLimit]);
        }
        catch (error) {
            // Missing table: warn once and abandon dictionary hints entirely.
            if (isMissingTableError(error, "hint_dictionary")) {
                console.warn("hint_dictionary table is missing in the active database. Run scripts/diag/build-hint-dictionary.ts after upgrading the schema.");
                return [];
            }
            throw error;
        }
        for (const row of rows) {
            // Only allow paths matching the safe charset; drop anything else.
            if (!row.target_path || !SAFE_PATH_PATTERN.test(row.target_path)) {
                continue;
            }
            targets.push({ path: row.target_path, sourceHint: hint, origin: "dictionary" });
        }
    }
    return targets;
}
409
/**
 * Deduplicates hint targets by path (first occurrence wins) and builds a
 * path -> { sourceHint, origin } lookup used later for attribution.
 *
 * @returns { list: deduped targets, meta: Map keyed by path }
 */
function createHintSeedMeta(targets) {
    const meta = new Map();
    const list = [];
    for (const target of targets) {
        const { path: targetPath, sourceHint, origin } = target;
        if (meta.has(targetPath)) {
            continue;
        }
        meta.set(targetPath, { sourceHint, origin });
        list.push(target);
    }
    return { list, meta };
}
421
/** Null-safe lookup of seed metadata for a path; undefined when map or entry is absent. */
function getHintSeedMeta(seedMeta, path) {
    if (seedMeta === undefined || seedMeta === null) {
        return undefined;
    }
    return seedMeta.get(path);
}
424
/**
 * Computes the score boost applied to explicit hint targets: a weighted sum
 * of text/path match weights plus editing-path and dependency weights,
 * floored at HINT_PRIORITY_BASE_BONUS.
 */
function computeHintPriorityBoost(weights) {
    const { textMatch, pathMatch, editingPath, dependency } = weights;
    const aggregate =
        textMatch * HINT_PRIORITY_TEXT_MULTIPLIER +
        pathMatch * HINT_PRIORITY_PATH_MULTIPLIER +
        editingPath +
        dependency;
    return Math.max(HINT_PRIORITY_BASE_BONUS, aggregate);
}
430
/**
 * Builds the hint-expansion configuration from module-level tunables,
 * clamping every limit to a sane floor and deriving the directory/dependency
 * boosts from the scoring weights.
 */
function createHintExpansionConfig(weights) {
    // Small clamping helpers keep the literal floors readable below.
    const nonNegative = (value) => Math.max(0, value);
    const atLeastOne = (value) => Math.max(1, value);
    return {
        dirLimit: nonNegative(HINT_DIR_LIMIT),
        dirMaxFiles: atLeastOne(HINT_DIR_MAX_FILES),
        depOutLimit: nonNegative(HINT_DEP_OUT_LIMIT),
        depInLimit: nonNegative(HINT_DEP_IN_LIMIT),
        semLimit: nonNegative(HINT_SEM_LIMIT),
        semDirCandidateLimit: atLeastOne(HINT_SEM_DIR_CANDIDATE_LIMIT),
        // Fall back to 0.65 when the configured threshold is not a finite number.
        semThreshold: Number.isFinite(HINT_SEM_THRESHOLD) ? HINT_SEM_THRESHOLD : 0.65,
        perHintLimit: nonNegative(HINT_PER_HINT_LIMIT),
        dbQueryBudget: nonNegative(HINT_DB_QUERY_BUDGET),
        dirBoost: computeHintPriorityBoost(weights) * 0.35,
        depBoost: weights.dependency * 0.8,
        substringLimit: nonNegative(HINT_SUBSTRING_LIMIT),
        substringBoost: nonNegative(HINT_SUBSTRING_BOOST),
    };
}
220
447
  const DEFAULT_SEARCH_LIMIT = 50;
221
- const DEFAULT_SNIPPET_WINDOW = 150;
222
448
  const DEFAULT_BUNDLE_LIMIT = 7; // Reduced from 12 to optimize token usage
223
449
  const MAX_BUNDLE_LIMIT = 20;
224
450
  const MAX_KEYWORDS = 12;
@@ -226,35 +452,76 @@ const MAX_MATCHES_PER_KEYWORD = 40;
226
452
  const MAX_DEPENDENCY_SEEDS = 8;
227
453
  const MAX_DEPENDENCY_SEEDS_QUERY_LIMIT = 100; // SQL injection防御用の上限
228
454
  const NEARBY_LIMIT = 6;
229
- const FALLBACK_SNIPPET_WINDOW = 40; // Reduced from 120 to optimize token usage
455
+ const serverConfig = loadServerConfig();
456
+ const SUPPRESS_NON_CODE_ENABLED = serverConfig.features.suppressNonCode;
457
+ const FINAL_RESULT_SUPPRESSION_ENABLED = serverConfig.features.suppressFinalResults;
458
+ const CLAMP_SNIPPETS_ENABLED = serverConfig.features.clampSnippets;
459
+ const FALLBACK_SNIPPET_WINDOW = serverConfig.features.snippetWindow;
230
460
  const MAX_RERANK_LIMIT = 50;
461
+ const MAX_ARTIFACT_HINTS = 8;
462
+ const SAFE_PATH_PATTERN = /^[a-zA-Z0-9_.\-/]+$/;
463
+ const HINT_PRIORITY_TEXT_MULTIPLIER = serverConfig.hints.priority.textMultiplier;
464
+ const HINT_PRIORITY_PATH_MULTIPLIER = serverConfig.hints.priority.pathMultiplier;
465
+ const HINT_PRIORITY_BASE_BONUS = serverConfig.hints.priority.baseBonus;
466
+ const HINT_DIR_LIMIT = serverConfig.hints.directory.limit;
467
+ const HINT_DIR_MAX_FILES = serverConfig.hints.directory.maxFiles;
468
+ const HINT_DEP_OUT_LIMIT = serverConfig.hints.dependency.outLimit;
469
+ const HINT_DEP_IN_LIMIT = serverConfig.hints.dependency.inLimit;
470
+ const HINT_SEM_LIMIT = serverConfig.hints.semantic.limit;
471
+ const HINT_SEM_DIR_CANDIDATE_LIMIT = serverConfig.hints.semantic.dirCandidateLimit;
472
+ const HINT_SEM_THRESHOLD = serverConfig.hints.semantic.threshold;
473
+ const SUPPRESSED_PATH_PREFIXES = [".github/", ".git/", "ThirdPartyNotices", "node_modules/"];
474
+ const SUPPRESSED_FILE_NAMES = ["thirdpartynotices.txt", "thirdpartynotices.md", "cgmanifest.json"];
475
/**
 * Returns true when a path should be suppressed as non-code noise
 * (e.g. .github/, node_modules/, third-party notice files).
 * Always false when the suppressNonCode feature flag is off.
 * Note: SUPPRESSED_PATH_PREFIXES are matched as substrings anywhere in the
 * path, so nested occurrences (a/node_modules/b) are suppressed too.
 */
function isSuppressedPath(path) {
    if (!SUPPRESS_NON_CODE_ENABLED) {
        return false;
    }
    // Strip a leading "./" (and any extra slashes) before comparing.
    const stripped = path.startsWith("./") ? path.replace(/^\.\/+/u, "") : path;
    const lower = stripped.toLowerCase();
    for (const name of SUPPRESSED_FILE_NAMES) {
        if (lower.endsWith(name)) {
            return true;
        }
    }
    return SUPPRESSED_PATH_PREFIXES.some((prefix) => lower.includes(prefix.toLowerCase()));
}
487
+ const HINT_PER_HINT_LIMIT = serverConfig.hints.perHintLimit;
488
+ const HINT_DB_QUERY_BUDGET = serverConfig.hints.dbQueryLimit;
489
+ const HINT_SUBSTRING_LIMIT = serverConfig.hints.substring.limit;
490
+ const HINT_SUBSTRING_BOOST = serverConfig.hints.substring.boost;
491
+ const HINT_LOG_ENABLED = process.env.KIRI_HINT_LOG === "1";
492
+ const HINT_DICTIONARY_ENABLED = process.env.KIRI_HINT_DICTIONARY !== "0";
493
+ const HINT_DICTIONARY_LIMIT = Math.max(0, Number.parseInt(process.env.KIRI_HINT_DICTIONARY_LIMIT ?? "2", 10));
231
494
  // Issue #68: Path/Large File Penalty configuration (環境変数で上書き可能)
232
- const PATH_MISS_DELTA = parseFloat(process.env.KIRI_PATH_MISS_DELTA || "-0.5");
233
- const LARGE_FILE_DELTA = parseFloat(process.env.KIRI_LARGE_FILE_DELTA || "-0.8");
495
+ const PATH_MISS_DELTA = serverConfig.penalties.pathMissDelta;
496
+ const LARGE_FILE_DELTA = serverConfig.penalties.largeFileDelta;
234
497
  const MAX_WHY_TAGS = 10;
235
498
  // 項目3: whyタグの優先度マップ(低い数値ほど高優先度)
236
499
  // All actual tag prefixes used in the codebase
237
500
  const WHY_TAG_PRIORITY = {
238
- artifact: 1, // User-provided hints (editing_path, failing_tests)
501
+ artifact: 1, // User-provided hints (editing_path, failing_tests, hints)
502
+ dictionary: 1, // Dictionary-provided hints
239
503
  phrase: 2, // Multi-word literal matches (strongest signal)
240
504
  text: 3, // Single keyword matches
241
- "path-phrase": 4, // Path contains multi-word phrase
242
- structural: 5, // Semantic similarity
243
- "path-segment": 6, // Path component matches
244
- "path-keyword": 7, // Path keyword match
245
- dep: 8, // Dependency relationship
246
- near: 9, // Proximity to editing file
247
- boost: 10, // File type boost
248
- recent: 11, // Recently changed
249
- symbol: 12, // Symbol match
250
- penalty: 13, // Penalty explanations (keep for transparency)
251
- keyword: 14, // Generic keyword (deprecated, kept for compatibility)
505
+ metadata: 4, // Front matter / metadata filters & boosts
506
+ substring: 4, // Substring hint expansion
507
+ "path-phrase": 5, // Path contains multi-word phrase
508
+ structural: 6, // Semantic similarity
509
+ "path-segment": 7, // Path component matches
510
+ "path-keyword": 8, // Path keyword match
511
+ dep: 9, // Dependency relationship
512
+ near: 10, // Proximity to editing file
513
+ boost: 11, // File type boost
514
+ recent: 12, // Recently changed
515
+ symbol: 13, // Symbol match
516
+ penalty: 14, // Penalty explanations (keep for transparency)
517
+ keyword: 15, // Generic keyword (deprecated, kept for compatibility)
252
518
  };
253
519
  // Reserve at least one slot for important structural tags
254
520
  const RESERVED_WHY_SLOTS = {
255
521
  dep: 1, // Dependency relationships are critical
256
522
  symbol: 1, // Symbol boundaries help understand context
257
523
  near: 1, // Proximity explains file selection
524
+ metadata: 1, // Preserve metadata reasons when filters/boosts are active
258
525
  };
259
526
  function parseOutputOptions(params) {
260
527
  return {
@@ -277,6 +544,9 @@ function selectWhyTags(reasons) {
277
544
  reasons = new Set(Array.from(reasons).slice(0, 1000));
278
545
  }
279
546
  const selected = new Set();
547
+ if (reasons.has("boost:links")) {
548
+ selected.add("boost:links");
549
+ }
280
550
  const byCategory = new Map();
281
551
  for (const reason of reasons) {
282
552
  const prefix = reason.split(":")[0] ?? "";
@@ -342,6 +612,45 @@ const STOP_WORDS = new Set([
342
612
  "need",
343
613
  "goal",
344
614
  ]);
615
/**
 * Reorders ranked candidates so explicitly hinted paths come first, then
 * fills the remainder with the original ranking order, deduped by path.
 * The result length is clamped to [1, rankedCandidates.length].
 */
function prioritizeHintCandidates(rankedCandidates, hintPaths, limit) {
    if (rankedCandidates.length === 0) {
        return [];
    }
    const cap = Math.max(1, Math.min(limit, rankedCandidates.length));
    // First candidate per path wins the lookup.
    const byPath = new Map();
    for (const candidate of rankedCandidates) {
        if (!byPath.has(candidate.path)) {
            byPath.set(candidate.path, candidate);
        }
    }
    const picked = [];
    const pickedPaths = new Set();
    const take = (candidate) => {
        if (candidate && !pickedPaths.has(candidate.path)) {
            picked.push(candidate);
            pickedPaths.add(candidate.path);
        }
    };
    // Pass 1: hinted paths, in hint order.
    for (const hintPath of hintPaths) {
        if (picked.length >= cap) {
            break;
        }
        take(byPath.get(hintPath));
    }
    // Pass 2: backfill from the original ranking.
    for (const candidate of rankedCandidates) {
        if (picked.length >= cap) {
            break;
        }
        take(candidate);
    }
    return picked;
}
345
654
  function normalizeLimit(limit) {
346
655
  if (!limit || Number.isNaN(limit)) {
347
656
  return DEFAULT_SEARCH_LIMIT;
@@ -526,8 +835,351 @@ function ensureCandidate(map, filePath) {
526
835
  }
527
836
  return candidate;
528
837
  }
838
/**
 * Expands each hinted path into its neighborhood (directory siblings,
 * dependency neighbors, semantically similar files), sharing a single DB
 * query budget across all hints. No-ops when there are no hints or the
 * per-hint limit / budget is non-positive.
 */
async function expandHintCandidatesForHints(params) {
    const { hintPaths, config } = params;
    const hasWork = hintPaths.length > 0 && config.perHintLimit > 0 && config.dbQueryBudget > 0;
    if (!hasWork) {
        return;
    }
    // Mutable budget shared by every per-hint expansion below.
    const state = { remainingDbQueries: config.dbQueryBudget };
    for (const hintPath of hintPaths) {
        if (state.remainingDbQueries <= 0) {
            break;
        }
        await expandSingleHintNeighborhood({ ...params, hintPath, state });
    }
}
851
/**
 * Expands one hint path, spending at most config.perHintLimit additions
 * across three stages in fixed priority order:
 *   1. directory siblings (up to dirLimit),
 *   2. dependency neighbors (outgoing then incoming),
 *   3. semantically similar files (up to semLimit).
 * Each stage consumes the shared `remaining` budget; later stages are
 * skipped once it is exhausted.
 */
async function expandSingleHintNeighborhood(args) {
    const { config } = args;
    let remaining = config.perHintLimit;
    if (remaining <= 0) {
        return;
    }
    if (config.dirLimit > 0) {
        const added = await addHintDirectoryNeighbors(args, Math.min(config.dirLimit, remaining));
        remaining -= added;
        if (remaining <= 0) {
            return;
        }
    }
    if (config.depOutLimit > 0 || config.depInLimit > 0) {
        const added = await addHintDependencyNeighbors(args, remaining);
        remaining -= added;
        if (remaining <= 0) {
            return;
        }
    }
    if (config.semLimit > 0) {
        await addHintSemanticNeighbors(args, Math.min(config.semLimit, remaining));
    }
}
875
/**
 * Attempts to spend `cost` units (default 1) of the shared DB query budget.
 * Mutates state.remainingDbQueries on success; returns whether the spend
 * was allowed.
 */
function useHintDbBudget(state, cost = 1) {
    const affordable = state.remainingDbQueries >= cost;
    if (affordable) {
        state.remainingDbQueries -= cost;
    }
    return affordable;
}
882
/**
 * Applies a scored hint reason to a candidate exactly once.
 * Returns false (no mutation) when the delta is non-positive or the reason
 * was already recorded; otherwise bumps the score, records the reason,
 * raises pathMatchHits to at least 2, defaults matchLine to 1, and fills in
 * lang/ext only when the candidate has none yet.
 */
function applyHintReasonBoost(candidate, reason, scoreDelta, lang, ext) {
    if (scoreDelta <= 0) {
        return false;
    }
    if (candidate.reasons.has(reason)) {
        return false;
    }
    candidate.reasons.add(reason);
    candidate.score += scoreDelta;
    candidate.pathMatchHits = Math.max(candidate.pathMatchHits, 2);
    candidate.matchLine ??= 1;
    if (lang && !candidate.lang) {
        candidate.lang = lang;
    }
    if (ext && !candidate.ext) {
        candidate.ext = ext;
    }
    return true;
}
898
/**
 * Promotes explicit hint targets (user-provided or dictionary-derived) in the
 * candidate map, logging each promotion, then expands the neighborhood of
 * every promoted path via expandHintCandidatesForHints.
 *
 * Each target gets the full hint priority boost, a why-reason prefixed
 * "dictionary:hint" or "artifact:hint" depending on origin, pathMatchHits
 * raised to at least 3, and matchLine defaulted to 1.
 */
async function applyPathHintPromotions(args) {
    const { hintTargets } = args;
    if (hintTargets.length === 0) {
        return;
    }
    const hintBoost = computeHintPriorityBoost(args.weights);
    for (const target of hintTargets) {
        const candidate = ensureCandidate(args.candidates, target.path);
        const reasonPrefix = target.origin === "dictionary" ? "dictionary:hint" : "artifact:hint";
        candidate.score += hintBoost;
        candidate.reasons.add(`${reasonPrefix}:${target.path}`);
        // Direct hints count as a strong path match.
        candidate.pathMatchHits = Math.max(candidate.pathMatchHits, 3);
        candidate.matchLine ??= 1;
        // Logged sequentially so the log order matches the promotion order.
        await logHintExpansionEntry(args.db, args.tableAvailability, {
            repoId: args.repoId,
            hintValue: target.sourceHint,
            kind: target.origin === "dictionary" ? "dictionary" : "path",
            targetPath: target.path,
            payload: {
                origin: target.origin,
                source_hint: target.sourceHint,
            },
        });
    }
    // Second phase: pull in neighbors of every promoted path.
    await expandHintCandidatesForHints({
        db: args.db,
        tableAvailability: args.tableAvailability,
        repoId: args.repoId,
        hintPaths: hintTargets.map((target) => target.path),
        candidates: args.candidates,
        fileCache: args.fileCache,
        weights: args.weights,
        config: createHintExpansionConfig(args.weights),
        hintSeedMeta: args.hintSeedMeta,
    });
}
934
/**
 * Boosts files whose path contains one of the (lowercased) substring hints.
 *
 * For each safe hint, queries up to `limitPerHint` non-binary files whose
 * lowercased path contains the hint, applies the substring boost once per
 * file, and logs each successful boost.
 *
 * Fix: the previous guard stripped disallowed characters from the hint
 * BEFORE testing SAFE_PATH_PATTERN, so the test could only fail for hints
 * that became empty — raw, unvalidated hints (spaces, quotes, the SQL LIKE
 * wildcard '%') reached the query. We now validate the raw hint, so unsafe
 * input is skipped entirely. (Note: '_' is allowed by SAFE_PATH_PATTERN and
 * is also a LIKE single-char wildcard; it can only widen a match, never
 * inject SQL, since the hint is bound as a parameter.)
 */
async function addHintSubstringMatches(db, tableAvailability, repoId, hints, candidates, limitPerHint, boost) {
    if (limitPerHint <= 0 || boost <= 0) {
        return;
    }
    for (const hint of hints) {
        // Validate the raw hint; skip anything outside the safe charset.
        if (!SAFE_PATH_PATTERN.test(hint)) {
            continue;
        }
        const rows = await db.all(`
      SELECT path
      FROM file
      WHERE repo_id = ?
        AND is_binary = FALSE
        AND LOWER(path) LIKE '%' || ? || '%'
      ORDER BY path
      LIMIT ?
    `, [repoId, hint, limitPerHint]);
        for (const row of rows) {
            const candidate = ensureCandidate(candidates, row.path);
            const reason = `substring:hint:${hint}`;
            if (applyHintReasonBoost(candidate, reason, boost)) {
                await logHintExpansionEntry(db, tableAvailability, {
                    repoId,
                    hintValue: hint,
                    kind: "substring",
                    targetPath: row.path,
                });
            }
        }
    }
}
965
/**
 * Adds sibling files from the hint path's directory as candidates.
 *
 * Skips root-level hints, respects the shared DB budget, and deliberately
 * fetches dirMaxFiles + 1 rows: if more than dirMaxFiles files live in the
 * directory it is considered too broad and nothing is added. Siblings are
 * ranked (source > other > tests > docs) before applying up to `limit`
 * boosts, each logged with the originating hint's metadata.
 *
 * @returns number of candidates actually boosted
 */
async function addHintDirectoryNeighbors(args, limit) {
    if (limit <= 0) {
        return 0;
    }
    const dir = path.posix.dirname(args.hintPath);
    if (!dir || dir === "." || dir === "/") {
        return 0;
    }
    if (!useHintDbBudget(args.state)) {
        return 0;
    }
    // +1 lets us detect "directory too large" without a COUNT query.
    const rows = await args.db.all(`
    SELECT path, lang, ext
    FROM file
    WHERE repo_id = ?
      AND is_binary = FALSE
      AND path LIKE ?
    ORDER BY path
    LIMIT ?
  `, [args.repoId, `${dir}/%`, args.config.dirMaxFiles + 1]);
    if (rows.length === 0 || rows.length > args.config.dirMaxFiles) {
        return 0;
    }
    // Prefer production source over tests/docs when the limit truncates.
    rows.sort((a, b) => hintNeighborRank(a.path) - hintNeighborRank(b.path));
    let added = 0;
    for (const row of rows) {
        if (row.path === args.hintPath) {
            continue;
        }
        if (!SAFE_PATH_PATTERN.test(row.path)) {
            continue;
        }
        const candidate = ensureCandidate(args.candidates, row.path);
        const reason = `artifact:hint_dir:${args.hintPath}:${row.path}`;
        if (applyHintReasonBoost(candidate, reason, args.config.dirBoost, row.lang, row.ext)) {
            added += 1;
            // Attribute the expansion back to the original hint when known.
            const seedMeta = getHintSeedMeta(args.hintSeedMeta, args.hintPath);
            await logHintExpansionEntry(args.db, args.tableAvailability, {
                repoId: args.repoId,
                hintValue: seedMeta?.sourceHint ?? args.hintPath,
                kind: "directory",
                targetPath: row.path,
                payload: {
                    origin: seedMeta?.origin ?? "artifact",
                },
            });
            if (added >= limit) {
                break;
            }
        }
    }
    return added;
}
1018
/**
 * Adds dependency neighbors of the hint path in both directions, outgoing
 * ("out") first, then incoming ("in"), without exceeding perHintRemaining
 * total additions. Returns the number of candidates actually added.
 */
async function addHintDependencyNeighbors(args, perHintRemaining) {
    if (perHintRemaining <= 0) {
        return 0;
    }
    const { depOutLimit, depInLimit } = args.config;
    let added = 0;
    if (depOutLimit > 0) {
        const outLimit = Math.min(depOutLimit, perHintRemaining - added);
        if (outLimit > 0) {
            added += await addHintDependencyDirection(args, outLimit, "out");
        }
    }
    const leftover = perHintRemaining - added;
    if (leftover <= 0) {
        return added;
    }
    if (depInLimit > 0) {
        const inLimit = Math.min(depInLimit, leftover);
        if (inLimit > 0) {
            added += await addHintDependencyDirection(args, inLimit, "in");
        }
    }
    return added;
}
1040
/**
 * Fetches dependency neighbors of the hint path in one direction and applies
 * boosts via applyDependencyRows.
 *
 * direction "out": files the hint path depends on (dependency.dst).
 * direction "in": files that depend on the hint path (dependency.src_path).
 * Over-fetches (limit * 4, capped at 25) so that post-filtering by
 * SAFE_PATH_PATTERN / dedupe still leaves enough rows to fill `limit`.
 * Consumes one unit of the shared DB budget; returns 0 when exhausted.
 *
 * @returns number of candidates actually boosted
 */
async function addHintDependencyDirection(args, limit, direction) {
    if (limit <= 0) {
        return 0;
    }
    if (!useHintDbBudget(args.state)) {
        return 0;
    }
    const fetchLimit = Math.min(limit * 4, 25);
    if (direction === "out") {
        const rows = await args.db.all(`
      SELECT dst
      FROM dependency
      WHERE repo_id = ?
        AND src_path = ?
        AND dst_kind = 'path'
      LIMIT ?
    `, [args.repoId, args.hintPath, fetchLimit]);
        return await applyDependencyRows(args, rows.map((row) => row.dst), limit, direction);
    }
    const rows = await args.db.all(`
    SELECT src_path
    FROM dependency
    WHERE repo_id = ?
      AND dst = ?
      AND dst_kind = 'path'
    LIMIT ?
  `, [args.repoId, args.hintPath, fetchLimit]);
    return await applyDependencyRows(args, rows.map((row) => row.src_path), limit, direction);
}
1069
/**
 * Applies dependency boosts for a list of neighbor paths.
 *
 * Dedupes the paths, drops unsafe ones (SAFE_PATH_PATTERN) and the hint path
 * itself, sorts by hintNeighborRank (source first, tests/docs last), then
 * boosts up to `limit` candidates, logging each addition with the seed
 * hint's metadata and the dependency direction.
 *
 * @returns number of candidates actually boosted
 */
async function applyDependencyRows(args, paths, limit, direction) {
    if (paths.length === 0) {
        return 0;
    }
    const uniquePaths = Array.from(new Set(paths)).filter((p) => p && SAFE_PATH_PATTERN.test(p));
    // Prefer production source when the limit truncates the list.
    uniquePaths.sort((a, b) => hintNeighborRank(a) - hintNeighborRank(b));
    let added = 0;
    for (const dependencyPath of uniquePaths) {
        if (dependencyPath === args.hintPath) {
            continue;
        }
        const candidate = ensureCandidate(args.candidates, dependencyPath);
        const reason = `artifact:hint_dep_${direction}:${args.hintPath}:${dependencyPath}`;
        if (applyHintReasonBoost(candidate, reason, args.config.depBoost)) {
            added += 1;
            const seedMeta = getHintSeedMeta(args.hintSeedMeta, args.hintPath);
            await logHintExpansionEntry(args.db, args.tableAvailability, {
                repoId: args.repoId,
                hintValue: seedMeta?.sourceHint ?? args.hintPath,
                kind: "dependency",
                targetPath: dependencyPath,
                payload: {
                    origin: seedMeta?.origin ?? "artifact",
                    direction,
                },
            });
            if (added >= limit) {
                break;
            }
        }
    }
    return added;
}
1102
/**
 * Adds semantically similar files from the hint path's directory.
 *
 * Fetches up to semDirCandidateLimit sibling paths (one DB budget unit),
 * then their embeddings plus the hint's own embedding (a second budget
 * unit). Candidates whose structural similarity to the hint meets
 * config.semThreshold are boosted by weights.structural * similarity,
 * up to `limit` additions. Returns 0 early if the hint itself has no
 * embedding or the budget runs out.
 *
 * @returns number of candidates actually boosted
 */
async function addHintSemanticNeighbors(args, limit) {
    if (limit <= 0) {
        return 0;
    }
    const dir = path.posix.dirname(args.hintPath);
    if (!dir || dir === "." || dir === "/") {
        return 0;
    }
    if (!useHintDbBudget(args.state)) {
        return 0;
    }
    const rows = await args.db.all(`
    SELECT path
    FROM file
    WHERE repo_id = ?
      AND is_binary = FALSE
      AND path LIKE ?
    ORDER BY path
    LIMIT ?
  `, [args.repoId, `${dir}/%`, args.config.semDirCandidateLimit]);
    const candidatePaths = rows.map((row) => row.path).filter((p) => p !== args.hintPath);
    if (candidatePaths.length === 0) {
        return 0;
    }
    // The embedding fetch is a second, separate budget charge.
    if (!useHintDbBudget(args.state)) {
        return 0;
    }
    const embeddingMap = await fetchEmbeddingMap(args.db, args.repoId, [
        args.hintPath,
        ...candidatePaths,
    ]);
    const hintEmbedding = embeddingMap.get(args.hintPath);
    if (!hintEmbedding) {
        return 0;
    }
    let added = 0;
    for (const candidatePath of candidatePaths) {
        if (!SAFE_PATH_PATTERN.test(candidatePath)) {
            continue;
        }
        const embedding = embeddingMap.get(candidatePath);
        if (!embedding) {
            continue;
        }
        const similarity = structuralSimilarity(hintEmbedding, embedding);
        // Guard against NaN/Infinity as well as sub-threshold scores.
        if (!Number.isFinite(similarity) || similarity < args.config.semThreshold) {
            continue;
        }
        const candidate = ensureCandidate(args.candidates, candidatePath);
        const reason = `artifact:hint_sem:${args.hintPath}:${candidatePath}`;
        if (applyHintReasonBoost(candidate, reason, args.weights.structural * similarity)) {
            added += 1;
            if (added >= limit) {
                break;
            }
        }
    }
    return added;
}
1161
/**
 * Sort key for hint-neighbor candidates: lower ranks sort first.
 * 0 = primary source trees, 1 = everything else, 2 = test-like paths,
 * 3 = docs. ("external/assay-kit/src/" is a hard-coded sibling source tree.)
 */
function hintNeighborRank(filePath) {
    const isPrimarySource =
        filePath.startsWith("src/") || filePath.startsWith("external/assay-kit/src/");
    if (isPrimarySource) {
        return 0;
    }
    if (isTestLikePath(filePath)) {
        return 2;
    }
    return filePath.startsWith("docs/") ? 3 : 1;
}
1173
/**
 * Returns true for test-like paths: anything under a test/tests/__tests__/
 * fixtures directory, or ending with a .spec/.test TypeScript suffix.
 */
function isTestLikePath(filePath) {
    if (/(^|\/)(tests?|__tests__|fixtures)\//.test(filePath)) {
        return true;
    }
    const testSuffixes = [".spec.ts", ".spec.tsx", ".test.ts", ".test.tsx"];
    return testSuffixes.some((suffix) => filePath.endsWith(suffix));
}
529
1180
  function parseEmbedding(vectorJson, vectorDims) {
530
- if (!vectorJson || !vectorDims || vectorDims <= 0) {
1181
+ const dims = vectorDims === null ? null : typeof vectorDims === "bigint" ? Number(vectorDims) : vectorDims;
1182
+ if (!vectorJson || !dims || dims <= 0) {
531
1183
  return null;
532
1184
  }
533
1185
  try {
@@ -536,7 +1188,7 @@ function parseEmbedding(vectorJson, vectorDims) {
536
1188
  return null;
537
1189
  }
538
1190
  const values = [];
539
- for (let i = 0; i < parsed.length && i < vectorDims; i += 1) {
1191
+ for (let i = 0; i < parsed.length && i < dims; i += 1) {
540
1192
  const raw = parsed[i];
541
1193
  const num = typeof raw === "number" ? raw : Number(raw);
542
1194
  if (!Number.isFinite(num)) {
@@ -544,7 +1196,7 @@ function parseEmbedding(vectorJson, vectorDims) {
544
1196
  }
545
1197
  values.push(num);
546
1198
  }
547
- return values.length === vectorDims ? values : null;
1199
+ return values.length === dims ? values : null;
548
1200
  }
549
1201
  catch {
550
1202
  return null;
@@ -638,18 +1290,6 @@ function buildSnippetPreview(content, startLine, endLine) {
638
1290
  }
639
1291
  return `${snippet.slice(0, 239)}…`;
640
1292
  }
641
- function prependLineNumbers(snippet, startLine) {
642
- const lines = snippet.split(/\r?\n/);
643
- if (lines.length === 0) {
644
- return snippet;
645
- }
646
- // Calculate required width from the last line number (dynamic sizing)
647
- const endLine = startLine + lines.length - 1;
648
- const width = String(endLine).length;
649
- return lines
650
- .map((line, index) => `${String(startLine + index).padStart(width, " ")}→${line}`)
651
- .join("\n");
652
- }
653
1293
  /**
654
1294
  * トークン数を推定(コンテンツベース)
655
1295
  * 実際のGPTトークナイザーを使用して正確にカウント
@@ -685,6 +1325,375 @@ function splitQueryWords(query) {
685
1325
  const words = query.split(/[\s/\-_]+/).filter((w) => w.length > 2);
686
1326
  return words.length > 0 ? words : [query]; // 全て除外された場合は元のクエリを使用
687
1327
  }
1328
+ function normalizeMetadataFilterKey(rawKey) {
1329
+ if (!rawKey) {
1330
+ return null;
1331
+ }
1332
+ const normalized = rawKey.toLowerCase();
1333
+ const alias = METADATA_ALIAS_MAP.get(normalized);
1334
+ if (alias) {
1335
+ return { ...alias };
1336
+ }
1337
+ for (const entry of METADATA_KEY_PREFIXES) {
1338
+ if (normalized.startsWith(entry.prefix)) {
1339
+ const remainder = normalized.slice(entry.prefix.length);
1340
+ if (!remainder) {
1341
+ return null;
1342
+ }
1343
+ return {
1344
+ key: remainder,
1345
+ source: entry.source,
1346
+ ...(entry.strict !== undefined && { strict: entry.strict }),
1347
+ };
1348
+ }
1349
+ }
1350
+ return null;
1351
+ }
1352
+ function normalizeFilterValues(value) {
1353
+ if (typeof value === "string") {
1354
+ const trimmed = value.trim();
1355
+ return trimmed ? [trimmed] : [];
1356
+ }
1357
+ if (Array.isArray(value)) {
1358
+ const values = [];
1359
+ for (const item of value) {
1360
+ if (typeof item === "string") {
1361
+ const trimmed = item.trim();
1362
+ if (trimmed) {
1363
+ values.push(trimmed);
1364
+ }
1365
+ }
1366
+ }
1367
+ return values;
1368
+ }
1369
+ return [];
1370
+ }
1371
+ function normalizeMetadataFiltersParam(input) {
1372
+ if (!input || typeof input !== "object") {
1373
+ return [];
1374
+ }
1375
+ const filters = [];
1376
+ for (const [rawKey, rawValue] of Object.entries(input)) {
1377
+ const normalizedKey = normalizeMetadataFilterKey(rawKey);
1378
+ if (!normalizedKey) {
1379
+ continue;
1380
+ }
1381
+ const values = normalizeFilterValues(rawValue);
1382
+ if (values.length === 0) {
1383
+ continue;
1384
+ }
1385
+ const filter = {
1386
+ key: normalizedKey.key,
1387
+ values,
1388
+ source: normalizedKey.source,
1389
+ };
1390
+ if (normalizedKey.strict !== undefined) {
1391
+ filter.strict = normalizedKey.strict;
1392
+ }
1393
+ filters.push(filter);
1394
+ }
1395
+ return filters;
1396
+ }
1397
+ function mergeMetadataFilters(filters) {
1398
+ const merged = new Map();
1399
+ for (const filter of filters) {
1400
+ if (filter.values.length === 0)
1401
+ continue;
1402
+ const mapKey = `${filter.source ?? "*"}::${filter.key}::${filter.strict ? "strict" : "hint"}`;
1403
+ const existing = merged.get(mapKey);
1404
+ if (existing) {
1405
+ const existingSet = new Set(existing.values.map((val) => val.toLowerCase()));
1406
+ for (const value of filter.values) {
1407
+ if (!existingSet.has(value.toLowerCase())) {
1408
+ existing.values.push(value);
1409
+ existingSet.add(value.toLowerCase());
1410
+ }
1411
+ }
1412
+ }
1413
+ else {
1414
+ const entry = {
1415
+ key: filter.key,
1416
+ source: filter.source,
1417
+ values: [...filter.values],
1418
+ };
1419
+ if (filter.strict !== undefined) {
1420
+ entry.strict = filter.strict;
1421
+ }
1422
+ merged.set(mapKey, entry);
1423
+ }
1424
+ }
1425
+ return Array.from(merged.values());
1426
+ }
1427
+ function parseInlineMetadataFilters(query) {
1428
+ if (!query) {
1429
+ return { cleanedQuery: "", filters: [] };
1430
+ }
1431
+ const matches = [];
1432
+ const pattern = /(\b[\w.]+):("[^"]+"|'[^']+'|[^\s]+)/g;
1433
+ let match;
1434
+ while ((match = pattern.exec(query)) !== null) {
1435
+ const normalizedKey = normalizeMetadataFilterKey(match[1] ?? "");
1436
+ if (!normalizedKey) {
1437
+ continue;
1438
+ }
1439
+ let rawValue = match[2] ?? "";
1440
+ if ((rawValue.startsWith('"') && rawValue.endsWith('"')) ||
1441
+ (rawValue.startsWith("'") && rawValue.endsWith("'"))) {
1442
+ rawValue = rawValue.slice(1, -1);
1443
+ }
1444
+ const value = rawValue.trim();
1445
+ if (!value) {
1446
+ continue;
1447
+ }
1448
+ const filter = {
1449
+ key: normalizedKey.key,
1450
+ source: normalizedKey.source,
1451
+ values: [value],
1452
+ };
1453
+ if (normalizedKey.strict !== undefined) {
1454
+ filter.strict = normalizedKey.strict;
1455
+ }
1456
+ matches.push({
1457
+ start: match.index,
1458
+ end: pattern.lastIndex,
1459
+ filter,
1460
+ });
1461
+ }
1462
+ if (matches.length === 0) {
1463
+ return { cleanedQuery: query.trim(), filters: [] };
1464
+ }
1465
+ let cleaned = "";
1466
+ let lastIndex = 0;
1467
+ for (const info of matches) {
1468
+ cleaned += query.slice(lastIndex, info.start);
1469
+ lastIndex = info.end;
1470
+ }
1471
+ cleaned += query.slice(lastIndex);
1472
+ const normalizedQuery = cleaned.replace(/\s{2,}/g, " ").trim();
1473
+ return {
1474
+ cleanedQuery: normalizedQuery,
1475
+ filters: mergeMetadataFilters(matches.map((m) => m.filter)),
1476
+ };
1477
+ }
1478
+ function buildMetadataFilterConditions(filters, alias = "f") {
1479
+ // SQL Injection対策: aliasをリテラル型で制限し、念のため検証
1480
+ if (!["f", "mk"].includes(alias)) {
1481
+ throw new Error(`Invalid SQL alias: ${alias}`);
1482
+ }
1483
+ const clauses = [];
1484
+ for (const filter of filters) {
1485
+ if (!filter.key || filter.values.length === 0) {
1486
+ continue;
1487
+ }
1488
+ const likeClauses = filter.values.map(() => "mk.value ILIKE ?").join(" OR ");
1489
+ const whereParts = [`mk.repo_id = ${alias}.repo_id`, `mk.path = ${alias}.path`];
1490
+ const params = [];
1491
+ if (filter.source) {
1492
+ whereParts.push("mk.source = ?");
1493
+ params.push(filter.source);
1494
+ }
1495
+ whereParts.push("mk.key = ?");
1496
+ params.push(filter.key);
1497
+ whereParts.push(`(${likeClauses})`);
1498
+ params.push(...filter.values.map((value) => `%${value}%`));
1499
+ const sql = `EXISTS (SELECT 1 FROM document_metadata_kv mk WHERE ${whereParts.join(" AND ")})`;
1500
+ clauses.push({ sql, params });
1501
+ }
1502
+ return clauses;
1503
+ }
1504
+ function isTableMissingError(error, table) {
1505
+ if (!(error instanceof Error)) {
1506
+ return false;
1507
+ }
1508
+ return error.message.includes(`Table with name ${table}`) || error.message.includes(table);
1509
+ }
1510
+ async function safeMetadataQuery(db, tableAvailability, sql, params) {
1511
+ if (!tableAvailability.hasMetadataTables) {
1512
+ return [];
1513
+ }
1514
+ try {
1515
+ return await db.all(sql, params);
1516
+ }
1517
+ catch (error) {
1518
+ if (isTableMissingError(error, "document_metadata_kv")) {
1519
+ console.warn("Metadata tables not found; disabling metadata filters and boosts until database is upgraded.");
1520
+ return [];
1521
+ }
1522
+ throw error;
1523
+ }
1524
+ }
1525
+ async function safeLinkQuery(db, tableAvailability, sql, params) {
1526
+ if (!tableAvailability.hasLinkTable) {
1527
+ return [];
1528
+ }
1529
+ try {
1530
+ return await db.all(sql, params);
1531
+ }
1532
+ catch (error) {
1533
+ if (isTableMissingError(error, "markdown_link")) {
1534
+ console.warn("Markdown link table not found; inbound link boosting disabled until database is upgraded.");
1535
+ return [];
1536
+ }
1537
+ throw error;
1538
+ }
1539
+ }
1540
+ async function fetchMetadataOnlyCandidates(db, tableAvailability, repoId, filters, limit) {
1541
+ if (!tableAvailability.hasMetadataTables || filters.length === 0 || limit <= 0) {
1542
+ return [];
1543
+ }
1544
+ const filterClauses = buildMetadataFilterConditions(filters);
1545
+ const whereClauses = ["f.repo_id = ?"];
1546
+ const params = [repoId];
1547
+ for (const clause of filterClauses) {
1548
+ whereClauses.push(clause.sql);
1549
+ params.push(...clause.params);
1550
+ }
1551
+ const sql = `
1552
+ SELECT f.path, f.lang, f.ext, b.content
1553
+ FROM file f
1554
+ JOIN blob b ON b.hash = f.blob_hash
1555
+ WHERE ${whereClauses.join(" AND ")}
1556
+ ORDER BY f.path
1557
+ LIMIT ?
1558
+ `;
1559
+ params.push(limit);
1560
+ try {
1561
+ return await db.all(sql, params);
1562
+ }
1563
+ catch (error) {
1564
+ if (isTableMissingError(error, "document_metadata_kv")) {
1565
+ console.warn("Metadata tables not found; disabling metadata-only searches until database is upgraded.");
1566
+ return [];
1567
+ }
1568
+ throw error;
1569
+ }
1570
+ }
1571
+ async function fetchMetadataKeywordMatches(db, tableAvailability, repoId, keywords, filters, limit, excludePaths) {
1572
+ if (!tableAvailability.hasMetadataTables || keywords.length === 0 || limit <= 0) {
1573
+ return [];
1574
+ }
1575
+ const keywordClauses = keywords.map(() => "mk.value ILIKE ?").join(" OR ");
1576
+ const params = [repoId, ...keywords.map((kw) => `%${kw}%`)];
1577
+ const whereClauses = ["mk.repo_id = ?", `(${keywordClauses})`];
1578
+ if (excludePaths.size > 0) {
1579
+ const placeholders = Array.from(excludePaths)
1580
+ .map(() => "?")
1581
+ .join(", ");
1582
+ whereClauses.push(`f.path NOT IN (${placeholders})`);
1583
+ params.push(...excludePaths);
1584
+ }
1585
+ const filterClauses = buildMetadataFilterConditions(filters, "f");
1586
+ for (const clause of filterClauses) {
1587
+ whereClauses.push(clause.sql);
1588
+ params.push(...clause.params);
1589
+ }
1590
+ params.push(limit);
1591
+ const sql = `
1592
+ SELECT f.path, f.lang, f.ext, b.content, COUNT(*) AS score
1593
+ FROM document_metadata_kv mk
1594
+ JOIN file f ON f.repo_id = mk.repo_id AND f.path = mk.path
1595
+ JOIN blob b ON b.hash = f.blob_hash
1596
+ WHERE ${whereClauses.join(" AND ")}
1597
+ GROUP BY f.path, f.lang, f.ext, b.content
1598
+ ORDER BY score DESC, f.path
1599
+ LIMIT ?
1600
+ `;
1601
+ const rows = await safeMetadataQuery(db, tableAvailability, sql, params);
1602
+ return rows.map((row) => ({ ...row, score: Number(row.score ?? 1) }));
1603
+ }
1604
+ async function loadMetadataForPaths(db, tableAvailability, repoId, paths) {
1605
+ const result = new Map();
1606
+ if (!tableAvailability.hasMetadataTables || paths.length === 0) {
1607
+ return result;
1608
+ }
1609
+ const placeholders = paths.map(() => "?").join(", ");
1610
+ const sql = `
1611
+ SELECT path, key, value, source
1612
+ FROM document_metadata_kv
1613
+ WHERE repo_id = ? AND path IN (${placeholders})
1614
+ `;
1615
+ const rows = await safeMetadataQuery(db, tableAvailability, sql, [repoId, ...paths]);
1616
+ for (const row of rows) {
1617
+ if (!result.has(row.path)) {
1618
+ result.set(row.path, []);
1619
+ }
1620
+ result.get(row.path).push({
1621
+ key: row.key,
1622
+ value: row.value,
1623
+ source: row.source ?? undefined,
1624
+ });
1625
+ }
1626
+ return result;
1627
+ }
1628
+ async function loadInboundLinkCounts(db, tableAvailability, repoId, paths) {
1629
+ const counts = new Map();
1630
+ if (!tableAvailability.hasLinkTable || paths.length === 0) {
1631
+ return counts;
1632
+ }
1633
+ const placeholders = paths.map(() => "?").join(", ");
1634
+ const sql = `
1635
+ SELECT resolved_path AS path, COUNT(*) AS inbound
1636
+ FROM markdown_link
1637
+ WHERE repo_id = ? AND resolved_path IS NOT NULL AND resolved_path IN (${placeholders})
1638
+ GROUP BY resolved_path
1639
+ `;
1640
+ const rows = await safeLinkQuery(db, tableAvailability, sql, [repoId, ...paths]);
1641
+ for (const row of rows) {
1642
+ const inboundValue = typeof row.inbound === "bigint" ? Number(row.inbound) : Number(row.inbound ?? 0);
1643
+ counts.set(row.path, inboundValue);
1644
+ }
1645
+ return counts;
1646
+ }
1647
+ function computeMetadataBoost(entries, keywordSet, filterValueSet) {
1648
+ if (!entries || entries.length === 0) {
1649
+ return 0;
1650
+ }
1651
+ let boost = 0;
1652
+ for (const entry of entries) {
1653
+ const valueLower = entry.value.toLowerCase();
1654
+ for (const keyword of keywordSet) {
1655
+ if (valueLower.includes(keyword)) {
1656
+ boost += METADATA_MATCH_WEIGHT;
1657
+ break;
1658
+ }
1659
+ }
1660
+ if (filterValueSet.has(valueLower)) {
1661
+ boost += METADATA_FILTER_MATCH_WEIGHT;
1662
+ }
1663
+ }
1664
+ return Math.min(boost, 1.5);
1665
+ }
1666
+ function computeInboundLinkBoost(count) {
1667
+ let numericCount = count;
1668
+ if (typeof numericCount === "bigint") {
1669
+ numericCount = Number(numericCount);
1670
+ }
1671
+ if (!numericCount || numericCount <= 0) {
1672
+ return 0;
1673
+ }
1674
+ return Math.min(Math.log1p(numericCount) * INBOUND_LINK_WEIGHT, 1.0);
1675
+ }
1676
+ function candidateMatchesMetadataFilters(entries, filters) {
1677
+ if (filters.length === 0) {
1678
+ return true;
1679
+ }
1680
+ if (!entries || entries.length === 0) {
1681
+ return false;
1682
+ }
1683
+ return filters.every((filter) => {
1684
+ const expectedValues = filter.values.map((value) => value.toLowerCase());
1685
+ return entries.some((entry) => {
1686
+ if (entry.key !== filter.key) {
1687
+ return false;
1688
+ }
1689
+ if (filter.source && entry.source !== filter.source) {
1690
+ return false;
1691
+ }
1692
+ const lowerValue = entry.value.toLowerCase();
1693
+ return expectedValues.some((value) => lowerValue.includes(value));
1694
+ });
1695
+ });
1696
+ }
688
1697
  /**
689
1698
  * パス固有のマルチプライヤーを取得(最長プレフィックスマッチ)
690
1699
  * 配列の順序に依存せず、常に最長一致のプレフィックスを選択
@@ -710,7 +1719,7 @@ function getPathMultiplier(filePath, profileConfig) {
710
1719
  * @param weights - スコアリングウェイト設定(乗算的ペナルティに使用)
711
1720
  * @returns ブースト適用後のスコア
712
1721
  */
713
- function applyFileTypeBoost(path, baseScore, profileConfig, _weights) {
1722
+ function applyFileTypeBoost(path, baseScore, profileConfig, weights) {
714
1723
  // Blacklisted directories that are almost always irrelevant for code context
715
1724
  const blacklistedDirs = [
716
1725
  ".cursor/",
@@ -727,7 +1736,8 @@ function applyFileTypeBoost(path, baseScore, profileConfig, _weights) {
727
1736
  if (profileConfig.denylistOverrides.includes(dir)) {
728
1737
  continue;
729
1738
  }
730
- return -100; // Effectively remove it
1739
+ // v1.0.0: Use multiplicative penalty instead of absolute -100
1740
+ return baseScore * weights.blacklistPenaltyMultiplier;
731
1741
  }
732
1742
  }
733
1743
  const fileName = path.split("/").pop() ?? "";
@@ -758,9 +1768,9 @@ function applyFileTypeBoost(path, baseScore, profileConfig, _weights) {
758
1768
  multiplier *= implMultiplier;
759
1769
  }
760
1770
  }
761
- // Test files: additive penalty (keep strong for files_search)
1771
+ // Test files: multiplicative penalty (v1.0.0)
762
1772
  if (path.startsWith("tests/") || path.startsWith("test/")) {
763
- return baseScore * 0.2; // Strong penalty for tests
1773
+ return baseScore * weights.testPenaltyMultiplier;
764
1774
  }
765
1775
  return baseScore * multiplier;
766
1776
  }
@@ -862,22 +1872,25 @@ function applyPathBasedScoring(candidate, lowerPath, weights, extractedTerms) {
862
1872
  }
863
1873
  }
864
1874
  /**
865
- * 加算的ファイルペナルティを適用
866
- * ブラックリストディレクトリ、テストファイル、lockファイル、設定ファイル、マイグレーションファイルに強いペナルティ
867
- * @param profile - boost_profile設定("docs"の場合はdocs/ディレクトリのブラックリストをスキップ)
868
- * @returns true if penalty was applied and processing should stop
1875
+ * 乗算的ファイルペナルティを適用(v1.0.0+)
1876
+ * ブラックリストディレクトリ、テストファイル、lockファイルに乗算ペナルティ
1877
+ * v1.0.0: 絶対ペナルティ(-100)から乗算ペナルティ(×0.01など)に移行
1878
+ * @param weights - スコアリングウェイト設定(乗算ペナルティ係数を含む)
1879
+ * @param profile - boost_profile設定(denylistOverridesなど)
1880
+ * @returns true if severe penalty was applied (caller should skip further boosts)
869
1881
  */
870
- function applyAdditiveFilePenalties(candidate, path, lowerPath, fileName, profileConfig) {
871
- // Blacklisted directories - effectively remove
1882
+ function applyMultiplicativeFilePenalties(candidate, path, lowerPath, fileName, weights, profileConfig) {
1883
+ // Returns true if a severe penalty was applied (should skip further boosts)
1884
+ // Blacklisted directories - apply strong multiplicative penalty (99% reduction)
1885
+ // v1.0.0: test/ and tests/ removed - handled by testPenaltyMultiplier instead
872
1886
  const blacklistedDirs = [
873
1887
  ".cursor/",
874
1888
  ".devcontainer/",
875
1889
  ".serena/",
876
1890
  "__mocks__/",
877
1891
  "docs/",
878
- "test/",
879
- "tests/",
880
1892
  ".git/",
1893
+ ".github/",
881
1894
  "node_modules/",
882
1895
  "db/migrate/",
883
1896
  "db/migrations/",
@@ -897,19 +1910,26 @@ function applyAdditiveFilePenalties(candidate, path, lowerPath, fileName, profil
897
1910
  if (profileConfig.denylistOverrides.includes(dir)) {
898
1911
  continue; // Skip this blacklisted directory
899
1912
  }
900
- candidate.score = -100;
1913
+ // v1.0.0: Use multiplicative penalty instead of absolute -100
1914
+ candidate.scoreMultiplier *= weights.blacklistPenaltyMultiplier;
901
1915
  candidate.reasons.add("penalty:blacklisted-dir");
902
- return true;
1916
+ return true; // Signal to skip further boosts - this is the strongest penalty
903
1917
  }
904
1918
  }
905
- // Test files - strong penalty
1919
+ if (isSuppressedPath(path)) {
1920
+ // v1.0.0: Use multiplicative penalty instead of absolute -100
1921
+ candidate.scoreMultiplier *= weights.blacklistPenaltyMultiplier;
1922
+ candidate.reasons.add("penalty:suppressed");
1923
+ return true; // Signal to skip further boosts
1924
+ }
1925
+ // Test files - strong multiplicative penalty (95% reduction)
906
1926
  const testPatterns = [".spec.ts", ".spec.js", ".test.ts", ".test.js", ".spec.tsx", ".test.tsx"];
907
1927
  if (testPatterns.some((pattern) => lowerPath.endsWith(pattern))) {
908
- candidate.score -= 2.0;
1928
+ candidate.scoreMultiplier *= weights.testPenaltyMultiplier;
909
1929
  candidate.reasons.add("penalty:test-file");
910
- return true;
1930
+ return true; // Signal to skip further boosts
911
1931
  }
912
- // Lock files - very strong penalty
1932
+ // Lock files - very strong multiplicative penalty (99% reduction)
913
1933
  const lockFiles = [
914
1934
  "package-lock.json",
915
1935
  "pnpm-lock.yaml",
@@ -920,63 +1940,46 @@ function applyAdditiveFilePenalties(candidate, path, lowerPath, fileName, profil
920
1940
  "poetry.lock",
921
1941
  ];
922
1942
  if (lockFiles.some((lockFile) => fileName === lockFile)) {
923
- candidate.score -= 3.0;
1943
+ candidate.scoreMultiplier *= weights.lockPenaltyMultiplier;
924
1944
  candidate.reasons.add("penalty:lock-file");
925
- return true;
1945
+ return true; // Signal to skip further boosts
926
1946
  }
927
- // Configuration files - penalty handling depends on profile
928
- const configPatterns = [
929
- ".config.js",
930
- ".config.ts",
931
- ".config.mjs",
932
- ".config.cjs",
933
- "tsconfig.json",
934
- "jsconfig.json",
935
- "package.json",
936
- ".eslintrc",
937
- ".prettierrc",
938
- "jest.config",
939
- "vite.config",
940
- "vitest.config",
941
- "webpack.config",
942
- "rollup.config",
943
- ];
944
- if (configPatterns.some((pattern) => lowerPath.endsWith(pattern) || fileName.startsWith(".env")) ||
945
- fileName === "Dockerfile" ||
946
- fileName === "docker-compose.yml" ||
947
- fileName === "docker-compose.yaml") {
948
- // ✅ Use explicit flag instead of magic number (0.3) to determine behavior
949
- // This decouples profile detection from multiplier values
950
- if (profileConfig.skipConfigAdditivePenalty) {
951
- return false; // Continue to multiplicative penalty only
952
- }
953
- // For other profiles, apply strong additive penalty
954
- candidate.score -= 1.5;
955
- candidate.reasons.add("penalty:config-file");
956
- return true;
957
- }
958
- // Migration files - strong penalty
959
- if (lowerPath.includes("migrate") || lowerPath.includes("migration")) {
960
- candidate.score -= 2.0;
961
- candidate.reasons.add("penalty:migration-file");
962
- return true;
963
- }
964
- return false; // No penalty applied, continue processing
1947
+ // v1.0.0: No penalty applied, allow further boosts/penalties
1948
+ return false;
965
1949
  }
966
1950
  /**
967
1951
  * ファイルタイプ別の乗算的ペナルティ/ブーストを適用(v0.7.0+)
968
1952
  * profile="docs": ドキュメントファイルをブースト
969
1953
  * profile="default": ドキュメントファイルにペナルティ、実装ファイルをブースト
970
1954
  */
971
- function applyFileTypeMultipliers(candidate, path, ext, profileConfig, _weights) {
1955
+ function applyFileTypeMultipliers(candidate, path, ext, profileConfig, weights) {
972
1956
  const fileName = path.split("/").pop() ?? "";
973
- // Step 1: Config files
1957
+ const lowerPath = path.toLowerCase();
1958
+ // ✅ Step 1: Low-value files (v1.0.0: syntax/perf/legal/migration)
1959
+ // Apply configPenaltyMultiplier (strong penalty) to rarely useful file types
1960
+ const isSyntaxGrammar = path.includes("/syntaxes/") &&
1961
+ (lowerPath.endsWith(".tmlanguage") ||
1962
+ lowerPath.endsWith(".tmlanguage.json") ||
1963
+ lowerPath.endsWith(".tmtheme") ||
1964
+ lowerPath.endsWith(".plist"));
1965
+ const isPerfData = lowerPath.includes(".perf.data") ||
1966
+ lowerPath.includes(".perf-data") ||
1967
+ lowerPath.includes("-perf-data");
1968
+ const isLegalFile = fileName.toLowerCase().includes("thirdpartynotices") ||
1969
+ fileName.toLowerCase() === "cgmanifest.json";
1970
+ const isMigrationFile = lowerPath.includes("migrate") || lowerPath.includes("migration");
1971
+ if (isSyntaxGrammar || isPerfData || isLegalFile || isMigrationFile) {
1972
+ candidate.scoreMultiplier *= weights.configPenaltyMultiplier;
1973
+ candidate.reasons.add("penalty:low-value-file");
1974
+ return; // Don't apply impl boosts
1975
+ }
1976
+ // ✅ Step 2: Config files
974
1977
  if (isConfigFile(path, fileName)) {
975
1978
  candidate.scoreMultiplier *= profileConfig.fileTypeMultipliers.config;
976
1979
  candidate.reasons.add("penalty:config-file");
977
1980
  return; // Don't apply impl boosts to config files
978
1981
  }
979
- // ✅ Step 2: Documentation files
1982
+ // ✅ Step 3: Documentation files
980
1983
  const docExtensions = [".md", ".yaml", ".yml", ".mdc"];
981
1984
  if (docExtensions.some((docExt) => path.endsWith(docExt))) {
982
1985
  const docMultiplier = profileConfig.fileTypeMultipliers.doc;
@@ -989,7 +1992,7 @@ function applyFileTypeMultipliers(candidate, path, ext, profileConfig, _weights)
989
1992
  }
990
1993
  return; // Don't apply impl boosts to docs
991
1994
  }
992
- // ✅ Step 3: Implementation files with path-specific boosts
1995
+ // ✅ Step 4: Implementation files with path-specific boosts
993
1996
  const implMultiplier = profileConfig.fileTypeMultipliers.impl;
994
1997
  // ✅ Use longest-prefix-match logic (order-independent)
995
1998
  const pathBoost = getPathMultiplier(path, profileConfig);
@@ -1016,16 +2019,21 @@ function applyFileTypeMultipliers(candidate, path, ext, profileConfig, _weights)
1016
2019
  }
1017
2020
  }
1018
2021
  /**
1019
- * contextBundle専用のブーストプロファイル適用(v0.7.0+: リファクタリング版)
2022
+ * contextBundle専用のブーストプロファイル適用(v1.0.0: 乗算ペナルティモデル)
1020
2023
  * 複雑度を削減するために3つのヘルパー関数に分割:
1021
2024
  * 1. applyPathBasedScoring: パスベースの加算的スコアリング
1022
- * 2. applyAdditiveFilePenalties: 強力な加算的ペナルティ
1023
- * 3. applyFileTypeMultipliers: 乗算的ペナルティ/ブースト
2025
+ * 2. applyMultiplicativeFilePenalties: 乗算的ペナルティ(blacklist/test/lock)
2026
+ * 3. applyFileTypeMultipliers: 乗算的ペナルティ/ブースト(doc/config/impl)
1024
2027
  *
1025
- * CRITICAL SAFETY RULES:
1026
- * 1. Multipliers are stored in candidate.scoreMultiplier, applied AFTER all additive scoring
1027
- * 2. profile="docs" skips documentation penalties (allows doc-focused queries)
1028
- * 3. Blacklist/test/lock/config files keep additive penalties (already very strong)
2028
+ * v1.0.0 CHANGES:
2029
+ * - 絶対ペナルティ(-100)を乗算ペナルティ(×0.01など)に置き換え
2030
+ * - すべてのペナルティが組み合わせ可能に(boost_profileとの相互作用が予測可能)
2031
+ * - v0.9.0の特別ケース処理(if profile === "docs")が不要に
2032
+ *
2033
+ * SCORING PHASES:
2034
+ * 1. Additive phase: テキストマッチ、パスマッチ、依存関係、近接性を加算
2035
+ * 2. Multiplicative phase: ペナルティとブーストを scoreMultiplier に蓄積
2036
+ * 3. Final application: score *= scoreMultiplier(最終段階で一度だけ適用)
1029
2037
  */
1030
2038
  function applyBoostProfile(candidate, row, profileConfig, weights, extractedTerms) {
1031
2039
  const { path, ext } = row;
@@ -1033,117 +2041,195 @@ function applyBoostProfile(candidate, row, profileConfig, weights, extractedTerm
1033
2041
  const fileName = path.split("/").pop() ?? "";
1034
2042
  // Step 1: パスベースのスコアリング(加算的ブースト)
1035
2043
  applyPathBasedScoring(candidate, lowerPath, weights, extractedTerms);
1036
- // Step 2: 加算的ペナルティ(ブラックリスト、テスト、lock、設定、マイグレーション)
1037
- const shouldStop = applyAdditiveFilePenalties(candidate, path, lowerPath, fileName, profileConfig);
1038
- if (shouldStop) {
1039
- return; // ペナルティが適用された場合は処理終了
1040
- }
2044
+ // Step 2: 乗算的ペナルティ(ブラックリスト、テスト、lock
2045
+ // v1.0.0: Returns true if severe penalty applied (should skip further boosts)
2046
+ const skipFurtherBoosts = applyMultiplicativeFilePenalties(candidate, path, lowerPath, fileName, weights, profileConfig);
1041
2047
  // Step 3: ファイルタイプ別の乗算的ペナルティ/ブースト
1042
- applyFileTypeMultipliers(candidate, path, ext, profileConfig, weights);
2048
+ // Skip if severe penalty was applied (blacklist/test/lock files shouldn't get impl boosts)
2049
+ if (!skipFurtherBoosts) {
2050
+ applyFileTypeMultipliers(candidate, path, ext, profileConfig, weights);
2051
+ }
1043
2052
  }
1044
2053
  export async function filesSearch(context, params) {
1045
2054
  const { db, repoId } = context;
1046
- const { query } = params;
1047
- if (!query || query.trim().length === 0) {
1048
- throw new Error("files_search requires a non-empty query. Provide a search keyword to continue.");
2055
+ const rawQuery = params.query ?? "";
2056
+ const inlineMetadata = parseInlineMetadataFilters(rawQuery);
2057
+ const paramFilters = normalizeMetadataFiltersParam(params.metadata_filters);
2058
+ const metadataFilters = mergeMetadataFilters([...inlineMetadata.filters, ...paramFilters]);
2059
+ const strictMetadataFilters = metadataFilters.filter((filter) => filter.strict);
2060
+ const hintMetadataFilters = metadataFilters.filter((filter) => !filter.strict);
2061
+ const hasStrictMetadataFilters = strictMetadataFilters.length > 0;
2062
+ const hasHintMetadataFilters = hintMetadataFilters.length > 0;
2063
+ const hasAnyMetadataFilters = metadataFilters.length > 0;
2064
+ let cleanedQuery = inlineMetadata.cleanedQuery;
2065
+ let hasTextQuery = cleanedQuery.length > 0;
2066
+ if (!hasTextQuery && hasHintMetadataFilters) {
2067
+ cleanedQuery = hintMetadataFilters
2068
+ .flatMap((filter) => filter.values)
2069
+ .map((value) => value.trim())
2070
+ .filter((value) => value.length > 0)
2071
+ .join(" ");
2072
+ cleanedQuery = cleanedQuery.trim();
2073
+ hasTextQuery = cleanedQuery.length > 0;
2074
+ }
2075
+ const metadataValueSeed = metadataFilters
2076
+ .flatMap((filter) => filter.values)
2077
+ .map((value) => value.trim())
2078
+ .filter((value) => value.length > 0)
2079
+ .join(" ");
2080
+ if (metadataValueSeed.length > 0) {
2081
+ cleanedQuery = `${cleanedQuery} ${metadataValueSeed}`.trim();
2082
+ hasTextQuery = cleanedQuery.length > 0;
2083
+ }
2084
+ if (!hasTextQuery && !hasAnyMetadataFilters) {
2085
+ throw new Error("files_search requires a query or metadata_filters. Provide keywords or structured filters to continue.");
1049
2086
  }
1050
2087
  const limit = normalizeLimit(params.limit);
1051
2088
  const ftsStatus = await getFreshFtsStatus(context);
1052
2089
  const hasFTS = ftsStatus.ready;
1053
- let sql;
1054
- let values;
1055
- if (hasFTS) {
1056
- // FTS拡張利用可能: fts_main_blob.match_bm25 を使用
1057
- const conditions = ["f.repo_id = ?"];
1058
- values = [repoId];
1059
- // 言語・拡張子フィルタ
1060
- if (params.lang) {
1061
- conditions.push("COALESCE(f.lang, '') = ?");
1062
- values.push(params.lang);
1063
- }
1064
- if (params.ext) {
1065
- conditions.push("COALESCE(f.ext, '') = ?");
1066
- values.push(params.ext);
1067
- }
1068
- if (params.path_prefix) {
1069
- conditions.push("f.path LIKE ?");
1070
- values.push(`${params.path_prefix}%`);
1071
- }
1072
- // FTS検索(BM25スコアリング)
1073
- sql = `
1074
- SELECT f.path, f.lang, f.ext, b.content, fts.score
1075
- FROM file f
1076
- JOIN blob b ON b.hash = f.blob_hash
1077
- JOIN (
1078
- SELECT hash, fts_main_blob.match_bm25(hash, ?) AS score
1079
- FROM blob
1080
- WHERE score IS NOT NULL
1081
- ) fts ON fts.hash = b.hash
1082
- WHERE ${conditions.join(" AND ")}
1083
- ORDER BY fts.score DESC
1084
- LIMIT ?
1085
- `;
1086
- values.unshift(query); // FTSクエリを先頭に追加
1087
- values.push(limit);
1088
- }
1089
- else {
1090
- // FTS拡張利用不可: ILIKE検索(Phase 1の単語分割ロジック)
1091
- const conditions = ["f.repo_id = ?", "b.content IS NOT NULL"];
1092
- values = [repoId];
1093
- const words = splitQueryWords(query);
1094
- if (words.length === 1) {
1095
- conditions.push("b.content ILIKE '%' || ? || '%'");
1096
- values.push(query);
2090
+ const metadataClauses = buildMetadataFilterConditions(strictMetadataFilters);
2091
+ const candidateRows = [];
2092
+ if (hasTextQuery) {
2093
+ let sql;
2094
+ let values;
2095
+ if (hasFTS) {
2096
+ const conditions = ["f.repo_id = ?"];
2097
+ values = [repoId];
2098
+ if (params.lang) {
2099
+ conditions.push("COALESCE(f.lang, '') = ?");
2100
+ values.push(params.lang);
2101
+ }
2102
+ if (params.ext) {
2103
+ conditions.push("COALESCE(f.ext, '') = ?");
2104
+ values.push(params.ext);
2105
+ }
2106
+ if (params.path_prefix) {
2107
+ conditions.push("f.path LIKE ?");
2108
+ values.push(`${params.path_prefix}%`);
2109
+ }
2110
+ for (const clause of metadataClauses) {
2111
+ conditions.push(clause.sql);
2112
+ values.push(...clause.params);
2113
+ }
2114
+ sql = `
2115
+ SELECT f.path, f.lang, f.ext, b.content, fts.score
2116
+ FROM file f
2117
+ JOIN blob b ON b.hash = f.blob_hash
2118
+ JOIN (
2119
+ SELECT hash, fts_main_blob.match_bm25(hash, ?) AS score
2120
+ FROM blob
2121
+ WHERE score IS NOT NULL
2122
+ ) fts ON fts.hash = b.hash
2123
+ WHERE ${conditions.join(" AND ")}
2124
+ ORDER BY fts.score DESC
2125
+ LIMIT ?
2126
+ `;
2127
+ values.unshift(cleanedQuery);
2128
+ values.push(limit);
1097
2129
  }
1098
2130
  else {
1099
- const wordConditions = words.map(() => "b.content ILIKE '%' || ? || '%'");
1100
- conditions.push(`(${wordConditions.join(" OR ")})`);
1101
- values.push(...words);
1102
- }
1103
- if (params.lang) {
1104
- conditions.push("COALESCE(f.lang, '') = ?");
1105
- values.push(params.lang);
1106
- }
1107
- if (params.ext) {
1108
- conditions.push("COALESCE(f.ext, '') = ?");
1109
- values.push(params.ext);
1110
- }
1111
- if (params.path_prefix) {
1112
- conditions.push("f.path LIKE ?");
1113
- values.push(`${params.path_prefix}%`);
1114
- }
1115
- sql = `
1116
- SELECT f.path, f.lang, f.ext, b.content
1117
- FROM file f
1118
- JOIN blob b ON b.hash = f.blob_hash
1119
- WHERE ${conditions.join(" AND ")}
1120
- ORDER BY f.path
1121
- LIMIT ?
1122
- `;
1123
- values.push(limit);
1124
- }
1125
- const rows = await db.all(sql, values);
1126
- const boostProfile = params.boost_profile ?? "default";
2131
+ const conditions = ["f.repo_id = ?", "b.content IS NOT NULL"];
2132
+ values = [repoId];
2133
+ const words = splitQueryWords(cleanedQuery);
2134
+ if (words.length === 1) {
2135
+ conditions.push("b.content ILIKE '%' || ? || '%'");
2136
+ values.push(cleanedQuery);
2137
+ }
2138
+ else {
2139
+ const wordConditions = words.map(() => "b.content ILIKE '%' || ? || '%'");
2140
+ conditions.push(`(${wordConditions.join(" OR ")})`);
2141
+ values.push(...words);
2142
+ }
2143
+ if (params.lang) {
2144
+ conditions.push("COALESCE(f.lang, '') = ?");
2145
+ values.push(params.lang);
2146
+ }
2147
+ if (params.ext) {
2148
+ conditions.push("COALESCE(f.ext, '') = ?");
2149
+ values.push(params.ext);
2150
+ }
2151
+ if (params.path_prefix) {
2152
+ conditions.push("f.path LIKE ?");
2153
+ values.push(`${params.path_prefix}%`);
2154
+ }
2155
+ for (const clause of metadataClauses) {
2156
+ conditions.push(clause.sql);
2157
+ values.push(...clause.params);
2158
+ }
2159
+ sql = `
2160
+ SELECT f.path, f.lang, f.ext, b.content
2161
+ FROM file f
2162
+ JOIN blob b ON b.hash = f.blob_hash
2163
+ WHERE ${conditions.join(" AND ")}
2164
+ ORDER BY f.path
2165
+ LIMIT ?
2166
+ `;
2167
+ values.push(limit);
2168
+ }
2169
+ const textRows = await db.all(sql, values);
2170
+ candidateRows.push(...textRows);
2171
+ }
2172
+ if (!hasTextQuery && hasAnyMetadataFilters) {
2173
+ const metadataOnlyRows = await fetchMetadataOnlyCandidates(db, context.tableAvailability, repoId, metadataFilters, limit * 2);
2174
+ for (const row of metadataOnlyRows) {
2175
+ row.score = 1 + metadataFilters.length * 0.2;
2176
+ }
2177
+ candidateRows.push(...metadataOnlyRows);
2178
+ }
2179
+ if (hasTextQuery) {
2180
+ const metadataKeywords = splitQueryWords(cleanedQuery.toLowerCase()).map((kw) => kw.toLowerCase());
2181
+ if (metadataKeywords.length > 0) {
2182
+ const excludePaths = new Set(candidateRows.map((row) => row.path));
2183
+ const metadataRows = await fetchMetadataKeywordMatches(db, context.tableAvailability, repoId, metadataKeywords, metadataFilters, limit * 2, excludePaths);
2184
+ candidateRows.push(...metadataRows);
2185
+ }
2186
+ }
2187
+ if (candidateRows.length === 0) {
2188
+ return [];
2189
+ }
2190
+ const rowMap = new Map();
2191
+ for (const row of candidateRows) {
2192
+ const base = row.score ?? (hasTextQuery ? 1.0 : 0.8);
2193
+ const existing = rowMap.get(row.path);
2194
+ const existingScore = existing?.score ?? (hasTextQuery ? 1.0 : 0.8);
2195
+ if (!existing || base > existingScore) {
2196
+ rowMap.set(row.path, { ...row, score: base });
2197
+ }
2198
+ }
2199
+ const dedupedRows = Array.from(rowMap.values()).sort((a, b) => (b.score ?? 1) - (a.score ?? 1));
2200
+ const limitedRows = dedupedRows.slice(0, limit);
2201
+ const paths = limitedRows.map((row) => row.path);
2202
+ const metadataMap = await loadMetadataForPaths(db, context.tableAvailability, repoId, paths);
2203
+ const inboundCounts = await loadInboundLinkCounts(db, context.tableAvailability, repoId, paths);
2204
+ const metadataKeywordSet = hasTextQuery
2205
+ ? new Set(splitQueryWords(cleanedQuery.toLowerCase()).map((kw) => kw.toLowerCase()))
2206
+ : new Set();
2207
+ const filterValueSet = new Set(metadataFilters.flatMap((filter) => filter.values.map((value) => value.toLowerCase())));
2208
+ const boostProfile = params.boost_profile ??
2209
+ (hasHintMetadataFilters ? "balanced" : hasStrictMetadataFilters ? "docs" : "default");
1127
2210
  const profileConfig = getBoostProfile(boostProfile);
1128
- // ✅ v0.7.0+: Load configurable scoring weights for unified boosting logic
1129
- // Note: filesSearch doesn't have a separate profile parameter, uses default weights
1130
2211
  const weights = loadScoringProfile(null);
1131
2212
  const options = parseOutputOptions(params);
1132
- return rows
2213
+ const previewQuery = hasTextQuery
2214
+ ? cleanedQuery
2215
+ : (metadataFilters[0]?.values[0] ?? rawQuery.trim());
2216
+ return limitedRows
1133
2217
  .map((row) => {
1134
2218
  let preview;
1135
2219
  let matchLine;
2220
+ const previewSource = previewQuery || row.path;
1136
2221
  if (options.includePreview) {
1137
- // Full preview generation for non-compact mode
1138
- const previewData = buildPreview(row.content ?? "", query);
2222
+ const previewData = buildPreview(row.content ?? "", previewSource);
1139
2223
  preview = previewData.preview;
1140
2224
  matchLine = previewData.line;
1141
2225
  }
1142
2226
  else {
1143
- // Lightweight: extract only line number without preview
1144
- matchLine = findFirstMatchLine(row.content ?? "", query);
2227
+ matchLine = findFirstMatchLine(row.content ?? "", previewSource);
1145
2228
  }
1146
- const baseScore = row.score ?? 1.0; // FTS時はBM25スコア、ILIKE時は1.0
2229
+ const metadataEntries = metadataMap.get(row.path);
2230
+ const metadataBoost = computeMetadataBoost(metadataEntries, metadataKeywordSet, filterValueSet);
2231
+ const inboundBoost = computeInboundLinkBoost(inboundCounts.get(row.path));
2232
+ const baseScore = (row.score ?? (hasTextQuery ? 1.0 : 0.8)) + metadataBoost + inboundBoost;
1147
2233
  const boostedScore = boostProfile === "none"
1148
2234
  ? baseScore
1149
2235
  : applyFileTypeBoost(row.path, baseScore, profileConfig, weights);
@@ -1159,96 +2245,20 @@ export async function filesSearch(context, params) {
1159
2245
  }
1160
2246
  return result;
1161
2247
  })
1162
- .sort((a, b) => b.score - a.score); // スコアの高い順に再ソート
1163
- }
1164
- export async function snippetsGet(context, params) {
1165
- const { db, repoId } = context;
1166
- if (!params.path) {
1167
- throw new Error("snippets_get requires a file path. Specify a tracked text file path to continue.");
1168
- }
1169
- const rows = await db.all(`
1170
- SELECT f.path, f.lang, f.ext, f.is_binary, b.content
1171
- FROM file f
1172
- JOIN blob b ON b.hash = f.blob_hash
1173
- WHERE f.repo_id = ? AND f.path = ?
1174
- LIMIT 1
1175
- `, [repoId, params.path]);
1176
- if (rows.length === 0) {
1177
- throw new Error("Requested snippet file was not indexed. Re-run the indexer or choose another path.");
1178
- }
1179
- const row = rows[0];
1180
- if (!row) {
1181
- throw new Error("Requested snippet file was not indexed. Re-run the indexer or choose another path.");
1182
- }
1183
- if (row.is_binary) {
1184
- throw new Error("Binary snippets are not supported. Choose a text file to preview its content.");
1185
- }
1186
- if (row.content === null) {
1187
- throw new Error("Snippet content is unavailable. Re-run the indexer to refresh DuckDB state.");
1188
- }
1189
- const lines = row.content.split(/\r?\n/);
1190
- const totalLines = lines.length;
1191
- const snippetRows = await db.all(`
1192
- SELECT s.snippet_id, s.start_line, s.end_line, s.symbol_id, sym.name AS symbol_name, sym.kind AS symbol_kind
1193
- FROM snippet s
1194
- LEFT JOIN symbol sym
1195
- ON sym.repo_id = s.repo_id
1196
- AND sym.path = s.path
1197
- AND sym.symbol_id = s.symbol_id
1198
- WHERE s.repo_id = ? AND s.path = ?
1199
- ORDER BY s.start_line
1200
- `, [repoId, params.path]);
1201
- const requestedStart = params.start_line ?? 1;
1202
- const requestedEnd = params.end_line ?? Math.min(totalLines, requestedStart + DEFAULT_SNIPPET_WINDOW - 1);
1203
- const useSymbolSnippets = snippetRows.length > 0 && params.end_line === undefined;
1204
- let snippetSelection = null;
1205
- if (useSymbolSnippets) {
1206
- snippetSelection =
1207
- snippetRows.find((snippet) => requestedStart >= snippet.start_line && requestedStart <= snippet.end_line) ?? null;
1208
- if (!snippetSelection) {
1209
- const firstSnippet = snippetRows[0];
1210
- if (firstSnippet && requestedStart < firstSnippet.start_line) {
1211
- snippetSelection = firstSnippet;
1212
- }
1213
- else {
1214
- snippetSelection = snippetRows[snippetRows.length - 1] ?? null;
1215
- }
1216
- }
1217
- }
1218
- let startLine;
1219
- let endLine;
1220
- let symbolName = null;
1221
- let symbolKind = null;
1222
- if (snippetSelection) {
1223
- startLine = snippetSelection.start_line;
1224
- endLine = snippetSelection.end_line;
1225
- symbolName = snippetSelection.symbol_name;
1226
- symbolKind = snippetSelection.symbol_kind;
1227
- }
1228
- else {
1229
- startLine = Math.max(1, Math.min(totalLines, requestedStart));
1230
- endLine = Math.max(startLine, Math.min(totalLines, requestedEnd));
1231
- }
1232
- const isCompact = params.compact === true;
1233
- const addLineNumbers = params.includeLineNumbers === true && !isCompact;
1234
- let content;
1235
- if (!isCompact) {
1236
- const snippetContent = lines.slice(startLine - 1, endLine).join("\n");
1237
- content = addLineNumbers ? prependLineNumbers(snippetContent, startLine) : snippetContent;
1238
- }
1239
- return {
1240
- path: row.path,
1241
- startLine,
1242
- endLine,
1243
- ...(content !== undefined && { content }),
1244
- totalLines,
1245
- symbolName,
1246
- symbolKind,
1247
- };
2248
+ .filter((result) => result.score > SCORE_FILTER_THRESHOLD) // v1.0.0: Filter out extremely low-scored files (multiplicative penalties)
2249
+ .sort((a, b) => b.score - a.score);
1248
2250
  }
2251
+ // snippetsGet has been extracted to ./handlers/snippets-get.ts and re-exported above
1249
2252
  // ============================================================================
1250
2253
  // Issue #68: Path/Large File Penalty Helper Functions
1251
2254
  // ============================================================================
2255
/**
 * v1.0.0: Score filtering threshold for the multiplicative penalty model.
 * Candidates scoring below this value are dropped from results (hint paths
 * are exempt elsewhere). The default 0.05 removes files carrying a >95%
 * penalty while keeping reasonably relevant files.
 *
 * Can be overridden via the KIRI_SCORE_THRESHOLD environment variable.
 * A non-numeric or non-finite override falls back to the default: the
 * previous behavior produced NaN, and since `score > NaN` is false for
 * every score, a single typo in the env var silently filtered out ALL
 * search results.
 */
const DEFAULT_SCORE_FILTER_THRESHOLD = 0.05;
const SCORE_FILTER_THRESHOLD = (() => {
    const raw = process.env.KIRI_SCORE_THRESHOLD;
    if (raw === undefined) {
        return DEFAULT_SCORE_FILTER_THRESHOLD;
    }
    const parsed = Number.parseFloat(raw);
    // Guard against NaN/Infinity from malformed overrides.
    return Number.isFinite(parsed) ? parsed : DEFAULT_SCORE_FILTER_THRESHOLD;
})();
1252
2262
  /**
1253
2263
  * 環境変数からペナルティ機能フラグを読み取る
1254
2264
  */
@@ -1505,15 +2515,40 @@ function computeGraduatedPenalty(pathMatchHits, queryStats, config) {
1505
2515
  return config.tier2Delta;
1506
2516
  return 0; // pathMatchHits >= 3: no penalty
1507
2517
  }
1508
- export async function contextBundle(context, params) {
2518
+ async function contextBundleImpl(context, params) {
1509
2519
  context.warningManager.startRequest();
1510
2520
  const { db, repoId } = context;
1511
- const goal = params.goal?.trim() ?? "";
1512
- if (goal.length === 0) {
2521
+ const rawGoal = params.goal?.trim() ?? "";
2522
+ if (rawGoal.length === 0) {
1513
2523
  throw new Error("context_bundle requires a non-empty goal. Describe your objective to receive context.");
1514
2524
  }
2525
+ if (process.env.KIRI_TRACE_METADATA === "1") {
2526
+ console.info(`[metadata-trace-env] goal=${rawGoal}`);
2527
+ }
2528
+ const inlineMetadata = parseInlineMetadataFilters(rawGoal);
2529
+ const paramFilters = normalizeMetadataFiltersParam(params.metadata_filters);
2530
+ const metadataFilters = mergeMetadataFilters([...inlineMetadata.filters, ...paramFilters]);
2531
+ const strictMetadataFilters = metadataFilters.filter((filter) => filter.strict);
2532
+ const hintMetadataFilters = metadataFilters.filter((filter) => !filter.strict);
2533
+ const hasStrictMetadataFilters = strictMetadataFilters.length > 0;
2534
+ const hasHintMetadataFilters = hintMetadataFilters.length > 0;
2535
+ const hasAnyMetadataFilters = metadataFilters.length > 0;
2536
+ const goal = inlineMetadata.cleanedQuery.length > 0 ? inlineMetadata.cleanedQuery : rawGoal;
2537
+ if (process.env.KIRI_TRACE_METADATA === "1") {
2538
+ console.info("[metadata-trace]", JSON.stringify({
2539
+ rawGoal,
2540
+ cleanedGoal: goal,
2541
+ inlineFilters: inlineMetadata.filters,
2542
+ paramFilters,
2543
+ mergedFilters: metadataFilters,
2544
+ }));
2545
+ }
1515
2546
  const limit = normalizeBundleLimit(params.limit);
1516
2547
  const artifacts = params.artifacts ?? {};
2548
+ const artifactHints = normalizeArtifactHints(artifacts.hints);
2549
+ const hintBuckets = bucketArtifactHints(artifactHints);
2550
+ const artifactPathHints = hintBuckets.pathHints;
2551
+ const substringHints = hintBuckets.substringHints;
1517
2552
  const includeTokensEstimate = params.includeTokensEstimate === true;
1518
2553
  const isCompact = params.compact === true;
1519
2554
  // 項目2: トークンバジェット保護警告
@@ -1536,6 +2571,15 @@ export async function contextBundle(context, params) {
1536
2571
  if (artifacts.editing_path) {
1537
2572
  keywordSources.push(artifacts.editing_path);
1538
2573
  }
2574
+ if (artifactHints.length > 0) {
2575
+ keywordSources.push(artifactHints.join(" "));
2576
+ }
2577
+ if (hasAnyMetadataFilters) {
2578
+ const filterSeed = metadataFilters
2579
+ .map((filter) => `${filter.source ?? "meta"}:${filter.key}=${filter.values.join(",")}`)
2580
+ .join(" ");
2581
+ keywordSources.push(filterSeed);
2582
+ }
1539
2583
  const semanticSeed = keywordSources.join(" ");
1540
2584
  const queryEmbedding = generateEmbedding(semanticSeed)?.values ?? null;
1541
2585
  const extractedTerms = extractKeywords(semanticSeed);
@@ -1553,13 +2597,16 @@ export async function contextBundle(context, params) {
1553
2597
  const stringMatchSeeds = new Set();
1554
2598
  const fileCache = new Map();
1555
2599
  // ✅ Cache boost profile config to avoid redundant lookups in hot path
1556
- const boostProfile = params.boost_profile ?? "default";
2600
+ const boostProfile = params.boost_profile ??
2601
+ (hasHintMetadataFilters ? "balanced" : hasStrictMetadataFilters ? "docs" : "default");
1557
2602
  const profileConfig = getBoostProfile(boostProfile);
1558
2603
  // フレーズマッチング(高い重み: textMatch × 2)- 統合クエリでパフォーマンス改善
1559
2604
  if (extractedTerms.phrases.length > 0) {
1560
2605
  const phrasePlaceholders = extractedTerms.phrases
1561
2606
  .map(() => "b.content ILIKE '%' || ? || '%'")
1562
2607
  .join(" OR ");
2608
+ // DEBUG: Log SQL query parameters for troubleshooting
2609
+ console.log(`[DEBUG contextBundle] Executing phrase match query with repo_id=${repoId}, phrases=${JSON.stringify(extractedTerms.phrases)}`);
1563
2610
  const rows = await db.all(`
1564
2611
  SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
1565
2612
  FROM file f
@@ -1573,6 +2620,14 @@ export async function contextBundle(context, params) {
1573
2620
  ORDER BY f.path
1574
2621
  LIMIT ?
1575
2622
  `, [repoId, ...extractedTerms.phrases, MAX_MATCHES_PER_KEYWORD * extractedTerms.phrases.length]);
2623
+ // DEBUG: Log returned paths and verify they match expected repo_id
2624
+ if (rows.length > 0) {
2625
+ console.log(`[DEBUG contextBundle] Phrase match returned ${rows.length} rows. Sample paths:`, rows.slice(0, 3).map((r) => r.path));
2626
+ // Verify repo_id of returned files
2627
+ const pathsToCheck = rows.slice(0, 3).map((r) => r.path);
2628
+ const verification = await db.all(`SELECT path, repo_id FROM file WHERE path IN (${pathsToCheck.map(() => "?").join(", ")}) LIMIT 3`, pathsToCheck);
2629
+ console.log(`[DEBUG contextBundle] Repo ID verification:`, verification);
2630
+ }
1576
2631
  for (const row of rows) {
1577
2632
  if (row.content === null) {
1578
2633
  continue;
@@ -1674,6 +2729,31 @@ export async function contextBundle(context, params) {
1674
2729
  }
1675
2730
  }
1676
2731
  }
2732
+ const artifactPathTargets = artifactPathHints.map((hintPath) => ({
2733
+ path: hintPath,
2734
+ sourceHint: hintPath,
2735
+ origin: "artifact",
2736
+ }));
2737
+ const dictionaryPathTargets = await fetchDictionaryPathHints(db, context.tableAvailability, repoId, substringHints, HINT_DICTIONARY_LIMIT);
2738
+ const { list: resolvedPathHintTargets, meta: hintSeedMeta } = createHintSeedMeta([
2739
+ ...artifactPathTargets,
2740
+ ...dictionaryPathTargets,
2741
+ ]);
2742
+ if (resolvedPathHintTargets.length > 0) {
2743
+ await applyPathHintPromotions({
2744
+ db,
2745
+ tableAvailability: context.tableAvailability,
2746
+ repoId,
2747
+ hintTargets: resolvedPathHintTargets,
2748
+ candidates,
2749
+ fileCache,
2750
+ weights,
2751
+ hintSeedMeta,
2752
+ });
2753
+ }
2754
+ if (substringHints.length > 0) {
2755
+ await addHintSubstringMatches(db, context.tableAvailability, repoId, substringHints, candidates, HINT_SUBSTRING_LIMIT, HINT_SUBSTRING_BOOST);
2756
+ }
1677
2757
  if (artifacts.editing_path) {
1678
2758
  const editingCandidate = ensureCandidate(candidates, artifacts.editing_path);
1679
2759
  editingCandidate.score += weights.editingPath;
@@ -1681,7 +2761,6 @@ export async function contextBundle(context, params) {
1681
2761
  editingCandidate.matchLine ??= 1;
1682
2762
  }
1683
2763
  // SQL injection防御: ファイルパスの検証パターン
1684
- const SAFE_PATH_PATTERN = /^[a-zA-Z0-9_.\-/]+$/;
1685
2764
  const dependencySeeds = new Set();
1686
2765
  for (const pathSeed of stringMatchSeeds) {
1687
2766
  if (!SAFE_PATH_PATTERN.test(pathSeed)) {
@@ -1695,10 +2774,13 @@ export async function contextBundle(context, params) {
1695
2774
  }
1696
2775
  if (artifacts.editing_path) {
1697
2776
  if (!SAFE_PATH_PATTERN.test(artifacts.editing_path)) {
1698
- throw new Error(`Invalid editing_path format. Path must contain only alphanumeric characters, underscores, dots, hyphens, and forward slashes.`);
2777
+ throw new Error(`Invalid editing_path format: ${artifacts.editing_path}. Use only A-Z, 0-9, _, ., -, / characters.`);
1699
2778
  }
1700
2779
  dependencySeeds.add(artifacts.editing_path);
1701
2780
  }
2781
+ for (const target of resolvedPathHintTargets) {
2782
+ dependencySeeds.add(target.path);
2783
+ }
1702
2784
  if (dependencySeeds.size > 0) {
1703
2785
  // SQL injection防御: プレースホルダー生成前にサイズを検証
1704
2786
  if (dependencySeeds.size > MAX_DEPENDENCY_SEEDS_QUERY_LIMIT) {
@@ -1708,7 +2790,7 @@ export async function contextBundle(context, params) {
1708
2790
  // 防御的チェック: プレースホルダーが正しい形式であることを確認
1709
2791
  // 期待される形式: "?, ?, ..." (クエスチョンマーク、カンマ、スペースのみ)
1710
2792
  if (!/^(\?)(,\s*\?)*$/.test(placeholders)) {
1711
- throw new Error("Invalid placeholder generation detected. Operation aborted for safety.");
2793
+ throw new Error("Invalid dependency placeholder sequence detected. Remove unsafe dependency seeds and retry the request.");
1712
2794
  }
1713
2795
  const depRows = await db.all(`
1714
2796
  SELECT src_path, dst_kind, dst, rel
@@ -1746,31 +2828,72 @@ export async function contextBundle(context, params) {
1746
2828
  }
1747
2829
  }
1748
2830
  }
1749
- const materializedCandidates = [];
1750
- for (const candidate of candidates.values()) {
1751
- if (!candidate.content) {
1752
- const cached = fileCache.get(candidate.path);
1753
- if (cached) {
1754
- candidate.content = cached.content;
1755
- candidate.lang = cached.lang;
1756
- candidate.ext = cached.ext;
1757
- candidate.totalLines = cached.totalLines;
1758
- candidate.embedding = cached.embedding;
2831
+ const materializeCandidates = async () => {
2832
+ const result = [];
2833
+ for (const candidate of candidates.values()) {
2834
+ if (isSuppressedPath(candidate.path)) {
2835
+ continue;
1759
2836
  }
1760
- else {
1761
- const loaded = await loadFileContent(db, repoId, candidate.path);
1762
- if (!loaded) {
1763
- continue;
2837
+ if (!candidate.content) {
2838
+ const cached = fileCache.get(candidate.path);
2839
+ if (cached) {
2840
+ candidate.content = cached.content;
2841
+ candidate.lang = cached.lang;
2842
+ candidate.ext = cached.ext;
2843
+ candidate.totalLines = cached.totalLines;
2844
+ candidate.embedding = cached.embedding;
1764
2845
  }
1765
- candidate.content = loaded.content;
1766
- candidate.lang = loaded.lang;
1767
- candidate.ext = loaded.ext;
1768
- candidate.totalLines = loaded.totalLines;
1769
- candidate.embedding = loaded.embedding;
1770
- fileCache.set(candidate.path, loaded);
2846
+ else {
2847
+ const loaded = await loadFileContent(db, repoId, candidate.path);
2848
+ if (!loaded) {
2849
+ continue;
2850
+ }
2851
+ candidate.content = loaded.content;
2852
+ candidate.lang = loaded.lang;
2853
+ candidate.ext = loaded.ext;
2854
+ candidate.totalLines = loaded.totalLines;
2855
+ candidate.embedding = loaded.embedding;
2856
+ fileCache.set(candidate.path, loaded);
2857
+ }
2858
+ }
2859
+ result.push(candidate);
2860
+ }
2861
+ return result;
2862
+ };
2863
+ const addMetadataFallbackCandidates = async () => {
2864
+ if (!hasAnyMetadataFilters) {
2865
+ return;
2866
+ }
2867
+ const metadataRows = await fetchMetadataOnlyCandidates(db, context.tableAvailability, repoId, metadataFilters, limit * 2);
2868
+ if (metadataRows.length === 0) {
2869
+ return;
2870
+ }
2871
+ for (const row of metadataRows) {
2872
+ const candidate = ensureCandidate(candidates, row.path);
2873
+ if (row.content) {
2874
+ candidate.content = row.content;
2875
+ candidate.totalLines = row.content.split(/\r?\n/).length;
2876
+ fileCache.set(row.path, {
2877
+ content: row.content,
2878
+ lang: row.lang,
2879
+ ext: row.ext,
2880
+ totalLines: candidate.totalLines,
2881
+ embedding: candidate.embedding,
2882
+ });
1771
2883
  }
2884
+ candidate.lang ??= row.lang;
2885
+ candidate.ext ??= row.ext;
2886
+ candidate.matchLine ??= 1;
2887
+ candidate.score = Math.max(candidate.score, 1 + metadataFilters.length * 0.2);
1772
2888
  }
1773
- materializedCandidates.push(candidate);
2889
+ };
2890
+ if (hasAnyMetadataFilters) {
2891
+ await addMetadataFallbackCandidates();
2892
+ }
2893
+ let materializedCandidates = await materializeCandidates();
2894
+ if (materializedCandidates.length === 0 && hasAnyMetadataFilters) {
2895
+ await addMetadataFallbackCandidates();
2896
+ materializedCandidates = await materializeCandidates();
1774
2897
  }
1775
2898
  if (materializedCandidates.length === 0) {
1776
2899
  // Get warnings from WarningManager (includes breaking change notification if applicable)
@@ -1781,6 +2904,72 @@ export async function contextBundle(context, params) {
1781
2904
  ...(warnings.length > 0 && { warnings }),
1782
2905
  };
1783
2906
  }
2907
+ const metadataKeywordSet = new Set(extractedTerms.keywords.map((keyword) => keyword.toLowerCase()));
2908
+ const filterValueSet = new Set(metadataFilters.flatMap((filter) => filter.values.map((value) => value.toLowerCase())));
2909
+ let metadataEntriesMap;
2910
+ if (hasAnyMetadataFilters || metadataKeywordSet.size > 0 || filterValueSet.size > 0) {
2911
+ metadataEntriesMap = await loadMetadataForPaths(db, context.tableAvailability, repoId, materializedCandidates.map((candidate) => candidate.path));
2912
+ }
2913
+ if (hasStrictMetadataFilters) {
2914
+ metadataEntriesMap ??= new Map();
2915
+ for (let i = materializedCandidates.length - 1; i >= 0; i--) {
2916
+ const candidate = materializedCandidates[i];
2917
+ if (!candidate) {
2918
+ continue; // Skip undefined entries
2919
+ }
2920
+ const entries = metadataEntriesMap.get(candidate.path);
2921
+ const matchesFilters = candidateMatchesMetadataFilters(entries, strictMetadataFilters);
2922
+ if (!matchesFilters) {
2923
+ materializedCandidates.splice(i, 1);
2924
+ continue;
2925
+ }
2926
+ candidate.reasons.add("metadata:filter");
2927
+ if (process.env.KIRI_TRACE_METADATA === "1") {
2928
+ console.info(`[metadata-trace-match] path=${candidate.path}`);
2929
+ }
2930
+ }
2931
+ if (materializedCandidates.length === 0 && hasAnyMetadataFilters) {
2932
+ await addMetadataFallbackCandidates();
2933
+ materializedCandidates = await materializeCandidates();
2934
+ }
2935
+ if (materializedCandidates.length === 0) {
2936
+ const warnings = [...context.warningManager.responseWarnings];
2937
+ return {
2938
+ context: [],
2939
+ ...(includeTokensEstimate && { tokens_estimate: 0 }),
2940
+ ...(warnings.length > 0 && { warnings }),
2941
+ };
2942
+ }
2943
+ }
2944
+ if (hasHintMetadataFilters) {
2945
+ metadataEntriesMap ??= new Map();
2946
+ for (const candidate of materializedCandidates) {
2947
+ const entries = metadataEntriesMap.get(candidate.path);
2948
+ const matchesHints = candidateMatchesMetadataFilters(entries, hintMetadataFilters);
2949
+ if (matchesHints) {
2950
+ candidate.score += METADATA_HINT_BONUS;
2951
+ candidate.reasons.add("metadata:hint");
2952
+ }
2953
+ }
2954
+ }
2955
+ const inboundCounts = await loadInboundLinkCounts(db, context.tableAvailability, repoId, materializedCandidates.map((candidate) => candidate.path));
2956
+ if (metadataEntriesMap) {
2957
+ for (const candidate of materializedCandidates) {
2958
+ const entries = metadataEntriesMap.get(candidate.path);
2959
+ const metadataBoost = computeMetadataBoost(entries, metadataKeywordSet, filterValueSet);
2960
+ if (metadataBoost > 0) {
2961
+ candidate.score += metadataBoost;
2962
+ candidate.reasons.add("boost:metadata");
2963
+ }
2964
+ }
2965
+ }
2966
+ for (const candidate of materializedCandidates) {
2967
+ const linkBoost = computeInboundLinkBoost(inboundCounts.get(candidate.path));
2968
+ if (linkBoost > 0) {
2969
+ candidate.score += linkBoost;
2970
+ candidate.reasons.add("boost:links");
2971
+ }
2972
+ }
1784
2973
  applyStructuralScores(materializedCandidates, queryEmbedding, weights.structural);
1785
2974
  // ✅ CRITICAL SAFETY: Apply multipliers AFTER all additive scoring (v0.7.0)
1786
2975
  // Only apply to positive scores to prevent negative score inversion
@@ -1819,18 +3008,31 @@ export async function contextBundle(context, params) {
1819
3008
  const telemetry = computePenaltyTelemetry(materializedCandidates);
1820
3009
  logPenaltyTelemetry(telemetry, queryStats);
1821
3010
  }
1822
- const sortedCandidates = materializedCandidates
1823
- .filter((candidate) => candidate.score > 0) // Filter out candidates with negative or zero scores
3011
+ // v1.0.0: Filter out extremely low-scored candidates (result of multiplicative penalties)
3012
+ // Threshold removes files with >95% penalty while keeping reasonably relevant files
3013
+ // Hint paths are exempt from this threshold (always included if score > 0)
3014
+ const hintPathSet = new Set(resolvedPathHintTargets.map((target) => target.path));
3015
+ const rankedCandidates = materializedCandidates
3016
+ .filter((candidate) => candidate.score > SCORE_FILTER_THRESHOLD ||
3017
+ (candidate.score > 0 && hintPathSet.has(candidate.path)))
1824
3018
  .sort((a, b) => {
1825
3019
  if (b.score === a.score) {
1826
3020
  return a.path.localeCompare(b.path);
1827
3021
  }
1828
3022
  return b.score - a.score;
1829
- })
1830
- .slice(0, limit);
1831
- const maxScore = Math.max(...sortedCandidates.map((candidate) => candidate.score));
3023
+ });
3024
+ const prioritizedCandidates = prioritizeHintCandidates(rankedCandidates, resolvedPathHintTargets.map((target) => target.path), limit);
3025
+ if (prioritizedCandidates.length === 0) {
3026
+ const warnings = [...context.warningManager.responseWarnings];
3027
+ return {
3028
+ context: [],
3029
+ ...(includeTokensEstimate && { tokens_estimate: 0 }),
3030
+ ...(warnings.length > 0 && { warnings }),
3031
+ };
3032
+ }
3033
+ const maxScore = Math.max(...prioritizedCandidates.map((candidate) => candidate.score));
1832
3034
  const results = [];
1833
- for (const candidate of sortedCandidates) {
3035
+ for (const candidate of prioritizedCandidates) {
1834
3036
  if (!candidate.content) {
1835
3037
  continue;
1836
3038
  }
@@ -1858,6 +3060,23 @@ export async function contextBundle(context, params) {
1858
3060
  startLine = Math.max(1, matchLine - windowHalf);
1859
3061
  endLine = Math.min(totalLines === 0 ? matchLine + windowHalf : totalLines, startLine + FALLBACK_SNIPPET_WINDOW - 1);
1860
3062
  }
3063
+ if (CLAMP_SNIPPETS_ENABLED) {
3064
+ // Clamp snippet length to FALLBACK_SNIPPET_WINDOW even when symbol spans large regions
3065
+ const maxWindow = FALLBACK_SNIPPET_WINDOW;
3066
+ const selectedEnd = selected ? selected.end_line : endLine;
3067
+ const selectedStart = selected ? selected.start_line : startLine;
3068
+ if (endLine - startLine + 1 > maxWindow) {
3069
+ const anchor = candidate.matchLine ?? startLine;
3070
+ let clampedStart = Math.max(selectedStart, anchor - Math.floor(maxWindow / 2));
3071
+ let clampedEnd = clampedStart + maxWindow - 1;
3072
+ if (clampedEnd > selectedEnd) {
3073
+ clampedEnd = selectedEnd;
3074
+ clampedStart = Math.max(selectedStart, clampedEnd - maxWindow + 1);
3075
+ }
3076
+ startLine = clampedStart;
3077
+ endLine = Math.max(clampedStart, clampedEnd);
3078
+ }
3079
+ }
1861
3080
  if (endLine < startLine) {
1862
3081
  endLine = startLine;
1863
3082
  }
@@ -1885,7 +3104,7 @@ export async function contextBundle(context, params) {
1885
3104
  let tokensEstimate;
1886
3105
  if (includeTokensEstimate) {
1887
3106
  tokensEstimate = results.reduce((acc, item) => {
1888
- const candidate = sortedCandidates.find((c) => c.path === item.path);
3107
+ const candidate = prioritizedCandidates.find((c) => c.path === item.path);
1889
3108
  if (candidate && candidate.content) {
1890
3109
  return acc + estimateTokensFromContent(candidate.content, item.range[0], item.range[1]);
1891
3110
  }
@@ -1896,8 +3115,13 @@ export async function contextBundle(context, params) {
1896
3115
  }
1897
3116
  // Get warnings from WarningManager (includes breaking change notification if applicable)
1898
3117
  const warnings = [...context.warningManager.responseWarnings];
3118
+ const shouldFilterResults = FINAL_RESULT_SUPPRESSION_ENABLED && SUPPRESS_NON_CODE_ENABLED;
3119
+ const sanitizedResults = shouldFilterResults
3120
+ ? results.filter((item) => !isSuppressedPath(item.path))
3121
+ : results;
3122
+ const finalResults = sanitizedResults.length > 0 ? sanitizedResults : results;
1899
3123
  const payload = {
1900
- context: results,
3124
+ context: finalResults,
1901
3125
  ...(warnings.length > 0 && { warnings }),
1902
3126
  };
1903
3127
  if (tokensEstimate !== undefined) {
@@ -2100,35 +3324,27 @@ export async function depsClosure(context, params) {
2100
3324
  edges,
2101
3325
  };
2102
3326
  }
2103
- export async function resolveRepoId(db, repoRoot) {
3327
/**
 * Resolve a repository root path to its database ID.
 *
 * Retained for backward compatibility; internally it delegates to the
 * newer RepoResolver service.
 *
 * @param db - DuckDB client
 * @param repoRoot - Repository root path
 * @param services - Optional ServerServices (a fresh instance is created when omitted)
 * @returns The repository ID
 * @throws Error when the repository has not been indexed
 */
export async function resolveRepoId(db, repoRoot, services) {
    const { repoResolver } = services ?? createServerServices(db);
    return await repoResolver.resolveId(repoRoot);
}
3342
/**
 * Public entry point for the context_bundle tool.
 *
 * Delegates to contextBundleImpl and logs any failure before rethrowing,
 * so callers still observe the original error while the server log keeps
 * a trace of it.
 *
 * @param context - Server context (db handle, repo id, warning manager, ...)
 * @param params - Tool parameters (goal, limit, artifacts, ...)
 * @returns The context bundle payload produced by contextBundleImpl
 */
export async function contextBundle(context, params) {
    let bundle;
    try {
        bundle = await contextBundleImpl(context, params);
    }
    catch (error) {
        console.error("context_bundle error:", error);
        throw error;
    }
    return bundle;
}