@dreb/coding-agent 1.16.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. package/README.md +19 -9
  2. package/agents/code-reviewer.md +1 -1
  3. package/agents/completeness-checker.md +1 -1
  4. package/agents/error-auditor.md +1 -1
  5. package/agents/explore.md +1 -1
  6. package/agents/feature-dev.md +1 -1
  7. package/agents/independent-assessor.md +1 -1
  8. package/agents/simplifier.md +1 -1
  9. package/agents/test-reviewer.md +1 -1
  10. package/dist/core/system-prompt.d.ts.map +1 -1
  11. package/dist/core/system-prompt.js +7 -1
  12. package/dist/core/system-prompt.js.map +1 -1
  13. package/dist/core/tools/dreb-paths.d.ts +17 -0
  14. package/dist/core/tools/dreb-paths.d.ts.map +1 -0
  15. package/dist/core/tools/dreb-paths.js +43 -0
  16. package/dist/core/tools/dreb-paths.js.map +1 -0
  17. package/dist/core/tools/find.d.ts.map +1 -1
  18. package/dist/core/tools/find.js +8 -0
  19. package/dist/core/tools/find.js.map +1 -1
  20. package/dist/core/tools/grep.d.ts.map +1 -1
  21. package/dist/core/tools/grep.js +8 -0
  22. package/dist/core/tools/grep.js.map +1 -1
  23. package/dist/core/tools/search.d.ts +19 -1
  24. package/dist/core/tools/search.d.ts.map +1 -1
  25. package/dist/core/tools/search.js +50 -44
  26. package/dist/core/tools/search.js.map +1 -1
  27. package/package.json +2 -1
  28. package/dist/core/search/chunker.d.ts +0 -21
  29. package/dist/core/search/chunker.d.ts.map +0 -1
  30. package/dist/core/search/chunker.js +0 -51
  31. package/dist/core/search/chunker.js.map +0 -1
  32. package/dist/core/search/db.d.ts +0 -89
  33. package/dist/core/search/db.d.ts.map +0 -1
  34. package/dist/core/search/db.js +0 -406
  35. package/dist/core/search/db.js.map +0 -1
  36. package/dist/core/search/embedder.d.ts +0 -51
  37. package/dist/core/search/embedder.d.ts.map +0 -1
  38. package/dist/core/search/embedder.js +0 -143
  39. package/dist/core/search/embedder.js.map +0 -1
  40. package/dist/core/search/index-manager.d.ts +0 -55
  41. package/dist/core/search/index-manager.d.ts.map +0 -1
  42. package/dist/core/search/index-manager.js +0 -311
  43. package/dist/core/search/index-manager.js.map +0 -1
  44. package/dist/core/search/metrics/bm25.d.ts +0 -10
  45. package/dist/core/search/metrics/bm25.d.ts.map +0 -1
  46. package/dist/core/search/metrics/bm25.js +0 -32
  47. package/dist/core/search/metrics/bm25.js.map +0 -1
  48. package/dist/core/search/metrics/git-recency.d.ts +0 -14
  49. package/dist/core/search/metrics/git-recency.d.ts.map +0 -1
  50. package/dist/core/search/metrics/git-recency.js +0 -123
  51. package/dist/core/search/metrics/git-recency.js.map +0 -1
  52. package/dist/core/search/metrics/import-graph.d.ts +0 -15
  53. package/dist/core/search/metrics/import-graph.d.ts.map +0 -1
  54. package/dist/core/search/metrics/import-graph.js +0 -115
  55. package/dist/core/search/metrics/import-graph.js.map +0 -1
  56. package/dist/core/search/metrics/path-match.d.ts +0 -13
  57. package/dist/core/search/metrics/path-match.d.ts.map +0 -1
  58. package/dist/core/search/metrics/path-match.js +0 -54
  59. package/dist/core/search/metrics/path-match.js.map +0 -1
  60. package/dist/core/search/metrics/symbol-match.d.ts +0 -12
  61. package/dist/core/search/metrics/symbol-match.d.ts.map +0 -1
  62. package/dist/core/search/metrics/symbol-match.js +0 -62
  63. package/dist/core/search/metrics/symbol-match.js.map +0 -1
  64. package/dist/core/search/metrics/tokenize.d.ts +0 -12
  65. package/dist/core/search/metrics/tokenize.d.ts.map +0 -1
  66. package/dist/core/search/metrics/tokenize.js +0 -29
  67. package/dist/core/search/metrics/tokenize.js.map +0 -1
  68. package/dist/core/search/poem.d.ts +0 -38
  69. package/dist/core/search/poem.d.ts.map +0 -1
  70. package/dist/core/search/poem.js +0 -214
  71. package/dist/core/search/poem.js.map +0 -1
  72. package/dist/core/search/query-classifier.d.ts +0 -17
  73. package/dist/core/search/query-classifier.d.ts.map +0 -1
  74. package/dist/core/search/query-classifier.js +0 -54
  75. package/dist/core/search/query-classifier.js.map +0 -1
  76. package/dist/core/search/scanner.d.ts +0 -30
  77. package/dist/core/search/scanner.d.ts.map +0 -1
  78. package/dist/core/search/scanner.js +0 -335
  79. package/dist/core/search/scanner.js.map +0 -1
  80. package/dist/core/search/search.d.ts +0 -42
  81. package/dist/core/search/search.d.ts.map +0 -1
  82. package/dist/core/search/search.js +0 -337
  83. package/dist/core/search/search.js.map +0 -1
  84. package/dist/core/search/text-chunker.d.ts +0 -15
  85. package/dist/core/search/text-chunker.d.ts.map +0 -1
  86. package/dist/core/search/text-chunker.js +0 -580
  87. package/dist/core/search/text-chunker.js.map +0 -1
  88. package/dist/core/search/tree-sitter-chunker.d.ts +0 -25
  89. package/dist/core/search/tree-sitter-chunker.d.ts.map +0 -1
  90. package/dist/core/search/tree-sitter-chunker.js +0 -357
  91. package/dist/core/search/tree-sitter-chunker.js.map +0 -1
  92. package/dist/core/search/types.d.ts +0 -96
  93. package/dist/core/search/types.d.ts.map +0 -1
  94. package/dist/core/search/types.js +0 -6
  95. package/dist/core/search/types.js.map +0 -1
  96. package/dist/core/search/vector-store.d.ts +0 -43
  97. package/dist/core/search/vector-store.d.ts.map +0 -1
  98. package/dist/core/search/vector-store.js +0 -73
  99. package/dist/core/search/vector-store.js.map +0 -1
@@ -1 +0,0 @@
1
- {"version":3,"file":"poem.js","sourceRoot":"","sources":["../../../src/core/search/poem.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAGH,OAAO,EAAE,YAAY,EAAsC,MAAM,YAAY,CAAC;AAmB9E,MAAM,cAAc,GAAqC;IACxD,UAAU,EAAE;QACX,IAAI,EAAE,CAAC;QACP,MAAM,EAAE,CAAC;QACT,SAAS,EAAE,CAAC;QACZ,WAAW,EAAE,CAAC;QACd,WAAW,EAAE,CAAC;QACd,UAAU,EAAE,CAAC;KACb;IACD,gBAAgB,EAAE;QACjB,IAAI,EAAE,CAAC;QACP,MAAM,EAAE,CAAC;QACT,SAAS,EAAE,CAAC;QACZ,WAAW,EAAE,CAAC;QACd,WAAW,EAAE,CAAC;QACd,UAAU,EAAE,CAAC;KACb;IACD,SAAS,EAAE;QACV,IAAI,EAAE,CAAC;QACP,MAAM,EAAE,CAAC;QACT,SAAS,EAAE,CAAC;QACZ,WAAW,EAAE,CAAC;QACd,WAAW,EAAE,CAAC;QACd,UAAU,EAAE,CAAC;KACb;CACD,CAAC;AAEF,+EAA+E;AAC/E,uDAAuD;AACvD,+EAA+E;AAE/E,MAAM,OAAO,GAAG,IAAI,CAAC;AAErB,+EAA+E;AAC/E,UAAU;AACV,+EAA+E;AAE/E;;GAEG;AACH,SAAS,SAAS,CAAC,UAAqC,EAAE,IAAY,EAAe;IACpF,IAAI,UAAU,CAAC,IAAI,IAAI,IAAI,EAAE,CAAC;QAC7B,OAAO,IAAI,GAAG,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC,CAAC;IACnC,CAAC;IAED,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAEhC,KAAK,MAAM,MAAM,IAAI,YAAY,EAAE,CAAC;QACnC,MAAM,KAAK,GAA4B,EAAE,CAAC;QAC1C,KAAK,MAAM,CAAC,EAAE,EAAE,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;YACvC,KAAK,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACvC,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAClC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;QAC3C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;YAChC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACxB,CAAC;IACF,CAAC;IAED,OAAO,KAAK,CAAC;AAAA,CACb;AAED,+EAA+E;AAC/E,+BAA+B;AAC/B,+EAA+E;AAE/E;;;;;;;;;;;;;;;;;;GAkBG;AACH,SAAS,oBAAoB,CAC5B,MAAsB,EACtB,OAAsB,EACtB,IAAY,EACiC;IAC7C,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IACxB,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACtC,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,mCAAmC;IACnC,MAAM,OAAO,GAAG,IAAI,KAAK,CAAS,CAAC,CAAC,CAAC;IACrC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;QAAE,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IAE3C,KAAK,MAAM,MAAM,IAAI,YAAY,EAAE,CAAC;QACnC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;QAC/B,IAAI,MAAM,KAAK,CAAC;YAAE,SAAS;QAC3B,WAAW,IAAI,MAAM,CAAC;QAEtB,8CAA8C;QAC9C,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;QAC/B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAE3E,6CAA6C;QAC7C,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QAE5B,oDAAoD;QACpD,iEAA+D;QAC/D,KAAK,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC;YAC/B,MAAM,CAAC,GAAG,MAAM,CAAC,EAAE,CAAC,CAAC;YACrB,MAAM,KAAK,GAAG,CAAC,GAAG,CAAC,CAAC;YACpB,KAAK,IAAI,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC;gBACpC,MAAM,CAAC,KAAK,GAAG,MAAM,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC;YACtC,CAAC;QACF,CAAC;IACF,CAAC;IAED,OAAO,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;AAAA,CAC7B;AAED;;;;;;;;;GASG;AACH,SAAS,cAAc,CAAC,MAAmB,EAAE,CAAS,EAAE,WAAmB,EAAgB;IAC1F,MAAM,OAAO,GAAG,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC;IACpC,MAAM,SAAS,GAAG,WAAW,GAAG,GAAG,CAAC;IAEpC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5B,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,MAAM,KAAK,GAAG,CAAC,GAAG,CAAC,CAAC;QAEpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5B,IAAI,CAAC,KAAK,CAAC;gBAAE,SAAS;YACtB,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;YAChC,MAAM,IAAI,KAAK,CAAC;YAChB,IAAI,KAAK,GAAG,SAAS;gBAAE,aAAa,EAAE,CAAC;YACvC,IAAI,KAAK,GAAG,SAAS;gBAAE,aAAa,EAAE,CAAC;QACxC,CAAC;QAED,MAAM,OAAO,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7D,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,GAAG,CAAC,aAAa,GAAG,OAAO,CAAC,CAAC,GAAG,CAAC,aAAa,GAAG,OAAO,CAAC,CAAC;IAChF,CAAC;IAED,OAAO,OAAO,CAAC;AAAA,CACf;AAED,+EAA+E;AAC/E,aAAa;AACb,+EAA+E;AAE/E;;;;;;;GAOG;AACH,MAAM,UAAU,QAAQ,CAAC,UAAqC,EAAE,SAAoB,EAAE,IAAI,GAAG,IAAI,EAAqB;IACrH,IAAI,UAAU,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAErC,uCAAqC;IACrC,MAAM,SAAS,GAAG,SAAS,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;IAE9C,wBAAwB;IACxB,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,MAAM,MAAM,GAAmB,EAAE,CAAC;IAElC,KAAK,MAAM,EAAE,IAAI,SAAS,EAAE,CAAC;QAC5B,MAAM,CAAC,GAAG,UAAU,CAAC,GAAG,CAAC,EAAE,CAAE,CAAC;QAC9B,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACb,MAAM,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,CAAC,CAAC,IAAI,IAAI,CAAC;YACjB,MAAM,EAAE,CAAC,CAAC,MAAM,IAAI,CAAC;YACrB,SAAS,EAAE,CAAC,CAAC,SAAS,IAAI,CAAC;YAC3B,WAAW,EAAE,CAAC,CAAC,WAAW,IAAI,CAAC;YAC/B,WAAW,EAAE,CAAC,CAAC,WAAW,IAAI,CAAC;YAC/B,UAAU,EAAE,CAAC,CAAC,UAAU,IAAI,CAAC;SAC7B,CAAC,CAAC;IACJ,CAAC;IAED,MAAM,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC;IAErB,8BAA4B;IAC5B,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QACb,OAAO,CAAC,EAAE,EAAE,EAAE,GAAG,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC;IACrD,CAAC;IAED,sDAAsD;IACtD,MAAM,OAAO,GAAG,cAAc,CAAC,SAAS,CAAC,CAAC;IAC1C,MAAM,CAAC,MAAM,EAAE,WAAW,CAAC,GAAG,oBAAoB,CAAC,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;IAE1E,4BAA4B;IAC5B,MAAM,OAAO,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC,EAAE,WAAW,CAAC,CAAC;IAEvD,mDAAmD;IACnD,MAAM,KAAK,GAAG,IAAI,KAAK,CAAS,CAAC,CAAC,CAAC;IACnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;QAAE,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IACzC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;IAE9C,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,EAAE,CAAC,CAAC;QAChC,EAAE,EAAE,GAAG,CAAC,GAAG,CAAC;QACZ,MAAM,EAAE,MAAM,CAAC,GAAG,CAAC;QACnB,IAAI;KACJ,CAAC,CAAC,CAAC;AAAA,CACJ","sourcesContent":["/**\n * POEM — Pareto-Optimal Embedded Modeling, multi-metric ranking.\n *\n * Ranks search candidates across multiple relevance metrics without requiring\n * hand-tuned weights. Uses the TFPR (Top-Fraction Pareto Ranker) approach:\n * vectorized dominance matrix computation with column duplication for\n * query-type-dependent metric weighting.\n *\n * Algorithm:\n * 1. Prune: per-metric top-K → union of survivors\n * 2. Build objectives matrix with column duplication for query-type weighting\n * 3. For each objective column, sort candidates and accumulate pairwise\n * dominance counts (duplicate columns contribute via weight multiplier)\n * 4. Compute fitness: meanDominance × (numDominating + ε) / (numSubmitting + ε)\n * 5. Sort by fitness, assign ranks\n *\n * References:\n * - POEM paper: https://iopscience.iop.org/article/10.1088/2632-2153/ab891b\n * - TFPR: https://github.com/merckgroup/aidd_tfpr\n * - colourdle: https://github.com/aebrer/colourdle\n */\n\nimport type { QueryType } from \"./query-classifier.js\";\nimport { METRIC_NAMES, type MetricName, type MetricScores } from \"./types.js\";\n\n// ============================================================================\n// Public types\n// ============================================================================\n\nexport interface RankedCandidate {\n\tid: number;\n\tscores: MetricScores;\n\trank: number;\n}\n\n// ============================================================================\n// Column duplication config per query type\n// ============================================================================\n\n/** How many times each metric column appears in the objectives matrix. */\ntype ColumnWeights = Record<MetricName, number>;\n\nconst COLUMN_WEIGHTS: Record<QueryType, ColumnWeights> = {\n\tidentifier: {\n\t\tbm25: 2,\n\t\tcosine: 1,\n\t\tpathMatch: 1,\n\t\tsymbolMatch: 2,\n\t\timportGraph: 1,\n\t\tgitRecency: 1,\n\t},\n\tnatural_language: {\n\t\tbm25: 1,\n\t\tcosine: 2,\n\t\tpathMatch: 1,\n\t\tsymbolMatch: 1,\n\t\timportGraph: 1,\n\t\tgitRecency: 1,\n\t},\n\tpath_like: {\n\t\tbm25: 1,\n\t\tcosine: 1,\n\t\tpathMatch: 3,\n\t\tsymbolMatch: 1,\n\t\timportGraph: 1,\n\t\tgitRecency: 1,\n\t},\n};\n\n// ============================================================================\n// Fitness smoothing constant (avoids division by zero)\n// ============================================================================\n\nconst EPSILON = 0.05;\n\n// ============================================================================\n// Pruning\n// ============================================================================\n\n/**\n * Per-metric top-K pruning → union of surviving candidate IDs.\n */\nfunction pruneTopK(candidates: Map<number, MetricScores>, topK: number): Set<number> {\n\tif (candidates.size <= topK) {\n\t\treturn new Set(candidates.keys());\n\t}\n\n\tconst union = new Set<number>();\n\n\tfor (const metric of METRIC_NAMES) {\n\t\tconst pairs: Array<[number, number]> = [];\n\t\tfor (const [id, scores] of candidates) {\n\t\t\tpairs.push([id, scores[metric] ?? 0]);\n\t\t}\n\t\tpairs.sort((a, b) => b[1] - a[1]);\n\t\tconst limit = Math.min(topK, pairs.length);\n\t\tfor (let i = 0; i < limit; i++) {\n\t\t\tunion.add(pairs[i][0]);\n\t\t}\n\t}\n\n\treturn union;\n}\n\n// ============================================================================\n// Dominance matrix computation\n// ============================================================================\n\n/**\n * Build the dominance count matrix using the TFPR approach.\n *\n * For each objective (metric), sorts candidates and accumulates pairwise\n * dominance: if candidate i ranks above candidate j on an objective,\n * dominanceCounts[i][j] increases by the column weight.\n *\n * Duplicate columns (from column duplication) are handled by multiplying\n * the contribution by the weight rather than re-sorting — same result,\n * no redundant work.\n *\n * Uses Uint16Array to keep memory compact (max possible value per cell\n * is the sum of all weights, which is ≤ 10).\n *\n * @param scores Dense array of MetricScores, indexed 0..n-1\n * @param weights Column weights from query type\n * @param topK Only top-K per objective contribute to pairwise dominance\n * @returns Flat dominance count matrix [n × n] and the total weight sum\n */\nfunction buildDominanceCounts(\n\tscores: MetricScores[],\n\tweights: ColumnWeights,\n\ttopK: number,\n): [counts: Uint16Array, totalWeight: number] {\n\tconst n = scores.length;\n\tconst counts = new Uint16Array(n * n);\n\tlet totalWeight = 0;\n\n\t// Reusable index array for sorting\n\tconst indices = new Array<number>(n);\n\tfor (let i = 0; i < n; i++) indices[i] = i;\n\n\tfor (const metric of METRIC_NAMES) {\n\t\tconst weight = weights[metric];\n\t\tif (weight === 0) continue;\n\t\ttotalWeight += weight;\n\n\t\t// Sort candidates by this metric (descending)\n\t\tconst sorted = indices.slice();\n\t\tsorted.sort((a, b) => (scores[b][metric] ?? 0) - (scores[a][metric] ?? 0));\n\n\t\t// Only consider top-K for pairwise dominance\n\t\tconst k = Math.min(topK, n);\n\n\t\t// For each pair in the top-K where i ranks above j:\n\t\t// i dominates j on this objective → add weight to counts[i, j]\n\t\tfor (let ri = 0; ri < k; ri++) {\n\t\t\tconst i = sorted[ri];\n\t\t\tconst iBase = i * n;\n\t\t\tfor (let rj = ri + 1; rj < k; rj++) {\n\t\t\t\tcounts[iBase + sorted[rj]] += weight;\n\t\t\t}\n\t\t}\n\t}\n\n\treturn [counts, totalWeight];\n}\n\n/**\n * Compute fitness scores from the dominance count matrix.\n *\n * Fitness = meanDominance × (numDominating + ε) / (numSubmitting + ε)\n *\n * Where:\n * - meanDominance = average normalized dominance across all other candidates\n * - numDominating = count of candidates this one dominates (>50% of objectives)\n * - numSubmitting = count of candidates this one fails to dominate (<50%)\n */\nfunction computeFitness(counts: Uint16Array, n: number, totalWeight: number): Float64Array {\n\tconst fitness = new Float64Array(n);\n\tconst threshold = totalWeight * 0.5;\n\n\tfor (let i = 0; i < n; i++) {\n\t\tlet sumDom = 0;\n\t\tlet numDominating = 0;\n\t\tlet numSubmitting = 0;\n\t\tconst iBase = i * n;\n\n\t\tfor (let j = 0; j < n; j++) {\n\t\t\tif (i === j) continue;\n\t\t\tconst count = counts[iBase + j];\n\t\t\tsumDom += count;\n\t\t\tif (count > threshold) numDominating++;\n\t\t\tif (count < threshold) numSubmitting++;\n\t\t}\n\n\t\tconst meanDom = n > 1 ? sumDom / ((n - 1) * totalWeight) : 0;\n\t\tfitness[i] = (meanDom * (numDominating + EPSILON)) / (numSubmitting + EPSILON);\n\t}\n\n\treturn fitness;\n}\n\n// ============================================================================\n// Public API\n// ============================================================================\n\n/**\n * Rank candidates using POEM / TFPR.\n *\n * @param candidates Map of candidateId → MetricScores (all values 0–1)\n * @param queryType Query type for column duplication weighting\n * @param topK Per-metric pruning limit (default: 1000)\n * @returns Candidates ordered best-first with assigned ranks (0 = best)\n */\nexport function poemRank(candidates: Map<number, MetricScores>, queryType: QueryType, topK = 1000): RankedCandidate[] {\n\tif (candidates.size === 0) return [];\n\n\t// 1. Prune: per-metric top-K → union\n\tconst surviving = pruneTopK(candidates, topK);\n\n\t// 2. Build dense arrays\n\tconst ids: number[] = [];\n\tconst scores: MetricScores[] = [];\n\n\tfor (const id of surviving) {\n\t\tconst s = candidates.get(id)!;\n\t\tids.push(id);\n\t\tscores.push({\n\t\t\tbm25: s.bm25 ?? 0,\n\t\t\tcosine: s.cosine ?? 0,\n\t\t\tpathMatch: s.pathMatch ?? 0,\n\t\t\tsymbolMatch: s.symbolMatch ?? 0,\n\t\t\timportGraph: s.importGraph ?? 0,\n\t\t\tgitRecency: s.gitRecency ?? 0,\n\t\t});\n\t}\n\n\tconst n = ids.length;\n\n\t// Single candidate → rank 0\n\tif (n === 1) {\n\t\treturn [{ id: ids[0], scores: scores[0], rank: 0 }];\n\t}\n\n\t// 3. Compute dominance matrix with column duplication\n\tconst weights = COLUMN_WEIGHTS[queryType];\n\tconst [counts, totalWeight] = buildDominanceCounts(scores, weights, topK);\n\n\t// 4. Compute fitness scores\n\tconst fitness = computeFitness(counts, n, totalWeight);\n\n\t// 5. Sort by fitness (descending) and assign ranks\n\tconst order = new Array<number>(n);\n\tfor (let i = 0; i < n; i++) order[i] = i;\n\torder.sort((a, b) => fitness[b] - fitness[a]);\n\n\treturn order.map((idx, rank) => ({\n\t\tid: ids[idx],\n\t\tscores: scores[idx],\n\t\trank,\n\t}));\n}\n"]}
@@ -1,17 +0,0 @@
1
- /**
2
- * Classify search queries into types for POEM column weighting.
3
- *
4
- * Query types affect how metric columns are duplicated during ranking:
5
- * - identifier: emphasise BM25 and symbol-match scores
6
- * - path_like: emphasise path-match scores
7
- * - natural_language: emphasise cosine similarity scores
8
- */
9
- export type QueryType = "identifier" | "natural_language" | "path_like";
10
- /**
11
- * Classify a search query to guide POEM column weighting.
12
- *
13
- * @param query Raw user query string
14
- * @returns The detected query type
15
- */
16
- export declare function classifyQuery(query: string): QueryType;
17
- //# sourceMappingURL=query-classifier.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"query-classifier.d.ts","sourceRoot":"","sources":["../../../src/core/search/query-classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,MAAM,MAAM,SAAS,GAAG,YAAY,GAAG,kBAAkB,GAAG,WAAW,CAAC;AAoBxE;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS,CAyBtD","sourcesContent":["/**\n * Classify search queries into types for POEM column weighting.\n *\n * Query types affect how metric columns are duplicated during ranking:\n * - identifier: emphasise BM25 and symbol-match scores\n * - path_like: emphasise path-match scores\n * - natural_language: emphasise cosine similarity scores\n */\n\nexport type QueryType = \"identifier\" | \"natural_language\" | \"path_like\";\n\n/** Matches camelCase or PascalCase boundaries (lowercase→uppercase). */\nconst CAMEL_RE = /[a-z][A-Z]/;\n\n/** Matches snake_case — word chars around an underscore. */\nconst SNAKE_RE = /\\w+_\\w+/;\n\n/** SCREAMING_SNAKE_CASE — two or more uppercase-letter groups joined by underscores. */\nconst SCREAMING_SNAKE_RE = /^[A-Z][A-Z0-9]*(?:_[A-Z0-9]+)+$/;\n\n/** Path separators. */\nconst PATH_SEP_RE = /[/\\\\]/;\n\n/** Dotted path like `foo.bar.baz` (3+ segments). */\nconst DOTTED_PATH_RE = /^\\w+\\.\\w+\\.\\w+/;\n\n/** File extension pattern — ends with `.ext` where ext is 1-5 alphanumeric chars. */\nconst FILE_EXT_RE = /\\.\\w{1,5}$/;\n\n/**\n * Classify a search query to guide POEM column weighting.\n *\n * @param query Raw user query string\n * @returns The detected query type\n */\nexport function classifyQuery(query: string): QueryType {\n\tconst trimmed = query.trim();\n\tif (trimmed.length === 0) return \"natural_language\";\n\n\t// --- path_like ---\n\tif (PATH_SEP_RE.test(trimmed)) return \"path_like\";\n\tif (DOTTED_PATH_RE.test(trimmed)) return \"path_like\";\n\t// File extension at end of a single token (e.g. \"config.yaml\", \"auth.ts\")\n\tconst words = trimmed.split(/\\s+/);\n\tif (words.length === 1 && FILE_EXT_RE.test(trimmed) && /\\./.test(trimmed)) {\n\t\treturn \"path_like\";\n\t}\n\n\t// --- identifier ---\n\t// Single token or short (≤3 words) with code-style naming\n\tif (words.length === 1) return \"identifier\";\n\tif (words.length <= 3) {\n\t\t// If any word looks like a code identifier, classify as identifier\n\t\tif (words.some((w) => CAMEL_RE.test(w) || SNAKE_RE.test(w) || SCREAMING_SNAKE_RE.test(w))) {\n\t\t\treturn \"identifier\";\n\t\t}\n\t}\n\n\t// --- natural_language ---\n\treturn \"natural_language\";\n}\n"]}
@@ -1,54 +0,0 @@
1
- /**
2
- * Classify search queries into types for POEM column weighting.
3
- *
4
- * Query types affect how metric columns are duplicated during ranking:
5
- * - identifier: emphasise BM25 and symbol-match scores
6
- * - path_like: emphasise path-match scores
7
- * - natural_language: emphasise cosine similarity scores
8
- */
9
- /** Matches camelCase or PascalCase boundaries (lowercase→uppercase). */
10
- const CAMEL_RE = /[a-z][A-Z]/;
11
- /** Matches snake_case — word chars around an underscore. */
12
- const SNAKE_RE = /\w+_\w+/;
13
- /** SCREAMING_SNAKE_CASE — two or more uppercase-letter groups joined by underscores. */
14
- const SCREAMING_SNAKE_RE = /^[A-Z][A-Z0-9]*(?:_[A-Z0-9]+)+$/;
15
- /** Path separators. */
16
- const PATH_SEP_RE = /[/\\]/;
17
- /** Dotted path like `foo.bar.baz` (3+ segments). */
18
- const DOTTED_PATH_RE = /^\w+\.\w+\.\w+/;
19
- /** File extension pattern — ends with `.ext` where ext is 1-5 alphanumeric chars. */
20
- const FILE_EXT_RE = /\.\w{1,5}$/;
21
- /**
22
- * Classify a search query to guide POEM column weighting.
23
- *
24
- * @param query Raw user query string
25
- * @returns The detected query type
26
- */
27
- export function classifyQuery(query) {
28
- const trimmed = query.trim();
29
- if (trimmed.length === 0)
30
- return "natural_language";
31
- // --- path_like ---
32
- if (PATH_SEP_RE.test(trimmed))
33
- return "path_like";
34
- if (DOTTED_PATH_RE.test(trimmed))
35
- return "path_like";
36
- // File extension at end of a single token (e.g. "config.yaml", "auth.ts")
37
- const words = trimmed.split(/\s+/);
38
- if (words.length === 1 && FILE_EXT_RE.test(trimmed) && /\./.test(trimmed)) {
39
- return "path_like";
40
- }
41
- // --- identifier ---
42
- // Single token or short (≤3 words) with code-style naming
43
- if (words.length === 1)
44
- return "identifier";
45
- if (words.length <= 3) {
46
- // If any word looks like a code identifier, classify as identifier
47
- if (words.some((w) => CAMEL_RE.test(w) || SNAKE_RE.test(w) || SCREAMING_SNAKE_RE.test(w))) {
48
- return "identifier";
49
- }
50
- }
51
- // --- natural_language ---
52
- return "natural_language";
53
- }
54
- //# sourceMappingURL=query-classifier.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"query-classifier.js","sourceRoot":"","sources":["../../../src/core/search/query-classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAIH,0EAAwE;AACxE,MAAM,QAAQ,GAAG,YAAY,CAAC;AAE9B,8DAA4D;AAC5D,MAAM,QAAQ,GAAG,SAAS,CAAC;AAE3B,0FAAwF;AACxF,MAAM,kBAAkB,GAAG,iCAAiC,CAAC;AAE7D,uBAAuB;AACvB,MAAM,WAAW,GAAG,OAAO,CAAC;AAE5B,oDAAoD;AACpD,MAAM,cAAc,GAAG,gBAAgB,CAAC;AAExC,uFAAqF;AACrF,MAAM,WAAW,GAAG,YAAY,CAAC;AAEjC;;;;;GAKG;AACH,MAAM,UAAU,aAAa,CAAC,KAAa,EAAa;IACvD,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC;IAC7B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,kBAAkB,CAAC;IAEpD,oBAAoB;IACpB,IAAI,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC;QAAE,OAAO,WAAW,CAAC;IAClD,IAAI,cAAc,CAAC,IAAI,CAAC,OAAO,CAAC;QAAE,OAAO,WAAW,CAAC;IACrD,0EAA0E;IAC1E,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACnC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAC3E,OAAO,WAAW,CAAC;IACpB,CAAC;IAED,qBAAqB;IACrB,4DAA0D;IAC1D,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,YAAY,CAAC;IAC5C,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QACvB,mEAAmE;QACnE,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC3F,OAAO,YAAY,CAAC;QACrB,CAAC;IACF,CAAC;IAED,2BAA2B;IAC3B,OAAO,kBAAkB,CAAC;AAAA,CAC1B","sourcesContent":["/**\n * Classify search queries into types for POEM column weighting.\n *\n * Query types affect how metric columns are duplicated during ranking:\n * - identifier: emphasise BM25 and symbol-match scores\n * - path_like: emphasise path-match scores\n * - natural_language: emphasise cosine similarity scores\n */\n\nexport type QueryType = \"identifier\" | \"natural_language\" | \"path_like\";\n\n/** Matches camelCase or PascalCase boundaries (lowercase→uppercase). */\nconst CAMEL_RE = /[a-z][A-Z]/;\n\n/** Matches snake_case — word chars around an underscore. */\nconst SNAKE_RE = /\\w+_\\w+/;\n\n/** SCREAMING_SNAKE_CASE — two or more uppercase-letter groups joined by underscores. */\nconst SCREAMING_SNAKE_RE = /^[A-Z][A-Z0-9]*(?:_[A-Z0-9]+)+$/;\n\n/** Path separators. */\nconst PATH_SEP_RE = /[/\\\\]/;\n\n/** Dotted path like `foo.bar.baz` (3+ segments). */\nconst DOTTED_PATH_RE = /^\\w+\\.\\w+\\.\\w+/;\n\n/** File extension pattern — ends with `.ext` where ext is 1-5 alphanumeric chars. */\nconst FILE_EXT_RE = /\\.\\w{1,5}$/;\n\n/**\n * Classify a search query to guide POEM column weighting.\n *\n * @param query Raw user query string\n * @returns The detected query type\n */\nexport function classifyQuery(query: string): QueryType {\n\tconst trimmed = query.trim();\n\tif (trimmed.length === 0) return \"natural_language\";\n\n\t// --- path_like ---\n\tif (PATH_SEP_RE.test(trimmed)) return \"path_like\";\n\tif (DOTTED_PATH_RE.test(trimmed)) return \"path_like\";\n\t// File extension at end of a single token (e.g. \"config.yaml\", \"auth.ts\")\n\tconst words = trimmed.split(/\\s+/);\n\tif (words.length === 1 && FILE_EXT_RE.test(trimmed) && /\\./.test(trimmed)) {\n\t\treturn \"path_like\";\n\t}\n\n\t// --- identifier ---\n\t// Single token or short (≤3 words) with code-style naming\n\tif (words.length === 1) return \"identifier\";\n\tif (words.length <= 3) {\n\t\t// If any word looks like a code identifier, classify as identifier\n\t\tif (words.some((w) => CAMEL_RE.test(w) || SNAKE_RE.test(w) || SCREAMING_SNAKE_RE.test(w))) {\n\t\t\treturn \"identifier\";\n\t\t}\n\t}\n\n\t// --- natural_language ---\n\treturn \"natural_language\";\n}\n"]}
@@ -1,30 +0,0 @@
1
- /**
2
- * File scanner for the semantic search subsystem.
3
- *
4
- * Discovers project files for indexing by walking the directory tree,
5
- * respecting .gitignore rules, and classifying files by type.
6
- */
7
- import type { FileType } from "./types.js";
8
- /** A file discovered by the scanner, ready for indexing. */
9
- export interface ScannedFile {
10
- /** Path relative to the project root (posix separators). */
11
- filePath: string;
12
- /** Detected file type. */
13
- fileType: FileType;
14
- /** File modification time in milliseconds since epoch. */
15
- mtime: number;
16
- }
17
- /**
18
- * Detect the {@link FileType} for a file path based on its extension.
19
- * Returns `null` for unrecognized extensions or files without an extension.
20
- */
21
- export declare function detectFileType(filePath: string): FileType | null;
22
- /**
23
- * Scan a project directory and return all indexable files.
24
- *
25
- * Walks the tree rooted at {@link projectRoot}, respects `.gitignore` rules,
26
- * skips binary / oversized files, and optionally includes memory files from
27
- * a global memory directory.
28
- */
29
- export declare function scanProject(projectRoot: string, globalMemoryDir?: string): Promise<ScannedFile[]>;
30
- //# sourceMappingURL=scanner.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"scanner.d.ts","sourceRoot":"","sources":["../../../src/core/search/scanner.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAMH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAM3C,4DAA4D;AAC5D,MAAM,WAAW,WAAW;IAC3B,4DAA4D;IAC5D,QAAQ,EAAE,MAAM,CAAC;IACjB,0BAA0B;IAC1B,QAAQ,EAAE,QAAQ,CAAC;IACnB,0DAA0D;IAC1D,KAAK,EAAE,MAAM,CAAC;CACd;AAiED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,QAAQ,GAAG,IAAI,CAIhE;AAED;;;;;;GAMG;AACH,wBAAsB,WAAW,CAAC,WAAW,EAAE,MAAM,EAAE,eAAe,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC,CAuBvG","sourcesContent":["/**\n * File scanner for the semantic search subsystem.\n *\n * Discovers project files for indexing by walking the directory tree,\n * respecting .gitignore rules, and classifying files by type.\n */\n\nimport { existsSync, readdirSync, readFileSync, type Stats, statSync } from \"node:fs\";\nimport { homedir } from \"node:os\";\nimport { extname, isAbsolute, join, relative, sep } from \"node:path\";\nimport ignore from \"ignore\";\nimport type { FileType } from \"./types.js\";\n\n// ============================================================================\n// Public types\n// ============================================================================\n\n/** A file discovered by the scanner, ready for indexing. */\nexport interface ScannedFile {\n\t/** Path relative to the project root (posix separators). */\n\tfilePath: string;\n\t/** Detected file type. */\n\tfileType: FileType;\n\t/** File modification time in milliseconds since epoch. */\n\tmtime: number;\n}\n\n// ============================================================================\n// Constants\n// ============================================================================\n\n/** Maximum file size to index (1 MB). */\nconst MAX_FILE_SIZE = 1024 * 1024;\n\n/** Directories unconditionally skipped during traversal. */\nconst SKIP_DIRS = new Set([\n\t\"node_modules\",\n\t\".git\",\n\t\".dreb/index\",\n\t\".hg\",\n\t\".svn\",\n\t\"__pycache__\",\n\t\".tox\",\n\t\".venv\",\n\t\"dist\",\n\t\"build\",\n\t\".next\",\n\t\".nuxt\",\n\t\"coverage\",\n\t\".cache\",\n]);\n\n/** Extension → FileType mapping. */\nconst EXTENSION_MAP: ReadonlyMap<string, FileType> = new Map<string, FileType>([\n\t// Tree-sitter languages\n\t[\".ts\", \"typescript\"],\n\t[\".tsx\", \"tsx\"],\n\t[\".js\", \"javascript\"],\n\t[\".mjs\", \"javascript\"],\n\t[\".cjs\", \"javascript\"],\n\t[\".py\", \"python\"],\n\t[\".go\", \"go\"],\n\t[\".rs\", \"rust\"],\n\t[\".java\", \"java\"],\n\t[\".c\", \"c\"],\n\t[\".h\", \"c\"],\n\t[\".cpp\", \"cpp\"],\n\t[\".hpp\", \"cpp\"],\n\t[\".cc\", \"cpp\"],\n\t[\".cxx\", \"cpp\"],\n\t[\".hh\", \"cpp\"],\n\t[\".hxx\", \"cpp\"],\n\t// Text file types\n\t[\".md\", \"markdown\"],\n\t[\".mdx\", \"markdown\"],\n\t[\".yml\", \"yaml\"],\n\t[\".yaml\", \"yaml\"],\n\t[\".json\", \"json\"],\n\t[\".toml\", \"toml\"],\n\t[\".txt\", \"plaintext\"],\n\t[\".cfg\", \"plaintext\"],\n\t[\".ini\", \"plaintext\"],\n\t[\".env\", \"plaintext\"],\n\t[\".conf\", \"plaintext\"],\n]);\n\n// ============================================================================\n// Public API\n// ============================================================================\n\n/**\n * Detect the {@link FileType} for a file path based on its extension.\n * Returns `null` for unrecognized extensions or files without an extension.\n */\nexport function detectFileType(filePath: string): FileType | null {\n\tconst ext = extname(filePath).toLowerCase();\n\tif (!ext) return null;\n\treturn EXTENSION_MAP.get(ext) ?? null;\n}\n\n/**\n * Scan a project directory and return all indexable files.\n *\n * Walks the tree rooted at {@link projectRoot}, respects `.gitignore` rules,\n * skips binary / oversized files, and optionally includes memory files from\n * a global memory directory.\n */\nexport async function scanProject(projectRoot: string, globalMemoryDir?: string): Promise<ScannedFile[]> {\n\tconst results: ScannedFile[] = [];\n\n\t// Detect if projectRoot is the home directory — use shallow scan mode\n\t// to avoid recursing into the entire home dir (which would be catastrophic).\n\tconst isHomeDir = isHomeDirPath(projectRoot);\n\n\tif (isHomeDir) {\n\t\t// Shallow mode: only scan top-level files and ~/.dreb/memory/\n\t\tscanShallow(projectRoot, results);\n\t} else {\n\t\t// Normal mode: full recursive walk with .gitignore\n\t\tconst ig = ignore();\n\t\tloadGitignore(ig, projectRoot, projectRoot);\n\t\twalkDirectory(projectRoot, projectRoot, ig, results);\n\t}\n\n\t// Include global memory files if the directory exists\n\tif (globalMemoryDir && existsSync(globalMemoryDir)) {\n\t\tscanMemoryDir(globalMemoryDir, projectRoot, results);\n\t}\n\n\treturn results;\n}\n\n/** Check if a path is the user's home directory. */\nfunction isHomeDirPath(dir: string): boolean {\n\ttry {\n\t\tconst home = homedir();\n\t\t// Normalize trailing slashes for comparison\n\t\tconst normalizedDir = dir.replace(/[/\\\\]+$/, \"\");\n\t\tconst normalizedHome = home.replace(/[/\\\\]+$/, \"\");\n\t\treturn normalizedDir === normalizedHome;\n\t} catch {\n\t\treturn false;\n\t}\n}\n\n/**\n * Shallow scan mode for home directory: only index top-level files\n * (no directory recursion) to avoid scanning the entire home directory.\n * Memory files are handled separately via scanMemoryDir.\n */\nfunction scanShallow(dir: string, results: ScannedFile[]): void {\n\tlet entries: string[];\n\ttry {\n\t\tentries = readdirSync(dir);\n\t} catch {\n\t\treturn;\n\t}\n\n\tfor (const entry of entries) {\n\t\t// Skip dotfiles/dotdirs in home dir (except specific ones we want)\n\t\tif (entry.startsWith(\".\")) continue;\n\n\t\tconst fullPath = join(dir, entry);\n\n\t\tlet stats: Stats;\n\t\ttry {\n\t\t\tstats = statSync(fullPath);\n\t\t} catch {\n\t\t\tcontinue;\n\t\t}\n\n\t\t// Only index files, not directories (shallow mode)\n\t\tif (!stats.isFile()) continue;\n\t\tif (stats.size > MAX_FILE_SIZE) continue;\n\t\tif (stats.size === 0) continue;\n\n\t\tconst fileType = detectFileType(entry);\n\t\tif (!fileType) continue;\n\n\t\tresults.push({\n\t\t\tfilePath: entry,\n\t\t\tfileType,\n\t\t\tmtime: stats.mtimeMs,\n\t\t});\n\t}\n}\n\n// ============================================================================\n// Internal helpers\n// ============================================================================\n\ntype IgnoreMatcher = ReturnType<typeof ignore>;\n\n/** Convert an OS path to posix separators for ignore matching. */\nfunction toPosix(p: string): string {\n\treturn p.split(sep).join(\"/\");\n}\n\n/** Load .gitignore rules from a directory into the ignore matcher. */\nfunction loadGitignore(ig: IgnoreMatcher, dir: string, root: string): void {\n\tconst gitignorePath = join(dir, \".gitignore\");\n\tif (!existsSync(gitignorePath)) return;\n\n\ttry {\n\t\tconst content = readFileSync(gitignorePath, \"utf-8\");\n\t\tconst relDir = relative(root, dir);\n\t\tconst prefix = relDir ? `${toPosix(relDir)}/` : \"\";\n\n\t\tconst patterns = content\n\t\t\t.split(/\\r?\\n/)\n\t\t\t.map((line) => prefixPattern(line, prefix))\n\t\t\t.filter((line): line is string => line !== null);\n\n\t\tif (patterns.length > 0) {\n\t\t\tig.add(patterns);\n\t\t}\n\t} catch {\n\t\t// Unreadable .gitignore — skip silently\n\t}\n}\n\n/**\n * Prefix a .gitignore pattern with a directory path so it applies\n * correctly when matching against root-relative paths.\n */\nfunction prefixPattern(line: string, prefix: string): string | null {\n\tconst trimmed = line.trim();\n\tif (!trimmed) return null;\n\tif (trimmed.startsWith(\"#\") && !trimmed.startsWith(\"\\\\#\")) return null;\n\n\tlet pattern = line;\n\tlet negated = false;\n\n\tif (pattern.startsWith(\"!\")) {\n\t\tnegated = true;\n\t\tpattern = pattern.slice(1);\n\t} else if (pattern.startsWith(\"\\\\!\")) {\n\t\tpattern = pattern.slice(1);\n\t}\n\n\tconst prefixed = prefix ? `${prefix}${pattern}` : pattern;\n\treturn negated ? `!${prefixed}` : prefixed;\n}\n\n/**\n * Check if a directory component (relative to root) should be unconditionally skipped.\n * Handles both top-level names (\"node_modules\") and nested paths (\".dreb/index\").\n */\nfunction shouldSkipDir(relPath: string): boolean {\n\tconst posix = toPosix(relPath);\n\n\t// Check the directory name itself\n\tconst parts = posix.split(\"/\");\n\tconst name = parts[parts.length - 1];\n\tif (SKIP_DIRS.has(name)) return true;\n\n\t// Check multi-segment skip patterns (e.g. \".dreb/index\")\n\tfor (const skip of SKIP_DIRS) {\n\t\tif (skip.includes(\"/\") && (posix === skip || posix.endsWith(`/${skip}`))) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\treturn false;\n}\n\n/** Recursively walk a directory, collecting indexable files. */\nfunction walkDirectory(dir: string, root: string, ig: IgnoreMatcher, results: ScannedFile[]): void {\n\tlet entries: string[];\n\ttry {\n\t\tentries = readdirSync(dir);\n\t} catch {\n\t\treturn; // Permission denied, etc.\n\t}\n\n\tfor (const entry of entries) {\n\t\tconst fullPath = join(dir, entry);\n\t\tconst relPath = relative(root, fullPath);\n\t\tconst posixRel = toPosix(relPath);\n\n\t\tlet stats: Stats;\n\t\ttry {\n\t\t\tstats = statSync(fullPath);\n\t\t} catch {\n\t\t\tcontinue; // Broken symlink, etc.\n\t\t}\n\n\t\tif (stats.isDirectory()) {\n\t\t\t// Hard-coded skip list\n\t\t\tif (shouldSkipDir(relPath)) continue;\n\n\t\t\t// .gitignore check (directories need trailing slash)\n\t\t\tif (ig.ignores(`${posixRel}/`)) continue;\n\n\t\t\t// Load nested .gitignore before descending\n\t\t\tloadGitignore(ig, fullPath, root);\n\n\t\t\twalkDirectory(fullPath, root, ig, results);\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (!stats.isFile()) continue;\n\n\t\t// .gitignore check for files\n\t\tif (ig.ignores(posixRel)) continue;\n\n\t\t// Size gate\n\t\tif (stats.size > MAX_FILE_SIZE) continue;\n\t\tif (stats.size === 0) continue;\n\n\t\t// File type detection\n\t\tconst fileType = detectFileType(entry);\n\t\tif (!fileType) continue;\n\n\t\tresults.push({\n\t\t\tfilePath: posixRel,\n\t\t\tfileType,\n\t\t\tmtime: stats.mtimeMs,\n\t\t});\n\t}\n}\n\n/**\n * Scan a memory directory (project or global) for indexable files.\n *\n * Memory directories are always fully included — no .gitignore filtering —\n * because they live outside the normal project tree or in `.dreb/` which\n * is typically gitignored.\n *\n * Paths for global memory files are stored with a `~memory/` prefix\n * to distinguish them from project files.\n */\nfunction scanMemoryDir(memoryDir: string, projectRoot: string, results: ScannedFile[], baseMemoryDir?: string): void {\n\tlet entries: string[];\n\ttry {\n\t\tentries = readdirSync(memoryDir);\n\t} catch {\n\t\treturn;\n\t}\n\n\tfor (const entry of entries) {\n\t\tconst fullPath = join(memoryDir, entry);\n\n\t\tlet stats: Stats;\n\t\ttry {\n\t\t\tstats = statSync(fullPath);\n\t\t} catch {\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (stats.isDirectory()) {\n\t\t\t// Recurse into subdirectories\n\t\t\tscanMemoryDir(fullPath, projectRoot, results, baseMemoryDir ?? memoryDir);\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (!stats.isFile()) continue;\n\t\tif (stats.size > MAX_FILE_SIZE) continue;\n\t\tif (stats.size === 0) continue;\n\n\t\tconst fileType = detectFileType(entry);\n\t\tif (!fileType) continue;\n\n\t\t// If the memory dir is inside the project root, use normal relative path.\n\t\t// Otherwise, use a ~memory/ prefix so paths remain unique and identifiable.\n\t\tconst rel = relative(projectRoot, fullPath);\n\t\tconst isOutsideProject = rel.startsWith(\"..\") || isAbsolute(rel);\n\t\tconst rootMemoryDir = baseMemoryDir ?? memoryDir;\n\t\tconst filePath = isOutsideProject ? `~memory/${relative(rootMemoryDir, fullPath)}` : rel;\n\n\t\tresults.push({\n\t\t\tfilePath: toPosix(filePath),\n\t\t\tfileType,\n\t\t\tmtime: stats.mtimeMs,\n\t\t});\n\t}\n}\n"]}
@@ -1,335 +0,0 @@
1
- /**
2
- * File scanner for the semantic search subsystem.
3
- *
4
- * Discovers project files for indexing by walking the directory tree,
5
- * respecting .gitignore rules, and classifying files by type.
6
- */
7
- import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
8
- import { homedir } from "node:os";
9
- import { extname, isAbsolute, join, relative, sep } from "node:path";
10
- import ignore from "ignore";
11
- // ============================================================================
12
- // Constants
13
- // ============================================================================
14
- /** Maximum file size to index (1 MB). */
15
- const MAX_FILE_SIZE = 1024 * 1024;
16
- /** Directories unconditionally skipped during traversal. */
17
- const SKIP_DIRS = new Set([
18
- "node_modules",
19
- ".git",
20
- ".dreb/index",
21
- ".hg",
22
- ".svn",
23
- "__pycache__",
24
- ".tox",
25
- ".venv",
26
- "dist",
27
- "build",
28
- ".next",
29
- ".nuxt",
30
- "coverage",
31
- ".cache",
32
- ]);
33
- /** Extension → FileType mapping. */
34
- const EXTENSION_MAP = new Map([
35
- // Tree-sitter languages
36
- [".ts", "typescript"],
37
- [".tsx", "tsx"],
38
- [".js", "javascript"],
39
- [".mjs", "javascript"],
40
- [".cjs", "javascript"],
41
- [".py", "python"],
42
- [".go", "go"],
43
- [".rs", "rust"],
44
- [".java", "java"],
45
- [".c", "c"],
46
- [".h", "c"],
47
- [".cpp", "cpp"],
48
- [".hpp", "cpp"],
49
- [".cc", "cpp"],
50
- [".cxx", "cpp"],
51
- [".hh", "cpp"],
52
- [".hxx", "cpp"],
53
- // Text file types
54
- [".md", "markdown"],
55
- [".mdx", "markdown"],
56
- [".yml", "yaml"],
57
- [".yaml", "yaml"],
58
- [".json", "json"],
59
- [".toml", "toml"],
60
- [".txt", "plaintext"],
61
- [".cfg", "plaintext"],
62
- [".ini", "plaintext"],
63
- [".env", "plaintext"],
64
- [".conf", "plaintext"],
65
- ]);
66
- // ============================================================================
67
- // Public API
68
- // ============================================================================
69
- /**
70
- * Detect the {@link FileType} for a file path based on its extension.
71
- * Returns `null` for unrecognized extensions or files without an extension.
72
- */
73
- export function detectFileType(filePath) {
74
- const ext = extname(filePath).toLowerCase();
75
- if (!ext)
76
- return null;
77
- return EXTENSION_MAP.get(ext) ?? null;
78
- }
79
- /**
80
- * Scan a project directory and return all indexable files.
81
- *
82
- * Walks the tree rooted at {@link projectRoot}, respects `.gitignore` rules,
83
- * skips binary / oversized files, and optionally includes memory files from
84
- * a global memory directory.
85
- */
86
- export async function scanProject(projectRoot, globalMemoryDir) {
87
- const results = [];
88
- // Detect if projectRoot is the home directory — use shallow scan mode
89
- // to avoid recursing into the entire home dir (which would be catastrophic).
90
- const isHomeDir = isHomeDirPath(projectRoot);
91
- if (isHomeDir) {
92
- // Shallow mode: only scan top-level files and ~/.dreb/memory/
93
- scanShallow(projectRoot, results);
94
- }
95
- else {
96
- // Normal mode: full recursive walk with .gitignore
97
- const ig = ignore();
98
- loadGitignore(ig, projectRoot, projectRoot);
99
- walkDirectory(projectRoot, projectRoot, ig, results);
100
- }
101
- // Include global memory files if the directory exists
102
- if (globalMemoryDir && existsSync(globalMemoryDir)) {
103
- scanMemoryDir(globalMemoryDir, projectRoot, results);
104
- }
105
- return results;
106
- }
107
- /** Check if a path is the user's home directory. */
108
- function isHomeDirPath(dir) {
109
- try {
110
- const home = homedir();
111
- // Normalize trailing slashes for comparison
112
- const normalizedDir = dir.replace(/[/\\]+$/, "");
113
- const normalizedHome = home.replace(/[/\\]+$/, "");
114
- return normalizedDir === normalizedHome;
115
- }
116
- catch {
117
- return false;
118
- }
119
- }
120
- /**
121
- * Shallow scan mode for home directory: only index top-level files
122
- * (no directory recursion) to avoid scanning the entire home directory.
123
- * Memory files are handled separately via scanMemoryDir.
124
- */
125
- function scanShallow(dir, results) {
126
- let entries;
127
- try {
128
- entries = readdirSync(dir);
129
- }
130
- catch {
131
- return;
132
- }
133
- for (const entry of entries) {
134
- // Skip dotfiles/dotdirs in home dir (except specific ones we want)
135
- if (entry.startsWith("."))
136
- continue;
137
- const fullPath = join(dir, entry);
138
- let stats;
139
- try {
140
- stats = statSync(fullPath);
141
- }
142
- catch {
143
- continue;
144
- }
145
- // Only index files, not directories (shallow mode)
146
- if (!stats.isFile())
147
- continue;
148
- if (stats.size > MAX_FILE_SIZE)
149
- continue;
150
- if (stats.size === 0)
151
- continue;
152
- const fileType = detectFileType(entry);
153
- if (!fileType)
154
- continue;
155
- results.push({
156
- filePath: entry,
157
- fileType,
158
- mtime: stats.mtimeMs,
159
- });
160
- }
161
- }
162
- /** Convert an OS path to posix separators for ignore matching. */
163
- function toPosix(p) {
164
- return p.split(sep).join("/");
165
- }
166
- /** Load .gitignore rules from a directory into the ignore matcher. */
167
- function loadGitignore(ig, dir, root) {
168
- const gitignorePath = join(dir, ".gitignore");
169
- if (!existsSync(gitignorePath))
170
- return;
171
- try {
172
- const content = readFileSync(gitignorePath, "utf-8");
173
- const relDir = relative(root, dir);
174
- const prefix = relDir ? `${toPosix(relDir)}/` : "";
175
- const patterns = content
176
- .split(/\r?\n/)
177
- .map((line) => prefixPattern(line, prefix))
178
- .filter((line) => line !== null);
179
- if (patterns.length > 0) {
180
- ig.add(patterns);
181
- }
182
- }
183
- catch {
184
- // Unreadable .gitignore — skip silently
185
- }
186
- }
187
- /**
188
- * Prefix a .gitignore pattern with a directory path so it applies
189
- * correctly when matching against root-relative paths.
190
- */
191
- function prefixPattern(line, prefix) {
192
- const trimmed = line.trim();
193
- if (!trimmed)
194
- return null;
195
- if (trimmed.startsWith("#") && !trimmed.startsWith("\\#"))
196
- return null;
197
- let pattern = line;
198
- let negated = false;
199
- if (pattern.startsWith("!")) {
200
- negated = true;
201
- pattern = pattern.slice(1);
202
- }
203
- else if (pattern.startsWith("\\!")) {
204
- pattern = pattern.slice(1);
205
- }
206
- const prefixed = prefix ? `${prefix}${pattern}` : pattern;
207
- return negated ? `!${prefixed}` : prefixed;
208
- }
209
- /**
210
- * Check if a directory component (relative to root) should be unconditionally skipped.
211
- * Handles both top-level names ("node_modules") and nested paths (".dreb/index").
212
- */
213
- function shouldSkipDir(relPath) {
214
- const posix = toPosix(relPath);
215
- // Check the directory name itself
216
- const parts = posix.split("/");
217
- const name = parts[parts.length - 1];
218
- if (SKIP_DIRS.has(name))
219
- return true;
220
- // Check multi-segment skip patterns (e.g. ".dreb/index")
221
- for (const skip of SKIP_DIRS) {
222
- if (skip.includes("/") && (posix === skip || posix.endsWith(`/${skip}`))) {
223
- return true;
224
- }
225
- }
226
- return false;
227
- }
228
- /** Recursively walk a directory, collecting indexable files. */
229
- function walkDirectory(dir, root, ig, results) {
230
- let entries;
231
- try {
232
- entries = readdirSync(dir);
233
- }
234
- catch {
235
- return; // Permission denied, etc.
236
- }
237
- for (const entry of entries) {
238
- const fullPath = join(dir, entry);
239
- const relPath = relative(root, fullPath);
240
- const posixRel = toPosix(relPath);
241
- let stats;
242
- try {
243
- stats = statSync(fullPath);
244
- }
245
- catch {
246
- continue; // Broken symlink, etc.
247
- }
248
- if (stats.isDirectory()) {
249
- // Hard-coded skip list
250
- if (shouldSkipDir(relPath))
251
- continue;
252
- // .gitignore check (directories need trailing slash)
253
- if (ig.ignores(`${posixRel}/`))
254
- continue;
255
- // Load nested .gitignore before descending
256
- loadGitignore(ig, fullPath, root);
257
- walkDirectory(fullPath, root, ig, results);
258
- continue;
259
- }
260
- if (!stats.isFile())
261
- continue;
262
- // .gitignore check for files
263
- if (ig.ignores(posixRel))
264
- continue;
265
- // Size gate
266
- if (stats.size > MAX_FILE_SIZE)
267
- continue;
268
- if (stats.size === 0)
269
- continue;
270
- // File type detection
271
- const fileType = detectFileType(entry);
272
- if (!fileType)
273
- continue;
274
- results.push({
275
- filePath: posixRel,
276
- fileType,
277
- mtime: stats.mtimeMs,
278
- });
279
- }
280
- }
281
- /**
282
- * Scan a memory directory (project or global) for indexable files.
283
- *
284
- * Memory directories are always fully included — no .gitignore filtering —
285
- * because they live outside the normal project tree or in `.dreb/` which
286
- * is typically gitignored.
287
- *
288
- * Paths for global memory files are stored with a `~memory/` prefix
289
- * to distinguish them from project files.
290
- */
291
- function scanMemoryDir(memoryDir, projectRoot, results, baseMemoryDir) {
292
- let entries;
293
- try {
294
- entries = readdirSync(memoryDir);
295
- }
296
- catch {
297
- return;
298
- }
299
- for (const entry of entries) {
300
- const fullPath = join(memoryDir, entry);
301
- let stats;
302
- try {
303
- stats = statSync(fullPath);
304
- }
305
- catch {
306
- continue;
307
- }
308
- if (stats.isDirectory()) {
309
- // Recurse into subdirectories
310
- scanMemoryDir(fullPath, projectRoot, results, baseMemoryDir ?? memoryDir);
311
- continue;
312
- }
313
- if (!stats.isFile())
314
- continue;
315
- if (stats.size > MAX_FILE_SIZE)
316
- continue;
317
- if (stats.size === 0)
318
- continue;
319
- const fileType = detectFileType(entry);
320
- if (!fileType)
321
- continue;
322
- // If the memory dir is inside the project root, use normal relative path.
323
- // Otherwise, use a ~memory/ prefix so paths remain unique and identifiable.
324
- const rel = relative(projectRoot, fullPath);
325
- const isOutsideProject = rel.startsWith("..") || isAbsolute(rel);
326
- const rootMemoryDir = baseMemoryDir ?? memoryDir;
327
- const filePath = isOutsideProject ? `~memory/${relative(rootMemoryDir, fullPath)}` : rel;
328
- results.push({
329
- filePath: toPosix(filePath),
330
- fileType,
331
- mtime: stats.mtimeMs,
332
- });
333
- }
334
- }
335
- //# sourceMappingURL=scanner.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"scanner.js","sourceRoot":"","sources":["../../../src/core/search/scanner.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,YAAY,EAAc,QAAQ,EAAE,MAAM,SAAS,CAAC;AACtF,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,WAAW,CAAC;AACrE,OAAO,MAAM,MAAM,QAAQ,CAAC;AAiB5B,+EAA+E;AAC/E,YAAY;AACZ,+EAA+E;AAE/E,yCAAyC;AACzC,MAAM,aAAa,GAAG,IAAI,GAAG,IAAI,CAAC;AAElC,4DAA4D;AAC5D,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC;IACzB,cAAc;IACd,MAAM;IACN,aAAa;IACb,KAAK;IACL,MAAM;IACN,aAAa;IACb,MAAM;IACN,OAAO;IACP,MAAM;IACN,OAAO;IACP,OAAO;IACP,OAAO;IACP,UAAU;IACV,QAAQ;CACR,CAAC,CAAC;AAEH,sCAAoC;AACpC,MAAM,aAAa,GAAkC,IAAI,GAAG,CAAmB;IAC9E,wBAAwB;IACxB,CAAC,KAAK,EAAE,YAAY,CAAC;IACrB,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,KAAK,EAAE,YAAY,CAAC;IACrB,CAAC,MAAM,EAAE,YAAY,CAAC;IACtB,CAAC,MAAM,EAAE,YAAY,CAAC;IACtB,CAAC,KAAK,EAAE,QAAQ,CAAC;IACjB,CAAC,KAAK,EAAE,IAAI,CAAC;IACb,CAAC,KAAK,EAAE,MAAM,CAAC;IACf,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,IAAI,EAAE,GAAG,CAAC;IACX,CAAC,IAAI,EAAE,GAAG,CAAC;IACX,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,KAAK,EAAE,KAAK,CAAC;IACd,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,KAAK,EAAE,KAAK,CAAC;IACd,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,kBAAkB;IAClB,CAAC,KAAK,EAAE,UAAU,CAAC;IACnB,CAAC,MAAM,EAAE,UAAU,CAAC;IACpB,CAAC,MAAM,EAAE,MAAM,CAAC;IAChB,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,MAAM,EAAE,WAAW,CAAC;IACrB,CAAC,MAAM,EAAE,WAAW,CAAC;IACrB,CAAC,MAAM,EAAE,WAAW,CAAC;IACrB,CAAC,MAAM,EAAE,WAAW,CAAC;IACrB,CAAC,OAAO,EAAE,WAAW,CAAC;CACtB,CAAC,CAAC;AAEH,+EAA+E;AAC/E,aAAa;AACb,+EAA+E;AAE/E;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,QAAgB,EAAmB;IACjE,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IACtB,OAAO,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC;AAAA,CACtC;AAED;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,WAAmB,EAAE,eAAwB,EAA0B;IACxG,MAAM,OAAO,GAAkB,EAAE,CAAC;IAElC,wEAAsE;IACtE,6EAA6E;IAC7E,MAAM,SAAS,GAAG,aAAa,CAAC,WAAW,CAAC,CAAC;IAE7C,IAAI,SAAS,EAAE,CAAC;QACf,8DAA8D;QAC9D,WAAW,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;IACnC,CAAC;SAAM,CAAC;QACP,mDAAmD;QACnD,MAAM,EAAE,GAAG,MAAM,EAAE,CAAC;QACpB,aAAa,CAAC,EAAE,EAAE,WAAW,EAAE,WAAW,CAAC,CAAC;QAC5C,aAAa,CAAC,WAAW,EAAE,WAAW,EAAE,EAAE,EAAE,OAAO,CAAC,CAAC;IACtD,CAAC;IAED,sDAAsD;IACtD,IAAI,eAAe,IAAI,UAAU,CAAC,eAAe,CAAC,EAAE,CAAC;QACpD,aAAa,CAAC,eAAe,EAAE,WAAW,EAAE,OAAO,CAAC,CAAC;IACtD,CAAC;IAED,OAAO,OAAO,CAAC;AAAA,CACf;AAED,oDAAoD;AACpD,SAAS,aAAa,CAAC,GAAW,EAAW;IAC5C,IAAI,CAAC;QACJ,MAAM,IAAI,GAAG,OAAO,EAAE,CAAC;QACvB,4CAA4C;QAC5C,MAAM,aAAa,GAAG,GAAG,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QACjD,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QACnD,OAAO,aAAa,KAAK,cAAc,CAAC;IACzC,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,KAAK,CAAC;IACd,CAAC;AAAA,CACD;AAED;;;;GAIG;AACH,SAAS,WAAW,CAAC,GAAW,EAAE,OAAsB,EAAQ;IAC/D,IAAI,OAAiB,CAAC;IACtB,IAAI,CAAC;QACJ,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACR,OAAO;IACR,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC7B,mEAAmE;QACnE,IAAI,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QAEpC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QAElC,IAAI,KAAY,CAAC;QACjB,IAAI,CAAC;YACJ,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACR,SAAS;QACV,CAAC;QAED,mDAAmD;QACnD,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAAE,SAAS;QAC9B,IAAI,KAAK,CAAC,IAAI,GAAG,aAAa;YAAE,SAAS;QACzC,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC;YAAE,SAAS;QAE/B,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QACvC,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,OAAO,CAAC,IAAI,CAAC;YACZ,QAAQ,EAAE,KAAK;YACf,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,OAAO;SACpB,CAAC,CAAC;IACJ,CAAC;AAAA,CACD;AAQD,kEAAkE;AAClE,SAAS,OAAO,CAAC,CAAS,EAAU;IACnC,OAAO,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAAA,CAC9B;AAED,sEAAsE;AACtE,SAAS,aAAa,CAAC,EAAiB,EAAE,GAAW,EAAE,IAAY,EAAQ;IAC1E,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC;IAC9C,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;QAAE,OAAO;IAEvC,IAAI,CAAC;QACJ,MAAM,OAAO,GAAG,YAAY,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;QACrD,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QACnC,MAAM,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QAEnD,MAAM,QAAQ,GAAG,OAAO;aACtB,KAAK,CAAC,OAAO,CAAC;aACd,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,aAAa,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;aAC1C,MAAM,CAAC,CAAC,IAAI,EAAkB,EAAE,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;QAElD,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAClB,CAAC;IACF,CAAC;IAAC,MAAM,CAAC;QACR,0CAAwC;IACzC,CAAC;AAAA,CACD;AAED;;;GAGG;AACH,SAAS,aAAa,CAAC,IAAY,EAAE,MAAc,EAAiB;IACnE,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAC1B,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEvE,IAAI,OAAO,GAAG,IAAI,CAAC;IACnB,IAAI,OAAO,GAAG,KAAK,CAAC;IAEpB,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;QAC7B,OAAO,GAAG,IAAI,CAAC;QACf,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC5B,CAAC;SAAM,IAAI,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;QACtC,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC5B,CAAC;IAED,MAAM,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,GAAG,OAAO,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;IAC1D,OAAO,OAAO,CAAC,CAAC,CAAC,IAAI,QAAQ,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;AAAA,CAC3C;AAED;;;GAGG;AACH,SAAS,aAAa,CAAC,OAAe,EAAW;IAChD,MAAM,KAAK,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IAE/B,kCAAkC;IAClC,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC/B,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACrC,IAAI,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAErC,yDAAyD;IACzD,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC9B,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,KAAK,IAAI,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;YAC1E,OAAO,IAAI,CAAC;QACb,CAAC;IACF,CAAC;IAED,OAAO,KAAK,CAAC;AAAA,CACb;AAED,gEAAgE;AAChE,SAAS,aAAa,CAAC,GAAW,EAAE,IAAY,EAAE,EAAiB,EAAE,OAAsB,EAAQ;IAClG,IAAI,OAAiB,CAAC;IACtB,IAAI,CAAC;QACJ,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,CAAC,0BAA0B;IACnC,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QAClC,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;QAElC,IAAI,KAAY,CAAC;QACjB,IAAI,CAAC;YACJ,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACR,SAAS,CAAC,uBAAuB;QAClC,CAAC;QAED,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACzB,uBAAuB;YACvB,IAAI,aAAa,CAAC,OAAO,CAAC;gBAAE,SAAS;YAErC,qDAAqD;YACrD,IAAI,EAAE,CAAC,OAAO,CAAC,GAAG,QAAQ,GAAG,CAAC;gBAAE,SAAS;YAEzC,2CAA2C;YAC3C,aAAa,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,CAAC,CAAC;YAElC,aAAa,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE,EAAE,OAAO,CAAC,CAAC;YAC3C,SAAS;QACV,CAAC;QAED,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAAE,SAAS;QAE9B,6BAA6B;QAC7B,IAAI,EAAE,CAAC,OAAO,CAAC,QAAQ,CAAC;YAAE,SAAS;QAEnC,YAAY;QACZ,IAAI,KAAK,CAAC,IAAI,GAAG,aAAa;YAAE,SAAS;QACzC,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC;YAAE,SAAS;QAE/B,sBAAsB;QACtB,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QACvC,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,OAAO,CAAC,IAAI,CAAC;YACZ,QAAQ,EAAE,QAAQ;YAClB,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,OAAO;SACpB,CAAC,CAAC;IACJ,CAAC;AAAA,CACD;AAED;;;;;;;;;GASG;AACH,SAAS,aAAa,CAAC,SAAiB,EAAE,WAAmB,EAAE,OAAsB,EAAE,aAAsB,EAAQ;IACpH,IAAI,OAAiB,CAAC;IACtB,IAAI,CAAC;QACJ,OAAO,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC;IAClC,CAAC;IAAC,MAAM,CAAC;QACR,OAAO;IACR,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;QAExC,IAAI,KAAY,CAAC;QACjB,IAAI,CAAC;YACJ,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACR,SAAS;QACV,CAAC;QAED,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACzB,8BAA8B;YAC9B,aAAa,CAAC,QAAQ,EAAE,WAAW,EAAE,OAAO,EAAE,aAAa,IAAI,SAAS,CAAC,CAAC;YAC1E,SAAS;QACV,CAAC;QAED,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAAE,SAAS;QAC9B,IAAI,KAAK,CAAC,IAAI,GAAG,aAAa;YAAE,SAAS;QACzC,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC;YAAE,SAAS;QAE/B,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QACvC,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,0EAA0E;QAC1E,4EAA4E;QAC5E,MAAM,GAAG,GAAG,QAAQ,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;QAC5C,MAAM,gBAAgB,GAAG,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,UAAU,CAAC,GAAG,CAAC,CAAC;QACjE,MAAM,aAAa,GAAG,aAAa,IAAI,SAAS,CAAC;QACjD,MAAM,QAAQ,GAAG,gBAAgB,CAAC,CAAC,CAAC,WAAW,QAAQ,CAAC,aAAa,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QAEzF,OAAO,CAAC,IAAI,CAAC;YACZ,QAAQ,EAAE,OAAO,CAAC,QAAQ,CAAC;YAC3B,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,OAAO;SACpB,CAAC,CAAC;IACJ,CAAC;AAAA,CACD","sourcesContent":["/**\n * File scanner for the semantic search subsystem.\n *\n * Discovers project files for indexing by walking the directory tree,\n * respecting .gitignore rules, and classifying files by type.\n */\n\nimport { existsSync, readdirSync, readFileSync, type Stats, statSync } from \"node:fs\";\nimport { homedir } from \"node:os\";\nimport { extname, isAbsolute, join, relative, sep } from \"node:path\";\nimport ignore from \"ignore\";\nimport type { FileType } from \"./types.js\";\n\n// ============================================================================\n// Public types\n// ============================================================================\n\n/** A file discovered by the scanner, ready for indexing. */\nexport interface ScannedFile {\n\t/** Path relative to the project root (posix separators). */\n\tfilePath: string;\n\t/** Detected file type. */\n\tfileType: FileType;\n\t/** File modification time in milliseconds since epoch. */\n\tmtime: number;\n}\n\n// ============================================================================\n// Constants\n// ============================================================================\n\n/** Maximum file size to index (1 MB). */\nconst MAX_FILE_SIZE = 1024 * 1024;\n\n/** Directories unconditionally skipped during traversal. */\nconst SKIP_DIRS = new Set([\n\t\"node_modules\",\n\t\".git\",\n\t\".dreb/index\",\n\t\".hg\",\n\t\".svn\",\n\t\"__pycache__\",\n\t\".tox\",\n\t\".venv\",\n\t\"dist\",\n\t\"build\",\n\t\".next\",\n\t\".nuxt\",\n\t\"coverage\",\n\t\".cache\",\n]);\n\n/** Extension → FileType mapping. */\nconst EXTENSION_MAP: ReadonlyMap<string, FileType> = new Map<string, FileType>([\n\t// Tree-sitter languages\n\t[\".ts\", \"typescript\"],\n\t[\".tsx\", \"tsx\"],\n\t[\".js\", \"javascript\"],\n\t[\".mjs\", \"javascript\"],\n\t[\".cjs\", \"javascript\"],\n\t[\".py\", \"python\"],\n\t[\".go\", \"go\"],\n\t[\".rs\", \"rust\"],\n\t[\".java\", \"java\"],\n\t[\".c\", \"c\"],\n\t[\".h\", \"c\"],\n\t[\".cpp\", \"cpp\"],\n\t[\".hpp\", \"cpp\"],\n\t[\".cc\", \"cpp\"],\n\t[\".cxx\", \"cpp\"],\n\t[\".hh\", \"cpp\"],\n\t[\".hxx\", \"cpp\"],\n\t// Text file types\n\t[\".md\", \"markdown\"],\n\t[\".mdx\", \"markdown\"],\n\t[\".yml\", \"yaml\"],\n\t[\".yaml\", \"yaml\"],\n\t[\".json\", \"json\"],\n\t[\".toml\", \"toml\"],\n\t[\".txt\", \"plaintext\"],\n\t[\".cfg\", \"plaintext\"],\n\t[\".ini\", \"plaintext\"],\n\t[\".env\", \"plaintext\"],\n\t[\".conf\", \"plaintext\"],\n]);\n\n// ============================================================================\n// Public API\n// ============================================================================\n\n/**\n * Detect the {@link FileType} for a file path based on its extension.\n * Returns `null` for unrecognized extensions or files without an extension.\n */\nexport function detectFileType(filePath: string): FileType | null {\n\tconst ext = extname(filePath).toLowerCase();\n\tif (!ext) return null;\n\treturn EXTENSION_MAP.get(ext) ?? null;\n}\n\n/**\n * Scan a project directory and return all indexable files.\n *\n * Walks the tree rooted at {@link projectRoot}, respects `.gitignore` rules,\n * skips binary / oversized files, and optionally includes memory files from\n * a global memory directory.\n */\nexport async function scanProject(projectRoot: string, globalMemoryDir?: string): Promise<ScannedFile[]> {\n\tconst results: ScannedFile[] = [];\n\n\t// Detect if projectRoot is the home directory — use shallow scan mode\n\t// to avoid recursing into the entire home dir (which would be catastrophic).\n\tconst isHomeDir = isHomeDirPath(projectRoot);\n\n\tif (isHomeDir) {\n\t\t// Shallow mode: only scan top-level files and ~/.dreb/memory/\n\t\tscanShallow(projectRoot, results);\n\t} else {\n\t\t// Normal mode: full recursive walk with .gitignore\n\t\tconst ig = ignore();\n\t\tloadGitignore(ig, projectRoot, projectRoot);\n\t\twalkDirectory(projectRoot, projectRoot, ig, results);\n\t}\n\n\t// Include global memory files if the directory exists\n\tif (globalMemoryDir && existsSync(globalMemoryDir)) {\n\t\tscanMemoryDir(globalMemoryDir, projectRoot, results);\n\t}\n\n\treturn results;\n}\n\n/** Check if a path is the user's home directory. */\nfunction isHomeDirPath(dir: string): boolean {\n\ttry {\n\t\tconst home = homedir();\n\t\t// Normalize trailing slashes for comparison\n\t\tconst normalizedDir = dir.replace(/[/\\\\]+$/, \"\");\n\t\tconst normalizedHome = home.replace(/[/\\\\]+$/, \"\");\n\t\treturn normalizedDir === normalizedHome;\n\t} catch {\n\t\treturn false;\n\t}\n}\n\n/**\n * Shallow scan mode for home directory: only index top-level files\n * (no directory recursion) to avoid scanning the entire home directory.\n * Memory files are handled separately via scanMemoryDir.\n */\nfunction scanShallow(dir: string, results: ScannedFile[]): void {\n\tlet entries: string[];\n\ttry {\n\t\tentries = readdirSync(dir);\n\t} catch {\n\t\treturn;\n\t}\n\n\tfor (const entry of entries) {\n\t\t// Skip dotfiles/dotdirs in home dir (except specific ones we want)\n\t\tif (entry.startsWith(\".\")) continue;\n\n\t\tconst fullPath = join(dir, entry);\n\n\t\tlet stats: Stats;\n\t\ttry {\n\t\t\tstats = statSync(fullPath);\n\t\t} catch {\n\t\t\tcontinue;\n\t\t}\n\n\t\t// Only index files, not directories (shallow mode)\n\t\tif (!stats.isFile()) continue;\n\t\tif (stats.size > MAX_FILE_SIZE) continue;\n\t\tif (stats.size === 0) continue;\n\n\t\tconst fileType = detectFileType(entry);\n\t\tif (!fileType) continue;\n\n\t\tresults.push({\n\t\t\tfilePath: entry,\n\t\t\tfileType,\n\t\t\tmtime: stats.mtimeMs,\n\t\t});\n\t}\n}\n\n// ============================================================================\n// Internal helpers\n// ============================================================================\n\ntype IgnoreMatcher = ReturnType<typeof ignore>;\n\n/** Convert an OS path to posix separators for ignore matching. */\nfunction toPosix(p: string): string {\n\treturn p.split(sep).join(\"/\");\n}\n\n/** Load .gitignore rules from a directory into the ignore matcher. */\nfunction loadGitignore(ig: IgnoreMatcher, dir: string, root: string): void {\n\tconst gitignorePath = join(dir, \".gitignore\");\n\tif (!existsSync(gitignorePath)) return;\n\n\ttry {\n\t\tconst content = readFileSync(gitignorePath, \"utf-8\");\n\t\tconst relDir = relative(root, dir);\n\t\tconst prefix = relDir ? `${toPosix(relDir)}/` : \"\";\n\n\t\tconst patterns = content\n\t\t\t.split(/\\r?\\n/)\n\t\t\t.map((line) => prefixPattern(line, prefix))\n\t\t\t.filter((line): line is string => line !== null);\n\n\t\tif (patterns.length > 0) {\n\t\t\tig.add(patterns);\n\t\t}\n\t} catch {\n\t\t// Unreadable .gitignore — skip silently\n\t}\n}\n\n/**\n * Prefix a .gitignore pattern with a directory path so it applies\n * correctly when matching against root-relative paths.\n */\nfunction prefixPattern(line: string, prefix: string): string | null {\n\tconst trimmed = line.trim();\n\tif (!trimmed) return null;\n\tif (trimmed.startsWith(\"#\") && !trimmed.startsWith(\"\\\\#\")) return null;\n\n\tlet pattern = line;\n\tlet negated = false;\n\n\tif (pattern.startsWith(\"!\")) {\n\t\tnegated = true;\n\t\tpattern = pattern.slice(1);\n\t} else if (pattern.startsWith(\"\\\\!\")) {\n\t\tpattern = pattern.slice(1);\n\t}\n\n\tconst prefixed = prefix ? `${prefix}${pattern}` : pattern;\n\treturn negated ? `!${prefixed}` : prefixed;\n}\n\n/**\n * Check if a directory component (relative to root) should be unconditionally skipped.\n * Handles both top-level names (\"node_modules\") and nested paths (\".dreb/index\").\n */\nfunction shouldSkipDir(relPath: string): boolean {\n\tconst posix = toPosix(relPath);\n\n\t// Check the directory name itself\n\tconst parts = posix.split(\"/\");\n\tconst name = parts[parts.length - 1];\n\tif (SKIP_DIRS.has(name)) return true;\n\n\t// Check multi-segment skip patterns (e.g. \".dreb/index\")\n\tfor (const skip of SKIP_DIRS) {\n\t\tif (skip.includes(\"/\") && (posix === skip || posix.endsWith(`/${skip}`))) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\treturn false;\n}\n\n/** Recursively walk a directory, collecting indexable files. */\nfunction walkDirectory(dir: string, root: string, ig: IgnoreMatcher, results: ScannedFile[]): void {\n\tlet entries: string[];\n\ttry {\n\t\tentries = readdirSync(dir);\n\t} catch {\n\t\treturn; // Permission denied, etc.\n\t}\n\n\tfor (const entry of entries) {\n\t\tconst fullPath = join(dir, entry);\n\t\tconst relPath = relative(root, fullPath);\n\t\tconst posixRel = toPosix(relPath);\n\n\t\tlet stats: Stats;\n\t\ttry {\n\t\t\tstats = statSync(fullPath);\n\t\t} catch {\n\t\t\tcontinue; // Broken symlink, etc.\n\t\t}\n\n\t\tif (stats.isDirectory()) {\n\t\t\t// Hard-coded skip list\n\t\t\tif (shouldSkipDir(relPath)) continue;\n\n\t\t\t// .gitignore check (directories need trailing slash)\n\t\t\tif (ig.ignores(`${posixRel}/`)) continue;\n\n\t\t\t// Load nested .gitignore before descending\n\t\t\tloadGitignore(ig, fullPath, root);\n\n\t\t\twalkDirectory(fullPath, root, ig, results);\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (!stats.isFile()) continue;\n\n\t\t// .gitignore check for files\n\t\tif (ig.ignores(posixRel)) continue;\n\n\t\t// Size gate\n\t\tif (stats.size > MAX_FILE_SIZE) continue;\n\t\tif (stats.size === 0) continue;\n\n\t\t// File type detection\n\t\tconst fileType = detectFileType(entry);\n\t\tif (!fileType) continue;\n\n\t\tresults.push({\n\t\t\tfilePath: posixRel,\n\t\t\tfileType,\n\t\t\tmtime: stats.mtimeMs,\n\t\t});\n\t}\n}\n\n/**\n * Scan a memory directory (project or global) for indexable files.\n *\n * Memory directories are always fully included — no .gitignore filtering —\n * because they live outside the normal project tree or in `.dreb/` which\n * is typically gitignored.\n *\n * Paths for global memory files are stored with a `~memory/` prefix\n * to distinguish them from project files.\n */\nfunction scanMemoryDir(memoryDir: string, projectRoot: string, results: ScannedFile[], baseMemoryDir?: string): void {\n\tlet entries: string[];\n\ttry {\n\t\tentries = readdirSync(memoryDir);\n\t} catch {\n\t\treturn;\n\t}\n\n\tfor (const entry of entries) {\n\t\tconst fullPath = join(memoryDir, entry);\n\n\t\tlet stats: Stats;\n\t\ttry {\n\t\t\tstats = statSync(fullPath);\n\t\t} catch {\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (stats.isDirectory()) {\n\t\t\t// Recurse into subdirectories\n\t\t\tscanMemoryDir(fullPath, projectRoot, results, baseMemoryDir ?? memoryDir);\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (!stats.isFile()) continue;\n\t\tif (stats.size > MAX_FILE_SIZE) continue;\n\t\tif (stats.size === 0) continue;\n\n\t\tconst fileType = detectFileType(entry);\n\t\tif (!fileType) continue;\n\n\t\t// If the memory dir is inside the project root, use normal relative path.\n\t\t// Otherwise, use a ~memory/ prefix so paths remain unique and identifiable.\n\t\tconst rel = relative(projectRoot, fullPath);\n\t\tconst isOutsideProject = rel.startsWith(\"..\") || isAbsolute(rel);\n\t\tconst rootMemoryDir = baseMemoryDir ?? memoryDir;\n\t\tconst filePath = isOutsideProject ? `~memory/${relative(rootMemoryDir, fullPath)}` : rel;\n\n\t\tresults.push({\n\t\t\tfilePath: toPosix(filePath),\n\t\t\tfileType,\n\t\t\tmtime: stats.mtimeMs,\n\t\t});\n\t}\n}\n"]}
@@ -1,42 +0,0 @@
1
- /**
2
- * Main search API.
3
- *
4
- * Orchestrates: check/build index → compute all 6 metrics → classify query
5
- * → duplicate columns → POEM rank → assemble results.
6
- */
7
- import type { IndexProgressCallback, SearchResult } from "./types.js";
8
- export interface SearchOptions {
9
- /** Maximum number of results to return. Default: 20. */
10
- limit?: number;
11
- /** Restrict search to files under this path (relative to project root). */
12
- pathFilter?: string;
13
- /** Progress callback for indexing operations. */
14
- onProgress?: IndexProgressCallback;
15
- }
16
- export declare class SearchEngine {
17
- private readonly projectRoot;
18
- private indexManager;
19
- private embedder;
20
- constructor(projectRoot: string);
21
- /** Check if semantic search is available (requires node:sqlite). */
22
- static isAvailable(): boolean;
23
- /**
24
- * Search the codebase with a natural language or identifier query.
25
- *
26
- * On first call, builds the index (scans, chunks, embeds). Subsequent calls
27
- * incrementally update changed files before searching.
28
- */
29
- search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
30
- /** Get index stats without opening a new connection. */
31
- getStats(): {
32
- files: number;
33
- chunks: number;
34
- } | null;
35
- /** Dispose resources. */
36
- close(): void;
37
- private getIndexManager;
38
- private getIndexConfig;
39
- private getOrCreateEmbedder;
40
- private computeVectorScores;
41
- }
42
- //# sourceMappingURL=search.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"search.d.ts","sourceRoot":"","sources":["../../../src/core/search/search.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAcH,OAAO,KAAK,EAAe,qBAAqB,EAAgB,YAAY,EAAe,MAAM,YAAY,CAAC;AAe9G,MAAM,WAAW,aAAa;IAC7B,wDAAwD;IACxD,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,2EAA2E;IAC3E,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iDAAiD;IACjD,UAAU,CAAC,EAAE,qBAAqB,CAAC;CACnC;AAMD,qBAAa,YAAY;IACxB,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IACrC,OAAO,CAAC,YAAY,CAA6B;IACjD,OAAO,CAAC,QAAQ,CAAyB;IAEzC,YAAY,WAAW,EAAE,MAAM,EAE9B;IAED,oEAAoE;IACpE,MAAM,CAAC,WAAW,IAAI,OAAO,CAE5B;IAED;;;;;OAKG;IACG,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,aAAa,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAmH5E;IAED,wDAAwD;IACxD,QAAQ,IAAI;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAGnD;IAED,yBAAyB;IACzB,KAAK,IAAI,IAAI,CAKZ;IAMD,OAAO,CAAC,eAAe;IASvB,OAAO,CAAC,cAAc;YASR,mBAAmB;YAYnB,mBAAmB;CA4BjC","sourcesContent":["/**\n * Main search API.\n *\n * Orchestrates: check/build index → compute all 6 metrics → classify query\n * → duplicate columns → POEM rank → assemble results.\n */\n\nimport { homedir } from \"node:os\";\nimport path from \"node:path\";\nimport type { SearchDatabase } from \"./db.js\";\nimport { Embedder } from \"./embedder.js\";\nimport { IndexManager } from \"./index-manager.js\";\nimport { computeBm25Scores } from \"./metrics/bm25.js\";\nimport { computeGitRecencyScores } from \"./metrics/git-recency.js\";\nimport { computeImportGraphScores } from \"./metrics/import-graph.js\";\nimport { computePathMatchScores } from \"./metrics/path-match.js\";\nimport { computeSymbolMatchScores } from \"./metrics/symbol-match.js\";\nimport { poemRank } from \"./poem.js\";\nimport { classifyQuery } from \"./query-classifier.js\";\nimport type { IndexConfig, IndexProgressCallback, MetricScores, SearchResult, StoredChunk } from \"./types.js\";\nimport { topKSimilar } from \"./vector-store.js\";\n\n// ============================================================================\n// Constants\n// ============================================================================\n\nconst DEFAULT_MODEL_NAME = \"Xenova/all-MiniLM-L6-v2\";\nconst DEFAULT_RESULT_LIMIT = 20;\nconst METRIC_CANDIDATE_LIMIT = 1000;\n\n// ============================================================================\n// Search Options\n// ============================================================================\n\nexport interface SearchOptions {\n\t/** Maximum number of results to return. Default: 20. */\n\tlimit?: number;\n\t/** Restrict search to files under this path (relative to project root). */\n\tpathFilter?: string;\n\t/** Progress callback for indexing operations. */\n\tonProgress?: IndexProgressCallback;\n}\n\n// ============================================================================\n// Search Engine\n// ============================================================================\n\nexport class SearchEngine {\n\tprivate readonly projectRoot: string;\n\tprivate indexManager: IndexManager | null = null;\n\tprivate embedder: Embedder | null = null;\n\n\tconstructor(projectRoot: string) {\n\t\tthis.projectRoot = projectRoot;\n\t}\n\n\t/** Check if semantic search is available (requires node:sqlite). */\n\tstatic isAvailable(): boolean {\n\t\treturn IndexManager.isAvailable();\n\t}\n\n\t/**\n\t * Search the codebase with a natural language or identifier query.\n\t *\n\t * On first call, builds the index (scans, chunks, embeds). Subsequent calls\n\t * incrementally update changed files before searching.\n\t */\n\tasync search(query: string, options?: SearchOptions): Promise<SearchResult[]> {\n\t\tconst limit = options?.limit ?? DEFAULT_RESULT_LIMIT;\n\t\tconst onProgress = options?.onProgress;\n\n\t\t// Ensure index is built and up to date\n\t\tconst indexManager = this.getIndexManager();\n\t\tconst db = indexManager.getDb();\n\n\t\t// Share our embedder with IndexManager so it doesn't create a second one\n\t\tconst embedder = await this.getOrCreateEmbedder();\n\t\tindexManager.setEmbedder(embedder);\n\n\t\tawait indexManager.buildIndex(onProgress);\n\t\tawait indexManager.ensureEmbeddings(onProgress);\n\n\t\t// Get all chunks (potentially filtered by path)\n\t\tlet allChunks = db.getAllChunks();\n\t\tif (options?.pathFilter) {\n\t\t\tconst filter = options.pathFilter;\n\t\t\tallChunks = allChunks.filter((c) => c.filePath.startsWith(filter));\n\t\t}\n\n\t\tif (allChunks.length === 0) {\n\t\t\treturn [];\n\t\t}\n\n\t\t// Classify query type for POEM column weighting\n\t\tconst queryType = classifyQuery(query);\n\n\t\t// Compute all 6 metrics\n\t\tonProgress?.(\"searching\", 0, 6);\n\n\t\t// 1. BM25 (FTS5)\n\t\tconst bm25Scores = computeBm25Scores(db, sanitizeFtsQuery(query), METRIC_CANDIDATE_LIMIT);\n\t\tonProgress?.(\"searching\", 1, 6);\n\n\t\t// 2. Cosine similarity (vector search)\n\t\tconst cosineScores = await this.computeVectorScores(db, query, METRIC_CANDIDATE_LIMIT, onProgress);\n\t\tonProgress?.(\"searching\", 2, 6);\n\n\t\t// 3. Path match\n\t\tconst pathScores = computePathMatchScores(query, allChunks);\n\t\tonProgress?.(\"searching\", 3, 6);\n\n\t\t// 4. Symbol match\n\t\tconst symbols = db.getAllSymbols();\n\t\tconst symbolScores = computeSymbolMatchScores(query, symbols);\n\t\tonProgress?.(\"searching\", 4, 6);\n\n\t\t// 5. Import graph (use BM25 + cosine as seed scores, aggregated per file)\n\t\t// Only use files with strong scores as seeds — low-scoring files (e.g. from\n\t\t// common OR terms matching everywhere) pollute the seed set and prevent\n\t\t// meaningful propagation.\n\t\tconst fileSeedScores = aggregateFileScores(allChunks, bm25Scores, cosineScores);\n\t\tconst seedThreshold = computeSeedThreshold(fileSeedScores);\n\t\tconst filteredSeeds = new Map<number, number>();\n\t\tfor (const [fileId, score] of fileSeedScores) {\n\t\t\tif (score >= seedThreshold) filteredSeeds.set(fileId, score);\n\t\t}\n\t\tconst fileIdToChunkIds = buildFileChunkMap(allChunks);\n\t\tconst importScores = computeImportGraphScores(db, filteredSeeds, fileIdToChunkIds);\n\t\tonProgress?.(\"searching\", 5, 6);\n\n\t\t// 6. Git recency\n\t\tconst recencyScores = await computeGitRecencyScores(this.projectRoot, allChunks);\n\t\tonProgress?.(\"searching\", 6, 6);\n\n\t\t// Build MetricScores for each candidate chunk\n\t\tconst candidateIds = collectCandidateIds(\n\t\t\tbm25Scores,\n\t\t\tcosineScores,\n\t\t\tpathScores,\n\t\t\tsymbolScores,\n\t\t\timportScores,\n\t\t\trecencyScores,\n\t\t);\n\t\tconst candidates = new Map<number, MetricScores>();\n\n\t\tfor (const id of candidateIds) {\n\t\t\tcandidates.set(id, {\n\t\t\t\tbm25: bm25Scores.get(id) ?? 0,\n\t\t\t\tcosine: cosineScores.get(id) ?? 0,\n\t\t\t\tpathMatch: pathScores.get(id) ?? 0,\n\t\t\t\tsymbolMatch: symbolScores.get(id) ?? 0,\n\t\t\t\timportGraph: importScores.get(id) ?? 0,\n\t\t\t\tgitRecency: recencyScores.get(id) ?? 0,\n\t\t\t});\n\t\t}\n\n\t\tif (candidates.size === 0) {\n\t\t\treturn [];\n\t\t}\n\n\t\t// POEM rank\n\t\tconst ranked = poemRank(candidates, queryType);\n\n\t\t// Assemble results\n\t\tconst chunkMap = new Map<number, StoredChunk>();\n\t\tfor (const chunk of allChunks) {\n\t\t\tchunkMap.set(chunk.id, chunk);\n\t\t}\n\n\t\tconst results: SearchResult[] = [];\n\t\tfor (const candidate of ranked.slice(0, limit)) {\n\t\t\tconst chunk = chunkMap.get(candidate.id);\n\t\t\tif (chunk) {\n\t\t\t\tresults.push({\n\t\t\t\t\tchunk,\n\t\t\t\t\tscores: candidate.scores,\n\t\t\t\t\trank: candidate.rank,\n\t\t\t\t});\n\t\t\t}\n\t\t}\n\n\t\treturn results;\n\t}\n\n\t/** Get index stats without opening a new connection. */\n\tgetStats(): { files: number; chunks: number } | null {\n\t\tif (!this.indexManager) return null;\n\t\treturn this.indexManager.getStats();\n\t}\n\n\t/** Dispose resources. */\n\tclose(): void {\n\t\tthis.indexManager?.close();\n\t\tthis.indexManager = null;\n\t\tthis.embedder?.dispose();\n\t\tthis.embedder = null;\n\t}\n\n\t// ========================================================================\n\t// Private\n\t// ========================================================================\n\n\tprivate getIndexManager(): IndexManager {\n\t\tif (!this.indexManager) {\n\t\t\tconst config = this.getIndexConfig();\n\t\t\tthis.indexManager = new IndexManager(config);\n\t\t\tthis.indexManager.open();\n\t\t}\n\t\treturn this.indexManager;\n\t}\n\n\tprivate getIndexConfig(): IndexConfig {\n\t\treturn {\n\t\t\tprojectRoot: this.projectRoot,\n\t\t\tindexDir: path.join(this.projectRoot, \".dreb\", \"index\"),\n\t\t\tglobalMemoryDir: path.join(homedir(), \".dreb\", \"memory\"),\n\t\t\tmodelName: DEFAULT_MODEL_NAME,\n\t\t};\n\t}\n\n\tprivate async getOrCreateEmbedder(): Promise<Embedder> {\n\t\tif (!this.embedder) {\n\t\t\tconst config = this.getIndexConfig();\n\t\t\tthis.embedder = new Embedder({\n\t\t\t\tmodelCacheDir: path.join(homedir(), \".dreb\", \"agent\", \"models\"),\n\t\t\t\tmodelName: config.modelName,\n\t\t\t});\n\t\t\tawait this.embedder.initialize();\n\t\t}\n\t\treturn this.embedder;\n\t}\n\n\tprivate async computeVectorScores(\n\t\tdb: SearchDatabase,\n\t\tquery: string,\n\t\tlimit: number,\n\t\t_onProgress?: IndexProgressCallback,\n\t): Promise<Map<number, number>> {\n\t\tconst config = this.getIndexConfig();\n\t\tconst embedder = await this.getOrCreateEmbedder();\n\n\t\t// Embed the query\n\t\tconst queryVector = await embedder.embedQuery(query);\n\n\t\t// Get all stored embeddings\n\t\tconst storedVectors = db.getAllEmbeddings(config.modelName);\n\n\t\tif (storedVectors.size === 0) {\n\t\t\treturn new Map();\n\t\t}\n\n\t\tconst topK = topKSimilar(queryVector, storedVectors, limit);\n\n\t\t// Convert to Map, clamping negative similarities to 0\n\t\tconst scores = new Map<number, number>();\n\t\tfor (const { id, score } of topK) {\n\t\t\tscores.set(id, Math.max(0, score));\n\t\t}\n\t\treturn scores;\n\t}\n}\n\n// ============================================================================\n// Helpers\n// ============================================================================\n\n/** Collect all unique chunk IDs that appear in any metric's results. */\nfunction collectCandidateIds(...scoreMaps: Map<number, number>[]): Set<number> {\n\tconst ids = new Set<number>();\n\tfor (const map of scoreMaps) {\n\t\tfor (const id of map.keys()) {\n\t\t\tids.add(id);\n\t\t}\n\t}\n\treturn ids;\n}\n\n/** Aggregate chunk-level scores to file-level scores (max per file). */\nfunction aggregateFileScores(chunks: StoredChunk[], ...scoreMaps: Map<number, number>[]): Map<number, number> {\n\tconst fileScores = new Map<number, number>();\n\n\tfor (const chunk of chunks) {\n\t\tlet maxScore = 0;\n\t\tfor (const map of scoreMaps) {\n\t\t\tconst s = map.get(chunk.id);\n\t\t\tif (s !== undefined && s > maxScore) maxScore = s;\n\t\t}\n\t\tif (maxScore > 0) {\n\t\t\tconst existing = fileScores.get(chunk.fileId);\n\t\t\tif (existing === undefined || maxScore > existing) {\n\t\t\t\tfileScores.set(chunk.fileId, maxScore);\n\t\t\t}\n\t\t}\n\t}\n\n\treturn fileScores;\n}\n\n/**\n * Compute a dynamic threshold for import graph seeds.\n * Uses the median score — only the top half of files are strong enough seeds.\n * Falls back to 0.1 minimum to avoid accepting near-zero scores.\n */\nfunction computeSeedThreshold(fileScores: Map<number, number>): number {\n\tif (fileScores.size === 0) return 0;\n\tconst sorted = [...fileScores.values()].sort((a, b) => b - a);\n\tconst median = sorted[Math.floor(sorted.length / 2)];\n\treturn Math.max(median, 0.1);\n}\n\n/** Build a map of fileId → chunk IDs for that file. */\nfunction buildFileChunkMap(chunks: StoredChunk[]): Map<number, number[]> {\n\tconst map = new Map<number, number[]>();\n\tfor (const chunk of chunks) {\n\t\tconst existing = map.get(chunk.fileId);\n\t\tif (existing) existing.push(chunk.id);\n\t\telse map.set(chunk.fileId, [chunk.id]);\n\t}\n\treturn map;\n}\n\n/** Common English stopwords to exclude from FTS queries. */\nconst STOPWORDS = new Set([\n\t\"a\",\n\t\"an\",\n\t\"and\",\n\t\"are\",\n\t\"as\",\n\t\"at\",\n\t\"be\",\n\t\"but\",\n\t\"by\",\n\t\"for\",\n\t\"from\",\n\t\"had\",\n\t\"has\",\n\t\"have\",\n\t\"he\",\n\t\"her\",\n\t\"his\",\n\t\"how\",\n\t\"i\",\n\t\"if\",\n\t\"in\",\n\t\"into\",\n\t\"is\",\n\t\"it\",\n\t\"its\",\n\t\"me\",\n\t\"my\",\n\t\"no\",\n\t\"not\",\n\t\"of\",\n\t\"on\",\n\t\"or\",\n\t\"our\",\n\t\"she\",\n\t\"so\",\n\t\"than\",\n\t\"that\",\n\t\"the\",\n\t\"their\",\n\t\"them\",\n\t\"then\",\n\t\"there\",\n\t\"these\",\n\t\"they\",\n\t\"this\",\n\t\"to\",\n\t\"up\",\n\t\"us\",\n\t\"was\",\n\t\"we\",\n\t\"what\",\n\t\"when\",\n\t\"where\",\n\t\"which\",\n\t\"who\",\n\t\"will\",\n\t\"with\",\n\t\"would\",\n\t\"you\",\n\t\"your\",\n]);\n\n/**\n * Sanitize a query string for FTS5 MATCH syntax.\n * FTS5 chokes on certain characters — strip operators and wrap terms.\n *\n * Removes stopwords and uses OR between terms so multi-word queries return\n * partial matches (FTS5's default implicit AND is too restrictive).\n */\nfunction sanitizeFtsQuery(query: string): string {\n\t// Remove FTS5 operators and special chars\n\tconst cleaned = query\n\t\t.replace(/[*\"():^{}[\\]~!@#$%&=+|<>]/g, \" \")\n\t\t.replace(/\\bAND\\b|\\bOR\\b|\\bNOT\\b|\\bNEAR\\b/gi, \" \")\n\t\t.trim();\n\n\t// Split into tokens, remove stopwords, join with OR\n\tconst tokens = cleaned.split(/\\s+/).filter((t) => t.length > 0 && !STOPWORDS.has(t.toLowerCase()));\n\tif (tokens.length === 0) return '\"\"';\n\tif (tokens.length === 1) return tokens[0];\n\treturn tokens.join(\" OR \");\n}\n"]}