@dreb/semantic-search 1.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/.claude-plugin/plugin.json +17 -0
  2. package/.mcp.json +8 -0
  3. package/README.md +97 -0
  4. package/bin/server.js +14 -0
  5. package/dist/chunker.d.ts +21 -0
  6. package/dist/chunker.d.ts.map +1 -0
  7. package/dist/chunker.js +51 -0
  8. package/dist/chunker.js.map +1 -0
  9. package/dist/db.d.ts +89 -0
  10. package/dist/db.d.ts.map +1 -0
  11. package/dist/db.js +406 -0
  12. package/dist/db.js.map +1 -0
  13. package/dist/embedder.d.ts +52 -0
  14. package/dist/embedder.d.ts.map +1 -0
  15. package/dist/embedder.js +158 -0
  16. package/dist/embedder.js.map +1 -0
  17. package/dist/format.d.ts +4 -0
  18. package/dist/format.d.ts.map +1 -0
  19. package/dist/format.js +37 -0
  20. package/dist/format.js.map +1 -0
  21. package/dist/index-manager.d.ts +55 -0
  22. package/dist/index-manager.d.ts.map +1 -0
  23. package/dist/index-manager.js +311 -0
  24. package/dist/index-manager.js.map +1 -0
  25. package/dist/index.d.ts +18 -0
  26. package/dist/index.d.ts.map +1 -0
  27. package/dist/index.js +21 -0
  28. package/dist/index.js.map +1 -0
  29. package/dist/mcp-server.d.ts +25 -0
  30. package/dist/mcp-server.d.ts.map +1 -0
  31. package/dist/mcp-server.js +149 -0
  32. package/dist/mcp-server.js.map +1 -0
  33. package/dist/metrics/bm25.d.ts +10 -0
  34. package/dist/metrics/bm25.d.ts.map +1 -0
  35. package/dist/metrics/bm25.js +32 -0
  36. package/dist/metrics/bm25.js.map +1 -0
  37. package/dist/metrics/git-recency.d.ts +14 -0
  38. package/dist/metrics/git-recency.d.ts.map +1 -0
  39. package/dist/metrics/git-recency.js +123 -0
  40. package/dist/metrics/git-recency.js.map +1 -0
  41. package/dist/metrics/import-graph.d.ts +15 -0
  42. package/dist/metrics/import-graph.d.ts.map +1 -0
  43. package/dist/metrics/import-graph.js +115 -0
  44. package/dist/metrics/import-graph.js.map +1 -0
  45. package/dist/metrics/path-match.d.ts +13 -0
  46. package/dist/metrics/path-match.d.ts.map +1 -0
  47. package/dist/metrics/path-match.js +54 -0
  48. package/dist/metrics/path-match.js.map +1 -0
  49. package/dist/metrics/symbol-match.d.ts +12 -0
  50. package/dist/metrics/symbol-match.d.ts.map +1 -0
  51. package/dist/metrics/symbol-match.js +62 -0
  52. package/dist/metrics/symbol-match.js.map +1 -0
  53. package/dist/metrics/tokenize.d.ts +12 -0
  54. package/dist/metrics/tokenize.d.ts.map +1 -0
  55. package/dist/metrics/tokenize.js +29 -0
  56. package/dist/metrics/tokenize.js.map +1 -0
  57. package/dist/poem.d.ts +38 -0
  58. package/dist/poem.d.ts.map +1 -0
  59. package/dist/poem.js +214 -0
  60. package/dist/poem.js.map +1 -0
  61. package/dist/query-classifier.d.ts +17 -0
  62. package/dist/query-classifier.d.ts.map +1 -0
  63. package/dist/query-classifier.js +54 -0
  64. package/dist/query-classifier.js.map +1 -0
  65. package/dist/scanner.d.ts +30 -0
  66. package/dist/scanner.d.ts.map +1 -0
  67. package/dist/scanner.js +343 -0
  68. package/dist/scanner.js.map +1 -0
  69. package/dist/search.d.ts +63 -0
  70. package/dist/search.d.ts.map +1 -0
  71. package/dist/search.js +400 -0
  72. package/dist/search.js.map +1 -0
  73. package/dist/text-chunker.d.ts +15 -0
  74. package/dist/text-chunker.d.ts.map +1 -0
  75. package/dist/text-chunker.js +580 -0
  76. package/dist/text-chunker.js.map +1 -0
  77. package/dist/tree-sitter-chunker.d.ts +25 -0
  78. package/dist/tree-sitter-chunker.d.ts.map +1 -0
  79. package/dist/tree-sitter-chunker.js +357 -0
  80. package/dist/tree-sitter-chunker.js.map +1 -0
  81. package/dist/types.d.ts +98 -0
  82. package/dist/types.d.ts.map +1 -0
  83. package/dist/types.js +6 -0
  84. package/dist/types.js.map +1 -0
  85. package/dist/vector-store.d.ts +43 -0
  86. package/dist/vector-store.d.ts.map +1 -0
  87. package/dist/vector-store.js +73 -0
  88. package/dist/vector-store.js.map +1 -0
  89. package/package.json +71 -0
  90. package/skills/search/SKILL.md +56 -0
@@ -0,0 +1,43 @@
1
+ /**
2
+ * Vector operations for semantic search.
3
+ *
4
+ * Pure JS implementations — no native dependencies or SQLite UDFs needed.
5
+ * Vectors are computed and compared in JS, stored as BLOBs in SQLite.
6
+ */
7
+ /**
8
+ * Compute cosine similarity between two normalized vectors.
9
+ *
10
+ * For normalized vectors, cosine similarity is simply the dot product:
11
+ * cos(a, b) = Σ a[i] * b[i]
12
+ *
13
+ * Returns a value in [-1, 1] where 1 = identical, 0 = orthogonal, -1 = opposite.
14
+ */
15
+ export declare function cosineSimilarity(a: Float32Array, b: Float32Array): number;
16
+ /**
17
+ * Pack a Float32Array into a Buffer for SQLite BLOB storage.
18
+ *
19
+ * No copy is made: the returned Buffer is a view sharing memory with the vector, so do not mutate the vector until the BLOB has been written.
20
+ */
21
+ export declare function packVector(vector: Float32Array): Buffer;
22
+ /**
23
+ * Unpack a BLOB (Uint8Array from node:sqlite) back to a Float32Array.
24
+ *
25
+ * The returned array shares the underlying buffer with the input for
26
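+ * zero-copy performance. Callers should not mutate the input after calling; blob.byteOffset must be a multiple of 4, otherwise the Float32Array constructor throws a RangeError.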
27
+ */
28
+ export declare function unpackVector(blob: Uint8Array): Float32Array;
29
+ /**
30
+ * Find the top-K most similar vectors from a set.
31
+ *
32
+ * Computes cosine similarity between the query vector and every candidate,
33
+ * then returns the K highest-scoring results sorted by descending score.
34
+ *
35
+ * Uses a simple full scan — suitable for the index sizes we expect in a
36
+ * single-project codebase (typically <100K chunks). For millions of vectors,
37
+ * an approximate nearest neighbor index (HNSW, IVF) would be needed.
38
+ */
39
+ export declare function topKSimilar(query: Float32Array, vectors: Map<number, Float32Array>, k: number): Array<{
40
+ id: number;
41
+ score: number;
42
+ }>;
43
+ //# sourceMappingURL=vector-store.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vector-store.d.ts","sourceRoot":"","sources":["../src/vector-store.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAMH;;;;;;;GAOG;AACH,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM,CAOzE;AAMD;;;;GAIG;AACH,wBAAgB,UAAU,CAAC,MAAM,EAAE,YAAY,GAAG,MAAM,CAEvD;AAED;;;;;GAKG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,UAAU,GAAG,YAAY,CAE3D;AAMD;;;;;;;;;GASG;AACH,wBAAgB,WAAW,CAC1B,KAAK,EAAE,YAAY,EACnB,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,YAAY,CAAC,EAClC,CAAC,EAAE,MAAM,GACP,KAAK,CAAC;IAAE,EAAE,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,CAgBtC","sourcesContent":["/**\n * Vector operations for semantic search.\n *\n * Pure JS implementations — no native dependencies or SQLite UDFs needed.\n * Vectors are computed and compared in JS, stored as BLOBs in SQLite.\n */\n\n// ============================================================================\n// Similarity\n// ============================================================================\n\n/**\n * Compute cosine similarity between two normalized vectors.\n *\n * For normalized vectors, cosine similarity is simply the dot product:\n * cos(a, b) = Σ a[i] * b[i]\n *\n * Returns a value in [-1, 1] where 1 = identical, 0 = orthogonal, -1 = opposite.\n */\nexport function cosineSimilarity(a: Float32Array, b: Float32Array): number {\n\tconst len = a.length;\n\tlet dot = 0;\n\tfor (let i = 0; i < len; i++) {\n\t\tdot += a[i] * b[i];\n\t}\n\treturn dot;\n}\n\n// ============================================================================\n// Serialization\n// ============================================================================\n\n/**\n * Pack a Float32Array into a Buffer for SQLite BLOB storage.\n *\n * Creates a copy to ensure the buffer isn't shared with other typed arrays.\n */\nexport function packVector(vector: Float32Array): Buffer {\n\treturn Buffer.from(vector.buffer, vector.byteOffset, vector.byteLength);\n}\n\n/**\n * Unpack a BLOB (Uint8Array from node:sqlite) back to a Float32Array.\n *\n * 
The returned array shares the underlying buffer with the input for\n * zero-copy performance. Callers should not mutate the input after calling.\n */\nexport function unpackVector(blob: Uint8Array): Float32Array {\n\treturn new Float32Array(blob.buffer, blob.byteOffset, blob.byteLength / 4);\n}\n\n// ============================================================================\n// Top-K Search\n// ============================================================================\n\n/**\n * Find the top-K most similar vectors from a set.\n *\n * Computes cosine similarity between the query vector and every candidate,\n * then returns the K highest-scoring results sorted by descending score.\n *\n * Uses a simple full scan — suitable for the index sizes we expect in a\n * single-project codebase (typically <100K chunks). For millions of vectors,\n * an approximate nearest neighbor index (HNSW, IVF) would be needed.\n */\nexport function topKSimilar(\n\tquery: Float32Array,\n\tvectors: Map<number, Float32Array>,\n\tk: number,\n): Array<{ id: number; score: number }> {\n\tif (k <= 0 || vectors.size === 0) return [];\n\n\t// For small k relative to n, a min-heap would be more efficient.\n\t// For typical codebase sizes (<100K vectors) the difference is negligible,\n\t// and a sorted array is simpler and correct.\n\tconst scored: Array<{ id: number; score: number }> = [];\n\n\tfor (const [id, vector] of vectors) {\n\t\tscored.push({ id, score: cosineSimilarity(query, vector) });\n\t}\n\n\t// Partial sort: only need top-k, but full sort is fine for expected sizes\n\tscored.sort((a, b) => b.score - a.score);\n\n\treturn scored.slice(0, k);\n}\n"]}
@@ -0,0 +1,73 @@
1
+ /**
2
+ * Vector operations for semantic search.
3
+ *
4
+ * Pure JS implementations — no native dependencies or SQLite UDFs needed.
5
+ * Vectors are computed and compared in JS, stored as BLOBs in SQLite.
6
+ */
7
+ // ============================================================================
8
+ // Similarity
9
+ // ============================================================================
10
+ /**
11
+ * Compute cosine similarity between two normalized vectors.
12
+ *
13
+ * For normalized vectors, cosine similarity is simply the dot product:
14
+ * cos(a, b) = Σ a[i] * b[i]
15
+ *
16
+ * Returns a value in [-1, 1] where 1 = identical, 0 = orthogonal, -1 = opposite.
17
+ */
18
+ export function cosineSimilarity(a, b) {
19
+ const len = a.length;
20
+ let dot = 0;
21
+ for (let i = 0; i < len; i++) {
22
+ dot += a[i] * b[i];
23
+ }
24
+ return dot;
25
+ }
26
+ // ============================================================================
27
+ // Serialization
28
+ // ============================================================================
29
+ /**
30
+ * Pack a Float32Array into a Buffer for SQLite BLOB storage.
31
+ *
32
+ * No copy is made: the returned Buffer is a view sharing memory with the vector, so do not mutate the vector until the BLOB has been written.
33
+ */
34
+ export function packVector(vector) {
35
+ return Buffer.from(vector.buffer, vector.byteOffset, vector.byteLength);
36
+ }
37
+ /**
38
+ * Unpack a BLOB (Uint8Array from node:sqlite) back to a Float32Array.
39
+ *
40
+ * The returned array shares the underlying buffer with the input for
41
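+ * zero-copy performance. Callers should not mutate the input after calling; blob.byteOffset must be a multiple of 4, otherwise the Float32Array constructor throws a RangeError.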
42
+ */
43
+ export function unpackVector(blob) {
44
+ return new Float32Array(blob.buffer, blob.byteOffset, blob.byteLength / 4);
45
+ }
46
+ // ============================================================================
47
+ // Top-K Search
48
+ // ============================================================================
49
+ /**
50
+ * Find the top-K most similar vectors from a set.
51
+ *
52
+ * Computes cosine similarity between the query vector and every candidate,
53
+ * then returns the K highest-scoring results sorted by descending score.
54
+ *
55
+ * Uses a simple full scan — suitable for the index sizes we expect in a
56
+ * single-project codebase (typically <100K chunks). For millions of vectors,
57
+ * an approximate nearest neighbor index (HNSW, IVF) would be needed.
58
+ */
59
+ export function topKSimilar(query, vectors, k) {
60
+ if (k <= 0 || vectors.size === 0)
61
+ return [];
62
+ // For small k relative to n, a min-heap would be more efficient.
63
+ // For typical codebase sizes (<100K vectors) the difference is negligible,
64
+ // and a sorted array is simpler and correct.
65
+ const scored = [];
66
+ for (const [id, vector] of vectors) {
67
+ scored.push({ id, score: cosineSimilarity(query, vector) });
68
+ }
69
+ // Full sort: only the top-k are needed, but sorting every candidate is fine for expected sizes
70
+ scored.sort((a, b) => b.score - a.score);
71
+ return scored.slice(0, k);
72
+ }
73
+ //# sourceMappingURL=vector-store.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vector-store.js","sourceRoot":"","sources":["../src/vector-store.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,+EAA+E;AAC/E,aAAa;AACb,+EAA+E;AAE/E;;;;;;;GAOG;AACH,MAAM,UAAU,gBAAgB,CAAC,CAAe,EAAE,CAAe,EAAU;IAC1E,MAAM,GAAG,GAAG,CAAC,CAAC,MAAM,CAAC;IACrB,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAC9B,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;IACD,OAAO,GAAG,CAAC;AAAA,CACX;AAED,+EAA+E;AAC/E,gBAAgB;AAChB,+EAA+E;AAE/E;;;;GAIG;AACH,MAAM,UAAU,UAAU,CAAC,MAAoB,EAAU;IACxD,OAAO,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,UAAU,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC;AAAA,CACxE;AAED;;;;;GAKG;AACH,MAAM,UAAU,YAAY,CAAC,IAAgB,EAAgB;IAC5D,OAAO,IAAI,YAAY,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC;AAAA,CAC3E;AAED,+EAA+E;AAC/E,eAAe;AACf,+EAA+E;AAE/E;;;;;;;;;GASG;AACH,MAAM,UAAU,WAAW,CAC1B,KAAmB,EACnB,OAAkC,EAClC,CAAS,EAC8B;IACvC,IAAI,CAAC,IAAI,CAAC,IAAI,OAAO,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAE5C,iEAAiE;IACjE,2EAA2E;IAC3E,6CAA6C;IAC7C,MAAM,MAAM,GAAyC,EAAE,CAAC;IAExD,KAAK,MAAM,CAAC,EAAE,EAAE,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,EAAE,EAAE,EAAE,KAAK,EAAE,gBAAgB,CAAC,KAAK,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC;IAC7D,CAAC;IAED,0EAA0E;IAC1E,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IAEzC,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;AAAA,CAC1B","sourcesContent":["/**\n * Vector operations for semantic search.\n *\n * Pure JS implementations — no native dependencies or SQLite UDFs needed.\n * Vectors are computed and compared in JS, stored as BLOBs in SQLite.\n */\n\n// ============================================================================\n// Similarity\n// ============================================================================\n\n/**\n * Compute cosine similarity between two normalized vectors.\n *\n * For normalized vectors, cosine similarity is simply the dot 
product:\n * cos(a, b) = Σ a[i] * b[i]\n *\n * Returns a value in [-1, 1] where 1 = identical, 0 = orthogonal, -1 = opposite.\n */\nexport function cosineSimilarity(a: Float32Array, b: Float32Array): number {\n\tconst len = a.length;\n\tlet dot = 0;\n\tfor (let i = 0; i < len; i++) {\n\t\tdot += a[i] * b[i];\n\t}\n\treturn dot;\n}\n\n// ============================================================================\n// Serialization\n// ============================================================================\n\n/**\n * Pack a Float32Array into a Buffer for SQLite BLOB storage.\n *\n * Creates a copy to ensure the buffer isn't shared with other typed arrays.\n */\nexport function packVector(vector: Float32Array): Buffer {\n\treturn Buffer.from(vector.buffer, vector.byteOffset, vector.byteLength);\n}\n\n/**\n * Unpack a BLOB (Uint8Array from node:sqlite) back to a Float32Array.\n *\n * The returned array shares the underlying buffer with the input for\n * zero-copy performance. Callers should not mutate the input after calling.\n */\nexport function unpackVector(blob: Uint8Array): Float32Array {\n\treturn new Float32Array(blob.buffer, blob.byteOffset, blob.byteLength / 4);\n}\n\n// ============================================================================\n// Top-K Search\n// ============================================================================\n\n/**\n * Find the top-K most similar vectors from a set.\n *\n * Computes cosine similarity between the query vector and every candidate,\n * then returns the K highest-scoring results sorted by descending score.\n *\n * Uses a simple full scan — suitable for the index sizes we expect in a\n * single-project codebase (typically <100K chunks). 
For millions of vectors,\n * an approximate nearest neighbor index (HNSW, IVF) would be needed.\n */\nexport function topKSimilar(\n\tquery: Float32Array,\n\tvectors: Map<number, Float32Array>,\n\tk: number,\n): Array<{ id: number; score: number }> {\n\tif (k <= 0 || vectors.size === 0) return [];\n\n\t// For small k relative to n, a min-heap would be more efficient.\n\t// For typical codebase sizes (<100K vectors) the difference is negligible,\n\t// and a sorted array is simpler and correct.\n\tconst scored: Array<{ id: number; score: number }> = [];\n\n\tfor (const [id, vector] of vectors) {\n\t\tscored.push({ id, score: cosineSimilarity(query, vector) });\n\t}\n\n\t// Partial sort: only need top-k, but full sort is fine for expected sizes\n\tscored.sort((a, b) => b.score - a.score);\n\n\treturn scored.slice(0, k);\n}\n"]}
package/package.json ADDED
@@ -0,0 +1,71 @@
1
+ {
2
+ "name": "@dreb/semantic-search",
3
+ "version": "1.18.0",
4
+ "description": "Semantic codebase search engine with embedding-based ranking and MCP server",
5
+ "publishConfig": {
6
+ "access": "public"
7
+ },
8
+ "type": "module",
9
+ "main": "./dist/index.js",
10
+ "types": "./dist/index.d.ts",
11
+ "exports": {
12
+ ".": {
13
+ "types": "./dist/index.d.ts",
14
+ "import": "./dist/index.js"
15
+ }
16
+ },
17
+ "bin": {
18
+ "semantic-search-mcp": "./bin/server.js"
19
+ },
20
+ "files": [
21
+ "dist",
22
+ "bin",
23
+ ".claude-plugin",
24
+ ".mcp.json",
25
+ "skills",
26
+ "README.md"
27
+ ],
28
+ "scripts": {
29
+ "clean": "shx rm -rf dist",
30
+ "build": "tsgo -p tsconfig.build.json",
31
+ "dev": "tsgo -p tsconfig.build.json --watch --preserveWatchOutput",
32
+ "test": "vitest --run",
33
+ "prepublishOnly": "npm run clean && npm run build"
34
+ },
35
+ "dependencies": {
36
+ "@huggingface/transformers": "^4.0.1",
37
+ "@modelcontextprotocol/sdk": "^1.29.0",
38
+ "ignore": "^7.0.5",
39
+ "tree-sitter-c": "^0.24.1",
40
+ "tree-sitter-cpp": "^0.23.4",
41
+ "tree-sitter-go": "^0.25.0",
42
+ "tree-sitter-java": "^0.23.5",
43
+ "tree-sitter-javascript": "^0.25.0",
44
+ "tree-sitter-python": "^0.25.0",
45
+ "tree-sitter-rust": "^0.24.0",
46
+ "tree-sitter-typescript": "^0.23.2",
47
+ "web-tree-sitter": "^0.26.8"
48
+ },
49
+ "devDependencies": {
50
+ "@types/node": "^24.3.0",
51
+ "shx": "^0.4.0",
52
+ "typescript": "^5.9.2",
53
+ "vitest": "^3.2.4"
54
+ },
55
+ "keywords": [
56
+ "semantic-search",
57
+ "codebase",
58
+ "embeddings",
59
+ "mcp"
60
+ ],
61
+ "author": "Drew Brereton",
62
+ "license": "MIT",
63
+ "repository": {
64
+ "type": "git",
65
+ "url": "https://github.com/aebrer/dreb.git",
66
+ "directory": "packages/semantic-search"
67
+ },
68
+ "engines": {
69
+ "node": ">=22.0.0"
70
+ }
71
+ }
@@ -0,0 +1,56 @@
1
+ # Semantic Codebase Search
2
+
3
+ Use `search` as your **default exploration tool** for understanding code, finding implementations, and answering questions about the codebase. Use `grep` when you already know the exact text or pattern you're looking for.
4
+
5
+ ## Indexing
6
+
7
+ The first query builds the index — this may take 10–60 seconds depending on project size. Subsequent queries are fast because the index is incrementally updated (only changed files are re-processed).
8
+
9
+ ## Query Types
10
+
11
+ The search tool supports three kinds of queries, automatically classified:
12
+
13
+ - **Identifier queries** — e.g. `AuthMiddleware`, `handleRequest`, `SearchEngine` — finds definitions, usages, and related code for a specific symbol
14
+ - **Natural language queries** — e.g. `where is rate limiting handled`, `how does authentication work` — semantic search across code and documentation
15
+ - **Path queries** — e.g. `src/auth/`, `packages/ai` — finds code within a directory structure
16
+
17
+ ## Parameters
18
+
19
+ | Parameter | Required | Description |
20
+ | ------------ | -------- | --------------------------------------------------------------------------- |
21
+ | `query` | Yes | Search query — natural language, identifier, or path |
22
+ | `projectDir` | Yes | Absolute path to the project directory. Set this to your current working directory |
23
+ | `path` | No | Restrict search to files under this subdirectory (relative to project root) |
24
+ | `limit` | No | Maximum number of results to return (default: 20) |
25
+ | `rebuild` | No | Force a clean index rebuild — use when files have changed significantly |
26
+
27
+ ## Ranking
28
+
29
+ Results are ranked using 6 signals fused via **POEM** (Pareto-Optimal Embedded Modeling):
30
+
31
+ 1. **BM25** — keyword matching via FTS5 full-text search
32
+ 2. **Cosine similarity** — embedding-based semantic similarity (all-MiniLM-L6-v2)
33
+ 3. **Path match** — query terms appearing in the file path
34
+ 4. **Symbol match** — query terms matching function/class/type names
35
+ 5. **Import graph proximity** — files imported by or importing high-scoring files
36
+ 6. **Git recency** — recently modified files ranked higher
37
+
38
+ The weight given to each signal varies by query type. Identifier queries emphasize symbol match and BM25; natural language queries emphasize cosine similarity; path queries emphasize path match.
39
+
40
+ ## Results
41
+
42
+ Each result includes:
43
+
44
+ - **File path** and **line range** (start–end)
45
+ - **Chunk kind** (function, class, method, interface, heading_section, etc.) and **name**
46
+ - **Metric scores** for each of the 6 signals
47
+ - **Content preview** of the matching code or text
48
+
49
+ ## Tips
50
+
51
+ - Start broad, then narrow with `path` if you get too many results from different areas
52
+ - Use `limit` to get more results when exploring a broad topic (e.g. `limit: 50`)
53
+ - Use `rebuild: true` after major refactors, branch switches, or large file changes
54
+ - Identifier queries work best for finding where something is defined or used
55
+ - Natural language queries work best for understanding how a feature or concept is implemented
56
+ - Combine search with `grep` for a powerful workflow: search to find the right files, then grep for exact patterns within them