@dreb/semantic-search 2.0.5 → 2.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/bin/server.js +13 -3
- package/dist/chunker.d.ts.map +1 -1
- package/dist/chunker.js.map +1 -1
- package/dist/db.d.ts.map +1 -1
- package/dist/db.js +0 -1
- package/dist/db.js.map +1 -1
- package/dist/embedder.d.ts.map +1 -1
- package/dist/embedder.js +3 -6
- package/dist/embedder.js.map +1 -1
- package/dist/format.d.ts.map +1 -1
- package/dist/format.js.map +1 -1
- package/dist/index-manager.d.ts +4 -0
- package/dist/index-manager.d.ts.map +1 -1
- package/dist/index-manager.js +2 -3
- package/dist/index-manager.js.map +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/mcp-server.d.ts.map +1 -1
- package/dist/mcp-server.js.map +1 -1
- package/dist/metrics/bm25.d.ts.map +1 -1
- package/dist/metrics/bm25.js.map +1 -1
- package/dist/metrics/git-recency.d.ts.map +1 -1
- package/dist/metrics/git-recency.js.map +1 -1
- package/dist/metrics/import-graph.d.ts.map +1 -1
- package/dist/metrics/import-graph.js.map +1 -1
- package/dist/metrics/path-match.d.ts.map +1 -1
- package/dist/metrics/path-match.js.map +1 -1
- package/dist/metrics/symbol-match.d.ts.map +1 -1
- package/dist/metrics/symbol-match.js.map +1 -1
- package/dist/metrics/tokenize.d.ts.map +1 -1
- package/dist/metrics/tokenize.js.map +1 -1
- package/dist/poem.d.ts.map +1 -1
- package/dist/poem.js.map +1 -1
- package/dist/query-classifier.d.ts.map +1 -1
- package/dist/query-classifier.js.map +1 -1
- package/dist/scanner.d.ts.map +1 -1
- package/dist/scanner.js.map +1 -1
- package/dist/search.d.ts.map +1 -1
- package/dist/search.js +3 -5
- package/dist/search.js.map +1 -1
- package/dist/text-chunker.d.ts.map +1 -1
- package/dist/text-chunker.js.map +1 -1
- package/dist/tree-sitter-chunker.d.ts.map +1 -1
- package/dist/tree-sitter-chunker.js.map +1 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/vector-store.d.ts.map +1 -1
- package/dist/vector-store.js.map +1 -1
- package/package.json +3 -3
- package/skills/search/SKILL.md +5 -0
package/dist/mcp-server.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mcp-server.d.ts","sourceRoot":"","sources":["../src/mcp-server.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAIH,OAAO,EAAE,MAAM,EAAE,MAAM,2CAA2C,CAAC;AAqDnE;;;;;;GAMG;AACH,wBAAgB,eAAe,CAAC,iBAAiB,EAAE,MAAM,GAAG,MAAM,CA6FjE;AAMD;;;GAGG;AACH,wBAAsB,WAAW,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAInE"
|
|
1
|
+
{"version":3,"file":"mcp-server.d.ts","sourceRoot":"","sources":["../src/mcp-server.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAIH,OAAO,EAAE,MAAM,EAAE,MAAM,2CAA2C,CAAC;AAqDnE;;;;;;GAMG;AACH,wBAAgB,eAAe,CAAC,iBAAiB,EAAE,MAAM,GAAG,MAAM,CA6FjE;AAMD;;;GAGG;AACH,wBAAsB,WAAW,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAInE"}
|
package/dist/mcp-server.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mcp-server.js","sourceRoot":"","sources":["../src/mcp-server.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,MAAM,EAAE,MAAM,2CAA2C,CAAC;AACnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,qBAAqB,EAAuB,sBAAsB,EAAE,MAAM,oCAAoC,CAAC;AACxH,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,MAAM,OAAO,GAAG,aAAa,CAAC,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC;AAC/C,MAAM,EAAE,OAAO,EAAE,cAAc,EAAE,GAAG,OAAO,CAAC,iBAAiB,CAAwB,CAAC;AAEtF,+EAA+E;AAC/E,oCAAoC;AACpC,+EAA+E;AAE/E,MAAM,WAAW,GAAG;IACnB,IAAI,EAAE,QAAQ;IACd,WAAW,EACV,oNAAoN;IACrN,WAAW,EAAE;QACZ,IAAI,EAAE,QAAiB;QACvB,UAAU,EAAE;YACX,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,sDAAsD,EAAE;YAC9F,UAAU,EAAE;gBACX,IAAI,EAAE,QAAQ;gBACd,WAAW,EAAE,uFAAuF;aACpG;YACD,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,0CAA0C,EAAE;YACjF,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,yCAAyC,EAAE;YACjF,OAAO,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,sCAAsC,EAAE;SACjF;QACD,QAAQ,EAAE,CAAC,OAAO,EAAE,YAAY,CAAC;KACjC;CACD,CAAC;AAEF,+EAA+E;AAC/E,eAAe;AACf,+EAA+E;AAE/E,yEAAyE;AACzE,MAAM,WAAW,GAAG,IAAI,GAAG,EAAwB,CAAC;AAEpD,SAAS,eAAe,CAAC,WAAmB,EAAgB;IAC3D,IAAI,MAAM,GAAG,WAAW,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;IAC1C,IAAI,CAAC,MAAM,EAAE,CAAC;QACb,MAAM,GAAG,IAAI,YAAY,CAAC,WAAW,CAAC,CAAC;QACvC,WAAW,CAAC,GAAG,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;IACtC,CAAC;IACD,OAAO,MAAM,CAAC;AAAA,CACd;AAED,+EAA+E;AAC/E,iBAAiB;AACjB,+EAA+E;AAE/E;;;;;;GAMG;AACH,MAAM,UAAU,eAAe,CAAC,iBAAyB,EAAU;IAClE,MAAM,MAAM,GAAG,IAAI,MAAM,CACxB,EAAE,IAAI,EAAE,iBAAiB,EAAE,OAAO,EAAE,cAAc,EAAE,EACpD,EAAE,YAAY,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,EAAE,CAC5C,CAAC;IAEF,MAAM,CAAC,iBAAiB,CAAC,sBAAsB,EAAE,KAAK,IAAI,EAAE,CAAC,CAAC;QAC7D,KAAK,EAAE,CAAC,WAAW,CAAC;KACpB,CAAC,CAAC,CAAC;IAEJ,MAAM,CAAC,iBAAiB,CAAC,qBAAqB,EAAE,KAAK,EAAE,OAAO,EAA2B,EAAE,CAAC;QAC3F,IAAI,OAAO,CAAC,MAAM,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YACtC,OAAO;gBACN,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,iBAAiB,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE,EAAE,CAAC;gBACzE,OAAO,EAAE,IAAI;aACb,CAAC;QACH,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,SAAS,IAAI,EAAE,CAM3C,CAAC;QACF,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,GAAG,KAAK,EAAE,GAAG,IAAI,CAAC;QAC1D,MAAM,KAAK,GAAG,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,IAAI,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAE7F,MAAM,UAAU,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC;QAElF,IAAI,CAAC,YAAY,CAAC,WAAW,EAAE,EAAE,CAAC;YACjC,OAAO;gBACN,OAAO,EAAE;oBACR;wBACC,IAAI,EAAE,MAAM;wBACZ,IAAI,EAAE,2GAA2G;qBACjH;iBACD;gBACD,OAAO,EAAE,IAAI;aACb,CAAC;QACH,CAAC;QAED,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzC,OAAO;gBACN,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,+BAA+B,EAAE,CAAC;gBAClE,OAAO,EAAE,IAAI;aACb,CAAC;QACH,CAAC;QAED,IAAI,CAAC;YACJ,MAAM,MAAM,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;YAE3C,IAAI,OAAO,EAAE,CAAC;gBACb,MAAM,MAAM,CAAC,UAAU,EAAE,CAAC;YAC3B,CAAC;YAED,qCAAqC;YACrC,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,KAAK,EAAE;gBAC1C,KAAK;gBACL,UAAU,EAAE,UAAU;gBACtB,UAAU,EAAE,CAAC,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,EAAE,CAAC;oBACtC,MAAM;yBACJ,kBAAkB,CAAC;wBACnB,KAAK,EAAE,MAAM;wBACb,MAAM,EAAE,iBAAiB;wBACzB,IAAI,EAAE,GAAG,KAAK,KAAK,OAAO,IAAI,KAAK,EAAE;qBACrC,CAAC;yBACD,KAAK,CAAC,GAAG,EAAE,CAAC;wBACZ,oEAAkE;oBADrD,CAEb,CAAC,CAAC;gBAAA,CACJ;aACD,CAAC,CAAC;YAEH,MAAM,IAAI,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC;YACpC,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAC;YAEhC,IAAI,SAAS,GAAG,EAAE,CAAC;YACnB,IAAI,KAAK,EAAE,CAAC;gBACX,SAAS,GAAG,eAAe,KAAK,CAAC,KAAK,WAAW,KAAK,CAAC,MAAM,UAAU,CAAC;YACzE,CAAC;YAED,OAAO;gBACN,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,GAAG,SAAS,EAAE,CAAC;aACnD,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACd,OAAO;gBACN,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,kBAAkB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;gBACvG,OAAO,EAAE,IAAI;aACb,CAAC;QACH,CAAC;IAAA,CACD,CAAC,CAAC;IAEH,OAAO,MAAM,CAAC;AAAA,CACd;AAED,+EAA+E;AAC/E,iBAAiB;AACjB,+EAA+E;AAE/E;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,UAAkB,EAAiB;IACpE,MAAM,MAAM,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;IAC3C,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAC7C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;AAAA,CAChC","sourcesContent":["/**\n * MCP stdio server adapter for semantic codebase search.\n *\n * Exposes the SearchEngine as a single \"search\" tool over the Model Context Protocol,\n * enabling any MCP-compatible client to run semantic codebase queries.\n *\n * The server defaults to using its CWD as the project directory. Claude Code\n * launches MCP servers with CWD set to the project root, so no configuration\n * is needed for typical per-project usage.\n */\n\nimport { createRequire } from \"node:module\";\nimport { resolve } from \"node:path\";\nimport { Server } from \"@modelcontextprotocol/sdk/server/index.js\";\nimport { StdioServerTransport } from \"@modelcontextprotocol/sdk/server/stdio.js\";\nimport { CallToolRequestSchema, type CallToolResult, ListToolsRequestSchema } from \"@modelcontextprotocol/sdk/types.js\";\nimport { formatResults } from \"./format.js\";\nimport { SearchEngine } from \"./search.js\";\n\nconst require = createRequire(import.meta.url);\nconst { version: packageVersion } = require(\"../package.json\") as { version: string };\n\n// ============================================================================\n// Tool Schema (JSON Schema for MCP)\n// ============================================================================\n\nconst SEARCH_TOOL = {\n\tname: \"search\",\n\tdescription:\n\t\t\"Search the codebase using natural language queries. Returns ranked code/doc results using semantic similarity and keyword matching. First query builds the index (may take a moment); subsequent queries are fast.\",\n\tinputSchema: {\n\t\ttype: \"object\" as const,\n\t\tproperties: {\n\t\t\tquery: { type: \"string\", description: \"Search query (natural language, identifier, or path)\" },\n\t\t\tprojectDir: {\n\t\t\t\ttype: \"string\",\n\t\t\t\tdescription: \"Absolute path to the project directory to search. Use your current working directory.\",\n\t\t\t},\n\t\t\tpath: { type: \"string\", description: \"Restrict search to files under this path\" },\n\t\t\tlimit: { type: \"number\", description: \"Maximum results to return (default: 20)\" },\n\t\t\trebuild: { type: \"boolean\", description: \"Force index rebuild (default: false)\" },\n\t\t},\n\t\trequired: [\"query\", \"projectDir\"],\n\t},\n};\n\n// ============================================================================\n// Engine Cache\n// ============================================================================\n\n/** Cache search engines per project root to reuse index across calls. */\nconst engineCache = new Map<string, SearchEngine>();\n\nfunction getSearchEngine(projectRoot: string): SearchEngine {\n\tlet engine = engineCache.get(projectRoot);\n\tif (!engine) {\n\t\tengine = new SearchEngine(projectRoot);\n\t\tengineCache.set(projectRoot, engine);\n\t}\n\treturn engine;\n}\n\n// ============================================================================\n// Server Factory\n// ============================================================================\n\n/**\n * Create an MCP server instance configured with the semantic search tool.\n *\n * @param defaultProjectDir - Default project directory for searches. Used when\n * the client doesn't specify `projectDir` in the tool call. Typically the\n * server's CWD, which Claude Code sets to the project root.\n */\nexport function createMcpServer(defaultProjectDir: string): Server {\n\tconst server = new Server(\n\t\t{ name: \"semantic-search\", version: packageVersion },\n\t\t{ capabilities: { tools: {}, logging: {} } },\n\t);\n\n\tserver.setRequestHandler(ListToolsRequestSchema, async () => ({\n\t\ttools: [SEARCH_TOOL],\n\t}));\n\n\tserver.setRequestHandler(CallToolRequestSchema, async (request): Promise<CallToolResult> => {\n\t\tif (request.params.name !== \"search\") {\n\t\t\treturn {\n\t\t\t\tcontent: [{ type: \"text\", text: `Unknown tool: ${request.params.name}` }],\n\t\t\t\tisError: true,\n\t\t\t};\n\t\t}\n\n\t\tconst args = (request.params.arguments ?? {}) as {\n\t\t\tquery?: string;\n\t\t\tprojectDir?: string;\n\t\t\tpath?: string;\n\t\t\tlimit?: number;\n\t\t\trebuild?: boolean;\n\t\t};\n\t\tconst { query, path: searchPath, rebuild = false } = args;\n\t\tconst limit = typeof args.limit === \"number\" && args.limit > 0 ? Math.floor(args.limit) : 20;\n\n\t\tconst projectDir = args.projectDir ? resolve(args.projectDir) : defaultProjectDir;\n\n\t\tif (!SearchEngine.isAvailable()) {\n\t\t\treturn {\n\t\t\t\tcontent: [\n\t\t\t\t\t{\n\t\t\t\t\t\ttype: \"text\",\n\t\t\t\t\t\ttext: \"Semantic search requires Node.js 22+ (for built-in SQLite). Current version does not support node:sqlite.\",\n\t\t\t\t\t},\n\t\t\t\t],\n\t\t\t\tisError: true,\n\t\t\t};\n\t\t}\n\n\t\tif (!query || query.trim().length === 0) {\n\t\t\treturn {\n\t\t\t\tcontent: [{ type: \"text\", text: \"Search query cannot be empty.\" }],\n\t\t\t\tisError: true,\n\t\t\t};\n\t\t}\n\n\t\ttry {\n\t\t\tconst engine = getSearchEngine(projectDir);\n\n\t\t\tif (rebuild) {\n\t\t\t\tawait engine.resetIndex();\n\t\t\t}\n\n\t\t\t// Send progress via logging messages\n\t\t\tconst results = await engine.search(query, {\n\t\t\t\tlimit,\n\t\t\t\tpathFilter: searchPath,\n\t\t\t\tonProgress: (phase, current, total) => {\n\t\t\t\t\tserver\n\t\t\t\t\t\t.sendLoggingMessage({\n\t\t\t\t\t\t\tlevel: \"info\",\n\t\t\t\t\t\t\tlogger: \"semantic-search\",\n\t\t\t\t\t\t\tdata: `${phase}: ${current}/${total}`,\n\t\t\t\t\t\t})\n\t\t\t\t\t\t.catch(() => {\n\t\t\t\t\t\t\t// Ignore errors sending progress — client may not support logging\n\t\t\t\t\t\t});\n\t\t\t\t},\n\t\t\t});\n\n\t\t\tconst text = formatResults(results);\n\t\t\tconst stats = engine.getStats();\n\n\t\t\tlet statsLine = \"\";\n\t\t\tif (stats) {\n\t\t\t\tstatsLine = `\\n\\n[Index: ${stats.files} files, ${stats.chunks} chunks]`;\n\t\t\t}\n\n\t\t\treturn {\n\t\t\t\tcontent: [{ type: \"text\", text: text + statsLine }],\n\t\t\t};\n\t\t} catch (err) {\n\t\t\treturn {\n\t\t\t\tcontent: [{ type: \"text\", text: `Search failed: ${err instanceof Error ? err.message : String(err)}` }],\n\t\t\t\tisError: true,\n\t\t\t};\n\t\t}\n\t});\n\n\treturn server;\n}\n\n// ============================================================================\n// Server Startup\n// ============================================================================\n\n/**\n * Create and start an MCP server over stdio.\n * This blocks until the transport is closed.\n */\nexport async function startServer(projectDir: string): Promise<void> {\n\tconst server = createMcpServer(projectDir);\n\tconst transport = new StdioServerTransport();\n\tawait server.connect(transport);\n}\n"]}
|
|
1
|
+
{"version":3,"file":"mcp-server.js","sourceRoot":"","sources":["../src/mcp-server.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,MAAM,EAAE,MAAM,2CAA2C,CAAC;AACnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,qBAAqB,EAAuB,sBAAsB,EAAE,MAAM,oCAAoC,CAAC;AACxH,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC/C,MAAM,EAAE,OAAO,EAAE,cAAc,EAAE,GAAG,OAAO,CAAC,iBAAiB,CAAwB,CAAC;AAEtF,+EAA+E;AAC/E,oCAAoC;AACpC,+EAA+E;AAE/E,MAAM,WAAW,GAAG;IACnB,IAAI,EAAE,QAAQ;IACd,WAAW,EACV,oNAAoN;IACrN,WAAW,EAAE;QACZ,IAAI,EAAE,QAAiB;QACvB,UAAU,EAAE;YACX,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,sDAAsD,EAAE;YAC9F,UAAU,EAAE;gBACX,IAAI,EAAE,QAAQ;gBACd,WAAW,EAAE,uFAAuF;aACpG;YACD,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,0CAA0C,EAAE;YACjF,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,yCAAyC,EAAE;YACjF,OAAO,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,sCAAsC,EAAE;SACjF;QACD,QAAQ,EAAE,CAAC,OAAO,EAAE,YAAY,CAAC;KACjC;CACD,CAAC;AAEF,+EAA+E;AAC/E,eAAe;AACf,+EAA+E;AAE/E,yEAAyE;AACzE,MAAM,WAAW,GAAG,IAAI,GAAG,EAAwB,CAAC;AAEpD,SAAS,eAAe,CAAC,WAAmB;IAC3C,IAAI,MAAM,GAAG,WAAW,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;IAC1C,IAAI,CAAC,MAAM,EAAE,CAAC;QACb,MAAM,GAAG,IAAI,YAAY,CAAC,WAAW,CAAC,CAAC;QACvC,WAAW,CAAC,GAAG,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;IACtC,CAAC;IACD,OAAO,MAAM,CAAC;AACf,CAAC;AAED,+EAA+E;AAC/E,iBAAiB;AACjB,+EAA+E;AAE/E;;;;;;GAMG;AACH,MAAM,UAAU,eAAe,CAAC,iBAAyB;IACxD,MAAM,MAAM,GAAG,IAAI,MAAM,CACxB,EAAE,IAAI,EAAE,iBAAiB,EAAE,OAAO,EAAE,cAAc,EAAE,EACpD,EAAE,YAAY,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,EAAE,CAC5C,CAAC;IAEF,MAAM,CAAC,iBAAiB,CAAC,sBAAsB,EAAE,KAAK,IAAI,EAAE,CAAC,CAAC;QAC7D,KAAK,EAAE,CAAC,WAAW,CAAC;KACpB,CAAC,CAAC,CAAC;IAEJ,MAAM,CAAC,iBAAiB,CAAC,qBAAqB,EAAE,KAAK,EAAE,OAAO,EAA2B,EAAE;QAC1F,IAAI,OAAO,CAAC,MAAM,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YACtC,OAAO;gBACN,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,iBAAiB,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE,EAAE,CAAC;gBACzE,OAAO,EAAE,IAAI;aACb,CAAC;QACH,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,SAAS,IAAI,EAAE,CAM3C,CAAC;QACF,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,GAAG,KAAK,EAAE,GAAG,IAAI,CAAC;QAC1D,MAAM,KAAK,GAAG,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,IAAI,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAE7F,MAAM,UAAU,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC;QAElF,IAAI,CAAC,YAAY,CAAC,WAAW,EAAE,EAAE,CAAC;YACjC,OAAO;gBACN,OAAO,EAAE;oBACR;wBACC,IAAI,EAAE,MAAM;wBACZ,IAAI,EAAE,2GAA2G;qBACjH;iBACD;gBACD,OAAO,EAAE,IAAI;aACb,CAAC;QACH,CAAC;QAED,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzC,OAAO;gBACN,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,+BAA+B,EAAE,CAAC;gBAClE,OAAO,EAAE,IAAI;aACb,CAAC;QACH,CAAC;QAED,IAAI,CAAC;YACJ,MAAM,MAAM,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;YAE3C,IAAI,OAAO,EAAE,CAAC;gBACb,MAAM,MAAM,CAAC,UAAU,EAAE,CAAC;YAC3B,CAAC;YAED,qCAAqC;YACrC,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,KAAK,EAAE;gBAC1C,KAAK;gBACL,UAAU,EAAE,UAAU;gBACtB,UAAU,EAAE,CAAC,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,EAAE;oBACrC,MAAM;yBACJ,kBAAkB,CAAC;wBACnB,KAAK,EAAE,MAAM;wBACb,MAAM,EAAE,iBAAiB;wBACzB,IAAI,EAAE,GAAG,KAAK,KAAK,OAAO,IAAI,KAAK,EAAE;qBACrC,CAAC;yBACD,KAAK,CAAC,GAAG,EAAE;wBACX,kEAAkE;oBACnE,CAAC,CAAC,CAAC;gBACL,CAAC;aACD,CAAC,CAAC;YAEH,MAAM,IAAI,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC;YACpC,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAC;YAEhC,IAAI,SAAS,GAAG,EAAE,CAAC;YACnB,IAAI,KAAK,EAAE,CAAC;gBACX,SAAS,GAAG,eAAe,KAAK,CAAC,KAAK,WAAW,KAAK,CAAC,MAAM,UAAU,CAAC;YACzE,CAAC;YAED,OAAO;gBACN,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,GAAG,SAAS,EAAE,CAAC;aACnD,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACd,OAAO;gBACN,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,kBAAkB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;gBACvG,OAAO,EAAE,IAAI;aACb,CAAC;QACH,CAAC;IACF,CAAC,CAAC,CAAC;IAEH,OAAO,MAAM,CAAC;AACf,CAAC;AAED,+EAA+E;AAC/E,iBAAiB;AACjB,+EAA+E;AAE/E;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,UAAkB;IACnD,MAAM,MAAM,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;IAC3C,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAC7C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;AACjC,CAAC","sourcesContent":["/**\n * MCP stdio server adapter for semantic codebase search.\n *\n * Exposes the SearchEngine as a single \"search\" tool over the Model Context Protocol,\n * enabling any MCP-compatible client to run semantic codebase queries.\n *\n * The server defaults to using its CWD as the project directory. Claude Code\n * launches MCP servers with CWD set to the project root, so no configuration\n * is needed for typical per-project usage.\n */\n\nimport { createRequire } from \"node:module\";\nimport { resolve } from \"node:path\";\nimport { Server } from \"@modelcontextprotocol/sdk/server/index.js\";\nimport { StdioServerTransport } from \"@modelcontextprotocol/sdk/server/stdio.js\";\nimport { CallToolRequestSchema, type CallToolResult, ListToolsRequestSchema } from \"@modelcontextprotocol/sdk/types.js\";\nimport { formatResults } from \"./format.js\";\nimport { SearchEngine } from \"./search.js\";\n\nconst require = createRequire(import.meta.url);\nconst { version: packageVersion } = require(\"../package.json\") as { version: string };\n\n// ============================================================================\n// Tool Schema (JSON Schema for MCP)\n// ============================================================================\n\nconst SEARCH_TOOL = {\n\tname: \"search\",\n\tdescription:\n\t\t\"Search the codebase using natural language queries. Returns ranked code/doc results using semantic similarity and keyword matching. First query builds the index (may take a moment); subsequent queries are fast.\",\n\tinputSchema: {\n\t\ttype: \"object\" as const,\n\t\tproperties: {\n\t\t\tquery: { type: \"string\", description: \"Search query (natural language, identifier, or path)\" },\n\t\t\tprojectDir: {\n\t\t\t\ttype: \"string\",\n\t\t\t\tdescription: \"Absolute path to the project directory to search. Use your current working directory.\",\n\t\t\t},\n\t\t\tpath: { type: \"string\", description: \"Restrict search to files under this path\" },\n\t\t\tlimit: { type: \"number\", description: \"Maximum results to return (default: 20)\" },\n\t\t\trebuild: { type: \"boolean\", description: \"Force index rebuild (default: false)\" },\n\t\t},\n\t\trequired: [\"query\", \"projectDir\"],\n\t},\n};\n\n// ============================================================================\n// Engine Cache\n// ============================================================================\n\n/** Cache search engines per project root to reuse index across calls. */\nconst engineCache = new Map<string, SearchEngine>();\n\nfunction getSearchEngine(projectRoot: string): SearchEngine {\n\tlet engine = engineCache.get(projectRoot);\n\tif (!engine) {\n\t\tengine = new SearchEngine(projectRoot);\n\t\tengineCache.set(projectRoot, engine);\n\t}\n\treturn engine;\n}\n\n// ============================================================================\n// Server Factory\n// ============================================================================\n\n/**\n * Create an MCP server instance configured with the semantic search tool.\n *\n * @param defaultProjectDir - Default project directory for searches. Used when\n * the client doesn't specify `projectDir` in the tool call. Typically the\n * server's CWD, which Claude Code sets to the project root.\n */\nexport function createMcpServer(defaultProjectDir: string): Server {\n\tconst server = new Server(\n\t\t{ name: \"semantic-search\", version: packageVersion },\n\t\t{ capabilities: { tools: {}, logging: {} } },\n\t);\n\n\tserver.setRequestHandler(ListToolsRequestSchema, async () => ({\n\t\ttools: [SEARCH_TOOL],\n\t}));\n\n\tserver.setRequestHandler(CallToolRequestSchema, async (request): Promise<CallToolResult> => {\n\t\tif (request.params.name !== \"search\") {\n\t\t\treturn {\n\t\t\t\tcontent: [{ type: \"text\", text: `Unknown tool: ${request.params.name}` }],\n\t\t\t\tisError: true,\n\t\t\t};\n\t\t}\n\n\t\tconst args = (request.params.arguments ?? {}) as {\n\t\t\tquery?: string;\n\t\t\tprojectDir?: string;\n\t\t\tpath?: string;\n\t\t\tlimit?: number;\n\t\t\trebuild?: boolean;\n\t\t};\n\t\tconst { query, path: searchPath, rebuild = false } = args;\n\t\tconst limit = typeof args.limit === \"number\" && args.limit > 0 ? Math.floor(args.limit) : 20;\n\n\t\tconst projectDir = args.projectDir ? resolve(args.projectDir) : defaultProjectDir;\n\n\t\tif (!SearchEngine.isAvailable()) {\n\t\t\treturn {\n\t\t\t\tcontent: [\n\t\t\t\t\t{\n\t\t\t\t\t\ttype: \"text\",\n\t\t\t\t\t\ttext: \"Semantic search requires Node.js 22+ (for built-in SQLite). Current version does not support node:sqlite.\",\n\t\t\t\t\t},\n\t\t\t\t],\n\t\t\t\tisError: true,\n\t\t\t};\n\t\t}\n\n\t\tif (!query || query.trim().length === 0) {\n\t\t\treturn {\n\t\t\t\tcontent: [{ type: \"text\", text: \"Search query cannot be empty.\" }],\n\t\t\t\tisError: true,\n\t\t\t};\n\t\t}\n\n\t\ttry {\n\t\t\tconst engine = getSearchEngine(projectDir);\n\n\t\t\tif (rebuild) {\n\t\t\t\tawait engine.resetIndex();\n\t\t\t}\n\n\t\t\t// Send progress via logging messages\n\t\t\tconst results = await engine.search(query, {\n\t\t\t\tlimit,\n\t\t\t\tpathFilter: searchPath,\n\t\t\t\tonProgress: (phase, current, total) => {\n\t\t\t\t\tserver\n\t\t\t\t\t\t.sendLoggingMessage({\n\t\t\t\t\t\t\tlevel: \"info\",\n\t\t\t\t\t\t\tlogger: \"semantic-search\",\n\t\t\t\t\t\t\tdata: `${phase}: ${current}/${total}`,\n\t\t\t\t\t\t})\n\t\t\t\t\t\t.catch(() => {\n\t\t\t\t\t\t\t// Ignore errors sending progress — client may not support logging\n\t\t\t\t\t\t});\n\t\t\t\t},\n\t\t\t});\n\n\t\t\tconst text = formatResults(results);\n\t\t\tconst stats = engine.getStats();\n\n\t\t\tlet statsLine = \"\";\n\t\t\tif (stats) {\n\t\t\t\tstatsLine = `\\n\\n[Index: ${stats.files} files, ${stats.chunks} chunks]`;\n\t\t\t}\n\n\t\t\treturn {\n\t\t\t\tcontent: [{ type: \"text\", text: text + statsLine }],\n\t\t\t};\n\t\t} catch (err) {\n\t\t\treturn {\n\t\t\t\tcontent: [{ type: \"text\", text: `Search failed: ${err instanceof Error ? err.message : String(err)}` }],\n\t\t\t\tisError: true,\n\t\t\t};\n\t\t}\n\t});\n\n\treturn server;\n}\n\n// ============================================================================\n// Server Startup\n// ============================================================================\n\n/**\n * Create and start an MCP server over stdio.\n * This blocks until the transport is closed.\n */\nexport async function startServer(projectDir: string): Promise<void> {\n\tconst server = createMcpServer(projectDir);\n\tconst transport = new StdioServerTransport();\n\tawait server.connect(transport);\n}\n"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"bm25.d.ts","sourceRoot":"","sources":["../../src/metrics/bm25.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,UAAU,CAAC;AAE/C;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,EAAE,EAAE,cAAc,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAwBvG"
|
|
1
|
+
{"version":3,"file":"bm25.d.ts","sourceRoot":"","sources":["../../src/metrics/bm25.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,UAAU,CAAC;AAE/C;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,EAAE,EAAE,cAAc,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAwBvG"}
|
package/dist/metrics/bm25.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"bm25.js","sourceRoot":"","sources":["../../src/metrics/bm25.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH;;;GAGG;AACH,MAAM,UAAU,iBAAiB,CAAC,EAAkB,EAAE,KAAa,EAAE,KAAa
|
|
1
|
+
{"version":3,"file":"bm25.js","sourceRoot":"","sources":["../../src/metrics/bm25.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH;;;GAGG;AACH,MAAM,UAAU,iBAAiB,CAAC,EAAkB,EAAE,KAAa,EAAE,KAAa;IACjF,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;IAEzC,IAAI,CAAC;QACJ,MAAM,OAAO,GAAG,EAAE,CAAC,SAAS,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;QAC3C,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,MAAM,CAAC;QAExC,2CAA2C;QAC3C,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACzB,IAAI,CAAC,CAAC,KAAK,GAAG,QAAQ;gBAAE,QAAQ,GAAG,CAAC,CAAC,KAAK,CAAC;QAC5C,CAAC;QAED,mDAAmD;QACnD,IAAI,QAAQ,GAAG,CAAC,EAAE,CAAC;YAClB,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;gBACzB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,KAAK,GAAG,QAAQ,CAAC,CAAC;YAC3C,CAAC;QACF,CAAC;IACF,CAAC;IAAC,MAAM,CAAC;QACR,uCAAuC;IACxC,CAAC;IAED,OAAO,MAAM,CAAC;AACf,CAAC","sourcesContent":["/**\n * BM25 metric — full-text search scoring via FTS5.\n */\n\nimport type { SearchDatabase } from \"../db.js\";\n\n/**\n * Compute BM25 scores for a query using FTS5.\n * Returns a Map of chunkId → normalized score (0-1, higher = more relevant).\n */\nexport function computeBm25Scores(db: SearchDatabase, query: string, limit: number): Map<number, number> {\n\tconst scores = new Map<number, number>();\n\n\ttry {\n\t\tconst results = db.ftsSearch(query, limit);\n\t\tif (results.length === 0) return scores;\n\n\t\t// Find the maximum score for normalization\n\t\tlet maxScore = 0;\n\t\tfor (const r of results) {\n\t\t\tif (r.score > maxScore) maxScore = r.score;\n\t\t}\n\n\t\t// Normalize: top result → 1.0, others proportional\n\t\tif (maxScore > 0) {\n\t\t\tfor (const r of results) {\n\t\t\t\tscores.set(r.chunkId, r.score / maxScore);\n\t\t\t}\n\t\t}\n\t} catch {\n\t\t// If FTS query fails, return empty map\n\t}\n\n\treturn scores;\n}\n"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"git-recency.d.ts","sourceRoot":"","sources":["../../src/metrics/git-recency.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAa/C;;;;GAIG;AACH,wBAAsB,uBAAuB,CAC5C,WAAW,EAAE,MAAM,EACnB,MAAM,EAAE,WAAW,EAAE,GACnB,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAyD9B"
|
|
1
|
+
{"version":3,"file":"git-recency.d.ts","sourceRoot":"","sources":["../../src/metrics/git-recency.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAa/C;;;;GAIG;AACH,wBAAsB,uBAAuB,CAC5C,WAAW,EAAE,MAAM,EACnB,MAAM,EAAE,WAAW,EAAE,GACnB,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAyD9B"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"git-recency.js","sourceRoot":"","sources":["../../src/metrics/git-recency.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,QAAQ,IAAI,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAC5D,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAGtC,MAAM,QAAQ,GAAG,SAAS,CAAC,UAAU,CAAC,CAAC;AAEvC,mDAAmD;AACnD,MAAM,cAAc,GAAG,KAAK,CAAC;AAE7B,
|
|
1
|
+
{"version":3,"file":"git-recency.js","sourceRoot":"","sources":["../../src/metrics/git-recency.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,QAAQ,IAAI,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAC5D,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAGtC,MAAM,QAAQ,GAAG,SAAS,CAAC,UAAU,CAAC,CAAC;AAEvC,mDAAmD;AACnD,MAAM,cAAc,GAAG,KAAK,CAAC;AAE7B,sEAAsE;AACtE,MAAM,cAAc,GAAG,EAAE,GAAG,IAAI,GAAG,IAAI,CAAC;AAExC,6DAA6D;AAC7D,MAAM,aAAa,GAAG,GAAG,CAAC;AAE1B;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAC5C,WAAmB,EACnB,MAAqB;IAErB,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;IAEzC,IAAI,CAAC;QACJ,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,MAAM,CAAC;QAEvC,4BAA4B;QAC5B,MAAM,WAAW,GAAG,IAAI,GAAG,EAAU,CAAC;QACtC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC5B,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QACjC,CAAC;QAED,qDAAqD;QACrD,4EAA4E;QAC5E,gEAAgE;QAChE,MAAM,cAAc,GAAG,MAAM,iBAAiB,CAAC,WAAW,EAAE,WAAW,CAAC,CAAC;QAEzE,gDAAgD;QAChD,IAAI,cAAc,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;YAC/B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC5B,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,EAAE,aAAa,CAAC,CAAC;YACrC,CAAC;YACD,OAAO,MAAM,CAAC;QACf,CAAC;QAED,oCAAoC;QACpC,IAAI,MAAM,GAAG,QAAQ,CAAC;QACtB,IAAI,MAAM,GAAG,CAAC,QAAQ,CAAC;QACvB,KAAK,MAAM,EAAE,IAAI,cAAc,CAAC,MAAM,EAAE,EAAE,CAAC;YAC1C,IAAI,EAAE,GAAG,MAAM;gBAAE,MAAM,GAAG,EAAE,CAAC;YAC7B,IAAI,EAAE,GAAG,MAAM;gBAAE,MAAM,GAAG,EAAE,CAAC;QAC9B,CAAC;QAED,MAAM,KAAK,GAAG,MAAM,GAAG,MAAM,CAAC;QAE9B,0BAA0B;QAC1B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC5B,MAAM,EAAE,GAAG,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;YAC9C,IAAI,EAAE,KAAK,SAAS,EAAE,CAAC;gBACtB,qCAAqC;gBACrC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,EAAE,aAAa,CAAC,CAAC;YACrC,CAAC;iBAAM,IAAI,KAAK,KAAK,CAAC,EAAE,CAAC;gBACxB,oCAAoC;gBACpC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;YACzB,CAAC;iBAAM,CAAC;gBACP,2CAA2C;gBAC3C,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,EAAE,CAAC,EAAE,GAAG,MAAM,CAAC,GAAG,KAAK,CAAC,CAAC;YAC7C,CAAC;QACF,CAAC;IACF,CAAC;IAAC,MAAM,CAAC;QACR,mDAAmD;QACnD,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC5B,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,EAAE,aAAa,CAAC,CAAC;QACrC,CAAC;IACF,CAAC;IAED,OAAO,MAAM,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,KAAK,UAAU,iBAAiB,CAAC,WAAmB,EAAE,WAAwB;IAC7E,MAAM,cAAc,GAAG,IAAI,GAAG,EAAkB,CAAC;IAEjD,IAAI,CAAC;QACJ,6EAA6E;QAC7E,sEAAsE;QACtE,kDAAkD;QAClD,qEAAqE;QACrE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,QAAQ,CACxC,KAAK,EACL,CAAC,KAAK,EAAE,mBAAmB,EAAE,qBAAqB,EAAE,aAAa,EAAE,oBAAoB,CAAC,EACxF;YACC,GAAG,EAAE,WAAW;YAChB,OAAO,EAAE,cAAc;YACvB,QAAQ,EAAE,OAAO;YACjB,SAAS,EAAE,cAAc;SACzB,CACD,CAAC;QAEF,IAAI,gBAAgB,GAAG,CAAC,CAAC;QACzB,IAAI,QAAQ,GAAG,KAAK,CAAC;QAErB,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;YACvC,IAAI,QAAQ;gBAAE,MAAM;YAEpB,IAAI,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;gBAChC,gBAAgB,GAAG,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACtD,IAAI,MAAM,CAAC,KAAK,CAAC,gBAAgB,CAAC,IAAI,gBAAgB,IAAI,CAAC,EAAE,CAAC;oBAC7D,gBAAgB,GAAG,CAAC,CAAC;gBACtB,CAAC;YACF,CAAC;iBAAM,IAAI,IAAI,CAAC,IAAI,EAAE,IAAI,gBAAgB,GAAG,CAAC,EAAE,CAAC;gBAChD,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;gBAC7B,yDAAyD;gBACzD,IAAI,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;oBAChE,cAAc,CAAC,GAAG,CAAC,QAAQ,EAAE,gBAAgB,CAAC,CAAC;oBAC/C,+CAA+C;oBAC/C,IAAI,cAAc,CAAC,IAAI,KAAK,WAAW,CAAC,IAAI,EAAE,CAAC;wBAC9C,QAAQ,GAAG,IAAI,CAAC;oBACjB,CAAC;gBACF,CAAC;YACF,CAAC;QACF,CAAC;IACF,CAAC;IAAC,MAAM,CAAC;QACR,+CAA+C;IAChD,CAAC;IAED,OAAO,cAAc,CAAC;AACvB,CAAC","sourcesContent":["/**\n * Git recency metric — recently modified files score higher.\n *\n * Runs a single `git log` command to get last-modified timestamps for all\n * files, then applies linear decay scoring.\n */\n\nimport { execFile as execFileCb } from \"node:child_process\";\nimport { promisify } from \"node:util\";\nimport type { StoredChunk } from \"../types.js\";\n\nconst execFile = promisify(execFileCb);\n\n/** Timeout for the git command in milliseconds. */\nconst GIT_TIMEOUT_MS = 15000;\n\n/** Max buffer for git output (10 MB — sufficient for large repos). */\nconst GIT_MAX_BUFFER = 10 * 1024 * 1024;\n\n/** Default score for files where git info is unavailable. */\nconst NEUTRAL_SCORE = 0.5;\n\n/**\n * Compute git recency scores based on when files were last modified.\n * More recently modified files score higher.\n * Falls back gracefully if git is unavailable.\n */\nexport async function computeGitRecencyScores(\n\tprojectRoot: string,\n\tchunks: StoredChunk[],\n): Promise<Map<number, number>> {\n\tconst scores = new Map<number, number>();\n\n\ttry {\n\t\tif (chunks.length === 0) return scores;\n\n\t\t// Collect unique file paths\n\t\tconst uniquePaths = new Set<string>();\n\t\tfor (const chunk of chunks) {\n\t\t\tuniquePaths.add(chunk.filePath);\n\t\t}\n\n\t\t// Get last-modified timestamps in a single git call.\n\t\t// Output format: \"COMMIT <timestamp>\" lines followed by changed file names.\n\t\t// We take the first (most recent) timestamp seen for each file.\n\t\tconst fileTimestamps = await getFileTimestamps(projectRoot, uniquePaths);\n\n\t\t// If no timestamps found, assign neutral scores\n\t\tif (fileTimestamps.size === 0) {\n\t\t\tfor (const chunk of chunks) {\n\t\t\t\tscores.set(chunk.id, NEUTRAL_SCORE);\n\t\t\t}\n\t\t\treturn scores;\n\t\t}\n\n\t\t// Find oldest and newest timestamps\n\t\tlet oldest = Infinity;\n\t\tlet newest = -Infinity;\n\t\tfor (const ts of fileTimestamps.values()) {\n\t\t\tif (ts < oldest) oldest = ts;\n\t\t\tif (ts > newest) newest = ts;\n\t\t}\n\n\t\tconst range = newest - oldest;\n\n\t\t// Assign scores to chunks\n\t\tfor (const chunk of chunks) {\n\t\t\tconst ts = fileTimestamps.get(chunk.filePath);\n\t\t\tif (ts === undefined) {\n\t\t\t\t// Not tracked by git → neutral score\n\t\t\t\tscores.set(chunk.id, NEUTRAL_SCORE);\n\t\t\t} else if (range === 0) {\n\t\t\t\t// All files have the same timestamp\n\t\t\t\tscores.set(chunk.id, 1);\n\t\t\t} else {\n\t\t\t\t// Linear decay: newest → 1.0, oldest → 0.0\n\t\t\t\tscores.set(chunk.id, (ts - oldest) / range);\n\t\t\t}\n\t\t}\n\t} catch {\n\t\t// Git unavailable entirely — assign neutral scores\n\t\tfor (const chunk of chunks) {\n\t\t\tscores.set(chunk.id, NEUTRAL_SCORE);\n\t\t}\n\t}\n\n\treturn scores;\n}\n\n/**\n * Get last-modified timestamps for files using a single `git log` invocation.\n * Returns a Map of filePath → unix timestamp (seconds).\n */\nasync function getFileTimestamps(projectRoot: string, targetPaths: Set<string>): Promise<Map<string, number>> {\n\tconst fileTimestamps = new Map<string, number>();\n\n\ttry {\n\t\t// Single git call: list all commits with their timestamps and changed files.\n\t\t// --diff-filter=AMCR: only additions, modifications, copies, renames.\n\t\t// --name-only: list file names after each commit.\n\t\t// --format=\"COMMIT %at\": prefix each commit with its unix timestamp.\n\t\tconst { stdout: output } = await execFile(\n\t\t\t\"git\",\n\t\t\t[\"log\", \"--max-count=10000\", \"--format=COMMIT %at\", \"--name-only\", \"--diff-filter=AMCR\"],\n\t\t\t{\n\t\t\t\tcwd: projectRoot,\n\t\t\t\ttimeout: GIT_TIMEOUT_MS,\n\t\t\t\tencoding: \"utf-8\",\n\t\t\t\tmaxBuffer: GIT_MAX_BUFFER,\n\t\t\t},\n\t\t);\n\n\t\tlet currentTimestamp = 0;\n\t\tlet foundAll = false;\n\n\t\tfor (const line of output.split(\"\\n\")) {\n\t\t\tif (foundAll) break;\n\n\t\t\tif (line.startsWith(\"COMMIT \")) {\n\t\t\t\tcurrentTimestamp = Number.parseInt(line.slice(7), 10);\n\t\t\t\tif (Number.isNaN(currentTimestamp) || currentTimestamp <= 0) {\n\t\t\t\t\tcurrentTimestamp = 0;\n\t\t\t\t}\n\t\t\t} else if (line.trim() && currentTimestamp > 0) {\n\t\t\t\tconst filePath = line.trim();\n\t\t\t\t// Only record the first (most recent) timestamp per file\n\t\t\t\tif (targetPaths.has(filePath) && !fileTimestamps.has(filePath)) {\n\t\t\t\t\tfileTimestamps.set(filePath, currentTimestamp);\n\t\t\t\t\t// Early exit once we've found all target files\n\t\t\t\t\tif (fileTimestamps.size === targetPaths.size) {\n\t\t\t\t\t\tfoundAll = true;\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t} catch {\n\t\t// Git unavailable or failed — return empty map\n\t}\n\n\treturn fileTimestamps;\n}\n"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"import-graph.d.ts","sourceRoot":"","sources":["../../src/metrics/import-graph.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,UAAU,CAAC;AAQ/C;;;;;GAKG;AACH,wBAAgB,wBAAwB,CACvC,EAAE,EAAE,cAAc,EAClB,UAAU,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EAC/B,gBAAgB,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,GACrC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAqGrB"
|
|
1
|
+
{"version":3,"file":"import-graph.d.ts","sourceRoot":"","sources":["../../src/metrics/import-graph.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,UAAU,CAAC;AAQ/C;;;;;GAKG;AACH,wBAAgB,wBAAwB,CACvC,EAAE,EAAE,cAAc,EAClB,UAAU,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EAC/B,gBAAgB,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,GACrC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAqGrB"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"import-graph.js","sourceRoot":"","sources":["../../src/metrics/import-graph.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,sDAAsD;AACtD,MAAM,kBAAkB,GAAG,GAAG,CAAC;AAE/B,mFAAmF;AACnF,MAAM,iBAAiB,GAAG,IAAI,CAAC;AAE/B;;;;;GAKG;AACH,MAAM,UAAU,wBAAwB,CACvC,EAAkB,EAClB,UAA+B,EAC/B,gBAAuC
|
|
1
|
+
{"version":3,"file":"import-graph.js","sourceRoot":"","sources":["../../src/metrics/import-graph.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,sDAAsD;AACtD,MAAM,kBAAkB,GAAG,GAAG,CAAC;AAE/B,mFAAmF;AACnF,MAAM,iBAAiB,GAAG,IAAI,CAAC;AAE/B;;;;;GAKG;AACH,MAAM,UAAU,wBAAwB,CACvC,EAAkB,EAClB,UAA+B,EAC/B,gBAAuC;IAEvC,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;IAEzC,IAAI,CAAC;QACJ,IAAI,UAAU,CAAC,IAAI,KAAK,CAAC;YAAE,OAAO,MAAM,CAAC;QAEzC,mEAAmE;QACnE,MAAM,QAAQ,GAAG,EAAE,CAAC,WAAW,EAAE,CAAC;QAClC,MAAM,YAAY,GAAG,IAAI,GAAG,EAAkB,CAAC;QAC/C,MAAM,YAAY,GAAG,IAAI,GAAG,EAAkB,CAAC;QAC/C,MAAM,gBAAgB,GAAG,IAAI,GAAG,EAAkB,CAAC;QAEnD,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YAC1B,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC;YACnC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC;YACnC,qEAAqE;YACrE,yDAAyD;YACzD,MAAM,QAAQ,GAAG,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;YAC5C,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;gBACrC,gBAAgB,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC;YACtC,CAAC;QACF,CAAC;QAED,mGAAmG;QACnG,SAAS,aAAa,CAAC,UAAkB;YACxC,OAAO,YAAY,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,gBAAgB,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QACzE,CAAC;QAED,gCAAgC;QAChC,MAAM,UAAU,GAAG,IAAI,GAAG,EAAkB,CAAC,CAAC,wCAAwC;QAEtF,KAAK,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,IAAI,UAAU,EAAE,CAAC;YAC9C,MAAM,eAAe,GAAG,SAAS,GAAG,kBAAkB,CAAC;YACvD,IAAI,eAAe,IAAI,CAAC;gBAAE,SAAS;YAEnC,IAAI,kBAAkB,GAAG,CAAC,CAAC;YAE3B,0BAA0B;YAC1B,MAAM,aAAa,GAAG,EAAE,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC;YAChD,KAAK,MAAM,UAAU,IAAI,aAAa,EAAE,CAAC;gBACxC,MAAM,YAAY,GAAG,aAAa,CAAC,UAAU,CAAC,CAAC;gBAC/C,IAAI,YAAY,KAAK,SAAS;oBAAE,SAAS;gBAEzC,IAAI,UAAU,CAAC,GAAG,CAAC,YAAY,CAAC,EAAE,CAAC;oBAClC,kBAAkB,EAAE,CAAC;gBACtB,CAAC;qBAAM,CAAC;oBACP,UAAU,CAAC,GAAG,CAAC,YAAY,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,GAAG,eAAe,CAAC,CAAC;gBACrF,CAAC;YACF,CAAC;YAED,8BAA8B;YAC9B,MAAM,QAAQ,GAAG,YAAY,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YAC1C,IAAI,QAAQ,EAAE,CAAC;gBACd,MAAM,WAAW,GAAG,EAAE,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;gBAChD,wCAAwC;gBACxC,MAAM,QAAQ,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;gBAC1C,MAAM,mBAAmB,GAAG,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBACrF,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,WAAW,EAAE,GAAG,mBAAmB,CAAC,CAAC,CAAC;gBAEzE,KAAK,MAAM,cAAc,IAAI,cAAc,EAAE,CAAC;oBAC7C,IAAI,UAAU,CAAC,GAAG,CAAC,cAAc,CAAC,EAAE,CAAC;wBACpC,kBAAkB,EAAE,CAAC;oBACtB,CAAC;yBAAM,CAAC;wBACP,UAAU,CAAC,GAAG,CAAC,cAAc,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC,GAAG,eAAe,CAAC,CAAC;oBACzF,CAAC;gBACF,CAAC;YACF,CAAC;YAED,0EAA0E;YAC1E,IAAI,kBAAkB,GAAG,CAAC,EAAE,CAAC;gBAC5B,MAAM,SAAS,GAAG,SAAS,GAAG,iBAAiB,GAAG,IAAI,CAAC,GAAG,CAAC,kBAAkB,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;gBACtF,UAAU,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC;YACnE,CAAC;QACF,CAAC;QAED,IAAI,UAAU,CAAC,IAAI,KAAK,CAAC;YAAE,OAAO,MAAM,CAAC;QAEzC,6BAA6B;QAC7B,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,KAAK,MAAM,KAAK,IAAI,UAAU,CAAC,MAAM,EAAE,EAAE,CAAC;YACzC,IAAI,KAAK,GAAG,QAAQ;gBAAE,QAAQ,GAAG,KAAK,CAAC;QACxC,CAAC;QAED,IAAI,QAAQ,IAAI,CAAC;YAAE,OAAO,MAAM,CAAC;QAEjC,qCAAqC;QACrC,KAAK,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,IAAI,UAAU,EAAE,CAAC;YAC9C,MAAM,QAAQ,GAAG,gBAAgB,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YAC9C,IAAI,CAAC,QAAQ,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;gBAAE,SAAS;YAEjD,+CAA+C;YAC/C,MAAM,aAAa,GAAG,SAAS,GAAG,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC;YAC7D,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;gBAChC,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC;YACpC,CAAC;QACF,CAAC;IACF,CAAC;IAAC,MAAM,CAAC;QACR,sCAAsC;IACvC,CAAC;IAED,OAAO,MAAM,CAAC;AACf,CAAC;AAED,6DAA6D;AAC7D,SAAS,cAAc,CAAC,QAAgB;IACvC,OAAO,QAAQ,CAAC,OAAO,CAAC,2EAA2E,EAAE,EAAE,CAAC,CAAC;AAC1G,CAAC","sourcesContent":["/**\n * Import graph proximity metric.\n *\n * Files that import or are imported by high-scoring files get a boost.\n * Uses a simple 1-hop propagation from seed scores.\n */\n\nimport type { SearchDatabase } from \"../db.js\";\n\n/** Fraction of seed score propagated to neighbors. */\nconst PROPAGATION_FACTOR = 0.5;\n\n/** Fraction of propagated score given back to seed files with many connections. */\nconst SELF_BOOST_FACTOR = 0.25;\n\n/**\n * Compute import graph proximity scores.\n * Files that import/are imported by high-scoring files get a boost.\n * Seed files also get a connectivity bonus based on how many of their\n * neighbors are in the seed set.\n */\nexport function computeImportGraphScores(\n\tdb: SearchDatabase,\n\tseedScores: Map<number, number>,\n\tfileIdToChunkIds: Map<number, number[]>,\n): Map<number, number> {\n\tconst scores = new Map<number, number>();\n\n\ttry {\n\t\tif (seedScores.size === 0) return scores;\n\n\t\t// Build fileId → filePath lookup and extension-stripped path index\n\t\tconst allFiles = db.getAllFiles();\n\t\tconst fileIdToPath = new Map<number, string>();\n\t\tconst pathToFileId = new Map<string, number>();\n\t\tconst strippedToFileId = new Map<string, number>();\n\n\t\tfor (const f of allFiles) {\n\t\t\tfileIdToPath.set(f.id, f.filePath);\n\t\t\tpathToFileId.set(f.filePath, f.id);\n\t\t\t// Also index without extension so import paths (which strip .js/.ts)\n\t\t\t// can match stored file paths (which keep the extension)\n\t\t\tconst stripped = stripExtension(f.filePath);\n\t\t\tif (!strippedToFileId.has(stripped)) {\n\t\t\t\tstrippedToFileId.set(stripped, f.id);\n\t\t\t}\n\t\t}\n\n\t\t/** Resolve an import target path to a fileId. Tries exact match first, then extension-stripped. */\n\t\tfunction resolveTarget(targetPath: string): number | undefined {\n\t\t\treturn pathToFileId.get(targetPath) ?? strippedToFileId.get(targetPath);\n\t\t}\n\n\t\t// Propagate scores to neighbors\n\t\tconst propagated = new Map<number, number>(); // fileId → accumulated propagated score\n\n\t\tfor (const [fileId, seedScore] of seedScores) {\n\t\t\tconst propagatedScore = seedScore * PROPAGATION_FACTOR;\n\t\t\tif (propagatedScore <= 0) continue;\n\n\t\t\tlet connectedSeedCount = 0;\n\n\t\t\t// Files this file imports\n\t\t\tconst importedPaths = db.getImportsFrom(fileId);\n\t\t\tfor (const targetPath of importedPaths) {\n\t\t\t\tconst targetFileId = resolveTarget(targetPath);\n\t\t\t\tif (targetFileId === undefined) continue;\n\n\t\t\t\tif (seedScores.has(targetFileId)) {\n\t\t\t\t\tconnectedSeedCount++;\n\t\t\t\t} else {\n\t\t\t\t\tpropagated.set(targetFileId, (propagated.get(targetFileId) ?? 0) + propagatedScore);\n\t\t\t\t}\n\t\t\t}\n\n\t\t\t// Files that import this file\n\t\t\tconst filePath = fileIdToPath.get(fileId);\n\t\t\tif (filePath) {\n\t\t\t\tconst importerIds = db.getImportersOf(filePath);\n\t\t\t\t// Also check extension-stripped variant\n\t\t\t\tconst stripped = stripExtension(filePath);\n\t\t\t\tconst strippedImporterIds = stripped !== filePath ? db.getImportersOf(stripped) : [];\n\t\t\t\tconst allImporterIds = new Set([...importerIds, ...strippedImporterIds]);\n\n\t\t\t\tfor (const importerFileId of allImporterIds) {\n\t\t\t\t\tif (seedScores.has(importerFileId)) {\n\t\t\t\t\t\tconnectedSeedCount++;\n\t\t\t\t\t} else {\n\t\t\t\t\t\tpropagated.set(importerFileId, (propagated.get(importerFileId) ?? 0) + propagatedScore);\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\n\t\t\t// Self-boost: seed files with many connections to other seeds get a bonus\n\t\t\tif (connectedSeedCount > 0) {\n\t\t\t\tconst selfBoost = seedScore * SELF_BOOST_FACTOR * Math.min(connectedSeedCount / 3, 1);\n\t\t\t\tpropagated.set(fileId, (propagated.get(fileId) ?? 0) + selfBoost);\n\t\t\t}\n\t\t}\n\n\t\tif (propagated.size === 0) return scores;\n\n\t\t// Find max for normalization\n\t\tlet maxScore = 0;\n\t\tfor (const score of propagated.values()) {\n\t\t\tif (score > maxScore) maxScore = score;\n\t\t}\n\n\t\tif (maxScore <= 0) return scores;\n\n\t\t// Distribute to chunks and normalize\n\t\tfor (const [fileId, fileScore] of propagated) {\n\t\t\tconst chunkIds = fileIdToChunkIds.get(fileId);\n\t\t\tif (!chunkIds || chunkIds.length === 0) continue;\n\n\t\t\t// Distribute equally among chunks in this file\n\t\t\tconst perChunkScore = fileScore / maxScore / chunkIds.length;\n\t\t\tfor (const chunkId of chunkIds) {\n\t\t\t\tscores.set(chunkId, perChunkScore);\n\t\t\t}\n\t\t}\n\t} catch {\n\t\t// If anything fails, return empty map\n\t}\n\n\treturn scores;\n}\n\n/** Strip common source file extensions for path matching. */\nfunction stripExtension(filePath: string): string {\n\treturn filePath.replace(/\\.[jt]sx?$|\\.py$|\\.go$|\\.rs$|\\.java$|\\.c$|\\.h$|\\.cpp$|\\.hpp$|\\.cc$|\\.cxx$/, \"\");\n}\n"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"path-match.d.ts","sourceRoot":"","sources":["../../src/metrics/path-match.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAG/C;;;GAGG;AACH,wBAAgB,sBAAsB,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CA+ChG"
|
|
1
|
+
{"version":3,"file":"path-match.d.ts","sourceRoot":"","sources":["../../src/metrics/path-match.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAG/C;;;GAGG;AACH,wBAAgB,sBAAsB,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CA+ChG"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"path-match.js","sourceRoot":"","sources":["../../src/metrics/path-match.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAEzC;;;GAGG;AACH,MAAM,UAAU,sBAAsB,CAAC,KAAa,EAAE,MAAqB
|
|
1
|
+
{"version":3,"file":"path-match.js","sourceRoot":"","sources":["../../src/metrics/path-match.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAEzC;;;GAGG;AACH,MAAM,UAAU,sBAAsB,CAAC,KAAa,EAAE,MAAqB;IAC1E,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;IAEzC,IAAI,CAAC;QACJ,MAAM,WAAW,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC;QACpC,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,MAAM,CAAC;QAE5C,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,CAAC;QAEtC,oEAAoE;QACpE,MAAM,cAAc,GAAG,IAAI,GAAG,EAAuB,CAAC;QAEtD,IAAI,QAAQ,GAAG,CAAC,CAAC;QAEjB,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC5B,IAAI,YAAY,GAAG,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;YACtD,IAAI,CAAC,YAAY,EAAE,CAAC;gBACnB,YAAY,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC;gBACjD,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;YAClD,CAAC;YAED,4DAA4D;YAC5D,IAAI,YAAY,GAAG,CAAC,CAAC;YACrB,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;gBAC3B,IAAI,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;oBAC1B,YAAY,EAAE,CAAC;gBAChB,CAAC;YACF,CAAC;YAED,MAAM,KAAK,GAAG,YAAY,GAAG,WAAW,CAAC,MAAM,CAAC;YAChD,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;gBACf,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,EAAE,KAAK,CAAC,CAAC;gBAC5B,IAAI,KAAK,GAAG,QAAQ;oBAAE,QAAQ,GAAG,KAAK,CAAC;YACxC,CAAC;QACF,CAAC;QAED,0DAA0D;QAC1D,IAAI,QAAQ,GAAG,CAAC,IAAI,QAAQ,KAAK,CAAC,EAAE,CAAC;YACpC,KAAK,MAAM,CAAC,EAAE,EAAE,KAAK,CAAC,IAAI,MAAM,EAAE,CAAC;gBAClC,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,KAAK,GAAG,QAAQ,CAAC,CAAC;YAClC,CAAC;QACF,CAAC;IACF,CAAC;IAAC,MAAM,CAAC;QACR,sCAAsC;IACvC,CAAC;IAED,OAAO,MAAM,CAAC;AACf,CAAC","sourcesContent":["/**\n * Path/filename similarity metric.\n *\n * Tokenizes query and file paths, computes a recall-oriented overlap score\n * (what fraction of query tokens appear in the path).\n */\n\nimport type { StoredChunk } from \"../types.js\";\nimport { tokenize } from \"./tokenize.js\";\n\n/**\n * Compute path/filename similarity scores.\n * Tokenizes query and file paths, returns Jaccard-like overlap score.\n */\nexport function computePathMatchScores(query: string, chunks: StoredChunk[]): Map<number, number> {\n\tconst scores = new Map<number, number>();\n\n\ttry {\n\t\tconst queryTokens = tokenize(query);\n\t\tif (queryTokens.length === 0) return scores;\n\n\t\tconst querySet = new Set(queryTokens);\n\n\t\t// Cache path tokens per file path (many chunks share the same file)\n\t\tconst pathTokenCache = new Map<string, Set<string>>();\n\n\t\tlet maxScore = 0;\n\n\t\tfor (const chunk of chunks) {\n\t\t\tlet pathTokenSet = pathTokenCache.get(chunk.filePath);\n\t\t\tif (!pathTokenSet) {\n\t\t\t\tpathTokenSet = new Set(tokenize(chunk.filePath));\n\t\t\t\tpathTokenCache.set(chunk.filePath, pathTokenSet);\n\t\t\t}\n\n\t\t\t// Score = |intersection| / |query tokens| (recall-oriented)\n\t\t\tlet intersection = 0;\n\t\t\tfor (const qt of querySet) {\n\t\t\t\tif (pathTokenSet.has(qt)) {\n\t\t\t\t\tintersection++;\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tconst score = intersection / queryTokens.length;\n\t\t\tif (score > 0) {\n\t\t\t\tscores.set(chunk.id, score);\n\t\t\t\tif (score > maxScore) maxScore = score;\n\t\t\t}\n\t\t}\n\n\t\t// Normalize to 0-1 (divide by max if not already bounded)\n\t\tif (maxScore > 0 && maxScore !== 1) {\n\t\t\tfor (const [id, score] of scores) {\n\t\t\t\tscores.set(id, score / maxScore);\n\t\t\t}\n\t\t}\n\t} catch {\n\t\t// If anything fails, return empty map\n\t}\n\n\treturn scores;\n}\n"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"symbol-match.d.ts","sourceRoot":"","sources":["../../src/metrics/symbol-match.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH;;;GAGG;AACH,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAyD3G"
|
|
1
|
+
{"version":3,"file":"symbol-match.d.ts","sourceRoot":"","sources":["../../src/metrics/symbol-match.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH;;;GAGG;AACH,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAyD3G"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"symbol-match.js","sourceRoot":"","sources":["../../src/metrics/symbol-match.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAEzC;;;GAGG;AACH,MAAM,UAAU,wBAAwB,CAAC,KAAa,EAAE,OAA8B
|
|
1
|
+
{"version":3,"file":"symbol-match.js","sourceRoot":"","sources":["../../src/metrics/symbol-match.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAEzC;;;GAGG;AACH,MAAM,UAAU,wBAAwB,CAAC,KAAa,EAAE,OAA8B;IACrF,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;IAEzC,IAAI,CAAC;QACJ,MAAM,WAAW,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC;QACpC,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,MAAM,CAAC;QAE5C,MAAM,UAAU,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;QAEvC,IAAI,QAAQ,GAAG,CAAC,CAAC;QAEjB,KAAK,MAAM,CAAC,OAAO,EAAE,WAAW,CAAC,IAAI,OAAO,EAAE,CAAC;YAC9C,IAAI,SAAS,GAAG,CAAC,CAAC;YAElB,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;gBACtC,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC;gBACnD,MAAM,WAAW,GAAG,UAAU,CAAC,WAAW,EAAE,CAAC;gBAE7C,0DAA0D;gBAC1D,IAAI,UAAU,GAAG,CAAC,CAAC;gBACnB,KAAK,MAAM,EAAE,IAAI,WAAW,EAAE,CAAC;oBAC9B,IAAI,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;wBAC1B,UAAU,EAAE,CAAC;oBACd,CAAC;gBACF,CAAC;gBAED,IAAI,KAAK,GAAG,UAAU,GAAG,WAAW,CAAC,MAAM,CAAC;gBAE5C,0DAA0D;gBAC1D,IAAI,WAAW,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;oBACtC,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,GAAG,GAAG,CAAC,CAAC;gBAClC,CAAC;gBACD,qDAAqD;qBAChD,IAAI,UAAU,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,WAAW,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;oBACtE,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,GAAG,GAAG,CAAC,CAAC;gBAClC,CAAC;gBAED,IAAI,KAAK,GAAG,SAAS;oBAAE,SAAS,GAAG,KAAK,CAAC;YAC1C,CAAC;YAED,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;gBACnB,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;gBAC/B,IAAI,SAAS,GAAG,QAAQ;oBAAE,QAAQ,GAAG,SAAS,CAAC;YAChD,CAAC;QACF,CAAC;QAED,mBAAmB;QACnB,IAAI,QAAQ,GAAG,CAAC,IAAI,QAAQ,KAAK,CAAC,EAAE,CAAC;YACpC,KAAK,MAAM,CAAC,EAAE,EAAE,KAAK,CAAC,IAAI,MAAM,EAAE,CAAC;gBAClC,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,KAAK,GAAG,QAAQ,CAAC,CAAC;YAClC,CAAC;QACF,CAAC;IACF,CAAC;IAAC,MAAM,CAAC;QACR,sCAAsC;IACvC,CAAC;IAED,OAAO,MAAM,CAAC;AACf,CAAC","sourcesContent":["/**\n * Symbol name match metric.\n *\n * Compares query terms against symbol names (function/class/etc names)\n * using tokenized overlap with bonuses for exact substring matches.\n */\n\nimport { tokenize } from \"./tokenize.js\";\n\n/**\n * Compute symbol name match scores.\n * Compares query terms against symbol names (function/class/etc names).\n */\nexport function computeSymbolMatchScores(query: string, symbols: Map<number, string[]>): Map<number, number> {\n\tconst scores = new Map<number, number>();\n\n\ttry {\n\t\tconst queryTokens = tokenize(query);\n\t\tif (queryTokens.length === 0) return scores;\n\n\t\tconst queryLower = query.toLowerCase();\n\n\t\tlet maxScore = 0;\n\n\t\tfor (const [chunkId, symbolNames] of symbols) {\n\t\t\tlet bestScore = 0;\n\n\t\t\tfor (const symbolName of symbolNames) {\n\t\t\t\tconst symbolTokens = new Set(tokenize(symbolName));\n\t\t\t\tconst symbolLower = symbolName.toLowerCase();\n\n\t\t\t\t// Token overlap: fraction of query tokens found in symbol\n\t\t\t\tlet matchCount = 0;\n\t\t\t\tfor (const qt of queryTokens) {\n\t\t\t\t\tif (symbolTokens.has(qt)) {\n\t\t\t\t\t\tmatchCount++;\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\tlet score = matchCount / queryTokens.length;\n\n\t\t\t\t// Bonus for exact substring match of query in symbol name\n\t\t\t\tif (symbolLower.includes(queryLower)) {\n\t\t\t\t\tscore = Math.min(1, score + 0.3);\n\t\t\t\t}\n\t\t\t\t// Bonus for exact substring match of symbol in query\n\t\t\t\telse if (queryLower.includes(symbolLower) && symbolLower.length >= 3) {\n\t\t\t\t\tscore = Math.min(1, score + 0.2);\n\t\t\t\t}\n\n\t\t\t\tif (score > bestScore) bestScore = score;\n\t\t\t}\n\n\t\t\tif (bestScore > 0) {\n\t\t\t\tscores.set(chunkId, bestScore);\n\t\t\t\tif (bestScore > maxScore) maxScore = bestScore;\n\t\t\t}\n\t\t}\n\n\t\t// Normalize to 0-1\n\t\tif (maxScore > 0 && maxScore !== 1) {\n\t\t\tfor (const [id, score] of scores) {\n\t\t\t\tscores.set(id, score / maxScore);\n\t\t\t}\n\t\t}\n\t} catch {\n\t\t// If anything fails, return empty map\n\t}\n\n\treturn scores;\n}\n"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tokenize.d.ts","sourceRoot":"","sources":["../../src/metrics/tokenize.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;;GAGG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAoB/C"
|
|
1
|
+
{"version":3,"file":"tokenize.d.ts","sourceRoot":"","sources":["../../src/metrics/tokenize.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;;GAGG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAoB/C"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tokenize.js","sourceRoot":"","sources":["../../src/metrics/tokenize.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;;GAGG;AACH,MAAM,UAAU,QAAQ,CAAC,IAAY
|
|
1
|
+
{"version":3,"file":"tokenize.js","sourceRoot":"","sources":["../../src/metrics/tokenize.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;;GAGG;AACH,MAAM,UAAU,QAAQ,CAAC,IAAY;IACpC,oFAAoF;IACpF,uFAAuF;IACvF,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,iBAAiB,EAAE,OAAO,CAAC,CAAC,OAAO,CAAC,uBAAuB,EAAE,OAAO,CAAC,CAAC;IAElG,6BAA6B;IAC7B,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;IAE3C,8CAA8C;IAC9C,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC1B,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QACjC,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YAC3C,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;YAChB,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACpB,CAAC;IACF,CAAC;IAED,OAAO,MAAM,CAAC;AACf,CAAC","sourcesContent":["/**\n * Shared tokenizer for path-match and symbol-match metrics.\n *\n * Splits text on common code boundaries (spaces, path separators, dots,\n * dashes, underscores, camelCase) and normalizes to lowercase.\n */\n\n/**\n * Tokenize text by splitting on spaces, `/`, `\\`, `.`, `-`, `_`, and\n * camelCase boundaries. Returns unique, lowercase tokens ≥ 2 chars.\n */\nexport function tokenize(text: string): string[] {\n\t// Insert a space before uppercase letters that follow lowercase letters (camelCase)\n\t// or before uppercase letters followed by lowercase (e.g., \"XMLParser\" → \"XML Parser\")\n\tconst spaced = text.replace(/([a-z])([A-Z])/g, \"$1 $2\").replace(/([A-Z]+)([A-Z][a-z])/g, \"$1 $2\");\n\n\t// Split on common delimiters\n\tconst parts = spaced.split(/[\\s/\\\\.\\-_]+/);\n\n\t// Lowercase, deduplicate, filter short tokens\n\tconst seen = new Set<string>();\n\tconst tokens: string[] = [];\n\tfor (const part of parts) {\n\t\tconst lower = part.toLowerCase();\n\t\tif (lower.length >= 2 && !seen.has(lower)) {\n\t\t\tseen.add(lower);\n\t\t\ttokens.push(lower);\n\t\t}\n\t}\n\n\treturn tokens;\n}\n"]}
|
package/dist/poem.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"poem.d.ts","sourceRoot":"","sources":["../src/poem.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAiC,KAAK,YAAY,EAAE,MAAM,YAAY,CAAC;AAM9E,MAAM,WAAW,eAAe;IAC/B,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,YAAY,CAAC;IACrB,IAAI,EAAE,MAAM,CAAC;CACb;AA4KD;;;;;;;GAOG;AACH,wBAAgB,QAAQ,CAAC,UAAU,EAAE,GAAG,CAAC,MAAM,EAAE,YAAY,CAAC,EAAE,SAAS,EAAE,SAAS,EAAE,IAAI,SAAO,GAAG,eAAe,EAAE,CA+CpH"
|
|
1
|
+
{"version":3,"file":"poem.d.ts","sourceRoot":"","sources":["../src/poem.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAiC,KAAK,YAAY,EAAE,MAAM,YAAY,CAAC;AAM9E,MAAM,WAAW,eAAe;IAC/B,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,YAAY,CAAC;IACrB,IAAI,EAAE,MAAM,CAAC;CACb;AA4KD;;;;;;;GAOG;AACH,wBAAgB,QAAQ,CAAC,UAAU,EAAE,GAAG,CAAC,MAAM,EAAE,YAAY,CAAC,EAAE,SAAS,EAAE,SAAS,EAAE,IAAI,SAAO,GAAG,eAAe,EAAE,CA+CpH"}
|
package/dist/poem.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"poem.js","sourceRoot":"","sources":["../src/poem.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAGH,OAAO,EAAE,YAAY,EAAsC,MAAM,YAAY,CAAC;AAmB9E,MAAM,cAAc,GAAqC;IACxD,UAAU,EAAE;QACX,IAAI,EAAE,CAAC;QACP,MAAM,EAAE,CAAC;QACT,SAAS,EAAE,CAAC;QACZ,WAAW,EAAE,CAAC;QACd,WAAW,EAAE,CAAC;QACd,UAAU,EAAE,CAAC;KACb;IACD,gBAAgB,EAAE;QACjB,IAAI,EAAE,CAAC;QACP,MAAM,EAAE,CAAC;QACT,SAAS,EAAE,CAAC;QACZ,WAAW,EAAE,CAAC;QACd,WAAW,EAAE,CAAC;QACd,UAAU,EAAE,CAAC;KACb;IACD,SAAS,EAAE;QACV,IAAI,EAAE,CAAC;QACP,MAAM,EAAE,CAAC;QACT,SAAS,EAAE,CAAC;QACZ,WAAW,EAAE,CAAC;QACd,WAAW,EAAE,CAAC;QACd,UAAU,EAAE,CAAC;KACb;CACD,CAAC;AAEF,+EAA+E;AAC/E,uDAAuD;AACvD,+EAA+E;AAE/E,MAAM,OAAO,GAAG,IAAI,CAAC;AAErB,+EAA+E;AAC/E,UAAU;AACV,+EAA+E;AAE/E;;GAEG;AACH,SAAS,SAAS,CAAC,UAAqC,EAAE,IAAY,EAAe;IACpF,IAAI,UAAU,CAAC,IAAI,IAAI,IAAI,EAAE,CAAC;QAC7B,OAAO,IAAI,GAAG,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC,CAAC;IACnC,CAAC;IAED,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAEhC,KAAK,MAAM,MAAM,IAAI,YAAY,EAAE,CAAC;QACnC,MAAM,KAAK,GAA4B,EAAE,CAAC;QAC1C,KAAK,MAAM,CAAC,EAAE,EAAE,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;YACvC,KAAK,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACvC,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAClC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;QAC3C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;YAChC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACxB,CAAC;IACF,CAAC;IAED,OAAO,KAAK,CAAC;AAAA,CACb;AAED,+EAA+E;AAC/E,+BAA+B;AAC/B,+EAA+E;AAE/E;;;;;;;;;;;;;;;;;;GAkBG;AACH,SAAS,oBAAoB,CAC5B,MAAsB,EACtB,OAAsB,EACtB,IAAY,EACiC;IAC7C,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IACxB,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACtC,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,mCAAmC;IACnC,MAAM,OAAO,GAAG,IAAI,KAAK,CAAS,CAAC,CAAC,CAAC;IACrC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;QAAE,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IAE3C,KAAK,MAAM,MAAM,IAAI,YAAY,EAAE,CAAC;QACnC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;QAC/B,IAAI,MAAM,KAAK,CAAC;YAAE,SAAS;QAC3B,WAAW,IAAI,MAAM,CAAC;QAEtB,8CAA8C;QAC9C,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;QAC/B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAE3E,6CAA6C;QAC7C,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QAE5B,oDAAoD;QACpD,iEAA+D;QAC/D,KAAK,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC;YAC/B,MAAM,CAAC,GAAG,MAAM,CAAC,EAAE,CAAC,CAAC;YACrB,MAAM,KAAK,GAAG,CAAC,GAAG,CAAC,CAAC;YACpB,KAAK,IAAI,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC;gBACpC,MAAM,CAAC,KAAK,GAAG,MAAM,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC;YACtC,CAAC;QACF,CAAC;IACF,CAAC;IAED,OAAO,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;AAAA,CAC7B;AAED;;;;;;;;;GASG;AACH,SAAS,cAAc,CAAC,MAAmB,EAAE,CAAS,EAAE,WAAmB,EAAgB;IAC1F,MAAM,OAAO,GAAG,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC;IACpC,MAAM,SAAS,GAAG,WAAW,GAAG,GAAG,CAAC;IAEpC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5B,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,MAAM,KAAK,GAAG,CAAC,GAAG,CAAC,CAAC;QAEpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5B,IAAI,CAAC,KAAK,CAAC;gBAAE,SAAS;YACtB,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;YAChC,MAAM,IAAI,KAAK,CAAC;YAChB,IAAI,KAAK,GAAG,SAAS;gBAAE,aAAa,EAAE,CAAC;YACvC,IAAI,KAAK,GAAG,SAAS;gBAAE,aAAa,EAAE,CAAC;QACxC,CAAC;QAED,MAAM,OAAO,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7D,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,GAAG,CAAC,aAAa,GAAG,OAAO,CAAC,CAAC,GAAG,CAAC,aAAa,GAAG,OAAO,CAAC,CAAC;IAChF,CAAC;IAED,OAAO,OAAO,CAAC;AAAA,CACf;AAED,+EAA+E;AAC/E,aAAa;AACb,+EAA+E;AAE/E;;;;;;;GAOG;AACH,MAAM,UAAU,QAAQ,CAAC,UAAqC,EAAE,SAAoB,EAAE,IAAI,GAAG,IAAI,EAAqB;IACrH,IAAI,UAAU,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAErC,uCAAqC;IACrC,MAAM,SAAS,GAAG,SAAS,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;IAE9C,wBAAwB;IACxB,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,MAAM,MAAM,GAAmB,EAAE,CAAC;IAElC,KAAK,MAAM,EAAE,IAAI,SAAS,EAAE,CAAC;QAC5B,MAAM,CAAC,GAAG,UAAU,CAAC,GAAG,CAAC,EAAE,CAAE,CAAC;QAC9B,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACb,MAAM,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,CAAC,CAAC,IAAI,IAAI,CAAC;YACjB,MAAM,EAAE,CAAC,CAAC,MAAM,IAAI,CAAC;YACrB,SAAS,EAAE,CAAC,CAAC,SAAS,IAAI,CAAC;YAC3B,WAAW,EAAE,CAAC,CAAC,WAAW,IAAI,CAAC;YAC/B,WAAW,EAAE,CAAC,CAAC,WAAW,IAAI,CAAC;YAC/B,UAAU,EAAE,CAAC,CAAC,UAAU,IAAI,CAAC;SAC7B,CAAC,CAAC;IACJ,CAAC;IAED,MAAM,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC;IAErB,8BAA4B;IAC5B,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QACb,OAAO,CAAC,EAAE,EAAE,EAAE,GAAG,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC;IACrD,CAAC;IAED,sDAAsD;IACtD,MAAM,OAAO,GAAG,cAAc,CAAC,SAAS,CAAC,CAAC;IAC1C,MAAM,CAAC,MAAM,EAAE,WAAW,CAAC,GAAG,oBAAoB,CAAC,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;IAE1E,4BAA4B;IAC5B,MAAM,OAAO,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC,EAAE,WAAW,CAAC,CAAC;IAEvD,mDAAmD;IACnD,MAAM,KAAK,GAAG,IAAI,KAAK,CAAS,CAAC,CAAC,CAAC;IACnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;QAAE,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IACzC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;IAE9C,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,EAAE,CAAC,CAAC;QAChC,EAAE,EAAE,GAAG,CAAC,GAAG,CAAC;QACZ,MAAM,EAAE,MAAM,CAAC,GAAG,CAAC;QACnB,IAAI;KACJ,CAAC,CAAC,CAAC;AAAA,CACJ","sourcesContent":["/**\n * POEM — Pareto-Optimal Embedded Modeling, multi-metric ranking.\n *\n * Ranks search candidates across multiple relevance metrics without requiring\n * hand-tuned weights. Uses the TFPR (Top-Fraction Pareto Ranker) approach:\n * vectorized dominance matrix computation with column duplication for\n * query-type-dependent metric weighting.\n *\n * Algorithm:\n * 1. Prune: per-metric top-K → union of survivors\n * 2. Build objectives matrix with column duplication for query-type weighting\n * 3. For each objective column, sort candidates and accumulate pairwise\n * dominance counts (duplicate columns contribute via weight multiplier)\n * 4. Compute fitness: meanDominance × (numDominating + ε) / (numSubmitting + ε)\n * 5. Sort by fitness, assign ranks\n *\n * References:\n * - POEM paper: https://iopscience.iop.org/article/10.1088/2632-2153/ab891b\n * - TFPR: https://github.com/merckgroup/aidd_tfpr\n * - colourdle: https://github.com/aebrer/colourdle\n */\n\nimport type { QueryType } from \"./query-classifier.js\";\nimport { METRIC_NAMES, type MetricName, type MetricScores } from \"./types.js\";\n\n// ============================================================================\n// Public types\n// ============================================================================\n\nexport interface RankedCandidate {\n\tid: number;\n\tscores: MetricScores;\n\trank: number;\n}\n\n// ============================================================================\n// Column duplication config per query type\n// ============================================================================\n\n/** How many times each metric column appears in the objectives matrix. */\ntype ColumnWeights = Record<MetricName, number>;\n\nconst COLUMN_WEIGHTS: Record<QueryType, ColumnWeights> = {\n\tidentifier: {\n\t\tbm25: 2,\n\t\tcosine: 1,\n\t\tpathMatch: 1,\n\t\tsymbolMatch: 2,\n\t\timportGraph: 1,\n\t\tgitRecency: 1,\n\t},\n\tnatural_language: {\n\t\tbm25: 1,\n\t\tcosine: 2,\n\t\tpathMatch: 1,\n\t\tsymbolMatch: 1,\n\t\timportGraph: 1,\n\t\tgitRecency: 1,\n\t},\n\tpath_like: {\n\t\tbm25: 1,\n\t\tcosine: 1,\n\t\tpathMatch: 3,\n\t\tsymbolMatch: 1,\n\t\timportGraph: 1,\n\t\tgitRecency: 1,\n\t},\n};\n\n// ============================================================================\n// Fitness smoothing constant (avoids division by zero)\n// ============================================================================\n\nconst EPSILON = 0.05;\n\n// ============================================================================\n// Pruning\n// ============================================================================\n\n/**\n * Per-metric top-K pruning → union of surviving candidate IDs.\n */\nfunction pruneTopK(candidates: Map<number, MetricScores>, topK: number): Set<number> {\n\tif (candidates.size <= topK) {\n\t\treturn new Set(candidates.keys());\n\t}\n\n\tconst union = new Set<number>();\n\n\tfor (const metric of METRIC_NAMES) {\n\t\tconst pairs: Array<[number, number]> = [];\n\t\tfor (const [id, scores] of candidates) {\n\t\t\tpairs.push([id, scores[metric] ?? 0]);\n\t\t}\n\t\tpairs.sort((a, b) => b[1] - a[1]);\n\t\tconst limit = Math.min(topK, pairs.length);\n\t\tfor (let i = 0; i < limit; i++) {\n\t\t\tunion.add(pairs[i][0]);\n\t\t}\n\t}\n\n\treturn union;\n}\n\n// ============================================================================\n// Dominance matrix computation\n// ============================================================================\n\n/**\n * Build the dominance count matrix using the TFPR approach.\n *\n * For each objective (metric), sorts candidates and accumulates pairwise\n * dominance: if candidate i ranks above candidate j on an objective,\n * dominanceCounts[i][j] increases by the column weight.\n *\n * Duplicate columns (from column duplication) are handled by multiplying\n * the contribution by the weight rather than re-sorting — same result,\n * no redundant work.\n *\n * Uses Uint16Array to keep memory compact (max possible value per cell\n * is the sum of all weights, which is ≤ 10).\n *\n * @param scores Dense array of MetricScores, indexed 0..n-1\n * @param weights Column weights from query type\n * @param topK Only top-K per objective contribute to pairwise dominance\n * @returns Flat dominance count matrix [n × n] and the total weight sum\n */\nfunction buildDominanceCounts(\n\tscores: MetricScores[],\n\tweights: ColumnWeights,\n\ttopK: number,\n): [counts: Uint16Array, totalWeight: number] {\n\tconst n = scores.length;\n\tconst counts = new Uint16Array(n * n);\n\tlet totalWeight = 0;\n\n\t// Reusable index array for sorting\n\tconst indices = new Array<number>(n);\n\tfor (let i = 0; i < n; i++) indices[i] = i;\n\n\tfor (const metric of METRIC_NAMES) {\n\t\tconst weight = weights[metric];\n\t\tif (weight === 0) continue;\n\t\ttotalWeight += weight;\n\n\t\t// Sort candidates by this metric (descending)\n\t\tconst sorted = indices.slice();\n\t\tsorted.sort((a, b) => (scores[b][metric] ?? 0) - (scores[a][metric] ?? 0));\n\n\t\t// Only consider top-K for pairwise dominance\n\t\tconst k = Math.min(topK, n);\n\n\t\t// For each pair in the top-K where i ranks above j:\n\t\t// i dominates j on this objective → add weight to counts[i, j]\n\t\tfor (let ri = 0; ri < k; ri++) {\n\t\t\tconst i = sorted[ri];\n\t\t\tconst iBase = i * n;\n\t\t\tfor (let rj = ri + 1; rj < k; rj++) {\n\t\t\t\tcounts[iBase + sorted[rj]] += weight;\n\t\t\t}\n\t\t}\n\t}\n\n\treturn [counts, totalWeight];\n}\n\n/**\n * Compute fitness scores from the dominance count matrix.\n *\n * Fitness = meanDominance × (numDominating + ε) / (numSubmitting + ε)\n *\n * Where:\n * - meanDominance = average normalized dominance across all other candidates\n * - numDominating = count of candidates this one dominates (>50% of objectives)\n * - numSubmitting = count of candidates this one fails to dominate (<50%)\n */\nfunction computeFitness(counts: Uint16Array, n: number, totalWeight: number): Float64Array {\n\tconst fitness = new Float64Array(n);\n\tconst threshold = totalWeight * 0.5;\n\n\tfor (let i = 0; i < n; i++) {\n\t\tlet sumDom = 0;\n\t\tlet numDominating = 0;\n\t\tlet numSubmitting = 0;\n\t\tconst iBase = i * n;\n\n\t\tfor (let j = 0; j < n; j++) {\n\t\t\tif (i === j) continue;\n\t\t\tconst count = counts[iBase + j];\n\t\t\tsumDom += count;\n\t\t\tif (count > threshold) numDominating++;\n\t\t\tif (count < threshold) numSubmitting++;\n\t\t}\n\n\t\tconst meanDom = n > 1 ? sumDom / ((n - 1) * totalWeight) : 0;\n\t\tfitness[i] = (meanDom * (numDominating + EPSILON)) / (numSubmitting + EPSILON);\n\t}\n\n\treturn fitness;\n}\n\n// ============================================================================\n// Public API\n// ============================================================================\n\n/**\n * Rank candidates using POEM / TFPR.\n *\n * @param candidates Map of candidateId → MetricScores (all values 0–1)\n * @param queryType Query type for column duplication weighting\n * @param topK Per-metric pruning limit (default: 1000)\n * @returns Candidates ordered best-first with assigned ranks (0 = best)\n */\nexport function poemRank(candidates: Map<number, MetricScores>, queryType: QueryType, topK = 1000): RankedCandidate[] {\n\tif (candidates.size === 0) return [];\n\n\t// 1. Prune: per-metric top-K → union\n\tconst surviving = pruneTopK(candidates, topK);\n\n\t// 2. Build dense arrays\n\tconst ids: number[] = [];\n\tconst scores: MetricScores[] = [];\n\n\tfor (const id of surviving) {\n\t\tconst s = candidates.get(id)!;\n\t\tids.push(id);\n\t\tscores.push({\n\t\t\tbm25: s.bm25 ?? 0,\n\t\t\tcosine: s.cosine ?? 0,\n\t\t\tpathMatch: s.pathMatch ?? 0,\n\t\t\tsymbolMatch: s.symbolMatch ?? 0,\n\t\t\timportGraph: s.importGraph ?? 0,\n\t\t\tgitRecency: s.gitRecency ?? 0,\n\t\t});\n\t}\n\n\tconst n = ids.length;\n\n\t// Single candidate → rank 0\n\tif (n === 1) {\n\t\treturn [{ id: ids[0], scores: scores[0], rank: 0 }];\n\t}\n\n\t// 3. Compute dominance matrix with column duplication\n\tconst weights = COLUMN_WEIGHTS[queryType];\n\tconst [counts, totalWeight] = buildDominanceCounts(scores, weights, topK);\n\n\t// 4. Compute fitness scores\n\tconst fitness = computeFitness(counts, n, totalWeight);\n\n\t// 5. Sort by fitness (descending) and assign ranks\n\tconst order = new Array<number>(n);\n\tfor (let i = 0; i < n; i++) order[i] = i;\n\torder.sort((a, b) => fitness[b] - fitness[a]);\n\n\treturn order.map((idx, rank) => ({\n\t\tid: ids[idx],\n\t\tscores: scores[idx],\n\t\trank,\n\t}));\n}\n"]}
|
|
1
|
+
{"version":3,"file":"poem.js","sourceRoot":"","sources":["../src/poem.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAGH,OAAO,EAAE,YAAY,EAAsC,MAAM,YAAY,CAAC;AAmB9E,MAAM,cAAc,GAAqC;IACxD,UAAU,EAAE;QACX,IAAI,EAAE,CAAC;QACP,MAAM,EAAE,CAAC;QACT,SAAS,EAAE,CAAC;QACZ,WAAW,EAAE,CAAC;QACd,WAAW,EAAE,CAAC;QACd,UAAU,EAAE,CAAC;KACb;IACD,gBAAgB,EAAE;QACjB,IAAI,EAAE,CAAC;QACP,MAAM,EAAE,CAAC;QACT,SAAS,EAAE,CAAC;QACZ,WAAW,EAAE,CAAC;QACd,WAAW,EAAE,CAAC;QACd,UAAU,EAAE,CAAC;KACb;IACD,SAAS,EAAE;QACV,IAAI,EAAE,CAAC;QACP,MAAM,EAAE,CAAC;QACT,SAAS,EAAE,CAAC;QACZ,WAAW,EAAE,CAAC;QACd,WAAW,EAAE,CAAC;QACd,UAAU,EAAE,CAAC;KACb;CACD,CAAC;AAEF,+EAA+E;AAC/E,uDAAuD;AACvD,+EAA+E;AAE/E,MAAM,OAAO,GAAG,IAAI,CAAC;AAErB,+EAA+E;AAC/E,UAAU;AACV,+EAA+E;AAE/E;;GAEG;AACH,SAAS,SAAS,CAAC,UAAqC,EAAE,IAAY;IACrE,IAAI,UAAU,CAAC,IAAI,IAAI,IAAI,EAAE,CAAC;QAC7B,OAAO,IAAI,GAAG,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC,CAAC;IACnC,CAAC;IAED,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAEhC,KAAK,MAAM,MAAM,IAAI,YAAY,EAAE,CAAC;QACnC,MAAM,KAAK,GAA4B,EAAE,CAAC;QAC1C,KAAK,MAAM,CAAC,EAAE,EAAE,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;YACvC,KAAK,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACvC,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAClC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;QAC3C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;YAChC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACxB,CAAC;IACF,CAAC;IAED,OAAO,KAAK,CAAC;AACd,CAAC;AAED,+EAA+E;AAC/E,+BAA+B;AAC/B,+EAA+E;AAE/E;;;;;;;;;;;;;;;;;;GAkBG;AACH,SAAS,oBAAoB,CAC5B,MAAsB,EACtB,OAAsB,EACtB,IAAY;IAEZ,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IACxB,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACtC,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,mCAAmC;IACnC,MAAM,OAAO,GAAG,IAAI,KAAK,CAAS,CAAC,CAAC,CAAC;IACrC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;QAAE,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IAE3C,KAAK,MAAM,MAAM,IAAI,YAAY,EAAE,CAAC;QACnC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;QAC/B,IAAI,MAAM,KAAK,CAAC;YAAE,SAAS;QAC3B,WAAW,IAAI,MAAM,CAAC;QAEtB,8CAA8C;QAC9C,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;QAC/B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAE3E,6CAA6C;QAC7C,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QAE5B,oDAAoD;QACpD,+DAA+D;QAC/D,KAAK,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC;YAC/B,MAAM,CAAC,GAAG,MAAM,CAAC,EAAE,CAAC,CAAC;YACrB,MAAM,KAAK,GAAG,CAAC,GAAG,CAAC,CAAC;YACpB,KAAK,IAAI,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC;gBACpC,MAAM,CAAC,KAAK,GAAG,MAAM,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC;YACtC,CAAC;QACF,CAAC;IACF,CAAC;IAED,OAAO,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;AAC9B,CAAC;AAED;;;;;;;;;GASG;AACH,SAAS,cAAc,CAAC,MAAmB,EAAE,CAAS,EAAE,WAAmB;IAC1E,MAAM,OAAO,GAAG,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC;IACpC,MAAM,SAAS,GAAG,WAAW,GAAG,GAAG,CAAC;IAEpC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5B,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,MAAM,KAAK,GAAG,CAAC,GAAG,CAAC,CAAC;QAEpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5B,IAAI,CAAC,KAAK,CAAC;gBAAE,SAAS;YACtB,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;YAChC,MAAM,IAAI,KAAK,CAAC;YAChB,IAAI,KAAK,GAAG,SAAS;gBAAE,aAAa,EAAE,CAAC;YACvC,IAAI,KAAK,GAAG,SAAS;gBAAE,aAAa,EAAE,CAAC;QACxC,CAAC;QAED,MAAM,OAAO,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7D,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,GAAG,CAAC,aAAa,GAAG,OAAO,CAAC,CAAC,GAAG,CAAC,aAAa,GAAG,OAAO,CAAC,CAAC;IAChF,CAAC;IAED,OAAO,OAAO,CAAC;AAChB,CAAC;AAED,+EAA+E;AAC/E,aAAa;AACb,+EAA+E;AAE/E;;;;;;;GAOG;AACH,MAAM,UAAU,QAAQ,CAAC,UAAqC,EAAE,SAAoB,EAAE,IAAI,GAAG,IAAI;IAChG,IAAI,UAAU,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAErC,qCAAqC;IACrC,MAAM,SAAS,GAAG,SAAS,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;IAE9C,wBAAwB;IACxB,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,MAAM,MAAM,GAAmB,EAAE,CAAC;IAElC,KAAK,MAAM,EAAE,IAAI,SAAS,EAAE,CAAC;QAC5B,MAAM,CAAC,GAAG,UAAU,CAAC,GAAG,CAAC,EAAE,CAAE,CAAC;QAC9B,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACb,MAAM,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,CAAC,CAAC,IAAI,IAAI,CAAC;YACjB,MAAM,EAAE,CAAC,CAAC,MAAM,IAAI,CAAC;YACrB,SAAS,EAAE,CAAC,CAAC,SAAS,IAAI,CAAC;YAC3B,WAAW,EAAE,CAAC,CAAC,WAAW,IAAI,CAAC;YAC/B,WAAW,EAAE,CAAC,CAAC,WAAW,IAAI,CAAC;YAC/B,UAAU,EAAE,CAAC,CAAC,UAAU,IAAI,CAAC;SAC7B,CAAC,CAAC;IACJ,CAAC;IAED,MAAM,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC;IAErB,4BAA4B;IAC5B,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QACb,OAAO,CAAC,EAAE,EAAE,EAAE,GAAG,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC;IACrD,CAAC;IAED,sDAAsD;IACtD,MAAM,OAAO,GAAG,cAAc,CAAC,SAAS,CAAC,CAAC;IAC1C,MAAM,CAAC,MAAM,EAAE,WAAW,CAAC,GAAG,oBAAoB,CAAC,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;IAE1E,4BAA4B;IAC5B,MAAM,OAAO,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC,EAAE,WAAW,CAAC,CAAC;IAEvD,mDAAmD;IACnD,MAAM,KAAK,GAAG,IAAI,KAAK,CAAS,CAAC,CAAC,CAAC;IACnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;QAAE,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IACzC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;IAE9C,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,EAAE,CAAC,CAAC;QAChC,EAAE,EAAE,GAAG,CAAC,GAAG,CAAC;QACZ,MAAM,EAAE,MAAM,CAAC,GAAG,CAAC;QACnB,IAAI;KACJ,CAAC,CAAC,CAAC;AACL,CAAC","sourcesContent":["/**\n * POEM — Pareto-Optimal Embedded Modeling, multi-metric ranking.\n *\n * Ranks search candidates across multiple relevance metrics without requiring\n * hand-tuned weights. Uses the TFPR (Top-Fraction Pareto Ranker) approach:\n * vectorized dominance matrix computation with column duplication for\n * query-type-dependent metric weighting.\n *\n * Algorithm:\n * 1. Prune: per-metric top-K → union of survivors\n * 2. Build objectives matrix with column duplication for query-type weighting\n * 3. For each objective column, sort candidates and accumulate pairwise\n * dominance counts (duplicate columns contribute via weight multiplier)\n * 4. Compute fitness: meanDominance × (numDominating + ε) / (numSubmitting + ε)\n * 5. Sort by fitness, assign ranks\n *\n * References:\n * - POEM paper: https://iopscience.iop.org/article/10.1088/2632-2153/ab891b\n * - TFPR: https://github.com/merckgroup/aidd_tfpr\n * - colourdle: https://github.com/aebrer/colourdle\n */\n\nimport type { QueryType } from \"./query-classifier.js\";\nimport { METRIC_NAMES, type MetricName, type MetricScores } from \"./types.js\";\n\n// ============================================================================\n// Public types\n// ============================================================================\n\nexport interface RankedCandidate {\n\tid: number;\n\tscores: MetricScores;\n\trank: number;\n}\n\n// ============================================================================\n// Column duplication config per query type\n// ============================================================================\n\n/** How many times each metric column appears in the objectives matrix. */\ntype ColumnWeights = Record<MetricName, number>;\n\nconst COLUMN_WEIGHTS: Record<QueryType, ColumnWeights> = {\n\tidentifier: {\n\t\tbm25: 2,\n\t\tcosine: 1,\n\t\tpathMatch: 1,\n\t\tsymbolMatch: 2,\n\t\timportGraph: 1,\n\t\tgitRecency: 1,\n\t},\n\tnatural_language: {\n\t\tbm25: 1,\n\t\tcosine: 2,\n\t\tpathMatch: 1,\n\t\tsymbolMatch: 1,\n\t\timportGraph: 1,\n\t\tgitRecency: 1,\n\t},\n\tpath_like: {\n\t\tbm25: 1,\n\t\tcosine: 1,\n\t\tpathMatch: 3,\n\t\tsymbolMatch: 1,\n\t\timportGraph: 1,\n\t\tgitRecency: 1,\n\t},\n};\n\n// ============================================================================\n// Fitness smoothing constant (avoids division by zero)\n// ============================================================================\n\nconst EPSILON = 0.05;\n\n// ============================================================================\n// Pruning\n// ============================================================================\n\n/**\n * Per-metric top-K pruning → union of surviving candidate IDs.\n */\nfunction pruneTopK(candidates: Map<number, MetricScores>, topK: number): Set<number> {\n\tif (candidates.size <= topK) {\n\t\treturn new Set(candidates.keys());\n\t}\n\n\tconst union = new Set<number>();\n\n\tfor (const metric of METRIC_NAMES) {\n\t\tconst pairs: Array<[number, number]> = [];\n\t\tfor (const [id, scores] of candidates) {\n\t\t\tpairs.push([id, scores[metric] ?? 0]);\n\t\t}\n\t\tpairs.sort((a, b) => b[1] - a[1]);\n\t\tconst limit = Math.min(topK, pairs.length);\n\t\tfor (let i = 0; i < limit; i++) {\n\t\t\tunion.add(pairs[i][0]);\n\t\t}\n\t}\n\n\treturn union;\n}\n\n// ============================================================================\n// Dominance matrix computation\n// ============================================================================\n\n/**\n * Build the dominance count matrix using the TFPR approach.\n *\n * For each objective (metric), sorts candidates and accumulates pairwise\n * dominance: if candidate i ranks above candidate j on an objective,\n * dominanceCounts[i][j] increases by the column weight.\n *\n * Duplicate columns (from column duplication) are handled by multiplying\n * the contribution by the weight rather than re-sorting — same result,\n * no redundant work.\n *\n * Uses Uint16Array to keep memory compact (max possible value per cell\n * is the sum of all weights, which is ≤ 10).\n *\n * @param scores Dense array of MetricScores, indexed 0..n-1\n * @param weights Column weights from query type\n * @param topK Only top-K per objective contribute to pairwise dominance\n * @returns Flat dominance count matrix [n × n] and the total weight sum\n */\nfunction buildDominanceCounts(\n\tscores: MetricScores[],\n\tweights: ColumnWeights,\n\ttopK: number,\n): [counts: Uint16Array, totalWeight: number] {\n\tconst n = scores.length;\n\tconst counts = new Uint16Array(n * n);\n\tlet totalWeight = 0;\n\n\t// Reusable index array for sorting\n\tconst indices = new Array<number>(n);\n\tfor (let i = 0; i < n; i++) indices[i] = i;\n\n\tfor (const metric of METRIC_NAMES) {\n\t\tconst weight = weights[metric];\n\t\tif (weight === 0) continue;\n\t\ttotalWeight += weight;\n\n\t\t// Sort candidates by this metric (descending)\n\t\tconst sorted = indices.slice();\n\t\tsorted.sort((a, b) => (scores[b][metric] ?? 0) - (scores[a][metric] ?? 0));\n\n\t\t// Only consider top-K for pairwise dominance\n\t\tconst k = Math.min(topK, n);\n\n\t\t// For each pair in the top-K where i ranks above j:\n\t\t// i dominates j on this objective → add weight to counts[i, j]\n\t\tfor (let ri = 0; ri < k; ri++) {\n\t\t\tconst i = sorted[ri];\n\t\t\tconst iBase = i * n;\n\t\t\tfor (let rj = ri + 1; rj < k; rj++) {\n\t\t\t\tcounts[iBase + sorted[rj]] += weight;\n\t\t\t}\n\t\t}\n\t}\n\n\treturn [counts, totalWeight];\n}\n\n/**\n * Compute fitness scores from the dominance count matrix.\n *\n * Fitness = meanDominance × (numDominating + ε) / (numSubmitting + ε)\n *\n * Where:\n * - meanDominance = average normalized dominance across all other candidates\n * - numDominating = count of candidates this one dominates (>50% of objectives)\n * - numSubmitting = count of candidates this one fails to dominate (<50%)\n */\nfunction computeFitness(counts: Uint16Array, n: number, totalWeight: number): Float64Array {\n\tconst fitness = new Float64Array(n);\n\tconst threshold = totalWeight * 0.5;\n\n\tfor (let i = 0; i < n; i++) {\n\t\tlet sumDom = 0;\n\t\tlet numDominating = 0;\n\t\tlet numSubmitting = 0;\n\t\tconst iBase = i * n;\n\n\t\tfor (let j = 0; j < n; j++) {\n\t\t\tif (i === j) continue;\n\t\t\tconst count = counts[iBase + j];\n\t\t\tsumDom += count;\n\t\t\tif (count > threshold) numDominating++;\n\t\t\tif (count < threshold) numSubmitting++;\n\t\t}\n\n\t\tconst meanDom = n > 1 ? sumDom / ((n - 1) * totalWeight) : 0;\n\t\tfitness[i] = (meanDom * (numDominating + EPSILON)) / (numSubmitting + EPSILON);\n\t}\n\n\treturn fitness;\n}\n\n// ============================================================================\n// Public API\n// ============================================================================\n\n/**\n * Rank candidates using POEM / TFPR.\n *\n * @param candidates Map of candidateId → MetricScores (all values 0–1)\n * @param queryType Query type for column duplication weighting\n * @param topK Per-metric pruning limit (default: 1000)\n * @returns Candidates ordered best-first with assigned ranks (0 = best)\n */\nexport function poemRank(candidates: Map<number, MetricScores>, queryType: QueryType, topK = 1000): RankedCandidate[] {\n\tif (candidates.size === 0) return [];\n\n\t// 1. Prune: per-metric top-K → union\n\tconst surviving = pruneTopK(candidates, topK);\n\n\t// 2. Build dense arrays\n\tconst ids: number[] = [];\n\tconst scores: MetricScores[] = [];\n\n\tfor (const id of surviving) {\n\t\tconst s = candidates.get(id)!;\n\t\tids.push(id);\n\t\tscores.push({\n\t\t\tbm25: s.bm25 ?? 0,\n\t\t\tcosine: s.cosine ?? 0,\n\t\t\tpathMatch: s.pathMatch ?? 0,\n\t\t\tsymbolMatch: s.symbolMatch ?? 0,\n\t\t\timportGraph: s.importGraph ?? 0,\n\t\t\tgitRecency: s.gitRecency ?? 0,\n\t\t});\n\t}\n\n\tconst n = ids.length;\n\n\t// Single candidate → rank 0\n\tif (n === 1) {\n\t\treturn [{ id: ids[0], scores: scores[0], rank: 0 }];\n\t}\n\n\t// 3. Compute dominance matrix with column duplication\n\tconst weights = COLUMN_WEIGHTS[queryType];\n\tconst [counts, totalWeight] = buildDominanceCounts(scores, weights, topK);\n\n\t// 4. Compute fitness scores\n\tconst fitness = computeFitness(counts, n, totalWeight);\n\n\t// 5. Sort by fitness (descending) and assign ranks\n\tconst order = new Array<number>(n);\n\tfor (let i = 0; i < n; i++) order[i] = i;\n\torder.sort((a, b) => fitness[b] - fitness[a]);\n\n\treturn order.map((idx, rank) => ({\n\t\tid: ids[idx],\n\t\tscores: scores[idx],\n\t\trank,\n\t}));\n}\n"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"query-classifier.d.ts","sourceRoot":"","sources":["../src/query-classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,MAAM,MAAM,SAAS,GAAG,YAAY,GAAG,kBAAkB,GAAG,WAAW,CAAC;AAoBxE;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS,CAyBtD"
|
|
1
|
+
{"version":3,"file":"query-classifier.d.ts","sourceRoot":"","sources":["../src/query-classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,MAAM,MAAM,SAAS,GAAG,YAAY,GAAG,kBAAkB,GAAG,WAAW,CAAC;AAoBxE;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS,CAyBtD"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"query-classifier.js","sourceRoot":"","sources":["../src/query-classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAIH,
|
|
1
|
+
{"version":3,"file":"query-classifier.js","sourceRoot":"","sources":["../src/query-classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAIH,wEAAwE;AACxE,MAAM,QAAQ,GAAG,YAAY,CAAC;AAE9B,4DAA4D;AAC5D,MAAM,QAAQ,GAAG,SAAS,CAAC;AAE3B,wFAAwF;AACxF,MAAM,kBAAkB,GAAG,iCAAiC,CAAC;AAE7D,uBAAuB;AACvB,MAAM,WAAW,GAAG,OAAO,CAAC;AAE5B,oDAAoD;AACpD,MAAM,cAAc,GAAG,gBAAgB,CAAC;AAExC,qFAAqF;AACrF,MAAM,WAAW,GAAG,YAAY,CAAC;AAEjC;;;;;GAKG;AACH,MAAM,UAAU,aAAa,CAAC,KAAa;IAC1C,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC;IAC7B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,kBAAkB,CAAC;IAEpD,oBAAoB;IACpB,IAAI,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC;QAAE,OAAO,WAAW,CAAC;IAClD,IAAI,cAAc,CAAC,IAAI,CAAC,OAAO,CAAC;QAAE,OAAO,WAAW,CAAC;IACrD,0EAA0E;IAC1E,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACnC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAC3E,OAAO,WAAW,CAAC;IACpB,CAAC;IAED,qBAAqB;IACrB,0DAA0D;IAC1D,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,YAAY,CAAC;IAC5C,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QACvB,mEAAmE;QACnE,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC3F,OAAO,YAAY,CAAC;QACrB,CAAC;IACF,CAAC;IAED,2BAA2B;IAC3B,OAAO,kBAAkB,CAAC;AAC3B,CAAC","sourcesContent":["/**\n * Classify search queries into types for POEM column weighting.\n *\n * Query types affect how metric columns are duplicated during ranking:\n * - identifier: emphasise BM25 and symbol-match scores\n * - path_like: emphasise path-match scores\n * - natural_language: emphasise cosine similarity scores\n */\n\nexport type QueryType = \"identifier\" | \"natural_language\" | \"path_like\";\n\n/** Matches camelCase or PascalCase boundaries (lowercase→uppercase). */\nconst CAMEL_RE = /[a-z][A-Z]/;\n\n/** Matches snake_case — word chars around an underscore. */\nconst SNAKE_RE = /\\w+_\\w+/;\n\n/** SCREAMING_SNAKE_CASE — two or more uppercase-letter groups joined by underscores. */\nconst SCREAMING_SNAKE_RE = /^[A-Z][A-Z0-9]*(?:_[A-Z0-9]+)+$/;\n\n/** Path separators. */\nconst PATH_SEP_RE = /[/\\\\]/;\n\n/** Dotted path like `foo.bar.baz` (3+ segments). */\nconst DOTTED_PATH_RE = /^\\w+\\.\\w+\\.\\w+/;\n\n/** File extension pattern — ends with `.ext` where ext is 1-5 alphanumeric chars. */\nconst FILE_EXT_RE = /\\.\\w{1,5}$/;\n\n/**\n * Classify a search query to guide POEM column weighting.\n *\n * @param query Raw user query string\n * @returns The detected query type\n */\nexport function classifyQuery(query: string): QueryType {\n\tconst trimmed = query.trim();\n\tif (trimmed.length === 0) return \"natural_language\";\n\n\t// --- path_like ---\n\tif (PATH_SEP_RE.test(trimmed)) return \"path_like\";\n\tif (DOTTED_PATH_RE.test(trimmed)) return \"path_like\";\n\t// File extension at end of a single token (e.g. \"config.yaml\", \"auth.ts\")\n\tconst words = trimmed.split(/\\s+/);\n\tif (words.length === 1 && FILE_EXT_RE.test(trimmed) && /\\./.test(trimmed)) {\n\t\treturn \"path_like\";\n\t}\n\n\t// --- identifier ---\n\t// Single token or short (≤3 words) with code-style naming\n\tif (words.length === 1) return \"identifier\";\n\tif (words.length <= 3) {\n\t\t// If any word looks like a code identifier, classify as identifier\n\t\tif (words.some((w) => CAMEL_RE.test(w) || SNAKE_RE.test(w) || SCREAMING_SNAKE_RE.test(w))) {\n\t\t\treturn \"identifier\";\n\t\t}\n\t}\n\n\t// --- natural_language ---\n\treturn \"natural_language\";\n}\n"]}
|
package/dist/scanner.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scanner.d.ts","sourceRoot":"","sources":["../src/scanner.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAMH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAM3C,4DAA4D;AAC5D,MAAM,WAAW,WAAW;IAC3B,4DAA4D;IAC5D,QAAQ,EAAE,MAAM,CAAC;IACjB,0BAA0B;IAC1B,QAAQ,EAAE,QAAQ,CAAC;IACnB,0DAA0D;IAC1D,KAAK,EAAE,MAAM,CAAC;CACd;AAiED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,QAAQ,GAAG,IAAI,CAIhE;AAED;;;;;;GAMG;AACH,wBAAsB,WAAW,CAChC,WAAW,EAAE,MAAM,EACnB,eAAe,CAAC,EAAE,MAAM,EACxB,WAAW,CAAC,EAAE,MAAM,EAAE,GACpB,OAAO,CAAC,WAAW,EAAE,CAAC,CAgCxB","sourcesContent":["/**\n * File scanner for the semantic search subsystem.\n *\n * Discovers project files for indexing by walking the directory tree,\n * respecting .gitignore rules, and classifying files by type.\n */\n\nimport { existsSync, readdirSync, readFileSync, type Stats, statSync } from \"node:fs\";\nimport { homedir } from \"node:os\";\nimport { extname, isAbsolute, join, relative, sep } from \"node:path\";\nimport ignore from \"ignore\";\nimport type { FileType } from \"./types.js\";\n\n// ============================================================================\n// Public types\n// ============================================================================\n\n/** A file discovered by the scanner, ready for indexing. */\nexport interface ScannedFile {\n\t/** Path relative to the project root (posix separators). */\n\tfilePath: string;\n\t/** Detected file type. */\n\tfileType: FileType;\n\t/** File modification time in milliseconds since epoch. */\n\tmtime: number;\n}\n\n// ============================================================================\n// Constants\n// ============================================================================\n\n/** Maximum file size to index (1 MB). */\nconst MAX_FILE_SIZE = 1024 * 1024;\n\n/** Directories unconditionally skipped during traversal. */\nconst SKIP_DIRS = new Set([\n\t\"node_modules\",\n\t\".git\",\n\t\".dreb/index\",\n\t\".hg\",\n\t\".svn\",\n\t\"__pycache__\",\n\t\".tox\",\n\t\".venv\",\n\t\"dist\",\n\t\"build\",\n\t\".next\",\n\t\".nuxt\",\n\t\"coverage\",\n\t\".cache\",\n]);\n\n/** Extension → FileType mapping. */\nconst EXTENSION_MAP: ReadonlyMap<string, FileType> = new Map<string, FileType>([\n\t// Tree-sitter languages\n\t[\".ts\", \"typescript\"],\n\t[\".tsx\", \"tsx\"],\n\t[\".js\", \"javascript\"],\n\t[\".mjs\", \"javascript\"],\n\t[\".cjs\", \"javascript\"],\n\t[\".py\", \"python\"],\n\t[\".go\", \"go\"],\n\t[\".rs\", \"rust\"],\n\t[\".java\", \"java\"],\n\t[\".c\", \"c\"],\n\t[\".h\", \"c\"],\n\t[\".cpp\", \"cpp\"],\n\t[\".hpp\", \"cpp\"],\n\t[\".cc\", \"cpp\"],\n\t[\".cxx\", \"cpp\"],\n\t[\".hh\", \"cpp\"],\n\t[\".hxx\", \"cpp\"],\n\t// Text file types\n\t[\".md\", \"markdown\"],\n\t[\".mdx\", \"markdown\"],\n\t[\".yml\", \"yaml\"],\n\t[\".yaml\", \"yaml\"],\n\t[\".json\", \"json\"],\n\t[\".toml\", \"toml\"],\n\t[\".txt\", \"plaintext\"],\n\t[\".cfg\", \"plaintext\"],\n\t[\".ini\", \"plaintext\"],\n\t[\".env\", \"plaintext\"],\n\t[\".conf\", \"plaintext\"],\n]);\n\n// ============================================================================\n// Public API\n// ============================================================================\n\n/**\n * Detect the {@link FileType} for a file path based on its extension.\n * Returns `null` for unrecognized extensions or files without an extension.\n */\nexport function detectFileType(filePath: string): FileType | null {\n\tconst ext = extname(filePath).toLowerCase();\n\tif (!ext) return null;\n\treturn EXTENSION_MAP.get(ext) ?? null;\n}\n\n/**\n * Scan a project directory and return all indexable files.\n *\n * Walks the tree rooted at {@link projectRoot}, respects `.gitignore` rules,\n * skips binary / oversized files, and optionally includes memory files from\n * a global memory directory.\n */\nexport async function scanProject(\n\tprojectRoot: string,\n\tglobalMemoryDir?: string,\n\tvisibleDirs?: string[],\n): Promise<ScannedFile[]> {\n\tconst results: ScannedFile[] = [];\n\n\t// Detect if projectRoot is the home directory — use shallow scan mode\n\t// to avoid recursing into the entire home dir (which would be catastrophic).\n\tconst isHomeDir = isHomeDirPath(projectRoot);\n\n\tif (isHomeDir) {\n\t\t// Shallow mode: only scan top-level files and ~/.dreb/memory/\n\t\tscanShallow(projectRoot, results);\n\t} else {\n\t\t// Normal mode: full recursive walk with .gitignore\n\t\tconst ig = ignore();\n\t\tloadGitignore(ig, projectRoot, projectRoot);\n\t\twalkDirectory(projectRoot, projectRoot, ig, results);\n\t}\n\n\t// Include tool-visible .dreb/ subdirs (bypasses gitignore).\n\t// In home dir mode, global memory is already handled separately below,\n\t// and we don't want to double-scan ~/.dreb/memory/.\n\tif (!isHomeDir) {\n\t\tfor (const dir of visibleDirs ?? []) {\n\t\t\tscanMemoryDir(dir, projectRoot, results);\n\t\t}\n\t}\n\n\t// Include global memory files if the directory exists\n\tif (globalMemoryDir && existsSync(globalMemoryDir)) {\n\t\tscanMemoryDir(globalMemoryDir, projectRoot, results);\n\t}\n\n\treturn results;\n}\n\n/** Check if a path is the user's home directory. */\nfunction isHomeDirPath(dir: string): boolean {\n\ttry {\n\t\tconst home = homedir();\n\t\t// Normalize trailing slashes for comparison\n\t\tconst normalizedDir = dir.replace(/[/\\\\]+$/, \"\");\n\t\tconst normalizedHome = home.replace(/[/\\\\]+$/, \"\");\n\t\treturn normalizedDir === normalizedHome;\n\t} catch {\n\t\treturn false;\n\t}\n}\n\n/**\n * Shallow scan mode for home directory: only index top-level files\n * (no directory recursion) to avoid scanning the entire home directory.\n * Memory files are handled separately via scanMemoryDir.\n */\nfunction scanShallow(dir: string, results: ScannedFile[]): void {\n\tlet entries: string[];\n\ttry {\n\t\tentries = readdirSync(dir);\n\t} catch {\n\t\treturn;\n\t}\n\n\tfor (const entry of entries) {\n\t\t// Skip dotfiles/dotdirs in home dir (except specific ones we want)\n\t\tif (entry.startsWith(\".\")) continue;\n\n\t\tconst fullPath = join(dir, entry);\n\n\t\tlet stats: Stats;\n\t\ttry {\n\t\t\tstats = statSync(fullPath);\n\t\t} catch {\n\t\t\tcontinue;\n\t\t}\n\n\t\t// Only index files, not directories (shallow mode)\n\t\tif (!stats.isFile()) continue;\n\t\tif (stats.size > MAX_FILE_SIZE) continue;\n\t\tif (stats.size === 0) continue;\n\n\t\tconst fileType = detectFileType(entry);\n\t\tif (!fileType) continue;\n\n\t\tresults.push({\n\t\t\tfilePath: entry,\n\t\t\tfileType,\n\t\t\tmtime: stats.mtimeMs,\n\t\t});\n\t}\n}\n\n// ============================================================================\n// Internal helpers\n// ============================================================================\n\ntype IgnoreMatcher = ReturnType<typeof ignore>;\n\n/** Convert an OS path to posix separators for ignore matching. */\nfunction toPosix(p: string): string {\n\treturn p.split(sep).join(\"/\");\n}\n\n/** Load .gitignore rules from a directory into the ignore matcher. */\nfunction loadGitignore(ig: IgnoreMatcher, dir: string, root: string): void {\n\tconst gitignorePath = join(dir, \".gitignore\");\n\tif (!existsSync(gitignorePath)) return;\n\n\ttry {\n\t\tconst content = readFileSync(gitignorePath, \"utf-8\");\n\t\tconst relDir = relative(root, dir);\n\t\tconst prefix = relDir ? `${toPosix(relDir)}/` : \"\";\n\n\t\tconst patterns = content\n\t\t\t.split(/\\r?\\n/)\n\t\t\t.map((line) => prefixPattern(line, prefix))\n\t\t\t.filter((line): line is string => line !== null);\n\n\t\tif (patterns.length > 0) {\n\t\t\tig.add(patterns);\n\t\t}\n\t} catch {\n\t\t// Unreadable .gitignore — skip silently\n\t}\n}\n\n/**\n * Prefix a .gitignore pattern with a directory path so it applies\n * correctly when matching against root-relative paths.\n */\nfunction prefixPattern(line: string, prefix: string): string | null {\n\tconst trimmed = line.trim();\n\tif (!trimmed) return null;\n\tif (trimmed.startsWith(\"#\") && !trimmed.startsWith(\"\\\\#\")) return null;\n\n\tlet pattern = line;\n\tlet negated = false;\n\n\tif (pattern.startsWith(\"!\")) {\n\t\tnegated = true;\n\t\tpattern = pattern.slice(1);\n\t} else if (pattern.startsWith(\"\\\\!\")) {\n\t\tpattern = pattern.slice(1);\n\t}\n\n\tconst prefixed = prefix ? `${prefix}${pattern}` : pattern;\n\treturn negated ? `!${prefixed}` : prefixed;\n}\n\n/**\n * Check if a directory component (relative to root) should be unconditionally skipped.\n * Handles both top-level names (\"node_modules\") and nested paths (\".dreb/index\").\n */\nfunction shouldSkipDir(relPath: string): boolean {\n\tconst posix = toPosix(relPath);\n\n\t// Check the directory name itself\n\tconst parts = posix.split(\"/\");\n\tconst name = parts[parts.length - 1];\n\tif (SKIP_DIRS.has(name)) return true;\n\n\t// Check multi-segment skip patterns (e.g. \".dreb/index\")\n\tfor (const skip of SKIP_DIRS) {\n\t\tif (skip.includes(\"/\") && (posix === skip || posix.endsWith(`/${skip}`))) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\treturn false;\n}\n\n/** Recursively walk a directory, collecting indexable files. */\nfunction walkDirectory(dir: string, root: string, ig: IgnoreMatcher, results: ScannedFile[]): void {\n\tlet entries: string[];\n\ttry {\n\t\tentries = readdirSync(dir);\n\t} catch {\n\t\treturn; // Permission denied, etc.\n\t}\n\n\tfor (const entry of entries) {\n\t\tconst fullPath = join(dir, entry);\n\t\tconst relPath = relative(root, fullPath);\n\t\tconst posixRel = toPosix(relPath);\n\n\t\tlet stats: Stats;\n\t\ttry {\n\t\t\tstats = statSync(fullPath);\n\t\t} catch {\n\t\t\tcontinue; // Broken symlink, etc.\n\t\t}\n\n\t\tif (stats.isDirectory()) {\n\t\t\t// Hard-coded skip list\n\t\t\tif (shouldSkipDir(relPath)) continue;\n\n\t\t\t// .gitignore check (directories need trailing slash)\n\t\t\tif (ig.ignores(`${posixRel}/`)) continue;\n\n\t\t\t// Load nested .gitignore before descending\n\t\t\tloadGitignore(ig, fullPath, root);\n\n\t\t\twalkDirectory(fullPath, root, ig, results);\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (!stats.isFile()) continue;\n\n\t\t// .gitignore check for files\n\t\tif (ig.ignores(posixRel)) continue;\n\n\t\t// Size gate\n\t\tif (stats.size > MAX_FILE_SIZE) continue;\n\t\tif (stats.size === 0) continue;\n\n\t\t// File type detection\n\t\tconst fileType = detectFileType(entry);\n\t\tif (!fileType) continue;\n\n\t\tresults.push({\n\t\t\tfilePath: posixRel,\n\t\t\tfileType,\n\t\t\tmtime: stats.mtimeMs,\n\t\t});\n\t}\n}\n\n/**\n * Scan a memory directory (project or global) for indexable files.\n *\n * Memory directories are always fully included — no .gitignore filtering —\n * because they live outside the normal project tree or in `.dreb/` which\n * is typically gitignored.\n *\n * Paths for global memory files are stored with a `~memory/` prefix\n * to distinguish them from project files.\n */\nfunction scanMemoryDir(memoryDir: string, projectRoot: string, results: ScannedFile[], baseMemoryDir?: string): void {\n\tlet entries: string[];\n\ttry {\n\t\tentries = readdirSync(memoryDir);\n\t} catch {\n\t\treturn;\n\t}\n\n\tfor (const entry of entries) {\n\t\tconst fullPath = join(memoryDir, entry);\n\n\t\tlet stats: Stats;\n\t\ttry {\n\t\t\tstats = statSync(fullPath);\n\t\t} catch {\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (stats.isDirectory()) {\n\t\t\t// Recurse into subdirectories\n\t\t\tscanMemoryDir(fullPath, projectRoot, results, baseMemoryDir ?? memoryDir);\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (!stats.isFile()) continue;\n\t\tif (stats.size > MAX_FILE_SIZE) continue;\n\t\tif (stats.size === 0) continue;\n\n\t\tconst fileType = detectFileType(entry);\n\t\tif (!fileType) continue;\n\n\t\t// If the memory dir is inside the project root, use normal relative path.\n\t\t// Otherwise, use a ~memory/ prefix so paths remain unique and identifiable.\n\t\tconst rel = relative(projectRoot, fullPath);\n\t\tconst isOutsideProject = rel.startsWith(\"..\") || isAbsolute(rel);\n\t\tconst rootMemoryDir = baseMemoryDir ?? memoryDir;\n\t\tconst filePath = isOutsideProject ? `~memory/${relative(rootMemoryDir, fullPath)}` : rel;\n\n\t\tresults.push({\n\t\t\tfilePath: toPosix(filePath),\n\t\t\tfileType,\n\t\t\tmtime: stats.mtimeMs,\n\t\t});\n\t}\n}\n"]}
|
|
1
|
+
{"version":3,"file":"scanner.d.ts","sourceRoot":"","sources":["../src/scanner.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAMH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAM3C,4DAA4D;AAC5D,MAAM,WAAW,WAAW;IAC3B,4DAA4D;IAC5D,QAAQ,EAAE,MAAM,CAAC;IACjB,0BAA0B;IAC1B,QAAQ,EAAE,QAAQ,CAAC;IACnB,0DAA0D;IAC1D,KAAK,EAAE,MAAM,CAAC;CACd;AAiED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,QAAQ,GAAG,IAAI,CAIhE;AAED;;;;;;GAMG;AACH,wBAAsB,WAAW,CAChC,WAAW,EAAE,MAAM,EACnB,eAAe,CAAC,EAAE,MAAM,EACxB,WAAW,CAAC,EAAE,MAAM,EAAE,GACpB,OAAO,CAAC,WAAW,EAAE,CAAC,CAgCxB"}
|
package/dist/scanner.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scanner.js","sourceRoot":"","sources":["../src/scanner.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,YAAY,EAAc,QAAQ,EAAE,MAAM,SAAS,CAAC;AACtF,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,WAAW,CAAC;AACrE,OAAO,MAAM,MAAM,QAAQ,CAAC;AAiB5B,+EAA+E;AAC/E,YAAY;AACZ,+EAA+E;AAE/E,yCAAyC;AACzC,MAAM,aAAa,GAAG,IAAI,GAAG,IAAI,CAAC;AAElC,4DAA4D;AAC5D,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC;IACzB,cAAc;IACd,MAAM;IACN,aAAa;IACb,KAAK;IACL,MAAM;IACN,aAAa;IACb,MAAM;IACN,OAAO;IACP,MAAM;IACN,OAAO;IACP,OAAO;IACP,OAAO;IACP,UAAU;IACV,QAAQ;CACR,CAAC,CAAC;AAEH,sCAAoC;AACpC,MAAM,aAAa,GAAkC,IAAI,GAAG,CAAmB;IAC9E,wBAAwB;IACxB,CAAC,KAAK,EAAE,YAAY,CAAC;IACrB,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,KAAK,EAAE,YAAY,CAAC;IACrB,CAAC,MAAM,EAAE,YAAY,CAAC;IACtB,CAAC,MAAM,EAAE,YAAY,CAAC;IACtB,CAAC,KAAK,EAAE,QAAQ,CAAC;IACjB,CAAC,KAAK,EAAE,IAAI,CAAC;IACb,CAAC,KAAK,EAAE,MAAM,CAAC;IACf,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,IAAI,EAAE,GAAG,CAAC;IACX,CAAC,IAAI,EAAE,GAAG,CAAC;IACX,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,KAAK,EAAE,KAAK,CAAC;IACd,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,KAAK,EAAE,KAAK,CAAC;IACd,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,kBAAkB;IAClB,CAAC,KAAK,EAAE,UAAU,CAAC;IACnB,CAAC,MAAM,EAAE,UAAU,CAAC;IACpB,CAAC,MAAM,EAAE,MAAM,CAAC;IAChB,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,MAAM,EAAE,WAAW,CAAC;IACrB,CAAC,MAAM,EAAE,WAAW,CAAC;IACrB,CAAC,MAAM,EAAE,WAAW,CAAC;IACrB,CAAC,MAAM,EAAE,WAAW,CAAC;IACrB,CAAC,OAAO,EAAE,WAAW,CAAC;CACtB,CAAC,CAAC;AAEH,+EAA+E;AAC/E,aAAa;AACb,+EAA+E;AAE/E;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,QAAgB,EAAmB;IACjE,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IACtB,OAAO,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC;AAAA,CACtC;AAED;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAChC,WAAmB,EACnB,eAAwB,EACxB,WAAsB,EACG;IACzB,MAAM,OAAO,GAAkB,EAAE,CAAC;IAElC,wEAAsE;IACtE,6EAA6E;IAC7E,MAAM,SAAS,GAAG,aAAa,CAAC,WAAW,CAAC,CAAC;IAE7C,IAAI,SAAS,EAAE,CAAC;QACf,8DAA8D;QAC9D,WAAW,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;IACnC,CAAC;SAAM,CAAC;QACP,mDAAmD;QACnD,MAAM,EAAE,GAAG,MAAM,EAAE,CAAC;QACpB,aAAa,CAAC,EAAE,EAAE,WAAW,EAAE,WAAW,CAAC,CAAC;QAC5C,aAAa,CAAC,WAAW,EAAE,WAAW,EAAE,EAAE,EAAE,OAAO,CAAC,CAAC;IACtD,CAAC;IAED,4DAA4D;IAC5D,uEAAuE;IACvE,oDAAoD;IACpD,IAAI,CAAC,SAAS,EAAE,CAAC;QAChB,KAAK,MAAM,GAAG,IAAI,WAAW,IAAI,EAAE,EAAE,CAAC;YACrC,aAAa,CAAC,GAAG,EAAE,WAAW,EAAE,OAAO,CAAC,CAAC;QAC1C,CAAC;IACF,CAAC;IAED,sDAAsD;IACtD,IAAI,eAAe,IAAI,UAAU,CAAC,eAAe,CAAC,EAAE,CAAC;QACpD,aAAa,CAAC,eAAe,EAAE,WAAW,EAAE,OAAO,CAAC,CAAC;IACtD,CAAC;IAED,OAAO,OAAO,CAAC;AAAA,CACf;AAED,oDAAoD;AACpD,SAAS,aAAa,CAAC,GAAW,EAAW;IAC5C,IAAI,CAAC;QACJ,MAAM,IAAI,GAAG,OAAO,EAAE,CAAC;QACvB,4CAA4C;QAC5C,MAAM,aAAa,GAAG,GAAG,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QACjD,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QACnD,OAAO,aAAa,KAAK,cAAc,CAAC;IACzC,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,KAAK,CAAC;IACd,CAAC;AAAA,CACD;AAED;;;;GAIG;AACH,SAAS,WAAW,CAAC,GAAW,EAAE,OAAsB,EAAQ;IAC/D,IAAI,OAAiB,CAAC;IACtB,IAAI,CAAC;QACJ,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACR,OAAO;IACR,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC7B,mEAAmE;QACnE,IAAI,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QAEpC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QAElC,IAAI,KAAY,CAAC;QACjB,IAAI,CAAC;YACJ,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACR,SAAS;QACV,CAAC;QAED,mDAAmD;QACnD,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAAE,SAAS;QAC9B,IAAI,KAAK,CAAC,IAAI,GAAG,aAAa;YAAE,SAAS;QACzC,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC;YAAE,SAAS;QAE/B,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QACvC,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,OAAO,CAAC,IAAI,CAAC;YACZ,QAAQ,EAAE,KAAK;YACf,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,OAAO;SACpB,CAAC,CAAC;IACJ,CAAC;AAAA,CACD;AAQD,kEAAkE;AAClE,SAAS,OAAO,CAAC,CAAS,EAAU;IACnC,OAAO,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAAA,CAC9B;AAED,sEAAsE;AACtE,SAAS,aAAa,CAAC,EAAiB,EAAE,GAAW,EAAE,IAAY,EAAQ;IAC1E,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC;IAC9C,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;QAAE,OAAO;IAEvC,IAAI,CAAC;QACJ,MAAM,OAAO,GAAG,YAAY,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;QACrD,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QACnC,MAAM,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QAEnD,MAAM,QAAQ,GAAG,OAAO;aACtB,KAAK,CAAC,OAAO,CAAC;aACd,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,aAAa,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;aAC1C,MAAM,CAAC,CAAC,IAAI,EAAkB,EAAE,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;QAElD,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAClB,CAAC;IACF,CAAC;IAAC,MAAM,CAAC;QACR,0CAAwC;IACzC,CAAC;AAAA,CACD;AAED;;;GAGG;AACH,SAAS,aAAa,CAAC,IAAY,EAAE,MAAc,EAAiB;IACnE,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAC1B,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEvE,IAAI,OAAO,GAAG,IAAI,CAAC;IACnB,IAAI,OAAO,GAAG,KAAK,CAAC;IAEpB,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;QAC7B,OAAO,GAAG,IAAI,CAAC;QACf,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC5B,CAAC;SAAM,IAAI,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;QACtC,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC5B,CAAC;IAED,MAAM,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,GAAG,OAAO,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;IAC1D,OAAO,OAAO,CAAC,CAAC,CAAC,IAAI,QAAQ,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;AAAA,CAC3C;AAED;;;GAGG;AACH,SAAS,aAAa,CAAC,OAAe,EAAW;IAChD,MAAM,KAAK,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IAE/B,kCAAkC;IAClC,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC/B,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACrC,IAAI,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAErC,yDAAyD;IACzD,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC9B,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,KAAK,IAAI,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;YAC1E,OAAO,IAAI,CAAC;QACb,CAAC;IACF,CAAC;IAED,OAAO,KAAK,CAAC;AAAA,CACb;AAED,gEAAgE;AAChE,SAAS,aAAa,CAAC,GAAW,EAAE,IAAY,EAAE,EAAiB,EAAE,OAAsB,EAAQ;IAClG,IAAI,OAAiB,CAAC;IACtB,IAAI,CAAC;QACJ,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,CAAC,0BAA0B;IACnC,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QAClC,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;QAElC,IAAI,KAAY,CAAC;QACjB,IAAI,CAAC;YACJ,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACR,SAAS,CAAC,uBAAuB;QAClC,CAAC;QAED,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACzB,uBAAuB;YACvB,IAAI,aAAa,CAAC,OAAO,CAAC;gBAAE,SAAS;YAErC,qDAAqD;YACrD,IAAI,EAAE,CAAC,OAAO,CAAC,GAAG,QAAQ,GAAG,CAAC;gBAAE,SAAS;YAEzC,2CAA2C;YAC3C,aAAa,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,CAAC,CAAC;YAElC,aAAa,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE,EAAE,OAAO,CAAC,CAAC;YAC3C,SAAS;QACV,CAAC;QAED,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAAE,SAAS;QAE9B,6BAA6B;QAC7B,IAAI,EAAE,CAAC,OAAO,CAAC,QAAQ,CAAC;YAAE,SAAS;QAEnC,YAAY;QACZ,IAAI,KAAK,CAAC,IAAI,GAAG,aAAa;YAAE,SAAS;QACzC,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC;YAAE,SAAS;QAE/B,sBAAsB;QACtB,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QACvC,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,OAAO,CAAC,IAAI,CAAC;YACZ,QAAQ,EAAE,QAAQ;YAClB,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,OAAO;SACpB,CAAC,CAAC;IACJ,CAAC;AAAA,CACD;AAED;;;;;;;;;GASG;AACH,SAAS,aAAa,CAAC,SAAiB,EAAE,WAAmB,EAAE,OAAsB,EAAE,aAAsB,EAAQ;IACpH,IAAI,OAAiB,CAAC;IACtB,IAAI,CAAC;QACJ,OAAO,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC;IAClC,CAAC;IAAC,MAAM,CAAC;QACR,OAAO;IACR,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;QAExC,IAAI,KAAY,CAAC;QACjB,IAAI,CAAC;YACJ,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACR,SAAS;QACV,CAAC;QAED,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACzB,8BAA8B;YAC9B,aAAa,CAAC,QAAQ,EAAE,WAAW,EAAE,OAAO,EAAE,aAAa,IAAI,SAAS,CAAC,CAAC;YAC1E,SAAS;QACV,CAAC;QAED,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAAE,SAAS;QAC9B,IAAI,KAAK,CAAC,IAAI,GAAG,aAAa;YAAE,SAAS;QACzC,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC;YAAE,SAAS;QAE/B,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QACvC,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,0EAA0E;QAC1E,4EAA4E;QAC5E,MAAM,GAAG,GAAG,QAAQ,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;QAC5C,MAAM,gBAAgB,GAAG,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,UAAU,CAAC,GAAG,CAAC,CAAC;QACjE,MAAM,aAAa,GAAG,aAAa,IAAI,SAAS,CAAC;QACjD,MAAM,QAAQ,GAAG,gBAAgB,CAAC,CAAC,CAAC,WAAW,QAAQ,CAAC,aAAa,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QAEzF,OAAO,CAAC,IAAI,CAAC;YACZ,QAAQ,EAAE,OAAO,CAAC,QAAQ,CAAC;YAC3B,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,OAAO;SACpB,CAAC,CAAC;IACJ,CAAC;AAAA,CACD","sourcesContent":["/**\n * File scanner for the semantic search subsystem.\n *\n * Discovers project files for indexing by walking the directory tree,\n * respecting .gitignore rules, and classifying files by type.\n */\n\nimport { existsSync, readdirSync, readFileSync, type Stats, statSync } from \"node:fs\";\nimport { homedir } from \"node:os\";\nimport { extname, isAbsolute, join, relative, sep } from \"node:path\";\nimport ignore from \"ignore\";\nimport type { FileType } from \"./types.js\";\n\n// ============================================================================\n// Public types\n// ============================================================================\n\n/** A file discovered by the scanner, ready for indexing. */\nexport interface ScannedFile {\n\t/** Path relative to the project root (posix separators). */\n\tfilePath: string;\n\t/** Detected file type. */\n\tfileType: FileType;\n\t/** File modification time in milliseconds since epoch. */\n\tmtime: number;\n}\n\n// ============================================================================\n// Constants\n// ============================================================================\n\n/** Maximum file size to index (1 MB). */\nconst MAX_FILE_SIZE = 1024 * 1024;\n\n/** Directories unconditionally skipped during traversal. */\nconst SKIP_DIRS = new Set([\n\t\"node_modules\",\n\t\".git\",\n\t\".dreb/index\",\n\t\".hg\",\n\t\".svn\",\n\t\"__pycache__\",\n\t\".tox\",\n\t\".venv\",\n\t\"dist\",\n\t\"build\",\n\t\".next\",\n\t\".nuxt\",\n\t\"coverage\",\n\t\".cache\",\n]);\n\n/** Extension → FileType mapping. */\nconst EXTENSION_MAP: ReadonlyMap<string, FileType> = new Map<string, FileType>([\n\t// Tree-sitter languages\n\t[\".ts\", \"typescript\"],\n\t[\".tsx\", \"tsx\"],\n\t[\".js\", \"javascript\"],\n\t[\".mjs\", \"javascript\"],\n\t[\".cjs\", \"javascript\"],\n\t[\".py\", \"python\"],\n\t[\".go\", \"go\"],\n\t[\".rs\", \"rust\"],\n\t[\".java\", \"java\"],\n\t[\".c\", \"c\"],\n\t[\".h\", \"c\"],\n\t[\".cpp\", \"cpp\"],\n\t[\".hpp\", \"cpp\"],\n\t[\".cc\", \"cpp\"],\n\t[\".cxx\", \"cpp\"],\n\t[\".hh\", \"cpp\"],\n\t[\".hxx\", \"cpp\"],\n\t// Text file types\n\t[\".md\", \"markdown\"],\n\t[\".mdx\", \"markdown\"],\n\t[\".yml\", \"yaml\"],\n\t[\".yaml\", \"yaml\"],\n\t[\".json\", \"json\"],\n\t[\".toml\", \"toml\"],\n\t[\".txt\", \"plaintext\"],\n\t[\".cfg\", \"plaintext\"],\n\t[\".ini\", \"plaintext\"],\n\t[\".env\", \"plaintext\"],\n\t[\".conf\", \"plaintext\"],\n]);\n\n// ============================================================================\n// Public API\n// ============================================================================\n\n/**\n * Detect the {@link FileType} for a file path based on its extension.\n * Returns `null` for unrecognized extensions or files without an extension.\n */\nexport function detectFileType(filePath: string): FileType | null {\n\tconst ext = extname(filePath).toLowerCase();\n\tif (!ext) return null;\n\treturn EXTENSION_MAP.get(ext) ?? null;\n}\n\n/**\n * Scan a project directory and return all indexable files.\n *\n * Walks the tree rooted at {@link projectRoot}, respects `.gitignore` rules,\n * skips binary / oversized files, and optionally includes memory files from\n * a global memory directory.\n */\nexport async function scanProject(\n\tprojectRoot: string,\n\tglobalMemoryDir?: string,\n\tvisibleDirs?: string[],\n): Promise<ScannedFile[]> {\n\tconst results: ScannedFile[] = [];\n\n\t// Detect if projectRoot is the home directory — use shallow scan mode\n\t// to avoid recursing into the entire home dir (which would be catastrophic).\n\tconst isHomeDir = isHomeDirPath(projectRoot);\n\n\tif (isHomeDir) {\n\t\t// Shallow mode: only scan top-level files and ~/.dreb/memory/\n\t\tscanShallow(projectRoot, results);\n\t} else {\n\t\t// Normal mode: full recursive walk with .gitignore\n\t\tconst ig = ignore();\n\t\tloadGitignore(ig, projectRoot, projectRoot);\n\t\twalkDirectory(projectRoot, projectRoot, ig, results);\n\t}\n\n\t// Include tool-visible .dreb/ subdirs (bypasses gitignore).\n\t// In home dir mode, global memory is already handled separately below,\n\t// and we don't want to double-scan ~/.dreb/memory/.\n\tif (!isHomeDir) {\n\t\tfor (const dir of visibleDirs ?? []) {\n\t\t\tscanMemoryDir(dir, projectRoot, results);\n\t\t}\n\t}\n\n\t// Include global memory files if the directory exists\n\tif (globalMemoryDir && existsSync(globalMemoryDir)) {\n\t\tscanMemoryDir(globalMemoryDir, projectRoot, results);\n\t}\n\n\treturn results;\n}\n\n/** Check if a path is the user's home directory. */\nfunction isHomeDirPath(dir: string): boolean {\n\ttry {\n\t\tconst home = homedir();\n\t\t// Normalize trailing slashes for comparison\n\t\tconst normalizedDir = dir.replace(/[/\\\\]+$/, \"\");\n\t\tconst normalizedHome = home.replace(/[/\\\\]+$/, \"\");\n\t\treturn normalizedDir === normalizedHome;\n\t} catch {\n\t\treturn false;\n\t}\n}\n\n/**\n * Shallow scan mode for home directory: only index top-level files\n * (no directory recursion) to avoid scanning the entire home directory.\n * Memory files are handled separately via scanMemoryDir.\n */\nfunction scanShallow(dir: string, results: ScannedFile[]): void {\n\tlet entries: string[];\n\ttry {\n\t\tentries = readdirSync(dir);\n\t} catch {\n\t\treturn;\n\t}\n\n\tfor (const entry of entries) {\n\t\t// Skip dotfiles/dotdirs in home dir (except specific ones we want)\n\t\tif (entry.startsWith(\".\")) continue;\n\n\t\tconst fullPath = join(dir, entry);\n\n\t\tlet stats: Stats;\n\t\ttry {\n\t\t\tstats = statSync(fullPath);\n\t\t} catch {\n\t\t\tcontinue;\n\t\t}\n\n\t\t// Only index files, not directories (shallow mode)\n\t\tif (!stats.isFile()) continue;\n\t\tif (stats.size > MAX_FILE_SIZE) continue;\n\t\tif (stats.size === 0) continue;\n\n\t\tconst fileType = detectFileType(entry);\n\t\tif (!fileType) continue;\n\n\t\tresults.push({\n\t\t\tfilePath: entry,\n\t\t\tfileType,\n\t\t\tmtime: stats.mtimeMs,\n\t\t});\n\t}\n}\n\n// ============================================================================\n// Internal helpers\n// ============================================================================\n\ntype IgnoreMatcher = ReturnType<typeof ignore>;\n\n/** Convert an OS path to posix separators for ignore matching. */\nfunction toPosix(p: string): string {\n\treturn p.split(sep).join(\"/\");\n}\n\n/** Load .gitignore rules from a directory into the ignore matcher. */\nfunction loadGitignore(ig: IgnoreMatcher, dir: string, root: string): void {\n\tconst gitignorePath = join(dir, \".gitignore\");\n\tif (!existsSync(gitignorePath)) return;\n\n\ttry {\n\t\tconst content = readFileSync(gitignorePath, \"utf-8\");\n\t\tconst relDir = relative(root, dir);\n\t\tconst prefix = relDir ? `${toPosix(relDir)}/` : \"\";\n\n\t\tconst patterns = content\n\t\t\t.split(/\\r?\\n/)\n\t\t\t.map((line) => prefixPattern(line, prefix))\n\t\t\t.filter((line): line is string => line !== null);\n\n\t\tif (patterns.length > 0) {\n\t\t\tig.add(patterns);\n\t\t}\n\t} catch {\n\t\t// Unreadable .gitignore — skip silently\n\t}\n}\n\n/**\n * Prefix a .gitignore pattern with a directory path so it applies\n * correctly when matching against root-relative paths.\n */\nfunction prefixPattern(line: string, prefix: string): string | null {\n\tconst trimmed = line.trim();\n\tif (!trimmed) return null;\n\tif (trimmed.startsWith(\"#\") && !trimmed.startsWith(\"\\\\#\")) return null;\n\n\tlet pattern = line;\n\tlet negated = false;\n\n\tif (pattern.startsWith(\"!\")) {\n\t\tnegated = true;\n\t\tpattern = pattern.slice(1);\n\t} else if (pattern.startsWith(\"\\\\!\")) {\n\t\tpattern = pattern.slice(1);\n\t}\n\n\tconst prefixed = prefix ? `${prefix}${pattern}` : pattern;\n\treturn negated ? `!${prefixed}` : prefixed;\n}\n\n/**\n * Check if a directory component (relative to root) should be unconditionally skipped.\n * Handles both top-level names (\"node_modules\") and nested paths (\".dreb/index\").\n */\nfunction shouldSkipDir(relPath: string): boolean {\n\tconst posix = toPosix(relPath);\n\n\t// Check the directory name itself\n\tconst parts = posix.split(\"/\");\n\tconst name = parts[parts.length - 1];\n\tif (SKIP_DIRS.has(name)) return true;\n\n\t// Check multi-segment skip patterns (e.g. \".dreb/index\")\n\tfor (const skip of SKIP_DIRS) {\n\t\tif (skip.includes(\"/\") && (posix === skip || posix.endsWith(`/${skip}`))) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\treturn false;\n}\n\n/** Recursively walk a directory, collecting indexable files. */\nfunction walkDirectory(dir: string, root: string, ig: IgnoreMatcher, results: ScannedFile[]): void {\n\tlet entries: string[];\n\ttry {\n\t\tentries = readdirSync(dir);\n\t} catch {\n\t\treturn; // Permission denied, etc.\n\t}\n\n\tfor (const entry of entries) {\n\t\tconst fullPath = join(dir, entry);\n\t\tconst relPath = relative(root, fullPath);\n\t\tconst posixRel = toPosix(relPath);\n\n\t\tlet stats: Stats;\n\t\ttry {\n\t\t\tstats = statSync(fullPath);\n\t\t} catch {\n\t\t\tcontinue; // Broken symlink, etc.\n\t\t}\n\n\t\tif (stats.isDirectory()) {\n\t\t\t// Hard-coded skip list\n\t\t\tif (shouldSkipDir(relPath)) continue;\n\n\t\t\t// .gitignore check (directories need trailing slash)\n\t\t\tif (ig.ignores(`${posixRel}/`)) continue;\n\n\t\t\t// Load nested .gitignore before descending\n\t\t\tloadGitignore(ig, fullPath, root);\n\n\t\t\twalkDirectory(fullPath, root, ig, results);\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (!stats.isFile()) continue;\n\n\t\t// .gitignore check for files\n\t\tif (ig.ignores(posixRel)) continue;\n\n\t\t// Size gate\n\t\tif (stats.size > MAX_FILE_SIZE) continue;\n\t\tif (stats.size === 0) continue;\n\n\t\t// File type detection\n\t\tconst fileType = detectFileType(entry);\n\t\tif (!fileType) continue;\n\n\t\tresults.push({\n\t\t\tfilePath: posixRel,\n\t\t\tfileType,\n\t\t\tmtime: stats.mtimeMs,\n\t\t});\n\t}\n}\n\n/**\n * Scan a memory directory (project or global) for indexable files.\n *\n * Memory directories are always fully included — no .gitignore filtering —\n * because they live outside the normal project tree or in `.dreb/` which\n * is typically gitignored.\n *\n * Paths for global memory files are stored with a `~memory/` prefix\n * to distinguish them from project files.\n */\nfunction scanMemoryDir(memoryDir: string, projectRoot: string, results: ScannedFile[], baseMemoryDir?: string): void {\n\tlet entries: string[];\n\ttry {\n\t\tentries = readdirSync(memoryDir);\n\t} catch {\n\t\treturn;\n\t}\n\n\tfor (const entry of entries) {\n\t\tconst fullPath = join(memoryDir, entry);\n\n\t\tlet stats: Stats;\n\t\ttry {\n\t\t\tstats = statSync(fullPath);\n\t\t} catch {\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (stats.isDirectory()) {\n\t\t\t// Recurse into subdirectories\n\t\t\tscanMemoryDir(fullPath, projectRoot, results, baseMemoryDir ?? memoryDir);\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (!stats.isFile()) continue;\n\t\tif (stats.size > MAX_FILE_SIZE) continue;\n\t\tif (stats.size === 0) continue;\n\n\t\tconst fileType = detectFileType(entry);\n\t\tif (!fileType) continue;\n\n\t\t// If the memory dir is inside the project root, use normal relative path.\n\t\t// Otherwise, use a ~memory/ prefix so paths remain unique and identifiable.\n\t\tconst rel = relative(projectRoot, fullPath);\n\t\tconst isOutsideProject = rel.startsWith(\"..\") || isAbsolute(rel);\n\t\tconst rootMemoryDir = baseMemoryDir ?? memoryDir;\n\t\tconst filePath = isOutsideProject ? `~memory/${relative(rootMemoryDir, fullPath)}` : rel;\n\n\t\tresults.push({\n\t\t\tfilePath: toPosix(filePath),\n\t\t\tfileType,\n\t\t\tmtime: stats.mtimeMs,\n\t\t});\n\t}\n}\n"]}
|
|
1
|
+
{"version":3,"file":"scanner.js","sourceRoot":"","sources":["../src/scanner.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,YAAY,EAAc,QAAQ,EAAE,MAAM,SAAS,CAAC;AACtF,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,WAAW,CAAC;AACrE,OAAO,MAAM,MAAM,QAAQ,CAAC;AAiB5B,+EAA+E;AAC/E,YAAY;AACZ,+EAA+E;AAE/E,yCAAyC;AACzC,MAAM,aAAa,GAAG,IAAI,GAAG,IAAI,CAAC;AAElC,4DAA4D;AAC5D,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC;IACzB,cAAc;IACd,MAAM;IACN,aAAa;IACb,KAAK;IACL,MAAM;IACN,aAAa;IACb,MAAM;IACN,OAAO;IACP,MAAM;IACN,OAAO;IACP,OAAO;IACP,OAAO;IACP,UAAU;IACV,QAAQ;CACR,CAAC,CAAC;AAEH,oCAAoC;AACpC,MAAM,aAAa,GAAkC,IAAI,GAAG,CAAmB;IAC9E,wBAAwB;IACxB,CAAC,KAAK,EAAE,YAAY,CAAC;IACrB,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,KAAK,EAAE,YAAY,CAAC;IACrB,CAAC,MAAM,EAAE,YAAY,CAAC;IACtB,CAAC,MAAM,EAAE,YAAY,CAAC;IACtB,CAAC,KAAK,EAAE,QAAQ,CAAC;IACjB,CAAC,KAAK,EAAE,IAAI,CAAC;IACb,CAAC,KAAK,EAAE,MAAM,CAAC;IACf,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,IAAI,EAAE,GAAG,CAAC;IACX,CAAC,IAAI,EAAE,GAAG,CAAC;IACX,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,KAAK,EAAE,KAAK,CAAC;IACd,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,KAAK,EAAE,KAAK,CAAC;IACd,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,kBAAkB;IAClB,CAAC,KAAK,EAAE,UAAU,CAAC;IACnB,CAAC,MAAM,EAAE,UAAU,CAAC;IACpB,CAAC,MAAM,EAAE,MAAM,CAAC;IAChB,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,MAAM,EAAE,WAAW,CAAC;IACrB,CAAC,MAAM,EAAE,WAAW,CAAC;IACrB,CAAC,MAAM,EAAE,WAAW,CAAC;IACrB,CAAC,MAAM,EAAE,WAAW,CAAC;IACrB,CAAC,OAAO,EAAE,WAAW,CAAC;CACtB,CAAC,CAAC;AAEH,+EAA+E;AAC/E,aAAa;AACb,+EAA+E;AAE/E;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,QAAgB;IAC9C,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IACtB,OAAO,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC;AACvC,CAAC;AAED;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAChC,WAAmB,EACnB,eAAwB,EACxB,WAAsB;IAEtB,MAAM,OAAO,GAAkB,EAAE,CAAC;IAElC,sEAAsE;IACtE,6EAA6E;IAC7E,MAAM,SAAS,GAAG,aAAa,CAAC,WAAW,CAAC,CAAC;IAE7C,IAAI,SAAS,EAAE,CAAC;QACf,8DAA8D;QAC9D,WAAW,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;IACnC,CAAC;SAAM,CAAC;QACP,mDAAmD;QACnD,MAAM,EAAE,GAAG,MAAM,EAAE,CAAC;QACpB,aAAa,CAAC,EAAE,EAAE,WAAW,EAAE,WAAW,CAAC,CAAC;QAC5C,aAAa,CAAC,WAAW,EAAE,WAAW,EAAE,EAAE,EAAE,OAAO,CAAC,CAAC;IACtD,CAAC;IAED,4DAA4D;IAC5D,uEAAuE;IACvE,oDAAoD;IACpD,IAAI,CAAC,SAAS,EAAE,CAAC;QAChB,KAAK,MAAM,GAAG,IAAI,WAAW,IAAI,EAAE,EAAE,CAAC;YACrC,aAAa,CAAC,GAAG,EAAE,WAAW,EAAE,OAAO,CAAC,CAAC;QAC1C,CAAC;IACF,CAAC;IAED,sDAAsD;IACtD,IAAI,eAAe,IAAI,UAAU,CAAC,eAAe,CAAC,EAAE,CAAC;QACpD,aAAa,CAAC,eAAe,EAAE,WAAW,EAAE,OAAO,CAAC,CAAC;IACtD,CAAC;IAED,OAAO,OAAO,CAAC;AAChB,CAAC;AAED,oDAAoD;AACpD,SAAS,aAAa,CAAC,GAAW;IACjC,IAAI,CAAC;QACJ,MAAM,IAAI,GAAG,OAAO,EAAE,CAAC;QACvB,4CAA4C;QAC5C,MAAM,aAAa,GAAG,GAAG,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QACjD,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QACnD,OAAO,aAAa,KAAK,cAAc,CAAC;IACzC,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,KAAK,CAAC;IACd,CAAC;AACF,CAAC;AAED;;;;GAIG;AACH,SAAS,WAAW,CAAC,GAAW,EAAE,OAAsB;IACvD,IAAI,OAAiB,CAAC;IACtB,IAAI,CAAC;QACJ,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACR,OAAO;IACR,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC7B,mEAAmE;QACnE,IAAI,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QAEpC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QAElC,IAAI,KAAY,CAAC;QACjB,IAAI,CAAC;YACJ,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACR,SAAS;QACV,CAAC;QAED,mDAAmD;QACnD,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAAE,SAAS;QAC9B,IAAI,KAAK,CAAC,IAAI,GAAG,aAAa;YAAE,SAAS;QACzC,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC;YAAE,SAAS;QAE/B,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QACvC,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,OAAO,CAAC,IAAI,CAAC;YACZ,QAAQ,EAAE,KAAK;YACf,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,OAAO;SACpB,CAAC,CAAC;IACJ,CAAC;AACF,CAAC;AAQD,kEAAkE;AAClE,SAAS,OAAO,CAAC,CAAS;IACzB,OAAO,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC/B,CAAC;AAED,sEAAsE;AACtE,SAAS,aAAa,CAAC,EAAiB,EAAE,GAAW,EAAE,IAAY;IAClE,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC;IAC9C,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;QAAE,OAAO;IAEvC,IAAI,CAAC;QACJ,MAAM,OAAO,GAAG,YAAY,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;QACrD,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QACnC,MAAM,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QAEnD,MAAM,QAAQ,GAAG,OAAO;aACtB,KAAK,CAAC,OAAO,CAAC;aACd,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,aAAa,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;aAC1C,MAAM,CAAC,CAAC,IAAI,EAAkB,EAAE,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;QAElD,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAClB,CAAC;IACF,CAAC;IAAC,MAAM,CAAC;QACR,wCAAwC;IACzC,CAAC;AACF,CAAC;AAED;;;GAGG;AACH,SAAS,aAAa,CAAC,IAAY,EAAE,MAAc;IAClD,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAC1B,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEvE,IAAI,OAAO,GAAG,IAAI,CAAC;IACnB,IAAI,OAAO,GAAG,KAAK,CAAC;IAEpB,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;QAC7B,OAAO,GAAG,IAAI,CAAC;QACf,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC5B,CAAC;SAAM,IAAI,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;QACtC,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC5B,CAAC;IAED,MAAM,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,GAAG,OAAO,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;IAC1D,OAAO,OAAO,CAAC,CAAC,CAAC,IAAI,QAAQ,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;AAC5C,CAAC;AAED;;;GAGG;AACH,SAAS,aAAa,CAAC,OAAe;IACrC,MAAM,KAAK,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IAE/B,kCAAkC;IAClC,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC/B,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACrC,IAAI,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAErC,yDAAyD;IACzD,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC9B,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,KAAK,IAAI,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;YAC1E,OAAO,IAAI,CAAC;QACb,CAAC;IACF,CAAC;IAED,OAAO,KAAK,CAAC;AACd,CAAC;AAED,gEAAgE;AAChE,SAAS,aAAa,CAAC,GAAW,EAAE,IAAY,EAAE,EAAiB,EAAE,OAAsB;IAC1F,IAAI,OAAiB,CAAC;IACtB,IAAI,CAAC;QACJ,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,CAAC,0BAA0B;IACnC,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QAClC,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;QAElC,IAAI,KAAY,CAAC;QACjB,IAAI,CAAC;YACJ,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACR,SAAS,CAAC,uBAAuB;QAClC,CAAC;QAED,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACzB,uBAAuB;YACvB,IAAI,aAAa,CAAC,OAAO,CAAC;gBAAE,SAAS;YAErC,qDAAqD;YACrD,IAAI,EAAE,CAAC,OAAO,CAAC,GAAG,QAAQ,GAAG,CAAC;gBAAE,SAAS;YAEzC,2CAA2C;YAC3C,aAAa,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,CAAC,CAAC;YAElC,aAAa,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE,EAAE,OAAO,CAAC,CAAC;YAC3C,SAAS;QACV,CAAC;QAED,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAAE,SAAS;QAE9B,6BAA6B;QAC7B,IAAI,EAAE,CAAC,OAAO,CAAC,QAAQ,CAAC;YAAE,SAAS;QAEnC,YAAY;QACZ,IAAI,KAAK,CAAC,IAAI,GAAG,aAAa;YAAE,SAAS;QACzC,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC;YAAE,SAAS;QAE/B,sBAAsB;QACtB,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QACvC,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,OAAO,CAAC,IAAI,CAAC;YACZ,QAAQ,EAAE,QAAQ;YAClB,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,OAAO;SACpB,CAAC,CAAC;IACJ,CAAC;AACF,CAAC;AAED;;;;;;;;;GASG;AACH,SAAS,aAAa,CAAC,SAAiB,EAAE,WAAmB,EAAE,OAAsB,EAAE,aAAsB;IAC5G,IAAI,OAAiB,CAAC;IACtB,IAAI,CAAC;QACJ,OAAO,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC;IAClC,CAAC;IAAC,MAAM,CAAC;QACR,OAAO;IACR,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;QAExC,IAAI,KAAY,CAAC;QACjB,IAAI,CAAC;YACJ,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACR,SAAS;QACV,CAAC;QAED,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACzB,8BAA8B;YAC9B,aAAa,CAAC,QAAQ,EAAE,WAAW,EAAE,OAAO,EAAE,aAAa,IAAI,SAAS,CAAC,CAAC;YAC1E,SAAS;QACV,CAAC;QAED,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAAE,SAAS;QAC9B,IAAI,KAAK,CAAC,IAAI,GAAG,aAAa;YAAE,SAAS;QACzC,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC;YAAE,SAAS;QAE/B,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QACvC,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,0EAA0E;QAC1E,4EAA4E;QAC5E,MAAM,GAAG,GAAG,QAAQ,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;QAC5C,MAAM,gBAAgB,GAAG,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,UAAU,CAAC,GAAG,CAAC,CAAC;QACjE,MAAM,aAAa,GAAG,aAAa,IAAI,SAAS,CAAC;QACjD,MAAM,QAAQ,GAAG,gBAAgB,CAAC,CAAC,CAAC,WAAW,QAAQ,CAAC,aAAa,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QAEzF,OAAO,CAAC,IAAI,CAAC;YACZ,QAAQ,EAAE,OAAO,CAAC,QAAQ,CAAC;YAC3B,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,OAAO;SACpB,CAAC,CAAC;IACJ,CAAC;AACF,CAAC","sourcesContent":["/**\n * File scanner for the semantic search subsystem.\n *\n * Discovers project files for indexing by walking the directory tree,\n * respecting .gitignore rules, and classifying files by type.\n */\n\nimport { existsSync, readdirSync, readFileSync, type Stats, statSync } from \"node:fs\";\nimport { homedir } from \"node:os\";\nimport { extname, isAbsolute, join, relative, sep } from \"node:path\";\nimport ignore from \"ignore\";\nimport type { FileType } from \"./types.js\";\n\n// ============================================================================\n// Public types\n// ============================================================================\n\n/** A file discovered by the scanner, ready for indexing. */\nexport interface ScannedFile {\n\t/** Path relative to the project root (posix separators). */\n\tfilePath: string;\n\t/** Detected file type. */\n\tfileType: FileType;\n\t/** File modification time in milliseconds since epoch. */\n\tmtime: number;\n}\n\n// ============================================================================\n// Constants\n// ============================================================================\n\n/** Maximum file size to index (1 MB). */\nconst MAX_FILE_SIZE = 1024 * 1024;\n\n/** Directories unconditionally skipped during traversal. */\nconst SKIP_DIRS = new Set([\n\t\"node_modules\",\n\t\".git\",\n\t\".dreb/index\",\n\t\".hg\",\n\t\".svn\",\n\t\"__pycache__\",\n\t\".tox\",\n\t\".venv\",\n\t\"dist\",\n\t\"build\",\n\t\".next\",\n\t\".nuxt\",\n\t\"coverage\",\n\t\".cache\",\n]);\n\n/** Extension → FileType mapping. */\nconst EXTENSION_MAP: ReadonlyMap<string, FileType> = new Map<string, FileType>([\n\t// Tree-sitter languages\n\t[\".ts\", \"typescript\"],\n\t[\".tsx\", \"tsx\"],\n\t[\".js\", \"javascript\"],\n\t[\".mjs\", \"javascript\"],\n\t[\".cjs\", \"javascript\"],\n\t[\".py\", \"python\"],\n\t[\".go\", \"go\"],\n\t[\".rs\", \"rust\"],\n\t[\".java\", \"java\"],\n\t[\".c\", \"c\"],\n\t[\".h\", \"c\"],\n\t[\".cpp\", \"cpp\"],\n\t[\".hpp\", \"cpp\"],\n\t[\".cc\", \"cpp\"],\n\t[\".cxx\", \"cpp\"],\n\t[\".hh\", \"cpp\"],\n\t[\".hxx\", \"cpp\"],\n\t// Text file types\n\t[\".md\", \"markdown\"],\n\t[\".mdx\", \"markdown\"],\n\t[\".yml\", \"yaml\"],\n\t[\".yaml\", \"yaml\"],\n\t[\".json\", \"json\"],\n\t[\".toml\", \"toml\"],\n\t[\".txt\", \"plaintext\"],\n\t[\".cfg\", \"plaintext\"],\n\t[\".ini\", \"plaintext\"],\n\t[\".env\", \"plaintext\"],\n\t[\".conf\", \"plaintext\"],\n]);\n\n// ============================================================================\n// Public API\n// ============================================================================\n\n/**\n * Detect the {@link FileType} for a file path based on its extension.\n * Returns `null` for unrecognized extensions or files without an extension.\n */\nexport function detectFileType(filePath: string): FileType | null {\n\tconst ext = extname(filePath).toLowerCase();\n\tif (!ext) return null;\n\treturn EXTENSION_MAP.get(ext) ?? null;\n}\n\n/**\n * Scan a project directory and return all indexable files.\n *\n * Walks the tree rooted at {@link projectRoot}, respects `.gitignore` rules,\n * skips binary / oversized files, and optionally includes memory files from\n * a global memory directory.\n */\nexport async function scanProject(\n\tprojectRoot: string,\n\tglobalMemoryDir?: string,\n\tvisibleDirs?: string[],\n): Promise<ScannedFile[]> {\n\tconst results: ScannedFile[] = [];\n\n\t// Detect if projectRoot is the home directory — use shallow scan mode\n\t// to avoid recursing into the entire home dir (which would be catastrophic).\n\tconst isHomeDir = isHomeDirPath(projectRoot);\n\n\tif (isHomeDir) {\n\t\t// Shallow mode: only scan top-level files and ~/.dreb/memory/\n\t\tscanShallow(projectRoot, results);\n\t} else {\n\t\t// Normal mode: full recursive walk with .gitignore\n\t\tconst ig = ignore();\n\t\tloadGitignore(ig, projectRoot, projectRoot);\n\t\twalkDirectory(projectRoot, projectRoot, ig, results);\n\t}\n\n\t// Include tool-visible .dreb/ subdirs (bypasses gitignore).\n\t// In home dir mode, global memory is already handled separately below,\n\t// and we don't want to double-scan ~/.dreb/memory/.\n\tif (!isHomeDir) {\n\t\tfor (const dir of visibleDirs ?? []) {\n\t\t\tscanMemoryDir(dir, projectRoot, results);\n\t\t}\n\t}\n\n\t// Include global memory files if the directory exists\n\tif (globalMemoryDir && existsSync(globalMemoryDir)) {\n\t\tscanMemoryDir(globalMemoryDir, projectRoot, results);\n\t}\n\n\treturn results;\n}\n\n/** Check if a path is the user's home directory. */\nfunction isHomeDirPath(dir: string): boolean {\n\ttry {\n\t\tconst home = homedir();\n\t\t// Normalize trailing slashes for comparison\n\t\tconst normalizedDir = dir.replace(/[/\\\\]+$/, \"\");\n\t\tconst normalizedHome = home.replace(/[/\\\\]+$/, \"\");\n\t\treturn normalizedDir === normalizedHome;\n\t} catch {\n\t\treturn false;\n\t}\n}\n\n/**\n * Shallow scan mode for home directory: only index top-level files\n * (no directory recursion) to avoid scanning the entire home directory.\n * Memory files are handled separately via scanMemoryDir.\n */\nfunction scanShallow(dir: string, results: ScannedFile[]): void {\n\tlet entries: string[];\n\ttry {\n\t\tentries = readdirSync(dir);\n\t} catch {\n\t\treturn;\n\t}\n\n\tfor (const entry of entries) {\n\t\t// Skip dotfiles/dotdirs in home dir (except specific ones we want)\n\t\tif (entry.startsWith(\".\")) continue;\n\n\t\tconst fullPath = join(dir, entry);\n\n\t\tlet stats: Stats;\n\t\ttry {\n\t\t\tstats = statSync(fullPath);\n\t\t} catch {\n\t\t\tcontinue;\n\t\t}\n\n\t\t// Only index files, not directories (shallow mode)\n\t\tif (!stats.isFile()) continue;\n\t\tif (stats.size > MAX_FILE_SIZE) continue;\n\t\tif (stats.size === 0) continue;\n\n\t\tconst fileType = detectFileType(entry);\n\t\tif (!fileType) continue;\n\n\t\tresults.push({\n\t\t\tfilePath: entry,\n\t\t\tfileType,\n\t\t\tmtime: stats.mtimeMs,\n\t\t});\n\t}\n}\n\n// ============================================================================\n// Internal helpers\n// ============================================================================\n\ntype IgnoreMatcher = ReturnType<typeof ignore>;\n\n/** Convert an OS path to posix separators for ignore matching. */\nfunction toPosix(p: string): string {\n\treturn p.split(sep).join(\"/\");\n}\n\n/** Load .gitignore rules from a directory into the ignore matcher. */\nfunction loadGitignore(ig: IgnoreMatcher, dir: string, root: string): void {\n\tconst gitignorePath = join(dir, \".gitignore\");\n\tif (!existsSync(gitignorePath)) return;\n\n\ttry {\n\t\tconst content = readFileSync(gitignorePath, \"utf-8\");\n\t\tconst relDir = relative(root, dir);\n\t\tconst prefix = relDir ? `${toPosix(relDir)}/` : \"\";\n\n\t\tconst patterns = content\n\t\t\t.split(/\\r?\\n/)\n\t\t\t.map((line) => prefixPattern(line, prefix))\n\t\t\t.filter((line): line is string => line !== null);\n\n\t\tif (patterns.length > 0) {\n\t\t\tig.add(patterns);\n\t\t}\n\t} catch {\n\t\t// Unreadable .gitignore — skip silently\n\t}\n}\n\n/**\n * Prefix a .gitignore pattern with a directory path so it applies\n * correctly when matching against root-relative paths.\n */\nfunction prefixPattern(line: string, prefix: string): string | null {\n\tconst trimmed = line.trim();\n\tif (!trimmed) return null;\n\tif (trimmed.startsWith(\"#\") && !trimmed.startsWith(\"\\\\#\")) return null;\n\n\tlet pattern = line;\n\tlet negated = false;\n\n\tif (pattern.startsWith(\"!\")) {\n\t\tnegated = true;\n\t\tpattern = pattern.slice(1);\n\t} else if (pattern.startsWith(\"\\\\!\")) {\n\t\tpattern = pattern.slice(1);\n\t}\n\n\tconst prefixed = prefix ? `${prefix}${pattern}` : pattern;\n\treturn negated ? `!${prefixed}` : prefixed;\n}\n\n/**\n * Check if a directory component (relative to root) should be unconditionally skipped.\n * Handles both top-level names (\"node_modules\") and nested paths (\".dreb/index\").\n */\nfunction shouldSkipDir(relPath: string): boolean {\n\tconst posix = toPosix(relPath);\n\n\t// Check the directory name itself\n\tconst parts = posix.split(\"/\");\n\tconst name = parts[parts.length - 1];\n\tif (SKIP_DIRS.has(name)) return true;\n\n\t// Check multi-segment skip patterns (e.g. \".dreb/index\")\n\tfor (const skip of SKIP_DIRS) {\n\t\tif (skip.includes(\"/\") && (posix === skip || posix.endsWith(`/${skip}`))) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\treturn false;\n}\n\n/** Recursively walk a directory, collecting indexable files. */\nfunction walkDirectory(dir: string, root: string, ig: IgnoreMatcher, results: ScannedFile[]): void {\n\tlet entries: string[];\n\ttry {\n\t\tentries = readdirSync(dir);\n\t} catch {\n\t\treturn; // Permission denied, etc.\n\t}\n\n\tfor (const entry of entries) {\n\t\tconst fullPath = join(dir, entry);\n\t\tconst relPath = relative(root, fullPath);\n\t\tconst posixRel = toPosix(relPath);\n\n\t\tlet stats: Stats;\n\t\ttry {\n\t\t\tstats = statSync(fullPath);\n\t\t} catch {\n\t\t\tcontinue; // Broken symlink, etc.\n\t\t}\n\n\t\tif (stats.isDirectory()) {\n\t\t\t// Hard-coded skip list\n\t\t\tif (shouldSkipDir(relPath)) continue;\n\n\t\t\t// .gitignore check (directories need trailing slash)\n\t\t\tif (ig.ignores(`${posixRel}/`)) continue;\n\n\t\t\t// Load nested .gitignore before descending\n\t\t\tloadGitignore(ig, fullPath, root);\n\n\t\t\twalkDirectory(fullPath, root, ig, results);\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (!stats.isFile()) continue;\n\n\t\t// .gitignore check for files\n\t\tif (ig.ignores(posixRel)) continue;\n\n\t\t// Size gate\n\t\tif (stats.size > MAX_FILE_SIZE) continue;\n\t\tif (stats.size === 0) continue;\n\n\t\t// File type detection\n\t\tconst fileType = detectFileType(entry);\n\t\tif (!fileType) continue;\n\n\t\tresults.push({\n\t\t\tfilePath: posixRel,\n\t\t\tfileType,\n\t\t\tmtime: stats.mtimeMs,\n\t\t});\n\t}\n}\n\n/**\n * Scan a memory directory (project or global) for indexable files.\n *\n * Memory directories are always fully included — no .gitignore filtering —\n * because they live outside the normal project tree or in `.dreb/` which\n * is typically gitignored.\n *\n * Paths for global memory files are stored with a `~memory/` prefix\n * to distinguish them from project files.\n */\nfunction scanMemoryDir(memoryDir: string, projectRoot: string, results: ScannedFile[], baseMemoryDir?: string): void {\n\tlet entries: string[];\n\ttry {\n\t\tentries = readdirSync(memoryDir);\n\t} catch {\n\t\treturn;\n\t}\n\n\tfor (const entry of entries) {\n\t\tconst fullPath = join(memoryDir, entry);\n\n\t\tlet stats: Stats;\n\t\ttry {\n\t\t\tstats = statSync(fullPath);\n\t\t} catch {\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (stats.isDirectory()) {\n\t\t\t// Recurse into subdirectories\n\t\t\tscanMemoryDir(fullPath, projectRoot, results, baseMemoryDir ?? memoryDir);\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (!stats.isFile()) continue;\n\t\tif (stats.size > MAX_FILE_SIZE) continue;\n\t\tif (stats.size === 0) continue;\n\n\t\tconst fileType = detectFileType(entry);\n\t\tif (!fileType) continue;\n\n\t\t// If the memory dir is inside the project root, use normal relative path.\n\t\t// Otherwise, use a ~memory/ prefix so paths remain unique and identifiable.\n\t\tconst rel = relative(projectRoot, fullPath);\n\t\tconst isOutsideProject = rel.startsWith(\"..\") || isAbsolute(rel);\n\t\tconst rootMemoryDir = baseMemoryDir ?? memoryDir;\n\t\tconst filePath = isOutsideProject ? `~memory/${relative(rootMemoryDir, fullPath)}` : rel;\n\n\t\tresults.push({\n\t\t\tfilePath: toPosix(filePath),\n\t\t\tfileType,\n\t\t\tmtime: stats.mtimeMs,\n\t\t});\n\t}\n}\n"]}
|