codesift-mcp 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +66 -21
- package/README.md +402 -56
- package/dist/cli/args.d.ts +2 -0
- package/dist/cli/args.d.ts.map +1 -1
- package/dist/cli/args.js +11 -0
- package/dist/cli/args.js.map +1 -1
- package/dist/cli/commands.d.ts.map +1 -1
- package/dist/cli/commands.js +177 -67
- package/dist/cli/commands.js.map +1 -1
- package/dist/cli/help.d.ts +1 -1
- package/dist/cli/help.d.ts.map +1 -1
- package/dist/cli/help.js +157 -0
- package/dist/cli/help.js.map +1 -1
- package/dist/cli/hooks.d.ts +3 -0
- package/dist/cli/hooks.d.ts.map +1 -0
- package/dist/cli/hooks.js +163 -0
- package/dist/cli/hooks.js.map +1 -0
- package/dist/cli/setup.d.ts +25 -0
- package/dist/cli/setup.d.ts.map +1 -0
- package/dist/cli/setup.js +400 -0
- package/dist/cli/setup.js.map +1 -0
- package/dist/config.d.ts +2 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +2 -0
- package/dist/config.js.map +1 -1
- package/dist/formatters-shortening.d.ts +7 -0
- package/dist/formatters-shortening.d.ts.map +1 -0
- package/dist/formatters-shortening.js +68 -0
- package/dist/formatters-shortening.js.map +1 -0
- package/dist/formatters.d.ts +314 -0
- package/dist/formatters.d.ts.map +1 -0
- package/dist/formatters.js +396 -0
- package/dist/formatters.js.map +1 -0
- package/dist/instructions.d.ts +6 -0
- package/dist/instructions.d.ts.map +1 -0
- package/dist/instructions.js +72 -0
- package/dist/instructions.js.map +1 -0
- package/dist/lsp/lsp-client.d.ts +21 -0
- package/dist/lsp/lsp-client.d.ts.map +1 -0
- package/dist/lsp/lsp-client.js +122 -0
- package/dist/lsp/lsp-client.js.map +1 -0
- package/dist/lsp/lsp-manager.d.ts +12 -0
- package/dist/lsp/lsp-manager.d.ts.map +1 -0
- package/dist/lsp/lsp-manager.js +82 -0
- package/dist/lsp/lsp-manager.js.map +1 -0
- package/dist/lsp/lsp-servers.d.ts +13 -0
- package/dist/lsp/lsp-servers.d.ts.map +1 -0
- package/dist/lsp/lsp-servers.js +57 -0
- package/dist/lsp/lsp-servers.js.map +1 -0
- package/dist/lsp/lsp-tools.d.ts +67 -0
- package/dist/lsp/lsp-tools.d.ts.map +1 -0
- package/dist/lsp/lsp-tools.js +359 -0
- package/dist/lsp/lsp-tools.js.map +1 -0
- package/dist/parser/extractors/_shared.d.ts +11 -0
- package/dist/parser/extractors/_shared.d.ts.map +1 -0
- package/dist/parser/extractors/_shared.js +38 -0
- package/dist/parser/extractors/_shared.js.map +1 -0
- package/dist/parser/extractors/astro.d.ts +15 -0
- package/dist/parser/extractors/astro.d.ts.map +1 -0
- package/dist/parser/extractors/astro.js +104 -0
- package/dist/parser/extractors/astro.js.map +1 -0
- package/dist/parser/extractors/conversation.d.ts +16 -0
- package/dist/parser/extractors/conversation.d.ts.map +1 -0
- package/dist/parser/extractors/conversation.js +196 -0
- package/dist/parser/extractors/conversation.js.map +1 -0
- package/dist/parser/extractors/go.d.ts.map +1 -1
- package/dist/parser/extractors/go.js +22 -45
- package/dist/parser/extractors/go.js.map +1 -1
- package/dist/parser/extractors/python.d.ts +1 -1
- package/dist/parser/extractors/python.d.ts.map +1 -1
- package/dist/parser/extractors/python.js +19 -50
- package/dist/parser/extractors/python.js.map +1 -1
- package/dist/parser/extractors/rust.d.ts +1 -1
- package/dist/parser/extractors/rust.d.ts.map +1 -1
- package/dist/parser/extractors/rust.js +7 -34
- package/dist/parser/extractors/rust.js.map +1 -1
- package/dist/parser/extractors/typescript.d.ts +1 -1
- package/dist/parser/extractors/typescript.d.ts.map +1 -1
- package/dist/parser/extractors/typescript.js +99 -68
- package/dist/parser/extractors/typescript.js.map +1 -1
- package/dist/parser/parser-manager.d.ts.map +1 -1
- package/dist/parser/parser-manager.js +12 -2
- package/dist/parser/parser-manager.js.map +1 -1
- package/dist/parser/symbol-extractor.d.ts +2 -0
- package/dist/parser/symbol-extractor.d.ts.map +1 -1
- package/dist/parser/symbol-extractor.js +2 -0
- package/dist/parser/symbol-extractor.js.map +1 -1
- package/dist/register-tools.d.ts +127 -0
- package/dist/register-tools.d.ts.map +1 -0
- package/dist/register-tools.js +1453 -0
- package/dist/register-tools.js.map +1 -0
- package/dist/retrieval/codebase-retrieval.d.ts +4 -26
- package/dist/retrieval/codebase-retrieval.d.ts.map +1 -1
- package/dist/retrieval/codebase-retrieval.js +105 -403
- package/dist/retrieval/codebase-retrieval.js.map +1 -1
- package/dist/retrieval/retrieval-constants.d.ts +27 -0
- package/dist/retrieval/retrieval-constants.d.ts.map +1 -0
- package/dist/retrieval/retrieval-constants.js +27 -0
- package/dist/retrieval/retrieval-constants.js.map +1 -0
- package/dist/retrieval/retrieval-schemas.d.ts +107 -0
- package/dist/retrieval/retrieval-schemas.d.ts.map +1 -0
- package/dist/retrieval/retrieval-schemas.js +102 -0
- package/dist/retrieval/retrieval-schemas.js.map +1 -0
- package/dist/retrieval/retrieval-utils.d.ts +40 -0
- package/dist/retrieval/retrieval-utils.d.ts.map +1 -0
- package/dist/retrieval/retrieval-utils.js +139 -0
- package/dist/retrieval/retrieval-utils.js.map +1 -0
- package/dist/retrieval/semantic-handlers.d.ts +8 -0
- package/dist/retrieval/semantic-handlers.d.ts.map +1 -0
- package/dist/retrieval/semantic-handlers.js +152 -0
- package/dist/retrieval/semantic-handlers.js.map +1 -0
- package/dist/search/bm25.d.ts +6 -1
- package/dist/search/bm25.d.ts.map +1 -1
- package/dist/search/bm25.js +95 -32
- package/dist/search/bm25.js.map +1 -1
- package/dist/search/chunker.d.ts +10 -0
- package/dist/search/chunker.d.ts.map +1 -1
- package/dist/search/chunker.js +63 -11
- package/dist/search/chunker.js.map +1 -1
- package/dist/search/reranker.d.ts +15 -0
- package/dist/search/reranker.d.ts.map +1 -0
- package/dist/search/reranker.js +126 -0
- package/dist/search/reranker.js.map +1 -0
- package/dist/search/semantic.d.ts +1 -1
- package/dist/search/semantic.d.ts.map +1 -1
- package/dist/search/semantic.js +40 -45
- package/dist/search/semantic.js.map +1 -1
- package/dist/server-helpers.d.ts +29 -0
- package/dist/server-helpers.d.ts.map +1 -0
- package/dist/server-helpers.js +312 -0
- package/dist/server-helpers.js.map +1 -0
- package/dist/server.d.ts +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +11 -271
- package/dist/server.js.map +1 -1
- package/dist/storage/_shared.d.ts +9 -0
- package/dist/storage/_shared.d.ts.map +1 -0
- package/dist/storage/_shared.js +26 -0
- package/dist/storage/_shared.js.map +1 -0
- package/dist/storage/chunk-store.d.ts.map +1 -1
- package/dist/storage/chunk-store.js +23 -63
- package/dist/storage/chunk-store.js.map +1 -1
- package/dist/storage/embedding-store.d.ts +6 -3
- package/dist/storage/embedding-store.d.ts.map +1 -1
- package/dist/storage/embedding-store.js +54 -30
- package/dist/storage/embedding-store.js.map +1 -1
- package/dist/storage/graph-store.d.ts +48 -0
- package/dist/storage/graph-store.d.ts.map +1 -0
- package/dist/storage/graph-store.js +52 -0
- package/dist/storage/graph-store.js.map +1 -0
- package/dist/storage/index-store.d.ts +5 -0
- package/dist/storage/index-store.d.ts.map +1 -1
- package/dist/storage/index-store.js +28 -16
- package/dist/storage/index-store.js.map +1 -1
- package/dist/storage/registry.d.ts +4 -0
- package/dist/storage/registry.d.ts.map +1 -1
- package/dist/storage/registry.js +16 -16
- package/dist/storage/registry.js.map +1 -1
- package/dist/storage/usage-stats.d.ts +6 -0
- package/dist/storage/usage-stats.d.ts.map +1 -1
- package/dist/storage/usage-stats.js +59 -11
- package/dist/storage/usage-stats.js.map +1 -1
- package/dist/storage/usage-tracker.d.ts +3 -0
- package/dist/storage/usage-tracker.d.ts.map +1 -1
- package/dist/storage/usage-tracker.js +50 -132
- package/dist/storage/usage-tracker.js.map +1 -1
- package/dist/storage/watcher.d.ts +2 -1
- package/dist/storage/watcher.d.ts.map +1 -1
- package/dist/storage/watcher.js +16 -16
- package/dist/storage/watcher.js.map +1 -1
- package/dist/tools/ast-query-tools.d.ts +29 -0
- package/dist/tools/ast-query-tools.d.ts.map +1 -0
- package/dist/tools/ast-query-tools.js +110 -0
- package/dist/tools/ast-query-tools.js.map +1 -0
- package/dist/tools/boundary-tools.d.ts +31 -0
- package/dist/tools/boundary-tools.d.ts.map +1 -0
- package/dist/tools/boundary-tools.js +62 -0
- package/dist/tools/boundary-tools.js.map +1 -0
- package/dist/tools/clone-tools.d.ts +35 -0
- package/dist/tools/clone-tools.d.ts.map +1 -0
- package/dist/tools/clone-tools.js +181 -0
- package/dist/tools/clone-tools.js.map +1 -0
- package/dist/tools/community-tools.d.ts +23 -0
- package/dist/tools/community-tools.d.ts.map +1 -0
- package/dist/tools/community-tools.js +297 -0
- package/dist/tools/community-tools.js.map +1 -0
- package/dist/tools/complexity-tools.d.ts +34 -0
- package/dist/tools/complexity-tools.d.ts.map +1 -0
- package/dist/tools/complexity-tools.js +135 -0
- package/dist/tools/complexity-tools.js.map +1 -0
- package/dist/tools/context-tools.d.ts +44 -3
- package/dist/tools/context-tools.d.ts.map +1 -1
- package/dist/tools/context-tools.js +329 -99
- package/dist/tools/context-tools.js.map +1 -1
- package/dist/tools/conversation-tools.d.ts +107 -0
- package/dist/tools/conversation-tools.d.ts.map +1 -0
- package/dist/tools/conversation-tools.js +419 -0
- package/dist/tools/conversation-tools.js.map +1 -0
- package/dist/tools/coordinator-tools.d.ts +73 -0
- package/dist/tools/coordinator-tools.d.ts.map +1 -0
- package/dist/tools/coordinator-tools.js +153 -0
- package/dist/tools/coordinator-tools.js.map +1 -0
- package/dist/tools/cross-repo-tools.d.ts +43 -0
- package/dist/tools/cross-repo-tools.d.ts.map +1 -0
- package/dist/tools/cross-repo-tools.js +55 -0
- package/dist/tools/cross-repo-tools.js.map +1 -0
- package/dist/tools/diff-tools.d.ts +4 -1
- package/dist/tools/diff-tools.d.ts.map +1 -1
- package/dist/tools/diff-tools.js +23 -5
- package/dist/tools/diff-tools.js.map +1 -1
- package/dist/tools/frequency-tools.d.ts +46 -0
- package/dist/tools/frequency-tools.d.ts.map +1 -0
- package/dist/tools/frequency-tools.js +184 -0
- package/dist/tools/frequency-tools.js.map +1 -0
- package/dist/tools/generate-tools.d.ts.map +1 -1
- package/dist/tools/generate-tools.js +13 -2
- package/dist/tools/generate-tools.js.map +1 -1
- package/dist/tools/graph-tools.d.ts +44 -11
- package/dist/tools/graph-tools.d.ts.map +1 -1
- package/dist/tools/graph-tools.js +147 -104
- package/dist/tools/graph-tools.js.map +1 -1
- package/dist/tools/hotspot-tools.d.ts +24 -0
- package/dist/tools/hotspot-tools.d.ts.map +1 -0
- package/dist/tools/hotspot-tools.js +122 -0
- package/dist/tools/hotspot-tools.js.map +1 -0
- package/dist/tools/impact-tools.d.ts +13 -0
- package/dist/tools/impact-tools.d.ts.map +1 -0
- package/dist/tools/impact-tools.js +238 -0
- package/dist/tools/impact-tools.js.map +1 -0
- package/dist/tools/index-tools.d.ts +44 -3
- package/dist/tools/index-tools.d.ts.map +1 -1
- package/dist/tools/index-tools.js +530 -222
- package/dist/tools/index-tools.js.map +1 -1
- package/dist/tools/memory-tools.d.ts +35 -0
- package/dist/tools/memory-tools.d.ts.map +1 -0
- package/dist/tools/memory-tools.js +229 -0
- package/dist/tools/memory-tools.js.map +1 -0
- package/dist/tools/outline-tools.d.ts +24 -13
- package/dist/tools/outline-tools.d.ts.map +1 -1
- package/dist/tools/outline-tools.js +113 -87
- package/dist/tools/outline-tools.js.map +1 -1
- package/dist/tools/pattern-tools.d.ts +32 -0
- package/dist/tools/pattern-tools.d.ts.map +1 -0
- package/dist/tools/pattern-tools.js +116 -0
- package/dist/tools/pattern-tools.js.map +1 -0
- package/dist/tools/report-tools.d.ts +5 -0
- package/dist/tools/report-tools.d.ts.map +1 -0
- package/dist/tools/report-tools.js +167 -0
- package/dist/tools/report-tools.js.map +1 -0
- package/dist/tools/review-diff-tools.d.ts +148 -0
- package/dist/tools/review-diff-tools.d.ts.map +1 -0
- package/dist/tools/review-diff-tools.js +852 -0
- package/dist/tools/review-diff-tools.js.map +1 -0
- package/dist/tools/route-tools.d.ts +32 -0
- package/dist/tools/route-tools.d.ts.map +1 -0
- package/dist/tools/route-tools.js +276 -0
- package/dist/tools/route-tools.js.map +1 -0
- package/dist/tools/search-ranker.d.ts +5 -0
- package/dist/tools/search-ranker.d.ts.map +1 -0
- package/dist/tools/search-ranker.js +142 -0
- package/dist/tools/search-ranker.js.map +1 -0
- package/dist/tools/search-tools.d.ts +24 -1
- package/dist/tools/search-tools.d.ts.map +1 -1
- package/dist/tools/search-tools.js +459 -225
- package/dist/tools/search-tools.js.map +1 -1
- package/dist/tools/secret-tools.d.ts +104 -0
- package/dist/tools/secret-tools.d.ts.map +1 -0
- package/dist/tools/secret-tools.js +410 -0
- package/dist/tools/secret-tools.js.map +1 -0
- package/dist/tools/symbol-tools.d.ts +90 -2
- package/dist/tools/symbol-tools.d.ts.map +1 -1
- package/dist/tools/symbol-tools.js +576 -42
- package/dist/tools/symbol-tools.js.map +1 -1
- package/dist/types.d.ts +34 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/utils/framework-detect.d.ts +5 -0
- package/dist/utils/framework-detect.d.ts.map +1 -0
- package/dist/utils/framework-detect.js +36 -0
- package/dist/utils/framework-detect.js.map +1 -0
- package/dist/utils/glob.d.ts +19 -0
- package/dist/utils/glob.d.ts.map +1 -0
- package/dist/utils/glob.js +74 -0
- package/dist/utils/glob.js.map +1 -0
- package/dist/utils/import-graph.d.ts +29 -0
- package/dist/utils/import-graph.d.ts.map +1 -0
- package/dist/utils/import-graph.js +125 -0
- package/dist/utils/import-graph.js.map +1 -0
- package/dist/utils/test-file.d.ts.map +1 -1
- package/dist/utils/test-file.js +1 -0
- package/dist/utils/test-file.js.map +1 -1
- package/dist/utils/walk.d.ts +45 -0
- package/dist/utils/walk.d.ts.map +1 -0
- package/dist/utils/walk.js +87 -0
- package/dist/utils/walk.js.map +1 -0
- package/package.json +12 -5
- package/rules/codesift.md +187 -0
- package/rules/codesift.mdc +192 -0
- package/rules/codex.md +187 -0
- package/rules/gemini.md +187 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"semantic-handlers.js","sourceRoot":"","sources":["../../src/retrieval/semantic-handlers.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AAEnD,OAAO,EACL,cAAc,EACd,sBAAsB,EACtB,gBAAgB,EAChB,kBAAkB,EAClB,cAAc,EACd,oBAAoB,EACpB,WAAW,GACZ,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,aAAa,EAAE,oBAAoB,EAAE,gBAAgB,EAAE,KAAK,EAAE,MAAM,0BAA0B,CAAC;AAExG,SAAS,qBAAqB,CAAC,OAAiI;IAC9J,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,cAAc,CAAC;IAChD,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACvB,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC;QACnB,MAAM,MAAM,GAAG,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;QACtG,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC;IACtD,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAClB,CAAC;AAqBD,KAAK,UAAU,mBAAmB,CAChC,IAAY,EACZ,KAAsF;IAEtF,MAAM,EAAE,uBAAuB,EAAE,gBAAgB,EAAE,GAAG,MAAM,MAAM,CAAC,uBAAuB,CAAC,CAAC;IAC5F,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,CAAC;IAC/D,MAAM,EAAE,OAAO,EAAE,GAAG,MAAM,MAAM,CAAC,wBAAwB,CAAC,CAAC;IAC3D,MAAM,EAAE,UAAU,EAAE,mBAAmB,EAAE,YAAY,EAAE,qBAAqB,EAAE,GAC5E,MAAM,MAAM,CAAC,2BAA2B,CAAC,CAAC;IAE5C,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;IAC3B,IAAI,CAAC,MAAM,CAAC,iBAAiB,EAAE,CAAC;QAC9B,MAAM,IAAI,KAAK,CACb,iHAAiH,CAClH,CAAC;IACJ,CAAC;IAED,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,IAAI,aAAa,CAAC;IAC1C,MAAM,UAAU,GAAG,KAAK,CAAC,WAAW,CAAC;IACrC,MAAM,YAAY,GAAG,KAAK,CAAC,aAAa,IAAI,IAAI,CAAC;IAEjD,MAAM,QAAQ,GAAG,uBAAuB,CAAC,MAAM,CAAC,iBAAiB,EAAE,MAAM,CAAC,CAAC;IAC3E,MAAM,aAAa,GAAG,cAAc,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAClD,MAAM,IAAI,GAAG,MAAM,WAAW,CAAC,QAAQ,CAAC,KAAK,CAAC,aAAa,CAAC,EAAE,gBAAgB,EAAE,eAAe,CAAC,CAAC;IAEjG,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,IAAI,CAAC,CAAC;IAC1D,IAAI,MAAM,GAAkC,IAAI,CAAC;IACjD,IAAI,eAAe,GAAqC,IAAI,CAAC;IAE7D,IAAI,QAAQ,EAAE,CAAC;QACb,CAAC,MAAM,EAAE,eAAe,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAC5C,UAAU,CAAC,YAAY,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;YAC7C,mBAAmB,CAAC,qBAAqB,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;SAChE,CAAC,CAAC;IACL,CAAC;IAED,MAAM,eAAe,GAAG,MAAM;QAC5B,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QAC/D,CAAC,CAAC,IAAI,GAAG,EAA8B,CAAC;IAE1C,MAAM,gBAAgB,GAAG,eAAe,IAAI,IAAI,GAAG,EAAwB,CAAC;IAC5E,MAAM,kBAAkB,GAAG,sBAAsB,CAAC,gBAAgB,EAAE,eAAe,EAAE,UAAU,EAAE,YAAY,CAAC,CAAC;IAE/G,OAAO;QACL,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAQ;QAChC,MAAM,EAAE,eAAe,EAAE,eAAe,EAAE,kBAAkB;QAC5D,IAAI,EAAE,UAAU,EAAE,YAAY,EAAE,gBAAgB;KACjD,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,yBAAyB;AACzB,8EAA8E;AAE9E,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,IAAY,EACZ,KAA8C;IAE9C,MAAM,GAAG,GAAG,MAAM,mBAAmB,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;IAEnD,+CAA+C;IAC/C,IAAI,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,eAAe,EAAE,CAAC;QACtC,MAAM,SAAS,GAAG,gBAAgB,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,kBAAkB,EAAE,GAAG,CAAC,gBAAgB,CAAC,CAAC;QAC3F,MAAM,MAAM,GAAG,CAAC,GAAG,SAAS,CAAC,OAAO,EAAE,CAAC;aACpC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;aAC3B,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,IAAI,CAAC;aAClB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC;QAErB,IAAI,QAAQ,GAAG,MAAM,CAAC;QACtB,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;YACjB,MAAM,EAAE,cAAc,EAAE,GAAG,MAAM,MAAM,CAAC,uBAAuB,CAAC,CAAC;YACjE,QAAQ,GAAG,MAAM,cAAc,CAAC,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;QACnE,CAAC;QAED,MAAM,IAAI,GAAG,kBAAkB,CAAC,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;QAC7D,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,cAAc,CAAC,IAAI,CAAC,EAAE,CAAC;IACxE,CAAC;IAED,4CAA4C;IAC5C,MAAM,EAAE,YAAY,EAAE,iBAAiB,EAAE,GAAG,MAAM,MAAM,CAAC,yBAAyB,CAAC,CAAC;IACpF,MAAM,EAAE,cAAc,EAAE,GAAG,MAAM,MAAM,CAAC,uBAAuB,CAAC,CAAC;IAEjE,MAAM,KAAK,GAAG,MAAM,YAAY,CAAC,IAAI,CAAC,CAAC;IACvC,IAAI,CAAC,KAAK;QAAE,MAAM,IAAI,KAAK,CAAC,eAAe,IAAI,aAAa,CAAC,CAAC;IAE9D,MAAM,UAAU,GAAG,MAAM,iBAAiB,CAAC,IAAI,CAAC,CAAC;IACjD,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,MAAM,IAAI,KAAK,CAAC,sBAAsB,IAAI,4DAA4D,CAAC,CAAC;IAC1G,CAAC;IAED,MAAM,WAAW,GAAG,KAAK,CAAC,YAAY,IAAI,oBAAoB,CAAC;IAC/D,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAC/D,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACvF,MAAM,kBAAkB,GAAG,sBAAsB,CAAC,UAAU,EAAE,aAAa,EAAE,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,YAAY,CAAC,CAAC;IAE/G,MAAM,UAAU,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC/B,IAAI,CAAC,UAAU;QAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;IAE1E,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,YAAY,CAAC,UAAU,CAAC,EAAE,kBAAkB,EAAE,SAAS,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;IACtG,wEAAwE;IACxE,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,MAAM,EAAE,oBAAoB,CAAC,CAAC,CAAC,MAAM,EAAE,WAAW,CAAC,EAAE,CAAC,CAAC,CAAC;IACtG,MAAM,IAAI,GAAG,qBAAqB,CAAC,SAAS,CAAC,CAAC;IAC9C,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,cAAc,CAAC,IAAI,CAAC,EAAE,CAAC;AACxE,CAAC;AAED,8EAA8E;AAC9E,qDAAqD;AACrD,8EAA8E;AAE9E,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,IAAY,EACZ,KAA4C;IAE5C,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,MAAM,CAAC,0BAA0B,CAAC,CAAC;IAEhE,iFAAiF;IACjF,MAAM,CAAC,GAAG,EAAE,WAAW,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;QAC3C,mBAAmB,CAAC,IAAI,EAAE,KAAK,CAAC;QAChC,UAAU,CAAC,IAAI,EAAE,KAAK,CAAC,KAAK,EAAE,EAAE,YAAY,EAAE,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC;KACnF,CAAC,CAAC;IACH,IAAI,CAAC,GAAG,CAAC,QAAQ;QAAE,MAAM,IAAI,KAAK,CAAC,eAAe,IAAI,aAAa,CAAC,CAAC;IACrE,IAAI,CAAC,GAAG,CAAC,MAAM,IAAI,CAAC,GAAG,CAAC,eAAe;QAAE,MAAM,IAAI,KAAK,CAAC,uBAAuB,IAAI,GAAG,CAAC,CAAC;IAGzF,gCAAgC;IAChC,MAAM,SAAS,GAAG,gBAAgB,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,kBAAkB,EAAE,GAAG,CAAC,gBAAgB,CAAC,CAAC;IAE3F,qEAAqE;IACrE,MAAM,YAAY,GAAG,IAAI,GAAG,EAAqE,CAAC;IAClG,KAAK,MAAM,CAAC,EAAE,EAAE,KAAK,CAAC,IAAI,GAAG,CAAC,MAAM,EAAE,CAAC;QACrC,MAAM,IAAI,GAAG,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAChD,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,EAAE,SAAS,EAAE,KAAK,CAAC,SAAS,EAAE,OAAO,EAAE,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QACtE,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IACrC,CAAC;IACD,KAAK,MAAM,IAAI,IAAI,YAAY,CAAC,MAAM,EAAE,EAAE,CAAC;QACzC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC;IACjD,CAAC;IAED,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC,MAAM,EAAE,IAAI,EAAE,EAAE,CAAC;QACrD,MAAM,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC;QAChC,IAAI,CAAC,KAAK;YAAE,SAAS;QACrB,IAAI,GAAG,CAAC,YAAY,IAAI,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC;YAAE,SAAS;QACzD,MAAM,IAAI,GAAG,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAChD,iEAAiE;QACjE,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;QACjC,OAAO,EAAE,IAAI,EAAE,EAAE,CAAC;YAChB,MAAM,GAAG,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC;YAC3B,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAE,CAAC;YACzB,IAAI,KAAK,CAAC,OAAO,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC;gBAC/B,EAAE,GAAG,GAAG,GAAG,CAAC,CAAC;YACf,CAAC;iBAAM,IAAI,KAAK,CAAC,SAAS,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC;gBACxC,EAAE,GAAG,GAAG,GAAG,CAAC,CAAC;YACf,CAAC;iBAAM,CAAC;gBACN,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC;gBACjF,MAAM;YACR,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,MAAM,GAAG,CAAC,GAAG,SAAS,CAAC,OAAO,EAAE,CAAC;SAClC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;SAC3B,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,IAAI,CAAC;SAClB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC;IAErB,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;QACjB,MAAM,EAAE,cAAc,EAAE,GAAG,MAAM,MAAM,CAAC,uBAAuB,CAAC,CAAC;QACjE,MAAM,GAAG,MAAM,cAAc,CAAC,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IACjE,CAAC;IAED,MAAM,UAAU,GAAG,kBAAkB,CAAC,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,YAAY,CAAC,CAAC;IAC5E,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,cAAc,CAAC,UAAU,CAAC,EAAE,CAAC;AAClF,CAAC"}
|
package/dist/search/bm25.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { CodeSymbol, SearchResult } from "../types.js";
|
|
2
|
-
type FieldName = "name" | "signature" | "docstring" | "body";
|
|
2
|
+
type FieldName = "name" | "signature" | "docstring" | "body" | "comments";
|
|
3
3
|
export interface BM25Index {
|
|
4
4
|
/** Per-field inverted index: token -> Map<symbolId, termFrequency> */
|
|
5
5
|
fields: Record<FieldName, Map<string, Map<string, number>>>;
|
|
@@ -9,6 +9,10 @@ export interface BM25Index {
|
|
|
9
9
|
docCount: number;
|
|
10
10
|
/** Symbol lookup by ID */
|
|
11
11
|
symbols: Map<string, CodeSymbol>;
|
|
12
|
+
/** Import centrality: file -> log-scaled importer count (for search ranking bonus) */
|
|
13
|
+
centrality: Map<string, number>;
|
|
14
|
+
/** Pre-computed per-document field lengths (avoids O(n*m) recomputation per search) */
|
|
15
|
+
fieldLengths: Map<string, Record<FieldName, number>>;
|
|
12
16
|
}
|
|
13
17
|
/**
|
|
14
18
|
* General-purpose tokenizer for signature, docstring, and body text.
|
|
@@ -18,5 +22,6 @@ export interface BM25Index {
|
|
|
18
22
|
export declare function tokenizeText(text: string): string[];
|
|
19
23
|
export declare function buildBM25Index(symbols: CodeSymbol[]): BM25Index;
|
|
20
24
|
export declare function searchBM25(index: BM25Index, query: string, topK: number, fieldWeights: Record<FieldName, number>): SearchResult[];
|
|
25
|
+
export declare function applyCutoff(results: SearchResult[]): SearchResult[];
|
|
21
26
|
export {};
|
|
22
27
|
//# sourceMappingURL=bm25.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"bm25.d.ts","sourceRoot":"","sources":["../../src/search/bm25.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAe5D,KAAK,SAAS,GAAG,MAAM,GAAG,WAAW,GAAG,WAAW,GAAG,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"bm25.d.ts","sourceRoot":"","sources":["../../src/search/bm25.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAe5D,KAAK,SAAS,GAAG,MAAM,GAAG,WAAW,GAAG,WAAW,GAAG,MAAM,GAAG,UAAU,CAAC;AAE1E,MAAM,WAAW,SAAS;IACxB,sEAAsE;IACtE,MAAM,EAAE,MAAM,CAAC,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC;IAC5D,oDAAoD;IACpD,eAAe,EAAE,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAC3C,wCAAwC;IACxC,QAAQ,EAAE,MAAM,CAAC;IACjB,0BAA0B;IAC1B,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;IACjC,sFAAsF;IACtF,UAAU,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,uFAAuF;IACvF,YAAY,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC,CAAC;CACtD;AAED;;;;GAIG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAqBnD;AA8CD,wBAAgB,cAAc,CAAC,OAAO,EAAE,UAAU,EAAE,GAAG,SAAS,CAoF/D;AAED,wBAAgB,UAAU,CACxB,KAAK,EAAE,SAAS,EAChB,KAAK,EAAE,MAAM,EACb,IAAI,EAAE,MAAM,EACZ,YAAY,EAAE,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,GACtC,YAAY,EAAE,CAwFhB;AAKD,wBAAgB,WAAW,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,YAAY,EAAE,CAWnE"}
|
package/dist/search/bm25.js
CHANGED
|
@@ -35,15 +35,37 @@ export function tokenizeText(text) {
|
|
|
35
35
|
return tokens;
|
|
36
36
|
}
|
|
37
37
|
function getFieldTokens(symbol) {
|
|
38
|
+
const source = symbol.source?.slice(0, BODY_CHAR_LIMIT) ?? "";
|
|
39
|
+
const { code, comments } = splitCodeAndComments(source);
|
|
38
40
|
return {
|
|
39
41
|
name: tokenizeIdentifier(symbol.name),
|
|
40
42
|
signature: symbol.signature ? tokenizeText(symbol.signature) : [],
|
|
41
43
|
docstring: symbol.docstring ? tokenizeText(symbol.docstring) : [],
|
|
42
|
-
body:
|
|
43
|
-
|
|
44
|
-
: [],
|
|
44
|
+
body: source ? tokenizeText(code) : [],
|
|
45
|
+
comments: comments ? tokenizeText(comments) : [],
|
|
45
46
|
};
|
|
46
47
|
}
|
|
48
|
+
/**
|
|
49
|
+
* Split source into code (logic) vs inline comments.
|
|
50
|
+
* Strips single-line (//) and multi-line comments from code,
|
|
51
|
+
* collects them into a separate string.
|
|
52
|
+
*
|
|
53
|
+
* Limitation: regex-based, so `//` inside string literals (e.g. URLs)
|
|
54
|
+
* may be misclassified as comments. Acceptable for BM25 scoring where
|
|
55
|
+
* a few misclassified tokens have negligible impact on ranking.
|
|
56
|
+
*/
|
|
57
|
+
function splitCodeAndComments(source) {
|
|
58
|
+
const commentParts = [];
|
|
59
|
+
// Match // comments and /* ... */ blocks
|
|
60
|
+
const stripped = source.replace(/\/\/[^\n]*/g, (m) => {
|
|
61
|
+
commentParts.push(m);
|
|
62
|
+
return "";
|
|
63
|
+
}).replace(/\/\*[\s\S]*?\*\//g, (m) => {
|
|
64
|
+
commentParts.push(m);
|
|
65
|
+
return "";
|
|
66
|
+
});
|
|
67
|
+
return { code: stripped, comments: commentParts.join(" ") };
|
|
68
|
+
}
|
|
47
69
|
function countTermFrequencies(tokens) {
|
|
48
70
|
const tf = new Map();
|
|
49
71
|
for (const token of tokens) {
|
|
@@ -52,26 +74,31 @@ function countTermFrequencies(tokens) {
|
|
|
52
74
|
return tf;
|
|
53
75
|
}
|
|
54
76
|
export function buildBM25Index(symbols) {
|
|
55
|
-
const fieldNames = ["name", "signature", "docstring", "body"];
|
|
77
|
+
const fieldNames = ["name", "signature", "docstring", "body", "comments"];
|
|
56
78
|
const fields = {
|
|
57
79
|
name: new Map(),
|
|
58
80
|
signature: new Map(),
|
|
59
81
|
docstring: new Map(),
|
|
60
82
|
body: new Map(),
|
|
83
|
+
comments: new Map(),
|
|
61
84
|
};
|
|
62
85
|
const totalFieldLengths = {
|
|
63
86
|
name: 0,
|
|
64
87
|
signature: 0,
|
|
65
88
|
docstring: 0,
|
|
66
89
|
body: 0,
|
|
90
|
+
comments: 0,
|
|
67
91
|
};
|
|
68
92
|
const symbolMap = new Map();
|
|
93
|
+
const fieldLengths = new Map();
|
|
69
94
|
for (const symbol of symbols) {
|
|
70
95
|
symbolMap.set(symbol.id, symbol);
|
|
71
96
|
const fieldTokens = getFieldTokens(symbol);
|
|
97
|
+
const lengths = { name: 0, signature: 0, docstring: 0, body: 0, comments: 0 };
|
|
72
98
|
for (const field of fieldNames) {
|
|
73
99
|
const tokens = fieldTokens[field];
|
|
74
100
|
totalFieldLengths[field] += tokens.length;
|
|
101
|
+
lengths[field] = tokens.length;
|
|
75
102
|
const tf = countTermFrequencies(tokens);
|
|
76
103
|
for (const [token, freq] of tf) {
|
|
77
104
|
let postings = fields[field].get(token);
|
|
@@ -82,6 +109,7 @@ export function buildBM25Index(symbols) {
|
|
|
82
109
|
postings.set(symbol.id, freq);
|
|
83
110
|
}
|
|
84
111
|
}
|
|
112
|
+
fieldLengths.set(symbol.id, lengths);
|
|
85
113
|
}
|
|
86
114
|
const docCount = symbols.length;
|
|
87
115
|
const avgFieldLengths = {
|
|
@@ -89,8 +117,37 @@ export function buildBM25Index(symbols) {
|
|
|
89
117
|
signature: docCount > 0 ? totalFieldLengths.signature / docCount : 0,
|
|
90
118
|
docstring: docCount > 0 ? totalFieldLengths.docstring / docCount : 0,
|
|
91
119
|
body: docCount > 0 ? totalFieldLengths.body / docCount : 0,
|
|
120
|
+
comments: docCount > 0 ? totalFieldLengths.comments / docCount : 0,
|
|
92
121
|
};
|
|
93
|
-
|
|
122
|
+
// Compute import centrality: count how many files import each file
|
|
123
|
+
// Heuristic: scan symbol source for import/require patterns pointing to files in the index
|
|
124
|
+
const importCount = new Map();
|
|
125
|
+
const allFiles = new Set();
|
|
126
|
+
for (const sym of symbols)
|
|
127
|
+
allFiles.add(sym.file);
|
|
128
|
+
for (const sym of symbols) {
|
|
129
|
+
if (!sym.source)
|
|
130
|
+
continue;
|
|
131
|
+
// Quick regex for import paths (captures relative paths)
|
|
132
|
+
const importRe = /from\s+['"]\.?\.\/([\w/.-]+)['"]/g;
|
|
133
|
+
let match;
|
|
134
|
+
while ((match = importRe.exec(sym.source)) !== null) {
|
|
135
|
+
const imported = match[1];
|
|
136
|
+
// Try to match against known files
|
|
137
|
+
for (const file of allFiles) {
|
|
138
|
+
if (file.includes(imported)) {
|
|
139
|
+
importCount.set(file, (importCount.get(file) ?? 0) + 1);
|
|
140
|
+
break;
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
// Log-scale centrality: avoids a single highly-imported utility from dominating
|
|
146
|
+
const centrality = new Map();
|
|
147
|
+
for (const [file, count] of importCount) {
|
|
148
|
+
centrality.set(file, Math.log2(1 + count));
|
|
149
|
+
}
|
|
150
|
+
return { fields, avgFieldLengths, docCount, symbols: symbolMap, centrality, fieldLengths };
|
|
94
151
|
}
|
|
95
152
|
export function searchBM25(index, query, topK, fieldWeights) {
|
|
96
153
|
if (index.docCount === 0 || !query.trim()) {
|
|
@@ -100,34 +157,13 @@ export function searchBM25(index, query, topK, fieldWeights) {
|
|
|
100
157
|
if (queryTokens.length === 0) {
|
|
101
158
|
return [];
|
|
102
159
|
}
|
|
103
|
-
const fieldNames = ["name", "signature", "docstring", "body"];
|
|
160
|
+
const fieldNames = ["name", "signature", "docstring", "body", "comments"];
|
|
104
161
|
// Accumulate scores per document
|
|
105
162
|
const scores = new Map();
|
|
106
163
|
// Track which query tokens matched per document
|
|
107
164
|
const matchedTokens = new Map();
|
|
108
|
-
//
|
|
109
|
-
|
|
110
|
-
const fieldLengths = new Map();
|
|
111
|
-
for (const [symbolId] of index.symbols) {
|
|
112
|
-
const lengths = {
|
|
113
|
-
name: 0,
|
|
114
|
-
signature: 0,
|
|
115
|
-
docstring: 0,
|
|
116
|
-
body: 0,
|
|
117
|
-
};
|
|
118
|
-
fieldLengths.set(symbolId, lengths);
|
|
119
|
-
}
|
|
120
|
-
// Compute field lengths by summing all term frequencies per doc per field
|
|
121
|
-
for (const field of fieldNames) {
|
|
122
|
-
for (const [, postings] of index.fields[field]) {
|
|
123
|
-
for (const [symbolId, freq] of postings) {
|
|
124
|
-
const lengths = fieldLengths.get(symbolId);
|
|
125
|
-
if (lengths) {
|
|
126
|
-
lengths[field] += freq;
|
|
127
|
-
}
|
|
128
|
-
}
|
|
129
|
-
}
|
|
130
|
-
}
|
|
165
|
+
// Use precomputed field lengths from index (built once at index time)
|
|
166
|
+
const { fieldLengths } = index;
|
|
131
167
|
for (const qToken of queryTokens) {
|
|
132
168
|
for (const field of fieldNames) {
|
|
133
169
|
const postings = index.fields[field].get(qToken);
|
|
@@ -152,12 +188,23 @@ export function searchBM25(index, query, topK, fieldWeights) {
|
|
|
152
188
|
}
|
|
153
189
|
}
|
|
154
190
|
}
|
|
155
|
-
//
|
|
191
|
+
// Centrality bonus: symbols in frequently-imported files get a tiebreaker
|
|
192
|
+
const maxCentrality = Math.max(1, ...index.centrality.values());
|
|
156
193
|
for (const [symbolId, score] of scores) {
|
|
157
194
|
const symbol = index.symbols.get(symbolId);
|
|
158
|
-
if (symbol
|
|
159
|
-
|
|
195
|
+
if (!symbol)
|
|
196
|
+
continue;
|
|
197
|
+
let adjusted = score;
|
|
198
|
+
// Centrality: 0-10% bonus scaled by file import popularity
|
|
199
|
+
const fileCentrality = index.centrality.get(symbol.file) ?? 0;
|
|
200
|
+
if (fileCentrality > 0) {
|
|
201
|
+
adjusted += score * 0.1 * (fileCentrality / maxCentrality);
|
|
160
202
|
}
|
|
203
|
+
// Demote test file symbols so production code ranks above test helpers
|
|
204
|
+
if (isTestFile(symbol.file)) {
|
|
205
|
+
adjusted *= TEST_FILE_SCORE_MULTIPLIER;
|
|
206
|
+
}
|
|
207
|
+
scores.set(symbolId, adjusted);
|
|
161
208
|
}
|
|
162
209
|
// Sort by score descending, take top-K
|
|
163
210
|
const sorted = [...scores.entries()]
|
|
@@ -176,4 +223,20 @@ export function searchBM25(index, query, topK, fieldWeights) {
|
|
|
176
223
|
}
|
|
177
224
|
return results;
|
|
178
225
|
}
|
|
226
|
+
const CUTOFF_THRESHOLD = 0.15;
|
|
227
|
+
const CUTOFF_MIN_RESULTS = 3;
|
|
228
|
+
export function applyCutoff(results) {
|
|
229
|
+
if (results.length <= CUTOFF_MIN_RESULTS)
|
|
230
|
+
return results;
|
|
231
|
+
const topScore = results[0]?.score ?? 0;
|
|
232
|
+
if (topScore <= 0)
|
|
233
|
+
return results;
|
|
234
|
+
const threshold = topScore * CUTOFF_THRESHOLD;
|
|
235
|
+
for (let i = CUTOFF_MIN_RESULTS; i < results.length; i++) {
|
|
236
|
+
if ((results[i]?.score ?? 0) < threshold) {
|
|
237
|
+
return results.slice(0, i);
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
return results;
|
|
241
|
+
}
|
|
179
242
|
//# sourceMappingURL=bm25.js.map
|
package/dist/search/bm25.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"bm25.js","sourceRoot":"","sources":["../../src/search/bm25.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AACnE,OAAO,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AAGnD,kBAAkB;AAClB,MAAM,EAAE,GAAG,GAAG,CAAC;AACf,MAAM,CAAC,GAAG,IAAI,CAAC;AAEf,MAAM,eAAe,GAAG,GAAG,CAAC;AAE5B;;;;GAIG;AACH,MAAM,0BAA0B,GAAG,GAAG,CAAC;
|
|
1
|
+
{"version":3,"file":"bm25.js","sourceRoot":"","sources":["../../src/search/bm25.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AACnE,OAAO,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AAGnD,kBAAkB;AAClB,MAAM,EAAE,GAAG,GAAG,CAAC;AACf,MAAM,CAAC,GAAG,IAAI,CAAC;AAEf,MAAM,eAAe,GAAG,GAAG,CAAC;AAE5B;;;;GAIG;AACH,MAAM,0BAA0B,GAAG,GAAG,CAAC;AAmBvC;;;;GAIG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,uCAAuC;IACvC,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAE7D,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;QAC5B,kEAAkE;QAClE,MAAM,QAAQ,GAAG,IAAI;aAClB,OAAO,CAAC,oBAAoB,EAAE,QAAQ,CAAC;aACvC,OAAO,CAAC,uBAAuB,EAAE,QAAQ,CAAC;aAC1C,KAAK,CAAC,IAAI,CAAC,CAAC;QAEf,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;YAC3B,MAAM,KAAK,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;YAChC,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;gBACtB,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,cAAc,CAAC,MAAkB;IACxC,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC,EAAE,eAAe,CAAC,IAAI,EAAE,CAAC;IAC9D,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,oBAAoB,CAAC,MAAM,CAAC,CAAC;IAExD,OAAO;QACL,IAAI,EAAE,kBAAkB,CAAC,MAAM,CAAC,IAAI,CAAC;QACrC,SAAS,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,YAAY,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,EAAE;QACjE,SAAS,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,YAAY,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,EAAE;QACjE,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE;QACtC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE;KACjD,CAAC;AACJ,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,oBAAoB,CAAC,MAAc;IAC1C,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,yCAAyC;IACzC,MAAM,QAAQ,GAAG,MAAM,CAAC,OAAO,CAAC,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE;QACnD,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrB,OAAO,EAAE,CAAC;IACZ,CAAC,CAAC,CAAC,OAAO,CAAC,mBAAmB,EAAE,CAAC,CAAC,EAAE,EAAE;QACpC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrB,OAAO,EAAE,CAAC;IACZ,CAAC,CAAC,CAAC;IAEH,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;AAC9D,CAAC;AAED,SAAS,oBAAoB,CAAC,MAAgB;IAC5C,MAAM,EAAE,GAAG,IAAI,GAAG,EAAkB,CAAC;IACrC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,EAAE,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC1C,CAAC;IACD,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,OAAqB;IAClD,MAAM,UAAU,GAAgB,CAAC,MAAM,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;IAEvF,MAAM,MAAM,GAAwD;QAClE,IAAI,EAAE,IAAI,GAAG,EAAE;QACf,SAAS,EAAE,IAAI,GAAG,EAAE;QACpB,SAAS,EAAE,IAAI,GAAG,EAAE;QACpB,IAAI,EAAE,IAAI,GAAG,EAAE;QACf,QAAQ,EAAE,IAAI,GAAG,EAAE;KACpB,CAAC;IAEF,MAAM,iBAAiB,GAA8B;QACnD,IAAI,EAAE,CAAC;QACP,SAAS,EAAE,CAAC;QACZ,SAAS,EAAE,CAAC;QACZ,IAAI,EAAE,CAAC;QACP,QAAQ,EAAE,CAAC;KACZ,CAAC;IAEF,MAAM,SAAS,GAAG,IAAI,GAAG,EAAsB,CAAC;IAChD,MAAM,YAAY,GAAG,IAAI,GAAG,EAAqC,CAAC;IAElE,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;QACjC,MAAM,WAAW,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC;QAC3C,MAAM,OAAO,GAA8B,EAAE,IAAI,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC;QAEzG,KAAK,MAAM,KAAK,IAAI,UAAU,EAAE,CAAC;YAC/B,MAAM,MAAM,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC;YAClC,iBAAiB,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,MAAM,CAAC;YAC1C,OAAO,CAAC,KAAK,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;YAE/B,MAAM,EAAE,GAAG,oBAAoB,CAAC,MAAM,CAAC,CAAC;YACxC,KAAK,MAAM,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;gBAC/B,IAAI,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBACxC,IAAI,CAAC,QAAQ,EAAE,CAAC;oBACd,QAAQ,GAAG,IAAI,GAAG,EAAE,CAAC;oBACrB,MAAM,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;gBACrC,CAAC;gBACD,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,EAAE,IAAI,CAAC,CAAC;YAChC,CAAC;QACH,CAAC;QACD,YAAY,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,EAAE,OAAO,CAAC,CAAC;IACvC,CAAC;IAED,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC;IAChC,MAAM,eAAe,GAA8B;QACjD,IAAI,EAAE,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;QAC1D,SAAS,EAAE,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC,SAAS,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;QACpE,SAAS,EAAE,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC,SAAS,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;QACpE,IAAI,EAAE,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;QAC1D,QAAQ,EAAE,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC,QAAQ,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;KACnE,CAAC;IAEF,mEAAmE;IACnE,2FAA2F;IAC3F,MAAM,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC9C,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;IACnC,KAAK,MAAM,GAAG,IAAI,OAAO;QAAE,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAElD,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;QAC1B,IAAI,CAAC,GAAG,CAAC,MAAM;YAAE,SAAS;QAC1B,yDAAyD;QACzD,MAAM,QAAQ,GAAG,mCAAmC,CAAC;QACrD,IAAI,KAA6B,CAAC;QAClC,OAAO,CAAC,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACpD,MAAM,QAAQ,GAAG,KAAK,CAAC,CAAC,CAAE,CAAC;YAC3B,mCAAmC;YACnC,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;gBAC5B,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;oBAC5B,WAAW,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;oBACxD,MAAM;gBACR,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,gFAAgF;IAChF,MAAM,UAAU,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC7C,KAAK,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,WAAW,EAAE,CAAC;QACxC,UAAU,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC;IAC7C,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,eAAe,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,UAAU,EAAE,YAAY,EAAE,CAAC;AAC7F,CAAC;AAED,MAAM,UAAU,UAAU,CACxB,KAAgB,EAChB,KAAa,EACb,IAAY,EACZ,YAAuC;IAEvC,IAAI,KAAK,CAAC,QAAQ,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,EAAE,CAAC;QAC1C,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,WAAW,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;IACxC,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC7B,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,UAAU,GAAgB,CAAC,MAAM,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;IAEvF,iCAAiC;IACjC,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;IACzC,gDAAgD;IAChD,MAAM,aAAa,GAAG,IAAI,GAAG,EAAuB,CAAC;IAErD,sEAAsE;IACtE,MAAM,EAAE,YAAY,EAAE,GAAG,KAAK,CAAC;IAE/B,KAAK,MAAM,MAAM,IAAI,WAAW,EAAE,CAAC;QACjC,KAAK,MAAM,KAAK,IAAI,UAAU,EAAE,CAAC;YAC/B,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YACjD,IAAI,CAAC,QAAQ;gBAAE,SAAS;YAExB,MAAM,EAAE,GAAG,QAAQ,CAAC,IAAI,CAAC;YACzB,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,QAAQ,GAAG,EAAE,GAAG,GAAG,CAAC,GAAG,CAAC,EAAE,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;YACnE,MAAM,KAAK,GAAG,KAAK,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC;YAC3C,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;YAEnC,KAAK,MAAM,CAAC,QAAQ,EAAE,EAAE,CAAC,IAAI,QAAQ,EAAE,CAAC;gBACtC,MAAM,EAAE,GAAG,YAAY,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACpD,MAAM,IAAI,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;gBACxC,MAAM,OAAO,GAAG,CAAC,EAAE,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC;gBACjE,MAAM,UAAU,GAAG,GAAG,GAAG,OAAO,GAAG,MAAM,CAAC;gBAE1C,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,GAAG,UAAU,CAAC,CAAC;gBAE/D,IAAI,QAAQ,GAAG,aAAa,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;gBAC3C,IAAI,CAAC,QAAQ,EAAE,CAAC;oBACd,QAAQ,GAAG,IAAI,GAAG,EAAE,CAAC;oBACrB,aAAa,CAAC,GAAG,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;gBACxC,CAAC;gBACD,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YACvB,CAAC;QACH,CAAC;IACH,CAAC;IAED,0EAA0E;IAC1E,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,KAAK,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;IAChE,KAAK,MAAM,CAAC,QAAQ,EAAE,KAAK,CAAC,IAAI,MAAM,EAAE,CAAC;QACvC,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAC3C,IAAI,CAAC,MAAM;YAAE,SAAS;QAEtB,IAAI,QAAQ,GAAG,KAAK,CAAC;QAErB,2DAA2D;QAC3D,MAAM,cAAc,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC9D,IAAI,cAAc,GAAG,CAAC,EAAE,CAAC;YACvB,QAAQ,IAAI,KAAK,GAAG,GAAG,GAAG,CAAC,cAAc,GAAG,aAAa,CAAC,CAAC;QAC7D,CAAC;QAED,uEAAuE;QACvE,IAAI,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC;YAC5B,QAAQ,IAAI,0BAA0B,CAAC;QACzC,CAAC;QAED,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;IACjC,CAAC;IAED,uCAAuC;IACvC,MAAM,MAAM,GAAG,CAAC,GAAG,MAAM,CAAC,OAAO,EAAE,CAAC;SACjC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;SAC3B,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IAElB,MAAM,OAAO,GAAmB,EAAE,CAAC;IACnC,KAAK,MAAM,CAAC,QAAQ,EAAE,KAAK,CAAC,IAAI,MAAM,EAAE,CAAC;QACvC,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAC3C,IAAI,CAAC,MAAM;YAAE,SAAS;QAEtB,OAAO,CAAC,IAAI,CAAC;YACX,MAAM;YACN,KAAK;YACL,OAAO,EAAE,CAAC,GAAG,CAAC,aAAa,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;SAClD,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,gBAAgB,GAAG,IAAI,CAAC;AAC9B,MAAM,kBAAkB,GAAG,CAAC,CAAC;AAE7B,MAAM,UAAU,WAAW,CAAC,OAAuB;IACjD,IAAI,OAAO,CAAC,MAAM,IAAI,kBAAkB;QAAE,OAAO,OAAO,CAAC;IACzD,MAAM,QAAQ,GAAG,OAAO,CAAC,CAAC,CAAC,EAAE,KAAK,IAAI,CAAC,CAAC;IACxC,IAAI,QAAQ,IAAI,CAAC;QAAE,OAAO,OAAO,CAAC;IAClC,MAAM,SAAS,GAAG,QAAQ,GAAG,gBAAgB,CAAC;IAC9C,KAAK,IAAI,CAAC,GAAG,kBAAkB,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACzD,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,KAAK,IAAI,CAAC,CAAC,GAAG,SAAS,EAAE,CAAC;YACzC,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAC7B,CAAC;IACH,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
package/dist/search/chunker.d.ts
CHANGED
|
@@ -6,4 +6,14 @@ import type { CodeChunk } from "../types.js";
|
|
|
6
6
|
* non-code extension).
|
|
7
7
|
*/
|
|
8
8
|
export declare function chunkFile(file: string, content: string, repo: string): CodeChunk[];
|
|
9
|
+
/**
|
|
10
|
+
* Chunk a file at symbol boundaries instead of fixed character count.
|
|
11
|
+
* Each symbol = one chunk. Preamble (imports) = separate chunk.
|
|
12
|
+
* Falls back to chunkFile when no symbols provided.
|
|
13
|
+
*/
|
|
14
|
+
export declare function chunkBySymbols(file: string, content: string, repo: string, symbols: Array<{
|
|
15
|
+
name: string;
|
|
16
|
+
start_line: number;
|
|
17
|
+
end_line: number;
|
|
18
|
+
}>): CodeChunk[];
|
|
9
19
|
//# sourceMappingURL=chunker.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../../src/search/chunker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../../src/search/chunker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAoC7C;;;;;GAKG;AACH,wBAAgB,SAAS,CACvB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,MAAM,GACX,SAAS,EAAE,CA2Cb;AAED;;;;GAIG;AACH,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAA;CAAE,CAAC,GACrE,SAAS,EAAE,CAiDb"}
|
package/dist/search/chunker.js
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
// ---------------------------------------------------------------------------
|
|
4
4
|
const CHUNK_TOKENS = 400; // target tokens per chunk
|
|
5
5
|
const OVERLAP_TOKENS = 80; // overlap between consecutive chunks
|
|
6
|
-
const CHARS_PER_TOKEN = 4; //
|
|
6
|
+
const CHARS_PER_TOKEN = 4; // intentionally high for chunk sizing (smaller chunks = safe)
|
|
7
7
|
const CHUNK_CHARS = CHUNK_TOKENS * CHARS_PER_TOKEN; // 1600
|
|
8
8
|
const OVERLAP_CHARS = OVERLAP_TOKENS * CHARS_PER_TOKEN; // 320
|
|
9
9
|
const MAX_FILE_BYTES = 50_000; // skip files > 50KB
|
|
@@ -12,6 +12,17 @@ const SKIP_EXTENSIONS = new Set([
|
|
|
12
12
|
".json", ".lock", ".md", ".yaml", ".yml",
|
|
13
13
|
".env", ".txt", ".svg", ".png", ".wasm",
|
|
14
14
|
]);
|
|
15
|
+
function shouldSkipChunking(file, content) {
|
|
16
|
+
const dotIdx = file.lastIndexOf(".");
|
|
17
|
+
const ext = dotIdx !== -1 ? file.slice(dotIdx) : "";
|
|
18
|
+
if (SKIP_EXTENSIONS.has(ext))
|
|
19
|
+
return true;
|
|
20
|
+
if (content.length > MAX_FILE_BYTES)
|
|
21
|
+
return true;
|
|
22
|
+
if (content.includes("\0"))
|
|
23
|
+
return true;
|
|
24
|
+
return false;
|
|
25
|
+
}
|
|
15
26
|
// ---------------------------------------------------------------------------
|
|
16
27
|
// Public API
|
|
17
28
|
// ---------------------------------------------------------------------------
|
|
@@ -22,16 +33,7 @@ const SKIP_EXTENSIONS = new Set([
|
|
|
22
33
|
* non-code extension).
|
|
23
34
|
*/
|
|
24
35
|
export function chunkFile(file, content, repo) {
|
|
25
|
-
|
|
26
|
-
const dotIdx = file.lastIndexOf(".");
|
|
27
|
-
const ext = dotIdx !== -1 ? file.slice(dotIdx) : "";
|
|
28
|
-
if (SKIP_EXTENSIONS.has(ext))
|
|
29
|
-
return [];
|
|
30
|
-
// Skip files that are too large
|
|
31
|
-
if (content.length > MAX_FILE_BYTES)
|
|
32
|
-
return [];
|
|
33
|
-
// Skip binary files (presence of null bytes is a reliable signal)
|
|
34
|
-
if (content.includes("\0"))
|
|
36
|
+
if (shouldSkipChunking(file, content))
|
|
35
37
|
return [];
|
|
36
38
|
const lines = content.split("\n");
|
|
37
39
|
const totalLines = lines.length;
|
|
@@ -66,6 +68,56 @@ export function chunkFile(file, content, repo) {
|
|
|
66
68
|
}
|
|
67
69
|
return chunks;
|
|
68
70
|
}
|
|
71
|
+
/**
|
|
72
|
+
* Chunk a file at symbol boundaries instead of fixed character count.
|
|
73
|
+
* Each symbol = one chunk. Preamble (imports) = separate chunk.
|
|
74
|
+
* Falls back to chunkFile when no symbols provided.
|
|
75
|
+
*/
|
|
76
|
+
export function chunkBySymbols(file, content, repo, symbols) {
|
|
77
|
+
if (shouldSkipChunking(file, content))
|
|
78
|
+
return [];
|
|
79
|
+
if (symbols.length === 0)
|
|
80
|
+
return chunkFile(file, content, repo);
|
|
81
|
+
const lines = content.split("\n");
|
|
82
|
+
const chunks = [];
|
|
83
|
+
// Sort symbols by start_line
|
|
84
|
+
const sorted = [...symbols].sort((a, b) => a.start_line - b.start_line);
|
|
85
|
+
// Preamble: lines before first symbol (imports, comments)
|
|
86
|
+
const firstStart = sorted[0]?.start_line ?? 1;
|
|
87
|
+
if (firstStart > 1) {
|
|
88
|
+
const text = lines.slice(0, firstStart - 1).join("\n");
|
|
89
|
+
if (text.trim().length > 0) {
|
|
90
|
+
chunks.push({
|
|
91
|
+
id: `${repo}:${file}:1`,
|
|
92
|
+
file,
|
|
93
|
+
startLine: 1,
|
|
94
|
+
endLine: firstStart - 1,
|
|
95
|
+
text,
|
|
96
|
+
tokenCount: Math.ceil(text.length / CHARS_PER_TOKEN),
|
|
97
|
+
});
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
// One chunk per symbol
|
|
101
|
+
for (const sym of sorted) {
|
|
102
|
+
const start = sym.start_line - 1; // 0-based
|
|
103
|
+
const end = Math.min(sym.end_line, lines.length); // 1-based inclusive
|
|
104
|
+
const symLines = lines.slice(start, end);
|
|
105
|
+
const text = symLines.join("\n");
|
|
106
|
+
if (text.trim().length === 0)
|
|
107
|
+
continue;
|
|
108
|
+
// Cap very large symbols
|
|
109
|
+
const cappedText = text.length > MAX_FILE_BYTES ? text.slice(0, MAX_FILE_BYTES) : text;
|
|
110
|
+
chunks.push({
|
|
111
|
+
id: `${repo}:${file}:${sym.start_line}`,
|
|
112
|
+
file,
|
|
113
|
+
startLine: sym.start_line,
|
|
114
|
+
endLine: end,
|
|
115
|
+
text: cappedText,
|
|
116
|
+
tokenCount: Math.ceil(cappedText.length / CHARS_PER_TOKEN),
|
|
117
|
+
});
|
|
118
|
+
}
|
|
119
|
+
return chunks;
|
|
120
|
+
}
|
|
69
121
|
// ---------------------------------------------------------------------------
|
|
70
122
|
// Internal helpers
|
|
71
123
|
// ---------------------------------------------------------------------------
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../src/search/chunker.ts"],"names":[],"mappings":"AAEA,8EAA8E;AAC9E,qBAAqB;AACrB,8EAA8E;AAE9E,MAAM,YAAY,GAAG,GAAG,CAAC,CAAW,0BAA0B;AAC9D,MAAM,cAAc,GAAG,EAAE,CAAC,CAAU,qCAAqC;AACzE,MAAM,eAAe,GAAG,CAAC,CAAC,CAAU,
|
|
1
|
+
{"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../src/search/chunker.ts"],"names":[],"mappings":"AAEA,8EAA8E;AAC9E,qBAAqB;AACrB,8EAA8E;AAE9E,MAAM,YAAY,GAAG,GAAG,CAAC,CAAW,0BAA0B;AAC9D,MAAM,cAAc,GAAG,EAAE,CAAC,CAAU,qCAAqC;AACzE,MAAM,eAAe,GAAG,CAAC,CAAC,CAAU,8DAA8D;AAElG,MAAM,WAAW,GAAG,YAAY,GAAG,eAAe,CAAC,CAAG,OAAO;AAC7D,MAAM,aAAa,GAAG,cAAc,GAAG,eAAe,CAAC,CAAC,MAAM;AAE9D,MAAM,cAAc,GAAG,MAAM,CAAC,CAAM,oBAAoB;AAExD,6DAA6D;AAC7D,MAAM,eAAe,GAAG,IAAI,GAAG,CAAC;IAC9B,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM;IACxC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO;CACxC,CAAC,CAAC;AAEH,SAAS,kBAAkB,CAAC,IAAY,EAAE,OAAe;IACvD,MAAM,MAAM,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IACrC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAEpD,IAAI,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IAC1C,IAAI,OAAO,CAAC,MAAM,GAAG,cAAc;QAAE,OAAO,IAAI,CAAC;IACjD,IAAI,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAExC,OAAO,KAAK,CAAC;AACf,CAAC;AAED,8EAA8E;AAC9E,aAAa;AACb,8EAA8E;AAE9E;;;;;GAKG;AACH,MAAM,UAAU,SAAS,CACvB,IAAY,EACZ,OAAe,EACf,IAAY;IAEZ,IAAI,kBAAkB,CAAC,IAAI,EAAE,OAAO,CAAC;QAAE,OAAO,EAAE,CAAC;IAEjD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,UAAU,GAAG,KAAK,CAAC,MAAM,CAAC;IAEhC,IAAI,UAAU,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEhC,2EAA2E;IAC3E,wEAAwE;IACxE,MAAM,eAAe,GAAa,IAAI,KAAK,CAAC,UAAU,CAAC,CAAC;IACxD,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,eAAe,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC;QAC5B,MAAM,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,kBAAkB;IAC3D,CAAC;IACD,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC;IAElC,MAAM,MAAM,GAAgB,EAAE,CAAC;IAC/B,IAAI,UAAU,GAAG,CAAC,CAAC,CAAC,sCAAsC;IAE1D,OAAO,UAAU,GAAG,UAAU,EAAE,CAAC;QAC/B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,GAAG,WAAW,EAAE,UAAU,CAAC,CAAC;QAChE,MAAM,IAAI,GAAG,OAAO,CAAC,KAAK,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;QAEjD,2CAA2C;QAC3C,MAAM,SAAS,GAAG,gBAAgB,CAAC,UAAU,EAAE,eAAe,CAAC,GAAG,CAAC,CAAC;QACpE,MAAM,OAAO,GAAG,gBAAgB,CAAC,QAAQ,GAAG,CAAC,EAAE,eAAe,CAAC,GAAG,CAAC,CAAC;QAEpE,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,eAAe,CAAC,CAAC;QAE5D,MAAM,EAAE,GAAG,GAAG,IAAI,IAAI,IAAI,IAAI,SAAS,EAAE,CAAC;QAC1C,MAAM,CAAC,IAAI,CAAC,EAAE,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC,CAAC;QAEhE,oEAAoE;QACpE,MAAM,OAAO,GAAG,WAAW,GAAG,aAAa,CAAC;QAC5C,UAAU,IAAI,OAAO,CAAC;QAEtB,uEAAuE;QACvE,IAAI,UAAU,IAAI,UAAU;YAAE,MAAM;IACtC,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,cAAc,CAC5B,IAAY,EACZ,OAAe,EACf,IAAY,EACZ,OAAsE;IAEtE,IAAI,kBAAkB,CAAC,IAAI,EAAE,OAAO,CAAC;QAAE,OAAO,EAAE,CAAC;IACjD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,SAAS,CAAC,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;IAEhE,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,MAAM,GAAgB,EAAE,CAAC;IAE/B,6BAA6B;IAC7B,MAAM,MAAM,GAAG,CAAC,GAAG,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC,CAAC;IAExE,0DAA0D;IAC1D,MAAM,UAAU,GAAG,MAAM,CAAC,CAAC,CAAC,EAAE,UAAU,IAAI,CAAC,CAAC;IAC9C,IAAI,UAAU,GAAG,CAAC,EAAE,CAAC;QACnB,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvD,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3B,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,GAAG,IAAI,IAAI,IAAI,IAAI;gBACvB,IAAI;gBACJ,SAAS,EAAE,CAAC;gBACZ,OAAO,EAAE,UAAU,GAAG,CAAC;gBACvB,IAAI;gBACJ,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,eAAe,CAAC;aACrD,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,uBAAuB;IACvB,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,GAAG,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,UAAU;QAC5C,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,oBAAoB;QACtE,MAAM,QAAQ,GAAG,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QACzC,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEjC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAEvC,yBAAyB;QACzB,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,GAAG,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,cAAc,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QAEvF,MAAM,CAAC,IAAI,CAAC;YACV,EAAE,EAAE,GAAG,IAAI,IAAI,IAAI,IAAI,GAAG,CAAC,UAAU,EAAE;YACvC,IAAI;YACJ,SAAS,EAAE,GAAG,CAAC,UAAU;YACzB,OAAO,EAAE,GAAG;YACZ,IAAI,EAAE,UAAU;YAChB,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,GAAG,eAAe,CAAC;SAC3D,CAAC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,8EAA8E;AAC9E,mBAAmB;AACnB,8EAA8E;AAE9E;;;GAGG;AACH,SAAS,gBAAgB,CAAC,UAAkB,EAAE,eAAyB;IACrE,IAAI,EAAE,GAAG,CAAC,CAAC;IACX,IAAI,EAAE,GAAG,eAAe,CAAC,MAAM,GAAG,CAAC,CAAC;IAEpC,OAAO,EAAE,GAAG,EAAE,EAAE,CAAC;QACf,MAAM,GAAG,GAAG,CAAC,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;QAC/B,MAAM,SAAS,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC;QACvC,IAAI,SAAS,KAAK,SAAS,IAAI,SAAS,IAAI,UAAU,EAAE,CAAC;YACvD,EAAE,GAAG,GAAG,CAAC;QACX,CAAC;aAAM,CAAC;YACN,EAAE,GAAG,GAAG,GAAG,CAAC,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import type { SearchResult, CodeChunk } from "../types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Rerank SearchResult[] using a cross-encoder model.
|
|
4
|
+
* Returns results reordered by cross-encoder score.
|
|
5
|
+
* Falls back to original order if the model is unavailable.
|
|
6
|
+
*/
|
|
7
|
+
export declare function rerankResults(query: string, results: SearchResult[], topN?: number, model?: string): Promise<SearchResult[]>;
|
|
8
|
+
/**
|
|
9
|
+
* Rerank chunk IDs using a cross-encoder model.
|
|
10
|
+
* Returns reordered chunk IDs.
|
|
11
|
+
*/
|
|
12
|
+
export declare function rerankChunkIds(query: string, chunkIds: string[], chunks: Map<string, CodeChunk>, topN?: number, model?: string): Promise<string[]>;
|
|
13
|
+
/** Reset caches for testing. */
|
|
14
|
+
export declare function _resetReranker(): void;
|
|
15
|
+
//# sourceMappingURL=reranker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"reranker.d.ts","sourceRoot":"","sources":["../../src/search/reranker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAmD3D;;;;GAIG;AACH,wBAAsB,aAAa,CACjC,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,YAAY,EAAE,EACvB,IAAI,CAAC,EAAE,MAAM,EACb,KAAK,CAAC,EAAE,MAAM,GACb,OAAO,CAAC,YAAY,EAAE,CAAC,CAiCzB;AAED;;;GAGG;AACH,wBAAsB,cAAc,CAClC,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,EAAE,EAClB,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,SAAS,CAAC,EAC9B,IAAI,CAAC,EAAE,MAAM,EACb,KAAK,CAAC,EAAE,MAAM,GACb,OAAO,CAAC,MAAM,EAAE,CAAC,CAkCnB;AAYD,gCAAgC;AAChC,wBAAgB,cAAc,IAAI,IAAI,CAIrC"}
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
const DEFAULT_RERANK_TOP_N = 50;
|
|
2
|
+
const DEFAULT_MODEL = "Xenova/ms-marco-MiniLM-L-6-v2";
|
|
3
|
+
const pipelineCache = new Map();
|
|
4
|
+
const failedModels = new Set();
|
|
5
|
+
let loadWarned = false;
|
|
6
|
+
async function loadPipeline(model) {
|
|
7
|
+
const modelName = model ?? DEFAULT_MODEL;
|
|
8
|
+
const cached = pipelineCache.get(modelName);
|
|
9
|
+
if (cached)
|
|
10
|
+
return cached;
|
|
11
|
+
if (failedModels.has(modelName))
|
|
12
|
+
return null;
|
|
13
|
+
try {
|
|
14
|
+
// @ts-expect-error — optional dependency, may not be installed
|
|
15
|
+
const transformers = await import("@huggingface/transformers");
|
|
16
|
+
const pipelineFn = transformers.pipeline ?? transformers.default?.pipeline;
|
|
17
|
+
if (!pipelineFn) {
|
|
18
|
+
failedModels.add(modelName);
|
|
19
|
+
return null;
|
|
20
|
+
}
|
|
21
|
+
const classifier = await pipelineFn("text-classification", modelName, {
|
|
22
|
+
quantized: true,
|
|
23
|
+
});
|
|
24
|
+
const rerankerFn = async (pairs) => {
|
|
25
|
+
// Batch: send all inputs at once for better throughput
|
|
26
|
+
const inputs = pairs.map(([q, t]) => `${q} [SEP] ${t}`);
|
|
27
|
+
const outputs = await classifier(inputs, { topk: 1 });
|
|
28
|
+
// Normalize: pipeline returns single object for 1 input, array for N
|
|
29
|
+
const results = [];
|
|
30
|
+
for (let i = 0; i < pairs.length; i++) {
|
|
31
|
+
const out = Array.isArray(outputs[i]) ? outputs[i][0] : outputs[i] ?? outputs;
|
|
32
|
+
const score = out?.score ?? 0;
|
|
33
|
+
results.push({ score: typeof score === "number" ? score : 0 });
|
|
34
|
+
}
|
|
35
|
+
return results;
|
|
36
|
+
};
|
|
37
|
+
pipelineCache.set(modelName, rerankerFn);
|
|
38
|
+
return rerankerFn;
|
|
39
|
+
}
|
|
40
|
+
catch {
|
|
41
|
+
failedModels.add(modelName);
|
|
42
|
+
return null;
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* Rerank SearchResult[] using a cross-encoder model.
|
|
47
|
+
* Returns results reordered by cross-encoder score.
|
|
48
|
+
* Falls back to original order if the model is unavailable.
|
|
49
|
+
*/
|
|
50
|
+
export async function rerankResults(query, results, topN, model) {
|
|
51
|
+
if (results.length <= 1)
|
|
52
|
+
return results;
|
|
53
|
+
const limit = Math.min(topN ?? DEFAULT_RERANK_TOP_N, results.length);
|
|
54
|
+
const candidates = results.slice(0, limit);
|
|
55
|
+
const remainder = results.slice(limit);
|
|
56
|
+
const reranker = await loadPipeline(model);
|
|
57
|
+
if (!reranker) {
|
|
58
|
+
if (!loadWarned) {
|
|
59
|
+
loadWarned = true;
|
|
60
|
+
console.error("[codesift] Cross-encoder reranking unavailable. Install @huggingface/transformers for improved search quality.");
|
|
61
|
+
}
|
|
62
|
+
return results;
|
|
63
|
+
}
|
|
64
|
+
const pairs = candidates.map((r) => {
|
|
65
|
+
const text = buildCandidateText(r);
|
|
66
|
+
return [query, text];
|
|
67
|
+
});
|
|
68
|
+
const scores = await reranker(pairs);
|
|
69
|
+
const scored = candidates.map((r, i) => ({
|
|
70
|
+
result: r,
|
|
71
|
+
ceScore: scores[i]?.score ?? 0,
|
|
72
|
+
}));
|
|
73
|
+
scored.sort((a, b) => b.ceScore - a.ceScore);
|
|
74
|
+
return [...scored.map((s) => s.result), ...remainder];
|
|
75
|
+
}
|
|
76
|
+
/**
|
|
77
|
+
* Rerank chunk IDs using a cross-encoder model.
|
|
78
|
+
* Returns reordered chunk IDs.
|
|
79
|
+
*/
|
|
80
|
+
export async function rerankChunkIds(query, chunkIds, chunks, topN, model) {
|
|
81
|
+
if (chunkIds.length <= 1)
|
|
82
|
+
return chunkIds;
|
|
83
|
+
const limit = Math.min(topN ?? DEFAULT_RERANK_TOP_N, chunkIds.length);
|
|
84
|
+
const candidates = chunkIds.slice(0, limit);
|
|
85
|
+
const remainder = chunkIds.slice(limit);
|
|
86
|
+
const reranker = await loadPipeline(model);
|
|
87
|
+
if (!reranker) {
|
|
88
|
+
if (!loadWarned) {
|
|
89
|
+
loadWarned = true;
|
|
90
|
+
console.error("[codesift] Cross-encoder reranking unavailable. Install @huggingface/transformers for improved search quality.");
|
|
91
|
+
}
|
|
92
|
+
return chunkIds;
|
|
93
|
+
}
|
|
94
|
+
const pairs = candidates.map((id) => {
|
|
95
|
+
const chunk = chunks.get(id);
|
|
96
|
+
const text = chunk?.text ?? id;
|
|
97
|
+
return [query, text];
|
|
98
|
+
});
|
|
99
|
+
const scores = await reranker(pairs);
|
|
100
|
+
const scored = candidates.map((id, i) => ({
|
|
101
|
+
id,
|
|
102
|
+
ceScore: scores[i]?.score ?? 0,
|
|
103
|
+
}));
|
|
104
|
+
scored.sort((a, b) => b.ceScore - a.ceScore);
|
|
105
|
+
return [...scored.map((s) => s.id), ...remainder];
|
|
106
|
+
}
|
|
107
|
+
function buildCandidateText(r) {
|
|
108
|
+
const parts = [];
|
|
109
|
+
if (r.symbol.kind)
|
|
110
|
+
parts.push(r.symbol.kind);
|
|
111
|
+
parts.push(r.symbol.name);
|
|
112
|
+
if (r.symbol.signature)
|
|
113
|
+
parts.push(r.symbol.signature);
|
|
114
|
+
if (r.symbol.source)
|
|
115
|
+
parts.push(r.symbol.source.slice(0, 500));
|
|
116
|
+
else if (r.symbol.docstring)
|
|
117
|
+
parts.push(r.symbol.docstring.slice(0, 200));
|
|
118
|
+
return parts.join(" ");
|
|
119
|
+
}
|
|
120
|
+
/** Reset caches for testing. */
|
|
121
|
+
export function _resetReranker() {
|
|
122
|
+
pipelineCache.clear();
|
|
123
|
+
failedModels.clear();
|
|
124
|
+
loadWarned = false;
|
|
125
|
+
}
|
|
126
|
+
//# sourceMappingURL=reranker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"reranker.js","sourceRoot":"","sources":["../../src/search/reranker.ts"],"names":[],"mappings":"AAEA,MAAM,oBAAoB,GAAG,EAAE,CAAC;AAChC,MAAM,aAAa,GAAG,+BAA+B,CAAC;AAItD,MAAM,aAAa,GAAG,IAAI,GAAG,EAAsB,CAAC;AACpD,MAAM,YAAY,GAAG,IAAI,GAAG,EAAU,CAAC;AACvC,IAAI,UAAU,GAAG,KAAK,CAAC;AAEvB,KAAK,UAAU,YAAY,CAAC,KAAc;IACxC,MAAM,SAAS,GAAG,KAAK,IAAI,aAAa,CAAC;IAEzC,MAAM,MAAM,GAAG,aAAa,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;IAC5C,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC;IAC1B,IAAI,YAAY,CAAC,GAAG,CAAC,SAAS,CAAC;QAAE,OAAO,IAAI,CAAC;IAE7C,IAAI,CAAC;QACH,+DAA+D;QAC/D,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC,2BAA2B,CAAC,CAAC;QAC/D,MAAM,UAAU,GAAG,YAAY,CAAC,QAAQ,IAAI,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC;QAC3E,IAAI,CAAC,UAAU,EAAE,CAAC;YAAC,YAAY,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;YAAC,OAAO,IAAI,CAAC;QAAC,CAAC;QAE9D,MAAM,UAAU,GAAG,MAAM,UAAU,CAAC,qBAAqB,EAAE,SAAS,EAAE;YACpE,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QAEH,MAAM,UAAU,GAAe,KAAK,EAAE,KAAiB,EAAE,EAAE;YACzD,uDAAuD;YACvD,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC;YACxD,MAAM,OAAO,GAAG,MAAM,UAAU,CAAC,MAAM,EAAE,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC;YAEtD,qEAAqE;YACrE,MAAM,OAAO,GAA6B,EAAE,CAAC;YAC7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACtC,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,OAAO,CAAC;gBAC9E,MAAM,KAAK,GAAG,GAAG,EAAE,KAAK,IAAI,CAAC,CAAC;gBAC9B,OAAO,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YACjE,CAAC;YACD,OAAO,OAAO,CAAC;QACjB,CAAC,CAAC;QAEF,aAAa,CAAC,GAAG,CAAC,SAAS,EAAE,UAAU,CAAC,CAAC;QACzC,OAAO,UAAU,CAAC;IACpB,CAAC;IAAC,MAAM,CAAC;QACP,YAAY,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QAC5B,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,KAAa,EACb,OAAuB,EACvB,IAAa,EACb,KAAc;IAEd,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,OAAO,CAAC;IAExC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,oBAAoB,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;IACrE,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;IAC3C,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAEvC,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC,KAAK,CAAC,CAAC;IAC3C,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,UAAU,GAAG,IAAI,CAAC;YAClB,OAAO,CAAC,KAAK,CACX,gHAAgH,CACjH,CAAC;QACJ,CAAC;QACD,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACjC,MAAM,IAAI,GAAG,kBAAkB,CAAC,CAAC,CAAC,CAAC;QACnC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,CAAC;IAErC,MAAM,MAAM,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;QACvC,MAAM,EAAE,CAAC;QACT,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,KAAK,IAAI,CAAC;KAC/B,CAAC,CAAC,CAAC;IAEJ,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC;IAE7C,OAAO,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,GAAG,SAAS,CAAC,CAAC;AACxD,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,KAAa,EACb,QAAkB,EAClB,MAA8B,EAC9B,IAAa,EACb,KAAc;IAEd,IAAI,QAAQ,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,QAAQ,CAAC;IAE1C,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,oBAAoB,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;IACtE,MAAM,UAAU,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;IAC5C,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAExC,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC,KAAK,CAAC,CAAC;IAC3C,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,UAAU,GAAG,IAAI,CAAC;YAClB,OAAO,CAAC,KAAK,CACX,gHAAgH,CACjH,CAAC;QACJ,CAAC;QACD,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE;QAClC,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAC7B,MAAM,IAAI,GAAG,KAAK,EAAE,IAAI,IAAI,EAAE,CAAC;QAC/B,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,CAAC;IAErC,MAAM,MAAM,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;QACxC,EAAE;QACF,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,KAAK,IAAI,CAAC;KAC/B,CAAC,CAAC,CAAC;IAEJ,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC;IAE7C,OAAO,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,GAAG,SAAS,CAAC,CAAC;AACpD,CAAC;AAED,SAAS,kBAAkB,CAAC,CAAe;IACzC,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,CAAC,CAAC,MAAM,CAAC,IAAI;QAAE,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAC7C,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAC1B,IAAI,CAAC,CAAC,MAAM,CAAC,SAAS;QAAE,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;IACvD,IAAI,CAAC,CAAC,MAAM,CAAC,MAAM;QAAE,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;SAC1D,IAAI,CAAC,CAAC,MAAM,CAAC,SAAS;QAAE,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;IAC1E,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzB,CAAC;AAED,gCAAgC;AAChC,MAAM,UAAU,cAAc;IAC5B,aAAa,CAAC,KAAK,EAAE,CAAC;IACtB,YAAY,CAAC,KAAK,EAAE,CAAC;IACrB,UAAU,GAAG,KAAK,CAAC;AACrB,CAAC"}
|
|
@@ -6,7 +6,7 @@ export interface EmbeddingProvider {
|
|
|
6
6
|
}
|
|
7
7
|
/**
|
|
8
8
|
* Build a searchable text string from a symbol for embedding.
|
|
9
|
-
* Format: "{kind} {name}\n{signature}\n{docstring first line}\n{body first
|
|
9
|
+
* Format: "{kind} {name}\n{signature}\n{docstring first line}\n{body first N chars}"
|
|
10
10
|
*/
|
|
11
11
|
export declare function buildSymbolText(symbol: CodeSymbol): string;
|
|
12
12
|
export declare function cosineSimilarity(a: Float32Array, b: Float32Array): number;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"semantic.d.ts","sourceRoot":"","sources":["../../src/search/semantic.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"semantic.d.ts","sourceRoot":"","sources":["../../src/search/semantic.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAU5D,MAAM,WAAW,iBAAiB;IAChC,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IAC5C,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;CACxB;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAAC,MAAM,EAAE,UAAU,GAAG,MAAM,CAiB1D;AAMD,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM,CAiBzE;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAC5B,cAAc,EAAE,YAAY,EAC5B,UAAU,EAAE,GAAG,CAAC,MAAM,EAAE,YAAY,CAAC,EACrC,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,UAAU,CAAC,EAChC,IAAI,EAAE,MAAM,GACX,YAAY,EAAE,CAoBhB;AAsDD,qBAAa,cAAe,YAAW,iBAAiB;IACtD,QAAQ,CAAC,KAAK,mBAAmB;IACjC,QAAQ,CAAC,UAAU,QAAQ;IAC3B,OAAO,CAAC,MAAM,CAAS;gBAEX,MAAM,EAAE,MAAM;IAIpB,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;CAQlD;AAMD,qBAAa,cAAe,YAAW,iBAAiB;IACtD,QAAQ,CAAC,KAAK,4BAA4B;IAC1C,QAAQ,CAAC,UAAU,QAAQ;IAC3B,OAAO,CAAC,MAAM,CAAS;gBAEX,MAAM,EAAE,MAAM;IAIpB,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;CAQlD;AAMD,qBAAa,cAAe,YAAW,iBAAiB;IACtD,QAAQ,CAAC,KAAK,sBAAsB;IACpC,QAAQ,CAAC,UAAU,OAAO;IAC1B,OAAO,CAAC,OAAO,CAAS;gBAEZ,OAAO,EAAE,MAAM;IAIrB,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;CA+BlD;AAMD,wBAAgB,uBAAuB,CACrC,QAAQ,EAAE,QAAQ,GAAG,QAAQ,GAAG,QAAQ,EACxC,MAAM,EAAE;IAAE,YAAY,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,YAAY,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,GAChG,iBAAiB,CAenB"}
|