distill-mcp 0.6.0-beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +133 -0
- package/dist/analytics/session-tracker.d.ts +74 -0
- package/dist/analytics/session-tracker.d.ts.map +1 -0
- package/dist/analytics/session-tracker.js +123 -0
- package/dist/ast/benchmark.test.d.ts +7 -0
- package/dist/ast/benchmark.test.d.ts.map +1 -0
- package/dist/ast/benchmark.test.js +175 -0
- package/dist/ast/go/index.d.ts +9 -0
- package/dist/ast/go/index.d.ts.map +1 -0
- package/dist/ast/go/index.js +8 -0
- package/dist/ast/go/parser.d.ts +31 -0
- package/dist/ast/go/parser.d.ts.map +1 -0
- package/dist/ast/go/parser.js +428 -0
- package/dist/ast/go/parser.test.d.ts +5 -0
- package/dist/ast/go/parser.test.d.ts.map +1 -0
- package/dist/ast/go/parser.test.js +241 -0
- package/dist/ast/go/queries.d.ts +51 -0
- package/dist/ast/go/queries.d.ts.map +1 -0
- package/dist/ast/go/queries.js +114 -0
- package/dist/ast/go/utils.d.ts +66 -0
- package/dist/ast/go/utils.d.ts.map +1 -0
- package/dist/ast/go/utils.js +140 -0
- package/dist/ast/index.d.ts +39 -0
- package/dist/ast/index.d.ts.map +1 -0
- package/dist/ast/index.js +245 -0
- package/dist/ast/php/index.d.ts +9 -0
- package/dist/ast/php/index.d.ts.map +1 -0
- package/dist/ast/php/index.js +8 -0
- package/dist/ast/php/parser.d.ts +31 -0
- package/dist/ast/php/parser.d.ts.map +1 -0
- package/dist/ast/php/parser.js +388 -0
- package/dist/ast/php/parser.test.d.ts +5 -0
- package/dist/ast/php/parser.test.d.ts.map +1 -0
- package/dist/ast/php/parser.test.js +328 -0
- package/dist/ast/php/queries.d.ts +61 -0
- package/dist/ast/php/queries.d.ts.map +1 -0
- package/dist/ast/php/queries.js +117 -0
- package/dist/ast/php/utils.d.ts +83 -0
- package/dist/ast/php/utils.d.ts.map +1 -0
- package/dist/ast/php/utils.js +246 -0
- package/dist/ast/python/index.d.ts +9 -0
- package/dist/ast/python/index.d.ts.map +1 -0
- package/dist/ast/python/index.js +8 -0
- package/dist/ast/python/parser.d.ts +32 -0
- package/dist/ast/python/parser.d.ts.map +1 -0
- package/dist/ast/python/parser.js +422 -0
- package/dist/ast/python/parser.test.d.ts +5 -0
- package/dist/ast/python/parser.test.d.ts.map +1 -0
- package/dist/ast/python/parser.test.js +186 -0
- package/dist/ast/python/queries.d.ts +73 -0
- package/dist/ast/python/queries.d.ts.map +1 -0
- package/dist/ast/python/queries.js +137 -0
- package/dist/ast/python/utils.d.ts +63 -0
- package/dist/ast/python/utils.d.ts.map +1 -0
- package/dist/ast/python/utils.js +159 -0
- package/dist/ast/quick-scan.d.ts +40 -0
- package/dist/ast/quick-scan.d.ts.map +1 -0
- package/dist/ast/quick-scan.js +287 -0
- package/dist/ast/rust/index.d.ts +9 -0
- package/dist/ast/rust/index.d.ts.map +1 -0
- package/dist/ast/rust/index.js +8 -0
- package/dist/ast/rust/parser.d.ts +31 -0
- package/dist/ast/rust/parser.d.ts.map +1 -0
- package/dist/ast/rust/parser.js +416 -0
- package/dist/ast/rust/parser.test.d.ts +5 -0
- package/dist/ast/rust/parser.test.d.ts.map +1 -0
- package/dist/ast/rust/parser.test.js +329 -0
- package/dist/ast/rust/queries.d.ts +66 -0
- package/dist/ast/rust/queries.d.ts.map +1 -0
- package/dist/ast/rust/queries.js +132 -0
- package/dist/ast/rust/utils.d.ts +91 -0
- package/dist/ast/rust/utils.d.ts.map +1 -0
- package/dist/ast/rust/utils.js +254 -0
- package/dist/ast/swift/index.d.ts +10 -0
- package/dist/ast/swift/index.d.ts.map +1 -0
- package/dist/ast/swift/index.js +8 -0
- package/dist/ast/swift/parser.d.ts +31 -0
- package/dist/ast/swift/parser.d.ts.map +1 -0
- package/dist/ast/swift/parser.js +554 -0
- package/dist/ast/swift/parser.test.d.ts +5 -0
- package/dist/ast/swift/parser.test.d.ts.map +1 -0
- package/dist/ast/swift/parser.test.js +398 -0
- package/dist/ast/swift/queries.d.ts +71 -0
- package/dist/ast/swift/queries.d.ts.map +1 -0
- package/dist/ast/swift/queries.js +137 -0
- package/dist/ast/swift/utils.d.ts +94 -0
- package/dist/ast/swift/utils.d.ts.map +1 -0
- package/dist/ast/swift/utils.js +411 -0
- package/dist/ast/types.d.ts +96 -0
- package/dist/ast/types.d.ts.map +1 -0
- package/dist/ast/types.js +21 -0
- package/dist/ast/typescript.d.ts +24 -0
- package/dist/ast/typescript.d.ts.map +1 -0
- package/dist/ast/typescript.js +357 -0
- package/dist/cache/file-hash.d.ts +33 -0
- package/dist/cache/file-hash.d.ts.map +1 -0
- package/dist/cache/file-hash.js +59 -0
- package/dist/cache/index.d.ts +9 -0
- package/dist/cache/index.d.ts.map +1 -0
- package/dist/cache/index.js +8 -0
- package/dist/cache/smart-cache.d.ts +68 -0
- package/dist/cache/smart-cache.d.ts.map +1 -0
- package/dist/cache/smart-cache.js +266 -0
- package/dist/cache/types.d.ts +102 -0
- package/dist/cache/types.d.ts.map +1 -0
- package/dist/cache/types.js +6 -0
- package/dist/cli/analyze.d.ts +43 -0
- package/dist/cli/analyze.d.ts.map +1 -0
- package/dist/cli/analyze.js +250 -0
- package/dist/cli/doctor.d.ts +2 -0
- package/dist/cli/doctor.d.ts.map +1 -0
- package/dist/cli/doctor.js +127 -0
- package/dist/cli/hooks.d.ts +14 -0
- package/dist/cli/hooks.d.ts.map +1 -0
- package/dist/cli/hooks.js +229 -0
- package/dist/cli/index.d.ts +5 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +4 -0
- package/dist/cli/setup.d.ts +10 -0
- package/dist/cli/setup.d.ts.map +1 -0
- package/dist/cli/setup.js +117 -0
- package/dist/cli/utils.d.ts +30 -0
- package/dist/cli/utils.d.ts.map +1 -0
- package/dist/cli/utils.js +116 -0
- package/dist/compressors/config.d.ts +9 -0
- package/dist/compressors/config.d.ts.map +1 -0
- package/dist/compressors/config.js +183 -0
- package/dist/compressors/conversation.d.ts +109 -0
- package/dist/compressors/conversation.d.ts.map +1 -0
- package/dist/compressors/conversation.js +404 -0
- package/dist/compressors/diff.d.ts +35 -0
- package/dist/compressors/diff.d.ts.map +1 -0
- package/dist/compressors/diff.js +389 -0
- package/dist/compressors/generic.d.ts +9 -0
- package/dist/compressors/generic.d.ts.map +1 -0
- package/dist/compressors/generic.js +188 -0
- package/dist/compressors/index.d.ts +31 -0
- package/dist/compressors/index.d.ts.map +1 -0
- package/dist/compressors/index.js +82 -0
- package/dist/compressors/logs.d.ts +9 -0
- package/dist/compressors/logs.d.ts.map +1 -0
- package/dist/compressors/logs.js +245 -0
- package/dist/compressors/multifile.d.ts +106 -0
- package/dist/compressors/multifile.d.ts.map +1 -0
- package/dist/compressors/multifile.js +498 -0
- package/dist/compressors/semantic.d.ts +33 -0
- package/dist/compressors/semantic.d.ts.map +1 -0
- package/dist/compressors/semantic.js +233 -0
- package/dist/compressors/stacktrace.d.ts +9 -0
- package/dist/compressors/stacktrace.d.ts.map +1 -0
- package/dist/compressors/stacktrace.js +259 -0
- package/dist/compressors/types.d.ts +146 -0
- package/dist/compressors/types.d.ts.map +1 -0
- package/dist/compressors/types.js +6 -0
- package/dist/config/output-config.d.ts +56 -0
- package/dist/config/output-config.d.ts.map +1 -0
- package/dist/config/output-config.js +78 -0
- package/dist/index.d.ts +21 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +27 -0
- package/dist/middleware/chain.d.ts +49 -0
- package/dist/middleware/chain.d.ts.map +1 -0
- package/dist/middleware/chain.js +126 -0
- package/dist/middleware/index.d.ts +4 -0
- package/dist/middleware/index.d.ts.map +1 -0
- package/dist/middleware/index.js +3 -0
- package/dist/middleware/logging.d.ts +8 -0
- package/dist/middleware/logging.d.ts.map +1 -0
- package/dist/middleware/logging.js +71 -0
- package/dist/middleware/types.d.ts +58 -0
- package/dist/middleware/types.d.ts.map +1 -0
- package/dist/middleware/types.js +7 -0
- package/dist/parsers/eslint.d.ts +8 -0
- package/dist/parsers/eslint.d.ts.map +1 -0
- package/dist/parsers/eslint.js +132 -0
- package/dist/parsers/generic.d.ts +8 -0
- package/dist/parsers/generic.d.ts.map +1 -0
- package/dist/parsers/generic.js +234 -0
- package/dist/parsers/index.d.ts +34 -0
- package/dist/parsers/index.d.ts.map +1 -0
- package/dist/parsers/index.js +216 -0
- package/dist/parsers/types.d.ts +84 -0
- package/dist/parsers/types.d.ts.map +1 -0
- package/dist/parsers/types.js +6 -0
- package/dist/parsers/typescript.d.ts +8 -0
- package/dist/parsers/typescript.d.ts.map +1 -0
- package/dist/parsers/typescript.js +107 -0
- package/dist/pipelines/definitions.d.ts +50 -0
- package/dist/pipelines/definitions.d.ts.map +1 -0
- package/dist/pipelines/definitions.js +206 -0
- package/dist/sandbox/executor.d.ts +12 -0
- package/dist/sandbox/executor.d.ts.map +1 -0
- package/dist/sandbox/executor.js +191 -0
- package/dist/sandbox/index.d.ts +11 -0
- package/dist/sandbox/index.d.ts.map +1 -0
- package/dist/sandbox/index.js +9 -0
- package/dist/sandbox/sandbox.test.d.ts +7 -0
- package/dist/sandbox/sandbox.test.d.ts.map +1 -0
- package/dist/sandbox/sandbox.test.js +202 -0
- package/dist/sandbox/sdk/analyze.d.ts +36 -0
- package/dist/sandbox/sdk/analyze.d.ts.map +1 -0
- package/dist/sandbox/sdk/analyze.js +413 -0
- package/dist/sandbox/sdk/analyze.test.d.ts +7 -0
- package/dist/sandbox/sdk/analyze.test.d.ts.map +1 -0
- package/dist/sandbox/sdk/analyze.test.js +191 -0
- package/dist/sandbox/sdk/code.d.ts +20 -0
- package/dist/sandbox/sdk/code.d.ts.map +1 -0
- package/dist/sandbox/sdk/code.js +104 -0
- package/dist/sandbox/sdk/compress.d.ts +23 -0
- package/dist/sandbox/sdk/compress.d.ts.map +1 -0
- package/dist/sandbox/sdk/compress.js +107 -0
- package/dist/sandbox/sdk/conversation.d.ts +148 -0
- package/dist/sandbox/sdk/conversation.d.ts.map +1 -0
- package/dist/sandbox/sdk/conversation.js +177 -0
- package/dist/sandbox/sdk/files.d.ts +29 -0
- package/dist/sandbox/sdk/files.d.ts.map +1 -0
- package/dist/sandbox/sdk/files.js +41 -0
- package/dist/sandbox/sdk/git.d.ts +37 -0
- package/dist/sandbox/sdk/git.d.ts.map +1 -0
- package/dist/sandbox/sdk/git.js +313 -0
- package/dist/sandbox/sdk/git.test.d.ts +8 -0
- package/dist/sandbox/sdk/git.test.d.ts.map +1 -0
- package/dist/sandbox/sdk/git.test.js +160 -0
- package/dist/sandbox/sdk/index.d.ts +16 -0
- package/dist/sandbox/sdk/index.d.ts.map +1 -0
- package/dist/sandbox/sdk/index.js +15 -0
- package/dist/sandbox/sdk/multifile.d.ts +63 -0
- package/dist/sandbox/sdk/multifile.d.ts.map +1 -0
- package/dist/sandbox/sdk/multifile.js +130 -0
- package/dist/sandbox/sdk/pipeline.d.ts +16 -0
- package/dist/sandbox/sdk/pipeline.d.ts.map +1 -0
- package/dist/sandbox/sdk/pipeline.js +454 -0
- package/dist/sandbox/sdk/pipeline.test.d.ts +7 -0
- package/dist/sandbox/sdk/pipeline.test.d.ts.map +1 -0
- package/dist/sandbox/sdk/pipeline.test.js +197 -0
- package/dist/sandbox/sdk/search.d.ts +36 -0
- package/dist/sandbox/sdk/search.d.ts.map +1 -0
- package/dist/sandbox/sdk/search.js +338 -0
- package/dist/sandbox/sdk/search.test.d.ts +7 -0
- package/dist/sandbox/sdk/search.test.d.ts.map +1 -0
- package/dist/sandbox/sdk/search.test.js +183 -0
- package/dist/sandbox/sdk/utils.d.ts +18 -0
- package/dist/sandbox/sdk/utils.d.ts.map +1 -0
- package/dist/sandbox/sdk/utils.js +24 -0
- package/dist/sandbox/security/code-analyzer.d.ts +15 -0
- package/dist/sandbox/security/code-analyzer.d.ts.map +1 -0
- package/dist/sandbox/security/code-analyzer.js +87 -0
- package/dist/sandbox/security/index.d.ts +6 -0
- package/dist/sandbox/security/index.d.ts.map +1 -0
- package/dist/sandbox/security/index.js +5 -0
- package/dist/sandbox/security/path-validator.d.ts +23 -0
- package/dist/sandbox/security/path-validator.d.ts.map +1 -0
- package/dist/sandbox/security/path-validator.js +113 -0
- package/dist/sandbox/types.d.ts +577 -0
- package/dist/sandbox/types.d.ts.map +1 -0
- package/dist/sandbox/types.js +14 -0
- package/dist/server.d.ts +36 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/server.js +133 -0
- package/dist/summarizers/build-logs.d.ts +11 -0
- package/dist/summarizers/build-logs.d.ts.map +1 -0
- package/dist/summarizers/build-logs.js +234 -0
- package/dist/summarizers/generic.d.ts +11 -0
- package/dist/summarizers/generic.d.ts.map +1 -0
- package/dist/summarizers/generic.js +93 -0
- package/dist/summarizers/index.d.ts +20 -0
- package/dist/summarizers/index.d.ts.map +1 -0
- package/dist/summarizers/index.js +43 -0
- package/dist/summarizers/server-logs.d.ts +11 -0
- package/dist/summarizers/server-logs.d.ts.map +1 -0
- package/dist/summarizers/server-logs.js +215 -0
- package/dist/summarizers/test-logs.d.ts +11 -0
- package/dist/summarizers/test-logs.d.ts.map +1 -0
- package/dist/summarizers/test-logs.js +258 -0
- package/dist/summarizers/types.d.ts +146 -0
- package/dist/summarizers/types.d.ts.map +1 -0
- package/dist/summarizers/types.js +21 -0
- package/dist/tools/analyze-build-output.d.ts +30 -0
- package/dist/tools/analyze-build-output.d.ts.map +1 -0
- package/dist/tools/analyze-build-output.js +45 -0
- package/dist/tools/analyze-context.d.ts +23 -0
- package/dist/tools/analyze-context.d.ts.map +1 -0
- package/dist/tools/analyze-context.js +78 -0
- package/dist/tools/auto-optimize.d.ts +9 -0
- package/dist/tools/auto-optimize.d.ts.map +1 -0
- package/dist/tools/auto-optimize.js +191 -0
- package/dist/tools/code-execute.d.ts +9 -0
- package/dist/tools/code-execute.d.ts.map +1 -0
- package/dist/tools/code-execute.js +84 -0
- package/dist/tools/code-skeleton.d.ts +33 -0
- package/dist/tools/code-skeleton.d.ts.map +1 -0
- package/dist/tools/code-skeleton.js +206 -0
- package/dist/tools/compress-context.d.ts +33 -0
- package/dist/tools/compress-context.d.ts.map +1 -0
- package/dist/tools/compress-context.js +64 -0
- package/dist/tools/context-budget.d.ts +43 -0
- package/dist/tools/context-budget.d.ts.map +1 -0
- package/dist/tools/context-budget.js +260 -0
- package/dist/tools/context-budget.test.d.ts +5 -0
- package/dist/tools/context-budget.test.d.ts.map +1 -0
- package/dist/tools/context-budget.test.js +219 -0
- package/dist/tools/conversation-compress.d.ts +46 -0
- package/dist/tools/conversation-compress.d.ts.map +1 -0
- package/dist/tools/conversation-compress.js +78 -0
- package/dist/tools/conversation-memory.d.ts +75 -0
- package/dist/tools/conversation-memory.d.ts.map +1 -0
- package/dist/tools/conversation-memory.js +289 -0
- package/dist/tools/deduplicate-errors.d.ts +30 -0
- package/dist/tools/deduplicate-errors.d.ts.map +1 -0
- package/dist/tools/deduplicate-errors.js +72 -0
- package/dist/tools/detect-retry-loop.d.ts +40 -0
- package/dist/tools/detect-retry-loop.d.ts.map +1 -0
- package/dist/tools/detect-retry-loop.js +212 -0
- package/dist/tools/diff-compress.d.ts +40 -0
- package/dist/tools/diff-compress.d.ts.map +1 -0
- package/dist/tools/diff-compress.js +94 -0
- package/dist/tools/discover-tools.d.ts +11 -0
- package/dist/tools/discover-tools.d.ts.map +1 -0
- package/dist/tools/discover-tools.js +163 -0
- package/dist/tools/dynamic-loader.d.ts +131 -0
- package/dist/tools/dynamic-loader.d.ts.map +1 -0
- package/dist/tools/dynamic-loader.js +378 -0
- package/dist/tools/dynamic-loader.test.d.ts +10 -0
- package/dist/tools/dynamic-loader.test.d.ts.map +1 -0
- package/dist/tools/dynamic-loader.test.js +164 -0
- package/dist/tools/lazy-mcp.d.ts +31 -0
- package/dist/tools/lazy-mcp.d.ts.map +1 -0
- package/dist/tools/lazy-mcp.js +151 -0
- package/dist/tools/lazy-mcp.test.d.ts +10 -0
- package/dist/tools/lazy-mcp.test.d.ts.map +1 -0
- package/dist/tools/lazy-mcp.test.js +172 -0
- package/dist/tools/multifile-compress.d.ts +36 -0
- package/dist/tools/multifile-compress.d.ts.map +1 -0
- package/dist/tools/multifile-compress.js +223 -0
- package/dist/tools/optimization-tips.d.ts +18 -0
- package/dist/tools/optimization-tips.d.ts.map +1 -0
- package/dist/tools/optimization-tips.js +133 -0
- package/dist/tools/registry.d.ts +70 -0
- package/dist/tools/registry.d.ts.map +1 -0
- package/dist/tools/registry.js +169 -0
- package/dist/tools/semantic-compress.d.ts +39 -0
- package/dist/tools/semantic-compress.d.ts.map +1 -0
- package/dist/tools/semantic-compress.js +113 -0
- package/dist/tools/semantic-compress.test.d.ts +5 -0
- package/dist/tools/semantic-compress.test.d.ts.map +1 -0
- package/dist/tools/semantic-compress.test.js +182 -0
- package/dist/tools/session-stats.d.ts +34 -0
- package/dist/tools/session-stats.d.ts.map +1 -0
- package/dist/tools/session-stats.js +194 -0
- package/dist/tools/set-output-config.d.ts +38 -0
- package/dist/tools/set-output-config.d.ts.map +1 -0
- package/dist/tools/set-output-config.js +122 -0
- package/dist/tools/smart-cache-tool.d.ts +38 -0
- package/dist/tools/smart-cache-tool.d.ts.map +1 -0
- package/dist/tools/smart-cache-tool.js +224 -0
- package/dist/tools/smart-file-read.d.ts +52 -0
- package/dist/tools/smart-file-read.d.ts.map +1 -0
- package/dist/tools/smart-file-read.js +481 -0
- package/dist/tools/smart-pipeline.d.ts +40 -0
- package/dist/tools/smart-pipeline.d.ts.map +1 -0
- package/dist/tools/smart-pipeline.js +295 -0
- package/dist/tools/summarize-logs.d.ts +36 -0
- package/dist/tools/summarize-logs.d.ts.map +1 -0
- package/dist/tools/summarize-logs.js +184 -0
- package/dist/tools/token-budget.test.d.ts +11 -0
- package/dist/tools/token-budget.test.d.ts.map +1 -0
- package/dist/tools/token-budget.test.js +275 -0
- package/dist/utils/bm25.d.ts +86 -0
- package/dist/utils/bm25.d.ts.map +1 -0
- package/dist/utils/bm25.js +153 -0
- package/dist/utils/bm25.test.d.ts +5 -0
- package/dist/utils/bm25.test.d.ts.map +1 -0
- package/dist/utils/bm25.test.js +156 -0
- package/dist/utils/command-normalizer.d.ts +39 -0
- package/dist/utils/command-normalizer.d.ts.map +1 -0
- package/dist/utils/command-normalizer.js +90 -0
- package/dist/utils/content-detector.d.ts +27 -0
- package/dist/utils/content-detector.d.ts.map +1 -0
- package/dist/utils/content-detector.js +127 -0
- package/dist/utils/embeddings.d.ts +54 -0
- package/dist/utils/embeddings.d.ts.map +1 -0
- package/dist/utils/embeddings.js +97 -0
- package/dist/utils/embeddings.test.d.ts +8 -0
- package/dist/utils/embeddings.test.d.ts.map +1 -0
- package/dist/utils/embeddings.test.js +96 -0
- package/dist/utils/error-normalizer.d.ts +39 -0
- package/dist/utils/error-normalizer.d.ts.map +1 -0
- package/dist/utils/error-normalizer.js +233 -0
- package/dist/utils/hybrid-search.d.ts +79 -0
- package/dist/utils/hybrid-search.d.ts.map +1 -0
- package/dist/utils/hybrid-search.js +146 -0
- package/dist/utils/hybrid-search.test.d.ts +5 -0
- package/dist/utils/hybrid-search.test.d.ts.map +1 -0
- package/dist/utils/hybrid-search.test.js +172 -0
- package/dist/utils/index.d.ts +13 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +12 -0
- package/dist/utils/language-detector.d.ts +27 -0
- package/dist/utils/language-detector.d.ts.map +1 -0
- package/dist/utils/language-detector.js +94 -0
- package/dist/utils/log-parser.d.ts +46 -0
- package/dist/utils/log-parser.d.ts.map +1 -0
- package/dist/utils/log-parser.js +287 -0
- package/dist/utils/output-estimator.d.ts +54 -0
- package/dist/utils/output-estimator.d.ts.map +1 -0
- package/dist/utils/output-estimator.js +119 -0
- package/dist/utils/output-estimator.test.d.ts +5 -0
- package/dist/utils/output-estimator.test.d.ts.map +1 -0
- package/dist/utils/output-estimator.test.js +115 -0
- package/dist/utils/output-similarity.d.ts +48 -0
- package/dist/utils/output-similarity.d.ts.map +1 -0
- package/dist/utils/output-similarity.js +140 -0
- package/dist/utils/project-detector.d.ts +16 -0
- package/dist/utils/project-detector.d.ts.map +1 -0
- package/dist/utils/project-detector.js +119 -0
- package/dist/utils/segment-scorer.d.ts +99 -0
- package/dist/utils/segment-scorer.d.ts.map +1 -0
- package/dist/utils/segment-scorer.js +148 -0
- package/dist/utils/signature-grouper.d.ts +58 -0
- package/dist/utils/signature-grouper.d.ts.map +1 -0
- package/dist/utils/signature-grouper.js +185 -0
- package/dist/utils/tfidf.d.ts +45 -0
- package/dist/utils/tfidf.d.ts.map +1 -0
- package/dist/utils/tfidf.js +204 -0
- package/dist/utils/tfidf.test.d.ts +5 -0
- package/dist/utils/tfidf.test.d.ts.map +1 -0
- package/dist/utils/tfidf.test.js +115 -0
- package/dist/utils/token-counter.d.ts +35 -0
- package/dist/utils/token-counter.d.ts.map +1 -0
- package/dist/utils/token-counter.js +83 -0
- package/dist/utils/toon-serializer.d.ts +120 -0
- package/dist/utils/toon-serializer.d.ts.map +1 -0
- package/dist/utils/toon-serializer.js +472 -0
- package/dist/utils/toon-serializer.test.d.ts +7 -0
- package/dist/utils/toon-serializer.test.d.ts.map +1 -0
- package/dist/utils/toon-serializer.test.js +290 -0
- package/package.json +63 -0
- package/scripts/install.ps1 +133 -0
- package/scripts/install.sh +183 -0
- package/scripts/pre-commit-hook.sh +86 -0
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token Budget Tests
|
|
3
|
+
*
|
|
4
|
+
* Ensures tool definitions stay within token budgets to prevent
|
|
5
|
+
* context window bloat from MCP tool descriptions.
|
|
6
|
+
*
|
|
7
|
+
* These tests guard against regression - any change that increases
|
|
8
|
+
* token consumption will fail the test.
|
|
9
|
+
*/
|
|
10
|
+
import { describe, it, expect } from "vitest";
|
|
11
|
+
import { countTokens } from "../utils/token-counter.js";
|
|
12
|
+
// Import all tool definitions
|
|
13
|
+
import { autoOptimizeTool } from "./auto-optimize.js";
|
|
14
|
+
import { smartFileReadTool } from "./smart-file-read.js";
|
|
15
|
+
import { discoverToolsTool } from "./discover-tools.js";
|
|
16
|
+
import { analyzeBuildOutputTool } from "./analyze-build-output.js";
|
|
17
|
+
import { compressContextTool } from "./compress-context.js";
|
|
18
|
+
import { semanticCompressTool } from "./semantic-compress.js";
|
|
19
|
+
import { diffCompressTool } from "./diff-compress.js";
|
|
20
|
+
import { summarizeLogsTool } from "./summarize-logs.js";
|
|
21
|
+
import { codeSkeletonTool } from "./code-skeleton.js";
|
|
22
|
+
import { contextBudgetTool } from "./context-budget.js";
|
|
23
|
+
import { conversationCompressTool } from "./conversation-compress.js";
|
|
24
|
+
import { deduplicateErrorsTool } from "./deduplicate-errors.js";
|
|
25
|
+
import { smartCacheTool } from "./smart-cache-tool.js";
|
|
26
|
+
import { smartPipelineTool } from "./smart-pipeline.js";
|
|
27
|
+
// ============================================================================
|
|
28
|
+
// Token Budgets (in tokens)
|
|
29
|
+
// ============================================================================
|
|
30
|
+
/**
 * Per-tool token ceilings, keyed by tool name.
 *
 * The individual-budget tests look a tool up here by its `name` and require
 * the token count of its serialized definition to be at or below the value.
 * The limits are kept deliberately tight so that bloat is caught early.
 *
 * 2024-12: Tightened budgets after schema optimization
 */
const TOKEN_BUDGETS = {
  // Core tools (always loaded) - ultra-minimal
  auto_optimize: 90,
  smart_file_read: 120,
  discover_tools: 80, // +5 for TOON format option

  // Compress category - aggressively optimized
  compress_context: 95,
  semantic_compress: 60,
  diff_compress: 75,
  conversation_compress: 110,

  // Analyze category
  analyze_build_output: 95,
  context_budget: 105,

  // Logs category
  summarize_logs: 115,
  deduplicate_errors: 65,

  // Code category
  code_skeleton: 75,
  smart_cache: 90,

  // Pipeline category
  smart_pipeline: 80,
};
|
|
58
|
+
/**
|
|
59
|
+
* Maximum tokens for the entire ListTools response (core tools only).
|
|
60
|
+
* Currently: auto_optimize + smart_file_read + discover_tools
|
|
61
|
+
* 2024-12: Reduced from 500 after schema optimization
|
|
62
|
+
*/
|
|
63
|
+
// Upper bound for the summed token counts of the CORE_TOOLS definitions
// (asserted in the "Aggregate budgets" tests). For comparison, the three
// core entries in TOKEN_BUDGETS add up to 290 (90 + 120 + 80).
const CORE_TOOLS_BUDGET = 300;
|
|
64
|
+
/**
|
|
65
|
+
* Maximum tokens for all tools combined.
|
|
66
|
+
* 2024-12: Reduced from 1500 after aggressive schema optimization
|
|
67
|
+
*/
|
|
68
|
+
// Upper bound for the summed token counts of every definition in ALL_TOOLS
// (asserted in the "Aggregate budgets" tests). NOTE(review): the per-tool
// limits in TOKEN_BUDGETS sum to 1255, so this aggregate cap can fail even
// when each tool is within its own budget - confirm that is intended.
const ALL_TOOLS_BUDGET = 1200;
|
|
69
|
+
// ============================================================================
|
|
70
|
+
// Helper Functions
|
|
71
|
+
// ============================================================================
|
|
72
|
+
/**
 * Produce the JSON text for one tool definition, limited to the fields that
 * are measured as part of a ListTools entry.
 *
 * Only `name`, `description` and `inputSchema` are copied; any other
 * properties on the tool object are left out of the output.
 *
 * @param {object} tool - Tool definition to serialize.
 * @returns {string} JSON string with the three fields in that order.
 */
function serializeToolForMCP(tool) {
  const { name, description, inputSchema } = tool;
  return JSON.stringify({ name, description, inputSchema });
}
|
|
82
|
+
/**
 * Token cost of a single tool definition.
 *
 * The tool is first turned into JSON by serializeToolForMCP, and that string
 * is handed to countTokens.
 *
 * @param {object} tool - Tool definition to measure.
 * @returns {number} The value countTokens reports for the serialized tool
 *   (the tests compare it numerically against the budgets).
 */
function countToolTokens(tool) {
  return countTokens(serializeToolForMCP(tool));
}
|
|
89
|
+
// ============================================================================
|
|
90
|
+
// All Tools
|
|
91
|
+
// ============================================================================
|
|
92
|
+
// Core tools: the subset whose combined size is checked against
// CORE_TOOLS_BUDGET.
const CORE_TOOLS = [autoOptimizeTool, smartFileReadTool, discoverToolsTool];

// Every tool definition covered by this suite. The core tools come first,
// followed by the remaining tools.
const ALL_TOOLS = [
  ...CORE_TOOLS,
  analyzeBuildOutputTool,
  compressContextTool,
  semanticCompressTool,
  diffCompressTool,
  summarizeLogsTool,
  codeSkeletonTool,
  contextBudgetTool,
  conversationCompressTool,
  deduplicateErrorsTool,
  smartCacheTool,
  smartPipelineTool,
];
|
|
113
|
+
// ============================================================================
|
|
114
|
+
// Tests
|
|
115
|
+
// ============================================================================
|
|
116
|
+
describe("Tool Token Budgets", () => {
  // One [name, tool] row per tool so every tool is reported as its own case.
  const toolCases = ALL_TOOLS.map((t) => [t.name, t]);

  // Adds up the token counts of the serialized definitions of `tools`.
  const sumToolTokens = (tools) => {
    let total = 0;
    for (const tool of tools) {
      total += countToolTokens(tool);
    }
    return total;
  };

  describe("Individual tool budgets", () => {
    it.each(toolCases)("%s should be under budget", (name, tool) => {
      const budget = TOKEN_BUDGETS[name];
      const tokens = countToolTokens(tool);

      expect(tokens).toBeLessThanOrEqual(budget);

      // Report how much of the budget is used (only reached when the check passes).
      const usage = Math.round((tokens / budget) * 100);
      console.log(` ${name}: ${tokens}/${budget} tokens (${usage}%)`);
    });
  });

  describe("Aggregate budgets", () => {
    it("core tools should be under combined budget", () => {
      const totalTokens = sumToolTokens(CORE_TOOLS);

      expect(totalTokens).toBeLessThanOrEqual(CORE_TOOLS_BUDGET);

      const usage = Math.round((totalTokens / CORE_TOOLS_BUDGET) * 100);
      console.log(` Core tools total: ${totalTokens}/${CORE_TOOLS_BUDGET} tokens (${usage}%)`);
    });

    it("all tools should be under combined budget", () => {
      const totalTokens = sumToolTokens(ALL_TOOLS);

      expect(totalTokens).toBeLessThanOrEqual(ALL_TOOLS_BUDGET);

      const usage = Math.round((totalTokens / ALL_TOOLS_BUDGET) * 100);
      console.log(` All tools total: ${totalTokens}/${ALL_TOOLS_BUDGET} tokens (${usage}%)`);
    });
  });

  describe("Token distribution", () => {
    it("should have balanced token distribution (no tool > 20% of total)", () => {
      const totalTokens = sumToolTokens(ALL_TOOLS);

      // Each tool's share of the combined total must stay strictly below 20%.
      ALL_TOOLS.forEach((tool) => {
        const share = (countToolTokens(tool) / totalTokens) * 100;
        expect(share).toBeLessThan(20);
      });
    });
  });
});
|
|
152
|
+
describe("Tool Schema Constraints", () => {
  // One [name, tool] row per tool; shared by both parameterized checks.
  const toolCases = ALL_TOOLS.map((t) => [t.name, t]);

  describe("Description length", () => {
    it.each(toolCases)("%s description should be concise (< 150 chars)", (_name, tool) => {
      expect(tool.description.length).toBeLessThan(150);
    });
  });

  describe("Schema structure", () => {
    it.each(toolCases)("%s should not have deeply nested descriptions", (_name, tool) => {
      const schemaJson = JSON.stringify(tool.inputSchema);

      // Counts every occurrence of the quoted text "description" in the
      // serialized schema, at any nesting depth.
      const hits = schemaJson.match(/"description"/g);
      const descriptionCount = hits === null ? 0 : hits.length;

      // Allow max 3 descriptions per schema (for complex tools)
      expect(descriptionCount).toBeLessThanOrEqual(3);
    });
  });
});
|
|
171
|
+
describe("ListTools Response Size", () => {
  // Builds a `{ tools: [...] }` payload containing name, description and
  // inputSchema for each tool, serializes it, and counts its tokens.
  const measureListTools = (tools) => {
    const payload = {
      tools: tools.map(({ name, description, inputSchema }) => ({
        name,
        description,
        inputSchema,
      })),
    };
    const serialized = JSON.stringify(payload);
    return { serialized, tokens: countTokens(serialized) };
  };

  it("should generate compact ListTools response for core tools", () => {
    const { serialized, tokens } = measureListTools(CORE_TOOLS);

    // ListTools response should be under 600 tokens for core tools
    expect(tokens).toBeLessThan(600);
    console.log(` ListTools (core): ${serialized.length} chars, ${tokens} tokens`);
  });

  it("should generate compact ListTools response for all tools", () => {
    const { serialized, tokens } = measureListTools(ALL_TOOLS);

    // Full ListTools response should be under 1800 tokens
    expect(tokens).toBeLessThan(1800);
    console.log(` ListTools (all): ${serialized.length} chars, ${tokens} tokens`);
  });
});
|
|
201
|
+
describe("Token Reduction Verification", () => {
  /**
   * Baseline values from BEFORE optimization (commit d4cdb98).
   * These are used to verify we actually reduced tokens.
   */
  const BASELINE_TOKENS = {
    auto_optimize: 287,
    smart_file_read: 342,
    discover_tools: 153,
    core_total: 782,
  };

  /**
   * Percentage by which `current` is below `baseline`, unrounded.
   * The assertions use this exact value; only the log output is rounded.
   */
  const reductionPercent = (current, baseline) => (1 - current / baseline) * 100;

  // [baseline key / test label, tool, minimum required reduction in percent]
  const PER_TOOL_CASES = [
    ["auto_optimize", autoOptimizeTool, 30],
    ["smart_file_read", smartFileReadTool, 20],
    ["discover_tools", discoverToolsTool, 20],
  ];

  it.each(PER_TOOL_CASES)("%s should be reduced from baseline", (name, tool, minReduction) => {
    const current = countToolTokens(tool);
    const baseline = BASELINE_TOKENS[name];
    const exactReduction = reductionPercent(current, baseline);

    expect(current).toBeLessThan(baseline);
    // "At least N%": inclusive comparison on the unrounded value, so a
    // reduction of exactly N% (or just above it) is accepted.
    expect(exactReduction).toBeGreaterThanOrEqual(minReduction);

    const reduction = Math.round(exactReduction);
    console.log(` ${name}: ${baseline} → ${current} (${reduction}% reduction)`);
  });

  it("core tools total should be at least 40% reduced from baseline", () => {
    const currentTotal = CORE_TOOLS.reduce((sum, tool) => sum + countToolTokens(tool), 0);
    const baseline = BASELINE_TOKENS.core_total;
    const exactReduction = reductionPercent(currentTotal, baseline);

    expect(currentTotal).toBeLessThan(baseline);
    // At least 40% total reduction (inclusive, unrounded).
    expect(exactReduction).toBeGreaterThanOrEqual(40);

    const reduction = Math.round(exactReduction);
    console.log(` Core total: ${baseline} → ${currentTotal} (${reduction}% reduction)`);
  });
});
|
|
245
|
+
describe("Regression Prevention", () => {
    /**
     * Recorded token count per tool at the time of the last intentional change.
     * Adjust an entry when a feature is added on purpose; any other drift
     * beyond the tolerance fails the suite.
     *
     * 2024-12: Optimized schemas to reduce token overhead
     * - Removed property descriptions (moved to tool description)
     * - Removed rarely-used properties from public schema
     * - Simplified nested object type declarations
     * - Added TOON format output option to discover_tools
     */
    const CURRENT_SNAPSHOT = {
        auto_optimize: 80,
        smart_file_read: 106,
        discover_tools: 78, // +15 for TOON format option (list|toon|toon-tabular)
    };
    // Allowed drift in either direction, in tokens
    const TOLERANCE = 5;
    const snapshotCases = Object.entries(CURRENT_SNAPSHOT);
    it.each(snapshotCases)("%s should match snapshot (±5 tokens)", (toolName, snapshotTokens) => {
        const match = ALL_TOOLS.find((candidate) => candidate.name === toolName);
        if (!match) {
            throw new Error(`Tool ${toolName} not found`);
        }
        const measured = countToolTokens(match);
        const drift = Math.abs(measured - snapshotTokens);
        expect(drift).toBeLessThanOrEqual(TOLERANCE);
        // Within tolerance but not exact: report it so the snapshot can be refreshed
        if (drift > 0) {
            console.log(` ${toolName}: expected ${snapshotTokens}, got ${measured} (diff: ${drift})`);
        }
    });
});
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BM25 (Best Matching 25) implementation for tool search
|
|
3
|
+
*
|
|
4
|
+
* BM25 is a ranking function used by search engines to rank documents
|
|
5
|
+
* based on query terms appearing in each document.
|
|
6
|
+
*
|
|
7
|
+
* Formula (per query term, summed over every term that matches the document): score = IDF * ((tf * (k1 + 1)) / (tf + k1 * (1 - b + b * (dl/avgdl))))
|
|
8
|
+
*
|
|
9
|
+
* Where:
|
|
10
|
+
* - tf = term frequency in document
|
|
11
|
+
* - k1 = term saturation parameter (default: 1.2)
|
|
12
|
+
* - b = length normalization parameter (default: 0.75)
|
|
13
|
+
* - dl = document length (number of terms)
|
|
14
|
+
* - avgdl = average document length across corpus
|
|
15
|
+
* - IDF = log((N - df + 0.5) / (df + 0.5) + 1)
|
|
16
|
+
* - N = total number of documents
|
|
17
|
+
* - df = document frequency (docs containing term)
|
|
18
|
+
*/
|
|
19
|
+
/**
|
|
20
|
+
* Configuration options for BM25 algorithm
|
|
21
|
+
*/
|
|
22
|
+
export interface BM25Options {
|
|
23
|
+
/** Term-frequency saturation parameter: larger values let repeated occurrences of a term keep adding to the score. Falls back to 1.2 when omitted. */
|
|
24
|
+
k1?: number;
|
|
25
|
+
/** Document-length normalization strength: 0 = no normalization, 1 = full normalization. Falls back to 0.75 when omitted. */
|
|
26
|
+
b?: number;
|
|
27
|
+
}
|
|
28
|
+
/**
|
|
29
|
+
* Result item from BM25 search
|
|
30
|
+
*/
|
|
31
|
+
export interface BM25Result<T> {
|
|
32
|
+
/** The corpus item itself, as it was passed to createBM25Index */
|
|
33
|
+
item: T;
|
|
34
|
+
/** BM25 relevance score, summed over the matched query terms (higher = more relevant) */
|
|
35
|
+
score: number;
|
|
36
|
+
/** Tokenized query terms that occur in this document, in query order */
|
|
37
|
+
matchedTerms: string[];
|
|
38
|
+
}
|
|
39
|
+
/**
|
|
40
|
+
* BM25 search index interface
|
|
41
|
+
*/
|
|
42
|
+
export interface BM25Index<T> {
|
|
43
|
+
/** Search the index; returns only documents matching at least one query term, highest score first */
|
|
44
|
+
search: (query: string) => BM25Result<T>[];
|
|
45
|
+
/** Get corpus statistics: document count, mean number of terms per document, and number of distinct terms */
|
|
46
|
+
stats: () => {
|
|
47
|
+
documentCount: number;
|
|
48
|
+
|
|
49
|
+
avgDocLength: number;
|
|
50
|
+
vocabularySize: number;
|
|
51
|
+
};
|
|
52
|
+
}
|
|
52
|
+
/**
|
|
53
|
+
* Tokenize text into searchable terms, keeping them in their original order
|
|
54
|
+
*
|
|
55
|
+
* - Lowercases text
|
|
56
|
+
* - Replaces punctuation with spaces, so punctuation also separates words (underscores are kept)
|
|
57
|
+
* - Splits on whitespace
|
|
58
|
+
* - Drops terms shorter than 2 characters
|
|
59
|
+
*/
|
|
60
|
+
export declare function tokenize(text: string): string[];
|
|
61
|
+
/**
|
|
62
|
+
* Create a BM25 search index from a collection of items
|
|
63
|
+
*
|
|
64
|
+
* @param items - Array of items to index
|
|
65
|
+
* @param getSearchableText - Function to extract searchable text from each item
|
|
66
|
+
* @param options - BM25 configuration options (k1 defaults to 1.2, b to 0.75)
|
|
67
|
+
* @returns BM25 search index; search() yields only matching items, highest score first
|
|
68
|
+
*
|
|
69
|
+
* @example
|
|
70
|
+
* ```typescript
|
|
71
|
+
* const tools = [
|
|
72
|
+
* { name: "compress", description: "Compress content" },
|
|
73
|
+
* { name: "analyze", description: "Analyze build output" }
|
|
74
|
+
* ];
|
|
75
|
+
*
|
|
76
|
+
* const index = createBM25Index(
|
|
77
|
+
* tools,
|
|
78
|
+
* (tool) => `${tool.name} ${tool.description}`
|
|
79
|
+
* );
|
|
80
|
+
*
|
|
81
|
+
* const results = index.search("compress content");
|
|
82
|
+
* // [{ item: { name: "compress", ... }, score: ≈1.73, matchedTerms: ["compress", "content"] }]
|
|
83
|
+
* ```
|
|
84
|
+
*/
|
|
85
|
+
export declare function createBM25Index<T>(items: T[], getSearchableText: (item: T) => string, options?: BM25Options): BM25Index<T>;
|
|
86
|
+
//# sourceMappingURL=bm25.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bm25.d.ts","sourceRoot":"","sources":["../../src/utils/bm25.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,sFAAsF;IACtF,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,wFAAwF;IACxF,CAAC,CAAC,EAAE,MAAM,CAAC;CACZ;AAED;;GAEG;AACH,MAAM,WAAW,UAAU,CAAC,CAAC;IAC3B,oCAAoC;IACpC,IAAI,EAAE,CAAC,CAAC;IACR,oDAAoD;IACpD,KAAK,EAAE,MAAM,CAAC;IACd,gDAAgD;IAChD,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,SAAS,CAAC,CAAC;IAC1B,iDAAiD;IACjD,MAAM,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,UAAU,CAAC,CAAC,CAAC,EAAE,CAAC;IAC3C,4BAA4B;IAC5B,KAAK,EAAE,MAAM;QAAE,aAAa,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAC;QAAC,cAAc,EAAE,MAAM,CAAA;KAAE,CAAC;CACtF;AAED;;;;;;;GAOG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAM/C;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,eAAe,CAAC,CAAC,EAC/B,KAAK,EAAE,CAAC,EAAE,EACV,iBAAiB,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,MAAM,EACtC,OAAO,CAAC,EAAE,WAAW,GACpB,SAAS,CAAC,CAAC,CAAC,CAkHd"}
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BM25 (Best Matching 25) implementation for tool search
|
|
3
|
+
*
|
|
4
|
+
* BM25 is a ranking function used by search engines to rank documents
|
|
5
|
+
* based on query terms appearing in each document.
|
|
6
|
+
*
|
|
7
|
+
* Formula (per query term, summed over every term that matches the document): score = IDF * ((tf * (k1 + 1)) / (tf + k1 * (1 - b + b * (dl/avgdl))))
|
|
8
|
+
*
|
|
9
|
+
* Where:
|
|
10
|
+
* - tf = term frequency in document
|
|
11
|
+
* - k1 = term saturation parameter (default: 1.2)
|
|
12
|
+
* - b = length normalization parameter (default: 0.75)
|
|
13
|
+
* - dl = document length (number of terms)
|
|
14
|
+
* - avgdl = average document length across corpus
|
|
15
|
+
* - IDF = log((N - df + 0.5) / (df + 0.5) + 1)
|
|
16
|
+
* - N = total number of documents
|
|
17
|
+
* - df = document frequency (docs containing term)
|
|
18
|
+
*/
|
|
19
|
+
/**
 * Tokenize text into searchable terms.
 *
 * - Lowercases text
 * - Replaces every character that is not a letter, combining mark, digit,
 *   underscore or whitespace with a space, so punctuation also separates words
 * - Splits on whitespace
 * - Drops terms shorter than 2 characters
 *
 * Letters, marks and digits are matched with Unicode property escapes rather
 * than the ASCII-only `\w`, so accented and non-Latin words stay whole instead
 * of being cut at every non-ASCII character ("café" used to become "caf").
 * Results for plain ASCII input are unchanged.
 *
 * @param {string} text - Raw text to tokenize.
 * @returns {string[]} Lowercased terms, in their original order.
 */
export function tokenize(text) {
    return text
        .toLowerCase()
        .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, " ")
        .split(/\s+/)
        .filter((word) => word.length >= 2);
}
|
|
34
|
+
/**
 * Create a BM25 search index from a collection of items.
 *
 * Every item is tokenized once when the index is built; its term counts and
 * length are stored next to the item, so `search()` only has to look terms up.
 * The index works on this build-time snapshot: later changes to the `items`
 * array are not reflected.
 *
 * All items take part in the search, including falsy ones such as `0`, `""`
 * or `false` (these used to be silently dropped from the results).
 *
 * @param items - Array of items to index
 * @param getSearchableText - Function to extract searchable text from each item
 * @param options - BM25 configuration options (k1 defaults to 1.2, b to 0.75)
 * @returns BM25 search index. `search(query)` returns the items matching at
 *   least one query term, highest score first; `stats()` returns the document
 *   count, average document length and vocabulary size.
 *
 * @example
 * ```typescript
 * const tools = [
 *   { name: "compress", description: "Compress content" },
 *   { name: "analyze", description: "Analyze build output" }
 * ];
 *
 * const index = createBM25Index(
 *   tools,
 *   (tool) => `${tool.name} ${tool.description}`
 * );
 *
 * const results = index.search("compress content");
 * // [{ item: { name: "compress", ... }, score: ≈1.73, matchedTerms: ["compress", "content"] }]
 * ```
 */
export function createBM25Index(items, getSearchableText, options) {
    const k1 = options?.k1 ?? 1.2;
    const b = options?.b ?? 0.75;
    // Handle empty corpus (also avoids a 0 / 0 average length below)
    if (items.length === 0) {
        return {
            search: () => [],
            stats: () => ({ documentCount: 0, avgDocLength: 0, vocabularySize: 0 }),
        };
    }
    // Tokenize each item once and keep its term-frequency table with it
    const corpus = items.map((item) => {
        const terms = tokenize(getSearchableText(item));
        const termFreq = new Map();
        for (const term of terms) {
            termFreq.set(term, (termFreq.get(term) ?? 0) + 1);
        }
        return { item, length: terms.length, termFreq };
    });
    const N = corpus.length;
    // Average document length across the corpus
    const totalLength = corpus.reduce((sum, doc) => sum + doc.length, 0);
    const avgdl = totalLength / N;
    // Document frequency map (how many docs contain each term)
    const df = new Map();
    for (const { termFreq } of corpus) {
        for (const term of termFreq.keys()) {
            df.set(term, (df.get(term) ?? 0) + 1);
        }
    }
    /**
     * Calculate IDF (Inverse Document Frequency) for a term:
     * log((N - df + 0.5) / (df + 0.5) + 1). The "+ 1" inside the log keeps the
     * value positive even for terms present in every document.
     */
    function idf(term) {
        const docFreq = df.get(term) ?? 0;
        return Math.log((N - docFreq + 0.5) / (docFreq + 0.5) + 1);
    }
    /**
     * Score one indexed document against the tokenized query.
     * Returns the summed BM25 score and the query terms found in the document.
     */
    function scoreDocument(doc, queryTerms) {
        // Length normalization is the same for every term of this document
        const lengthNorm = k1 * (1 - b + b * (doc.length / avgdl));
        let score = 0;
        const matchedTerms = [];
        for (const term of queryTerms) {
            const tf = doc.termFreq.get(term) ?? 0;
            if (tf > 0) {
                matchedTerms.push(term);
                // BM25 term score formula
                score += idf(term) * ((tf * (k1 + 1)) / (tf + lengthNorm));
            }
        }
        return { score, matchedTerms };
    }
    return {
        search(query) {
            const queryTerms = tokenize(query);
            // Empty query returns no results
            if (queryTerms.length === 0) {
                return [];
            }
            const results = [];
            for (const doc of corpus) {
                const { score, matchedTerms } = scoreDocument(doc, queryTerms);
                // Only include documents with at least one matching term
                if (score > 0) {
                    results.push({ item: doc.item, score, matchedTerms });
                }
            }
            // Sort by score descending (most relevant first)
            return results.sort((left, right) => right.score - left.score);
        },
        stats() {
            return {
                documentCount: N,
                avgDocLength: avgdl,
                vocabularySize: df.size,
            };
        },
    };
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bm25.test.d.ts","sourceRoot":"","sources":["../../src/utils/bm25.test.ts"],"names":[],"mappings":"AAAA;;GAEG"}
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BM25 Search Algorithm Tests
|
|
3
|
+
*/
|
|
4
|
+
import { describe, it, expect } from "vitest";
|
|
5
|
+
import { tokenize, createBM25Index } from "./bm25.js";
|
|
6
|
+
describe("BM25 utilities", () => {
|
|
7
|
+
describe("tokenize", () => {
    // One row per test: [test title, raw input, expected terms]
    const cases = [
        ["should lowercase and split text", "Hello World Test", ["hello", "world", "test"]],
        ["should remove punctuation", "hello, world! how's it going?", ["hello", "world", "how", "it", "going"]],
        ["should filter short words (less than 2 chars)", "I am a test for x y z", ["am", "test", "for"]],
        ["should handle empty string", "", []],
        ["should handle string with only punctuation", "!@#$%^&*()", []],
    ];
    for (const [title, input, expectedTerms] of cases) {
        it(title, () => {
            expect(tokenize(input)).toEqual(expectedTerms);
        });
    }
});
|
|
29
|
+
describe("createBM25Index", () => {
    // Small fixed corpus shared by every test in this group
    const testTools = [
        { name: "compress", description: "Compress and reduce content size" },
        { name: "analyze", description: "Analyze build output and errors" },
        { name: "summarize", description: "Summarize log files" },
        { name: "optimize", description: "Optimize token usage" },
    ];
    const getSearchableText = (tool) => `${tool.name} ${tool.description}`;
    // Build a fresh index over the shared corpus and run one query against it
    const searchTools = (query, options) => createBM25Index(testTools, getSearchableText, options).search(query);
    it("should return empty array for empty query", () => {
        expect(searchTools("")).toEqual([]);
    });
    it("should return empty array for query with only short words", () => {
        expect(searchTools("a x")).toEqual([]);
    });
    it("should return empty array for empty corpus", () => {
        const emptyIndex = createBM25Index([], getSearchableText);
        expect(emptyIndex.search("compress")).toEqual([]);
    });
    it("should find exact name matches", () => {
        const hits = searchTools("compress");
        expect(hits.length).toBeGreaterThan(0);
        expect(hits[0].item.name).toBe("compress");
    });
    it("should find matches in description", () => {
        const hits = searchTools("build output");
        expect(hits.length).toBeGreaterThan(0);
        expect(hits[0].item.name).toBe("analyze");
    });
    it("should rank exact matches higher", () => {
        const hits = searchTools("compress content");
        expect(hits.length).toBeGreaterThan(0);
        // The "compress" tool matches both query terms, so it must come first
        const [best] = hits;
        expect(best.item.name).toBe("compress");
        expect(best.matchedTerms).toContain("compress");
        expect(best.matchedTerms).toContain("content");
    });
    it("should handle multi-word queries", () => {
        const hits = searchTools("analyze build errors");
        expect(hits.length).toBeGreaterThan(0);
        const [best] = hits;
        expect(best.item.name).toBe("analyze");
        expect(best.matchedTerms.length).toBeGreaterThan(1);
    });
    it("should return matchedTerms for each result", () => {
        const hits = searchTools("compress");
        expect(hits.length).toBeGreaterThan(0);
        expect(hits[0].matchedTerms).toContain("compress");
    });
    it("should return results sorted by score descending", () => {
        const hits = searchTools("optimize token");
        // Compare every hit with the one ranked just before it
        hits.slice(1).forEach((hit, previousIndex) => {
            expect(hits[previousIndex].score).toBeGreaterThanOrEqual(hit.score);
        });
    });
    it("should return positive scores for matching documents", () => {
        searchTools("compress").forEach((hit) => {
            expect(hit.score).toBeGreaterThan(0);
        });
    });
    it("should not return documents without matching terms", () => {
        expect(searchTools("nonexistent")).toEqual([]);
    });
    it("should provide corpus statistics", () => {
        const { documentCount, avgDocLength, vocabularySize } = createBM25Index(testTools, getSearchableText).stats();
        expect(documentCount).toBe(4);
        expect(avgDocLength).toBeGreaterThan(0);
        expect(vocabularySize).toBeGreaterThan(0);
    });
    it("should handle custom BM25 parameters", () => {
        const hits = searchTools("compress", { k1: 2.0, b: 0.5 });
        expect(hits.length).toBeGreaterThan(0);
    });
});
|
|
121
|
+
describe("BM25 ranking behavior", () => {
    it("should rank documents with rare terms higher", () => {
        const docs = [
            { id: 1, text: "common common common" },
            { id: 2, text: "common rare unique" },
            { id: 3, text: "common common" },
        ];
        const index = createBM25Index(docs, (d) => d.text);
        // A term found in a single document matches only that document
        const results = index.search("unique");
        expect(results.length).toBe(1);
        expect(results[0].item.id).toBe(2);
        // Ranking check: every document contains "common", but the rare term
        // carries a much higher IDF, so the document holding it must win even
        // though the others repeat "common".
        const ranked = index.search("common rare");
        expect(ranked.length).toBe(3);
        expect(ranked[0].item.id).toBe(2);
        expect(ranked[0].matchedTerms).toEqual(["common", "rare"]);
    });
    it("should give higher scores to shorter documents (length normalization)", () => {
        const docs = [
            { id: 1, text: "compress file" },
            { id: 2, text: "compress file with additional words that make it longer" },
        ];
        const index = createBM25Index(docs, (d) => d.text);
        const results = index.search("compress file");
        // Shorter document should have higher score due to length normalization
        expect(results[0].item.id).toBe(1);
    });
    it("should handle repeated terms correctly", () => {
        const docs = [
            { id: 1, text: "error error error" },
            { id: 2, text: "error warning info" },
        ];
        const index = createBM25Index(docs, (d) => d.text);
        const results = index.search("error");
        // Both should match, with scores reflecting term frequency
        expect(results.length).toBe(2);
        // Document with more "error" terms should score higher (BM25 saturation)
        expect(results[0].item.id).toBe(1);
    });
});
|
|
156
|
+
});
|