distill-mcp 0.6.0-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (440) hide show
  1. package/bin/cli.js +133 -0
  2. package/dist/analytics/session-tracker.d.ts +74 -0
  3. package/dist/analytics/session-tracker.d.ts.map +1 -0
  4. package/dist/analytics/session-tracker.js +123 -0
  5. package/dist/ast/benchmark.test.d.ts +7 -0
  6. package/dist/ast/benchmark.test.d.ts.map +1 -0
  7. package/dist/ast/benchmark.test.js +175 -0
  8. package/dist/ast/go/index.d.ts +9 -0
  9. package/dist/ast/go/index.d.ts.map +1 -0
  10. package/dist/ast/go/index.js +8 -0
  11. package/dist/ast/go/parser.d.ts +31 -0
  12. package/dist/ast/go/parser.d.ts.map +1 -0
  13. package/dist/ast/go/parser.js +428 -0
  14. package/dist/ast/go/parser.test.d.ts +5 -0
  15. package/dist/ast/go/parser.test.d.ts.map +1 -0
  16. package/dist/ast/go/parser.test.js +241 -0
  17. package/dist/ast/go/queries.d.ts +51 -0
  18. package/dist/ast/go/queries.d.ts.map +1 -0
  19. package/dist/ast/go/queries.js +114 -0
  20. package/dist/ast/go/utils.d.ts +66 -0
  21. package/dist/ast/go/utils.d.ts.map +1 -0
  22. package/dist/ast/go/utils.js +140 -0
  23. package/dist/ast/index.d.ts +39 -0
  24. package/dist/ast/index.d.ts.map +1 -0
  25. package/dist/ast/index.js +245 -0
  26. package/dist/ast/php/index.d.ts +9 -0
  27. package/dist/ast/php/index.d.ts.map +1 -0
  28. package/dist/ast/php/index.js +8 -0
  29. package/dist/ast/php/parser.d.ts +31 -0
  30. package/dist/ast/php/parser.d.ts.map +1 -0
  31. package/dist/ast/php/parser.js +388 -0
  32. package/dist/ast/php/parser.test.d.ts +5 -0
  33. package/dist/ast/php/parser.test.d.ts.map +1 -0
  34. package/dist/ast/php/parser.test.js +328 -0
  35. package/dist/ast/php/queries.d.ts +61 -0
  36. package/dist/ast/php/queries.d.ts.map +1 -0
  37. package/dist/ast/php/queries.js +117 -0
  38. package/dist/ast/php/utils.d.ts +83 -0
  39. package/dist/ast/php/utils.d.ts.map +1 -0
  40. package/dist/ast/php/utils.js +246 -0
  41. package/dist/ast/python/index.d.ts +9 -0
  42. package/dist/ast/python/index.d.ts.map +1 -0
  43. package/dist/ast/python/index.js +8 -0
  44. package/dist/ast/python/parser.d.ts +32 -0
  45. package/dist/ast/python/parser.d.ts.map +1 -0
  46. package/dist/ast/python/parser.js +422 -0
  47. package/dist/ast/python/parser.test.d.ts +5 -0
  48. package/dist/ast/python/parser.test.d.ts.map +1 -0
  49. package/dist/ast/python/parser.test.js +186 -0
  50. package/dist/ast/python/queries.d.ts +73 -0
  51. package/dist/ast/python/queries.d.ts.map +1 -0
  52. package/dist/ast/python/queries.js +137 -0
  53. package/dist/ast/python/utils.d.ts +63 -0
  54. package/dist/ast/python/utils.d.ts.map +1 -0
  55. package/dist/ast/python/utils.js +159 -0
  56. package/dist/ast/quick-scan.d.ts +40 -0
  57. package/dist/ast/quick-scan.d.ts.map +1 -0
  58. package/dist/ast/quick-scan.js +287 -0
  59. package/dist/ast/rust/index.d.ts +9 -0
  60. package/dist/ast/rust/index.d.ts.map +1 -0
  61. package/dist/ast/rust/index.js +8 -0
  62. package/dist/ast/rust/parser.d.ts +31 -0
  63. package/dist/ast/rust/parser.d.ts.map +1 -0
  64. package/dist/ast/rust/parser.js +416 -0
  65. package/dist/ast/rust/parser.test.d.ts +5 -0
  66. package/dist/ast/rust/parser.test.d.ts.map +1 -0
  67. package/dist/ast/rust/parser.test.js +329 -0
  68. package/dist/ast/rust/queries.d.ts +66 -0
  69. package/dist/ast/rust/queries.d.ts.map +1 -0
  70. package/dist/ast/rust/queries.js +132 -0
  71. package/dist/ast/rust/utils.d.ts +91 -0
  72. package/dist/ast/rust/utils.d.ts.map +1 -0
  73. package/dist/ast/rust/utils.js +254 -0
  74. package/dist/ast/swift/index.d.ts +10 -0
  75. package/dist/ast/swift/index.d.ts.map +1 -0
  76. package/dist/ast/swift/index.js +8 -0
  77. package/dist/ast/swift/parser.d.ts +31 -0
  78. package/dist/ast/swift/parser.d.ts.map +1 -0
  79. package/dist/ast/swift/parser.js +554 -0
  80. package/dist/ast/swift/parser.test.d.ts +5 -0
  81. package/dist/ast/swift/parser.test.d.ts.map +1 -0
  82. package/dist/ast/swift/parser.test.js +398 -0
  83. package/dist/ast/swift/queries.d.ts +71 -0
  84. package/dist/ast/swift/queries.d.ts.map +1 -0
  85. package/dist/ast/swift/queries.js +137 -0
  86. package/dist/ast/swift/utils.d.ts +94 -0
  87. package/dist/ast/swift/utils.d.ts.map +1 -0
  88. package/dist/ast/swift/utils.js +411 -0
  89. package/dist/ast/types.d.ts +96 -0
  90. package/dist/ast/types.d.ts.map +1 -0
  91. package/dist/ast/types.js +21 -0
  92. package/dist/ast/typescript.d.ts +24 -0
  93. package/dist/ast/typescript.d.ts.map +1 -0
  94. package/dist/ast/typescript.js +357 -0
  95. package/dist/cache/file-hash.d.ts +33 -0
  96. package/dist/cache/file-hash.d.ts.map +1 -0
  97. package/dist/cache/file-hash.js +59 -0
  98. package/dist/cache/index.d.ts +9 -0
  99. package/dist/cache/index.d.ts.map +1 -0
  100. package/dist/cache/index.js +8 -0
  101. package/dist/cache/smart-cache.d.ts +68 -0
  102. package/dist/cache/smart-cache.d.ts.map +1 -0
  103. package/dist/cache/smart-cache.js +266 -0
  104. package/dist/cache/types.d.ts +102 -0
  105. package/dist/cache/types.d.ts.map +1 -0
  106. package/dist/cache/types.js +6 -0
  107. package/dist/cli/analyze.d.ts +43 -0
  108. package/dist/cli/analyze.d.ts.map +1 -0
  109. package/dist/cli/analyze.js +250 -0
  110. package/dist/cli/doctor.d.ts +2 -0
  111. package/dist/cli/doctor.d.ts.map +1 -0
  112. package/dist/cli/doctor.js +127 -0
  113. package/dist/cli/hooks.d.ts +14 -0
  114. package/dist/cli/hooks.d.ts.map +1 -0
  115. package/dist/cli/hooks.js +229 -0
  116. package/dist/cli/index.d.ts +5 -0
  117. package/dist/cli/index.d.ts.map +1 -0
  118. package/dist/cli/index.js +4 -0
  119. package/dist/cli/setup.d.ts +10 -0
  120. package/dist/cli/setup.d.ts.map +1 -0
  121. package/dist/cli/setup.js +117 -0
  122. package/dist/cli/utils.d.ts +30 -0
  123. package/dist/cli/utils.d.ts.map +1 -0
  124. package/dist/cli/utils.js +116 -0
  125. package/dist/compressors/config.d.ts +9 -0
  126. package/dist/compressors/config.d.ts.map +1 -0
  127. package/dist/compressors/config.js +183 -0
  128. package/dist/compressors/conversation.d.ts +109 -0
  129. package/dist/compressors/conversation.d.ts.map +1 -0
  130. package/dist/compressors/conversation.js +404 -0
  131. package/dist/compressors/diff.d.ts +35 -0
  132. package/dist/compressors/diff.d.ts.map +1 -0
  133. package/dist/compressors/diff.js +389 -0
  134. package/dist/compressors/generic.d.ts +9 -0
  135. package/dist/compressors/generic.d.ts.map +1 -0
  136. package/dist/compressors/generic.js +188 -0
  137. package/dist/compressors/index.d.ts +31 -0
  138. package/dist/compressors/index.d.ts.map +1 -0
  139. package/dist/compressors/index.js +82 -0
  140. package/dist/compressors/logs.d.ts +9 -0
  141. package/dist/compressors/logs.d.ts.map +1 -0
  142. package/dist/compressors/logs.js +245 -0
  143. package/dist/compressors/multifile.d.ts +106 -0
  144. package/dist/compressors/multifile.d.ts.map +1 -0
  145. package/dist/compressors/multifile.js +498 -0
  146. package/dist/compressors/semantic.d.ts +33 -0
  147. package/dist/compressors/semantic.d.ts.map +1 -0
  148. package/dist/compressors/semantic.js +233 -0
  149. package/dist/compressors/stacktrace.d.ts +9 -0
  150. package/dist/compressors/stacktrace.d.ts.map +1 -0
  151. package/dist/compressors/stacktrace.js +259 -0
  152. package/dist/compressors/types.d.ts +146 -0
  153. package/dist/compressors/types.d.ts.map +1 -0
  154. package/dist/compressors/types.js +6 -0
  155. package/dist/config/output-config.d.ts +56 -0
  156. package/dist/config/output-config.d.ts.map +1 -0
  157. package/dist/config/output-config.js +78 -0
  158. package/dist/index.d.ts +21 -0
  159. package/dist/index.d.ts.map +1 -0
  160. package/dist/index.js +27 -0
  161. package/dist/middleware/chain.d.ts +49 -0
  162. package/dist/middleware/chain.d.ts.map +1 -0
  163. package/dist/middleware/chain.js +126 -0
  164. package/dist/middleware/index.d.ts +4 -0
  165. package/dist/middleware/index.d.ts.map +1 -0
  166. package/dist/middleware/index.js +3 -0
  167. package/dist/middleware/logging.d.ts +8 -0
  168. package/dist/middleware/logging.d.ts.map +1 -0
  169. package/dist/middleware/logging.js +71 -0
  170. package/dist/middleware/types.d.ts +58 -0
  171. package/dist/middleware/types.d.ts.map +1 -0
  172. package/dist/middleware/types.js +7 -0
  173. package/dist/parsers/eslint.d.ts +8 -0
  174. package/dist/parsers/eslint.d.ts.map +1 -0
  175. package/dist/parsers/eslint.js +132 -0
  176. package/dist/parsers/generic.d.ts +8 -0
  177. package/dist/parsers/generic.d.ts.map +1 -0
  178. package/dist/parsers/generic.js +234 -0
  179. package/dist/parsers/index.d.ts +34 -0
  180. package/dist/parsers/index.d.ts.map +1 -0
  181. package/dist/parsers/index.js +216 -0
  182. package/dist/parsers/types.d.ts +84 -0
  183. package/dist/parsers/types.d.ts.map +1 -0
  184. package/dist/parsers/types.js +6 -0
  185. package/dist/parsers/typescript.d.ts +8 -0
  186. package/dist/parsers/typescript.d.ts.map +1 -0
  187. package/dist/parsers/typescript.js +107 -0
  188. package/dist/pipelines/definitions.d.ts +50 -0
  189. package/dist/pipelines/definitions.d.ts.map +1 -0
  190. package/dist/pipelines/definitions.js +206 -0
  191. package/dist/sandbox/executor.d.ts +12 -0
  192. package/dist/sandbox/executor.d.ts.map +1 -0
  193. package/dist/sandbox/executor.js +191 -0
  194. package/dist/sandbox/index.d.ts +11 -0
  195. package/dist/sandbox/index.d.ts.map +1 -0
  196. package/dist/sandbox/index.js +9 -0
  197. package/dist/sandbox/sandbox.test.d.ts +7 -0
  198. package/dist/sandbox/sandbox.test.d.ts.map +1 -0
  199. package/dist/sandbox/sandbox.test.js +202 -0
  200. package/dist/sandbox/sdk/analyze.d.ts +36 -0
  201. package/dist/sandbox/sdk/analyze.d.ts.map +1 -0
  202. package/dist/sandbox/sdk/analyze.js +413 -0
  203. package/dist/sandbox/sdk/analyze.test.d.ts +7 -0
  204. package/dist/sandbox/sdk/analyze.test.d.ts.map +1 -0
  205. package/dist/sandbox/sdk/analyze.test.js +191 -0
  206. package/dist/sandbox/sdk/code.d.ts +20 -0
  207. package/dist/sandbox/sdk/code.d.ts.map +1 -0
  208. package/dist/sandbox/sdk/code.js +104 -0
  209. package/dist/sandbox/sdk/compress.d.ts +23 -0
  210. package/dist/sandbox/sdk/compress.d.ts.map +1 -0
  211. package/dist/sandbox/sdk/compress.js +107 -0
  212. package/dist/sandbox/sdk/conversation.d.ts +148 -0
  213. package/dist/sandbox/sdk/conversation.d.ts.map +1 -0
  214. package/dist/sandbox/sdk/conversation.js +177 -0
  215. package/dist/sandbox/sdk/files.d.ts +29 -0
  216. package/dist/sandbox/sdk/files.d.ts.map +1 -0
  217. package/dist/sandbox/sdk/files.js +41 -0
  218. package/dist/sandbox/sdk/git.d.ts +37 -0
  219. package/dist/sandbox/sdk/git.d.ts.map +1 -0
  220. package/dist/sandbox/sdk/git.js +313 -0
  221. package/dist/sandbox/sdk/git.test.d.ts +8 -0
  222. package/dist/sandbox/sdk/git.test.d.ts.map +1 -0
  223. package/dist/sandbox/sdk/git.test.js +160 -0
  224. package/dist/sandbox/sdk/index.d.ts +16 -0
  225. package/dist/sandbox/sdk/index.d.ts.map +1 -0
  226. package/dist/sandbox/sdk/index.js +15 -0
  227. package/dist/sandbox/sdk/multifile.d.ts +63 -0
  228. package/dist/sandbox/sdk/multifile.d.ts.map +1 -0
  229. package/dist/sandbox/sdk/multifile.js +130 -0
  230. package/dist/sandbox/sdk/pipeline.d.ts +16 -0
  231. package/dist/sandbox/sdk/pipeline.d.ts.map +1 -0
  232. package/dist/sandbox/sdk/pipeline.js +454 -0
  233. package/dist/sandbox/sdk/pipeline.test.d.ts +7 -0
  234. package/dist/sandbox/sdk/pipeline.test.d.ts.map +1 -0
  235. package/dist/sandbox/sdk/pipeline.test.js +197 -0
  236. package/dist/sandbox/sdk/search.d.ts +36 -0
  237. package/dist/sandbox/sdk/search.d.ts.map +1 -0
  238. package/dist/sandbox/sdk/search.js +338 -0
  239. package/dist/sandbox/sdk/search.test.d.ts +7 -0
  240. package/dist/sandbox/sdk/search.test.d.ts.map +1 -0
  241. package/dist/sandbox/sdk/search.test.js +183 -0
  242. package/dist/sandbox/sdk/utils.d.ts +18 -0
  243. package/dist/sandbox/sdk/utils.d.ts.map +1 -0
  244. package/dist/sandbox/sdk/utils.js +24 -0
  245. package/dist/sandbox/security/code-analyzer.d.ts +15 -0
  246. package/dist/sandbox/security/code-analyzer.d.ts.map +1 -0
  247. package/dist/sandbox/security/code-analyzer.js +87 -0
  248. package/dist/sandbox/security/index.d.ts +6 -0
  249. package/dist/sandbox/security/index.d.ts.map +1 -0
  250. package/dist/sandbox/security/index.js +5 -0
  251. package/dist/sandbox/security/path-validator.d.ts +23 -0
  252. package/dist/sandbox/security/path-validator.d.ts.map +1 -0
  253. package/dist/sandbox/security/path-validator.js +113 -0
  254. package/dist/sandbox/types.d.ts +577 -0
  255. package/dist/sandbox/types.d.ts.map +1 -0
  256. package/dist/sandbox/types.js +14 -0
  257. package/dist/server.d.ts +36 -0
  258. package/dist/server.d.ts.map +1 -0
  259. package/dist/server.js +133 -0
  260. package/dist/summarizers/build-logs.d.ts +11 -0
  261. package/dist/summarizers/build-logs.d.ts.map +1 -0
  262. package/dist/summarizers/build-logs.js +234 -0
  263. package/dist/summarizers/generic.d.ts +11 -0
  264. package/dist/summarizers/generic.d.ts.map +1 -0
  265. package/dist/summarizers/generic.js +93 -0
  266. package/dist/summarizers/index.d.ts +20 -0
  267. package/dist/summarizers/index.d.ts.map +1 -0
  268. package/dist/summarizers/index.js +43 -0
  269. package/dist/summarizers/server-logs.d.ts +11 -0
  270. package/dist/summarizers/server-logs.d.ts.map +1 -0
  271. package/dist/summarizers/server-logs.js +215 -0
  272. package/dist/summarizers/test-logs.d.ts +11 -0
  273. package/dist/summarizers/test-logs.d.ts.map +1 -0
  274. package/dist/summarizers/test-logs.js +258 -0
  275. package/dist/summarizers/types.d.ts +146 -0
  276. package/dist/summarizers/types.d.ts.map +1 -0
  277. package/dist/summarizers/types.js +21 -0
  278. package/dist/tools/analyze-build-output.d.ts +30 -0
  279. package/dist/tools/analyze-build-output.d.ts.map +1 -0
  280. package/dist/tools/analyze-build-output.js +45 -0
  281. package/dist/tools/analyze-context.d.ts +23 -0
  282. package/dist/tools/analyze-context.d.ts.map +1 -0
  283. package/dist/tools/analyze-context.js +78 -0
  284. package/dist/tools/auto-optimize.d.ts +9 -0
  285. package/dist/tools/auto-optimize.d.ts.map +1 -0
  286. package/dist/tools/auto-optimize.js +191 -0
  287. package/dist/tools/code-execute.d.ts +9 -0
  288. package/dist/tools/code-execute.d.ts.map +1 -0
  289. package/dist/tools/code-execute.js +84 -0
  290. package/dist/tools/code-skeleton.d.ts +33 -0
  291. package/dist/tools/code-skeleton.d.ts.map +1 -0
  292. package/dist/tools/code-skeleton.js +206 -0
  293. package/dist/tools/compress-context.d.ts +33 -0
  294. package/dist/tools/compress-context.d.ts.map +1 -0
  295. package/dist/tools/compress-context.js +64 -0
  296. package/dist/tools/context-budget.d.ts +43 -0
  297. package/dist/tools/context-budget.d.ts.map +1 -0
  298. package/dist/tools/context-budget.js +260 -0
  299. package/dist/tools/context-budget.test.d.ts +5 -0
  300. package/dist/tools/context-budget.test.d.ts.map +1 -0
  301. package/dist/tools/context-budget.test.js +219 -0
  302. package/dist/tools/conversation-compress.d.ts +46 -0
  303. package/dist/tools/conversation-compress.d.ts.map +1 -0
  304. package/dist/tools/conversation-compress.js +78 -0
  305. package/dist/tools/conversation-memory.d.ts +75 -0
  306. package/dist/tools/conversation-memory.d.ts.map +1 -0
  307. package/dist/tools/conversation-memory.js +289 -0
  308. package/dist/tools/deduplicate-errors.d.ts +30 -0
  309. package/dist/tools/deduplicate-errors.d.ts.map +1 -0
  310. package/dist/tools/deduplicate-errors.js +72 -0
  311. package/dist/tools/detect-retry-loop.d.ts +40 -0
  312. package/dist/tools/detect-retry-loop.d.ts.map +1 -0
  313. package/dist/tools/detect-retry-loop.js +212 -0
  314. package/dist/tools/diff-compress.d.ts +40 -0
  315. package/dist/tools/diff-compress.d.ts.map +1 -0
  316. package/dist/tools/diff-compress.js +94 -0
  317. package/dist/tools/discover-tools.d.ts +11 -0
  318. package/dist/tools/discover-tools.d.ts.map +1 -0
  319. package/dist/tools/discover-tools.js +163 -0
  320. package/dist/tools/dynamic-loader.d.ts +131 -0
  321. package/dist/tools/dynamic-loader.d.ts.map +1 -0
  322. package/dist/tools/dynamic-loader.js +378 -0
  323. package/dist/tools/dynamic-loader.test.d.ts +10 -0
  324. package/dist/tools/dynamic-loader.test.d.ts.map +1 -0
  325. package/dist/tools/dynamic-loader.test.js +164 -0
  326. package/dist/tools/lazy-mcp.d.ts +31 -0
  327. package/dist/tools/lazy-mcp.d.ts.map +1 -0
  328. package/dist/tools/lazy-mcp.js +151 -0
  329. package/dist/tools/lazy-mcp.test.d.ts +10 -0
  330. package/dist/tools/lazy-mcp.test.d.ts.map +1 -0
  331. package/dist/tools/lazy-mcp.test.js +172 -0
  332. package/dist/tools/multifile-compress.d.ts +36 -0
  333. package/dist/tools/multifile-compress.d.ts.map +1 -0
  334. package/dist/tools/multifile-compress.js +223 -0
  335. package/dist/tools/optimization-tips.d.ts +18 -0
  336. package/dist/tools/optimization-tips.d.ts.map +1 -0
  337. package/dist/tools/optimization-tips.js +133 -0
  338. package/dist/tools/registry.d.ts +70 -0
  339. package/dist/tools/registry.d.ts.map +1 -0
  340. package/dist/tools/registry.js +169 -0
  341. package/dist/tools/semantic-compress.d.ts +39 -0
  342. package/dist/tools/semantic-compress.d.ts.map +1 -0
  343. package/dist/tools/semantic-compress.js +113 -0
  344. package/dist/tools/semantic-compress.test.d.ts +5 -0
  345. package/dist/tools/semantic-compress.test.d.ts.map +1 -0
  346. package/dist/tools/semantic-compress.test.js +182 -0
  347. package/dist/tools/session-stats.d.ts +34 -0
  348. package/dist/tools/session-stats.d.ts.map +1 -0
  349. package/dist/tools/session-stats.js +194 -0
  350. package/dist/tools/set-output-config.d.ts +38 -0
  351. package/dist/tools/set-output-config.d.ts.map +1 -0
  352. package/dist/tools/set-output-config.js +122 -0
  353. package/dist/tools/smart-cache-tool.d.ts +38 -0
  354. package/dist/tools/smart-cache-tool.d.ts.map +1 -0
  355. package/dist/tools/smart-cache-tool.js +224 -0
  356. package/dist/tools/smart-file-read.d.ts +52 -0
  357. package/dist/tools/smart-file-read.d.ts.map +1 -0
  358. package/dist/tools/smart-file-read.js +481 -0
  359. package/dist/tools/smart-pipeline.d.ts +40 -0
  360. package/dist/tools/smart-pipeline.d.ts.map +1 -0
  361. package/dist/tools/smart-pipeline.js +295 -0
  362. package/dist/tools/summarize-logs.d.ts +36 -0
  363. package/dist/tools/summarize-logs.d.ts.map +1 -0
  364. package/dist/tools/summarize-logs.js +184 -0
  365. package/dist/tools/token-budget.test.d.ts +11 -0
  366. package/dist/tools/token-budget.test.d.ts.map +1 -0
  367. package/dist/tools/token-budget.test.js +275 -0
  368. package/dist/utils/bm25.d.ts +86 -0
  369. package/dist/utils/bm25.d.ts.map +1 -0
  370. package/dist/utils/bm25.js +153 -0
  371. package/dist/utils/bm25.test.d.ts +5 -0
  372. package/dist/utils/bm25.test.d.ts.map +1 -0
  373. package/dist/utils/bm25.test.js +156 -0
  374. package/dist/utils/command-normalizer.d.ts +39 -0
  375. package/dist/utils/command-normalizer.d.ts.map +1 -0
  376. package/dist/utils/command-normalizer.js +90 -0
  377. package/dist/utils/content-detector.d.ts +27 -0
  378. package/dist/utils/content-detector.d.ts.map +1 -0
  379. package/dist/utils/content-detector.js +127 -0
  380. package/dist/utils/embeddings.d.ts +54 -0
  381. package/dist/utils/embeddings.d.ts.map +1 -0
  382. package/dist/utils/embeddings.js +97 -0
  383. package/dist/utils/embeddings.test.d.ts +8 -0
  384. package/dist/utils/embeddings.test.d.ts.map +1 -0
  385. package/dist/utils/embeddings.test.js +96 -0
  386. package/dist/utils/error-normalizer.d.ts +39 -0
  387. package/dist/utils/error-normalizer.d.ts.map +1 -0
  388. package/dist/utils/error-normalizer.js +233 -0
  389. package/dist/utils/hybrid-search.d.ts +79 -0
  390. package/dist/utils/hybrid-search.d.ts.map +1 -0
  391. package/dist/utils/hybrid-search.js +146 -0
  392. package/dist/utils/hybrid-search.test.d.ts +5 -0
  393. package/dist/utils/hybrid-search.test.d.ts.map +1 -0
  394. package/dist/utils/hybrid-search.test.js +172 -0
  395. package/dist/utils/index.d.ts +13 -0
  396. package/dist/utils/index.d.ts.map +1 -0
  397. package/dist/utils/index.js +12 -0
  398. package/dist/utils/language-detector.d.ts +27 -0
  399. package/dist/utils/language-detector.d.ts.map +1 -0
  400. package/dist/utils/language-detector.js +94 -0
  401. package/dist/utils/log-parser.d.ts +46 -0
  402. package/dist/utils/log-parser.d.ts.map +1 -0
  403. package/dist/utils/log-parser.js +287 -0
  404. package/dist/utils/output-estimator.d.ts +54 -0
  405. package/dist/utils/output-estimator.d.ts.map +1 -0
  406. package/dist/utils/output-estimator.js +119 -0
  407. package/dist/utils/output-estimator.test.d.ts +5 -0
  408. package/dist/utils/output-estimator.test.d.ts.map +1 -0
  409. package/dist/utils/output-estimator.test.js +115 -0
  410. package/dist/utils/output-similarity.d.ts +48 -0
  411. package/dist/utils/output-similarity.d.ts.map +1 -0
  412. package/dist/utils/output-similarity.js +140 -0
  413. package/dist/utils/project-detector.d.ts +16 -0
  414. package/dist/utils/project-detector.d.ts.map +1 -0
  415. package/dist/utils/project-detector.js +119 -0
  416. package/dist/utils/segment-scorer.d.ts +99 -0
  417. package/dist/utils/segment-scorer.d.ts.map +1 -0
  418. package/dist/utils/segment-scorer.js +148 -0
  419. package/dist/utils/signature-grouper.d.ts +58 -0
  420. package/dist/utils/signature-grouper.d.ts.map +1 -0
  421. package/dist/utils/signature-grouper.js +185 -0
  422. package/dist/utils/tfidf.d.ts +45 -0
  423. package/dist/utils/tfidf.d.ts.map +1 -0
  424. package/dist/utils/tfidf.js +204 -0
  425. package/dist/utils/tfidf.test.d.ts +5 -0
  426. package/dist/utils/tfidf.test.d.ts.map +1 -0
  427. package/dist/utils/tfidf.test.js +115 -0
  428. package/dist/utils/token-counter.d.ts +35 -0
  429. package/dist/utils/token-counter.d.ts.map +1 -0
  430. package/dist/utils/token-counter.js +83 -0
  431. package/dist/utils/toon-serializer.d.ts +120 -0
  432. package/dist/utils/toon-serializer.d.ts.map +1 -0
  433. package/dist/utils/toon-serializer.js +472 -0
  434. package/dist/utils/toon-serializer.test.d.ts +7 -0
  435. package/dist/utils/toon-serializer.test.d.ts.map +1 -0
  436. package/dist/utils/toon-serializer.test.js +290 -0
  437. package/package.json +63 -0
  438. package/scripts/install.ps1 +133 -0
  439. package/scripts/install.sh +183 -0
  440. package/scripts/pre-commit-hook.sh +86 -0
@@ -0,0 +1,275 @@
1
+ /**
2
+ * Token Budget Tests
3
+ *
4
+ * Ensures tool definitions stay within token budgets to prevent
5
+ * context window bloat from MCP tool descriptions.
6
+ *
7
+ * These tests guard against regression - any change that pushes
8
+ * token consumption past its budget will fail the test.
9
+ */
10
+ import { describe, it, expect } from "vitest";
11
+ import { countTokens } from "../utils/token-counter.js";
12
+ // Import all tool definitions
13
+ import { autoOptimizeTool } from "./auto-optimize.js";
14
+ import { smartFileReadTool } from "./smart-file-read.js";
15
+ import { discoverToolsTool } from "./discover-tools.js";
16
+ import { analyzeBuildOutputTool } from "./analyze-build-output.js";
17
+ import { compressContextTool } from "./compress-context.js";
18
+ import { semanticCompressTool } from "./semantic-compress.js";
19
+ import { diffCompressTool } from "./diff-compress.js";
20
+ import { summarizeLogsTool } from "./summarize-logs.js";
21
+ import { codeSkeletonTool } from "./code-skeleton.js";
22
+ import { contextBudgetTool } from "./context-budget.js";
23
+ import { conversationCompressTool } from "./conversation-compress.js";
24
+ import { deduplicateErrorsTool } from "./deduplicate-errors.js";
25
+ import { smartCacheTool } from "./smart-cache-tool.js";
26
+ import { smartPipelineTool } from "./smart-pipeline.js";
27
+ // ============================================================================
28
+ // Token Budgets (in tokens)
29
+ // ============================================================================
30
+ /**
31
+ * Maximum tokens allowed per tool definition.
32
+ * These are intentionally tight to catch any bloat early.
33
+ *
34
+ * 2024-12: Tightened budgets after schema optimization
35
+ */
36
+ const TOKEN_BUDGETS = {
37
+ // Core tools (always loaded) - ultra-minimal
38
+ auto_optimize: 90,
39
+ smart_file_read: 120,
40
+ discover_tools: 80, // +5 for TOON format option
41
+ // Compress category - aggressively optimized
42
+ compress_context: 95,
43
+ semantic_compress: 60,
44
+ diff_compress: 75,
45
+ conversation_compress: 110,
46
+ // Analyze category
47
+ analyze_build_output: 95,
48
+ context_budget: 105,
49
+ // Logs category
50
+ summarize_logs: 115,
51
+ deduplicate_errors: 65,
52
+ // Code category
53
+ code_skeleton: 75,
54
+ smart_cache: 90,
55
+ // Pipeline category
56
+ smart_pipeline: 80,
57
+ };
58
+ /**
59
+ * Maximum combined tokens for the core tools (sum of per-tool definition counts).
60
+ * Currently: auto_optimize + smart_file_read + discover_tools
61
+ * 2024-12: Reduced from 500 after schema optimization
62
+ */
63
+ const CORE_TOOLS_BUDGET = 300;
64
+ /**
65
+ * Maximum tokens for all tools combined.
66
+ * 2024-12: Reduced from 1500 after aggressive schema optimization
67
+ */
68
+ const ALL_TOOLS_BUDGET = 1200;
69
+ // ============================================================================
70
+ // Helper Functions
71
+ // ============================================================================
72
+ /**
73
+ * Serialize a tool definition as it would appear in ListTools response
74
+ */
75
+ function serializeToolForMCP(tool) {
76
+ return JSON.stringify({
77
+ name: tool.name,
78
+ description: tool.description,
79
+ inputSchema: tool.inputSchema,
80
+ });
81
+ }
82
+ /**
83
+ * Count tokens in a tool definition
84
+ */
85
+ function countToolTokens(tool) {
86
+ const serialized = serializeToolForMCP(tool);
87
+ return countTokens(serialized);
88
+ }
89
+ // ============================================================================
90
+ // All Tools
91
+ // ============================================================================
92
+ const ALL_TOOLS = [
93
+ autoOptimizeTool,
94
+ smartFileReadTool,
95
+ discoverToolsTool,
96
+ analyzeBuildOutputTool,
97
+ compressContextTool,
98
+ semanticCompressTool,
99
+ diffCompressTool,
100
+ summarizeLogsTool,
101
+ codeSkeletonTool,
102
+ contextBudgetTool,
103
+ conversationCompressTool,
104
+ deduplicateErrorsTool,
105
+ smartCacheTool,
106
+ smartPipelineTool,
107
+ ];
108
+ const CORE_TOOLS = [
109
+ autoOptimizeTool,
110
+ smartFileReadTool,
111
+ discoverToolsTool,
112
+ ];
113
+ // ============================================================================
114
+ // Tests
115
+ // ============================================================================
116
+ describe("Tool Token Budgets", () => {
117
+ describe("Individual tool budgets", () => {
118
+ it.each(ALL_TOOLS.map((t) => [t.name, t]))("%s should be under budget", (name, tool) => {
119
+ const tokens = countToolTokens(tool);
120
+ const budget = TOKEN_BUDGETS[name];
121
+ expect(tokens).toBeLessThanOrEqual(budget);
122
+ // Log for visibility
123
+ const usage = Math.round((tokens / budget) * 100);
124
+ console.log(` ${name}: ${tokens}/${budget} tokens (${usage}%)`);
125
+ });
126
+ });
127
+ describe("Aggregate budgets", () => {
128
+ it("core tools should be under combined budget", () => {
129
+ const totalTokens = CORE_TOOLS.reduce((sum, tool) => sum + countToolTokens(tool), 0);
130
+ expect(totalTokens).toBeLessThanOrEqual(CORE_TOOLS_BUDGET);
131
+ const usage = Math.round((totalTokens / CORE_TOOLS_BUDGET) * 100);
132
+ console.log(` Core tools total: ${totalTokens}/${CORE_TOOLS_BUDGET} tokens (${usage}%)`);
133
+ });
134
+ it("all tools should be under combined budget", () => {
135
+ const totalTokens = ALL_TOOLS.reduce((sum, tool) => sum + countToolTokens(tool), 0);
136
+ expect(totalTokens).toBeLessThanOrEqual(ALL_TOOLS_BUDGET);
137
+ const usage = Math.round((totalTokens / ALL_TOOLS_BUDGET) * 100);
138
+ console.log(` All tools total: ${totalTokens}/${ALL_TOOLS_BUDGET} tokens (${usage}%)`);
139
+ });
140
+ });
141
+ describe("Token distribution", () => {
142
+ it("should have balanced token distribution (no tool > 20% of total)", () => {
143
+ const totalTokens = ALL_TOOLS.reduce((sum, tool) => sum + countToolTokens(tool), 0);
144
+ for (const tool of ALL_TOOLS) {
145
+ const tokens = countToolTokens(tool);
146
+ const percentage = (tokens / totalTokens) * 100;
147
+ expect(percentage).toBeLessThan(20);
148
+ }
149
+ });
150
+ });
151
+ });
152
+ describe("Tool Schema Constraints", () => {
153
+ describe("Description length", () => {
154
+ it.each(ALL_TOOLS.map((t) => [t.name, t]))("%s description should be concise (< 150 chars)", (name, tool) => {
155
+ const description = tool.description;
156
+ expect(description.length).toBeLessThan(150);
157
+ });
158
+ });
159
+ describe("Schema structure", () => {
160
+ it.each(ALL_TOOLS.map((t) => [t.name, t]))("%s should not have deeply nested descriptions", (name, tool) => {
161
+ const schema = tool.inputSchema;
162
+ const serialized = JSON.stringify(schema);
163
+ // Count "description" occurrences - should be minimal
164
+ const descriptionCount = (serialized.match(/"description"/g) || [])
165
+ .length;
166
+ // Allow max 3 descriptions per schema (for complex tools)
167
+ expect(descriptionCount).toBeLessThanOrEqual(3);
168
+ });
169
+ });
170
+ });
171
+ describe("ListTools Response Size", () => {
172
+ it("should generate compact ListTools response for core tools", () => {
173
+ const response = {
174
+ tools: CORE_TOOLS.map((tool) => ({
175
+ name: tool.name,
176
+ description: tool.description,
177
+ inputSchema: tool.inputSchema,
178
+ })),
179
+ };
180
+ const serialized = JSON.stringify(response);
181
+ const tokens = countTokens(serialized);
182
+ // ListTools response should be under 600 tokens for core tools
183
+ expect(tokens).toBeLessThan(600);
184
+ console.log(` ListTools (core): ${serialized.length} chars, ${tokens} tokens`);
185
+ });
186
+ it("should generate compact ListTools response for all tools", () => {
187
+ const response = {
188
+ tools: ALL_TOOLS.map((tool) => ({
189
+ name: tool.name,
190
+ description: tool.description,
191
+ inputSchema: tool.inputSchema,
192
+ })),
193
+ };
194
+ const serialized = JSON.stringify(response);
195
+ const tokens = countTokens(serialized);
196
+ // Full ListTools response should be under 1800 tokens
197
+ expect(tokens).toBeLessThan(1800);
198
+ console.log(` ListTools (all): ${serialized.length} chars, ${tokens} tokens`);
199
+ });
200
+ });
201
+ describe("Token Reduction Verification", () => {
202
+ /**
203
+ * Baseline values from BEFORE optimization (commit d4cdb98).
204
+ * These are used to verify we actually reduced tokens.
205
+ */
206
+ const BASELINE_TOKENS = {
207
+ auto_optimize: 287,
208
+ smart_file_read: 342,
209
+ discover_tools: 153,
210
+ core_total: 782,
211
+ };
212
+ it("auto_optimize should be reduced from baseline", () => {
213
+ const current = countToolTokens(autoOptimizeTool);
214
+ const baseline = BASELINE_TOKENS.auto_optimize;
215
+ const reduction = Math.round((1 - current / baseline) * 100);
216
+ expect(current).toBeLessThan(baseline);
217
+ expect(reduction).toBeGreaterThan(30); // Rounded reduction must exceed 30%
218
+ console.log(` auto_optimize: ${baseline} → ${current} (${reduction}% reduction)`);
219
+ });
220
+ it("smart_file_read should be reduced from baseline", () => {
221
+ const current = countToolTokens(smartFileReadTool);
222
+ const baseline = BASELINE_TOKENS.smart_file_read;
223
+ const reduction = Math.round((1 - current / baseline) * 100);
224
+ expect(current).toBeLessThan(baseline);
225
+ expect(reduction).toBeGreaterThan(20); // Rounded reduction must exceed 20%
226
+ console.log(` smart_file_read: ${baseline} → ${current} (${reduction}% reduction)`);
227
+ });
228
+ it("discover_tools should be reduced from baseline", () => {
229
+ const current = countToolTokens(discoverToolsTool);
230
+ const baseline = BASELINE_TOKENS.discover_tools;
231
+ const reduction = Math.round((1 - current / baseline) * 100);
232
+ expect(current).toBeLessThan(baseline);
233
+ expect(reduction).toBeGreaterThan(20); // Rounded reduction must exceed 20%
234
+ console.log(` discover_tools: ${baseline} → ${current} (${reduction}% reduction)`);
235
+ });
236
+ it("core tools total should be at least 40% reduced from baseline", () => {
237
+ const currentTotal = CORE_TOOLS.reduce((sum, tool) => sum + countToolTokens(tool), 0);
238
+ const baseline = BASELINE_TOKENS.core_total;
239
+ const reduction = Math.round((1 - currentTotal / baseline) * 100);
240
+ expect(currentTotal).toBeLessThan(baseline);
241
+ expect(reduction).toBeGreaterThan(40); // Rounded total reduction must exceed 40%
242
+ console.log(` Core total: ${baseline} → ${currentTotal} (${reduction}% reduction)`);
243
+ });
244
+ });
245
+ describe("Regression Prevention", () => {
246
+ /**
247
+ * Snapshot of current token counts.
248
+ * Update these when intentionally adding features.
249
+ * Any change beyond the ±5 token tolerance will fail the test.
250
+ *
251
+ * 2024-12: Optimized schemas to reduce token overhead
252
+ * - Removed property descriptions (moved to tool description)
253
+ * - Removed rarely-used properties from public schema
254
+ * - Simplified nested object type declarations
255
+ * - Added TOON format output option to discover_tools
256
+ */
257
+ const CURRENT_SNAPSHOT = {
258
+ auto_optimize: 80,
259
+ smart_file_read: 106,
260
+ discover_tools: 78, // +15 for TOON format option (list|toon|toon-tabular)
261
+ };
262
+ // Tolerance: ±5 tokens for minor changes
263
+ const TOLERANCE = 5;
264
+ it.each(Object.entries(CURRENT_SNAPSHOT))("%s should match snapshot (±5 tokens)", (name, expected) => {
265
+ const tool = ALL_TOOLS.find((t) => t.name === name);
266
+ if (!tool)
267
+ throw new Error(`Tool ${name} not found`);
268
+ const actual = countToolTokens(tool);
269
+ const diff = Math.abs(actual - expected);
270
+ expect(diff).toBeLessThanOrEqual(TOLERANCE);
271
+ if (diff > 0) {
272
+ console.log(` ${name}: expected ${expected}, got ${actual} (diff: ${diff})`);
273
+ }
274
+ });
275
+ });
@@ -0,0 +1,86 @@
1
+ /**
2
+ * BM25 (Best Matching 25) implementation for tool search
3
+ *
4
+ * BM25 is a ranking function used by search engines to rank documents
5
+ * based on query terms appearing in each document.
6
+ *
7
+ * Formula (contribution of one query term; a document's score is the sum over all matching query terms): score = IDF * ((tf * (k1 + 1)) / (tf + k1 * (1 - b + b * (dl/avgdl))))
8
+ *
9
+ * Where:
10
+ * - tf = term frequency in document
11
+ * - k1 = term saturation parameter (default: 1.2)
12
+ * - b = length normalization parameter (default: 0.75)
13
+ * - dl = document length (number of terms)
14
+ * - avgdl = average document length across corpus
15
+ * - IDF = log((N - df + 0.5) / (df + 0.5) + 1)
16
+ * - N = total number of documents
17
+ * - df = document frequency (docs containing term)
18
+ */
19
+ /**
20
+ * Configuration options for BM25 algorithm
+ *
+ * Both fields are optional. createBM25Index substitutes the default listed on
+ * each field when the field (or the whole options object) is omitted.
21
+ */
22
+ export interface BM25Options {
23
+ /** Term saturation parameter. Higher = more weight to term frequency. Default: 1.2 */
24
+ k1?: number;
25
+ /** Length normalization. 0 = no normalization, 1 = full normalization. Default: 0.75 */
26
+ b?: number;
27
+ }
28
+ /**
29
+ * Result item from BM25 search
+ *
+ * One entry is produced per corpus item that matched at least one query term.
30
+ */
31
+ export interface BM25Result<T> {
32
+ /** Original item from the corpus (the value that was passed in, not its searchable text) */
33
+ item: T;
34
+ /** BM25 relevance score (higher = more relevant); search only returns results whose score is above 0 */
35
+ score: number;
36
+ /** Query terms (after tokenization) that matched in this document, listed in query order */
37
+ matchedTerms: string[];
38
+ }
39
+ /**
40
+ * BM25 search index interface
+ *
+ * Returned by createBM25Index; exposes querying and read-only corpus statistics.
41
+ */
42
+ export interface BM25Index<T> {
43
+ /** Search the index and return ranked results, sorted by descending score; returns an empty array when the query yields no terms after tokenization or when nothing matches */
44
+ search: (query: string) => BM25Result<T>[];
45
+ /** Get corpus statistics: documentCount = number of indexed documents, avgDocLength = mean number of terms per document, vocabularySize = number of distinct terms across the corpus */
46
+ stats: () => {
47
+ documentCount: number;
48
+ avgDocLength: number;
49
+ vocabularySize: number;
50
+ };
51
+ }
52
+ /**
53
+ * Tokenize text into searchable terms
54
+ *
55
+ * - Lowercases text
56
+ * - Removes punctuation (each punctuation character becomes a space, so "how's" splits into "how" and "s"; underscores are kept as part of a word)
57
+ * - Splits on whitespace
58
+ * - Filters words shorter than 2 characters (this also discards the empty strings produced by splitting)
+ *
+ * @param text - Raw text to tokenize
+ * @returns Terms in their original order; duplicates are preserved
59
+ */
60
+ export declare function tokenize(text: string): string[];
61
+ /**
62
+ * Create a BM25 search index from a collection of items
+ *
+ * Each item's searchable text is tokenized with tokenize() when the index is
+ * built. An empty items array yields an index whose search always returns []
+ * and whose stats are all 0.
63
+ *
64
+ * @param items - Array of items to index
65
+ * @param getSearchableText - Function to extract searchable text from each item
66
+ * @param options - BM25 configuration options (k1 defaults to 1.2, b to 0.75)
67
+ * @returns BM25 search index
68
+ *
69
+ * @example
70
+ * ```typescript
71
+ * const tools = [
72
+ * { name: "compress", description: "Compress content" },
73
+ * { name: "analyze", description: "Analyze build output" }
74
+ * ];
75
+ *
76
+ * const index = createBM25Index(
77
+ * tools,
78
+ * (tool) => `${tool.name} ${tool.description}`
79
+ * );
80
+ *
81
+ * const results = index.search("compress content");
82
+ * // [{ item: { name: "compress", ... }, score: 2.5, matchedTerms: ["compress", "content"] }]
+ * // (the score shown is illustrative, not the exact computed value)
83
+ * ```
84
+ */
85
+ export declare function createBM25Index<T>(items: T[], getSearchableText: (item: T) => string, options?: BM25Options): BM25Index<T>;
86
+ //# sourceMappingURL=bm25.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bm25.d.ts","sourceRoot":"","sources":["../../src/utils/bm25.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,sFAAsF;IACtF,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,wFAAwF;IACxF,CAAC,CAAC,EAAE,MAAM,CAAC;CACZ;AAED;;GAEG;AACH,MAAM,WAAW,UAAU,CAAC,CAAC;IAC3B,oCAAoC;IACpC,IAAI,EAAE,CAAC,CAAC;IACR,oDAAoD;IACpD,KAAK,EAAE,MAAM,CAAC;IACd,gDAAgD;IAChD,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,SAAS,CAAC,CAAC;IAC1B,iDAAiD;IACjD,MAAM,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,UAAU,CAAC,CAAC,CAAC,EAAE,CAAC;IAC3C,4BAA4B;IAC5B,KAAK,EAAE,MAAM;QAAE,aAAa,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAC;QAAC,cAAc,EAAE,MAAM,CAAA;KAAE,CAAC;CACtF;AAED;;;;;;;GAOG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAM/C;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,eAAe,CAAC,CAAC,EAC/B,KAAK,EAAE,CAAC,EAAE,EACV,iBAAiB,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,MAAM,EACtC,OAAO,CAAC,EAAE,WAAW,GACpB,SAAS,CAAC,CAAC,CAAC,CAkHd"}
@@ -0,0 +1,153 @@
1
+ /**
2
+ * BM25 (Best Matching 25) implementation for tool search
3
+ *
4
+ * BM25 is a ranking function used by search engines to rank documents
5
+ * based on query terms appearing in each document.
6
+ *
7
+ * Formula (contribution of one query term; a document's score is the sum over all matching query terms): score = IDF * ((tf * (k1 + 1)) / (tf + k1 * (1 - b + b * (dl/avgdl))))
8
+ *
9
+ * Where:
10
+ * - tf = term frequency in document
11
+ * - k1 = term saturation parameter (default: 1.2)
12
+ * - b = length normalization parameter (default: 0.75)
13
+ * - dl = document length (number of terms)
14
+ * - avgdl = average document length across corpus
15
+ * - IDF = log((N - df + 0.5) / (df + 0.5) + 1)
16
+ * - N = total number of documents
17
+ * - df = document frequency (docs containing term)
18
+ */
19
/**
 * Tokenize text into searchable terms.
 *
 * Steps, in order:
 * - lowercases the text;
 * - replaces every character that is not a letter, combining mark, digit,
 *   underscore or whitespace with a space (so "how's" becomes "how" + "s");
 * - splits on runs of whitespace;
 * - drops words shorter than 2 characters, which also discards the empty
 *   strings produced by leading/trailing whitespace.
 *
 * Letters and digits are matched with Unicode property escapes rather than
 * `\w`, because `\w` is ASCII-only and would treat accented, Cyrillic, CJK,
 * etc. letters as punctuation ("café" would become "caf"). For pure-ASCII
 * input the result is the same as with `\w`.
 *
 * @param {string} text - Raw text to tokenize.
 * @returns {string[]} Terms in their original order; duplicates are preserved.
 */
export function tokenize(text) {
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, " ")
    .split(/\s+/)
    .filter((word) => word.length >= 2);
}
34
/**
 * Create a BM25 search index from a collection of items.
 *
 * All corpus-level work is done once, when the index is built: tokenization,
 * per-document term frequencies, per-term IDF weights and per-document length
 * normalization. A search then only performs map lookups.
 *
 * The index is a snapshot of `items` at creation time: it keeps its own
 * shallow copy, so later mutation of the caller's array does not change
 * search results (which keeps them consistent with `stats()`).
 *
 * @param {Array} items - Items to index. Any value is allowed, including
 *   falsy ones such as `0` or `""`.
 * @param {Function} getSearchableText - Called once per item; returns the
 *   text to index for that item.
 * @param {{k1?: number, b?: number}} [options] - BM25 tuning parameters.
 *   `k1` (term saturation) defaults to 1.2, `b` (length normalization) to 0.75.
 * @returns {{search: Function, stats: Function}} An index object:
 *   - `search(query)` returns `{ item, score, matchedTerms }` entries sorted by
 *     descending score. Only items with a score above 0 are returned, and
 *     `matchedTerms` lists each matching query term once, in query order.
 *     A query that tokenizes to nothing returns `[]`.
 *   - `stats()` returns `{ documentCount, avgDocLength, vocabularySize }`.
 *   For an empty corpus, `search` always returns `[]` and all stats are 0.
 *
 * @example
 * const tools = [
 *   { name: "compress", description: "Compress content" },
 *   { name: "analyze", description: "Analyze build output" }
 * ];
 *
 * const index = createBM25Index(
 *   tools,
 *   (tool) => `${tool.name} ${tool.description}`
 * );
 *
 * const results = index.search("compress content");
 * // results[0].item is the "compress" tool,
 * // results[0].matchedTerms is ["compress", "content"]
 */
export function createBM25Index(items, getSearchableText, options) {
  const k1 = options?.k1 ?? 1.2;
  const b = options?.b ?? 0.75;

  // Empty corpus: nothing to score, and averaging would divide by zero.
  if (items.length === 0) {
    return {
      search: () => [],
      stats: () => ({ documentCount: 0, avgDocLength: 0, vocabularySize: 0 }),
    };
  }

  // Private copy so the index cannot drift out of sync with the caller's array.
  const corpus = items.slice();
  const documents = corpus.map((item) => tokenize(getSearchableText(item)));
  const N = documents.length;

  // Average document length (in terms) across the corpus.
  const totalLength = documents.reduce((sum, doc) => sum + doc.length, 0);
  const avgdl = totalLength / N;

  // Term-frequency map per document, built once instead of on every search.
  const termFreqs = documents.map((doc) => {
    const counts = new Map();
    for (const term of doc) {
      counts.set(term, (counts.get(term) ?? 0) + 1);
    }
    return counts;
  });

  // Document frequency: how many documents contain each term.
  const df = new Map();
  termFreqs.forEach((counts) => {
    for (const term of counts.keys()) {
      df.set(term, (df.get(term) ?? 0) + 1);
    }
  });

  // Smoothed BM25 IDF per known term: log((N - df + 0.5) / (df + 0.5) + 1).
  // Terms absent from the corpus never match, so they need no entry.
  const idf = new Map();
  for (const [term, docFreq] of df) {
    idf.set(term, Math.log((N - docFreq + 0.5) / (docFreq + 0.5) + 1));
  }

  // Query-independent part of the BM25 denominator: k1 * (1 - b + b * dl / avgdl).
  const lengthNorms = documents.map((doc) => k1 * (1 - b + b * (doc.length / avgdl)));

  return {
    search(query) {
      const queryTerms = tokenize(query);
      // Empty query returns no results.
      if (queryTerms.length === 0) {
        return [];
      }

      const results = [];
      for (let i = 0; i < N; i++) {
        const counts = termFreqs[i];
        // A hole in a sparse input array has no document to score.
        if (counts === undefined) {
          continue;
        }

        let score = 0;
        // A Set keeps first-seen (query) order and reports a repeated query
        // term only once; the repeat still contributes to the score.
        const matched = new Set();
        for (const term of queryTerms) {
          const tf = counts.get(term) ?? 0;
          if (tf > 0) {
            matched.add(term);
            score += idf.get(term) * ((tf * (k1 + 1)) / (tf + lengthNorms[i]));
          }
        }

        // Only include documents with at least one matching term.
        if (score > 0) {
          results.push({ item: corpus[i], score, matchedTerms: [...matched] });
        }
      }

      // Most relevant first.
      return results.sort((left, right) => right.score - left.score);
    },
    stats() {
      return {
        documentCount: N,
        avgDocLength: avgdl,
        vocabularySize: df.size,
      };
    },
  };
}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * BM25 Search Algorithm Tests
3
+ */
4
+ export {};
5
+ //# sourceMappingURL=bm25.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bm25.test.d.ts","sourceRoot":"","sources":["../../src/utils/bm25.test.ts"],"names":[],"mappings":"AAAA;;GAEG"}
@@ -0,0 +1,156 @@
1
+ /**
2
+ * BM25 Search Algorithm Tests
3
+ */
4
+ import { describe, it, expect } from "vitest";
5
+ import { tokenize, createBM25Index } from "./bm25.js";
6
describe("BM25 utilities", () => {
  // tokenize(): normalization rules that turn free text into search terms.
  describe("tokenize", () => {
    it("should lowercase and split text", () => {
      expect(tokenize("Hello World Test")).toEqual(["hello", "world", "test"]);
    });

    it("should remove punctuation", () => {
      expect(tokenize("hello, world! how's it going?")).toEqual(["hello", "world", "how", "it", "going"]);
    });

    it("should filter short words (less than 2 chars)", () => {
      expect(tokenize("I am a test for x y z")).toEqual(["am", "test", "for"]);
    });

    it("should handle empty string", () => {
      expect(tokenize("")).toEqual([]);
    });

    it("should handle string with only punctuation", () => {
      expect(tokenize("!@#$%^&*()")).toEqual([]);
    });
  });

  // createBM25Index(): index construction, querying and statistics.
  describe("createBM25Index", () => {
    const testTools = [
      { name: "compress", description: "Compress and reduce content size" },
      { name: "analyze", description: "Analyze build output and errors" },
      { name: "summarize", description: "Summarize log files" },
      { name: "optimize", description: "Optimize token usage" },
    ];
    const getSearchableText = ({ name, description }) => `${name} ${description}`;

    // Builds a fresh index over the fixture tools and runs one query against it.
    const searchTools = (query, options) =>
      createBM25Index(testTools, getSearchableText, options).search(query);

    it("should return empty array for empty query", () => {
      expect(searchTools("")).toEqual([]);
    });

    it("should return empty array for query with only short words", () => {
      expect(searchTools("a x")).toEqual([]);
    });

    it("should return empty array for empty corpus", () => {
      const emptyIndex = createBM25Index([], getSearchableText);
      expect(emptyIndex.search("compress")).toEqual([]);
    });

    it("should find exact name matches", () => {
      const hits = searchTools("compress");
      expect(hits.length).toBeGreaterThan(0);
      expect(hits[0].item.name).toBe("compress");
    });

    it("should find matches in description", () => {
      const hits = searchTools("build output");
      expect(hits.length).toBeGreaterThan(0);
      expect(hits[0].item.name).toBe("analyze");
    });

    it("should rank exact matches higher", () => {
      const hits = searchTools("compress content");
      expect(hits.length).toBeGreaterThan(0);
      // The "compress" tool contains both query terms, so it must come first.
      const best = hits[0];
      expect(best.item.name).toBe("compress");
      expect(best.matchedTerms).toContain("compress");
      expect(best.matchedTerms).toContain("content");
    });

    it("should handle multi-word queries", () => {
      const hits = searchTools("analyze build errors");
      expect(hits.length).toBeGreaterThan(0);
      const best = hits[0];
      expect(best.item.name).toBe("analyze");
      expect(best.matchedTerms.length).toBeGreaterThan(1);
    });

    it("should return matchedTerms for each result", () => {
      const hits = searchTools("compress");
      expect(hits.length).toBeGreaterThan(0);
      expect(hits[0].matchedTerms).toContain("compress");
    });

    it("should return results sorted by score descending", () => {
      const hits = searchTools("optimize token");
      // Compare every result with the one ranked immediately before it.
      hits.slice(1).forEach((current, previousIndex) => {
        expect(hits[previousIndex].score).toBeGreaterThanOrEqual(current.score);
      });
    });

    it("should return positive scores for matching documents", () => {
      searchTools("compress").forEach(({ score }) => {
        expect(score).toBeGreaterThan(0);
      });
    });

    it("should not return documents without matching terms", () => {
      expect(searchTools("nonexistent")).toEqual([]);
    });

    it("should provide corpus statistics", () => {
      const { documentCount, avgDocLength, vocabularySize } = createBM25Index(
        testTools,
        getSearchableText,
      ).stats();
      expect(documentCount).toBe(4);
      expect(avgDocLength).toBeGreaterThan(0);
      expect(vocabularySize).toBeGreaterThan(0);
    });

    it("should handle custom BM25 parameters", () => {
      const hits = searchTools("compress", { k1: 2.0, b: 0.5 });
      expect(hits.length).toBeGreaterThan(0);
    });
  });

  // Ranking properties that follow from the BM25 formula itself.
  describe("BM25 ranking behavior", () => {
    // Indexes the given texts as { id, text } documents (ids start at 1) and runs one query.
    const searchTexts = (texts, query) => {
      const docs = texts.map((text, position) => ({ id: position + 1, text }));
      return createBM25Index(docs, (d) => d.text).search(query);
    };

    it("should rank documents with rare terms higher", () => {
      const hits = searchTexts(["common common common", "common rare unique", "common common"], "unique");
      expect(hits.length).toBe(1);
      expect(hits[0].item.id).toBe(2);
    });

    it("should give higher scores to shorter documents (length normalization)", () => {
      const hits = searchTexts(
        ["compress file", "compress file with additional words that make it longer"],
        "compress file",
      );
      // Length normalization favors the shorter of two otherwise equal matches.
      expect(hits[0].item.id).toBe(1);
    });

    it("should handle repeated terms correctly", () => {
      const hits = searchTexts(["error error error", "error warning info"], "error");
      // Both documents contain the term, so both must be returned.
      expect(hits.length).toBe(2);
      // The document with the higher term frequency ranks first (BM25 saturation).
      expect(hits[0].item.id).toBe(1);
    });
  });
});