distill-mcp 0.6.0-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (440) hide show
  1. package/bin/cli.js +133 -0
  2. package/dist/analytics/session-tracker.d.ts +74 -0
  3. package/dist/analytics/session-tracker.d.ts.map +1 -0
  4. package/dist/analytics/session-tracker.js +123 -0
  5. package/dist/ast/benchmark.test.d.ts +7 -0
  6. package/dist/ast/benchmark.test.d.ts.map +1 -0
  7. package/dist/ast/benchmark.test.js +175 -0
  8. package/dist/ast/go/index.d.ts +9 -0
  9. package/dist/ast/go/index.d.ts.map +1 -0
  10. package/dist/ast/go/index.js +8 -0
  11. package/dist/ast/go/parser.d.ts +31 -0
  12. package/dist/ast/go/parser.d.ts.map +1 -0
  13. package/dist/ast/go/parser.js +428 -0
  14. package/dist/ast/go/parser.test.d.ts +5 -0
  15. package/dist/ast/go/parser.test.d.ts.map +1 -0
  16. package/dist/ast/go/parser.test.js +241 -0
  17. package/dist/ast/go/queries.d.ts +51 -0
  18. package/dist/ast/go/queries.d.ts.map +1 -0
  19. package/dist/ast/go/queries.js +114 -0
  20. package/dist/ast/go/utils.d.ts +66 -0
  21. package/dist/ast/go/utils.d.ts.map +1 -0
  22. package/dist/ast/go/utils.js +140 -0
  23. package/dist/ast/index.d.ts +39 -0
  24. package/dist/ast/index.d.ts.map +1 -0
  25. package/dist/ast/index.js +245 -0
  26. package/dist/ast/php/index.d.ts +9 -0
  27. package/dist/ast/php/index.d.ts.map +1 -0
  28. package/dist/ast/php/index.js +8 -0
  29. package/dist/ast/php/parser.d.ts +31 -0
  30. package/dist/ast/php/parser.d.ts.map +1 -0
  31. package/dist/ast/php/parser.js +388 -0
  32. package/dist/ast/php/parser.test.d.ts +5 -0
  33. package/dist/ast/php/parser.test.d.ts.map +1 -0
  34. package/dist/ast/php/parser.test.js +328 -0
  35. package/dist/ast/php/queries.d.ts +61 -0
  36. package/dist/ast/php/queries.d.ts.map +1 -0
  37. package/dist/ast/php/queries.js +117 -0
  38. package/dist/ast/php/utils.d.ts +83 -0
  39. package/dist/ast/php/utils.d.ts.map +1 -0
  40. package/dist/ast/php/utils.js +246 -0
  41. package/dist/ast/python/index.d.ts +9 -0
  42. package/dist/ast/python/index.d.ts.map +1 -0
  43. package/dist/ast/python/index.js +8 -0
  44. package/dist/ast/python/parser.d.ts +32 -0
  45. package/dist/ast/python/parser.d.ts.map +1 -0
  46. package/dist/ast/python/parser.js +422 -0
  47. package/dist/ast/python/parser.test.d.ts +5 -0
  48. package/dist/ast/python/parser.test.d.ts.map +1 -0
  49. package/dist/ast/python/parser.test.js +186 -0
  50. package/dist/ast/python/queries.d.ts +73 -0
  51. package/dist/ast/python/queries.d.ts.map +1 -0
  52. package/dist/ast/python/queries.js +137 -0
  53. package/dist/ast/python/utils.d.ts +63 -0
  54. package/dist/ast/python/utils.d.ts.map +1 -0
  55. package/dist/ast/python/utils.js +159 -0
  56. package/dist/ast/quick-scan.d.ts +40 -0
  57. package/dist/ast/quick-scan.d.ts.map +1 -0
  58. package/dist/ast/quick-scan.js +287 -0
  59. package/dist/ast/rust/index.d.ts +9 -0
  60. package/dist/ast/rust/index.d.ts.map +1 -0
  61. package/dist/ast/rust/index.js +8 -0
  62. package/dist/ast/rust/parser.d.ts +31 -0
  63. package/dist/ast/rust/parser.d.ts.map +1 -0
  64. package/dist/ast/rust/parser.js +416 -0
  65. package/dist/ast/rust/parser.test.d.ts +5 -0
  66. package/dist/ast/rust/parser.test.d.ts.map +1 -0
  67. package/dist/ast/rust/parser.test.js +329 -0
  68. package/dist/ast/rust/queries.d.ts +66 -0
  69. package/dist/ast/rust/queries.d.ts.map +1 -0
  70. package/dist/ast/rust/queries.js +132 -0
  71. package/dist/ast/rust/utils.d.ts +91 -0
  72. package/dist/ast/rust/utils.d.ts.map +1 -0
  73. package/dist/ast/rust/utils.js +254 -0
  74. package/dist/ast/swift/index.d.ts +10 -0
  75. package/dist/ast/swift/index.d.ts.map +1 -0
  76. package/dist/ast/swift/index.js +8 -0
  77. package/dist/ast/swift/parser.d.ts +31 -0
  78. package/dist/ast/swift/parser.d.ts.map +1 -0
  79. package/dist/ast/swift/parser.js +554 -0
  80. package/dist/ast/swift/parser.test.d.ts +5 -0
  81. package/dist/ast/swift/parser.test.d.ts.map +1 -0
  82. package/dist/ast/swift/parser.test.js +398 -0
  83. package/dist/ast/swift/queries.d.ts +71 -0
  84. package/dist/ast/swift/queries.d.ts.map +1 -0
  85. package/dist/ast/swift/queries.js +137 -0
  86. package/dist/ast/swift/utils.d.ts +94 -0
  87. package/dist/ast/swift/utils.d.ts.map +1 -0
  88. package/dist/ast/swift/utils.js +411 -0
  89. package/dist/ast/types.d.ts +96 -0
  90. package/dist/ast/types.d.ts.map +1 -0
  91. package/dist/ast/types.js +21 -0
  92. package/dist/ast/typescript.d.ts +24 -0
  93. package/dist/ast/typescript.d.ts.map +1 -0
  94. package/dist/ast/typescript.js +357 -0
  95. package/dist/cache/file-hash.d.ts +33 -0
  96. package/dist/cache/file-hash.d.ts.map +1 -0
  97. package/dist/cache/file-hash.js +59 -0
  98. package/dist/cache/index.d.ts +9 -0
  99. package/dist/cache/index.d.ts.map +1 -0
  100. package/dist/cache/index.js +8 -0
  101. package/dist/cache/smart-cache.d.ts +68 -0
  102. package/dist/cache/smart-cache.d.ts.map +1 -0
  103. package/dist/cache/smart-cache.js +266 -0
  104. package/dist/cache/types.d.ts +102 -0
  105. package/dist/cache/types.d.ts.map +1 -0
  106. package/dist/cache/types.js +6 -0
  107. package/dist/cli/analyze.d.ts +43 -0
  108. package/dist/cli/analyze.d.ts.map +1 -0
  109. package/dist/cli/analyze.js +250 -0
  110. package/dist/cli/doctor.d.ts +2 -0
  111. package/dist/cli/doctor.d.ts.map +1 -0
  112. package/dist/cli/doctor.js +127 -0
  113. package/dist/cli/hooks.d.ts +14 -0
  114. package/dist/cli/hooks.d.ts.map +1 -0
  115. package/dist/cli/hooks.js +229 -0
  116. package/dist/cli/index.d.ts +5 -0
  117. package/dist/cli/index.d.ts.map +1 -0
  118. package/dist/cli/index.js +4 -0
  119. package/dist/cli/setup.d.ts +10 -0
  120. package/dist/cli/setup.d.ts.map +1 -0
  121. package/dist/cli/setup.js +117 -0
  122. package/dist/cli/utils.d.ts +30 -0
  123. package/dist/cli/utils.d.ts.map +1 -0
  124. package/dist/cli/utils.js +116 -0
  125. package/dist/compressors/config.d.ts +9 -0
  126. package/dist/compressors/config.d.ts.map +1 -0
  127. package/dist/compressors/config.js +183 -0
  128. package/dist/compressors/conversation.d.ts +109 -0
  129. package/dist/compressors/conversation.d.ts.map +1 -0
  130. package/dist/compressors/conversation.js +404 -0
  131. package/dist/compressors/diff.d.ts +35 -0
  132. package/dist/compressors/diff.d.ts.map +1 -0
  133. package/dist/compressors/diff.js +389 -0
  134. package/dist/compressors/generic.d.ts +9 -0
  135. package/dist/compressors/generic.d.ts.map +1 -0
  136. package/dist/compressors/generic.js +188 -0
  137. package/dist/compressors/index.d.ts +31 -0
  138. package/dist/compressors/index.d.ts.map +1 -0
  139. package/dist/compressors/index.js +82 -0
  140. package/dist/compressors/logs.d.ts +9 -0
  141. package/dist/compressors/logs.d.ts.map +1 -0
  142. package/dist/compressors/logs.js +245 -0
  143. package/dist/compressors/multifile.d.ts +106 -0
  144. package/dist/compressors/multifile.d.ts.map +1 -0
  145. package/dist/compressors/multifile.js +498 -0
  146. package/dist/compressors/semantic.d.ts +33 -0
  147. package/dist/compressors/semantic.d.ts.map +1 -0
  148. package/dist/compressors/semantic.js +233 -0
  149. package/dist/compressors/stacktrace.d.ts +9 -0
  150. package/dist/compressors/stacktrace.d.ts.map +1 -0
  151. package/dist/compressors/stacktrace.js +259 -0
  152. package/dist/compressors/types.d.ts +146 -0
  153. package/dist/compressors/types.d.ts.map +1 -0
  154. package/dist/compressors/types.js +6 -0
  155. package/dist/config/output-config.d.ts +56 -0
  156. package/dist/config/output-config.d.ts.map +1 -0
  157. package/dist/config/output-config.js +78 -0
  158. package/dist/index.d.ts +21 -0
  159. package/dist/index.d.ts.map +1 -0
  160. package/dist/index.js +27 -0
  161. package/dist/middleware/chain.d.ts +49 -0
  162. package/dist/middleware/chain.d.ts.map +1 -0
  163. package/dist/middleware/chain.js +126 -0
  164. package/dist/middleware/index.d.ts +4 -0
  165. package/dist/middleware/index.d.ts.map +1 -0
  166. package/dist/middleware/index.js +3 -0
  167. package/dist/middleware/logging.d.ts +8 -0
  168. package/dist/middleware/logging.d.ts.map +1 -0
  169. package/dist/middleware/logging.js +71 -0
  170. package/dist/middleware/types.d.ts +58 -0
  171. package/dist/middleware/types.d.ts.map +1 -0
  172. package/dist/middleware/types.js +7 -0
  173. package/dist/parsers/eslint.d.ts +8 -0
  174. package/dist/parsers/eslint.d.ts.map +1 -0
  175. package/dist/parsers/eslint.js +132 -0
  176. package/dist/parsers/generic.d.ts +8 -0
  177. package/dist/parsers/generic.d.ts.map +1 -0
  178. package/dist/parsers/generic.js +234 -0
  179. package/dist/parsers/index.d.ts +34 -0
  180. package/dist/parsers/index.d.ts.map +1 -0
  181. package/dist/parsers/index.js +216 -0
  182. package/dist/parsers/types.d.ts +84 -0
  183. package/dist/parsers/types.d.ts.map +1 -0
  184. package/dist/parsers/types.js +6 -0
  185. package/dist/parsers/typescript.d.ts +8 -0
  186. package/dist/parsers/typescript.d.ts.map +1 -0
  187. package/dist/parsers/typescript.js +107 -0
  188. package/dist/pipelines/definitions.d.ts +50 -0
  189. package/dist/pipelines/definitions.d.ts.map +1 -0
  190. package/dist/pipelines/definitions.js +206 -0
  191. package/dist/sandbox/executor.d.ts +12 -0
  192. package/dist/sandbox/executor.d.ts.map +1 -0
  193. package/dist/sandbox/executor.js +191 -0
  194. package/dist/sandbox/index.d.ts +11 -0
  195. package/dist/sandbox/index.d.ts.map +1 -0
  196. package/dist/sandbox/index.js +9 -0
  197. package/dist/sandbox/sandbox.test.d.ts +7 -0
  198. package/dist/sandbox/sandbox.test.d.ts.map +1 -0
  199. package/dist/sandbox/sandbox.test.js +202 -0
  200. package/dist/sandbox/sdk/analyze.d.ts +36 -0
  201. package/dist/sandbox/sdk/analyze.d.ts.map +1 -0
  202. package/dist/sandbox/sdk/analyze.js +413 -0
  203. package/dist/sandbox/sdk/analyze.test.d.ts +7 -0
  204. package/dist/sandbox/sdk/analyze.test.d.ts.map +1 -0
  205. package/dist/sandbox/sdk/analyze.test.js +191 -0
  206. package/dist/sandbox/sdk/code.d.ts +20 -0
  207. package/dist/sandbox/sdk/code.d.ts.map +1 -0
  208. package/dist/sandbox/sdk/code.js +104 -0
  209. package/dist/sandbox/sdk/compress.d.ts +23 -0
  210. package/dist/sandbox/sdk/compress.d.ts.map +1 -0
  211. package/dist/sandbox/sdk/compress.js +107 -0
  212. package/dist/sandbox/sdk/conversation.d.ts +148 -0
  213. package/dist/sandbox/sdk/conversation.d.ts.map +1 -0
  214. package/dist/sandbox/sdk/conversation.js +177 -0
  215. package/dist/sandbox/sdk/files.d.ts +29 -0
  216. package/dist/sandbox/sdk/files.d.ts.map +1 -0
  217. package/dist/sandbox/sdk/files.js +41 -0
  218. package/dist/sandbox/sdk/git.d.ts +37 -0
  219. package/dist/sandbox/sdk/git.d.ts.map +1 -0
  220. package/dist/sandbox/sdk/git.js +313 -0
  221. package/dist/sandbox/sdk/git.test.d.ts +8 -0
  222. package/dist/sandbox/sdk/git.test.d.ts.map +1 -0
  223. package/dist/sandbox/sdk/git.test.js +160 -0
  224. package/dist/sandbox/sdk/index.d.ts +16 -0
  225. package/dist/sandbox/sdk/index.d.ts.map +1 -0
  226. package/dist/sandbox/sdk/index.js +15 -0
  227. package/dist/sandbox/sdk/multifile.d.ts +63 -0
  228. package/dist/sandbox/sdk/multifile.d.ts.map +1 -0
  229. package/dist/sandbox/sdk/multifile.js +130 -0
  230. package/dist/sandbox/sdk/pipeline.d.ts +16 -0
  231. package/dist/sandbox/sdk/pipeline.d.ts.map +1 -0
  232. package/dist/sandbox/sdk/pipeline.js +454 -0
  233. package/dist/sandbox/sdk/pipeline.test.d.ts +7 -0
  234. package/dist/sandbox/sdk/pipeline.test.d.ts.map +1 -0
  235. package/dist/sandbox/sdk/pipeline.test.js +197 -0
  236. package/dist/sandbox/sdk/search.d.ts +36 -0
  237. package/dist/sandbox/sdk/search.d.ts.map +1 -0
  238. package/dist/sandbox/sdk/search.js +338 -0
  239. package/dist/sandbox/sdk/search.test.d.ts +7 -0
  240. package/dist/sandbox/sdk/search.test.d.ts.map +1 -0
  241. package/dist/sandbox/sdk/search.test.js +183 -0
  242. package/dist/sandbox/sdk/utils.d.ts +18 -0
  243. package/dist/sandbox/sdk/utils.d.ts.map +1 -0
  244. package/dist/sandbox/sdk/utils.js +24 -0
  245. package/dist/sandbox/security/code-analyzer.d.ts +15 -0
  246. package/dist/sandbox/security/code-analyzer.d.ts.map +1 -0
  247. package/dist/sandbox/security/code-analyzer.js +87 -0
  248. package/dist/sandbox/security/index.d.ts +6 -0
  249. package/dist/sandbox/security/index.d.ts.map +1 -0
  250. package/dist/sandbox/security/index.js +5 -0
  251. package/dist/sandbox/security/path-validator.d.ts +23 -0
  252. package/dist/sandbox/security/path-validator.d.ts.map +1 -0
  253. package/dist/sandbox/security/path-validator.js +113 -0
  254. package/dist/sandbox/types.d.ts +577 -0
  255. package/dist/sandbox/types.d.ts.map +1 -0
  256. package/dist/sandbox/types.js +14 -0
  257. package/dist/server.d.ts +36 -0
  258. package/dist/server.d.ts.map +1 -0
  259. package/dist/server.js +133 -0
  260. package/dist/summarizers/build-logs.d.ts +11 -0
  261. package/dist/summarizers/build-logs.d.ts.map +1 -0
  262. package/dist/summarizers/build-logs.js +234 -0
  263. package/dist/summarizers/generic.d.ts +11 -0
  264. package/dist/summarizers/generic.d.ts.map +1 -0
  265. package/dist/summarizers/generic.js +93 -0
  266. package/dist/summarizers/index.d.ts +20 -0
  267. package/dist/summarizers/index.d.ts.map +1 -0
  268. package/dist/summarizers/index.js +43 -0
  269. package/dist/summarizers/server-logs.d.ts +11 -0
  270. package/dist/summarizers/server-logs.d.ts.map +1 -0
  271. package/dist/summarizers/server-logs.js +215 -0
  272. package/dist/summarizers/test-logs.d.ts +11 -0
  273. package/dist/summarizers/test-logs.d.ts.map +1 -0
  274. package/dist/summarizers/test-logs.js +258 -0
  275. package/dist/summarizers/types.d.ts +146 -0
  276. package/dist/summarizers/types.d.ts.map +1 -0
  277. package/dist/summarizers/types.js +21 -0
  278. package/dist/tools/analyze-build-output.d.ts +30 -0
  279. package/dist/tools/analyze-build-output.d.ts.map +1 -0
  280. package/dist/tools/analyze-build-output.js +45 -0
  281. package/dist/tools/analyze-context.d.ts +23 -0
  282. package/dist/tools/analyze-context.d.ts.map +1 -0
  283. package/dist/tools/analyze-context.js +78 -0
  284. package/dist/tools/auto-optimize.d.ts +9 -0
  285. package/dist/tools/auto-optimize.d.ts.map +1 -0
  286. package/dist/tools/auto-optimize.js +191 -0
  287. package/dist/tools/code-execute.d.ts +9 -0
  288. package/dist/tools/code-execute.d.ts.map +1 -0
  289. package/dist/tools/code-execute.js +84 -0
  290. package/dist/tools/code-skeleton.d.ts +33 -0
  291. package/dist/tools/code-skeleton.d.ts.map +1 -0
  292. package/dist/tools/code-skeleton.js +206 -0
  293. package/dist/tools/compress-context.d.ts +33 -0
  294. package/dist/tools/compress-context.d.ts.map +1 -0
  295. package/dist/tools/compress-context.js +64 -0
  296. package/dist/tools/context-budget.d.ts +43 -0
  297. package/dist/tools/context-budget.d.ts.map +1 -0
  298. package/dist/tools/context-budget.js +260 -0
  299. package/dist/tools/context-budget.test.d.ts +5 -0
  300. package/dist/tools/context-budget.test.d.ts.map +1 -0
  301. package/dist/tools/context-budget.test.js +219 -0
  302. package/dist/tools/conversation-compress.d.ts +46 -0
  303. package/dist/tools/conversation-compress.d.ts.map +1 -0
  304. package/dist/tools/conversation-compress.js +78 -0
  305. package/dist/tools/conversation-memory.d.ts +75 -0
  306. package/dist/tools/conversation-memory.d.ts.map +1 -0
  307. package/dist/tools/conversation-memory.js +289 -0
  308. package/dist/tools/deduplicate-errors.d.ts +30 -0
  309. package/dist/tools/deduplicate-errors.d.ts.map +1 -0
  310. package/dist/tools/deduplicate-errors.js +72 -0
  311. package/dist/tools/detect-retry-loop.d.ts +40 -0
  312. package/dist/tools/detect-retry-loop.d.ts.map +1 -0
  313. package/dist/tools/detect-retry-loop.js +212 -0
  314. package/dist/tools/diff-compress.d.ts +40 -0
  315. package/dist/tools/diff-compress.d.ts.map +1 -0
  316. package/dist/tools/diff-compress.js +94 -0
  317. package/dist/tools/discover-tools.d.ts +11 -0
  318. package/dist/tools/discover-tools.d.ts.map +1 -0
  319. package/dist/tools/discover-tools.js +163 -0
  320. package/dist/tools/dynamic-loader.d.ts +131 -0
  321. package/dist/tools/dynamic-loader.d.ts.map +1 -0
  322. package/dist/tools/dynamic-loader.js +378 -0
  323. package/dist/tools/dynamic-loader.test.d.ts +10 -0
  324. package/dist/tools/dynamic-loader.test.d.ts.map +1 -0
  325. package/dist/tools/dynamic-loader.test.js +164 -0
  326. package/dist/tools/lazy-mcp.d.ts +31 -0
  327. package/dist/tools/lazy-mcp.d.ts.map +1 -0
  328. package/dist/tools/lazy-mcp.js +151 -0
  329. package/dist/tools/lazy-mcp.test.d.ts +10 -0
  330. package/dist/tools/lazy-mcp.test.d.ts.map +1 -0
  331. package/dist/tools/lazy-mcp.test.js +172 -0
  332. package/dist/tools/multifile-compress.d.ts +36 -0
  333. package/dist/tools/multifile-compress.d.ts.map +1 -0
  334. package/dist/tools/multifile-compress.js +223 -0
  335. package/dist/tools/optimization-tips.d.ts +18 -0
  336. package/dist/tools/optimization-tips.d.ts.map +1 -0
  337. package/dist/tools/optimization-tips.js +133 -0
  338. package/dist/tools/registry.d.ts +70 -0
  339. package/dist/tools/registry.d.ts.map +1 -0
  340. package/dist/tools/registry.js +169 -0
  341. package/dist/tools/semantic-compress.d.ts +39 -0
  342. package/dist/tools/semantic-compress.d.ts.map +1 -0
  343. package/dist/tools/semantic-compress.js +113 -0
  344. package/dist/tools/semantic-compress.test.d.ts +5 -0
  345. package/dist/tools/semantic-compress.test.d.ts.map +1 -0
  346. package/dist/tools/semantic-compress.test.js +182 -0
  347. package/dist/tools/session-stats.d.ts +34 -0
  348. package/dist/tools/session-stats.d.ts.map +1 -0
  349. package/dist/tools/session-stats.js +194 -0
  350. package/dist/tools/set-output-config.d.ts +38 -0
  351. package/dist/tools/set-output-config.d.ts.map +1 -0
  352. package/dist/tools/set-output-config.js +122 -0
  353. package/dist/tools/smart-cache-tool.d.ts +38 -0
  354. package/dist/tools/smart-cache-tool.d.ts.map +1 -0
  355. package/dist/tools/smart-cache-tool.js +224 -0
  356. package/dist/tools/smart-file-read.d.ts +52 -0
  357. package/dist/tools/smart-file-read.d.ts.map +1 -0
  358. package/dist/tools/smart-file-read.js +481 -0
  359. package/dist/tools/smart-pipeline.d.ts +40 -0
  360. package/dist/tools/smart-pipeline.d.ts.map +1 -0
  361. package/dist/tools/smart-pipeline.js +295 -0
  362. package/dist/tools/summarize-logs.d.ts +36 -0
  363. package/dist/tools/summarize-logs.d.ts.map +1 -0
  364. package/dist/tools/summarize-logs.js +184 -0
  365. package/dist/tools/token-budget.test.d.ts +11 -0
  366. package/dist/tools/token-budget.test.d.ts.map +1 -0
  367. package/dist/tools/token-budget.test.js +275 -0
  368. package/dist/utils/bm25.d.ts +86 -0
  369. package/dist/utils/bm25.d.ts.map +1 -0
  370. package/dist/utils/bm25.js +153 -0
  371. package/dist/utils/bm25.test.d.ts +5 -0
  372. package/dist/utils/bm25.test.d.ts.map +1 -0
  373. package/dist/utils/bm25.test.js +156 -0
  374. package/dist/utils/command-normalizer.d.ts +39 -0
  375. package/dist/utils/command-normalizer.d.ts.map +1 -0
  376. package/dist/utils/command-normalizer.js +90 -0
  377. package/dist/utils/content-detector.d.ts +27 -0
  378. package/dist/utils/content-detector.d.ts.map +1 -0
  379. package/dist/utils/content-detector.js +127 -0
  380. package/dist/utils/embeddings.d.ts +54 -0
  381. package/dist/utils/embeddings.d.ts.map +1 -0
  382. package/dist/utils/embeddings.js +97 -0
  383. package/dist/utils/embeddings.test.d.ts +8 -0
  384. package/dist/utils/embeddings.test.d.ts.map +1 -0
  385. package/dist/utils/embeddings.test.js +96 -0
  386. package/dist/utils/error-normalizer.d.ts +39 -0
  387. package/dist/utils/error-normalizer.d.ts.map +1 -0
  388. package/dist/utils/error-normalizer.js +233 -0
  389. package/dist/utils/hybrid-search.d.ts +79 -0
  390. package/dist/utils/hybrid-search.d.ts.map +1 -0
  391. package/dist/utils/hybrid-search.js +146 -0
  392. package/dist/utils/hybrid-search.test.d.ts +5 -0
  393. package/dist/utils/hybrid-search.test.d.ts.map +1 -0
  394. package/dist/utils/hybrid-search.test.js +172 -0
  395. package/dist/utils/index.d.ts +13 -0
  396. package/dist/utils/index.d.ts.map +1 -0
  397. package/dist/utils/index.js +12 -0
  398. package/dist/utils/language-detector.d.ts +27 -0
  399. package/dist/utils/language-detector.d.ts.map +1 -0
  400. package/dist/utils/language-detector.js +94 -0
  401. package/dist/utils/log-parser.d.ts +46 -0
  402. package/dist/utils/log-parser.d.ts.map +1 -0
  403. package/dist/utils/log-parser.js +287 -0
  404. package/dist/utils/output-estimator.d.ts +54 -0
  405. package/dist/utils/output-estimator.d.ts.map +1 -0
  406. package/dist/utils/output-estimator.js +119 -0
  407. package/dist/utils/output-estimator.test.d.ts +5 -0
  408. package/dist/utils/output-estimator.test.d.ts.map +1 -0
  409. package/dist/utils/output-estimator.test.js +115 -0
  410. package/dist/utils/output-similarity.d.ts +48 -0
  411. package/dist/utils/output-similarity.d.ts.map +1 -0
  412. package/dist/utils/output-similarity.js +140 -0
  413. package/dist/utils/project-detector.d.ts +16 -0
  414. package/dist/utils/project-detector.d.ts.map +1 -0
  415. package/dist/utils/project-detector.js +119 -0
  416. package/dist/utils/segment-scorer.d.ts +99 -0
  417. package/dist/utils/segment-scorer.d.ts.map +1 -0
  418. package/dist/utils/segment-scorer.js +148 -0
  419. package/dist/utils/signature-grouper.d.ts +58 -0
  420. package/dist/utils/signature-grouper.d.ts.map +1 -0
  421. package/dist/utils/signature-grouper.js +185 -0
  422. package/dist/utils/tfidf.d.ts +45 -0
  423. package/dist/utils/tfidf.d.ts.map +1 -0
  424. package/dist/utils/tfidf.js +204 -0
  425. package/dist/utils/tfidf.test.d.ts +5 -0
  426. package/dist/utils/tfidf.test.d.ts.map +1 -0
  427. package/dist/utils/tfidf.test.js +115 -0
  428. package/dist/utils/token-counter.d.ts +35 -0
  429. package/dist/utils/token-counter.d.ts.map +1 -0
  430. package/dist/utils/token-counter.js +83 -0
  431. package/dist/utils/toon-serializer.d.ts +120 -0
  432. package/dist/utils/toon-serializer.d.ts.map +1 -0
  433. package/dist/utils/toon-serializer.js +472 -0
  434. package/dist/utils/toon-serializer.test.d.ts +7 -0
  435. package/dist/utils/toon-serializer.test.d.ts.map +1 -0
  436. package/dist/utils/toon-serializer.test.js +290 -0
  437. package/package.json +63 -0
  438. package/scripts/install.ps1 +133 -0
  439. package/scripts/install.sh +183 -0
  440. package/scripts/pre-commit-hook.sh +86 -0
@@ -0,0 +1,148 @@
1
+ /**
2
+ * Segment Scorer
3
+ *
4
+ * Combines multiple scoring signals to determine segment importance:
5
+ * - TF-IDF: Unique/rare terms are more important
6
+ * - Position: Beginning and end of content are more important
7
+ * - Keywords: Errors, instructions, code blocks get priority
8
+ */
9
+ import { countTokens } from "./token-counter.js";
10
/**
 * Default blend of the three scoring signals used when a caller does not
 * supply its own weights. The three shares add up to 1.0.
 */
export const DEFAULT_WEIGHTS = {
    // Share of the TF-IDF signal (unique content)
    tfidf: 0.4,
    // Share of the positional signal (location in document)
    position: 0.3,
    // Share of the keyword signal (important terms)
    keyword: 0.3,
};
18
/**
 * Map a normalized document position onto a U-shaped importance weight:
 * the closer a segment sits to either edge of the document, the higher
 * its weight; the middle falls back to a baseline.
 *
 * Bands (checked from the outermost inwards):
 *   - first/last 10%  -> 1.0
 *   - next 10%        -> 0.85
 *   - next 10%        -> 0.7
 *   - everything else -> 0.6
 *
 * @param position - Normalized position (0-1)
 * @returns Position weight (0.6-1.0)
 */
export function calculatePositionWeight(position) {
    // [low edge, high edge, weight], ordered from the edges towards the middle
    const edgeBands = [
        [0.1, 0.9, 1.0],
        [0.2, 0.8, 0.85],
        [0.3, 0.7, 0.7],
    ];
    for (const [lowEdge, highEdge, weight] of edgeBands) {
        if (position <= lowEdge || position >= highEdge) {
            return weight;
        }
    }
    // No band matched: middle-of-document baseline
    return 0.6;
}
39
/**
 * Keyword patterns for importance detection.
 *
 * None of the patterns carries the `g` or `y` flag, so `.test()` keeps no
 * state between calls and the shared regex objects can be reused freely.
 */
const KEYWORD_PATTERNS = {
    // Errors and failures (highest priority).
    // Case-insensitive: log output commonly spells these as "FATAL",
    // "CRITICAL", "PANIC", "Failed" or "FAILED", which a hand-written list
    // of case variants easily misses.
    errors: /\b(error|fail|failed|exception|panic|crash|fatal|critical)\b/i,
    // Instructions and requirements.
    // Case-insensitive for the same reason ("Must", "Should", "Todo", ...).
    instructions: /\b(must|should|required|important|note|warning|todo|fixme)\b/i,
    // Code blocks (markdown): fenced blocks or inline code spans
    codeBlocks: /```[\s\S]*?```|`[^`]+`/,
    // Technical terms (programming); kept case-sensitive because these are
    // language keywords
    technical: /\b(function|class|interface|type|const|let|var|async|await|return|import|export|def|fn|struct|impl|pub|private|public|protected)\b/,
    // Structural markers at the start of any line (headers, lists, quotes)
    structural: /^(#{1,6}\s|[-*+]\s|\d+\.\s|>\s)/m,
    // Questions: a "?" at the end of any line (often need answers preserved)
    questions: /\?[\s]*$/m,
    // URLs and references (@mentions, #tags)
    references: /https?:\/\/[^\s]+|@\w+|#\w+/,
};
/**
 * Calculate keyword boost based on content patterns.
 *
 * Every pattern category that matches contributes a fixed amount; the
 * contributions are added together and the total is capped at 1.0.
 *
 * @param text - Segment text to analyze
 * @returns Keyword boost (0-1)
 */
export function calculateKeywordBoost(text) {
    let boost = 0;
    if (KEYWORD_PATTERNS.errors.test(text)) {
        boost += 0.4; // Errors are critical
    }
    if (KEYWORD_PATTERNS.instructions.test(text)) {
        boost += 0.3; // Instructions are important
    }
    if (KEYWORD_PATTERNS.codeBlocks.test(text)) {
        boost += 0.2; // Code should be preserved
    }
    if (KEYWORD_PATTERNS.structural.test(text)) {
        boost += 0.15; // Structure helps comprehension
    }
    if (KEYWORD_PATTERNS.technical.test(text)) {
        boost += 0.1; // Technical content is usually relevant
    }
    if (KEYWORD_PATTERNS.questions.test(text)) {
        boost += 0.15; // Questions need context
    }
    if (KEYWORD_PATTERNS.references.test(text)) {
        boost += 0.1; // References are informational
    }
    // All categories together exceed 1.0, so clamp the sum
    return Math.min(boost, 1.0);
}
90
/**
 * Build a segment record for a span of text.
 *
 * The segment's `position` is `startLine / totalLines`, or 0 when
 * `totalLines` is not greater than zero. `tokens` is obtained from
 * `countTokens(text)` and `isPreserved` always starts out as `false`.
 *
 * @param text - Segment text
 * @param startLine - Starting line number
 * @param endLine - Ending line number
 * @param type - Segment type
 * @param totalLines - Total lines in document (for position calculation)
 * @returns Segment object
 */
export function createSegment(text, startLine, endLine, type, totalLines) {
    let position = 0;
    if (totalLines > 0) {
        position = startLine / totalLines;
    }
    const tokens = countTokens(text);
    return { text, startLine, endLine, type, position, tokens, isPreserved: false };
}
111
/**
 * Compute the overall importance of a segment.
 *
 * The position score is derived from `segment.position` and the keyword
 * score from `segment.text`; together with the supplied TF-IDF score they
 * are combined as a weighted sum.
 *
 * @param segment - Segment to score
 * @param tfidfScore - Pre-calculated TF-IDF score (0-1)
 * @param weights - Scoring weights (defaults to DEFAULT_WEIGHTS)
 * @returns A copy of the segment extended with `importance` and the
 *          per-signal `scores` breakdown
 */
export function scoreSegment(segment, tfidfScore, weights = DEFAULT_WEIGHTS) {
    const scores = {
        tfidf: tfidfScore,
        position: calculatePositionWeight(segment.position),
        keyword: calculateKeywordBoost(segment.text),
    };
    // Weighted sum of the three signals
    scores.combined = weights.tfidf * scores.tfidf +
        weights.position * scores.position +
        weights.keyword * scores.keyword;
    return { ...segment, importance: scores.combined, scores };
}
137
/**
 * Tell whether the text matches the shared error/failure keyword pattern.
 *
 * @param text - Text to inspect
 * @returns `true` when the error pattern matches, otherwise `false`
 */
export function hasErrorIndicators(text) {
    const { errors: errorPattern } = KEYWORD_PATTERNS;
    return errorPattern.test(text);
}
143
/**
 * Tell whether the text matches the shared instruction/requirement keyword
 * pattern.
 *
 * @param text - Text to inspect
 * @returns `true` when the instruction pattern matches, otherwise `false`
 */
export function hasInstructionIndicators(text) {
    const { instructions: instructionPattern } = KEYWORD_PATTERNS;
    return instructionPattern.test(text);
}
@@ -0,0 +1,58 @@
1
+ /**
2
+ * Signature Grouper
3
+ *
4
+ * Groups error lines by their normalized signature for deduplication.
5
+ */
6
+ export interface GroupOptions {
7
+ /** Minimum occurrences to consider as duplicate (default: 2) */
8
+ threshold: number;
9
+ /** Number of first occurrences to keep in full (default: 1) */
10
+ keepFirst: number;
11
+ /** Custom regex; a line matching it is grouped as an error before the built-in detection is tried */
12
+ customPattern?: RegExp;
13
+ /** Maximum samples to keep per group (default: 3) */
14
+ maxSamples?: number;
15
+ }
16
+ export interface DeduplicatedErrorGroup {
17
+ /** Normalized signature for this error type (the key of this group in GroupingResult.groups) */
18
+ signature: string;
19
+ /** Number of occurrences */
20
+ count: number;
21
+ /** First full occurrence (raw line) */
22
+ firstOccurrence: string;
23
+ /** Locations where this error occurred (a location already listed is not added again) */
24
+ locations: string[];
25
+ /** Sample raw lines (up to maxSamples, 3 when maxSamples is unset) */
26
+ samples: string[];
27
+ /** Error code if available */
28
+ code?: string;
29
+ /** Cleaned message without location info */
30
+ message: string;
31
+ }
32
+ export interface GroupingResult {
33
+ /** Grouped errors, keyed by normalized signature */
34
+ groups: Map<string, DeduplicatedErrorGroup>;
35
+ /** Lines that don't match error patterns (stored trimmed; blank lines are skipped) */
36
+ nonErrorLines: string[];
37
+ /** Total error lines processed (every occurrence counts, duplicates included) */
38
+ totalErrorLines: number;
39
+ }
40
+ /**
41
+ * Group error lines by their normalized signature.
+ * Takes the raw lines plus optional partial GroupOptions and returns the groups,
+ * the lines not recognised as errors, and the number of error lines seen.
42
+ */
43
+ export declare function groupBySignature(lines: string[], options?: Partial<GroupOptions>): GroupingResult;
44
+ /**
45
+ * Format grouped errors as a readable string.
+ * `result` is required; `format` ("plain" or "markdown") and `options` are optional.
46
+ */
47
+ export declare function formatGroups(result: GroupingResult, format?: "plain" | "markdown", options?: Partial<GroupOptions>): string;
48
+ /**
49
+ * Calculate deduplication statistics for a grouping result.
+ * The returned object carries five numeric fields: originalLines, deduplicatedLines,
+ * uniqueErrors, totalDuplicates and reductionPercent.
50
+ */
51
+ export declare function calculateStats(result: GroupingResult): {
52
+ originalLines: number;
53
+ deduplicatedLines: number;
54
+ uniqueErrors: number;
55
+ totalDuplicates: number;
56
+ reductionPercent: number;
57
+ };
58
+ //# sourceMappingURL=signature-grouper.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"signature-grouper.d.ts","sourceRoot":"","sources":["../../src/utils/signature-grouper.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAWH,MAAM,WAAW,YAAY;IAC3B,gEAAgE;IAChE,SAAS,EAAE,MAAM,CAAC;IAClB,+DAA+D;IAC/D,SAAS,EAAE,MAAM,CAAC;IAClB,8CAA8C;IAC9C,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,wCAAwC;IACxC,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,sBAAsB;IACrC,+CAA+C;IAC/C,SAAS,EAAE,MAAM,CAAC;IAClB,4BAA4B;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,uCAAuC;IACvC,eAAe,EAAE,MAAM,CAAC;IACxB,0CAA0C;IAC1C,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,0CAA0C;IAC1C,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,8BAA8B;IAC9B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,4CAA4C;IAC5C,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,cAAc;IAC7B,kCAAkC;IAClC,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,sBAAsB,CAAC,CAAC;IAC5C,4CAA4C;IAC5C,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,kCAAkC;IAClC,eAAe,EAAE,MAAM,CAAC;CACzB;AAQD;;GAEG;AACH,wBAAgB,gBAAgB,CAC9B,KAAK,EAAE,MAAM,EAAE,EACf,OAAO,GAAE,OAAO,CAAC,YAAY,CAAM,GAClC,cAAc,CAsChB;AAgED;;GAEG;AACH,wBAAgB,YAAY,CAC1B,MAAM,EAAE,cAAc,EACtB,MAAM,GAAE,OAAO,GAAG,UAAoB,EACtC,OAAO,GAAE,OAAO,CAAC,YAAY,CAAM,GAClC,MAAM,CA2DR;AAUD;;GAEG;AACH,wBAAgB,cAAc,CAAC,MAAM,EAAE,cAAc,GAAG;IACtD,aAAa,EAAE,MAAM,CAAC;IACtB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,gBAAgB,EAAE,MAAM,CAAC;CAC1B,CAeA"}
@@ -0,0 +1,185 @@
1
+ /**
2
+ * Signature Grouper
3
+ *
4
+ * Groups error lines by their normalized signature for deduplication.
5
+ */
6
+ import { normalizeErrorLine, extractErrorParts, createSignature, formatLocation, isLikelyError, } from "./error-normalizer.js";
7
// Fallback values merged underneath caller-supplied options.
const DEFAULT_OPTIONS = {
    // A group with at least this many occurrences is reported as deduplicated.
    threshold: 2,
    // The unique-errors section lists up to keepFirst * 5 entries; 0 hides it.
    keepFirst: 1,
    // Upper bound on raw sample lines retained per group.
    maxSamples: 3,
};
12
/**
 * Group error lines by their normalized signature.
 *
 * Every line is trimmed and blank lines are dropped. A remaining line is
 * counted as an error when, checked in this order:
 *   1. `options.customPattern` is set and matches the line,
 *   2. `extractErrorParts` returns a truthy result for it, or
 *   3. `isLikelyError` accepts it.
 * All other lines are collected (trimmed) in `nonErrorLines`.
 *
 * @param {string[]} lines - Raw output lines to classify and group.
 * @param {object} [options] - Overrides merged over DEFAULT_OPTIONS. Reads
 *   `customPattern` (used via `.test()`, so presumably a RegExp) and hands the
 *   merged options to the per-line helpers, which read `maxSamples`.
 * @returns {{groups: Map, nonErrorLines: string[], totalErrorLines: number}}
 *   Groups keyed by signature, the non-error lines, and the error-line count.
 */
export function groupBySignature(lines, options = {}) {
    const opts = { ...DEFAULT_OPTIONS, ...options };
    const { customPattern } = opts;
    const groups = new Map();
    const nonErrorLines = [];
    let totalErrorLines = 0;
    for (const line of lines) {
        const trimmed = line.trim();
        if (!trimmed) {
            continue;
        }
        // Check custom pattern first
        if (customPattern) {
            // A global or sticky regex remembers where its last match ended;
            // without this reset, a match on one line makes the next test()
            // start mid-string and miss matches on the following lines.
            customPattern.lastIndex = 0;
            if (customPattern.test(trimmed)) {
                processErrorLine(trimmed, groups, opts);
                totalErrorLines++;
                continue;
            }
        }
        // Try to extract error parts
        const parts = extractErrorParts(trimmed);
        if (parts) {
            processErrorLineWithParts(trimmed, parts, groups, opts);
            totalErrorLines++;
        }
        else if (isLikelyError(trimmed)) {
            // Line looks like an error but doesn't match patterns
            processErrorLine(trimmed, groups, opts);
            totalErrorLines++;
        }
        else {
            nonErrorLines.push(trimmed);
        }
    }
    return {
        groups,
        nonErrorLines,
        totalErrorLines,
    };
}
53
/**
 * Record one error line whose structured parts were already extracted.
 *
 * The group key comes from `createSignature(parts)`. A new group starts with
 * the raw line as its first occurrence and only sample; an existing group has
 * its count bumped, gains the location if it is new, and gains the raw line
 * as a sample while fewer than `options.maxSamples` (default 3) are stored.
 */
function processErrorLineWithParts(rawLine, parts, groups, options) {
    const signature = createSignature(parts);
    const location = formatLocation(parts);
    const existing = groups.get(signature);
    if (existing === undefined) {
        groups.set(signature, {
            signature,
            count: 1,
            firstOccurrence: rawLine,
            locations: location ? [location] : [],
            samples: [rawLine],
            code: parts.code,
            message: parts.message,
        });
        return;
    }
    existing.count += 1;
    const isNewLocation = Boolean(location) && !existing.locations.includes(location);
    if (isNewLocation) {
        existing.locations.push(location);
    }
    const sampleLimit = options.maxSamples ?? 3;
    if (existing.samples.length < sampleLimit) {
        existing.samples.push(rawLine);
    }
}
81
/**
 * Record one error line that has no structured parts.
 *
 * The group key is `normalizeErrorLine(rawLine)`. A new group uses the raw
 * line as its message, first occurrence and only sample, with no locations;
 * an existing group has its count bumped and keeps the raw line as a sample
 * while fewer than `options.maxSamples` (default 3) are stored.
 */
function processErrorLine(rawLine, groups, options) {
    const signature = normalizeErrorLine(rawLine);
    const existing = groups.get(signature);
    if (existing === undefined) {
        groups.set(signature, {
            signature,
            count: 1,
            firstOccurrence: rawLine,
            locations: [],
            samples: [rawLine],
            message: rawLine,
        });
        return;
    }
    existing.count += 1;
    const sampleLimit = options.maxSamples ?? 3;
    if (existing.samples.length < sampleLimit) {
        existing.samples.push(rawLine);
    }
}
104
/**
 * Render a grouping result as a plain-text or markdown report.
 *
 * Groups are ranked by occurrence count (highest first). Those at or above
 * `threshold` go into the deduplicated section; those below it go into the
 * unique section, which shows at most `keepFirst * 5` entries and is skipped
 * when `keepFirst` is not positive.
 *
 * @param result - Object whose `groups` Map holds the error groups.
 * @param format - "markdown" selects markdown output; anything else is plain.
 * @param options - Overrides merged over DEFAULT_OPTIONS.
 * @returns The report lines joined with "\n".
 */
export function formatGroups(result, format = "plain", options = {}) {
    const { threshold, keepFirst } = { ...DEFAULT_OPTIONS, ...options };
    const asMarkdown = format === "markdown";
    const output = [];
    // Most frequent groups first.
    const ranked = [...result.groups.values()].sort((left, right) => right.count - left.count);
    const repeated = ranked.filter((entry) => entry.count >= threshold);
    const singles = ranked.filter((entry) => entry.count < threshold);
    if (repeated.length > 0) {
        output.push(asMarkdown ? "## Deduplicated Errors\n" : "DEDUPLICATED ERRORS:");
        repeated.forEach((group, index) => {
            const rank = index + 1;
            const codePrefix = group.code ? `${group.code}: ` : "";
            const headline = truncateMessage(group.message, 80);
            const firstLine = truncateMessage(group.firstOccurrence, 100);
            if (asMarkdown) {
                output.push(
                    `### ${rank}. ${codePrefix}${headline}`,
                    `**Occurrences:** ${group.count}`,
                    `**First:** \`${firstLine}\``,
                );
            }
            else {
                output.push(
                    `${rank}. ${codePrefix}${headline}`,
                    ` Occurrences: ${group.count}`,
                    ` First: ${firstLine}`,
                );
            }
            // Locations beyond the first: list up to five, summarize the rest.
            if (group.locations.length > 1) {
                const listed = group.locations.slice(1, 6).join(", ");
                const hidden = group.locations.length - 6;
                const tail = hidden > 0 ? ` (+${hidden} more)` : "";
                const label = asMarkdown ? "**Also in:**" : " Also in:";
                output.push(`${label} ${listed}${tail}`);
            }
            output.push("");
        });
    }
    if (singles.length > 0 && keepFirst > 0) {
        const limit = keepFirst * 5;
        const bullet = asMarkdown ? "- " : " ";
        output.push(asMarkdown ? "## Unique Errors\n" : "UNIQUE ERRORS:");
        for (const { firstOccurrence } of singles.slice(0, limit)) {
            output.push(`${bullet}${firstOccurrence}`);
        }
        if (singles.length > limit) {
            const more = singles.length - limit;
            output.push(asMarkdown ? `\n*...and ${more} more unique errors*` : ` ...and ${more} more unique errors`);
        }
        output.push("");
    }
    return output.join("\n");
}
161
/**
 * Truncate a message to a maximum length.
 *
 * Messages that already fit are returned unchanged. Longer messages are cut
 * and end in "..." so that the result is exactly `maxLength` characters.
 * When `maxLength` is below 3 there is no room for the ellipsis, so the
 * message is hard-cut to `maxLength` characters (empty for 0 or negative)
 * instead of growing past the limit.
 *
 * @param {string} message - Text to shorten.
 * @param {number} maxLength - Maximum length of the returned string.
 * @returns {string} The message, shortened if necessary.
 */
function truncateMessage(message, maxLength) {
    if (message.length <= maxLength) {
        return message;
    }
    const ellipsis = "...";
    if (maxLength < ellipsis.length) {
        // A negative slice end would count from the end of the string and
        // return more text than requested, so clamp at zero and skip the dots.
        return message.slice(0, Math.max(maxLength, 0));
    }
    return `${message.slice(0, maxLength - ellipsis.length)}${ellipsis}`;
}
169
/**
 * Summarize how much a grouping result shrinks the original output.
 *
 * @param result - Object with `groups` (Map), `nonErrorLines` (array) and
 *   `totalErrorLines` (number).
 * @returns Line counts before and after deduplication, the number of distinct
 *   groups, the number of duplicate error lines, and the rounded percentage
 *   reduction (0 when there were no lines at all).
 */
export function calculateStats(result) {
    const { groups, nonErrorLines, totalErrorLines } = result;
    const passthroughCount = nonErrorLines.length;
    const uniqueErrors = groups.size;
    const originalLines = totalErrorLines + passthroughCount;
    const deduplicatedLines = uniqueErrors + passthroughCount;
    let reductionPercent = 0;
    if (originalLines > 0) {
        reductionPercent = Math.round(((originalLines - deduplicatedLines) / originalLines) * 100);
    }
    return {
        originalLines,
        deduplicatedLines,
        uniqueErrors,
        totalDuplicates: totalErrorLines - uniqueErrors,
        reductionPercent,
    };
}
@@ -0,0 +1,45 @@
1
+ /**
2
+ * TF-IDF (Term Frequency - Inverse Document Frequency) Utilities
3
+ *
4
+ * Provides local TF-IDF calculation for semantic importance scoring.
5
+ * No external ML models required - pure algorithmic implementation.
6
+ */
7
+ /**
8
+ * TF-IDF result for a single term.
+ *
+ * - term: the token being scored
+ * - tf: occurrences of the term divided by the segment's token count
+ * - idf: natural log of (number of segments / segments containing the term)
+ * - tfidf: tf multiplied by idf
9
+ */
10
+ export interface TFIDFScore {
11
+ term: string;
12
+ tf: number;
13
+ idf: number;
14
+ tfidf: number;
15
+ }
16
+ /**
17
+ * TF-IDF scores for all terms in all segments.
+ *
+ * Keyed by segment index; each value holds that segment's term scores,
+ * sorted by `tfidf` in descending order.
18
+ */
19
+ export type TFIDFMap = Map<number, TFIDFScore[]>;
20
+ /**
21
+ * Calculate TF-IDF scores for all segments.
+ *
+ * An empty input yields an empty map. IDF is log(N / df), so a term that
+ * appears in every segment (including any term when there is only one
+ * segment) scores 0.
22
+ *
23
+ * @param segments - Array of text segments to analyze
24
+ * @returns Map of segment index to TF-IDF scores for each term
25
+ */
26
+ export declare function calculateTFIDF(segments: string[]): TFIDFMap;
27
+ /**
28
+ * Get the average TF-IDF score for a segment.
29
+ * Higher score = more unique/important content.
+ *
+ * The average is halved and capped at 1. A segment that is missing from
+ * the map, or that has no scored terms, scores 0.
30
+ *
31
+ * @param segmentIndex - Index of the segment
32
+ * @param tfidfMap - Pre-calculated TF-IDF scores
33
+ * @returns Average TF-IDF score (0-1 normalized)
34
+ */
35
+ export declare function getSegmentTFIDFScore(segmentIndex: number, tfidfMap: TFIDFMap): number;
36
+ /**
37
+ * Get top terms for a segment (useful for debugging/display).
+ *
+ * Returns the first `topN` entries of the segment's stored score list, or
+ * an empty array when the segment is not in the map.
38
+ *
39
+ * @param segmentIndex - Index of the segment
40
+ * @param tfidfMap - Pre-calculated TF-IDF scores
41
+ * @param topN - Number of top terms to return (defaults to 5)
42
+ * @returns Array of top terms with scores
43
+ */
44
+ export declare function getTopTerms(segmentIndex: number, tfidfMap: TFIDFMap, topN?: number): TFIDFScore[];
45
+ //# sourceMappingURL=tfidf.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tfidf.d.ts","sourceRoot":"","sources":["../../src/utils/tfidf.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,EAAE,EAAE,MAAM,CAAC;IACX,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;CACf;AAED;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG,GAAG,CAAC,MAAM,EAAE,UAAU,EAAE,CAAC,CAAC;AAyIjD;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,QAAQ,CAmC3D;AAED;;;;;;;GAOG;AACH,wBAAgB,oBAAoB,CAClC,YAAY,EAAE,MAAM,EACpB,QAAQ,EAAE,QAAQ,GACjB,MAAM,CAYR;AAED;;;;;;;GAOG;AACH,wBAAgB,WAAW,CACzB,YAAY,EAAE,MAAM,EACpB,QAAQ,EAAE,QAAQ,EAClB,IAAI,GAAE,MAAU,GACf,UAAU,EAAE,CAMd"}
@@ -0,0 +1,204 @@
1
+ /**
2
+ * TF-IDF (Term Frequency - Inverse Document Frequency) Utilities
3
+ *
4
+ * Provides local TF-IDF calculation for semantic importance scoring.
5
+ * No external ML models required - pure algorithmic implementation.
6
+ */
7
// Words dropped during tokenization because they carry little meaning on
// their own. Grouped loosely by role; lookups only need membership.
const STOPWORDS = new Set([
    // articles, conjunctions, prepositions
    "a", "an", "the", "and", "or", "but", "in", "on", "at", "to",
    "for", "of", "with", "by", "from", "as",
    // forms of be / have / do
    "is", "was", "are", "were", "been", "be", "have", "has", "had",
    "do", "does", "did",
    // modal and auxiliary verbs
    "will", "would", "could", "should", "may", "might", "must", "shall",
    "can", "need", "dare", "ought", "used",
    // pronouns and demonstratives
    "it", "its", "this", "that", "these", "those", "i", "you", "he",
    "she", "we", "they",
    // question words
    "what", "which", "who", "whom", "when", "where", "why", "how",
    // quantifiers
    "all", "each", "every", "both", "few", "more", "most", "other",
    "some", "such",
    // negations and intensifiers
    "no", "nor", "not", "only", "own", "same", "so", "than", "too",
    "very", "just", "also",
]);
93
/**
 * Tokenize text into words for TF-IDF analysis
 * - Lowercase
 * - Replace punctuation with spaces
 * - Split on whitespace
 * - Filter stopwords
 * - Keep words with 2+ characters
 *
 * Letters, combining marks and digits from any script count as word
 * characters, so accented and non-Latin words stay intact instead of being
 * split or dropped. ASCII input tokenizes exactly as it would with `\w`.
 *
 * @param {string} text - Text to split into terms.
 * @returns {string[]} Lowercased terms in their original order.
 */
function tokenize(text) {
    return text
        .toLowerCase()
        // `\w` only covers [A-Za-z0-9_]; the Unicode property classes extend
        // the same idea (letters, marks, numbers, underscore) to all scripts.
        .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, " ")
        .split(/\s+/)
        .filter((word) => word.length >= 2 && !STOPWORDS.has(word));
}
107
/**
 * Compute the term frequency of every distinct token in a segment.
 * TF = occurrences of the term / number of tokens in the segment.
 *
 * An empty token list yields an empty map (the divisor falls back to 1).
 */
function calculateTF(tokens) {
    const occurrences = tokens.reduce(
        (tally, token) => tally.set(token, (tally.get(token) ?? 0) + 1),
        new Map(),
    );
    const denominator = tokens.length || 1;
    return new Map(Array.from(occurrences, ([term, seen]) => [term, seen / denominator]));
}
123
/**
 * Compute the document frequency of every term.
 * DF = number of segments in which the term occurs at least once.
 */
function calculateDF(segmentTokens) {
    const frequency = new Map();
    // Collapse each segment to its distinct terms so repeats count once.
    const distinctPerSegment = segmentTokens.flatMap((tokens) => [...new Set(tokens)]);
    for (const term of distinctPerSegment) {
        const seenIn = frequency.get(term) ?? 0;
        frequency.set(term, seenIn + 1);
    }
    return frequency;
}
137
/**
 * Score every term of every segment with TF-IDF.
 *
 * @param segments - Array of text segments to analyze
 * @returns Map from segment index to that segment's term scores, each list
 *   sorted by `tfidf` descending; an empty map for empty input
 */
export function calculateTFIDF(segments) {
    const scoresBySegment = new Map();
    const segmentCount = segments.length;
    if (segmentCount === 0) {
        return scoresBySegment;
    }
    const tokenLists = segments.map(tokenize);
    const documentFrequency = calculateDF(tokenLists);
    for (const [index, tokens] of tokenLists.entries()) {
        const ranked = Array.from(calculateTF(tokens), ([term, tf]) => {
            // IDF = log(N / df) where N is the number of segments
            const idf = Math.log(segmentCount / (documentFrequency.get(term) ?? 1));
            return { term, tf, idf, tfidf: tf * idf };
        });
        ranked.sort((left, right) => right.tfidf - left.tfidf);
        scoresBySegment.set(index, ranked);
    }
    return scoresBySegment;
}
171
/**
 * Average the TF-IDF scores of one segment and squash the result into 0-1.
 * Higher score = more unique/important content.
 *
 * @param segmentIndex - Index of the segment
 * @param tfidfMap - Pre-calculated TF-IDF scores
 * @returns Half the mean `tfidf`, capped at 1; 0 when the segment is missing
 *   or has no scored terms
 */
export function getSegmentTFIDFScore(segmentIndex, tfidfMap) {
    const scores = tfidfMap.get(segmentIndex);
    if (!scores?.length) {
        return 0;
    }
    let total = 0;
    for (const { tfidf } of scores) {
        total += tfidf;
    }
    const mean = total / scores.length;
    // Halving maps the range the original author treats as typical (0-2) onto 0-1.
    return Math.min(mean / 2, 1);
}
190
/**
 * Return the leading entries of a segment's score list (handy for
 * debugging/display).
 *
 * @param segmentIndex - Index of the segment
 * @param tfidfMap - Pre-calculated TF-IDF scores
 * @param topN - How many entries to take from the front of the list
 * @returns The first `topN` scores, or an empty array if the segment is absent
 */
export function getTopTerms(segmentIndex, tfidfMap, topN = 5) {
    const scores = tfidfMap.get(segmentIndex);
    return scores ? scores.slice(0, topN) : [];
}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * TF-IDF Utilities Tests
3
+ */
4
+ export {};
5
+ //# sourceMappingURL=tfidf.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tfidf.test.d.ts","sourceRoot":"","sources":["../../src/utils/tfidf.test.ts"],"names":[],"mappings":"AAAA;;GAEG"}