@miller-tech/uap 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (660)
  1. package/LICENSE +21 -0
  2. package/README.md +888 -0
  3. package/dist/analyzers/index.d.ts +3 -0
  4. package/dist/analyzers/index.d.ts.map +1 -0
  5. package/dist/analyzers/index.js +684 -0
  6. package/dist/analyzers/index.js.map +1 -0
  7. package/dist/benchmarks/agents/naive-agent.d.ts +60 -0
  8. package/dist/benchmarks/agents/naive-agent.d.ts.map +1 -0
  9. package/dist/benchmarks/agents/naive-agent.js +144 -0
  10. package/dist/benchmarks/agents/naive-agent.js.map +1 -0
  11. package/dist/benchmarks/agents/uap-agent.d.ts +167 -0
  12. package/dist/benchmarks/agents/uap-agent.d.ts.map +1 -0
  13. package/dist/benchmarks/agents/uap-agent.js +437 -0
  14. package/dist/benchmarks/agents/uap-agent.js.map +1 -0
  15. package/dist/benchmarks/benchmark.d.ts +328 -0
  16. package/dist/benchmarks/benchmark.d.ts.map +1 -0
  17. package/dist/benchmarks/benchmark.js +112 -0
  18. package/dist/benchmarks/benchmark.js.map +1 -0
  19. package/dist/benchmarks/execution-verifier.d.ts +41 -0
  20. package/dist/benchmarks/execution-verifier.d.ts.map +1 -0
  21. package/dist/benchmarks/execution-verifier.js +340 -0
  22. package/dist/benchmarks/execution-verifier.js.map +1 -0
  23. package/dist/benchmarks/hierarchical-prompting.d.ts +37 -0
  24. package/dist/benchmarks/hierarchical-prompting.d.ts.map +1 -0
  25. package/dist/benchmarks/hierarchical-prompting.js +246 -0
  26. package/dist/benchmarks/hierarchical-prompting.js.map +1 -0
  27. package/dist/benchmarks/improved-benchmark.d.ts +89 -0
  28. package/dist/benchmarks/improved-benchmark.d.ts.map +1 -0
  29. package/dist/benchmarks/improved-benchmark.js +585 -0
  30. package/dist/benchmarks/improved-benchmark.js.map +1 -0
  31. package/dist/benchmarks/index.d.ts +11 -0
  32. package/dist/benchmarks/index.d.ts.map +1 -0
  33. package/dist/benchmarks/index.js +11 -0
  34. package/dist/benchmarks/index.js.map +1 -0
  35. package/dist/benchmarks/model-integration.d.ts +111 -0
  36. package/dist/benchmarks/model-integration.d.ts.map +1 -0
  37. package/dist/benchmarks/model-integration.js +904 -0
  38. package/dist/benchmarks/model-integration.js.map +1 -0
  39. package/dist/benchmarks/multi-turn-agent.d.ts +44 -0
  40. package/dist/benchmarks/multi-turn-agent.d.ts.map +1 -0
  41. package/dist/benchmarks/multi-turn-agent.js +254 -0
  42. package/dist/benchmarks/multi-turn-agent.js.map +1 -0
  43. package/dist/benchmarks/multi-turn-loop.d.ts +57 -0
  44. package/dist/benchmarks/multi-turn-loop.d.ts.map +1 -0
  45. package/dist/benchmarks/multi-turn-loop.js +167 -0
  46. package/dist/benchmarks/multi-turn-loop.js.map +1 -0
  47. package/dist/benchmarks/tasks.d.ts +19 -0
  48. package/dist/benchmarks/tasks.d.ts.map +1 -0
  49. package/dist/benchmarks/tasks.js +435 -0
  50. package/dist/benchmarks/tasks.js.map +1 -0
  51. package/dist/bin/cli.d.ts +3 -0
  52. package/dist/bin/cli.d.ts.map +1 -0
  53. package/dist/bin/cli.js +546 -0
  54. package/dist/bin/cli.js.map +1 -0
  55. package/dist/bin/llama-server-optimize.d.ts +18 -0
  56. package/dist/bin/llama-server-optimize.d.ts.map +1 -0
  57. package/dist/bin/llama-server-optimize.js +708 -0
  58. package/dist/bin/llama-server-optimize.js.map +1 -0
  59. package/dist/bin/policy.d.ts +3 -0
  60. package/dist/bin/policy.d.ts.map +1 -0
  61. package/dist/bin/policy.js +143 -0
  62. package/dist/bin/policy.js.map +1 -0
  63. package/dist/bin/tool-calls.d.ts +3 -0
  64. package/dist/bin/tool-calls.d.ts.map +1 -0
  65. package/dist/bin/tool-calls.js +4 -0
  66. package/dist/bin/tool-calls.js.map +1 -0
  67. package/dist/browser/index.d.ts +2 -0
  68. package/dist/browser/index.d.ts.map +1 -0
  69. package/dist/browser/index.js +2 -0
  70. package/dist/browser/index.js.map +1 -0
  71. package/dist/browser/web-browser.d.ts +30 -0
  72. package/dist/browser/web-browser.d.ts.map +1 -0
  73. package/dist/browser/web-browser.js +93 -0
  74. package/dist/browser/web-browser.js.map +1 -0
  75. package/dist/cli/agent.d.ts +20 -0
  76. package/dist/cli/agent.d.ts.map +1 -0
  77. package/dist/cli/agent.js +474 -0
  78. package/dist/cli/agent.js.map +1 -0
  79. package/dist/cli/analyze.d.ts +7 -0
  80. package/dist/cli/analyze.d.ts.map +1 -0
  81. package/dist/cli/analyze.js +103 -0
  82. package/dist/cli/analyze.js.map +1 -0
  83. package/dist/cli/completion-gates.d.ts +51 -0
  84. package/dist/cli/completion-gates.d.ts.map +1 -0
  85. package/dist/cli/completion-gates.js +201 -0
  86. package/dist/cli/completion-gates.js.map +1 -0
  87. package/dist/cli/compliance.d.ts +8 -0
  88. package/dist/cli/compliance.d.ts.map +1 -0
  89. package/dist/cli/compliance.js +509 -0
  90. package/dist/cli/compliance.js.map +1 -0
  91. package/dist/cli/coord.d.ts +7 -0
  92. package/dist/cli/coord.d.ts.map +1 -0
  93. package/dist/cli/coord.js +138 -0
  94. package/dist/cli/coord.js.map +1 -0
  95. package/dist/cli/dashboard.d.ts +21 -0
  96. package/dist/cli/dashboard.d.ts.map +1 -0
  97. package/dist/cli/dashboard.js +1508 -0
  98. package/dist/cli/dashboard.js.map +1 -0
  99. package/dist/cli/deploy.d.ts +19 -0
  100. package/dist/cli/deploy.d.ts.map +1 -0
  101. package/dist/cli/deploy.js +387 -0
  102. package/dist/cli/deploy.js.map +1 -0
  103. package/dist/cli/droids.d.ts +9 -0
  104. package/dist/cli/droids.d.ts.map +1 -0
  105. package/dist/cli/droids.js +227 -0
  106. package/dist/cli/droids.js.map +1 -0
  107. package/dist/cli/generate.d.ts +17 -0
  108. package/dist/cli/generate.d.ts.map +1 -0
  109. package/dist/cli/generate.js +432 -0
  110. package/dist/cli/generate.js.map +1 -0
  111. package/dist/cli/hooks.d.ts +9 -0
  112. package/dist/cli/hooks.d.ts.map +1 -0
  113. package/dist/cli/hooks.js +464 -0
  114. package/dist/cli/hooks.js.map +1 -0
  115. package/dist/cli/init.d.ts +12 -0
  116. package/dist/cli/init.d.ts.map +1 -0
  117. package/dist/cli/init.js +364 -0
  118. package/dist/cli/init.js.map +1 -0
  119. package/dist/cli/mcp-router.d.ts +16 -0
  120. package/dist/cli/mcp-router.d.ts.map +1 -0
  121. package/dist/cli/mcp-router.js +143 -0
  122. package/dist/cli/mcp-router.js.map +1 -0
  123. package/dist/cli/memory.d.ts +24 -0
  124. package/dist/cli/memory.d.ts.map +1 -0
  125. package/dist/cli/memory.js +885 -0
  126. package/dist/cli/memory.js.map +1 -0
  127. package/dist/cli/model.d.ts +15 -0
  128. package/dist/cli/model.d.ts.map +1 -0
  129. package/dist/cli/model.js +290 -0
  130. package/dist/cli/model.js.map +1 -0
  131. package/dist/cli/patterns.d.ts +26 -0
  132. package/dist/cli/patterns.d.ts.map +1 -0
  133. package/dist/cli/patterns.js +862 -0
  134. package/dist/cli/patterns.js.map +1 -0
  135. package/dist/cli/rtk-validation.d.ts +9 -0
  136. package/dist/cli/rtk-validation.d.ts.map +1 -0
  137. package/dist/cli/rtk-validation.js +9 -0
  138. package/dist/cli/rtk-validation.js.map +1 -0
  139. package/dist/cli/rtk.d.ts +34 -0
  140. package/dist/cli/rtk.d.ts.map +1 -0
  141. package/dist/cli/rtk.js +401 -0
  142. package/dist/cli/rtk.js.map +1 -0
  143. package/dist/cli/schema-diff.d.ts +7 -0
  144. package/dist/cli/schema-diff.d.ts.map +1 -0
  145. package/dist/cli/schema-diff.js +11 -0
  146. package/dist/cli/schema-diff.js.map +1 -0
  147. package/dist/cli/setup-mcp-router.d.ts +8 -0
  148. package/dist/cli/setup-mcp-router.d.ts.map +1 -0
  149. package/dist/cli/setup-mcp-router.js +163 -0
  150. package/dist/cli/setup-mcp-router.js.map +1 -0
  151. package/dist/cli/setup-wizard.d.ts +2 -0
  152. package/dist/cli/setup-wizard.d.ts.map +1 -0
  153. package/dist/cli/setup-wizard.js +806 -0
  154. package/dist/cli/setup-wizard.js.map +1 -0
  155. package/dist/cli/setup.d.ts +15 -0
  156. package/dist/cli/setup.d.ts.map +1 -0
  157. package/dist/cli/setup.js +154 -0
  158. package/dist/cli/setup.js.map +1 -0
  159. package/dist/cli/sync.d.ts +8 -0
  160. package/dist/cli/sync.d.ts.map +1 -0
  161. package/dist/cli/sync.js +395 -0
  162. package/dist/cli/sync.js.map +1 -0
  163. package/dist/cli/task.d.ts +33 -0
  164. package/dist/cli/task.d.ts.map +1 -0
  165. package/dist/cli/task.js +672 -0
  166. package/dist/cli/task.js.map +1 -0
  167. package/dist/cli/tool-calls.d.ts +20 -0
  168. package/dist/cli/tool-calls.d.ts.map +1 -0
  169. package/dist/cli/tool-calls.js +605 -0
  170. package/dist/cli/tool-calls.js.map +1 -0
  171. package/dist/cli/uap.d.ts +10 -0
  172. package/dist/cli/uap.d.ts.map +1 -0
  173. package/dist/cli/uap.js +398 -0
  174. package/dist/cli/uap.js.map +1 -0
  175. package/dist/cli/update.d.ts +10 -0
  176. package/dist/cli/update.d.ts.map +1 -0
  177. package/dist/cli/update.js +300 -0
  178. package/dist/cli/update.js.map +1 -0
  179. package/dist/cli/visualize.d.ts +77 -0
  180. package/dist/cli/visualize.d.ts.map +1 -0
  181. package/dist/cli/visualize.js +287 -0
  182. package/dist/cli/visualize.js.map +1 -0
  183. package/dist/cli/worktree.d.ts +9 -0
  184. package/dist/cli/worktree.d.ts.map +1 -0
  185. package/dist/cli/worktree.js +213 -0
  186. package/dist/cli/worktree.js.map +1 -0
  187. package/dist/coordination/adaptive-patterns.d.ts +65 -0
  188. package/dist/coordination/adaptive-patterns.d.ts.map +1 -0
  189. package/dist/coordination/adaptive-patterns.js +108 -0
  190. package/dist/coordination/adaptive-patterns.js.map +1 -0
  191. package/dist/coordination/auto-agent.d.ts +82 -0
  192. package/dist/coordination/auto-agent.d.ts.map +1 -0
  193. package/dist/coordination/auto-agent.js +145 -0
  194. package/dist/coordination/auto-agent.js.map +1 -0
  195. package/dist/coordination/capability-router.d.ts +79 -0
  196. package/dist/coordination/capability-router.d.ts.map +1 -0
  197. package/dist/coordination/capability-router.js +334 -0
  198. package/dist/coordination/capability-router.js.map +1 -0
  199. package/dist/coordination/database.d.ts +13 -0
  200. package/dist/coordination/database.d.ts.map +1 -0
  201. package/dist/coordination/database.js +136 -0
  202. package/dist/coordination/database.js.map +1 -0
  203. package/dist/coordination/deploy-batcher.d.ts +122 -0
  204. package/dist/coordination/deploy-batcher.d.ts.map +1 -0
  205. package/dist/coordination/deploy-batcher.js +718 -0
  206. package/dist/coordination/deploy-batcher.js.map +1 -0
  207. package/dist/coordination/droid-validator.d.ts +59 -0
  208. package/dist/coordination/droid-validator.d.ts.map +1 -0
  209. package/dist/coordination/droid-validator.js +142 -0
  210. package/dist/coordination/droid-validator.js.map +1 -0
  211. package/dist/coordination/index.d.ts +10 -0
  212. package/dist/coordination/index.d.ts.map +1 -0
  213. package/dist/coordination/index.js +10 -0
  214. package/dist/coordination/index.js.map +1 -0
  215. package/dist/coordination/pattern-router.d.ts +50 -0
  216. package/dist/coordination/pattern-router.d.ts.map +1 -0
  217. package/dist/coordination/pattern-router.js +118 -0
  218. package/dist/coordination/pattern-router.js.map +1 -0
  219. package/dist/coordination/service.d.ts +81 -0
  220. package/dist/coordination/service.d.ts.map +1 -0
  221. package/dist/coordination/service.js +619 -0
  222. package/dist/coordination/service.js.map +1 -0
  223. package/dist/coordination/worktree-enforcer.d.ts +22 -0
  224. package/dist/coordination/worktree-enforcer.d.ts.map +1 -0
  225. package/dist/coordination/worktree-enforcer.js +71 -0
  226. package/dist/coordination/worktree-enforcer.js.map +1 -0
  227. package/dist/generators/claude-md.d.ts +3 -0
  228. package/dist/generators/claude-md.d.ts.map +1 -0
  229. package/dist/generators/claude-md.js +1020 -0
  230. package/dist/generators/claude-md.js.map +1 -0
  231. package/dist/generators/template-loader.d.ts +105 -0
  232. package/dist/generators/template-loader.d.ts.map +1 -0
  233. package/dist/generators/template-loader.js +291 -0
  234. package/dist/generators/template-loader.js.map +1 -0
  235. package/dist/index.d.ts +49 -0
  236. package/dist/index.d.ts.map +1 -0
  237. package/dist/index.js +63 -0
  238. package/dist/index.js.map +1 -0
  239. package/dist/mcp-router/config/parser.d.ts +9 -0
  240. package/dist/mcp-router/config/parser.d.ts.map +1 -0
  241. package/dist/mcp-router/config/parser.js +174 -0
  242. package/dist/mcp-router/config/parser.js.map +1 -0
  243. package/dist/mcp-router/executor/client.d.ts +31 -0
  244. package/dist/mcp-router/executor/client.d.ts.map +1 -0
  245. package/dist/mcp-router/executor/client.js +189 -0
  246. package/dist/mcp-router/executor/client.js.map +1 -0
  247. package/dist/mcp-router/index.d.ts +22 -0
  248. package/dist/mcp-router/index.d.ts.map +1 -0
  249. package/dist/mcp-router/index.js +18 -0
  250. package/dist/mcp-router/index.js.map +1 -0
  251. package/dist/mcp-router/output-compressor.d.ts +26 -0
  252. package/dist/mcp-router/output-compressor.d.ts.map +1 -0
  253. package/dist/mcp-router/output-compressor.js +236 -0
  254. package/dist/mcp-router/output-compressor.js.map +1 -0
  255. package/dist/mcp-router/search/fuzzy.d.ts +26 -0
  256. package/dist/mcp-router/search/fuzzy.d.ts.map +1 -0
  257. package/dist/mcp-router/search/fuzzy.js +94 -0
  258. package/dist/mcp-router/search/fuzzy.js.map +1 -0
  259. package/dist/mcp-router/server.d.ts +50 -0
  260. package/dist/mcp-router/server.d.ts.map +1 -0
  261. package/dist/mcp-router/server.js +229 -0
  262. package/dist/mcp-router/server.js.map +1 -0
  263. package/dist/mcp-router/session-stats.d.ts +37 -0
  264. package/dist/mcp-router/session-stats.d.ts.map +1 -0
  265. package/dist/mcp-router/session-stats.js +56 -0
  266. package/dist/mcp-router/session-stats.js.map +1 -0
  267. package/dist/mcp-router/tools/discover.d.ts +37 -0
  268. package/dist/mcp-router/tools/discover.d.ts.map +1 -0
  269. package/dist/mcp-router/tools/discover.js +65 -0
  270. package/dist/mcp-router/tools/discover.js.map +1 -0
  271. package/dist/mcp-router/tools/execute.d.ts +43 -0
  272. package/dist/mcp-router/tools/execute.d.ts.map +1 -0
  273. package/dist/mcp-router/tools/execute.js +144 -0
  274. package/dist/mcp-router/tools/execute.js.map +1 -0
  275. package/dist/mcp-router/types.d.ts +62 -0
  276. package/dist/mcp-router/types.d.ts.map +1 -0
  277. package/dist/mcp-router/types.js +6 -0
  278. package/dist/mcp-router/types.js.map +1 -0
  279. package/dist/memory/adaptive-context.d.ts +149 -0
  280. package/dist/memory/adaptive-context.d.ts.map +1 -0
  281. package/dist/memory/adaptive-context.js +1095 -0
  282. package/dist/memory/adaptive-context.js.map +1 -0
  283. package/dist/memory/agent-scoped-memory.d.ts +67 -0
  284. package/dist/memory/agent-scoped-memory.d.ts.map +1 -0
  285. package/dist/memory/agent-scoped-memory.js +126 -0
  286. package/dist/memory/agent-scoped-memory.js.map +1 -0
  287. package/dist/memory/ambiguity-detector.d.ts +54 -0
  288. package/dist/memory/ambiguity-detector.d.ts.map +1 -0
  289. package/dist/memory/ambiguity-detector.js +401 -0
  290. package/dist/memory/ambiguity-detector.js.map +1 -0
  291. package/dist/memory/backends/base.d.ts +18 -0
  292. package/dist/memory/backends/base.d.ts.map +1 -0
  293. package/dist/memory/backends/base.js +2 -0
  294. package/dist/memory/backends/base.js.map +1 -0
  295. package/dist/memory/backends/factory.d.ts +4 -0
  296. package/dist/memory/backends/factory.d.ts.map +1 -0
  297. package/dist/memory/backends/factory.js +53 -0
  298. package/dist/memory/backends/factory.js.map +1 -0
  299. package/dist/memory/backends/github.d.ts +27 -0
  300. package/dist/memory/backends/github.d.ts.map +1 -0
  301. package/dist/memory/backends/github.js +134 -0
  302. package/dist/memory/backends/github.js.map +1 -0
  303. package/dist/memory/backends/qdrant-cloud.d.ts +32 -0
  304. package/dist/memory/backends/qdrant-cloud.d.ts.map +1 -0
  305. package/dist/memory/backends/qdrant-cloud.js +167 -0
  306. package/dist/memory/backends/qdrant-cloud.js.map +1 -0
  307. package/dist/memory/context-compressor.d.ts +116 -0
  308. package/dist/memory/context-compressor.d.ts.map +1 -0
  309. package/dist/memory/context-compressor.js +430 -0
  310. package/dist/memory/context-compressor.js.map +1 -0
  311. package/dist/memory/context-pruner.d.ts +55 -0
  312. package/dist/memory/context-pruner.d.ts.map +1 -0
  313. package/dist/memory/context-pruner.js +85 -0
  314. package/dist/memory/context-pruner.js.map +1 -0
  315. package/dist/memory/correction-propagator.d.ts +44 -0
  316. package/dist/memory/correction-propagator.d.ts.map +1 -0
  317. package/dist/memory/correction-propagator.js +156 -0
  318. package/dist/memory/correction-propagator.js.map +1 -0
  319. package/dist/memory/daily-log.d.ts +67 -0
  320. package/dist/memory/daily-log.d.ts.map +1 -0
  321. package/dist/memory/daily-log.js +143 -0
  322. package/dist/memory/daily-log.js.map +1 -0
  323. package/dist/memory/dynamic-retrieval.d.ts +112 -0
  324. package/dist/memory/dynamic-retrieval.d.ts.map +1 -0
  325. package/dist/memory/dynamic-retrieval.js +908 -0
  326. package/dist/memory/dynamic-retrieval.js.map +1 -0
  327. package/dist/memory/embeddings.d.ts +172 -0
  328. package/dist/memory/embeddings.d.ts.map +1 -0
  329. package/dist/memory/embeddings.js +780 -0
  330. package/dist/memory/embeddings.js.map +1 -0
  331. package/dist/memory/generic-uap-patterns.d.ts +7 -0
  332. package/dist/memory/generic-uap-patterns.d.ts.map +1 -0
  333. package/dist/memory/generic-uap-patterns.js +43 -0
  334. package/dist/memory/generic-uap-patterns.js.map +1 -0
  335. package/dist/memory/hierarchical-memory.d.ts +141 -0
  336. package/dist/memory/hierarchical-memory.d.ts.map +1 -0
  337. package/dist/memory/hierarchical-memory.js +485 -0
  338. package/dist/memory/hierarchical-memory.js.map +1 -0
  339. package/dist/memory/knowledge-graph.d.ts +98 -0
  340. package/dist/memory/knowledge-graph.d.ts.map +1 -0
  341. package/dist/memory/knowledge-graph.js +275 -0
  342. package/dist/memory/knowledge-graph.js.map +1 -0
  343. package/dist/memory/memory-consolidator.d.ts +124 -0
  344. package/dist/memory/memory-consolidator.d.ts.map +1 -0
  345. package/dist/memory/memory-consolidator.js +514 -0
  346. package/dist/memory/memory-consolidator.js.map +1 -0
  347. package/dist/memory/memory-maintenance.d.ts +39 -0
  348. package/dist/memory/memory-maintenance.d.ts.map +1 -0
  349. package/dist/memory/memory-maintenance.js +336 -0
  350. package/dist/memory/memory-maintenance.js.map +1 -0
  351. package/dist/memory/model-router.d.ts +105 -0
  352. package/dist/memory/model-router.d.ts.map +1 -0
  353. package/dist/memory/model-router.js +474 -0
  354. package/dist/memory/model-router.js.map +1 -0
  355. package/dist/memory/multi-view-memory.d.ts +134 -0
  356. package/dist/memory/multi-view-memory.d.ts.map +1 -0
  357. package/dist/memory/multi-view-memory.js +430 -0
  358. package/dist/memory/multi-view-memory.js.map +1 -0
  359. package/dist/memory/predictive-memory.d.ts +79 -0
  360. package/dist/memory/predictive-memory.d.ts.map +1 -0
  361. package/dist/memory/predictive-memory.js +294 -0
  362. package/dist/memory/predictive-memory.js.map +1 -0
  363. package/dist/memory/prepopulate.d.ts +76 -0
  364. package/dist/memory/prepopulate.d.ts.map +1 -0
  365. package/dist/memory/prepopulate.js +832 -0
  366. package/dist/memory/prepopulate.js.map +1 -0
  367. package/dist/memory/semantic-compression.d.ts +77 -0
  368. package/dist/memory/semantic-compression.d.ts.map +1 -0
  369. package/dist/memory/semantic-compression.js +359 -0
  370. package/dist/memory/semantic-compression.js.map +1 -0
  371. package/dist/memory/serverless-qdrant.d.ts +102 -0
  372. package/dist/memory/serverless-qdrant.d.ts.map +1 -0
  373. package/dist/memory/serverless-qdrant.js +369 -0
  374. package/dist/memory/serverless-qdrant.js.map +1 -0
  375. package/dist/memory/short-term/factory.d.ts +26 -0
  376. package/dist/memory/short-term/factory.d.ts.map +1 -0
  377. package/dist/memory/short-term/factory.js +28 -0
  378. package/dist/memory/short-term/factory.js.map +1 -0
  379. package/dist/memory/short-term/indexeddb.d.ts +25 -0
  380. package/dist/memory/short-term/indexeddb.d.ts.map +1 -0
  381. package/dist/memory/short-term/indexeddb.js +64 -0
  382. package/dist/memory/short-term/indexeddb.js.map +1 -0
  383. package/dist/memory/short-term/schema.d.ts +6 -0
  384. package/dist/memory/short-term/schema.d.ts.map +1 -0
  385. package/dist/memory/short-term/schema.js +141 -0
  386. package/dist/memory/short-term/schema.js.map +1 -0
  387. package/dist/memory/short-term/sqlite.d.ts +64 -0
  388. package/dist/memory/short-term/sqlite.d.ts.map +1 -0
  389. package/dist/memory/short-term/sqlite.js +274 -0
  390. package/dist/memory/short-term/sqlite.js.map +1 -0
  391. package/dist/memory/speculative-cache.d.ts +111 -0
  392. package/dist/memory/speculative-cache.d.ts.map +1 -0
  393. package/dist/memory/speculative-cache.js +457 -0
  394. package/dist/memory/speculative-cache.js.map +1 -0
  395. package/dist/memory/task-classifier.d.ts +40 -0
  396. package/dist/memory/task-classifier.d.ts.map +1 -0
  397. package/dist/memory/task-classifier.js +342 -0
  398. package/dist/memory/task-classifier.js.map +1 -0
  399. package/dist/memory/terminal-bench-knowledge.d.ts +48 -0
  400. package/dist/memory/terminal-bench-knowledge.d.ts.map +1 -0
  401. package/dist/memory/terminal-bench-knowledge.js +622 -0
  402. package/dist/memory/terminal-bench-knowledge.js.map +1 -0
  403. package/dist/memory/write-gate.d.ts +39 -0
  404. package/dist/memory/write-gate.d.ts.map +1 -0
  405. package/dist/memory/write-gate.js +190 -0
  406. package/dist/memory/write-gate.js.map +1 -0
  407. package/dist/models/api-client.d.ts +46 -0
  408. package/dist/models/api-client.d.ts.map +1 -0
  409. package/dist/models/api-client.js +182 -0
  410. package/dist/models/api-client.js.map +1 -0
  411. package/dist/models/execution-profiles.d.ts +64 -0
  412. package/dist/models/execution-profiles.d.ts.map +1 -0
  413. package/dist/models/execution-profiles.js +403 -0
  414. package/dist/models/execution-profiles.js.map +1 -0
  415. package/dist/models/executor.d.ts +130 -0
  416. package/dist/models/executor.d.ts.map +1 -0
  417. package/dist/models/executor.js +382 -0
  418. package/dist/models/executor.js.map +1 -0
  419. package/dist/models/index.d.ts +19 -0
  420. package/dist/models/index.d.ts.map +1 -0
  421. package/dist/models/index.js +23 -0
  422. package/dist/models/index.js.map +1 -0
  423. package/dist/models/plan-validator.d.ts +37 -0
  424. package/dist/models/plan-validator.d.ts.map +1 -0
  425. package/dist/models/plan-validator.js +179 -0
  426. package/dist/models/plan-validator.js.map +1 -0
  427. package/dist/models/planner.d.ts +73 -0
  428. package/dist/models/planner.d.ts.map +1 -0
  429. package/dist/models/planner.js +375 -0
  430. package/dist/models/planner.js.map +1 -0
  431. package/dist/models/router.d.ts +96 -0
  432. package/dist/models/router.d.ts.map +1 -0
  433. package/dist/models/router.js +523 -0
  434. package/dist/models/router.js.map +1 -0
  435. package/dist/models/types.d.ts +370 -0
  436. package/dist/models/types.d.ts.map +1 -0
  437. package/dist/models/types.js +232 -0
  438. package/dist/models/types.js.map +1 -0
  439. package/dist/models/unified-router.d.ts +152 -0
  440. package/dist/models/unified-router.d.ts.map +1 -0
  441. package/dist/models/unified-router.js +313 -0
  442. package/dist/models/unified-router.js.map +1 -0
  443. package/dist/policies/convert-policy-to-claude.d.ts +3 -0
  444. package/dist/policies/convert-policy-to-claude.d.ts.map +1 -0
  445. package/dist/policies/convert-policy-to-claude.js +87 -0
  446. package/dist/policies/convert-policy-to-claude.js.map +1 -0
  447. package/dist/policies/database-manager.d.ts +27 -0
  448. package/dist/policies/database-manager.d.ts.map +1 -0
  449. package/dist/policies/database-manager.js +198 -0
  450. package/dist/policies/database-manager.js.map +1 -0
  451. package/dist/policies/enforced-tool-router.d.ts +53 -0
  452. package/dist/policies/enforced-tool-router.d.ts.map +1 -0
  453. package/dist/policies/enforced-tool-router.js +80 -0
  454. package/dist/policies/enforced-tool-router.js.map +1 -0
  455. package/dist/policies/index.d.ts +10 -0
  456. package/dist/policies/index.d.ts.map +1 -0
  457. package/dist/policies/index.js +8 -0
  458. package/dist/policies/index.js.map +1 -0
  459. package/dist/policies/policy-gate.d.ts +59 -0
  460. package/dist/policies/policy-gate.d.ts.map +1 -0
  461. package/dist/policies/policy-gate.js +171 -0
  462. package/dist/policies/policy-gate.js.map +1 -0
  463. package/dist/policies/policy-memory.d.ts +18 -0
  464. package/dist/policies/policy-memory.d.ts.map +1 -0
  465. package/dist/policies/policy-memory.js +126 -0
  466. package/dist/policies/policy-memory.js.map +1 -0
  467. package/dist/policies/policy-tools.d.ts +11 -0
  468. package/dist/policies/policy-tools.d.ts.map +1 -0
  469. package/dist/policies/policy-tools.js +66 -0
  470. package/dist/policies/policy-tools.js.map +1 -0
  471. package/dist/policies/schemas/policy.d.ts +69 -0
  472. package/dist/policies/schemas/policy.d.ts.map +1 -0
  473. package/dist/policies/schemas/policy.js +31 -0
  474. package/dist/policies/schemas/policy.js.map +1 -0
  475. package/dist/tasks/coordination.d.ts +83 -0
  476. package/dist/tasks/coordination.d.ts.map +1 -0
  477. package/dist/tasks/coordination.js +291 -0
  478. package/dist/tasks/coordination.js.map +1 -0
  479. package/dist/tasks/database.d.ts +19 -0
  480. package/dist/tasks/database.d.ts.map +1 -0
  481. package/dist/tasks/database.js +149 -0
  482. package/dist/tasks/database.js.map +1 -0
  483. package/dist/tasks/decoder-gate.d.ts +64 -0
  484. package/dist/tasks/decoder-gate.d.ts.map +1 -0
  485. package/dist/tasks/decoder-gate.js +268 -0
  486. package/dist/tasks/decoder-gate.js.map +1 -0
  487. package/dist/tasks/index.d.ts +6 -0
  488. package/dist/tasks/index.d.ts.map +1 -0
  489. package/dist/tasks/index.js +6 -0
  490. package/dist/tasks/index.js.map +1 -0
  491. package/dist/tasks/service.d.ts +40 -0
  492. package/dist/tasks/service.d.ts.map +1 -0
  493. package/dist/tasks/service.js +671 -0
  494. package/dist/tasks/service.js.map +1 -0
  495. package/dist/tasks/types.d.ts +238 -0
  496. package/dist/tasks/types.d.ts.map +1 -0
  497. package/dist/tasks/types.js +74 -0
  498. package/dist/tasks/types.js.map +1 -0
  499. package/dist/telemetry/index.d.ts +2 -0
  500. package/dist/telemetry/index.d.ts.map +1 -0
  501. package/dist/telemetry/index.js +2 -0
  502. package/dist/telemetry/index.js.map +1 -0
  503. package/dist/telemetry/session-telemetry.d.ts +56 -0
  504. package/dist/telemetry/session-telemetry.d.ts.map +1 -0
  505. package/dist/telemetry/session-telemetry.js +807 -0
  506. package/dist/telemetry/session-telemetry.js.map +1 -0
  507. package/dist/types/analysis.d.ts +82 -0
  508. package/dist/types/analysis.d.ts.map +1 -0
  509. package/dist/types/analysis.js +2 -0
  510. package/dist/types/analysis.js.map +1 -0
  511. package/dist/types/config.d.ts +3324 -0
  512. package/dist/types/config.d.ts.map +1 -0
  513. package/dist/types/config.js +418 -0
  514. package/dist/types/config.js.map +1 -0
  515. package/dist/types/coordination.d.ts +240 -0
  516. package/dist/types/coordination.d.ts.map +1 -0
  517. package/dist/types/coordination.js +43 -0
  518. package/dist/types/coordination.js.map +1 -0
  519. package/dist/types/index.d.ts +4 -0
  520. package/dist/types/index.d.ts.map +1 -0
  521. package/dist/types/index.js +4 -0
  522. package/dist/types/index.js.map +1 -0
  523. package/dist/uap-droids-strict.d.ts +59 -0
  524. package/dist/uap-droids-strict.d.ts.map +1 -0
  525. package/dist/uap-droids-strict.js +200 -0
  526. package/dist/uap-droids-strict.js.map +1 -0
  527. package/dist/utils/config-manager.d.ts +30 -0
  528. package/dist/utils/config-manager.d.ts.map +1 -0
  529. package/dist/utils/config-manager.js +41 -0
  530. package/dist/utils/config-manager.js.map +1 -0
  531. package/dist/utils/fetch-with-retry.d.ts +5 -0
  532. package/dist/utils/fetch-with-retry.d.ts.map +1 -0
  533. package/dist/utils/fetch-with-retry.js +61 -0
  534. package/dist/utils/fetch-with-retry.js.map +1 -0
  535. package/dist/utils/merge-claude-md.d.ts +28 -0
  536. package/dist/utils/merge-claude-md.d.ts.map +1 -0
  537. package/dist/utils/merge-claude-md.js +342 -0
  538. package/dist/utils/merge-claude-md.js.map +1 -0
  539. package/dist/utils/rate-limiter.d.ts +58 -0
  540. package/dist/utils/rate-limiter.d.ts.map +1 -0
  541. package/dist/utils/rate-limiter.js +100 -0
  542. package/dist/utils/rate-limiter.js.map +1 -0
  543. package/dist/utils/string-similarity.d.ts +37 -0
  544. package/dist/utils/string-similarity.d.ts.map +1 -0
  545. package/dist/utils/string-similarity.js +114 -0
  546. package/dist/utils/string-similarity.js.map +1 -0
  547. package/dist/utils/validate-json.d.ts +51 -0
  548. package/dist/utils/validate-json.d.ts.map +1 -0
  549. package/dist/utils/validate-json.js +94 -0
  550. package/dist/utils/validate-json.js.map +1 -0
  551. package/docs/INDEX.md +66 -0
  552. package/docs/architecture/MULTI_MODEL.md +224 -0
  553. package/docs/architecture/SYSTEM_ANALYSIS.md +1117 -0
  554. package/docs/architecture/UAP_COMPLIANCE.md +217 -0
  555. package/docs/architecture/UAP_PROTOCOL.md +339 -0
  556. package/docs/architecture/UAP_STRICT_DROIDS.md +172 -0
  557. package/docs/archive/BALLS_MODE_SELF_ANALYSIS.md +260 -0
  558. package/docs/archive/FAILING_TASKS_SOLUTION_PLAN.md +668 -0
  559. package/docs/archive/JINJA2-SYSTEM-MESSAGE-FIX.md +209 -0
  560. package/docs/archive/NPM-PUBLISH-V0.9.1.md +240 -0
  561. package/docs/archive/OPTIMIZATION_OPTIONS.md +334 -0
  562. package/docs/archive/SETUP_IMPROVEMENTS.md +213 -0
  563. package/docs/archive/UAP_GENERIC_OPTIMIZATION_PLAN.md +270 -0
  564. package/docs/archive/UAP_V103_PATTERN_DESIGN.md +315 -0
  565. package/docs/archive/UAP_V104_COMPLIANCE_DESIGN.md +223 -0
  566. package/docs/archive/changelog/2026-03-10_uap-100-compliance.md +77 -0
  567. package/docs/archive/changelog/2026-03-10_uap-full-system-verification.md +109 -0
  568. package/docs/benchmarks/ACCURACY_ANALYSIS.md +471 -0
  569. package/docs/benchmarks/TOKEN_OPTIMIZATION.md +572 -0
  570. package/docs/benchmarks/VALIDATION_PLAN.md +568 -0
  571. package/docs/benchmarks/VALIDATION_RESULTS.md +161 -0
  572. package/docs/deployment/DEPLOYMENT.md +895 -0
  573. package/docs/deployment/DEPLOYMENT_STRATEGIES.md +518 -0
  574. package/docs/deployment/DEPLOY_BATCHER_ANALYSIS.md +856 -0
  575. package/docs/deployment/DEPLOY_BATCHING.md +273 -0
  576. package/docs/deployment/DEPLOY_BUCKETING_ANALYSIS.md +420 -0
  577. package/docs/deployment/QWEN35_LLAMA_CPP.md +265 -0
  578. package/docs/getting-started/INTEGRATION.md +449 -0
  579. package/docs/getting-started/OVERVIEW.md +344 -0
  580. package/docs/getting-started/SETUP.md +203 -0
  581. package/docs/integrations/MCP_ROUTER_SETUP.md +445 -0
  582. package/docs/integrations/RTK_INTEGRATION.md +468 -0
  583. package/docs/operations/TROUBLESHOOTING.md +660 -0
  584. package/docs/reference/API_REFERENCE.md +903 -0
  585. package/docs/reference/FEATURES.md +472 -0
  586. package/docs/reference/HARNESS-MATRIX.md +318 -0
  587. package/docs/reference/UAP_CLI_REFERENCE.md +600 -0
  588. package/docs/research/BEHAVIORAL_PATTERNS.md +228 -0
  589. package/docs/research/DOMAIN_STRATEGIES.md +316 -0
  590. package/docs/research/MEMORY_SYSTEMS_COMPARISON.md +812 -0
  591. package/docs/research/PATTERN_ANALYSIS_2026-01-18.md +436 -0
  592. package/docs/research/PERFORMANCE_ANALYSIS_2026-01-18.md +209 -0
  593. package/docs/research/PERFORMANCE_TEST_PLAN.md +383 -0
  594. package/docs/research/TERMINAL_BENCH_LEARNINGS.md +217 -0
  595. package/package.json +113 -0
  596. package/scripts/README.md +161 -0
  597. package/templates/CLAUDE.template.md +10 -0
  598. package/templates/CLAUDE_ARCHITECTURE.template.md +103 -0
  599. package/templates/CLAUDE_CODING.template.md +127 -0
  600. package/templates/CLAUDE_DROIDS.template.md +109 -0
  601. package/templates/CLAUDE_MEMORY.template.md +131 -0
  602. package/templates/CLAUDE_WORKFLOWS.template.md +139 -0
  603. package/templates/PROJECT.template.md +209 -0
  604. package/templates/SCHEMA.md +57 -0
  605. package/templates/archive/CLAUDE.template.root-v6.md +534 -0
  606. package/templates/archive/CLAUDE.template.v6.md +534 -0
  607. package/templates/hooks/forgecode/pre-compact.sh +68 -0
  608. package/templates/hooks/forgecode/session-start.sh +169 -0
  609. package/templates/hooks/forgecode.plugin.sh +128 -0
  610. package/templates/hooks/pre-compact.sh +74 -0
  611. package/templates/hooks/session-start.sh +366 -0
  612. package/tools/agents/README.md +224 -0
  613. package/tools/agents/UAP/README.md +386 -0
  614. package/tools/agents/UAP/__init__.py +9 -0
  615. package/tools/agents/UAP/cli.py +901 -0
  616. package/tools/agents/UAP/compliance_verify.sh +108 -0
  617. package/tools/agents/UAP/full_verification.sh +126 -0
  618. package/tools/agents/UAP/version.py +32 -0
  619. package/tools/agents/benchmarks/benchmark_memory_systems.py +730 -0
  620. package/tools/agents/benchmarks/results/benchmark_20260106_064817.json +170 -0
  621. package/tools/agents/benchmarks/results/benchmark_20260106_064817.md +51 -0
  622. package/tools/agents/config/chat_template.jinja +77 -0
  623. package/tools/agents/config/tool-call-schema.json +19 -0
  624. package/tools/agents/config/tool-call.gbnf +58 -0
  625. package/tools/agents/docker/Dockerfile.python +52 -0
  626. package/tools/agents/docker/Dockerfile.ubuntu +55 -0
  627. package/tools/agents/docker-compose.qdrant.yml +24 -0
  628. package/tools/agents/install-opencode-local.sh.j2 +135 -0
  629. package/tools/agents/migrations/apply.py +256 -0
  630. package/tools/agents/opencode_uap_agent.py +1505 -0
  631. package/tools/agents/plugin/README.md +91 -0
  632. package/tools/agents/plugin/index.ts +46 -0
  633. package/tools/agents/plugin/pre-compact.sh +68 -0
  634. package/tools/agents/plugin/session-start.sh +175 -0
  635. package/tools/agents/plugin/uap-commands.ts +45 -0
  636. package/tools/agents/plugin/uap-droids.ts +54 -0
  637. package/tools/agents/plugin/uap-patterns.ts +54 -0
  638. package/tools/agents/plugin/uap-skills.ts +52 -0
  639. package/tools/agents/plugins/uap-enforce.ts +314 -0
  640. package/tools/agents/scripts/__pycache__/tool_call_wrapper.cpython-313.pyc +0 -0
  641. package/tools/agents/scripts/chat_template_verifier.py +343 -0
  642. package/tools/agents/scripts/fix-qwen-template.js +38 -0
  643. package/tools/agents/scripts/fix_qwen_chat_template.py +316 -0
  644. package/tools/agents/scripts/generate_lora_training_data.py +412 -0
  645. package/tools/agents/scripts/init_qdrant.py +151 -0
  646. package/tools/agents/scripts/memory_migration.py +560 -0
  647. package/tools/agents/scripts/migrate_memory_to_qdrant.py +110 -0
  648. package/tools/agents/scripts/prepare_lora.sh +512 -0
  649. package/tools/agents/scripts/query_memory.py +200 -0
  650. package/tools/agents/scripts/qwen-tool-call-test.js +38 -0
  651. package/tools/agents/scripts/qwen-tool-call-wrapper.js +38 -0
  652. package/tools/agents/scripts/qwen_tool_call_test.py +464 -0
  653. package/tools/agents/scripts/qwen_tool_call_wrapper.py +686 -0
  654. package/tools/agents/scripts/start-services.sh +96 -0
  655. package/tools/agents/scripts/tool-choice-proxy.cjs +296 -0
  656. package/tools/agents/scripts/tool_call_test.py +656 -0
  657. package/tools/agents/scripts/tool_call_wrapper.py +799 -0
  658. package/tools/agents/tests/test_uap_compliance.py +257 -0
  659. package/tools/agents/uap_agent.py +122 -0
  660. package/tools/agents/uap_agent_install.sh +12 -0
@@ -0,0 +1,89 @@
1
+ /**
2
+ * Improved Benchmark Runner for UAP
3
+ *
4
+ * Integrates all improvements:
5
+ * - Dynamic memory retrieval
6
+ * - Task classification and routing
7
+ * - Multi-turn agent loop
8
+ * - Hierarchical prompting
9
+ * - Execution verification
10
+ */
11
+ import { type TaskClassification } from '../memory/task-classifier.js';
12
+ import { type VerificationResult } from './execution-verifier.js';
13
+ interface ModelConfig { // One model the benchmark can run against.
14
+ id: string; // Matched against the `models` option of runImprovedBenchmark.
15
+ name: string; // Display name used in console output and in the report.
16
+ apiModel: string; // Model identifier handed to the execution backend.
17
+ }
18
+ interface BenchmarkTask { // One benchmark task definition.
19
+ id: string;
20
+ name: string;
21
+ description: string;
22
+ prompt: string; // Text sent to the model under test.
23
+ difficulty: 'easy' | 'medium' | 'hard';
24
+ category: string;
25
+ expectedPatterns: string[]; // NOTE(review): not read by improved-benchmark.js itself; presumably used by the verifier - confirm.
26
+ }
27
+ interface TaskResult { // Outcome of running one task on one model.
28
+ taskId: string;
29
+ modelId: string;
30
+ success: boolean;
31
+ latencyMs: number; // Wall-clock time for the whole task, including classification and memory retrieval.
32
+ turnsUsed: number; // 1 for single-shot runs, otherwise the multi-turn agent's totalTurns.
33
+ memoryUsed: boolean; // Mirrors the useMemory flag of the run configuration.
34
+ classification: TaskClassification;
35
+ verification: VerificationResult;
36
+ errors: string[]; // De-duplicated and capped at 5 entries.
37
+ }
38
+ interface ModelResult { // Aggregated results of one model over all tasks.
39
+ modelId: string;
40
+ modelName: string;
41
+ tasksRun: number;
42
+ tasksSucceeded: number;
43
+ successRate: number; // Percentage in the range 0-100.
44
+ avgLatencyMs: number; // Mean task latency, rounded to whole milliseconds.
45
+ avgTurns: number; // Mean turns per task, rounded to one decimal place.
46
+ results: TaskResult[];
47
+ }
48
+ interface BenchmarkReport { // Full report returned by runImprovedBenchmark.
49
+ timestamp: string; // ISO-8601 timestamp (Date#toISOString).
50
+ config: {
51
+ maxTurns: number;
52
+ useMemory: boolean; // Always true in reports built by runImprovedBenchmark.
53
+ useHierarchicalPrompting: boolean;
54
+ };
55
+ models: ModelResult[]; // Results of the with-memory phase.
56
+ comparison: {
57
+ bestOverall: string; // Model name with the highest success rate, or 'N/A'.
58
+ fastestModel: string; // Model name with the lowest average latency, or 'N/A'.
59
+ byDifficulty: Record<string, { // Keyed by 'easy' | 'medium' | 'hard'.
60
+ model: string;
61
+ successRate: number;
62
+ }>;
63
+ byCategory: Record<string, { // Keyed by task category.
64
+ model: string;
65
+ successRate: number;
66
+ }>;
67
+ };
68
+ memoryImpact?: { // Present only when the memory comparison is enabled.
69
+ withMemory: ModelResult[];
70
+ withoutMemory: ModelResult[];
71
+ improvement: Record<string, { // Keyed by model id.
72
+ successDelta: number; // With-memory minus without-memory success rate, in percentage points.
73
+ speedup: number; // Without-memory average latency divided by with-memory average latency.
74
+ }>;
75
+ };
76
+ }
77
+ declare const MODELS: ModelConfig[];
78
+ declare const BENCHMARK_TASKS: BenchmarkTask[];
79
+ export declare function runImprovedBenchmark(options?: { // Runs the benchmark, writes a markdown report, resolves with the report.
80
+ apiKey?: string; // Falls back to FACTORY_API_KEY, then DROID_API_KEY; the call rejects when none is set.
81
+ models?: string[]; // Model ids to run; all MODELS when omitted.
82
+ compareMemory?: boolean; // Default true: also run a baseline phase without memory.
83
+ maxTurns?: number; // Default 2.
84
+ useHierarchicalPrompting?: boolean; // Default true.
85
+ verbose?: boolean; // Default false.
86
+ parallelModels?: number; // Default 1 (sequential).
87
+ }): Promise<BenchmarkReport>;
88
+ export { MODELS, BENCHMARK_TASKS };
89
+ //# sourceMappingURL=improved-benchmark.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"improved-benchmark.d.ts","sourceRoot":"","sources":["../../src/benchmarks/improved-benchmark.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAOH,OAAO,EAAgB,KAAK,kBAAkB,EAAE,MAAM,8BAA8B,CAAC;AAIrF,OAAO,EAAuB,KAAK,kBAAkB,EAAE,MAAM,yBAAyB,CAAC;AAUvF,UAAU,WAAW;IACnB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,UAAU,aAAa;IACrB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,GAAG,QAAQ,GAAG,MAAM,CAAC;IACvC,QAAQ,EAAE,MAAM,CAAC;IACjB,gBAAgB,EAAE,MAAM,EAAE,CAAC;CAC5B;AAED,UAAU,UAAU;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,OAAO,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,OAAO,CAAC;IACpB,cAAc,EAAE,kBAAkB,CAAC;IACnC,YAAY,EAAE,kBAAkB,CAAC;IACjC,MAAM,EAAE,MAAM,EAAE,CAAC;CAClB;AAED,UAAU,WAAW;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,cAAc,EAAE,MAAM,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,UAAU,EAAE,CAAC;CACvB;AAED,UAAU,eAAe;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE;QACN,QAAQ,EAAE,MAAM,CAAC;QACjB,SAAS,EAAE,OAAO,CAAC;QACnB,wBAAwB,EAAE,OAAO,CAAC;KACnC,CAAC;IACF,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,UAAU,EAAE;QACV,WAAW,EAAE,MAAM,CAAC;QACpB,YAAY,EAAE,MAAM,CAAC;QACrB,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE;YAAE,KAAK,EAAE,MAAM,CAAC;YAAC,WAAW,EAAE,MAAM,CAAA;SAAE,CAAC,CAAC;QACrE,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE;YAAE,KAAK,EAAE,MAAM,CAAC;YAAC,WAAW,EAAE,MAAM,CAAA;SAAE,CAAC,CAAC;KACpE,CAAC;IACF,YAAY,CAAC,EAAE;QACb,UAAU,EAAE,WAAW,EAAE,CAAC;QAC1B,aAAa,EAAE,WAAW,EAAE,CAAC;QAC7B,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE;YAAE,YAAY,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAA;SAAE,CAAC,CAAC;KACxE,CAAC;CACH;AAMD,QAAA,MAAM,MAAM,EAAE,WAAW,EAIxB,CAAC;AAEF,QAAA,MAAM,eAAe,EAAE,aAAa,EA0GnC,CAAC;AAiZF,wBAAsB,oBAAoB,CACxC,OAAO,GAAE;IACP,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,wBAAwB,
CAAC,EAAE,OAAO,CAAC;IACnC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,cAAc,CAAC,EAAE,MAAM,CAAC;CACpB,GACL,OAAO,CAAC,eAAe,CAAC,CAyJ1B;AAuBD,OAAO,EAAE,MAAM,EAAE,eAAe,EAAE,CAAC"}
@@ -0,0 +1,585 @@
1
+ /**
2
+ * Improved Benchmark Runner for UAP
3
+ *
4
+ * Integrates all improvements:
5
+ * - Dynamic memory retrieval
6
+ * - Task classification and routing
7
+ * - Multi-turn agent loop
8
+ * - Hierarchical prompting
9
+ * - Execution verification
10
+ */
11
+ import { writeFileSync, existsSync, readFileSync } from 'fs';
12
+ import { join, dirname } from 'path';
13
+ import { fileURLToPath } from 'url';
14
+ import { execSync } from 'child_process';
15
+ import { classifyTask } from '../memory/task-classifier.js';
16
+ import { retrieveDynamicMemoryContext } from '../memory/dynamic-retrieval.js';
17
+ import { executeWithMultiTurn } from './multi-turn-agent.js';
18
+ import { buildHierarchicalPrompt } from './hierarchical-prompting.js';
19
+ import { verifyBenchmarkTask } from './execution-verifier.js';
20
+ const __filename = fileURLToPath(import.meta.url); // ES modules have no built-in __filename; derive it from the module URL.
21
+ const __dirname = dirname(__filename);
22
+ const PROJECT_ROOT = join(__dirname, '../..'); // Two directory levels above this module; used for memory retrieval, .env lookup and the report path.
23
+ // ============================================================================
24
+ // Configuration
25
+ // ============================================================================
26
+ const MODELS = [ // Models the benchmark can run; `id` is what the `models` option filters on, `apiModel` is passed to the execution backend.
27
+ { id: 'opus-4.5', name: 'Claude Opus 4.5', apiModel: 'claude-opus-4-5-20251101' },
28
+ { id: 'glm-4.7', name: 'GLM 4.7', apiModel: 'glm-4.7' },
29
+ { id: 'gpt-5.2-codex', name: 'GPT 5.2 Codex', apiModel: 'gpt-5.2-codex' },
30
+ ];
31
+ const BENCHMARK_TASKS = [ // Task suite; each entry's `prompt` is sent to the model, `difficulty` and `category` drive the report breakdowns.
32
+ {
33
+ id: 'task-001-code-generation',
34
+ name: 'TypeScript Function Generation',
35
+ description: 'Generate a well-typed TypeScript function',
36
+ prompt: `Write a TypeScript function called 'calculateAverage' that:
37
+ 1. Takes an array of numbers as input
38
+ 2. Returns the arithmetic mean
39
+ 3. Handles empty arrays (return 0)
40
+ 4. Has proper type annotations
41
+
42
+ Return ONLY the function code, no explanations.`,
43
+ difficulty: 'easy',
44
+ category: 'coding',
45
+ expectedPatterns: ['function calculateAverage', 'number[]', ': number', 'length', 'return'], // NOTE(review): expectedPatterns is not read anywhere in this module; presumably consumed by the verifier - confirm.
46
+ },
47
+ {
48
+ id: 'task-002-bug-fix',
49
+ name: 'Bug Detection and Fix',
50
+ description: 'Identify and fix a bug in code',
51
+ prompt: `Find and fix the bug in this TypeScript code:
52
+
53
+ function sumPositive(nums: number[]): number {
54
+ let sum = 0;
55
+ for (let i = 0; i <= nums.length; i++) {
56
+ if (nums[i] > 0) {
57
+ sum += nums[i];
58
+ }
59
+ }
60
+ return sum;
61
+ }
62
+
63
+ Return ONLY the corrected function code.`,
64
+ difficulty: 'easy',
65
+ category: 'debugging',
66
+ expectedPatterns: ['i < nums.length', 'function sumPositive', 'return sum'],
67
+ },
68
+ {
69
+ id: 'task-003-pattern-implementation',
70
+ name: 'Design Pattern Implementation',
71
+ description: 'Implement a singleton pattern',
72
+ prompt: `Implement a TypeScript singleton class called 'ConfigManager' that:
73
+ 1. Has a private constructor
74
+ 2. Has a static getInstance() method
75
+ 3. Has get(key: string) and set(key: string, value: any) methods
76
+ 4. Stores configuration in a private Map
77
+
78
+ Return ONLY the class code.`,
79
+ difficulty: 'medium',
80
+ category: 'coding',
81
+ expectedPatterns: ['class ConfigManager', 'private constructor', 'static getInstance', 'Map'],
82
+ },
83
+ {
84
+ id: 'task-004-refactoring',
85
+ name: 'Code Refactoring',
86
+ description: 'Refactor code for better maintainability',
87
+ prompt: `Refactor this code to use the Strategy pattern:
88
+
89
+ function processOrder(order: any) {
90
+ if (order.type === 'digital') {
91
+ order.status = 'delivered';
92
+ } else if (order.type === 'physical') {
93
+ order.status = 'shipped';
94
+ } else if (order.type === 'subscription') {
95
+ order.status = 'active';
96
+ }
97
+ return order;
98
+ }
99
+
100
+ Return the refactored TypeScript code with interfaces and classes.`,
101
+ difficulty: 'medium',
102
+ category: 'coding',
103
+ expectedPatterns: ['interface', 'class', 'implements', 'process'],
104
+ },
105
+ {
106
+ id: 'task-005-algorithm',
107
+ name: 'Algorithm Implementation',
108
+ description: "Implement Dijkstra's algorithm",
109
+ prompt: `Implement a TypeScript function 'findShortestPath' using Dijkstra's algorithm:
110
+
111
+ 1. Input: weighted graph as adjacency list Map<string, Map<string, number>>
112
+ 2. Input: start node (string), end node (string)
113
+ 3. Output: { path: string[], distance: number } or null if no path
114
+ 4. Handle disconnected nodes
115
+
116
+ Return ONLY the function code with type definitions.`,
117
+ difficulty: 'hard',
118
+ category: 'coding',
119
+ expectedPatterns: ['function findShortestPath', 'Map<string', 'distance', 'path', 'while'],
120
+ },
121
+ {
122
+ id: 'task-006-error-handling',
123
+ name: 'Comprehensive Error Handling',
124
+ description: 'Implement robust error handling',
125
+ prompt: `Create a TypeScript async function 'fetchWithRetry' that:
126
+
127
+ 1. Takes url: string, retryConfig?: { maxRetries: number; backoffMs: number; }
128
+ 2. Implements exponential backoff retry logic
129
+ 3. Handles network errors and HTTP errors
130
+ 4. Returns Promise<Response> or throws custom error
131
+
132
+ Return ONLY the function code with types.`,
133
+ difficulty: 'hard',
134
+ category: 'coding',
135
+ expectedPatterns: ['async function fetchWithRetry', 'retry', 'catch', 'throw'],
136
+ },
137
+ ];
138
+ // ============================================================================
139
+ // Benchmark Runner
140
+ // ============================================================================
141
/**
 * Build the placeholder verification record used when a task produced no
 * usable verification data (execution threw, or the agent returned none).
 *
 * @param {string} message - Single error message to store in the record.
 * @returns {object} A verification record marked as failed with zero tests.
 */
function buildFailedVerification(message) {
    return {
        success: false,
        executionSucceeded: false,
        testsRun: 0,
        testsPassed: 0,
        errors: [message],
        output: '',
        executionTimeMs: 0,
    };
}

/**
 * Run one benchmark task against one model and collect the outcome.
 *
 * Steps: classify the task, optionally retrieve memory context, build the
 * prompt, then execute either through the multi-turn agent (maxTurns > 1) or
 * as a single shot followed by verification.
 *
 * Execution failures on either path are recorded in the result instead of
 * being thrown, so one failing task cannot abort the whole benchmark run.
 *
 * @param {object} task - Task definition (reads `id` and `prompt`).
 * @param {object} model - Model definition (reads `id` and `apiModel`).
 * @param {object} config - Run configuration (reads `useMemory`,
 *   `useHierarchicalPrompting`, `maxTurns`, `apiKey`, `verbose`).
 * @returns {Promise<object>} Task result with success flag, latency, turns,
 *   classification, verification record and up to 5 distinct error messages.
 */
async function runTaskForModel(task, model, config) {
    const startTime = Date.now();

    // Step 1: Classify the task
    const classification = classifyTask(task.prompt);
    if (config.verbose) {
        console.log(` Category: ${classification.category} (${(classification.confidence * 100).toFixed(0)}% confidence)`);
    }

    // Step 2: Get memory context if enabled (best-effort: a retrieval failure
    // only means the prompt is built without memory)
    let memoryContext = '';
    if (config.useMemory) {
        try {
            const dynamicContext = await retrieveDynamicMemoryContext(task.prompt, PROJECT_ROOT);
            memoryContext = dynamicContext.formattedContext;
            if (config.verbose) {
                console.log(` Memory: ${dynamicContext.relevantMemories.length} memories retrieved`);
            }
        }
        catch (error) {
            if (config.verbose) {
                console.log(` Memory: Failed to retrieve (${error})`);
            }
        }
    }

    // Step 3: Build prompt
    // NOTE(review): this prompt is only used on the single-shot path; the
    // multi-turn agent below receives the raw task prompt and its own
    // useMemory flag. Confirm that is intended.
    let finalPrompt;
    if (config.useHierarchicalPrompting) {
        finalPrompt = buildHierarchicalPrompt(task.prompt, classification, memoryContext);
    }
    else {
        finalPrompt = memoryContext ? memoryContext + '\n\n' + task.prompt : task.prompt;
    }

    // Step 4: Execute with multi-turn if needed
    let success = false;
    let turnsUsed = 1;
    let verification;
    const errors = [];
    try {
        if (config.maxTurns > 1) {
            // Use multi-turn agent
            const multiTurnResult = await executeWithMultiTurn(task.id, task.prompt, {
                maxTurns: config.maxTurns,
                model: model.apiModel,
                apiKey: config.apiKey,
                useMemory: config.useMemory,
                projectRoot: PROJECT_ROOT,
                verbose: config.verbose,
            });
            const turns = multiTurnResult.turns ?? [];
            success = multiTurnResult.success;
            turnsUsed = multiTurnResult.totalTurns;
            // The last turn's verification is the final verdict
            verification = turns[turns.length - 1]?.verification
                || buildFailedVerification('No verification data');
            for (const turn of turns) {
                // A turn may carry no verification at all; skip it rather than crash
                errors.push(...(turn.verification?.errors ?? []));
            }
        }
        else {
            // Single-shot execution
            const response = await executeSingleShot(finalPrompt, model.apiModel, config.apiKey);
            verification = await verifyBenchmarkTask(task.id, response);
            success = verification.success;
            errors.push(...verification.errors);
        }
    }
    catch (error) {
        success = false;
        verification = buildFailedVerification(`Execution failed: ${error}`);
        errors.push(`Execution failed: ${error}`);
    }

    const latencyMs = Date.now() - startTime;
    return {
        taskId: task.id,
        modelId: model.id,
        success,
        latencyMs,
        turnsUsed,
        memoryUsed: config.useMemory,
        classification,
        verification,
        // Keep the report readable: distinct messages only, at most five
        errors: [...new Set(errors)].slice(0, 5),
    };
}
238
/**
 * Send one prompt to a model through the `droid exec` CLI and return its output.
 *
 * The prompt is written to a temporary file that is removed again whether or
 * not the command succeeds.
 *
 * The model name and prompt path reach the shell through environment
 * variables rather than string interpolation, so shell metacharacters in
 * those values cannot alter the command. The API key is passed only through
 * the child environment, so it no longer appears on the command line.
 *
 * @param {string} prompt - Full prompt text.
 * @param {string} model - Value for the CLI's `--model` flag.
 * @param {string} apiKey - Exposed to the child process as FACTORY_API_KEY.
 * @returns {Promise<string>} Trimmed standard output of the command.
 * @throws Whatever execSync throws (non-zero exit, timeout, buffer overflow).
 */
async function executeSingleShot(prompt, model, apiKey) {
    const tmpDir = '/tmp/uap-benchmark';
    // Date.now() alone can collide when several models run in parallel
    const uniqueSuffix = `${Date.now()}-${process.pid}-${Math.random().toString(36).slice(2, 10)}`;
    const promptFile = join(tmpDir, `prompt-${uniqueSuffix}.txt`);
    if (!existsSync(tmpDir)) {
        execSync(`mkdir -p ${tmpDir}`, { encoding: 'utf-8' });
    }
    writeFileSync(promptFile, prompt, 'utf-8');

    const childEnv = {
        ...process.env,
        FACTORY_API_KEY: apiKey,
        UAP_BENCH_MODEL: model,
        UAP_BENCH_PROMPT_FILE: promptFile,
    };
    try {
        // Double-quoted "$VAR" expansions are not re-parsed by the shell
        const result = execSync('droid exec --model "$UAP_BENCH_MODEL" --auto medium -f "$UAP_BENCH_PROMPT_FILE"', {
            encoding: 'utf-8',
            timeout: 300000,
            maxBuffer: 10 * 1024 * 1024,
            env: childEnv,
        });
        return result.trim();
    }
    finally {
        try {
            execSync('rm -f "$UAP_BENCH_PROMPT_FILE"', { encoding: 'utf-8', env: childEnv });
        }
        catch {
            // Cleanup is best-effort; never mask the command's own outcome
        }
    }
}
265
/**
 * Run every task sequentially against one model and aggregate the results.
 *
 * Progress is printed to the console, and a one-second pause follows each
 * task. An empty task list yields zeroed statistics rather than NaN.
 *
 * @param {object} model - Model definition (reads `id` and `name`).
 * @param {object[]} tasks - Tasks to run (reads `difficulty` and `name` for logging).
 * @param {object} config - Run configuration, forwarded to runTaskForModel.
 * @returns {Promise<object>} Aggregate with `successRate` as a percentage,
 *   `avgLatencyMs` rounded to whole ms and `avgTurns` rounded to one decimal.
 */
async function runBenchmarkForModel(model, tasks, config) {
    const memoryLabel = config.useMemory ? 'with UAP' : 'without UAP';
    console.log(`\n${'='.repeat(60)}`);
    console.log(`Running: ${model.name} (${memoryLabel})`);
    console.log(`${'='.repeat(60)}`);

    const results = [];
    for (const task of tasks) {
        console.log(` [${task.difficulty.toUpperCase()}] ${task.name}...`);
        const result = await runTaskForModel(task, model, config);
        results.push(result);
        if (result.success) {
            console.log(` ✓ Success (${result.latencyMs}ms, ${result.turnsUsed} turn${result.turnsUsed > 1 ? 's' : ''})`);
        }
        else {
            console.log(` ✗ Failed: ${result.errors[0] || 'Unknown error'}`);
        }
        // Delay between tasks
        await new Promise((r) => setTimeout(r, 1000));
    }

    const total = results.length;
    const succeeded = results.filter((r) => r.success).length;
    // Guard the division so an empty run reports 0 instead of NaN
    const average = (pick) => (total > 0 ? results.reduce((sum, r) => sum + pick(r), 0) / total : 0);
    const avgLatency = average((r) => r.latencyMs);
    const avgTurns = average((r) => r.turnsUsed);
    return {
        modelId: model.id,
        modelName: model.name,
        tasksRun: tasks.length,
        tasksSucceeded: succeeded,
        successRate: total > 0 ? (succeeded / total) * 100 : 0,
        avgLatencyMs: Math.round(avgLatency),
        avgTurns: Math.round(avgTurns * 10) / 10,
        results,
    };
}
298
/**
 * Find the model with the highest success rate over a subset of tasks.
 *
 * Ties go to the model listed first. A model is considered even when its
 * rate is 0%, so a bucket where every model failed still names a model;
 * 'N/A' is returned only when no model has any result in the subset.
 *
 * @param {object[]} modelResults - Per-model aggregates (reads `modelName`, `results`).
 * @param {(taskId: string) => boolean} includesTask - Selects the task subset.
 * @returns {{model: string, successRate: number}} Best model and its percentage.
 */
function findBestModelForTasks(modelResults, includesTask) {
    let best = null;
    for (const modelResult of modelResults) {
        const relevant = modelResult.results.filter((r) => includesTask(r.taskId));
        if (relevant.length === 0) {
            continue;
        }
        const rate = (relevant.filter((r) => r.success).length / relevant.length) * 100;
        if (best === null || rate > best.successRate) {
            best = { model: modelResult.modelName, successRate: rate };
        }
    }
    return best ?? { model: 'N/A', successRate: 0 };
}

/**
 * Build the cross-model comparison section of the report.
 *
 * @param {object[]} modelResults - Per-model aggregates (reads `modelName`,
 *   `successRate`, `avgLatencyMs`, `results`).
 * @returns {object} `bestOverall` and `fastestModel` names ('N/A' when there
 *   are no models) plus the best model per difficulty and per task category.
 */
function generateComparison(modelResults) {
    const sorted = [...modelResults].sort((a, b) => b.successRate - a.successRate);
    const fastest = [...modelResults].sort((a, b) => a.avgLatencyMs - b.avgLatencyMs);

    // Index tasks once instead of scanning BENCHMARK_TASKS for every result;
    // the first definition wins, matching Array#find semantics.
    const taskById = new Map();
    for (const task of BENCHMARK_TASKS) {
        if (!taskById.has(task.id)) {
            taskById.set(task.id, task);
        }
    }

    const byDifficulty = {};
    for (const diff of ['easy', 'medium', 'hard']) {
        byDifficulty[diff] = findBestModelForTasks(modelResults, (taskId) => taskById.get(taskId)?.difficulty === diff);
    }

    // By category
    const byCategory = {};
    const categories = [...new Set(BENCHMARK_TASKS.map((t) => t.category))];
    for (const cat of categories) {
        byCategory[cat] = findBestModelForTasks(modelResults, (taskId) => taskById.get(taskId)?.category === cat);
    }

    return {
        bestOverall: sorted[0]?.modelName || 'N/A',
        fastestModel: fastest[0]?.modelName || 'N/A',
        byDifficulty,
        byCategory,
    };
}
348
/**
 * Render a benchmark report as a markdown document.
 *
 * Sections: configuration, executive summary, comparison (overall, by
 * difficulty, by category), optional memory impact, and per-model detail
 * tables. Results whose task id is not in BENCHMARK_TASKS are shown with the
 * raw id as name and 'unknown' as difficulty.
 *
 * @param {object} report - Report object as built by runImprovedBenchmark.
 * @returns {string} Markdown text with lines joined by '\n'.
 */
function generateMarkdownReport(report) {
    const lines = [
        '# Improved UAP Benchmark Results',
        '',
        `**Generated:** ${report.timestamp}`,
        `**Configuration:**`,
        `- Max Turns: ${report.config.maxTurns}`,
        `- Memory Enabled: ${report.config.useMemory}`,
        `- Hierarchical Prompting: ${report.config.useHierarchicalPrompting}`,
        '',
        '---',
        '',
        '## Executive Summary',
        '',
        '| Model | Success Rate | Avg Latency | Avg Turns |',
        '|-------|--------------|-------------|-----------|',
    ];
    for (const model of report.models) {
        lines.push(`| ${model.modelName} | ${model.successRate.toFixed(1)}% | ${model.avgLatencyMs}ms | ${model.avgTurns} |`);
    }

    lines.push('', '---', '', '## Comparison', '');
    lines.push(`- **Best Overall:** ${report.comparison.bestOverall}`);
    lines.push(`- **Fastest Model:** ${report.comparison.fastestModel}`);
    lines.push('', '### By Difficulty', '');
    lines.push('| Difficulty | Best Model | Success Rate |');
    lines.push('|------------|------------|--------------|');
    for (const [diff, data] of Object.entries(report.comparison.byDifficulty)) {
        lines.push(`| ${diff} | ${data.model} | ${data.successRate.toFixed(1)}% |`);
    }
    lines.push('', '### By Category', '');
    lines.push('| Category | Best Model | Success Rate |');
    lines.push('|----------|------------|--------------|');
    for (const [cat, data] of Object.entries(report.comparison.byCategory)) {
        lines.push(`| ${cat} | ${data.model} | ${data.successRate.toFixed(1)}% |`);
    }

    if (report.memoryImpact) {
        lines.push('', '---', '', '## UAP Memory Impact', '');
        lines.push('| Model | Without UAP | With UAP | Improvement |');
        lines.push('|-------|-------------|----------|-------------|');
        for (const withMem of report.memoryImpact.withMemory) {
            const without = report.memoryImpact.withoutMemory.find((r) => r.modelId === withMem.modelId);
            const imp = report.memoryImpact.improvement[withMem.modelId];
            // Only models present in both phases get a row
            if (without && imp) {
                const sign = imp.successDelta >= 0 ? '+' : '';
                lines.push(`| ${withMem.modelName} | ${without.successRate.toFixed(1)}% | ${withMem.successRate.toFixed(1)}% | ${sign}${imp.successDelta.toFixed(1)}% |`);
            }
        }
    }

    // Index tasks once for the detail tables; first definition wins
    const taskById = new Map();
    for (const task of BENCHMARK_TASKS) {
        if (!taskById.has(task.id)) {
            taskById.set(task.id, task);
        }
    }

    lines.push('', '---', '', '## Detailed Results', '');
    for (const model of report.models) {
        lines.push(`### ${model.modelName}`, '');
        lines.push('| Task | Difficulty | Success | Latency | Turns | Category |');
        lines.push('|------|------------|---------|---------|-------|----------|');
        for (const result of model.results) {
            const task = taskById.get(result.taskId);
            const status = result.success ? '✓' : '✗';
            // Fall back for both columns so an unknown id never prints "undefined"
            lines.push(`| ${task?.name || result.taskId} | ${task?.difficulty ?? 'unknown'} | ${status} | ${result.latencyMs}ms | ${result.turnsUsed} | ${result.classification.category} |`);
        }
        lines.push('');
    }
    lines.push('---', '', '**Report Generated by UAP Improved Benchmark**');
    return lines.join('\n');
}
411
+ // ============================================================================
412
+ // Parallel Execution Utilities
413
+ // ============================================================================
414
/**
 * Run multiple model benchmarks in parallel with configurable concurrency.
 *
 * A fixed number of workers pull models from a shared cursor until none are
 * left. The concurrency is clamped to the range 1..models.length, so 0, a
 * negative number or NaN falls back to sequential execution instead of
 * silently running nothing.
 *
 * @param {object[]} models - Models to benchmark.
 * @param {object[]} tasks - Tasks forwarded to runBenchmarkForModel.
 * @param {object} config - Run configuration forwarded to runBenchmarkForModel.
 * @param {number} concurrency - Maximum number of models running at once.
 * @returns {Promise<object[]>} One result per model, in the order of `models`.
 * @throws Rejects with the first error thrown by runBenchmarkForModel.
 */
async function runModelsInParallel(models, tasks, config, concurrency) {
    // `|| 1` covers both 0 and NaN; Math.max covers negative values
    const workerCount = Math.max(1, Math.min(Math.floor(concurrency) || 1, models.length));
    const results = new Array(models.length);
    let cursor = 0;

    const worker = async () => {
        // Claiming the index is synchronous, so no two workers take the same model
        while (cursor < models.length) {
            const index = cursor;
            cursor += 1;
            results[index] = await runBenchmarkForModel(models[index], tasks, config);
        }
    };

    await Promise.all(Array.from({ length: workerCount }, () => worker()));
    // Results are stored by position, so they already match the input order
    return results.filter(Boolean);
}
440
+ // ============================================================================
441
+ // Main Entry Point
442
+ // ============================================================================
443
/**
 * Run all benchmark tasks for the given models under one configuration,
 * in parallel when `parallelism` is greater than 1, otherwise one by one.
 *
 * @param {object[]} models - Models to benchmark.
 * @param {object} config - Run configuration for this phase.
 * @param {number} parallelism - Number of models to run concurrently.
 * @returns {Promise<object[]>} One aggregate per model, in input order.
 */
async function runBenchmarkPhase(models, config, parallelism) {
    if (parallelism > 1) {
        console.log(`\n Running ${models.length} models with concurrency=${parallelism}...\n`);
        return runModelsInParallel(models, BENCHMARK_TASKS, config, parallelism);
    }
    const results = [];
    for (const model of models) {
        results.push(await runBenchmarkForModel(model, BENCHMARK_TASKS, config));
    }
    return results;
}

/**
 * Run the improved UAP benchmark and write a markdown report.
 *
 * When `compareMemory` is enabled, a baseline phase (no memory, no
 * hierarchical prompting, single turn) runs first; the main phase then runs
 * with memory and the requested improvements, and the per-model difference
 * is reported. The report is written to IMPROVED_BENCHMARK_RESULTS.md under
 * PROJECT_ROOT and also returned.
 *
 * @param {object} [options]
 * @param {string} [options.apiKey] - Falls back to FACTORY_API_KEY, then DROID_API_KEY.
 * @param {string[]} [options.models] - Model ids to run; all MODELS when omitted.
 * @param {boolean} [options.compareMemory=true] - Also run the baseline phase.
 * @param {number} [options.maxTurns=2] - Turn limit for the main phase.
 * @param {boolean} [options.useHierarchicalPrompting=true]
 * @param {boolean} [options.verbose=false]
 * @param {number} [options.parallelModels=1] - Models to run concurrently.
 * @returns {Promise<object>} The benchmark report.
 * @throws {Error} When no API key is available or no model matches `options.models`.
 */
export async function runImprovedBenchmark(options = {}) {
    const apiKey = options.apiKey || process.env.FACTORY_API_KEY || process.env.DROID_API_KEY;
    if (!apiKey) {
        throw new Error('API key required (FACTORY_API_KEY or DROID_API_KEY)');
    }
    const modelsToTest = options.models
        ? MODELS.filter((m) => options.models.includes(m.id))
        : MODELS;
    if (modelsToTest.length === 0) {
        // Fail loudly instead of producing an empty report for a mistyped id
        throw new Error(`No matching models for: ${options.models ?? '(all)'} (available: ${MODELS.map((m) => m.id).join(', ')})`);
    }
    const compareMemory = options.compareMemory ?? true;
    const maxTurns = options.maxTurns ?? 2;
    const useHierarchicalPrompting = options.useHierarchicalPrompting ?? true;
    const verbose = options.verbose ?? false;
    const parallelModels = options.parallelModels ?? 1;

    // Determine effective parallelism (0, negative or NaN mean sequential)
    const effectiveParallel = Math.max(1, Math.min(Math.floor(parallelModels) || 1, modelsToTest.length));
    const isParallel = effectiveParallel > 1;

    console.log('\n' + '█'.repeat(60));
    console.log(' UAP IMPROVED BENCHMARK');
    console.log('█'.repeat(60));
    console.log(`\nModels: ${modelsToTest.map((m) => m.name).join(', ')}`);
    console.log(`Tasks: ${BENCHMARK_TASKS.length}`);
    console.log(`Max Turns: ${maxTurns}`);
    console.log(`Memory Comparison: ${compareMemory}`);
    console.log(`Hierarchical Prompting: ${useHierarchicalPrompting}`);
    console.log(`Parallel Models: ${effectiveParallel}${isParallel ? ' (ENABLED)' : ' (sequential)'}`);

    // Run without memory first (if comparing)
    let withoutMemoryResults = [];
    if (compareMemory) {
        console.log('\n' + '█'.repeat(60));
        console.log(` PHASE 1: WITHOUT UAP MEMORY${isParallel ? ' (PARALLEL)' : ''}`);
        console.log('█'.repeat(60));
        const baseConfig = {
            useMemory: false,
            useHierarchicalPrompting: false,
            maxTurns: 1,
            apiKey,
            verbose,
        };
        withoutMemoryResults = await runBenchmarkPhase(modelsToTest, baseConfig, effectiveParallel);
    }

    // Run with memory (and all improvements)
    console.log('\n' + '█'.repeat(60));
    console.log(` PHASE 2: WITH UAP IMPROVEMENTS${isParallel ? ' (PARALLEL)' : ''}`);
    console.log('█'.repeat(60));
    const uapConfig = {
        useMemory: true,
        useHierarchicalPrompting,
        maxTurns,
        apiKey,
        verbose,
    };
    const withMemoryResults = await runBenchmarkPhase(modelsToTest, uapConfig, effectiveParallel);

    // Calculate improvement
    const improvement = {};
    if (compareMemory) {
        for (const model of modelsToTest) {
            const without = withoutMemoryResults.find((r) => r.modelId === model.id);
            const withMem = withMemoryResults.find((r) => r.modelId === model.id);
            if (without && withMem) {
                // Both latencies must be positive for the ratio to be meaningful;
                // otherwise report a neutral 1 rather than 0 or Infinity
                const comparable = without.avgLatencyMs > 0 && withMem.avgLatencyMs > 0;
                improvement[model.id] = {
                    successDelta: withMem.successRate - without.successRate,
                    speedup: comparable ? without.avgLatencyMs / withMem.avgLatencyMs : 1,
                };
            }
        }
    }

    const report = {
        timestamp: new Date().toISOString(),
        config: {
            maxTurns,
            useMemory: true,
            useHierarchicalPrompting,
        },
        models: withMemoryResults,
        comparison: generateComparison(withMemoryResults),
        memoryImpact: compareMemory
            ? {
                withMemory: withMemoryResults,
                withoutMemory: withoutMemoryResults,
                improvement,
            }
            : undefined,
    };

    // Generate and save report
    const markdown = generateMarkdownReport(report);
    const reportPath = join(PROJECT_ROOT, 'IMPROVED_BENCHMARK_RESULTS.md');
    writeFileSync(reportPath, markdown);
    console.log(`\nReport saved to: ${reportPath}`);

    // Print summary
    console.log('\n' + '='.repeat(60));
    console.log(' BENCHMARK COMPLETE');
    console.log('='.repeat(60));
    if (compareMemory) {
        console.log('\n--- Memory Impact ---');
        for (const [modelId, imp] of Object.entries(improvement)) {
            const model = modelsToTest.find((m) => m.id === modelId);
            const sign = imp.successDelta >= 0 ? '+' : '';
            console.log(` ${model?.name}: ${sign}${imp.successDelta.toFixed(1)}% success`);
        }
    }
    console.log(`\nBest Overall: ${report.comparison.bestOverall}`);
    return report;
}
565
/**
 * Parse the text of a .env file into key/value pairs.
 *
 * Blank lines, `#` comment lines and lines without a key are skipped. Only
 * the first `=` separates key from value. A leading `export ` on the key and
 * one pair of matching surrounding quotes on the value are removed.
 *
 * @param {string} content - Raw file content.
 * @returns {Record<string, string>} Parsed variables; later lines win.
 */
function parseEnvFile(content) {
    const entries = {};
    for (const rawLine of content.split(/\r?\n/)) {
        const line = rawLine.trim();
        if (line === '' || line.startsWith('#')) {
            continue;
        }
        const separator = line.indexOf('=');
        if (separator <= 0) {
            continue;
        }
        const key = line.slice(0, separator).replace(/^export\s+/, '').trim();
        if (key === '') {
            continue;
        }
        let value = line.slice(separator + 1).trim();
        const quote = value[0];
        if (value.length >= 2 && (quote === '"' || quote === "'") && value.endsWith(quote)) {
            value = value.slice(1, -1);
        }
        entries[key] = value;
    }
    return entries;
}

// CLI entry: when this module is the script being run, load PROJECT_ROOT/.env
// into process.env (values from the file override existing variables), run
// the benchmark verbosely and exit with 0 on success or 1 on failure.
if (process.argv[1]?.includes('improved-benchmark')) {
    const envPath = join(PROJECT_ROOT, '.env');
    if (existsSync(envPath)) {
        const parsed = parseEnvFile(readFileSync(envPath, 'utf-8'));
        for (const [key, value] of Object.entries(parsed)) {
            process.env[key] = value;
        }
    }
    runImprovedBenchmark({ verbose: true })
        .then(() => process.exit(0))
        .catch((err) => {
            console.error('Benchmark failed:', err);
            process.exit(1);
        });
}
584
+ export { MODELS, BENCHMARK_TASKS };
585
+ //# sourceMappingURL=improved-benchmark.js.map