@miller-tech/uap 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (660)
  1. package/LICENSE +21 -0
  2. package/README.md +888 -0
  3. package/dist/analyzers/index.d.ts +3 -0
  4. package/dist/analyzers/index.d.ts.map +1 -0
  5. package/dist/analyzers/index.js +684 -0
  6. package/dist/analyzers/index.js.map +1 -0
  7. package/dist/benchmarks/agents/naive-agent.d.ts +60 -0
  8. package/dist/benchmarks/agents/naive-agent.d.ts.map +1 -0
  9. package/dist/benchmarks/agents/naive-agent.js +144 -0
  10. package/dist/benchmarks/agents/naive-agent.js.map +1 -0
  11. package/dist/benchmarks/agents/uap-agent.d.ts +167 -0
  12. package/dist/benchmarks/agents/uap-agent.d.ts.map +1 -0
  13. package/dist/benchmarks/agents/uap-agent.js +437 -0
  14. package/dist/benchmarks/agents/uap-agent.js.map +1 -0
  15. package/dist/benchmarks/benchmark.d.ts +328 -0
  16. package/dist/benchmarks/benchmark.d.ts.map +1 -0
  17. package/dist/benchmarks/benchmark.js +112 -0
  18. package/dist/benchmarks/benchmark.js.map +1 -0
  19. package/dist/benchmarks/execution-verifier.d.ts +41 -0
  20. package/dist/benchmarks/execution-verifier.d.ts.map +1 -0
  21. package/dist/benchmarks/execution-verifier.js +340 -0
  22. package/dist/benchmarks/execution-verifier.js.map +1 -0
  23. package/dist/benchmarks/hierarchical-prompting.d.ts +37 -0
  24. package/dist/benchmarks/hierarchical-prompting.d.ts.map +1 -0
  25. package/dist/benchmarks/hierarchical-prompting.js +246 -0
  26. package/dist/benchmarks/hierarchical-prompting.js.map +1 -0
  27. package/dist/benchmarks/improved-benchmark.d.ts +89 -0
  28. package/dist/benchmarks/improved-benchmark.d.ts.map +1 -0
  29. package/dist/benchmarks/improved-benchmark.js +585 -0
  30. package/dist/benchmarks/improved-benchmark.js.map +1 -0
  31. package/dist/benchmarks/index.d.ts +11 -0
  32. package/dist/benchmarks/index.d.ts.map +1 -0
  33. package/dist/benchmarks/index.js +11 -0
  34. package/dist/benchmarks/index.js.map +1 -0
  35. package/dist/benchmarks/model-integration.d.ts +111 -0
  36. package/dist/benchmarks/model-integration.d.ts.map +1 -0
  37. package/dist/benchmarks/model-integration.js +904 -0
  38. package/dist/benchmarks/model-integration.js.map +1 -0
  39. package/dist/benchmarks/multi-turn-agent.d.ts +44 -0
  40. package/dist/benchmarks/multi-turn-agent.d.ts.map +1 -0
  41. package/dist/benchmarks/multi-turn-agent.js +254 -0
  42. package/dist/benchmarks/multi-turn-agent.js.map +1 -0
  43. package/dist/benchmarks/multi-turn-loop.d.ts +57 -0
  44. package/dist/benchmarks/multi-turn-loop.d.ts.map +1 -0
  45. package/dist/benchmarks/multi-turn-loop.js +167 -0
  46. package/dist/benchmarks/multi-turn-loop.js.map +1 -0
  47. package/dist/benchmarks/tasks.d.ts +19 -0
  48. package/dist/benchmarks/tasks.d.ts.map +1 -0
  49. package/dist/benchmarks/tasks.js +435 -0
  50. package/dist/benchmarks/tasks.js.map +1 -0
  51. package/dist/bin/cli.d.ts +3 -0
  52. package/dist/bin/cli.d.ts.map +1 -0
  53. package/dist/bin/cli.js +546 -0
  54. package/dist/bin/cli.js.map +1 -0
  55. package/dist/bin/llama-server-optimize.d.ts +18 -0
  56. package/dist/bin/llama-server-optimize.d.ts.map +1 -0
  57. package/dist/bin/llama-server-optimize.js +708 -0
  58. package/dist/bin/llama-server-optimize.js.map +1 -0
  59. package/dist/bin/policy.d.ts +3 -0
  60. package/dist/bin/policy.d.ts.map +1 -0
  61. package/dist/bin/policy.js +143 -0
  62. package/dist/bin/policy.js.map +1 -0
  63. package/dist/bin/tool-calls.d.ts +3 -0
  64. package/dist/bin/tool-calls.d.ts.map +1 -0
  65. package/dist/bin/tool-calls.js +4 -0
  66. package/dist/bin/tool-calls.js.map +1 -0
  67. package/dist/browser/index.d.ts +2 -0
  68. package/dist/browser/index.d.ts.map +1 -0
  69. package/dist/browser/index.js +2 -0
  70. package/dist/browser/index.js.map +1 -0
  71. package/dist/browser/web-browser.d.ts +30 -0
  72. package/dist/browser/web-browser.d.ts.map +1 -0
  73. package/dist/browser/web-browser.js +93 -0
  74. package/dist/browser/web-browser.js.map +1 -0
  75. package/dist/cli/agent.d.ts +20 -0
  76. package/dist/cli/agent.d.ts.map +1 -0
  77. package/dist/cli/agent.js +474 -0
  78. package/dist/cli/agent.js.map +1 -0
  79. package/dist/cli/analyze.d.ts +7 -0
  80. package/dist/cli/analyze.d.ts.map +1 -0
  81. package/dist/cli/analyze.js +103 -0
  82. package/dist/cli/analyze.js.map +1 -0
  83. package/dist/cli/completion-gates.d.ts +51 -0
  84. package/dist/cli/completion-gates.d.ts.map +1 -0
  85. package/dist/cli/completion-gates.js +201 -0
  86. package/dist/cli/completion-gates.js.map +1 -0
  87. package/dist/cli/compliance.d.ts +8 -0
  88. package/dist/cli/compliance.d.ts.map +1 -0
  89. package/dist/cli/compliance.js +509 -0
  90. package/dist/cli/compliance.js.map +1 -0
  91. package/dist/cli/coord.d.ts +7 -0
  92. package/dist/cli/coord.d.ts.map +1 -0
  93. package/dist/cli/coord.js +138 -0
  94. package/dist/cli/coord.js.map +1 -0
  95. package/dist/cli/dashboard.d.ts +21 -0
  96. package/dist/cli/dashboard.d.ts.map +1 -0
  97. package/dist/cli/dashboard.js +1508 -0
  98. package/dist/cli/dashboard.js.map +1 -0
  99. package/dist/cli/deploy.d.ts +19 -0
  100. package/dist/cli/deploy.d.ts.map +1 -0
  101. package/dist/cli/deploy.js +387 -0
  102. package/dist/cli/deploy.js.map +1 -0
  103. package/dist/cli/droids.d.ts +9 -0
  104. package/dist/cli/droids.d.ts.map +1 -0
  105. package/dist/cli/droids.js +227 -0
  106. package/dist/cli/droids.js.map +1 -0
  107. package/dist/cli/generate.d.ts +17 -0
  108. package/dist/cli/generate.d.ts.map +1 -0
  109. package/dist/cli/generate.js +432 -0
  110. package/dist/cli/generate.js.map +1 -0
  111. package/dist/cli/hooks.d.ts +9 -0
  112. package/dist/cli/hooks.d.ts.map +1 -0
  113. package/dist/cli/hooks.js +464 -0
  114. package/dist/cli/hooks.js.map +1 -0
  115. package/dist/cli/init.d.ts +12 -0
  116. package/dist/cli/init.d.ts.map +1 -0
  117. package/dist/cli/init.js +364 -0
  118. package/dist/cli/init.js.map +1 -0
  119. package/dist/cli/mcp-router.d.ts +16 -0
  120. package/dist/cli/mcp-router.d.ts.map +1 -0
  121. package/dist/cli/mcp-router.js +143 -0
  122. package/dist/cli/mcp-router.js.map +1 -0
  123. package/dist/cli/memory.d.ts +24 -0
  124. package/dist/cli/memory.d.ts.map +1 -0
  125. package/dist/cli/memory.js +885 -0
  126. package/dist/cli/memory.js.map +1 -0
  127. package/dist/cli/model.d.ts +15 -0
  128. package/dist/cli/model.d.ts.map +1 -0
  129. package/dist/cli/model.js +290 -0
  130. package/dist/cli/model.js.map +1 -0
  131. package/dist/cli/patterns.d.ts +26 -0
  132. package/dist/cli/patterns.d.ts.map +1 -0
  133. package/dist/cli/patterns.js +862 -0
  134. package/dist/cli/patterns.js.map +1 -0
  135. package/dist/cli/rtk-validation.d.ts +9 -0
  136. package/dist/cli/rtk-validation.d.ts.map +1 -0
  137. package/dist/cli/rtk-validation.js +9 -0
  138. package/dist/cli/rtk-validation.js.map +1 -0
  139. package/dist/cli/rtk.d.ts +34 -0
  140. package/dist/cli/rtk.d.ts.map +1 -0
  141. package/dist/cli/rtk.js +401 -0
  142. package/dist/cli/rtk.js.map +1 -0
  143. package/dist/cli/schema-diff.d.ts +7 -0
  144. package/dist/cli/schema-diff.d.ts.map +1 -0
  145. package/dist/cli/schema-diff.js +11 -0
  146. package/dist/cli/schema-diff.js.map +1 -0
  147. package/dist/cli/setup-mcp-router.d.ts +8 -0
  148. package/dist/cli/setup-mcp-router.d.ts.map +1 -0
  149. package/dist/cli/setup-mcp-router.js +163 -0
  150. package/dist/cli/setup-mcp-router.js.map +1 -0
  151. package/dist/cli/setup-wizard.d.ts +2 -0
  152. package/dist/cli/setup-wizard.d.ts.map +1 -0
  153. package/dist/cli/setup-wizard.js +806 -0
  154. package/dist/cli/setup-wizard.js.map +1 -0
  155. package/dist/cli/setup.d.ts +15 -0
  156. package/dist/cli/setup.d.ts.map +1 -0
  157. package/dist/cli/setup.js +154 -0
  158. package/dist/cli/setup.js.map +1 -0
  159. package/dist/cli/sync.d.ts +8 -0
  160. package/dist/cli/sync.d.ts.map +1 -0
  161. package/dist/cli/sync.js +395 -0
  162. package/dist/cli/sync.js.map +1 -0
  163. package/dist/cli/task.d.ts +33 -0
  164. package/dist/cli/task.d.ts.map +1 -0
  165. package/dist/cli/task.js +672 -0
  166. package/dist/cli/task.js.map +1 -0
  167. package/dist/cli/tool-calls.d.ts +20 -0
  168. package/dist/cli/tool-calls.d.ts.map +1 -0
  169. package/dist/cli/tool-calls.js +605 -0
  170. package/dist/cli/tool-calls.js.map +1 -0
  171. package/dist/cli/uap.d.ts +10 -0
  172. package/dist/cli/uap.d.ts.map +1 -0
  173. package/dist/cli/uap.js +398 -0
  174. package/dist/cli/uap.js.map +1 -0
  175. package/dist/cli/update.d.ts +10 -0
  176. package/dist/cli/update.d.ts.map +1 -0
  177. package/dist/cli/update.js +300 -0
  178. package/dist/cli/update.js.map +1 -0
  179. package/dist/cli/visualize.d.ts +77 -0
  180. package/dist/cli/visualize.d.ts.map +1 -0
  181. package/dist/cli/visualize.js +287 -0
  182. package/dist/cli/visualize.js.map +1 -0
  183. package/dist/cli/worktree.d.ts +9 -0
  184. package/dist/cli/worktree.d.ts.map +1 -0
  185. package/dist/cli/worktree.js +213 -0
  186. package/dist/cli/worktree.js.map +1 -0
  187. package/dist/coordination/adaptive-patterns.d.ts +65 -0
  188. package/dist/coordination/adaptive-patterns.d.ts.map +1 -0
  189. package/dist/coordination/adaptive-patterns.js +108 -0
  190. package/dist/coordination/adaptive-patterns.js.map +1 -0
  191. package/dist/coordination/auto-agent.d.ts +82 -0
  192. package/dist/coordination/auto-agent.d.ts.map +1 -0
  193. package/dist/coordination/auto-agent.js +145 -0
  194. package/dist/coordination/auto-agent.js.map +1 -0
  195. package/dist/coordination/capability-router.d.ts +79 -0
  196. package/dist/coordination/capability-router.d.ts.map +1 -0
  197. package/dist/coordination/capability-router.js +334 -0
  198. package/dist/coordination/capability-router.js.map +1 -0
  199. package/dist/coordination/database.d.ts +13 -0
  200. package/dist/coordination/database.d.ts.map +1 -0
  201. package/dist/coordination/database.js +136 -0
  202. package/dist/coordination/database.js.map +1 -0
  203. package/dist/coordination/deploy-batcher.d.ts +122 -0
  204. package/dist/coordination/deploy-batcher.d.ts.map +1 -0
  205. package/dist/coordination/deploy-batcher.js +718 -0
  206. package/dist/coordination/deploy-batcher.js.map +1 -0
  207. package/dist/coordination/droid-validator.d.ts +59 -0
  208. package/dist/coordination/droid-validator.d.ts.map +1 -0
  209. package/dist/coordination/droid-validator.js +142 -0
  210. package/dist/coordination/droid-validator.js.map +1 -0
  211. package/dist/coordination/index.d.ts +10 -0
  212. package/dist/coordination/index.d.ts.map +1 -0
  213. package/dist/coordination/index.js +10 -0
  214. package/dist/coordination/index.js.map +1 -0
  215. package/dist/coordination/pattern-router.d.ts +50 -0
  216. package/dist/coordination/pattern-router.d.ts.map +1 -0
  217. package/dist/coordination/pattern-router.js +118 -0
  218. package/dist/coordination/pattern-router.js.map +1 -0
  219. package/dist/coordination/service.d.ts +81 -0
  220. package/dist/coordination/service.d.ts.map +1 -0
  221. package/dist/coordination/service.js +619 -0
  222. package/dist/coordination/service.js.map +1 -0
  223. package/dist/coordination/worktree-enforcer.d.ts +22 -0
  224. package/dist/coordination/worktree-enforcer.d.ts.map +1 -0
  225. package/dist/coordination/worktree-enforcer.js +71 -0
  226. package/dist/coordination/worktree-enforcer.js.map +1 -0
  227. package/dist/generators/claude-md.d.ts +3 -0
  228. package/dist/generators/claude-md.d.ts.map +1 -0
  229. package/dist/generators/claude-md.js +1020 -0
  230. package/dist/generators/claude-md.js.map +1 -0
  231. package/dist/generators/template-loader.d.ts +105 -0
  232. package/dist/generators/template-loader.d.ts.map +1 -0
  233. package/dist/generators/template-loader.js +291 -0
  234. package/dist/generators/template-loader.js.map +1 -0
  235. package/dist/index.d.ts +49 -0
  236. package/dist/index.d.ts.map +1 -0
  237. package/dist/index.js +63 -0
  238. package/dist/index.js.map +1 -0
  239. package/dist/mcp-router/config/parser.d.ts +9 -0
  240. package/dist/mcp-router/config/parser.d.ts.map +1 -0
  241. package/dist/mcp-router/config/parser.js +174 -0
  242. package/dist/mcp-router/config/parser.js.map +1 -0
  243. package/dist/mcp-router/executor/client.d.ts +31 -0
  244. package/dist/mcp-router/executor/client.d.ts.map +1 -0
  245. package/dist/mcp-router/executor/client.js +189 -0
  246. package/dist/mcp-router/executor/client.js.map +1 -0
  247. package/dist/mcp-router/index.d.ts +22 -0
  248. package/dist/mcp-router/index.d.ts.map +1 -0
  249. package/dist/mcp-router/index.js +18 -0
  250. package/dist/mcp-router/index.js.map +1 -0
  251. package/dist/mcp-router/output-compressor.d.ts +26 -0
  252. package/dist/mcp-router/output-compressor.d.ts.map +1 -0
  253. package/dist/mcp-router/output-compressor.js +236 -0
  254. package/dist/mcp-router/output-compressor.js.map +1 -0
  255. package/dist/mcp-router/search/fuzzy.d.ts +26 -0
  256. package/dist/mcp-router/search/fuzzy.d.ts.map +1 -0
  257. package/dist/mcp-router/search/fuzzy.js +94 -0
  258. package/dist/mcp-router/search/fuzzy.js.map +1 -0
  259. package/dist/mcp-router/server.d.ts +50 -0
  260. package/dist/mcp-router/server.d.ts.map +1 -0
  261. package/dist/mcp-router/server.js +229 -0
  262. package/dist/mcp-router/server.js.map +1 -0
  263. package/dist/mcp-router/session-stats.d.ts +37 -0
  264. package/dist/mcp-router/session-stats.d.ts.map +1 -0
  265. package/dist/mcp-router/session-stats.js +56 -0
  266. package/dist/mcp-router/session-stats.js.map +1 -0
  267. package/dist/mcp-router/tools/discover.d.ts +37 -0
  268. package/dist/mcp-router/tools/discover.d.ts.map +1 -0
  269. package/dist/mcp-router/tools/discover.js +65 -0
  270. package/dist/mcp-router/tools/discover.js.map +1 -0
  271. package/dist/mcp-router/tools/execute.d.ts +43 -0
  272. package/dist/mcp-router/tools/execute.d.ts.map +1 -0
  273. package/dist/mcp-router/tools/execute.js +144 -0
  274. package/dist/mcp-router/tools/execute.js.map +1 -0
  275. package/dist/mcp-router/types.d.ts +62 -0
  276. package/dist/mcp-router/types.d.ts.map +1 -0
  277. package/dist/mcp-router/types.js +6 -0
  278. package/dist/mcp-router/types.js.map +1 -0
  279. package/dist/memory/adaptive-context.d.ts +149 -0
  280. package/dist/memory/adaptive-context.d.ts.map +1 -0
  281. package/dist/memory/adaptive-context.js +1095 -0
  282. package/dist/memory/adaptive-context.js.map +1 -0
  283. package/dist/memory/agent-scoped-memory.d.ts +67 -0
  284. package/dist/memory/agent-scoped-memory.d.ts.map +1 -0
  285. package/dist/memory/agent-scoped-memory.js +126 -0
  286. package/dist/memory/agent-scoped-memory.js.map +1 -0
  287. package/dist/memory/ambiguity-detector.d.ts +54 -0
  288. package/dist/memory/ambiguity-detector.d.ts.map +1 -0
  289. package/dist/memory/ambiguity-detector.js +401 -0
  290. package/dist/memory/ambiguity-detector.js.map +1 -0
  291. package/dist/memory/backends/base.d.ts +18 -0
  292. package/dist/memory/backends/base.d.ts.map +1 -0
  293. package/dist/memory/backends/base.js +2 -0
  294. package/dist/memory/backends/base.js.map +1 -0
  295. package/dist/memory/backends/factory.d.ts +4 -0
  296. package/dist/memory/backends/factory.d.ts.map +1 -0
  297. package/dist/memory/backends/factory.js +53 -0
  298. package/dist/memory/backends/factory.js.map +1 -0
  299. package/dist/memory/backends/github.d.ts +27 -0
  300. package/dist/memory/backends/github.d.ts.map +1 -0
  301. package/dist/memory/backends/github.js +134 -0
  302. package/dist/memory/backends/github.js.map +1 -0
  303. package/dist/memory/backends/qdrant-cloud.d.ts +32 -0
  304. package/dist/memory/backends/qdrant-cloud.d.ts.map +1 -0
  305. package/dist/memory/backends/qdrant-cloud.js +167 -0
  306. package/dist/memory/backends/qdrant-cloud.js.map +1 -0
  307. package/dist/memory/context-compressor.d.ts +116 -0
  308. package/dist/memory/context-compressor.d.ts.map +1 -0
  309. package/dist/memory/context-compressor.js +430 -0
  310. package/dist/memory/context-compressor.js.map +1 -0
  311. package/dist/memory/context-pruner.d.ts +55 -0
  312. package/dist/memory/context-pruner.d.ts.map +1 -0
  313. package/dist/memory/context-pruner.js +85 -0
  314. package/dist/memory/context-pruner.js.map +1 -0
  315. package/dist/memory/correction-propagator.d.ts +44 -0
  316. package/dist/memory/correction-propagator.d.ts.map +1 -0
  317. package/dist/memory/correction-propagator.js +156 -0
  318. package/dist/memory/correction-propagator.js.map +1 -0
  319. package/dist/memory/daily-log.d.ts +67 -0
  320. package/dist/memory/daily-log.d.ts.map +1 -0
  321. package/dist/memory/daily-log.js +143 -0
  322. package/dist/memory/daily-log.js.map +1 -0
  323. package/dist/memory/dynamic-retrieval.d.ts +112 -0
  324. package/dist/memory/dynamic-retrieval.d.ts.map +1 -0
  325. package/dist/memory/dynamic-retrieval.js +908 -0
  326. package/dist/memory/dynamic-retrieval.js.map +1 -0
  327. package/dist/memory/embeddings.d.ts +172 -0
  328. package/dist/memory/embeddings.d.ts.map +1 -0
  329. package/dist/memory/embeddings.js +780 -0
  330. package/dist/memory/embeddings.js.map +1 -0
  331. package/dist/memory/generic-uap-patterns.d.ts +7 -0
  332. package/dist/memory/generic-uap-patterns.d.ts.map +1 -0
  333. package/dist/memory/generic-uap-patterns.js +43 -0
  334. package/dist/memory/generic-uap-patterns.js.map +1 -0
  335. package/dist/memory/hierarchical-memory.d.ts +141 -0
  336. package/dist/memory/hierarchical-memory.d.ts.map +1 -0
  337. package/dist/memory/hierarchical-memory.js +485 -0
  338. package/dist/memory/hierarchical-memory.js.map +1 -0
  339. package/dist/memory/knowledge-graph.d.ts +98 -0
  340. package/dist/memory/knowledge-graph.d.ts.map +1 -0
  341. package/dist/memory/knowledge-graph.js +275 -0
  342. package/dist/memory/knowledge-graph.js.map +1 -0
  343. package/dist/memory/memory-consolidator.d.ts +124 -0
  344. package/dist/memory/memory-consolidator.d.ts.map +1 -0
  345. package/dist/memory/memory-consolidator.js +514 -0
  346. package/dist/memory/memory-consolidator.js.map +1 -0
  347. package/dist/memory/memory-maintenance.d.ts +39 -0
  348. package/dist/memory/memory-maintenance.d.ts.map +1 -0
  349. package/dist/memory/memory-maintenance.js +336 -0
  350. package/dist/memory/memory-maintenance.js.map +1 -0
  351. package/dist/memory/model-router.d.ts +105 -0
  352. package/dist/memory/model-router.d.ts.map +1 -0
  353. package/dist/memory/model-router.js +474 -0
  354. package/dist/memory/model-router.js.map +1 -0
  355. package/dist/memory/multi-view-memory.d.ts +134 -0
  356. package/dist/memory/multi-view-memory.d.ts.map +1 -0
  357. package/dist/memory/multi-view-memory.js +430 -0
  358. package/dist/memory/multi-view-memory.js.map +1 -0
  359. package/dist/memory/predictive-memory.d.ts +79 -0
  360. package/dist/memory/predictive-memory.d.ts.map +1 -0
  361. package/dist/memory/predictive-memory.js +294 -0
  362. package/dist/memory/predictive-memory.js.map +1 -0
  363. package/dist/memory/prepopulate.d.ts +76 -0
  364. package/dist/memory/prepopulate.d.ts.map +1 -0
  365. package/dist/memory/prepopulate.js +832 -0
  366. package/dist/memory/prepopulate.js.map +1 -0
  367. package/dist/memory/semantic-compression.d.ts +77 -0
  368. package/dist/memory/semantic-compression.d.ts.map +1 -0
  369. package/dist/memory/semantic-compression.js +359 -0
  370. package/dist/memory/semantic-compression.js.map +1 -0
  371. package/dist/memory/serverless-qdrant.d.ts +102 -0
  372. package/dist/memory/serverless-qdrant.d.ts.map +1 -0
  373. package/dist/memory/serverless-qdrant.js +369 -0
  374. package/dist/memory/serverless-qdrant.js.map +1 -0
  375. package/dist/memory/short-term/factory.d.ts +26 -0
  376. package/dist/memory/short-term/factory.d.ts.map +1 -0
  377. package/dist/memory/short-term/factory.js +28 -0
  378. package/dist/memory/short-term/factory.js.map +1 -0
  379. package/dist/memory/short-term/indexeddb.d.ts +25 -0
  380. package/dist/memory/short-term/indexeddb.d.ts.map +1 -0
  381. package/dist/memory/short-term/indexeddb.js +64 -0
  382. package/dist/memory/short-term/indexeddb.js.map +1 -0
  383. package/dist/memory/short-term/schema.d.ts +6 -0
  384. package/dist/memory/short-term/schema.d.ts.map +1 -0
  385. package/dist/memory/short-term/schema.js +141 -0
  386. package/dist/memory/short-term/schema.js.map +1 -0
  387. package/dist/memory/short-term/sqlite.d.ts +64 -0
  388. package/dist/memory/short-term/sqlite.d.ts.map +1 -0
  389. package/dist/memory/short-term/sqlite.js +274 -0
  390. package/dist/memory/short-term/sqlite.js.map +1 -0
  391. package/dist/memory/speculative-cache.d.ts +111 -0
  392. package/dist/memory/speculative-cache.d.ts.map +1 -0
  393. package/dist/memory/speculative-cache.js +457 -0
  394. package/dist/memory/speculative-cache.js.map +1 -0
  395. package/dist/memory/task-classifier.d.ts +40 -0
  396. package/dist/memory/task-classifier.d.ts.map +1 -0
  397. package/dist/memory/task-classifier.js +342 -0
  398. package/dist/memory/task-classifier.js.map +1 -0
  399. package/dist/memory/terminal-bench-knowledge.d.ts +48 -0
  400. package/dist/memory/terminal-bench-knowledge.d.ts.map +1 -0
  401. package/dist/memory/terminal-bench-knowledge.js +622 -0
  402. package/dist/memory/terminal-bench-knowledge.js.map +1 -0
  403. package/dist/memory/write-gate.d.ts +39 -0
  404. package/dist/memory/write-gate.d.ts.map +1 -0
  405. package/dist/memory/write-gate.js +190 -0
  406. package/dist/memory/write-gate.js.map +1 -0
  407. package/dist/models/api-client.d.ts +46 -0
  408. package/dist/models/api-client.d.ts.map +1 -0
  409. package/dist/models/api-client.js +182 -0
  410. package/dist/models/api-client.js.map +1 -0
  411. package/dist/models/execution-profiles.d.ts +64 -0
  412. package/dist/models/execution-profiles.d.ts.map +1 -0
  413. package/dist/models/execution-profiles.js +403 -0
  414. package/dist/models/execution-profiles.js.map +1 -0
  415. package/dist/models/executor.d.ts +130 -0
  416. package/dist/models/executor.d.ts.map +1 -0
  417. package/dist/models/executor.js +382 -0
  418. package/dist/models/executor.js.map +1 -0
  419. package/dist/models/index.d.ts +19 -0
  420. package/dist/models/index.d.ts.map +1 -0
  421. package/dist/models/index.js +23 -0
  422. package/dist/models/index.js.map +1 -0
  423. package/dist/models/plan-validator.d.ts +37 -0
  424. package/dist/models/plan-validator.d.ts.map +1 -0
  425. package/dist/models/plan-validator.js +179 -0
  426. package/dist/models/plan-validator.js.map +1 -0
  427. package/dist/models/planner.d.ts +73 -0
  428. package/dist/models/planner.d.ts.map +1 -0
  429. package/dist/models/planner.js +375 -0
  430. package/dist/models/planner.js.map +1 -0
  431. package/dist/models/router.d.ts +96 -0
  432. package/dist/models/router.d.ts.map +1 -0
  433. package/dist/models/router.js +523 -0
  434. package/dist/models/router.js.map +1 -0
  435. package/dist/models/types.d.ts +370 -0
  436. package/dist/models/types.d.ts.map +1 -0
  437. package/dist/models/types.js +232 -0
  438. package/dist/models/types.js.map +1 -0
  439. package/dist/models/unified-router.d.ts +152 -0
  440. package/dist/models/unified-router.d.ts.map +1 -0
  441. package/dist/models/unified-router.js +313 -0
  442. package/dist/models/unified-router.js.map +1 -0
  443. package/dist/policies/convert-policy-to-claude.d.ts +3 -0
  444. package/dist/policies/convert-policy-to-claude.d.ts.map +1 -0
  445. package/dist/policies/convert-policy-to-claude.js +87 -0
  446. package/dist/policies/convert-policy-to-claude.js.map +1 -0
  447. package/dist/policies/database-manager.d.ts +27 -0
  448. package/dist/policies/database-manager.d.ts.map +1 -0
  449. package/dist/policies/database-manager.js +198 -0
  450. package/dist/policies/database-manager.js.map +1 -0
  451. package/dist/policies/enforced-tool-router.d.ts +53 -0
  452. package/dist/policies/enforced-tool-router.d.ts.map +1 -0
  453. package/dist/policies/enforced-tool-router.js +80 -0
  454. package/dist/policies/enforced-tool-router.js.map +1 -0
  455. package/dist/policies/index.d.ts +10 -0
  456. package/dist/policies/index.d.ts.map +1 -0
  457. package/dist/policies/index.js +8 -0
  458. package/dist/policies/index.js.map +1 -0
  459. package/dist/policies/policy-gate.d.ts +59 -0
  460. package/dist/policies/policy-gate.d.ts.map +1 -0
  461. package/dist/policies/policy-gate.js +171 -0
  462. package/dist/policies/policy-gate.js.map +1 -0
  463. package/dist/policies/policy-memory.d.ts +18 -0
  464. package/dist/policies/policy-memory.d.ts.map +1 -0
  465. package/dist/policies/policy-memory.js +126 -0
  466. package/dist/policies/policy-memory.js.map +1 -0
  467. package/dist/policies/policy-tools.d.ts +11 -0
  468. package/dist/policies/policy-tools.d.ts.map +1 -0
  469. package/dist/policies/policy-tools.js +66 -0
  470. package/dist/policies/policy-tools.js.map +1 -0
  471. package/dist/policies/schemas/policy.d.ts +69 -0
  472. package/dist/policies/schemas/policy.d.ts.map +1 -0
  473. package/dist/policies/schemas/policy.js +31 -0
  474. package/dist/policies/schemas/policy.js.map +1 -0
  475. package/dist/tasks/coordination.d.ts +83 -0
  476. package/dist/tasks/coordination.d.ts.map +1 -0
  477. package/dist/tasks/coordination.js +291 -0
  478. package/dist/tasks/coordination.js.map +1 -0
  479. package/dist/tasks/database.d.ts +19 -0
  480. package/dist/tasks/database.d.ts.map +1 -0
  481. package/dist/tasks/database.js +149 -0
  482. package/dist/tasks/database.js.map +1 -0
  483. package/dist/tasks/decoder-gate.d.ts +64 -0
  484. package/dist/tasks/decoder-gate.d.ts.map +1 -0
  485. package/dist/tasks/decoder-gate.js +268 -0
  486. package/dist/tasks/decoder-gate.js.map +1 -0
  487. package/dist/tasks/index.d.ts +6 -0
  488. package/dist/tasks/index.d.ts.map +1 -0
  489. package/dist/tasks/index.js +6 -0
  490. package/dist/tasks/index.js.map +1 -0
  491. package/dist/tasks/service.d.ts +40 -0
  492. package/dist/tasks/service.d.ts.map +1 -0
  493. package/dist/tasks/service.js +671 -0
  494. package/dist/tasks/service.js.map +1 -0
  495. package/dist/tasks/types.d.ts +238 -0
  496. package/dist/tasks/types.d.ts.map +1 -0
  497. package/dist/tasks/types.js +74 -0
  498. package/dist/tasks/types.js.map +1 -0
  499. package/dist/telemetry/index.d.ts +2 -0
  500. package/dist/telemetry/index.d.ts.map +1 -0
  501. package/dist/telemetry/index.js +2 -0
  502. package/dist/telemetry/index.js.map +1 -0
  503. package/dist/telemetry/session-telemetry.d.ts +56 -0
  504. package/dist/telemetry/session-telemetry.d.ts.map +1 -0
  505. package/dist/telemetry/session-telemetry.js +807 -0
  506. package/dist/telemetry/session-telemetry.js.map +1 -0
  507. package/dist/types/analysis.d.ts +82 -0
  508. package/dist/types/analysis.d.ts.map +1 -0
  509. package/dist/types/analysis.js +2 -0
  510. package/dist/types/analysis.js.map +1 -0
  511. package/dist/types/config.d.ts +3324 -0
  512. package/dist/types/config.d.ts.map +1 -0
  513. package/dist/types/config.js +418 -0
  514. package/dist/types/config.js.map +1 -0
  515. package/dist/types/coordination.d.ts +240 -0
  516. package/dist/types/coordination.d.ts.map +1 -0
  517. package/dist/types/coordination.js +43 -0
  518. package/dist/types/coordination.js.map +1 -0
  519. package/dist/types/index.d.ts +4 -0
  520. package/dist/types/index.d.ts.map +1 -0
  521. package/dist/types/index.js +4 -0
  522. package/dist/types/index.js.map +1 -0
  523. package/dist/uap-droids-strict.d.ts +59 -0
  524. package/dist/uap-droids-strict.d.ts.map +1 -0
  525. package/dist/uap-droids-strict.js +200 -0
  526. package/dist/uap-droids-strict.js.map +1 -0
  527. package/dist/utils/config-manager.d.ts +30 -0
  528. package/dist/utils/config-manager.d.ts.map +1 -0
  529. package/dist/utils/config-manager.js +41 -0
  530. package/dist/utils/config-manager.js.map +1 -0
  531. package/dist/utils/fetch-with-retry.d.ts +5 -0
  532. package/dist/utils/fetch-with-retry.d.ts.map +1 -0
  533. package/dist/utils/fetch-with-retry.js +61 -0
  534. package/dist/utils/fetch-with-retry.js.map +1 -0
  535. package/dist/utils/merge-claude-md.d.ts +28 -0
  536. package/dist/utils/merge-claude-md.d.ts.map +1 -0
  537. package/dist/utils/merge-claude-md.js +342 -0
  538. package/dist/utils/merge-claude-md.js.map +1 -0
  539. package/dist/utils/rate-limiter.d.ts +58 -0
  540. package/dist/utils/rate-limiter.d.ts.map +1 -0
  541. package/dist/utils/rate-limiter.js +100 -0
  542. package/dist/utils/rate-limiter.js.map +1 -0
  543. package/dist/utils/string-similarity.d.ts +37 -0
  544. package/dist/utils/string-similarity.d.ts.map +1 -0
  545. package/dist/utils/string-similarity.js +114 -0
  546. package/dist/utils/string-similarity.js.map +1 -0
  547. package/dist/utils/validate-json.d.ts +51 -0
  548. package/dist/utils/validate-json.d.ts.map +1 -0
  549. package/dist/utils/validate-json.js +94 -0
  550. package/dist/utils/validate-json.js.map +1 -0
  551. package/docs/INDEX.md +66 -0
  552. package/docs/architecture/MULTI_MODEL.md +224 -0
  553. package/docs/architecture/SYSTEM_ANALYSIS.md +1117 -0
  554. package/docs/architecture/UAP_COMPLIANCE.md +217 -0
  555. package/docs/architecture/UAP_PROTOCOL.md +339 -0
  556. package/docs/architecture/UAP_STRICT_DROIDS.md +172 -0
  557. package/docs/archive/BALLS_MODE_SELF_ANALYSIS.md +260 -0
  558. package/docs/archive/FAILING_TASKS_SOLUTION_PLAN.md +668 -0
  559. package/docs/archive/JINJA2-SYSTEM-MESSAGE-FIX.md +209 -0
  560. package/docs/archive/NPM-PUBLISH-V0.9.1.md +240 -0
  561. package/docs/archive/OPTIMIZATION_OPTIONS.md +334 -0
  562. package/docs/archive/SETUP_IMPROVEMENTS.md +213 -0
  563. package/docs/archive/UAP_GENERIC_OPTIMIZATION_PLAN.md +270 -0
  564. package/docs/archive/UAP_V103_PATTERN_DESIGN.md +315 -0
  565. package/docs/archive/UAP_V104_COMPLIANCE_DESIGN.md +223 -0
  566. package/docs/archive/changelog/2026-03-10_uap-100-compliance.md +77 -0
  567. package/docs/archive/changelog/2026-03-10_uap-full-system-verification.md +109 -0
  568. package/docs/benchmarks/ACCURACY_ANALYSIS.md +471 -0
  569. package/docs/benchmarks/TOKEN_OPTIMIZATION.md +572 -0
  570. package/docs/benchmarks/VALIDATION_PLAN.md +568 -0
  571. package/docs/benchmarks/VALIDATION_RESULTS.md +161 -0
  572. package/docs/deployment/DEPLOYMENT.md +895 -0
  573. package/docs/deployment/DEPLOYMENT_STRATEGIES.md +518 -0
  574. package/docs/deployment/DEPLOY_BATCHER_ANALYSIS.md +856 -0
  575. package/docs/deployment/DEPLOY_BATCHING.md +273 -0
  576. package/docs/deployment/DEPLOY_BUCKETING_ANALYSIS.md +420 -0
  577. package/docs/deployment/QWEN35_LLAMA_CPP.md +265 -0
  578. package/docs/getting-started/INTEGRATION.md +449 -0
  579. package/docs/getting-started/OVERVIEW.md +344 -0
  580. package/docs/getting-started/SETUP.md +203 -0
  581. package/docs/integrations/MCP_ROUTER_SETUP.md +445 -0
  582. package/docs/integrations/RTK_INTEGRATION.md +468 -0
  583. package/docs/operations/TROUBLESHOOTING.md +660 -0
  584. package/docs/reference/API_REFERENCE.md +903 -0
  585. package/docs/reference/FEATURES.md +472 -0
  586. package/docs/reference/HARNESS-MATRIX.md +318 -0
  587. package/docs/reference/UAP_CLI_REFERENCE.md +600 -0
  588. package/docs/research/BEHAVIORAL_PATTERNS.md +228 -0
  589. package/docs/research/DOMAIN_STRATEGIES.md +316 -0
  590. package/docs/research/MEMORY_SYSTEMS_COMPARISON.md +812 -0
  591. package/docs/research/PATTERN_ANALYSIS_2026-01-18.md +436 -0
  592. package/docs/research/PERFORMANCE_ANALYSIS_2026-01-18.md +209 -0
  593. package/docs/research/PERFORMANCE_TEST_PLAN.md +383 -0
  594. package/docs/research/TERMINAL_BENCH_LEARNINGS.md +217 -0
  595. package/package.json +113 -0
  596. package/scripts/README.md +161 -0
  597. package/templates/CLAUDE.template.md +10 -0
  598. package/templates/CLAUDE_ARCHITECTURE.template.md +103 -0
  599. package/templates/CLAUDE_CODING.template.md +127 -0
  600. package/templates/CLAUDE_DROIDS.template.md +109 -0
  601. package/templates/CLAUDE_MEMORY.template.md +131 -0
  602. package/templates/CLAUDE_WORKFLOWS.template.md +139 -0
  603. package/templates/PROJECT.template.md +209 -0
  604. package/templates/SCHEMA.md +57 -0
  605. package/templates/archive/CLAUDE.template.root-v6.md +534 -0
  606. package/templates/archive/CLAUDE.template.v6.md +534 -0
  607. package/templates/hooks/forgecode/pre-compact.sh +68 -0
  608. package/templates/hooks/forgecode/session-start.sh +169 -0
  609. package/templates/hooks/forgecode.plugin.sh +128 -0
  610. package/templates/hooks/pre-compact.sh +74 -0
  611. package/templates/hooks/session-start.sh +366 -0
  612. package/tools/agents/README.md +224 -0
  613. package/tools/agents/UAP/README.md +386 -0
  614. package/tools/agents/UAP/__init__.py +9 -0
  615. package/tools/agents/UAP/cli.py +901 -0
  616. package/tools/agents/UAP/compliance_verify.sh +108 -0
  617. package/tools/agents/UAP/full_verification.sh +126 -0
  618. package/tools/agents/UAP/version.py +32 -0
  619. package/tools/agents/benchmarks/benchmark_memory_systems.py +730 -0
  620. package/tools/agents/benchmarks/results/benchmark_20260106_064817.json +170 -0
  621. package/tools/agents/benchmarks/results/benchmark_20260106_064817.md +51 -0
  622. package/tools/agents/config/chat_template.jinja +77 -0
  623. package/tools/agents/config/tool-call-schema.json +19 -0
  624. package/tools/agents/config/tool-call.gbnf +58 -0
  625. package/tools/agents/docker/Dockerfile.python +52 -0
  626. package/tools/agents/docker/Dockerfile.ubuntu +55 -0
  627. package/tools/agents/docker-compose.qdrant.yml +24 -0
  628. package/tools/agents/install-opencode-local.sh.j2 +135 -0
  629. package/tools/agents/migrations/apply.py +256 -0
  630. package/tools/agents/opencode_uap_agent.py +1505 -0
  631. package/tools/agents/plugin/README.md +91 -0
  632. package/tools/agents/plugin/index.ts +46 -0
  633. package/tools/agents/plugin/pre-compact.sh +68 -0
  634. package/tools/agents/plugin/session-start.sh +175 -0
  635. package/tools/agents/plugin/uap-commands.ts +45 -0
  636. package/tools/agents/plugin/uap-droids.ts +54 -0
  637. package/tools/agents/plugin/uap-patterns.ts +54 -0
  638. package/tools/agents/plugin/uap-skills.ts +52 -0
  639. package/tools/agents/plugins/uap-enforce.ts +314 -0
  640. package/tools/agents/scripts/__pycache__/tool_call_wrapper.cpython-313.pyc +0 -0
  641. package/tools/agents/scripts/chat_template_verifier.py +343 -0
  642. package/tools/agents/scripts/fix-qwen-template.js +38 -0
  643. package/tools/agents/scripts/fix_qwen_chat_template.py +316 -0
  644. package/tools/agents/scripts/generate_lora_training_data.py +412 -0
  645. package/tools/agents/scripts/init_qdrant.py +151 -0
  646. package/tools/agents/scripts/memory_migration.py +560 -0
  647. package/tools/agents/scripts/migrate_memory_to_qdrant.py +110 -0
  648. package/tools/agents/scripts/prepare_lora.sh +512 -0
  649. package/tools/agents/scripts/query_memory.py +200 -0
  650. package/tools/agents/scripts/qwen-tool-call-test.js +38 -0
  651. package/tools/agents/scripts/qwen-tool-call-wrapper.js +38 -0
  652. package/tools/agents/scripts/qwen_tool_call_test.py +464 -0
  653. package/tools/agents/scripts/qwen_tool_call_wrapper.py +686 -0
  654. package/tools/agents/scripts/start-services.sh +96 -0
  655. package/tools/agents/scripts/tool-choice-proxy.cjs +296 -0
  656. package/tools/agents/scripts/tool_call_test.py +656 -0
  657. package/tools/agents/scripts/tool_call_wrapper.py +799 -0
  658. package/tools/agents/tests/test_uap_compliance.py +257 -0
  659. package/tools/agents/uap_agent.py +122 -0
  660. package/tools/agents/uap_agent_install.sh +12 -0
@@ -0,0 +1,904 @@
1
+ /**
2
+ * Model Integration Benchmark
3
+ *
4
+ * Runs real API calls against multiple LLM providers via Factory.ai droid exec CLI
5
+ * to compare model performance on UAP memory-enhanced tasks.
6
+ *
7
+ * Assumptions:
8
+ * - FACTORY_API_KEY (or DROID_API_KEY) is set in environment for Factory.ai API access
9
+ * - Models: Claude Opus 4.5, GLM 4.7, GPT 5.2 Codex, GPT 5.2
10
+ * - droid CLI is installed and accessible
11
+ * - UAP CLI is available for memory initialization
12
+ *
13
+ * What this handles:
14
+ * - Full UAP setup (init, analyze, generate, memory start, prepopulate)
15
+ * - CLAUDE.md reading and context injection
16
+ * - Real API calls to multiple LLM providers via droid exec
17
+ * - Task execution comparison across models with/without UAP
18
+ * - Performance metrics collection (latency, success, tokens)
19
+ * - Result aggregation and reporting
20
+ *
21
+ * What this does NOT handle:
22
+ * - Rate limiting (caller responsibility)
23
+ * - Cost tracking (would require billing API)
24
+ * - Streaming responses (uses completion mode)
25
+ */
26
+ import { readFileSync, writeFileSync, existsSync } from 'fs';
27
+ import { join, dirname } from 'path';
28
+ import { fileURLToPath } from 'url';
29
+ import { execSync } from 'child_process';
30
+ const __filename = fileURLToPath(import.meta.url);
31
+ const __dirname = dirname(__filename);
32
+ const PROJECT_ROOT = join(__dirname, '../..');
33
/**
 * Initialize the UAP system for benchmark testing.
 *
 * Runs, in order: `uam init`, `uam analyze`, `uam generate`,
 * `uam memory start` and `uam memory prepopulate`, then checks that
 * CLAUDE.md exists in PROJECT_ROOT.
 *
 * Every step is best-effort: a failing step is recorded in `errors` and the
 * remaining steps still run. The status flags are only set when the
 * corresponding command actually exited successfully.
 *
 * @param {boolean} [verbose=false] - Print progress messages to the console.
 * @returns {Promise<{initialized: boolean, memoryStarted: boolean,
 *   memoryPrepopulated: boolean, claudeMdLoaded: boolean, errors: string[]}>}
 *   Status flags plus the list of error messages collected along the way.
 *   This function does not throw.
 */
async function setupUAP(verbose = false) {
    const result = {
        initialized: false,
        memoryStarted: false,
        memoryPrepopulated: false,
        claudeMdLoaded: false,
        errors: [],
    };
    const log = (msg) => {
        if (verbose)
            console.log(` [UAP Setup] ${msg}`);
    };
    const describe = (e) => (e instanceof Error ? e.message : String(e));
    // stdio: 'pipe' keeps the child's stdout/stderr off the console; on failure
    // execSync includes the captured stderr in the thrown error's message.
    const execOptions = (timeout) => ({
        encoding: 'utf-8',
        cwd: PROJECT_ROOT,
        stdio: 'pipe',
        ...(timeout === undefined ? {} : { timeout }),
    });
    try {
        // Step 1: Check if UAP CLI is available and remember HOW it is reachable,
        // so the later steps use the same invocation that was just verified.
        log('Checking UAP CLI availability...');
        let cli = 'uam';
        try {
            execSync('uam --version', execOptions());
        }
        catch {
            // Not on PATH: try with npx (throws to the outer catch if that fails too)
            execSync('npx uam --version', execOptions());
            cli = 'npx uam';
        }
        // Steps 2-6. `flag` names the result field to set when the step succeeds.
        const steps = [
            // Step 2: Initialize UAP (idempotent - safe to run multiple times)
            { label: 'init', message: 'Running uam init...', args: 'init --non-interactive', timeout: 30000, flag: 'initialized' },
            // Step 3: Analyze project structure
            { label: 'analyze', message: 'Running uam analyze...', args: 'analyze', timeout: 60000 },
            // Step 4: Generate/update CLAUDE.md
            { label: 'generate', message: 'Running uam generate...', args: 'generate', timeout: 30000 },
            // Step 5: Start memory services
            { label: 'memory start', message: 'Starting memory services...', args: 'memory start', timeout: 60000, flag: 'memoryStarted' },
            // Step 6: Prepopulate memory from docs and git history
            { label: 'memory prepopulate', message: 'Prepopulating memory from docs and git...', args: 'memory prepopulate --docs --git --limit 100', timeout: 120000, flag: 'memoryPrepopulated' },
        ];
        for (const step of steps) {
            log(step.message);
            try {
                // No `|| true` here: a non-zero exit must reach the catch below,
                // otherwise the status flag would be set for a failed command.
                execSync(`${cli} ${step.args}`, execOptions(step.timeout));
                if (step.flag) {
                    result[step.flag] = true;
                }
            }
            catch (e) {
                result.errors.push(`${step.label} failed: ${describe(e)}`);
            }
        }
        // Step 7: Verify CLAUDE.md exists
        const claudeMdPath = join(PROJECT_ROOT, 'CLAUDE.md');
        if (existsSync(claudeMdPath)) {
            result.claudeMdLoaded = true;
            log('CLAUDE.md found and ready');
        }
        else {
            result.errors.push('CLAUDE.md not found after setup');
        }
    }
    catch (error) {
        result.errors.push(`UAP setup error: ${describe(error)}`);
    }
    return result;
}
142
/**
 * Build the UAP memory context string that is prepended to prompts.
 *
 * Sources, in order:
 *  1. Selected sections of PROJECT_ROOT/CLAUDE.md (Code Field, Testing
 *     Requirements, Repository Structure), each truncated to a fixed length.
 *  2. Recent rows and "lesson" rows from the short-term memory SQLite DB,
 *     read through the `sqlite3` CLI (best-effort, skipped on any failure).
 *  3. A static block of coding standards, patterns and gotchas that is
 *     always appended.
 *
 * @returns {string} The assembled context, sections joined by newlines.
 */
function loadUAPMemoryContext() {
    const sections = [];
    // Read CLAUDE.md
    const claudeMdPath = join(PROJECT_ROOT, 'CLAUDE.md');
    if (existsSync(claudeMdPath)) {
        const claudeMd = readFileSync(claudeMdPath, 'utf-8');
        // Extract key sections from CLAUDE.md
        sections.push('## UAP Memory Context (from CLAUDE.md)\n');
        // Section regexes: the heading text is confined to ONE line ([^\n]*) so the
        // match starts at the heading that actually contains the title; the body is
        // then taken lazily up to the next H2, a horizontal rule, or end of file.
        // Extract Code Field section
        const codeFieldMatch = claudeMd.match(/## [^\n]*CODE FIELD[\s\S]*?(?=\n## |\n---\n|$)/);
        if (codeFieldMatch) {
            sections.push('### Code Field Guidelines\n');
            sections.push(codeFieldMatch[0].slice(0, 1500) + '\n');
        }
        // Extract Testing Requirements
        const testingMatch = claudeMd.match(/## [^\n]*Testing Requirements[\s\S]*?(?=\n## |\n---\n|$)/);
        if (testingMatch) {
            sections.push('### Testing Requirements\n');
            sections.push(testingMatch[0].slice(0, 500) + '\n');
        }
        // Extract Repository Structure: heading plus the first fenced block that
        // follows it. The scan for the opening fence may cross newlines but must
        // not run into the next H2 section.
        const structureMatch = claudeMd.match(/## Repository Structure(?:(?!\n## )[\s\S])*?```[\s\S]*?```/);
        if (structureMatch) {
            sections.push('### Repository Structure\n');
            sections.push(structureMatch[0].slice(0, 1000) + '\n');
        }
    }
    // Query short-term memory from SQLite
    const dbPath = join(PROJECT_ROOT, 'agents/data/memory/short_term.db');
    if (existsSync(dbPath)) {
        // `2>/dev/null || true` makes a failing query yield empty output instead of throwing.
        const query = (sql) => execSync(`sqlite3 "${dbPath}" "${sql}" 2>/dev/null || true`, { encoding: 'utf-8', cwd: PROJECT_ROOT }).trim();
        try {
            const recentMemories = query('SELECT type, content FROM memories ORDER BY id DESC LIMIT 10;');
            if (recentMemories) {
                sections.push('### Recent Session Memory\n');
                sections.push('```\n' + recentMemories.slice(0, 1000) + '\n```\n');
            }
            // Get lessons learned
            const lessons = query("SELECT content FROM memories WHERE type='lesson' ORDER BY id DESC LIMIT 5;");
            if (lessons) {
                sections.push('### Lessons Learned\n');
                sections.push(lessons.slice(0, 500) + '\n');
            }
        }
        catch {
            // Memory DB not available (e.g. no shell / sqlite3): context is built without it
        }
    }
    // Add static context as fallback/supplement
    sections.push(`
### Project Coding Standards
- Use TypeScript strict mode
- All functions must have JSDoc comments with @param and @returns
- Error handling uses custom AppError class that extends Error
- Prefer async/await over callbacks and Promises
- Use zod for runtime input validation
- Export types and interfaces alongside implementations
- Use Map for key-value storage, Set for unique collections

### Common Patterns
- Singleton pattern: private constructor + static getInstance()
- Strategy pattern: interface + multiple implementations
- Factory pattern: static create() methods
- Error handling: try/catch with specific error types
- Exponential backoff: delay = baseMs * Math.pow(2, attempt)

### Known Gotchas (from memory)
- Always check array bounds: use i < length, not i <= length
- Handle empty arrays explicitly before operations
- Include cleanup logic for resources (timers, connections)
- JSON.parse throws on invalid input - always wrap in try/catch
- Array methods like reduce need initial value for empty arrays
- Map.get() returns undefined for missing keys

---

`);
    return sections.join('\n');
}
223
// Memoized memory context: built lazily on first use, then reused for the
// remainder of the benchmark run.
let cachedMemoryContext = null;
/**
 * Return the UAP memory context, loading it through loadUAPMemoryContext()
 * only when no (truthy) cached value exists yet.
 *
 * @returns {string} The cached context string.
 */
function getUAPMemoryContext() {
    cachedMemoryContext = cachedMemoryContext || loadUAPMemoryContext();
    return cachedMemoryContext;
}
231
+ // ============================================================================
232
+ // Model Configurations (per Factory.ai droid CLI available models)
233
+ // ============================================================================
234
// Models under test, one row per model: [id, display name, provider, apiModel].
// `id` is what callers filter on; `apiModel` is the identifier handed to the client.
const MODELS = [
    ['opus-4.5', 'Claude Opus 4.5', 'anthropic', 'claude-opus-4-5-20251101'],
    ['glm-4.7', 'GLM 4.7 (Droid Core)', 'zhipu', 'glm-4.7'],
    ['gpt-5.2-codex', 'GPT 5.2 Codex', 'openai', 'gpt-5.2-codex'],
    ['gpt-5.2', 'GPT 5.2', 'openai', 'gpt-5.2'],
].map(([id, name, provider, apiModel]) => ({ id, name, provider, apiModel }));
260
+ // ============================================================================
261
+ // Benchmark Tasks
262
+ // ============================================================================
263
+ const BENCHMARK_TASKS = [ // Task suite; generateComparison and generateMarkdownReport look tasks up here by id
264
+ {
265
+ id: 'task-001-code-generation',
266
+ name: 'TypeScript Function Generation',
267
+ description: 'Generate a well-typed TypeScript function',
268
+ prompt: `Write a TypeScript function called 'calculateAverage' that:
269
+ 1. Takes an array of numbers as input
270
+ 2. Returns the arithmetic mean
271
+ 3. Handles empty arrays (return 0)
272
+ 4. Has proper type annotations
273
+
274
+ Return ONLY the function code, no explanations.`,
275
+ difficulty: 'easy', // generateComparison buckets results by 'easy' | 'medium' | 'hard'
276
+ category: 'code-generation',
277
+ expectedPatterns: ['function calculateAverage', 'number[]', ': number', 'length', 'return'], // plain substrings; evaluateResponse matches them case-insensitively
278
+ maxTokens: 500, // NOTE(review): runTaskForModel does not pass maxTokens to client.complete() - confirm where it is consumed
279
+ },
280
+ {
281
+ id: 'task-002-bug-fix', // the prompt's loop uses i <= nums.length; expectedPatterns require the corrected 'i < nums.length'
282
+ name: 'Bug Detection and Fix',
283
+ description: 'Identify and fix a bug in code',
284
+ prompt: `Find and fix the bug in this TypeScript code:
285
+
286
+ function sumPositive(nums: number[]): number {
287
+ let sum = 0;
288
+ for (let i = 0; i <= nums.length; i++) {
289
+ if (nums[i] > 0) {
290
+ sum += nums[i];
291
+ }
292
+ }
293
+ return sum;
294
+ }
295
+
296
+ Return ONLY the corrected function code.`,
297
+ difficulty: 'easy',
298
+ category: 'bug-fix',
299
+ expectedPatterns: ['i < nums.length', 'function sumPositive', 'return sum'],
300
+ maxTokens: 500,
301
+ },
302
+ {
303
+ id: 'task-003-pattern-application',
304
+ name: 'Design Pattern Implementation',
305
+ description: 'Implement a singleton pattern',
306
+ prompt: `Implement a TypeScript singleton class called 'ConfigManager' that:
307
+ 1. Has a private constructor
308
+ 2. Has a static getInstance() method
309
+ 3. Has get(key: string) and set(key: string, value: any) methods
310
+ 4. Stores configuration in a private Map
311
+
312
+ Return ONLY the class code.`,
313
+ difficulty: 'medium',
314
+ category: 'patterns',
315
+ expectedPatterns: [
316
+ 'class ConfigManager',
317
+ 'private constructor',
318
+ 'static getInstance',
319
+ 'private static instance',
320
+ 'Map',
321
+ ],
322
+ maxTokens: 800,
323
+ },
324
+ {
325
+ id: 'task-004-refactoring',
326
+ name: 'Code Refactoring',
327
+ description: 'Refactor code for better maintainability',
328
+ prompt: `Refactor this code to follow SOLID principles and improve readability:
329
+
330
+ function processOrder(order: any) {
331
+ if (order.type === 'digital') {
332
+ console.log('Sending email with download link');
333
+ order.status = 'delivered';
334
+ } else if (order.type === 'physical') {
335
+ console.log('Creating shipping label');
336
+ order.status = 'shipped';
337
+ } else if (order.type === 'subscription') {
338
+ console.log('Activating subscription');
339
+ order.status = 'active';
340
+ }
341
+ console.log('Order processed: ' + order.id);
342
+ return order;
343
+ }
344
+
345
+ Provide the refactored TypeScript code using proper interfaces and a strategy pattern.`,
346
+ difficulty: 'medium',
347
+ category: 'refactoring',
348
+ expectedPatterns: ['interface', 'class', 'implements', 'process'],
349
+ maxTokens: 1200,
350
+ },
351
+ {
352
+ id: 'task-005-memory-context',
353
+ name: 'Context-Aware Code Generation',
354
+ description: 'Generate code using provided context',
355
+ prompt: `Given the following project context from memory:
356
+
357
+ MEMORY CONTEXT:
358
+ - Project uses src/utils/ for utility functions
359
+ - All functions must have JSDoc comments
360
+ - Error handling uses custom AppError class
361
+ - Prefer async/await over callbacks
362
+ - Use zod for input validation
363
+
364
+ Write a utility function 'validateAndParseJSON' that:
365
+ 1. Takes a string input
366
+ 2. Validates it's valid JSON using zod
367
+ 3. Returns the parsed object or throws AppError
368
+ 4. Has proper JSDoc documentation
369
+
370
+ Return ONLY the function code with JSDoc.`,
371
+ difficulty: 'medium',
372
+ category: 'memory', // this prompt carries its own inline MEMORY CONTEXT bullet list
373
+ expectedPatterns: ['async', 'zod', 'AppError', '@param', '@returns', 'validateAndParseJSON'],
374
+ maxTokens: 800,
375
+ },
376
+ {
377
+ id: 'task-006-complex-algorithm',
378
+ name: 'Algorithm Implementation',
379
+ description: 'Implement a complex algorithm with proper typing',
380
+ prompt: `Implement a TypeScript function 'findShortestPath' using Dijkstra's algorithm:
381
+
382
+ 1. Input: weighted graph as adjacency list Map<string, Map<string, number>>
383
+ 2. Input: start node (string), end node (string)
384
+ 3. Output: { path: string[], distance: number } or null if no path
385
+ 4. Handle disconnected nodes properly
386
+ 5. Use proper TypeScript types
387
+
388
+ Return ONLY the function code with type definitions.`,
389
+ difficulty: 'hard',
390
+ category: 'algorithms',
391
+ expectedPatterns: [
392
+ 'function findShortestPath',
393
+ 'Map<string',
394
+ 'distance',
395
+ 'path',
396
+ 'while',
397
+ 'return',
398
+ ],
399
+ maxTokens: 1500,
400
+ },
401
+ {
402
+ id: 'task-007-multi-step-task',
403
+ name: 'Multi-Step Code Generation',
404
+ description: 'Complete a multi-step implementation task',
405
+ prompt: `Create a complete TypeScript module for a rate limiter with these requirements:
406
+
407
+ 1. Interface RateLimiterConfig { maxRequests: number; windowMs: number; }
408
+ 2. Class RateLimiter with:
409
+ - constructor(config: RateLimiterConfig)
410
+ - isAllowed(clientId: string): boolean
411
+ - getRemainingRequests(clientId: string): number
412
+ - reset(clientId?: string): void
413
+ 3. Use Map for tracking requests per client
414
+ 4. Include proper cleanup of expired entries
415
+ 5. Export both the class and interface
416
+
417
+ Return the complete module code.`,
418
+ difficulty: 'hard',
419
+ category: 'multi-step',
420
+ expectedPatterns: [
421
+ 'interface RateLimiterConfig',
422
+ 'class RateLimiter',
423
+ 'isAllowed',
424
+ 'getRemainingRequests',
425
+ 'reset',
426
+ 'Map',
427
+ 'export',
428
+ ],
429
+ maxTokens: 2000,
430
+ },
431
+ {
432
+ id: 'task-008-error-handling',
433
+ name: 'Comprehensive Error Handling',
434
+ description: 'Implement robust error handling',
435
+ prompt: `Create a TypeScript async function 'fetchWithRetry' that:
436
+
437
+ 1. Takes url: string, options?: RequestInit, retryConfig?: { maxRetries: number; backoffMs: number; }
438
+ 2. Implements exponential backoff retry logic
439
+ 3. Handles network errors, timeout, and HTTP errors (4xx, 5xx)
440
+ 4. Returns Promise<Response> or throws a detailed custom error
441
+ 5. Logs each retry attempt
442
+ 6. Has proper TypeScript types for all parameters and return values
443
+
444
+ Return ONLY the function code with any necessary type definitions.`,
445
+ difficulty: 'hard',
446
+ category: 'error-handling',
447
+ expectedPatterns: [
448
+ 'async function fetchWithRetry',
449
+ 'retry',
450
+ 'backoff',
451
+ 'catch',
452
+ 'throw',
453
+ 'Promise<Response>',
454
+ ],
455
+ maxTokens: 1200,
456
+ },
457
+ ];
458
+ // ============================================================================
459
+ // Droid Exec Client
460
+ // ============================================================================
461
/**
 * Minimal client that runs prompts through the `droid exec` CLI.
 *
 * Each prompt is written to a temporary file under `tmpDir` and passed to the
 * CLI with `-f`, which avoids shell-escaping the prompt text. The API key is
 * handed to the child process through its environment only; it is never
 * placed on the command line, so it cannot show up in process listings or in
 * the "Command failed: ..." text of a failed execSync call.
 *
 * NOTE: execSync blocks the event loop until the CLI exits, so calls made
 * through one Node process run one after another.
 */
class DroidExecClient {
    apiKey;
    tmpDir;
    autoLevel;
    /**
     * @param {string} apiKey - Value exported to the CLI as FACTORY_API_KEY.
     * @param {string} [autoLevel='low'] - Value passed to `droid exec --auto`.
     */
    constructor(apiKey, autoLevel = 'low') {
        this.apiKey = apiKey;
        this.autoLevel = autoLevel;
        this.tmpDir = '/tmp/uap-benchmark';
        try {
            execSync(`mkdir -p ${this.tmpDir}`, { encoding: 'utf-8' });
        }
        catch {
            // Best effort: if the directory is missing, complete() fails when it
            // tries to write the prompt file.
        }
    }
    /**
     * Run one prompt against `model`.
     *
     * @param {string} model - Model identifier passed to `--model`.
     * @param {string} prompt - Prompt text, written to a temp file.
     * @returns {Promise<{content: string, tokensUsed: number, latencyMs: number}>}
     *   Trimmed CLI output, wall-clock latency, and `tokensUsed` (always 0:
     *   the CLI output is not parsed for token counts).
     * @throws {Error} `droid exec failed: ...` when the CLI exits non-zero or
     *   times out; the API key is redacted from the message.
     */
    async complete(model, prompt) {
        const startTime = Date.now();
        // Write prompt to temp file to avoid shell escaping issues. pid + random
        // suffix keep names unique across processes and same-millisecond calls.
        const uniqueSuffix = `${process.pid}-${startTime}-${Math.random().toString(36).slice(2)}`;
        const promptFile = `${this.tmpDir}/prompt-${uniqueSuffix}.txt`;
        writeFileSync(promptFile, prompt, 'utf-8');
        try {
            // --auto <level> controls how much the droid may do without confirmation
            const output = execSync(`droid exec --model "${model}" --auto ${this.autoLevel} -f "${promptFile}"`, {
                encoding: 'utf-8',
                timeout: 300000, // 5 minutes for complex tasks
                maxBuffer: 10 * 1024 * 1024,
                env: { ...process.env, FACTORY_API_KEY: this.apiKey },
            });
            const latencyMs = Date.now() - startTime;
            return {
                content: output.trim(),
                tokensUsed: 0,
                latencyMs,
            };
        }
        catch (error) {
            const rawMessage = error instanceof Error ? error.message : String(error);
            // Defensive: callers store this message in results/reports, so make
            // sure the key cannot leak even if the CLI echoes it on stderr.
            const safeMessage = this.apiKey
                ? rawMessage.split(this.apiKey).join('[REDACTED]')
                : rawMessage;
            throw new Error(`droid exec failed: ${safeMessage}`);
        }
        finally {
            // Clean up temp file on both success and failure
            try {
                execSync(`rm "${promptFile}"`, { encoding: 'utf-8' });
            }
            catch {
                // ignore cleanup failures
            }
        }
    }
}
516
+ // ============================================================================
517
+ // Benchmark Runner
518
+ // ============================================================================
519
/**
 * Pick out the expected patterns that occur in a model response.
 *
 * Matching is a case-insensitive plain-substring test. The returned array
 * keeps the patterns' original casing and order.
 *
 * @param {string} response - Model output to inspect.
 * @param {string[]} expectedPatterns - Substrings to look for.
 * @returns {string[]} The patterns found in `response`.
 */
function evaluateResponse(response, expectedPatterns) {
    const haystack = response.toLowerCase();
    const found = [];
    for (const needle of expectedPatterns) {
        if (haystack.includes(needle.toLowerCase())) {
            found.push(needle);
        }
    }
    return found;
}
523
/**
 * Execute a single benchmark task against one model.
 *
 * The task counts as a success when at least 60% of its expected patterns
 * are found in the response. Any error thrown while building the prompt or
 * calling the client is captured in `error` rather than propagated.
 *
 * @param {{complete: Function}} client - Object exposing `complete(apiModel, prompt)`.
 * @param {{id: string, apiModel: string}} model - Model to query.
 * @param {{id: string, prompt: string, expectedPatterns: string[]}} task - Task to run.
 * @param {boolean} [withMemory=false] - Prefix the prompt with the UAP memory context.
 * @returns {Promise<object>} Result record: taskId, modelId, success, latencyMs,
 *   tokensUsed, response, matchedPatterns and, on failure, error.
 */
async function runTaskForModel(client, model, task, withMemory = false) {
    const outcome = {
        taskId: task.id,
        modelId: model.id,
        success: false,
        latencyMs: 0,
        tokensUsed: 0,
        response: '',
        matchedPatterns: [],
    };
    try {
        // Prefix the UAP memory context (CLAUDE.md + memory DB) when requested
        let prompt = task.prompt;
        if (withMemory) {
            prompt = getUAPMemoryContext() + task.prompt;
        }
        const reply = await client.complete(model.apiModel, prompt);
        outcome.response = reply.content;
        outcome.latencyMs = reply.latencyMs;
        outcome.tokensUsed = reply.tokensUsed;
        outcome.matchedPatterns = evaluateResponse(reply.content, task.expectedPatterns);
        outcome.success = outcome.matchedPatterns.length / task.expectedPatterns.length >= 0.6;
    }
    catch (err) {
        outcome.error = err instanceof Error ? err.message : String(err);
    }
    return outcome;
}
549
/**
 * Run every task sequentially against one model and summarize the outcome.
 *
 * Progress is printed to the console, and a one-second pause follows each
 * task. Average latency only counts results whose latency is above zero.
 *
 * @param {object} client - Client forwarded to runTaskForModel.
 * @param {{id: string, name: string}} model - Model under test.
 * @param {Array<object>} tasks - Tasks to execute, in order.
 * @param {boolean} [withMemory=false] - Whether prompts get the UAP memory context.
 * @returns {Promise<object>} Summary: modelId, modelName, tasksRun, tasksSucceeded,
 *   successRate (percent), avgLatencyMs (rounded), totalTokens, results.
 */
async function runBenchmarkForModel(client, model, tasks, withMemory = false) {
    const banner = '='.repeat(60);
    const memoryLabel = withMemory ? ' (with UAP Memory)' : ' (without Memory)';
    console.log(`\n${banner}`);
    console.log(`Running benchmark for: ${model.name}${memoryLabel}`);
    console.log(`${banner}`);
    const results = [];
    let tasksSucceeded = 0;
    let timedCount = 0;
    let latencySum = 0;
    let totalTokens = 0;
    for (const task of tasks) {
        console.log(` [${task.difficulty.toUpperCase()}] ${task.name}...`);
        const taskResult = await runTaskForModel(client, model, task, withMemory);
        results.push(taskResult);
        if (taskResult.success) {
            tasksSucceeded += 1;
            console.log(` ✓ Success (${taskResult.latencyMs}ms)`);
        }
        else {
            console.log(` ✗ Failed: ${taskResult.error || 'Pattern mismatch'}`);
        }
        // Only results that recorded a latency contribute to the average
        if (taskResult.latencyMs > 0) {
            timedCount += 1;
            latencySum += taskResult.latencyMs;
        }
        totalTokens += taskResult.tokensUsed;
        // Small delay between tasks
        await new Promise((resolve) => setTimeout(resolve, 1000));
    }
    let avgLatency = 0;
    if (timedCount > 0) {
        avgLatency = latencySum / timedCount;
    }
    return {
        modelId: model.id,
        modelName: model.name,
        tasksRun: tasks.length,
        tasksSucceeded,
        successRate: (tasksSucceeded / tasks.length) * 100,
        avgLatencyMs: Math.round(avgLatency),
        totalTokens,
        results,
    };
}
585
/**
 * Derive headline comparisons from per-model benchmark summaries.
 *
 * For each difficulty level the winner is the model with the strictly highest
 * success rate above 0%. If no model scores above 0 the entry keeps an empty
 * model name and a rate of 0. Ties keep the earlier model.
 *
 * @param {Array<object>} modelResults - Summaries as returned by runBenchmarkForModel.
 * @returns {{bestOverall: string, fastestModel: string, mostAccurate: string,
 *   byDifficulty: Object<string, {model: string, successRate: number}>}}
 */
function generateComparison(modelResults) {
    const byAccuracy = modelResults.slice().sort((x, y) => y.successRate - x.successRate);
    const byLatency = modelResults.slice().sort((x, y) => x.avgLatencyMs - y.avgLatencyMs);
    // Resolves a result's difficulty through its task id (undefined if unknown)
    const difficultyOf = (taskId) => BENCHMARK_TASKS.find((t) => t.id === taskId)?.difficulty;
    const byDifficulty = {};
    for (const level of ['easy', 'medium', 'hard']) {
        const winner = { model: '', successRate: 0 };
        for (const { modelName, results } of modelResults) {
            const atLevel = results.filter((r) => difficultyOf(r.taskId) === level);
            if (atLevel.length === 0) {
                continue;
            }
            const passed = atLevel.filter((r) => r.success).length;
            const rate = (passed / atLevel.length) * 100;
            if (rate > winner.successRate) {
                winner.successRate = rate;
                winner.model = modelName;
            }
        }
        byDifficulty[level] = winner;
    }
    const topByAccuracy = byAccuracy[0]?.modelName || 'N/A';
    return {
        bestOverall: topByAccuracy,
        fastestModel: byLatency[0]?.modelName || 'N/A',
        mostAccurate: topByAccuracy,
        byDifficulty,
    };
}
614
/**
 * Render a benchmark report object as a Markdown document.
 *
 * Sections: executive summary table, comparison (overall + per difficulty),
 * per-model detailed results and, when `report.memoryComparison` is present,
 * a UAP memory impact analysis.
 *
 * Deltas in the memory analysis are printed with an explicit sign, so a
 * regression reads "-12.5%" (never "+-12.5%").
 *
 * @param {object} report - Report with `timestamp`, `models`, `comparison`
 *   and optional `memoryComparison` ({ withMemory, withoutMemory, improvement }).
 * @returns {string} The Markdown text.
 */
function generateMarkdownReport(report) {
    // Formats a delta with one decimal and a leading '+' for non-negative values.
    const signed = (value) => `${value >= 0 ? '+' : ''}${value.toFixed(1)}`;
    const lines = [
        '# Model Integration Benchmark Results',
        '',
        `**Generated:** ${report.timestamp}`,
        `**Models Tested:** ${report.models.map((m) => m.modelName).join(', ')}`,
        `**Tasks Run:** ${BENCHMARK_TASKS.length}`,
        '',
        '---',
        '',
        '## Executive Summary',
        '',
        '| Model | Success Rate | Avg Latency | Total Tokens |',
        '|-------|--------------|-------------|--------------|',
    ];
    for (const model of report.models) {
        lines.push(`| ${model.modelName} | ${model.successRate.toFixed(1)}% | ${model.avgLatencyMs}ms | ${model.totalTokens} |`);
    }
    lines.push('', '---', '', '## Comparison', '');
    lines.push(`- **Best Overall:** ${report.comparison.bestOverall}`);
    lines.push(`- **Fastest Model:** ${report.comparison.fastestModel}`);
    lines.push(`- **Most Accurate:** ${report.comparison.mostAccurate}`);
    lines.push('', '### By Difficulty', '');
    lines.push('| Difficulty | Best Model | Success Rate |');
    lines.push('|------------|------------|--------------|');
    for (const [diff, data] of Object.entries(report.comparison.byDifficulty)) {
        lines.push(`| ${diff} | ${data.model} | ${data.successRate.toFixed(1)}% |`);
    }
    lines.push('', '---', '', '## Detailed Results', '');
    for (const model of report.models) {
        lines.push(`### ${model.modelName}`, '');
        lines.push('| Task | Difficulty | Success | Latency | Patterns Matched |');
        lines.push('|------|------------|---------|---------|------------------|');
        for (const result of model.results) {
            const task = BENCHMARK_TASKS.find((t) => t.id === result.taskId);
            const status = result.success ? '✓' : '✗';
            const patterns = `${result.matchedPatterns.length}/${task?.expectedPatterns.length || 0}`;
            lines.push(`| ${task?.name || result.taskId} | ${task?.difficulty || 'N/A'} | ${status} | ${result.latencyMs}ms | ${patterns} |`);
        }
        lines.push('');
    }
    // Add memory comparison section if available
    const memory = report.memoryComparison;
    if (memory) {
        // Pair each with-memory run with its without-memory run and improvement
        // entry once; models missing either are left out of both tables.
        const pairs = memory.withMemory
            .map((withMem) => ({
                withMem,
                without: memory.withoutMemory.find((r) => r.modelId === withMem.modelId),
                imp: memory.improvement[withMem.modelId],
            }))
            .filter(({ without, imp }) => without && imp);
        lines.push('---', '', '## UAP Memory Impact Analysis', '');
        lines.push('### Success Rate Comparison', '');
        lines.push('| Model | Without Memory | With Memory | Improvement |');
        lines.push('|-------|----------------|-------------|-------------|');
        for (const { withMem, without, imp } of pairs) {
            lines.push(`| ${withMem.modelName} | ${without.successRate.toFixed(1)}% | ${withMem.successRate.toFixed(1)}% | ${signed(imp.successDelta)}% |`);
        }
        lines.push('', '### Latency Comparison', '');
        lines.push('| Model | Without Memory | With Memory | Speed Ratio |');
        lines.push('|-------|----------------|-------------|-------------|');
        for (const { withMem, without, imp } of pairs) {
            let speedLabel = 'same';
            if (imp.speedupRatio > 1) {
                speedLabel = `${imp.speedupRatio.toFixed(2)}x faster`;
            }
            else if (imp.speedupRatio < 1) {
                speedLabel = `${(1 / imp.speedupRatio).toFixed(2)}x slower`;
            }
            lines.push(`| ${withMem.modelName} | ${without.avgLatencyMs}ms | ${withMem.avgLatencyMs}ms | ${speedLabel} |`);
        }
        lines.push('', '### Key Findings', '');
        // Find best improvement
        const improvements = Object.entries(memory.improvement);
        if (improvements.length > 0) {
            const [bestId, bestImp] = improvements.reduce((a, b) => (a[1].successDelta > b[1].successDelta ? a : b));
            // Fall back to 'N/A' when the improvement entry has no matching run
            const bestModel = memory.withMemory.find((m) => m.modelId === bestId)?.modelName ?? 'N/A';
            lines.push(`- **Best Memory Benefit:** ${bestModel} (${signed(bestImp.successDelta)}% success rate)`);
            const avgImprovement = improvements.reduce((sum, [, imp]) => sum + imp.successDelta, 0) / improvements.length;
            lines.push(`- **Average Improvement:** ${signed(avgImprovement)}% success rate across all models`);
            lines.push('', '### Interpretation', '');
            lines.push('UAP memory context injection provides models with:');
            lines.push('- Project structure knowledge (file locations, patterns)');
            lines.push('- Coding standards (JSDoc, error handling, async patterns)');
            lines.push('- Common gotchas and lessons learned from previous sessions');
            lines.push('- Design pattern templates (singleton, strategy, factory)');
        }
    }
    lines.push('', '---', '', '**Report Generated by UAP Model Integration Benchmark**');
    return lines.join('\n');
}
707
+ // ============================================================================
708
+ // Parallel Execution Utilities
709
+ // ============================================================================
710
/**
 * Run multiple model benchmarks with a configurable concurrency limit.
 *
 * Up to `concurrency` workers pull models from a shared cursor until all
 * models have been processed. A limit below 1 (or NaN) is treated as 1, so
 * every model is always run.
 *
 * @param {object} client - Client forwarded to runBenchmarkForModel.
 * @param {Array<{id: string}>} models - Models to benchmark.
 * @param {Array<object>} tasks - Tasks to run for each model.
 * @param {boolean} withMemory - Whether prompts get the UAP memory context.
 * @param {number} concurrency - Maximum number of models in flight at once.
 * @returns {Promise<Array<object>>} One summary per model, in the same order as `models`.
 */
async function runModelsInParallel(client, models, tasks, withMemory, concurrency) {
    const requested = Math.floor(Number(concurrency));
    // NaN and values < 1 fall back to a single worker; Infinity is capped by models.length
    const workerCount = Math.min(requested >= 1 ? requested : 1, models.length);
    // Results are stored by input position, so no re-sorting is needed afterwards
    const ordered = new Array(models.length);
    let cursor = 0;
    const worker = async () => {
        while (cursor < models.length) {
            // Claim the next index before awaiting so no two workers share a model
            const index = cursor++;
            ordered[index] = await runBenchmarkForModel(client, models[index], tasks, withMemory);
        }
    };
    await Promise.all(Array.from({ length: workerCount }, () => worker()));
    return ordered.filter(Boolean);
}
736
/**
 * Run the model integration benchmark, optionally comparing runs with and
 * without UAP memory context, and write a markdown report to disk.
 *
 * Accepts either the legacy positional signature or a single options object
 * (`{ apiKey, modelIds, compareMemory, parallelModels }`) as the first argument.
 *
 * @param {string|object} [apiKeyOrOptions] - API key, or an options object. When no
 *   key is supplied, FACTORY_API_KEY and then DROID_API_KEY are read from the environment.
 * @param {string[]} [modelIds] - Ids of the MODELS entries to run; all of MODELS when omitted.
 * @param {boolean} [compareMemory=true] - Also run the with-memory phase and compute deltas.
 * @param {number} [parallelModels=1] - Maximum number of models benchmarked concurrently.
 * @returns {Promise<object>} The report: `timestamp`, `models`, `comparison`, and
 *   `memoryComparison` (undefined when comparison is disabled).
 * @throws {Error} If no API key is available or no requested model id matches MODELS.
 */
export async function runModelBenchmark(apiKeyOrOptions, modelIds, compareMemory = true, parallelModels = 1) {
    // Handle both old signature and new options object
    let key;
    let models;
    let compare;
    let parallel;
    if (typeof apiKeyOrOptions === 'object' && apiKeyOrOptions !== null) {
        key = apiKeyOrOptions.apiKey;
        models = apiKeyOrOptions.modelIds;
        compare = apiKeyOrOptions.compareMemory ?? true;
        parallel = apiKeyOrOptions.parallelModels ?? 1;
    }
    else {
        key = apiKeyOrOptions;
        models = modelIds;
        compare = compareMemory;
        parallel = parallelModels;
    }
    key = key || process.env.FACTORY_API_KEY || process.env.DROID_API_KEY;
    if (!key) {
        throw new Error('FACTORY_API_KEY or DROID_API_KEY not provided and not found in environment');
    }
    const client = new DroidExecClient(key, 'medium');
    const modelsToTest = models ? MODELS.filter((m) => models.includes(m.id)) : MODELS;
    if (modelsToTest.length === 0) {
        throw new Error('No valid models specified');
    }
    // Determine effective parallelism
    const effectiveParallel = Math.min(parallel, modelsToTest.length);
    const isParallel = effectiveParallel > 1;
    console.log('\n' + '='.repeat(60));
    console.log(' UAP MODEL INTEGRATION BENCHMARK');
    console.log('='.repeat(60));
    console.log(`\nModels: ${modelsToTest.map((m) => m.name).join(', ')}`);
    console.log(`Tasks: ${BENCHMARK_TASKS.length}`);
    console.log(`Memory Comparison: ${compare ? 'ENABLED' : 'DISABLED'}`);
    console.log(`Parallel Models: ${effectiveParallel}${isParallel ? ' (ENABLED)' : ' (sequential)'}`);
    // Runs every selected model once, either through the parallel runner or one by one.
    const runPhase = async (withMemory) => {
        if (isParallel) {
            return runModelsInParallel(client, modelsToTest, BENCHMARK_TASKS, withMemory, effectiveParallel);
        }
        const phaseResults = [];
        for (const model of modelsToTest) {
            phaseResults.push(await runBenchmarkForModel(client, model, BENCHMARK_TASKS, withMemory));
        }
        return phaseResults;
    };
    let withMemoryResults = [];
    // Run without memory first
    console.log('\n' + '█'.repeat(60));
    console.log(` PHASE 1: WITHOUT UAP MEMORY${isParallel ? ' (PARALLEL)' : ''}`);
    console.log('█'.repeat(60));
    if (isParallel) {
        console.log(`\n Running ${modelsToTest.length} models with concurrency=${effectiveParallel}...\n`);
    }
    const withoutMemoryResults = await runPhase(false);
    // Run with memory if comparison enabled
    if (compare) {
        console.log('\n' + '█'.repeat(60));
        console.log(` PHASE 2: WITH UAP MEMORY${isParallel ? ' (PARALLEL)' : ''}`);
        console.log('█'.repeat(60));
        // Setup UAP before running with-memory tests
        console.log('\n--- Setting up UAP (init, analyze, generate, memory start, prepopulate) ---');
        const uapSetup = await setupUAP(true);
        if (uapSetup.errors.length > 0) {
            console.log('\nUAP Setup warnings:');
            uapSetup.errors.forEach((e) => console.log(` - ${e}`));
        }
        console.log(`\nUAP Status:`);
        console.log(` Initialized: ${uapSetup.initialized ? '✓' : '✗'}`);
        console.log(` Memory Started: ${uapSetup.memoryStarted ? '✓' : '✗'}`);
        console.log(` Memory Prepopulated: ${uapSetup.memoryPrepopulated ? '✓' : '✗'}`);
        console.log(` CLAUDE.md Loaded: ${uapSetup.claudeMdLoaded ? '✓' : '✗'}`);
        // Clear cached context to force reload with fresh memory
        cachedMemoryContext = null;
        // Log memory context size
        const memoryContext = getUAPMemoryContext();
        console.log(` Memory Context Size: ${memoryContext.length} chars\n`);
        if (isParallel) {
            console.log(` Running ${modelsToTest.length} models with concurrency=${effectiveParallel}...\n`);
        }
        withMemoryResults = await runPhase(true);
    }
    // Calculate memory improvement for each model
    const improvement = {};
    if (compare) {
        for (const model of modelsToTest) {
            const without = withoutMemoryResults.find((r) => r.modelId === model.id);
            const withMem = withMemoryResults.find((r) => r.modelId === model.id);
            if (without && withMem) {
                // Both latencies must be positive: the ratio divides by the with-memory
                // latency, so a zero there would otherwise produce Infinity.
                const hasComparableLatency = without.avgLatencyMs > 0 && withMem.avgLatencyMs > 0;
                improvement[model.id] = {
                    successDelta: withMem.successRate - without.successRate,
                    speedupRatio: hasComparableLatency ? without.avgLatencyMs / withMem.avgLatencyMs : 1,
                };
            }
        }
    }
    // Use with-memory results as primary if available, otherwise without
    const primaryResults = compare && withMemoryResults.length > 0 ? withMemoryResults : withoutMemoryResults;
    const report = {
        timestamp: new Date().toISOString(),
        models: primaryResults,
        comparison: generateComparison(primaryResults),
        memoryComparison: compare
            ? {
                withMemory: withMemoryResults,
                withoutMemory: withoutMemoryResults,
                improvement,
            }
            : undefined,
    };
    // Generate and save markdown report
    const markdown = generateMarkdownReport(report);
    const reportPath = join(__dirname, '../../MODEL_BENCHMARK_RESULTS.md');
    writeFileSync(reportPath, markdown);
    console.log(`\nReport saved to: ${reportPath}`);
    // Print summary
    console.log('\n' + '='.repeat(60));
    console.log(' BENCHMARK COMPLETE');
    console.log('='.repeat(60));
    if (compare) {
        console.log('\n--- Without Memory ---');
        for (const model of withoutMemoryResults) {
            console.log(` ${model.modelName}: ${model.successRate.toFixed(1)}% success, ${model.avgLatencyMs}ms avg`);
        }
        console.log('\n--- With UAP Memory ---');
        for (const model of withMemoryResults) {
            console.log(` ${model.modelName}: ${model.successRate.toFixed(1)}% success, ${model.avgLatencyMs}ms avg`);
        }
        console.log('\n--- Memory Improvement ---');
        for (const [modelId, imp] of Object.entries(improvement)) {
            const model = modelsToTest.find((m) => m.id === modelId);
            const sign = imp.successDelta >= 0 ? '+' : '';
            console.log(` ${model?.name}: ${sign}${imp.successDelta.toFixed(1)}% success, ${imp.speedupRatio.toFixed(2)}x speed`);
        }
    }
    else {
        console.log('\nSummary:');
        for (const model of primaryResults) {
            console.log(` ${model.modelName}: ${model.successRate.toFixed(1)}% success, ${model.avgLatencyMs}ms avg`);
        }
    }
    console.log(`\nBest Overall: ${report.comparison.bestOverall}`);
    return report;
}
884
/**
 * Parse `.env`-style text into `[key, value]` pairs.
 *
 * Blank lines, lines starting with `#`, lines without `=`, and lines with an
 * empty key are ignored. Keys and values are trimmed; a value wrapped in one
 * pair of matching single or double quotes has those quotes removed. Everything
 * after the first `=` belongs to the value, so values may themselves contain `=`.
 *
 * @param {string} content - Raw file contents.
 * @returns {Array<[string, string]>} Parsed entries in file order.
 */
function parseEnvContent(content) {
    const entries = [];
    for (const rawLine of content.split('\n')) {
        const line = rawLine.trim();
        if (line === '' || line.startsWith('#')) {
            continue;
        }
        const separator = line.indexOf('=');
        if (separator === -1) {
            continue;
        }
        const key = line.slice(0, separator).trim();
        if (key === '') {
            continue;
        }
        let value = line.slice(separator + 1).trim();
        const quote = value[0];
        if (value.length >= 2 && (quote === '"' || quote === "'") && value.endsWith(quote)) {
            value = value.slice(1, -1);
        }
        entries.push([key, value]);
    }
    return entries;
}
// CLI entry point
if (process.argv[1]?.includes('model-integration')) {
    const envPath = join(__dirname, '../../.env');
    if (existsSync(envPath)) {
        // Values from the file overwrite variables already present in the environment.
        for (const [key, value] of parseEnvContent(readFileSync(envPath, 'utf-8'))) {
            process.env[key] = value;
        }
    }
    runModelBenchmark()
        .then(() => process.exit(0))
        .catch((err) => {
            console.error('Benchmark failed:', err);
            process.exit(1);
        });
}
903
+ export { MODELS, BENCHMARK_TASKS, setupUAP, loadUAPMemoryContext };
904
+ //# sourceMappingURL=model-integration.js.map