@elizaos/plugin-training 2.0.3-beta.5 → 2.0.3-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (363) hide show
  1. package/dist/backends/native.d.ts +96 -0
  2. package/dist/backends/native.d.ts.map +1 -0
  3. package/dist/backends/native.js +308 -0
  4. package/dist/backends/native.js.map +1 -0
  5. package/dist/cli/train.d.ts +22 -0
  6. package/dist/cli/train.d.ts.map +1 -0
  7. package/dist/cli/train.js +219 -0
  8. package/dist/cli/train.js.map +1 -0
  9. package/dist/core/action-benchmark-runner.d.ts +55 -0
  10. package/dist/core/action-benchmark-runner.d.ts.map +1 -0
  11. package/dist/core/action-benchmark-runner.js +341 -0
  12. package/dist/core/action-benchmark-runner.js.map +1 -0
  13. package/dist/core/artifact-store.d.ts +72 -0
  14. package/dist/core/artifact-store.d.ts.map +1 -0
  15. package/dist/core/artifact-store.js +50 -0
  16. package/dist/core/artifact-store.js.map +1 -0
  17. package/dist/core/benchmark-matrix-artifact.d.ts +102 -0
  18. package/dist/core/benchmark-matrix-artifact.d.ts.map +1 -0
  19. package/dist/core/benchmark-matrix-artifact.js +381 -0
  20. package/dist/core/benchmark-matrix-artifact.js.map +1 -0
  21. package/dist/core/benchmark-vs-cerebras-runner.d.ts +37 -0
  22. package/dist/core/benchmark-vs-cerebras-runner.d.ts.map +1 -0
  23. package/dist/core/benchmark-vs-cerebras-runner.js +151 -0
  24. package/dist/core/benchmark-vs-cerebras-runner.js.map +1 -0
  25. package/dist/core/cerebras-eval-model.d.ts +54 -0
  26. package/dist/core/cerebras-eval-model.d.ts.map +1 -0
  27. package/dist/core/cerebras-eval-model.js +249 -0
  28. package/dist/core/cerebras-eval-model.js.map +1 -0
  29. package/dist/core/cli.d.ts +15 -0
  30. package/dist/core/cli.d.ts.map +1 -0
  31. package/dist/core/cli.js +1003 -0
  32. package/dist/core/cli.js.map +1 -0
  33. package/dist/core/context-audit.d.ts +51 -0
  34. package/dist/core/context-audit.d.ts.map +1 -0
  35. package/dist/core/context-audit.js +166 -0
  36. package/dist/core/context-audit.js.map +1 -0
  37. package/dist/core/context-catalog.d.ts +47 -0
  38. package/dist/core/context-catalog.d.ts.map +1 -0
  39. package/dist/core/context-catalog.js +269 -0
  40. package/dist/core/context-catalog.js.map +1 -0
  41. package/dist/core/context-types.d.ts +3 -0
  42. package/dist/core/context-types.d.ts.map +1 -0
  43. package/dist/core/context-types.js +18 -0
  44. package/dist/core/context-types.js.map +1 -0
  45. package/dist/core/dataset-generator.d.ts +135 -0
  46. package/dist/core/dataset-generator.d.ts.map +1 -0
  47. package/dist/core/dataset-generator.js +895 -0
  48. package/dist/core/dataset-generator.js.map +1 -0
  49. package/dist/core/eliza1-benchmark-recipe.d.ts +18 -0
  50. package/dist/core/eliza1-benchmark-recipe.d.ts.map +1 -0
  51. package/dist/core/eliza1-benchmark-recipe.js +64 -0
  52. package/dist/core/eliza1-benchmark-recipe.js.map +1 -0
  53. package/dist/core/eliza1-bundle-stager.d.ts +57 -0
  54. package/dist/core/eliza1-bundle-stager.d.ts.map +1 -0
  55. package/dist/core/eliza1-bundle-stager.js +149 -0
  56. package/dist/core/eliza1-bundle-stager.js.map +1 -0
  57. package/dist/core/ensure-cron-job.d.ts +53 -0
  58. package/dist/core/ensure-cron-job.d.ts.map +1 -0
  59. package/dist/core/ensure-cron-job.js +51 -0
  60. package/dist/core/ensure-cron-job.js.map +1 -0
  61. package/dist/core/eval-comparison-artifact.d.ts +72 -0
  62. package/dist/core/eval-comparison-artifact.d.ts.map +1 -0
  63. package/dist/core/eval-comparison-artifact.js +281 -0
  64. package/dist/core/eval-comparison-artifact.js.map +1 -0
  65. package/dist/core/feed-generation-runner.d.ts +37 -0
  66. package/dist/core/feed-generation-runner.d.ts.map +1 -0
  67. package/dist/core/feed-generation-runner.js +232 -0
  68. package/dist/core/feed-generation-runner.js.map +1 -0
  69. package/dist/core/html-escape.d.ts +5 -0
  70. package/dist/core/html-escape.d.ts.map +1 -0
  71. package/dist/core/html-escape.js +11 -0
  72. package/dist/core/html-escape.js.map +1 -0
  73. package/dist/core/huggingface-dataset-ingest.d.ts +52 -0
  74. package/dist/core/huggingface-dataset-ingest.d.ts.map +1 -0
  75. package/dist/core/huggingface-dataset-ingest.js +134 -0
  76. package/dist/core/huggingface-dataset-ingest.js.map +1 -0
  77. package/dist/core/index.d.ts +29 -0
  78. package/dist/core/index.d.ts.map +1 -0
  79. package/dist/core/index.js +204 -0
  80. package/dist/core/index.js.map +1 -0
  81. package/dist/core/privacy-filter.d.ts +95 -0
  82. package/dist/core/privacy-filter.d.ts.map +1 -0
  83. package/dist/core/privacy-filter.js +324 -0
  84. package/dist/core/privacy-filter.js.map +1 -0
  85. package/dist/core/promotion-gate.d.ts +117 -0
  86. package/dist/core/promotion-gate.d.ts.map +1 -0
  87. package/dist/core/promotion-gate.js +85 -0
  88. package/dist/core/promotion-gate.js.map +1 -0
  89. package/dist/core/promotion-persist.d.ts +116 -0
  90. package/dist/core/promotion-persist.d.ts.map +1 -0
  91. package/dist/core/promotion-persist.js +93 -0
  92. package/dist/core/promotion-persist.js.map +1 -0
  93. package/dist/core/prompt-compare.d.ts +99 -0
  94. package/dist/core/prompt-compare.d.ts.map +1 -0
  95. package/dist/core/prompt-compare.js +210 -0
  96. package/dist/core/prompt-compare.js.map +1 -0
  97. package/dist/core/replay-validator.d.ts +136 -0
  98. package/dist/core/replay-validator.d.ts.map +1 -0
  99. package/dist/core/replay-validator.js +312 -0
  100. package/dist/core/replay-validator.js.map +1 -0
  101. package/dist/core/roleplay-executor.d.ts +123 -0
  102. package/dist/core/roleplay-executor.d.ts.map +1 -0
  103. package/dist/core/roleplay-executor.js +675 -0
  104. package/dist/core/roleplay-executor.js.map +1 -0
  105. package/dist/core/roleplay-trajectories.d.ts +54 -0
  106. package/dist/core/roleplay-trajectories.d.ts.map +1 -0
  107. package/dist/core/roleplay-trajectories.js +88 -0
  108. package/dist/core/roleplay-trajectories.js.map +1 -0
  109. package/dist/core/scenario-blueprints.d.ts +62 -0
  110. package/dist/core/scenario-blueprints.d.ts.map +1 -0
  111. package/dist/core/scenario-blueprints.js +850 -0
  112. package/dist/core/scenario-blueprints.js.map +1 -0
  113. package/dist/core/scenario-runner.d.ts +36 -0
  114. package/dist/core/scenario-runner.d.ts.map +1 -0
  115. package/dist/core/scenario-runner.js +216 -0
  116. package/dist/core/scenario-runner.js.map +1 -0
  117. package/dist/core/skill-scoring-cron.d.ts +57 -0
  118. package/dist/core/skill-scoring-cron.d.ts.map +1 -0
  119. package/dist/core/skill-scoring-cron.js +180 -0
  120. package/dist/core/skill-scoring-cron.js.map +1 -0
  121. package/dist/core/test-trajectory-collector.d.ts +37 -0
  122. package/dist/core/test-trajectory-collector.d.ts.map +1 -0
  123. package/dist/core/test-trajectory-collector.js +225 -0
  124. package/dist/core/test-trajectory-collector.js.map +1 -0
  125. package/dist/core/track-c-queue-task.d.ts +37 -0
  126. package/dist/core/track-c-queue-task.d.ts.map +1 -0
  127. package/dist/core/track-c-queue-task.js +104 -0
  128. package/dist/core/track-c-queue-task.js.map +1 -0
  129. package/dist/core/training-analysis-index.d.ts +104 -0
  130. package/dist/core/training-analysis-index.d.ts.map +1 -0
  131. package/dist/core/training-analysis-index.js +3297 -0
  132. package/dist/core/training-analysis-index.js.map +1 -0
  133. package/dist/core/training-collection-runner.d.ts +508 -0
  134. package/dist/core/training-collection-runner.d.ts.map +1 -0
  135. package/dist/core/training-collection-runner.js +2299 -0
  136. package/dist/core/training-collection-runner.js.map +1 -0
  137. package/dist/core/training-config.d.ts +52 -0
  138. package/dist/core/training-config.d.ts.map +1 -0
  139. package/dist/core/training-config.js +117 -0
  140. package/dist/core/training-config.js.map +1 -0
  141. package/dist/core/training-orchestrator.d.ts +112 -0
  142. package/dist/core/training-orchestrator.d.ts.map +1 -0
  143. package/dist/core/training-orchestrator.js +729 -0
  144. package/dist/core/training-orchestrator.js.map +1 -0
  145. package/dist/core/training-readiness-report.d.ts +52 -0
  146. package/dist/core/training-readiness-report.d.ts.map +1 -0
  147. package/dist/core/training-readiness-report.js +765 -0
  148. package/dist/core/training-readiness-report.js.map +1 -0
  149. package/dist/core/trajectory-consumer.d.ts +15 -0
  150. package/dist/core/trajectory-consumer.d.ts.map +1 -0
  151. package/dist/core/trajectory-consumer.js +61 -0
  152. package/dist/core/trajectory-consumer.js.map +1 -0
  153. package/dist/core/trajectory-export-bundle.d.ts +95 -0
  154. package/dist/core/trajectory-export-bundle.d.ts.map +1 -0
  155. package/dist/core/trajectory-export-bundle.js +561 -0
  156. package/dist/core/trajectory-export-bundle.js.map +1 -0
  157. package/dist/core/trajectory-export-cron.d.ts +57 -0
  158. package/dist/core/trajectory-export-cron.d.ts.map +1 -0
  159. package/dist/core/trajectory-export-cron.js +170 -0
  160. package/dist/core/trajectory-export-cron.js.map +1 -0
  161. package/dist/core/trajectory-hf-upload.d.ts +50 -0
  162. package/dist/core/trajectory-hf-upload.d.ts.map +1 -0
  163. package/dist/core/trajectory-hf-upload.js +111 -0
  164. package/dist/core/trajectory-hf-upload.js.map +1 -0
  165. package/dist/core/trajectory-task-datasets.d.ts +62 -0
  166. package/dist/core/trajectory-task-datasets.d.ts.map +1 -0
  167. package/dist/core/trajectory-task-datasets.js +427 -0
  168. package/dist/core/trajectory-task-datasets.js.map +1 -0
  169. package/dist/core/wait-for-service.d.ts +25 -0
  170. package/dist/core/wait-for-service.d.ts.map +1 -0
  171. package/dist/core/wait-for-service.js +19 -0
  172. package/dist/core/wait-for-service.js.map +1 -0
  173. package/dist/core/workspace-runtime.d.ts +4 -0
  174. package/dist/core/workspace-runtime.d.ts.map +1 -0
  175. package/dist/core/workspace-runtime.js +25 -0
  176. package/dist/core/workspace-runtime.js.map +1 -0
  177. package/dist/dspy/artifact.d.ts +54 -0
  178. package/dist/dspy/artifact.d.ts.map +1 -0
  179. package/dist/dspy/artifact.js +61 -0
  180. package/dist/dspy/artifact.js.map +1 -0
  181. package/dist/dspy/chain-of-thought.d.ts +27 -0
  182. package/dist/dspy/chain-of-thought.d.ts.map +1 -0
  183. package/dist/dspy/chain-of-thought.js +43 -0
  184. package/dist/dspy/chain-of-thought.js.map +1 -0
  185. package/dist/dspy/examples.d.ts +72 -0
  186. package/dist/dspy/examples.d.ts.map +1 -0
  187. package/dist/dspy/examples.js +105 -0
  188. package/dist/dspy/examples.js.map +1 -0
  189. package/dist/dspy/index.d.ts +15 -0
  190. package/dist/dspy/index.d.ts.map +1 -0
  191. package/dist/dspy/index.js +40 -0
  192. package/dist/dspy/index.js.map +1 -0
  193. package/dist/dspy/lm-adapter.d.ts +100 -0
  194. package/dist/dspy/lm-adapter.d.ts.map +1 -0
  195. package/dist/dspy/lm-adapter.js +81 -0
  196. package/dist/dspy/lm-adapter.js.map +1 -0
  197. package/dist/dspy/optimizers/dspy-bootstrap-fewshot.d.ts +23 -0
  198. package/dist/dspy/optimizers/dspy-bootstrap-fewshot.d.ts.map +1 -0
  199. package/dist/dspy/optimizers/dspy-bootstrap-fewshot.js +85 -0
  200. package/dist/dspy/optimizers/dspy-bootstrap-fewshot.js.map +1 -0
  201. package/dist/dspy/optimizers/dspy-copro.d.ts +29 -0
  202. package/dist/dspy/optimizers/dspy-copro.d.ts.map +1 -0
  203. package/dist/dspy/optimizers/dspy-copro.js +141 -0
  204. package/dist/dspy/optimizers/dspy-copro.js.map +1 -0
  205. package/dist/dspy/optimizers/dspy-mipro.d.ts +37 -0
  206. package/dist/dspy/optimizers/dspy-mipro.d.ts.map +1 -0
  207. package/dist/dspy/optimizers/dspy-mipro.js +194 -0
  208. package/dist/dspy/optimizers/dspy-mipro.js.map +1 -0
  209. package/dist/dspy/optimizers/index.d.ts +5 -0
  210. package/dist/dspy/optimizers/index.d.ts.map +1 -0
  211. package/dist/dspy/optimizers/index.js +11 -0
  212. package/dist/dspy/optimizers/index.js.map +1 -0
  213. package/dist/dspy/optimizers/types.d.ts +39 -0
  214. package/dist/dspy/optimizers/types.d.ts.map +1 -0
  215. package/dist/dspy/optimizers/types.js +1 -0
  216. package/dist/dspy/optimizers/types.js.map +1 -0
  217. package/dist/dspy/predict.d.ts +49 -0
  218. package/dist/dspy/predict.d.ts.map +1 -0
  219. package/dist/dspy/predict.js +73 -0
  220. package/dist/dspy/predict.js.map +1 -0
  221. package/dist/dspy/signature.d.ts +88 -0
  222. package/dist/dspy/signature.d.ts.map +1 -0
  223. package/dist/dspy/signature.js +205 -0
  224. package/dist/dspy/signature.js.map +1 -0
  225. package/dist/index.d.ts +15 -0
  226. package/dist/index.d.ts.map +1 -0
  227. package/dist/index.js +15 -0
  228. package/dist/index.js.map +1 -0
  229. package/dist/optimizers/bootstrap-fewshot.d.ts +42 -0
  230. package/dist/optimizers/bootstrap-fewshot.d.ts.map +1 -0
  231. package/dist/optimizers/bootstrap-fewshot.js +92 -0
  232. package/dist/optimizers/bootstrap-fewshot.js.map +1 -0
  233. package/dist/optimizers/gepa.d.ts +63 -0
  234. package/dist/optimizers/gepa.d.ts.map +1 -0
  235. package/dist/optimizers/gepa.js +232 -0
  236. package/dist/optimizers/gepa.js.map +1 -0
  237. package/dist/optimizers/index.d.ts +7 -0
  238. package/dist/optimizers/index.d.ts.map +1 -0
  239. package/dist/optimizers/index.js +51 -0
  240. package/dist/optimizers/index.js.map +1 -0
  241. package/dist/optimizers/instruction-search.d.ts +39 -0
  242. package/dist/optimizers/instruction-search.d.ts.map +1 -0
  243. package/dist/optimizers/instruction-search.js +108 -0
  244. package/dist/optimizers/instruction-search.js.map +1 -0
  245. package/dist/optimizers/prompt-evolution.d.ts +39 -0
  246. package/dist/optimizers/prompt-evolution.d.ts.map +1 -0
  247. package/dist/optimizers/prompt-evolution.js +101 -0
  248. package/dist/optimizers/prompt-evolution.js.map +1 -0
  249. package/dist/optimizers/scoring.d.ts +139 -0
  250. package/dist/optimizers/scoring.d.ts.map +1 -0
  251. package/dist/optimizers/scoring.js +299 -0
  252. package/dist/optimizers/scoring.js.map +1 -0
  253. package/dist/optimizers/types.d.ts +105 -0
  254. package/dist/optimizers/types.d.ts.map +1 -0
  255. package/dist/optimizers/types.js +1 -0
  256. package/dist/optimizers/types.js.map +1 -0
  257. package/dist/register-runtime.d.ts +3 -0
  258. package/dist/register-runtime.d.ts.map +1 -0
  259. package/dist/register-runtime.js +60 -0
  260. package/dist/register-runtime.js.map +1 -0
  261. package/dist/register-terminal-view.d.ts +15 -0
  262. package/dist/register-terminal-view.d.ts.map +1 -0
  263. package/dist/register-terminal-view.js +31 -0
  264. package/dist/register-terminal-view.js.map +1 -0
  265. package/dist/routes/experience-routes.d.ts +21 -0
  266. package/dist/routes/experience-routes.d.ts.map +1 -0
  267. package/dist/routes/experience-routes.js +513 -0
  268. package/dist/routes/experience-routes.js.map +1 -0
  269. package/dist/routes/index.d.ts +5 -0
  270. package/dist/routes/index.d.ts.map +1 -0
  271. package/dist/routes/index.js +17 -0
  272. package/dist/routes/index.js.map +1 -0
  273. package/dist/routes/training-routes.d.ts +10 -0
  274. package/dist/routes/training-routes.d.ts.map +1 -0
  275. package/dist/routes/training-routes.js +1239 -0
  276. package/dist/routes/training-routes.js.map +1 -0
  277. package/dist/routes/training-vast-routes.d.ts +35 -0
  278. package/dist/routes/training-vast-routes.d.ts.map +1 -0
  279. package/dist/routes/training-vast-routes.js +249 -0
  280. package/dist/routes/training-vast-routes.js.map +1 -0
  281. package/dist/routes/trajectory-routes.d.ts +19 -0
  282. package/dist/routes/trajectory-routes.d.ts.map +1 -0
  283. package/dist/routes/trajectory-routes.js +1122 -0
  284. package/dist/routes/trajectory-routes.js.map +1 -0
  285. package/dist/services/index.d.ts +9 -0
  286. package/dist/services/index.d.ts.map +1 -0
  287. package/dist/services/index.js +63 -0
  288. package/dist/services/index.js.map +1 -0
  289. package/dist/services/training-backend-check.d.ts +8 -0
  290. package/dist/services/training-backend-check.d.ts.map +1 -0
  291. package/dist/services/training-backend-check.js +31 -0
  292. package/dist/services/training-backend-check.js.map +1 -0
  293. package/dist/services/training-service-like.d.ts +40 -0
  294. package/dist/services/training-service-like.d.ts.map +1 -0
  295. package/dist/services/training-service-like.js +1 -0
  296. package/dist/services/training-service-like.js.map +1 -0
  297. package/dist/services/training-service-registry.d.ts +4 -0
  298. package/dist/services/training-service-registry.d.ts.map +1 -0
  299. package/dist/services/training-service-registry.js +12 -0
  300. package/dist/services/training-service-registry.js.map +1 -0
  301. package/dist/services/training-service.d.ts +59 -0
  302. package/dist/services/training-service.d.ts.map +1 -0
  303. package/dist/services/training-service.js +154 -0
  304. package/dist/services/training-service.js.map +1 -0
  305. package/dist/services/training-trigger.d.ts +177 -0
  306. package/dist/services/training-trigger.d.ts.map +1 -0
  307. package/dist/services/training-trigger.js +300 -0
  308. package/dist/services/training-trigger.js.map +1 -0
  309. package/dist/services/training-vast-service.d.ts +149 -0
  310. package/dist/services/training-vast-service.d.ts.map +1 -0
  311. package/dist/services/training-vast-service.js +648 -0
  312. package/dist/services/training-vast-service.js.map +1 -0
  313. package/dist/services/vast-inference-stats.d.ts +37 -0
  314. package/dist/services/vast-inference-stats.d.ts.map +1 -0
  315. package/dist/services/vast-inference-stats.js +81 -0
  316. package/dist/services/vast-inference-stats.js.map +1 -0
  317. package/dist/services/vast-job-store.d.ts +74 -0
  318. package/dist/services/vast-job-store.d.ts.map +1 -0
  319. package/dist/services/vast-job-store.js +194 -0
  320. package/dist/services/vast-job-store.js.map +1 -0
  321. package/dist/services/vast-subprocess.d.ts +27 -0
  322. package/dist/services/vast-subprocess.d.ts.map +1 -0
  323. package/dist/services/vast-subprocess.js +78 -0
  324. package/dist/services/vast-subprocess.js.map +1 -0
  325. package/dist/setup-routes.d.ts +17 -0
  326. package/dist/setup-routes.d.ts.map +1 -0
  327. package/dist/setup-routes.js +319 -0
  328. package/dist/setup-routes.js.map +1 -0
  329. package/dist/ui/FineTuningSpatialView.d.ts +49 -0
  330. package/dist/ui/FineTuningSpatialView.d.ts.map +1 -0
  331. package/dist/ui/FineTuningSpatialView.js +154 -0
  332. package/dist/ui/FineTuningSpatialView.js.map +1 -0
  333. package/dist/ui/FineTuningView.d.ts +7 -0
  334. package/dist/ui/FineTuningView.d.ts.map +1 -0
  335. package/dist/ui/FineTuningView.helpers.d.ts +17 -0
  336. package/dist/ui/FineTuningView.helpers.d.ts.map +1 -0
  337. package/dist/ui/FineTuningView.helpers.js +30 -0
  338. package/dist/ui/FineTuningView.helpers.js.map +1 -0
  339. package/dist/ui/FineTuningView.interact.d.ts +2 -0
  340. package/dist/ui/FineTuningView.interact.d.ts.map +1 -0
  341. package/dist/ui/FineTuningView.interact.js +300 -0
  342. package/dist/ui/FineTuningView.interact.js.map +1 -0
  343. package/dist/ui/FineTuningView.js +4653 -0
  344. package/dist/ui/FineTuningView.js.map +1 -0
  345. package/dist/ui/fine-tuning-panels.d.ts +100 -0
  346. package/dist/ui/fine-tuning-panels.d.ts.map +1 -0
  347. package/dist/ui/fine-tuning-panels.helpers.d.ts +19 -0
  348. package/dist/ui/fine-tuning-panels.helpers.d.ts.map +1 -0
  349. package/dist/ui/fine-tuning-panels.helpers.js +77 -0
  350. package/dist/ui/fine-tuning-panels.helpers.js.map +1 -0
  351. package/dist/ui/fine-tuning-panels.js +928 -0
  352. package/dist/ui/fine-tuning-panels.js.map +1 -0
  353. package/dist/ui/index.d.ts +5 -0
  354. package/dist/ui/index.d.ts.map +1 -0
  355. package/dist/ui/index.js +5 -0
  356. package/dist/ui/index.js.map +1 -0
  357. package/dist/ui/training-view-bundle.d.ts +3 -0
  358. package/dist/ui/training-view-bundle.d.ts.map +1 -0
  359. package/dist/ui/training-view-bundle.js +7 -0
  360. package/dist/ui/training-view-bundle.js.map +1 -0
  361. package/dist/views/bundle.js +5312 -0
  362. package/dist/views/bundle.js.map +1 -0
  363. package/package.json +7 -7
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/core/action-benchmark-runner.ts"],"sourcesContent":["import { spawn } from \"node:child_process\";\nimport { mkdir, readFile, writeFile } from \"node:fs/promises\";\nimport { join } from \"node:path\";\nimport { trainingStateRoot } from \"./training-config.js\";\nimport { defaultBunCommand, resolveWorkspaceRoot } from \"./workspace-runtime.js\";\n\nexport type ActionBenchmarkMatrixVariant = \"reference\" | \"base\" | \"trained\";\n\nexport interface ActionBenchmarkRunOptions {\n workspaceRoot?: string;\n bun?: string;\n outputDir?: string;\n useMocks?: boolean;\n forceTrajectoryCapture?: boolean;\n filter?: string;\n runsPerCase?: number;\n provider?: string;\n modelId?: string;\n runtimeModel?: string;\n smallModel?: string;\n largeModel?: string;\n baseUrl?: string;\n variant?: ActionBenchmarkMatrixVariant;\n tier?: string;\n benchmark?: string;\n datasetVersion?: string;\n codeCommit?: string;\n dryRun?: boolean;\n}\n\nexport interface ActionBenchmarkRunResult {\n workspaceRoot: string;\n appCoreRoot: string;\n outputDir: string;\n reportMarkdownPath: string;\n reportJsonPath: string;\n trajectoryDir: string;\n command: string[];\n env: Record<string, string>;\n stdout: string;\n stderr: string;\n exitCode: number;\n matrixSource: {\n path: string;\n modelId?: string;\n benchmark?: string;\n variant?: ActionBenchmarkMatrixVariant;\n tier?: string;\n provider?: string;\n datasetVersion?: string;\n codeCommit?: string;\n useMocks?: boolean;\n } | null;\n}\n\nfunction safeTimestamp(value: string): string {\n return value.replace(/[:.]/g, \"-\");\n}\n\nfunction positiveInt(value: number | undefined): number | undefined {\n return typeof value === \"number\" && Number.isFinite(value)\n ? 
Math.max(1, Math.floor(value))\n : undefined;\n}\n\nfunction collectProcess(\n command: string,\n args: string[],\n cwd: string,\n env: NodeJS.ProcessEnv,\n): Promise<{ stdout: string; stderr: string; exitCode: number }> {\n return new Promise((resolvePromise, reject) => {\n const child = spawn(command, args, {\n cwd,\n env,\n stdio: [\"ignore\", \"pipe\", \"pipe\"],\n });\n let stdout = \"\";\n let stderr = \"\";\n child.stdout.setEncoding(\"utf-8\");\n child.stderr.setEncoding(\"utf-8\");\n child.stdout.on(\"data\", (chunk) => {\n stdout += chunk;\n });\n child.stderr.on(\"data\", (chunk) => {\n stderr += chunk;\n });\n child.on(\"error\", reject);\n child.on(\"close\", (code) => {\n resolvePromise({ stdout, stderr, exitCode: code ?? 1 });\n });\n });\n}\n\nfunction stringSetting(value: string | undefined): string | undefined {\n return value?.trim() || undefined;\n}\n\nfunction modelListUrl(baseUrl: string): string {\n const normalized = baseUrl.trim().replace(/\\/+$/, \"\");\n return `${normalized}/models`;\n}\n\nfunction localModelIdMatches(availableId: string, requestedId: string): boolean {\n return (\n availableId === requestedId ||\n availableId === `${requestedId}:latest` ||\n `${availableId}:latest` === requestedId\n );\n}\n\nasync function localModelIds(baseUrl: string): Promise<string[]> {\n const response = await fetch(modelListUrl(baseUrl));\n if (!response.ok) {\n throw new Error(\n `local model endpoint ${modelListUrl(baseUrl)} returned ${response.status} ${response.statusText}`,\n );\n }\n const payload = await response.json();\n const data =\n payload &&\n typeof payload === \"object\" &&\n Array.isArray((payload as { data?: unknown }).data)\n ? (payload as { data: unknown[] }).data\n : [];\n return data\n .map((item) =>\n item && typeof item === \"object\"\n ? 
(item as { id?: unknown; name?: unknown }).id ??\n (item as { id?: unknown; name?: unknown }).name\n : item,\n )\n .filter((id): id is string => typeof id === \"string\" && id.length > 0);\n}\n\nexport async function assertLocalBenchmarkModelAvailable(\n options: ActionBenchmarkRunOptions,\n): Promise<void> {\n if (effectiveUseMocks(options)) return;\n if (options.provider !== \"local-llama-cpp\") return;\n const requestedModel = stringSetting(options.runtimeModel);\n if (!requestedModel) return;\n const baseUrl = stringSetting(options.baseUrl) ?? \"http://localhost:11434/v1\";\n const ids = await localModelIds(baseUrl);\n if (ids.some((id) => localModelIdMatches(id, requestedModel))) return;\n throw new Error(\n `local action benchmark model \"${requestedModel}\" is not available at ${modelListUrl(\n baseUrl,\n )}; available models: ${ids.length > 0 ? ids.join(\", \") : \"none\"}`,\n );\n}\n\nfunction effectiveUseMocks(options: ActionBenchmarkRunOptions): boolean {\n return options.useMocks ?? options.dryRun === true;\n}\n\nfunction matrixSourceForReport(\n reportJsonPath: string,\n options: ActionBenchmarkRunOptions,\n): ActionBenchmarkRunResult[\"matrixSource\"] {\n const modelId =\n stringSetting(options.modelId) ?? stringSetting(options.provider);\n const variant = options.variant;\n if (!modelId || !variant) return null;\n return {\n path: reportJsonPath,\n modelId,\n variant,\n benchmark: stringSetting(options.benchmark),\n tier: stringSetting(options.tier),\n provider: stringSetting(options.provider),\n datasetVersion: stringSetting(options.datasetVersion),\n codeCommit: stringSetting(options.codeCommit),\n useMocks: effectiveUseMocks(options),\n };\n}\n\nfunction dryRunCaseSample(options: ActionBenchmarkRunOptions, trajectoryDir: string) {\n const tier = stringSetting(options.tier) ?? \"2b\";\n const variant = stringSetting(options.variant) ?? \"trained\";\n const modelId = stringSetting(options.modelId) ?? 
stringSetting(options.runtimeModel);\n return {\n caseId: `dry-run-${tier}-${variant}-action-selection`,\n prompt: \"Can you check my calendar?\",\n expectedAction: \"CHECK_RUNTIME\",\n actualAction: null,\n pass: false,\n response: \"Dry-run benchmark provenance sample; no model inference executed.\",\n latencyMs: 0,\n trajectoryPath: join(\n trajectoryDir,\n `dry-run-${tier}-${variant}-action-selection.json`,\n ),\n dryRun: true,\n modelId,\n tier,\n variant,\n };\n}\n\nasync function annotateBenchmarkReportSource(\n reportJsonPath: string,\n options: ActionBenchmarkRunOptions,\n): Promise<void> {\n const matrixSource = matrixSourceForReport(reportJsonPath, options);\n if (!matrixSource) return;\n const parsed = JSON.parse(await readFile(reportJsonPath, \"utf8\"));\n if (!parsed || typeof parsed !== \"object\" || Array.isArray(parsed)) return;\n const report = parsed as Record<string, unknown>;\n const existingSource =\n report.source &&\n typeof report.source === \"object\" &&\n !Array.isArray(report.source)\n ? 
(report.source as Record<string, unknown>)\n : {};\n report.source = {\n ...existingSource,\n modelId: matrixSource.modelId,\n variant: matrixSource.variant,\n benchmark: matrixSource.benchmark,\n tier: matrixSource.tier,\n provider: matrixSource.provider,\n datasetVersion: matrixSource.datasetVersion,\n codeCommit: matrixSource.codeCommit,\n useMocks: matrixSource.useMocks,\n };\n await writeFile(\n reportJsonPath,\n `${JSON.stringify(report, null, 2)}\\n`,\n \"utf8\",\n );\n}\n\nexport function buildActionBenchmarkCommand(): string[] {\n return [\n \"x\",\n \"vitest\",\n \"run\",\n \"--config\",\n \"../test/vitest/real.config.ts\",\n \"test/benchmarks/action-selection.real.test.ts\",\n \"--exclude\",\n \".git/**\",\n \"--exclude\",\n \".eliza/**\",\n ];\n}\n\nexport function buildActionBenchmarkEnv(\n options: ActionBenchmarkRunOptions,\n resolved: {\n reportMarkdownPath: string;\n reportJsonPath: string;\n trajectoryDir: string;\n },\n): Record<string, string> {\n const env: Record<string, string> = {\n ELIZA_RUN_ACTION_BENCHMARK: \"1\",\n ELIZA_ACTION_BENCHMARK_REPORT_PATH: resolved.reportMarkdownPath,\n ELIZA_ACTION_BENCHMARK_REPORT_JSON_PATH: resolved.reportJsonPath,\n ELIZA_ACTION_BENCHMARK_TRAJECTORY_DIR: resolved.trajectoryDir,\n };\n if (effectiveUseMocks(options)) env.ELIZA_BENCHMARK_USE_MOCKS = \"1\";\n if (options.forceTrajectoryCapture !== false) {\n env.ELIZA_DUMP_TRAJECTORIES = \"1\";\n env.ELIZA_TRAJECTORY_MARKDOWN = \"1\";\n }\n const runsPerCase = positiveInt(options.runsPerCase);\n if (runsPerCase) env.ELIZA_BENCHMARK_RUNS_PER_CASE = String(runsPerCase);\n if (options.filter?.trim())\n env.ELIZA_BENCHMARK_FILTER = options.filter.trim();\n if (options.provider?.trim()) {\n const provider = options.provider.trim();\n env.ELIZA_BENCHMARK_PROVIDER = provider;\n if (provider === \"local-llama-cpp\") {\n env.LOCAL_LLAMA_CPP_API_KEY =\n process.env.LOCAL_LLAMA_CPP_API_KEY ?? 
\"local\";\n }\n }\n const runtimeModel = stringSetting(options.runtimeModel);\n const smallModel = stringSetting(options.smallModel) ?? runtimeModel;\n const largeModel = stringSetting(options.largeModel) ?? runtimeModel;\n if (smallModel) env.ELIZA_LIVE_TEST_SMALL_MODEL = smallModel;\n if (largeModel) env.ELIZA_LIVE_TEST_LARGE_MODEL = largeModel;\n if (options.baseUrl?.trim()) {\n env.ELIZA_LIVE_TEST_LOCAL_LLAMA_CPP_BASE_URL = options.baseUrl.trim();\n }\n return env;\n}\n\nexport async function runActionBenchmark(\n options: ActionBenchmarkRunOptions = {},\n): Promise<ActionBenchmarkRunResult> {\n const workspaceRoot = resolveWorkspaceRoot(options.workspaceRoot);\n const appCoreRoot = join(workspaceRoot, \"packages\", \"app-core\");\n const stamp = safeTimestamp(new Date().toISOString());\n const outputDir =\n options.outputDir ??\n join(trainingStateRoot(), \"benchmarks\", \"action-selection\", stamp);\n const reportMarkdownPath = join(outputDir, \"action-benchmark-report.md\");\n const reportJsonPath = join(outputDir, \"action-benchmark-report.json\");\n const trajectoryDir = join(outputDir, \"trajectories\");\n await mkdir(outputDir, { recursive: true });\n await mkdir(trajectoryDir, { recursive: true });\n\n const command = options.bun ?? 
defaultBunCommand();\n const args = buildActionBenchmarkCommand();\n const benchmarkEnv = buildActionBenchmarkEnv(options, {\n reportMarkdownPath,\n reportJsonPath,\n trajectoryDir,\n });\n const reportMatrixSource = matrixSourceForReport(reportJsonPath, options);\n if (options.dryRun) {\n const sample = dryRunCaseSample(options, trajectoryDir);\n await writeFile(\n String(sample.trajectoryPath),\n `${JSON.stringify(\n {\n schema: \"eliza_action_benchmark_dry_run_trajectory\",\n schemaVersion: 1,\n generatedAt: new Date().toISOString(),\n source: {\n kind: \"app_core_action_selection_benchmark\",\n dryRun: true,\n modelId: sample.modelId,\n tier: sample.tier,\n variant: sample.variant,\n },\n caseId: sample.caseId,\n prompt: sample.prompt,\n expectedAction: sample.expectedAction,\n actualAction: sample.actualAction,\n pass: sample.pass,\n response: sample.response,\n events: [\n {\n type: \"DRY_RUN_BENCHMARK_CASE\",\n timestamp: new Date().toISOString(),\n data: {\n reason: \"No model inference executed in dry-run mode.\",\n },\n },\n ],\n },\n null,\n 2,\n )}\\n`,\n \"utf8\",\n );\n await writeFile(\n reportJsonPath,\n `${JSON.stringify(\n {\n schema: \"eliza_action_selection_benchmark_report\",\n schemaVersion: 1,\n generatedAt: new Date().toISOString(),\n source: {\n kind: \"app_core_action_selection_benchmark\",\n trajectoryDir,\n reportMarkdownPath,\n modelId: reportMatrixSource?.modelId,\n variant: reportMatrixSource?.variant,\n benchmark: reportMatrixSource?.benchmark,\n tier: reportMatrixSource?.tier,\n provider: reportMatrixSource?.provider,\n datasetVersion: reportMatrixSource?.datasetVersion,\n codeCommit: reportMatrixSource?.codeCommit,\n useMocks: reportMatrixSource?.useMocks,\n dryRun: true,\n },\n summary: {\n total: 1,\n passed: 0,\n failed: 1,\n accuracy: 0,\n plannerAccuracy: 0,\n executionAccuracy: 0,\n },\n failureModes: {\n dry_run: 1,\n },\n failures: [\n {\n caseId: sample.caseId,\n failureMode: \"dry_run\",\n reason: \"No model inference 
executed in dry-run mode.\",\n },\n ],\n results: [sample],\n dryRun: true,\n },\n null,\n 2,\n )}\\n`,\n \"utf8\",\n );\n await writeFile(\n reportMarkdownPath,\n \"# Action Selection Benchmark Dry Run\\n\\nNo benchmark cases were executed.\\n\",\n \"utf8\",\n );\n return {\n workspaceRoot,\n appCoreRoot,\n outputDir,\n reportMarkdownPath,\n reportJsonPath,\n trajectoryDir,\n command: [command, ...args],\n env: benchmarkEnv,\n stdout: \"[DRY RUN] Would run app-core action selection benchmark.\",\n stderr: \"\",\n exitCode: 0,\n matrixSource: reportMatrixSource,\n };\n }\n\n await assertLocalBenchmarkModelAvailable(options);\n\n const proc = await collectProcess(command, args, appCoreRoot, {\n ...process.env,\n ...benchmarkEnv,\n });\n if (proc.exitCode !== 0) {\n throw new Error(\n `action selection benchmark exited with code ${proc.exitCode}: ${\n proc.stderr || proc.stdout\n }`,\n );\n }\n await annotateBenchmarkReportSource(reportJsonPath, options);\n return {\n workspaceRoot,\n appCoreRoot,\n outputDir,\n reportMarkdownPath,\n reportJsonPath,\n trajectoryDir,\n command: [command, ...args],\n env: benchmarkEnv,\n stdout: proc.stdout,\n stderr: proc.stderr,\n exitCode: proc.exitCode,\n matrixSource: matrixSourceForReport(reportJsonPath, options),\n 
};\n}\n"],"mappings":"AAAA,SAAS,aAAa;AACtB,SAAS,OAAO,UAAU,iBAAiB;AAC3C,SAAS,YAAY;AACrB,SAAS,yBAAyB;AAClC,SAAS,mBAAmB,4BAA4B;AAmDxD,SAAS,cAAc,OAAuB;AAC5C,SAAO,MAAM,QAAQ,SAAS,GAAG;AACnC;AAEA,SAAS,YAAY,OAA+C;AAClE,SAAO,OAAO,UAAU,YAAY,OAAO,SAAS,KAAK,IACrD,KAAK,IAAI,GAAG,KAAK,MAAM,KAAK,CAAC,IAC7B;AACN;AAEA,SAAS,eACP,SACA,MACA,KACA,KAC+D;AAC/D,SAAO,IAAI,QAAQ,CAAC,gBAAgB,WAAW;AAC7C,UAAM,QAAQ,MAAM,SAAS,MAAM;AAAA,MACjC;AAAA,MACA;AAAA,MACA,OAAO,CAAC,UAAU,QAAQ,MAAM;AAAA,IAClC,CAAC;AACD,QAAI,SAAS;AACb,QAAI,SAAS;AACb,UAAM,OAAO,YAAY,OAAO;AAChC,UAAM,OAAO,YAAY,OAAO;AAChC,UAAM,OAAO,GAAG,QAAQ,CAAC,UAAU;AACjC,gBAAU;AAAA,IACZ,CAAC;AACD,UAAM,OAAO,GAAG,QAAQ,CAAC,UAAU;AACjC,gBAAU;AAAA,IACZ,CAAC;AACD,UAAM,GAAG,SAAS,MAAM;AACxB,UAAM,GAAG,SAAS,CAAC,SAAS;AAC1B,qBAAe,EAAE,QAAQ,QAAQ,UAAU,QAAQ,EAAE,CAAC;AAAA,IACxD,CAAC;AAAA,EACH,CAAC;AACH;AAEA,SAAS,cAAc,OAA+C;AACpE,SAAO,OAAO,KAAK,KAAK;AAC1B;AAEA,SAAS,aAAa,SAAyB;AAC7C,QAAM,aAAa,QAAQ,KAAK,EAAE,QAAQ,QAAQ,EAAE;AACpD,SAAO,GAAG,UAAU;AACtB;AAEA,SAAS,oBAAoB,aAAqB,aAA8B;AAC9E,SACE,gBAAgB,eAChB,gBAAgB,GAAG,WAAW,aAC9B,GAAG,WAAW,cAAc;AAEhC;AAEA,eAAe,cAAc,SAAoC;AAC/D,QAAM,WAAW,MAAM,MAAM,aAAa,OAAO,CAAC;AAClD,MAAI,CAAC,SAAS,IAAI;AAChB,UAAM,IAAI;AAAA,MACR,wBAAwB,aAAa,OAAO,CAAC,aAAa,SAAS,MAAM,IAAI,SAAS,UAAU;AAAA,IAClG;AAAA,EACF;AACA,QAAM,UAAU,MAAM,SAAS,KAAK;AACpC,QAAM,OACJ,WACA,OAAO,YAAY,YACnB,MAAM,QAAS,QAA+B,IAAI,IAC7C,QAAgC,OACjC,CAAC;AACP,SAAO,KACJ;AAAA,IAAI,CAAC,SACJ,QAAQ,OAAO,SAAS,WACnB,KAA0C,MAC1C,KAA0C,OAC3C;AAAA,EACN,EACC,OAAO,CAAC,OAAqB,OAAO,OAAO,YAAY,GAAG,SAAS,CAAC;AACzE;AAEA,eAAsB,mCACpB,SACe;AACf,MAAI,kBAAkB,OAAO,EAAG;AAChC,MAAI,QAAQ,aAAa,kBAAmB;AAC5C,QAAM,iBAAiB,cAAc,QAAQ,YAAY;AACzD,MAAI,CAAC,eAAgB;AACrB,QAAM,UAAU,cAAc,QAAQ,OAAO,KAAK;AAClD,QAAM,MAAM,MAAM,cAAc,OAAO;AACvC,MAAI,IAAI,KAAK,CAAC,OAAO,oBAAoB,IAAI,cAAc,CAAC,EAAG;AAC/D,QAAM,IAAI;AAAA,IACR,iCAAiC,cAAc,yBAAyB;AAAA,MACtE;AAAA,IACF,CAAC,uBAAuB,IAAI,SAAS,IAAI,IAAI,KAAK,IAAI,IAAI,MAAM;AAAA,EAClE;AACF;AAEA,SAAS,kBAAkB,SAA6C;AACtE,SAAO,QAAQ,YAAY,QAAQ,WAAW;AAChD;AAEA,SAAS,sBACP,gBACA,SAC0C;AAC1C,QAAM,
UACJ,cAAc,QAAQ,OAAO,KAAK,cAAc,QAAQ,QAAQ;AAClE,QAAM,UAAU,QAAQ;AACxB,MAAI,CAAC,WAAW,CAAC,QAAS,QAAO;AACjC,SAAO;AAAA,IACL,MAAM;AAAA,IACN;AAAA,IACA;AAAA,IACA,WAAW,cAAc,QAAQ,SAAS;AAAA,IAC1C,MAAM,cAAc,QAAQ,IAAI;AAAA,IAChC,UAAU,cAAc,QAAQ,QAAQ;AAAA,IACxC,gBAAgB,cAAc,QAAQ,cAAc;AAAA,IACpD,YAAY,cAAc,QAAQ,UAAU;AAAA,IAC5C,UAAU,kBAAkB,OAAO;AAAA,EACrC;AACF;AAEA,SAAS,iBAAiB,SAAoC,eAAuB;AACnF,QAAM,OAAO,cAAc,QAAQ,IAAI,KAAK;AAC5C,QAAM,UAAU,cAAc,QAAQ,OAAO,KAAK;AAClD,QAAM,UAAU,cAAc,QAAQ,OAAO,KAAK,cAAc,QAAQ,YAAY;AACpF,SAAO;AAAA,IACL,QAAQ,WAAW,IAAI,IAAI,OAAO;AAAA,IAClC,QAAQ;AAAA,IACR,gBAAgB;AAAA,IAChB,cAAc;AAAA,IACd,MAAM;AAAA,IACN,UAAU;AAAA,IACV,WAAW;AAAA,IACX,gBAAgB;AAAA,MACd;AAAA,MACA,WAAW,IAAI,IAAI,OAAO;AAAA,IAC5B;AAAA,IACA,QAAQ;AAAA,IACR;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAEA,eAAe,8BACb,gBACA,SACe;AACf,QAAM,eAAe,sBAAsB,gBAAgB,OAAO;AAClE,MAAI,CAAC,aAAc;AACnB,QAAM,SAAS,KAAK,MAAM,MAAM,SAAS,gBAAgB,MAAM,CAAC;AAChE,MAAI,CAAC,UAAU,OAAO,WAAW,YAAY,MAAM,QAAQ,MAAM,EAAG;AACpE,QAAM,SAAS;AACf,QAAM,iBACJ,OAAO,UACP,OAAO,OAAO,WAAW,YACzB,CAAC,MAAM,QAAQ,OAAO,MAAM,IACvB,OAAO,SACR,CAAC;AACP,SAAO,SAAS;AAAA,IACd,GAAG;AAAA,IACH,SAAS,aAAa;AAAA,IACtB,SAAS,aAAa;AAAA,IACtB,WAAW,aAAa;AAAA,IACxB,MAAM,aAAa;AAAA,IACnB,UAAU,aAAa;AAAA,IACvB,gBAAgB,aAAa;AAAA,IAC7B,YAAY,aAAa;AAAA,IACzB,UAAU,aAAa;AAAA,EACzB;AACA,QAAM;AAAA,IACJ;AAAA,IACA,GAAG,KAAK,UAAU,QAAQ,MAAM,CAAC,CAAC;AAAA;AAAA,IAClC;AAAA,EACF;AACF;AAEO,SAAS,8BAAwC;AACtD,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAEO,SAAS,wBACd,SACA,UAKwB;AACxB,QAAM,MAA8B;AAAA,IAClC,4BAA4B;AAAA,IAC5B,oCAAoC,SAAS;AAAA,IAC7C,yCAAyC,SAAS;AAAA,IAClD,uCAAuC,SAAS;AAAA,EAClD;AACA,MAAI,kBAAkB,OAAO,EAAG,KAAI,4BAA4B;AAChE,MAAI,QAAQ,2BAA2B,OAAO;AAC5C,QAAI,0BAA0B;AAC9B,QAAI,4BAA4B;AAAA,EAClC;AACA,QAAM,cAAc,YAAY,QAAQ,WAAW;AACnD,MAAI,YAAa,KAAI,gCAAgC,OAAO,WAAW;AACvE,MAAI,QAAQ,QAAQ,KAAK;AACvB,QAAI,yBAAyB,QAAQ,OAAO,KAAK;AACnD,MAAI,QAAQ,UAAU,KAAK,GAAG;AAC5B,UAAM,WAAW,QAAQ,SAAS,KAAK;AACvC,QAAI,2BAA2B;AAC/B,QAAI,aAAa,mBAAmB;AA
ClC,UAAI,0BACF,QAAQ,IAAI,2BAA2B;AAAA,IAC3C;AAAA,EACF;AACA,QAAM,eAAe,cAAc,QAAQ,YAAY;AACvD,QAAM,aAAa,cAAc,QAAQ,UAAU,KAAK;AACxD,QAAM,aAAa,cAAc,QAAQ,UAAU,KAAK;AACxD,MAAI,WAAY,KAAI,8BAA8B;AAClD,MAAI,WAAY,KAAI,8BAA8B;AAClD,MAAI,QAAQ,SAAS,KAAK,GAAG;AAC3B,QAAI,2CAA2C,QAAQ,QAAQ,KAAK;AAAA,EACtE;AACA,SAAO;AACT;AAEA,eAAsB,mBACpB,UAAqC,CAAC,GACH;AACnC,QAAM,gBAAgB,qBAAqB,QAAQ,aAAa;AAChE,QAAM,cAAc,KAAK,eAAe,YAAY,UAAU;AAC9D,QAAM,QAAQ,eAAc,oBAAI,KAAK,GAAE,YAAY,CAAC;AACpD,QAAM,YACJ,QAAQ,aACR,KAAK,kBAAkB,GAAG,cAAc,oBAAoB,KAAK;AACnE,QAAM,qBAAqB,KAAK,WAAW,4BAA4B;AACvE,QAAM,iBAAiB,KAAK,WAAW,8BAA8B;AACrE,QAAM,gBAAgB,KAAK,WAAW,cAAc;AACpD,QAAM,MAAM,WAAW,EAAE,WAAW,KAAK,CAAC;AAC1C,QAAM,MAAM,eAAe,EAAE,WAAW,KAAK,CAAC;AAE9C,QAAM,UAAU,QAAQ,OAAO,kBAAkB;AACjD,QAAM,OAAO,4BAA4B;AACzC,QAAM,eAAe,wBAAwB,SAAS;AAAA,IACpD;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AACD,QAAM,qBAAqB,sBAAsB,gBAAgB,OAAO;AACxE,MAAI,QAAQ,QAAQ;AAClB,UAAM,SAAS,iBAAiB,SAAS,aAAa;AACtD,UAAM;AAAA,MACJ,OAAO,OAAO,cAAc;AAAA,MAC5B,GAAG,KAAK;AAAA,QACN;AAAA,UACE,QAAQ;AAAA,UACR,eAAe;AAAA,UACf,cAAa,oBAAI,KAAK,GAAE,YAAY;AAAA,UACpC,QAAQ;AAAA,YACN,MAAM;AAAA,YACN,QAAQ;AAAA,YACR,SAAS,OAAO;AAAA,YAChB,MAAM,OAAO;AAAA,YACb,SAAS,OAAO;AAAA,UAClB;AAAA,UACA,QAAQ,OAAO;AAAA,UACf,QAAQ,OAAO;AAAA,UACf,gBAAgB,OAAO;AAAA,UACvB,cAAc,OAAO;AAAA,UACrB,MAAM,OAAO;AAAA,UACb,UAAU,OAAO;AAAA,UACjB,QAAQ;AAAA,YACN;AAAA,cACE,MAAM;AAAA,cACN,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,cAClC,MAAM;AAAA,gBACJ,QAAQ;AAAA,cACV;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,QACA;AAAA,QACA;AAAA,MACF,CAAC;AAAA;AAAA,MACD;AAAA,IACF;AACA,UAAM;AAAA,MACJ;AAAA,MACA,GAAG,KAAK;AAAA,QACN;AAAA,UACE,QAAQ;AAAA,UACR,eAAe;AAAA,UACf,cAAa,oBAAI,KAAK,GAAE,YAAY;AAAA,UACpC,QAAQ;AAAA,YACN,MAAM;AAAA,YACN;AAAA,YACA;AAAA,YACA,SAAS,oBAAoB;AAAA,YAC7B,SAAS,oBAAoB;AAAA,YAC7B,WAAW,oBAAoB;AAAA,YAC/B,MAAM,oBAAoB;AAAA,YAC1B,UAAU,oBAAoB;AAAA,YAC9B,gBAAgB,oBAAoB;AAAA,YACpC,YAAY,oBAAoB;AAAA,YAChC,UAAU,oBAAoB;AAAA,YAC9B,QAAQ;AAAA,UACV;AAAA,UACA,SAAS;AAAA,YACP,OAAO;AAAA,YACP,QAAQ;AAAA,YACR,QAAQ;AAAA,YACR,UAAU;AAAA,YACV,iBAAiB;AAAA,YACjB,mBAAmB;AAAA,UACrB
;AAAA,UACA,cAAc;AAAA,YACZ,SAAS;AAAA,UACX;AAAA,UACA,UAAU;AAAA,YACR;AAAA,cACE,QAAQ,OAAO;AAAA,cACf,aAAa;AAAA,cACb,QAAQ;AAAA,YACV;AAAA,UACF;AAAA,UACA,SAAS,CAAC,MAAM;AAAA,UAChB,QAAQ;AAAA,QACV;AAAA,QACA;AAAA,QACA;AAAA,MACF,CAAC;AAAA;AAAA,MACD;AAAA,IACF;AACA,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA;AAAA,IACF;AACA,WAAO;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,SAAS,CAAC,SAAS,GAAG,IAAI;AAAA,MAC1B,KAAK;AAAA,MACL,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,UAAU;AAAA,MACV,cAAc;AAAA,IAChB;AAAA,EACF;AAEA,QAAM,mCAAmC,OAAO;AAEhD,QAAM,OAAO,MAAM,eAAe,SAAS,MAAM,aAAa;AAAA,IAC5D,GAAG,QAAQ;AAAA,IACX,GAAG;AAAA,EACL,CAAC;AACD,MAAI,KAAK,aAAa,GAAG;AACvB,UAAM,IAAI;AAAA,MACR,+CAA+C,KAAK,QAAQ,KAC1D,KAAK,UAAU,KAAK,MACtB;AAAA,IACF;AAAA,EACF;AACA,QAAM,8BAA8B,gBAAgB,OAAO;AAC3D,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,SAAS,CAAC,SAAS,GAAG,IAAI;AAAA,IAC1B,KAAK;AAAA,IACL,QAAQ,KAAK;AAAA,IACb,QAAQ,KAAK;AAAA,IACb,UAAU,KAAK;AAAA,IACf,cAAc,sBAAsB,gBAAgB,OAAO;AAAA,EAC7D;AACF;","names":[]}
@@ -0,0 +1,72 @@
1
+ /**
2
+ * Filesystem helpers around the `OptimizedPromptService` store.
3
+ *
4
+ * Two responsibilities, both intentionally separate from the strict per-task
5
+ * artifact loader in `packages/core/src/services/optimized-prompt.ts`:
6
+ *
7
+ * 1. Persist `candidate_rejected_<timestamp>.json` files for runs the
8
+ * promotion gate refused. These live under `<task>/rejected/` so the
9
+ * strict artifact parser never picks them up at boot.
10
+ * 2. Prune the per-task directory to the most recent N promoted artifacts.
11
+ * Older `.json` files are removed so a long-running deployment doesn't
12
+ * accumulate unbounded history. The pruning is rollback-friendly: when
13
+ * operators want to revert, they pick from the retained N files.
14
+ *
15
+ * Both helpers operate on file mtime/timestamp, never on the artifact contents.
16
+ * The strict parser owns content validation; this module is filesystem-only.
17
+ */
18
+ /**
19
+ * Maximum number of promoted artifacts retained per task. Older files are
20
+ * deleted by `prunePromotedArtifacts`. Retaining 5 matches the W1-P3
21
+ * rollback budget (one current + four historical fallbacks).
22
+ */
23
+ export declare const DEFAULT_PROMOTED_ARTIFACT_RETENTION = 5;
24
+ /** Subdirectory under `<storeRoot>/<task>/` for rejected candidates. */
25
+ export declare const REJECTED_DIRNAME = "rejected";
26
+ export interface RejectedCandidatePayload {
27
+ /** ISO-8601 timestamp of when the candidate was rejected. */
28
+ rejectedAt: string;
29
+ /** Task the rejected candidate targeted. */
30
+ task: string;
31
+ /** Optimizer name that produced the candidate. */
32
+ optimizer: string;
33
+ /** The candidate prompt body that did not clear the gate. */
34
+ candidatePrompt: string;
35
+ /** Incumbent prompt body the gate evaluated against. */
36
+ incumbentPrompt: string;
37
+ /** Score / margin block from the promotion gate. */
38
+ scores: {
39
+ incumbentMeanScore: number;
40
+ incumbentStdDev: number;
41
+ candidateScore: number;
42
+ delta: number;
43
+ promotionMargin: number;
44
+ noiseThreshold: number;
45
+ incumbentReseeds: number;
46
+ examplesPerPass: number;
47
+ incumbentScores: number[];
48
+ };
49
+ /** Plain-English reason from the promotion gate. */
50
+ reason: string;
51
+ /** Backreference to the dataset that drove the run. */
52
+ datasetId: string;
53
+ /** Backreference to the run id from the training orchestrator. */
54
+ runId?: string;
55
+ }
56
+ /**
57
+ * Write a `candidate_rejected_<timestamp>.json` file under
58
+ * `<storeRoot>/<task>/rejected/`. Atomic (temp + rename). Returns the final
59
+ * path. The parent directory is created if missing.
60
+ */
61
+ export declare function writeRejectedCandidate(storeRoot: string, task: string, payload: RejectedCandidatePayload): Promise<string>;
62
+ /**
63
+ * Delete promoted artifacts older than the most recent `retain` (by mtime).
64
+ * Only `.json` files at the top level of `<storeRoot>/<task>/` are considered;
65
+ * the `rejected/` subdirectory is left alone.
66
+ *
67
+ * Returns the list of removed paths. No-op when at most `retain` files
68
+ * exist. Errors during stat/unlink propagate — the caller decides whether to
69
+ * fail the whole run.
70
+ */
71
+ export declare function prunePromotedArtifacts(storeRoot: string, task: string, retain?: number): Promise<string[]>;
72
+ //# sourceMappingURL=artifact-store.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"artifact-store.d.ts","sourceRoot":"","sources":["../../src/core/artifact-store.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAMH;;;;GAIG;AACH,eAAO,MAAM,mCAAmC,IAAI,CAAC;AAErD,wEAAwE;AACxE,eAAO,MAAM,gBAAgB,aAAa,CAAC;AAE3C,MAAM,WAAW,wBAAwB;IACvC,6DAA6D;IAC7D,UAAU,EAAE,MAAM,CAAC;IACnB,4CAA4C;IAC5C,IAAI,EAAE,MAAM,CAAC;IACb,kDAAkD;IAClD,SAAS,EAAE,MAAM,CAAC;IAClB,6DAA6D;IAC7D,eAAe,EAAE,MAAM,CAAC;IACxB,wDAAwD;IACxD,eAAe,EAAE,MAAM,CAAC;IACxB,oDAAoD;IACpD,MAAM,EAAE;QACN,kBAAkB,EAAE,MAAM,CAAC;QAC3B,eAAe,EAAE,MAAM,CAAC;QACxB,cAAc,EAAE,MAAM,CAAC;QACvB,KAAK,EAAE,MAAM,CAAC;QACd,eAAe,EAAE,MAAM,CAAC;QACxB,cAAc,EAAE,MAAM,CAAC;QACvB,gBAAgB,EAAE,MAAM,CAAC;QACzB,eAAe,EAAE,MAAM,CAAC;QACxB,eAAe,EAAE,MAAM,EAAE,CAAC;KAC3B,CAAC;IACF,oDAAoD;IACpD,MAAM,EAAE,MAAM,CAAC;IACf,uDAAuD;IACvD,SAAS,EAAE,MAAM,CAAC;IAClB,kEAAkE;IAClE,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;;;GAIG;AACH,wBAAsB,sBAAsB,CAC1C,SAAS,EAAE,MAAM,EACjB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,wBAAwB,GAChC,OAAO,CAAC,MAAM,CAAC,CAUjB;AAED;;;;;;;;GAQG;AACH,wBAAsB,sBAAsB,CAC1C,SAAS,EAAE,MAAM,EACjB,IAAI,EAAE,MAAM,EACZ,MAAM,GAAE,MAA4C,GACnD,OAAO,CAAC,MAAM,EAAE,CAAC,CA0BnB"}
@@ -0,0 +1,50 @@
1
+ import { existsSync, mkdirSync, readdirSync } from "node:fs";
2
+ import { rename, stat, unlink, writeFile } from "node:fs/promises";
3
+ import { dirname, join } from "node:path";
4
+ const DEFAULT_PROMOTED_ARTIFACT_RETENTION = 5; // default value of the `retain` parameter of prunePromotedArtifacts
5
+ const REJECTED_DIRNAME = "rejected"; // directory name writeRejectedCandidate joins under <storeRoot>/<task>/
6
/**
 * Persist a rejected candidate as
 * `<storeRoot>/<task>/<REJECTED_DIRNAME>/candidate_rejected_<stamp>.json`.
 *
 * `<stamp>` is `payload.rejectedAt` with every non-digit character removed.
 * The payload is serialized as 2-space-indented JSON followed by a newline,
 * written to a temp file next to the destination, then renamed into place.
 * If the write or the rename fails, the temp file is removed (best effort)
 * and the original error is rethrown, so failed runs do not leave
 * `*.tmp-<pid>-<ts>` files behind.
 *
 * @param {string} storeRoot - Root directory of the artifact store.
 * @param {string} task - Task name; used as a path segment under `storeRoot`.
 * @param {{ rejectedAt: string }} payload - Object to serialize; `rejectedAt`
 *   must be a string because it is used to build the file name.
 * @returns {Promise<string>} Path of the file that was written.
 * @throws Propagates any error from creating the directory, writing the temp
 *   file, or renaming it.
 */
async function writeRejectedCandidate(storeRoot, task, payload) {
  const dir = join(storeRoot, task, REJECTED_DIRNAME);
  // The temp file lives in `dir` as well, so one mkdir covers both paths.
  mkdirSync(dir, { recursive: true });
  const stamp = payload.rejectedAt.replace(/[^0-9]/g, "");
  const finalPath = join(dir, `candidate_rejected_${stamp}.json`);
  const tempPath = `${finalPath}.tmp-${process.pid}-${Date.now()}`;
  try {
    await writeFile(tempPath, `${JSON.stringify(payload, null, 2)}\n`, "utf-8");
    await rename(tempPath, finalPath);
  } catch (error) {
    // Best-effort cleanup: the temp file may not exist if writeFile failed
    // early, and the original failure is the error the caller needs to see.
    await unlink(tempPath).catch(() => {});
    throw error;
  }
  return finalPath;
}
18
/**
 * Delete all but the `retain` most recently modified `.json` files directly
 * inside `<storeRoot>/<task>/`.
 *
 * Only regular files whose names end in `.json` are considered; directories
 * (or other non-regular entries) that happen to carry a `.json` name are
 * skipped rather than passed to `unlink`. Nested directories are never
 * descended into.
 *
 * @param {string} storeRoot - Root directory of the artifact store.
 * @param {string} task - Task name; used as a path segment under `storeRoot`.
 * @param {number} [retain=DEFAULT_PROMOTED_ARTIFACT_RETENTION] - How many of
 *   the newest files (by mtime) to keep. Must be >= 1 and not NaN.
 * @returns {Promise<string[]>} Paths that were removed, newest first. Empty
 *   when the directory is missing or holds at most `retain` `.json` entries.
 * @throws {Error} When `retain` is NaN or less than 1.
 * @throws Propagates any error from `stat` or `unlink`.
 */
async function prunePromotedArtifacts(storeRoot, task, retain = DEFAULT_PROMOTED_ARTIFACT_RETENTION) {
  // NaN must be rejected explicitly: `NaN < 1` is false and `slice(NaN)`
  // behaves like `slice(0)`, which would delete every artifact.
  if (Number.isNaN(retain) || retain < 1) {
    throw new Error(
      `[artifact-store] prunePromotedArtifacts retain must be >= 1; got ${retain}`
    );
  }
  const dir = join(storeRoot, task);
  if (!existsSync(dir)) return [];
  const jsonNames = readdirSync(dir).filter((name) => name.endsWith(".json"));
  // Cheap early exit before touching the filesystem any further.
  if (jsonNames.length <= retain) return [];

  // The stat calls are independent of each other, so issue them together.
  const candidates = await Promise.all(
    jsonNames.map(async (name) => {
      const path = join(dir, name);
      const stats = await stat(path);
      return { path, mtimeMs: stats.mtimeMs, isFile: stats.isFile() };
    })
  );

  // Newest first. Equal mtimes fall back to descending path order so the
  // outcome does not depend on the order readdir happened to return.
  const files = candidates
    .filter((candidate) => candidate.isFile)
    .sort((a, b) => {
      if (a.mtimeMs !== b.mtimeMs) return b.mtimeMs - a.mtimeMs;
      if (a.path === b.path) return 0;
      return a.path < b.path ? 1 : -1;
    });

  // Unlink sequentially so a failure stops at the first problematic file.
  const removed = [];
  for (const entry of files.slice(retain)) {
    await unlink(entry.path);
    removed.push(entry.path);
  }
  return removed;
}
44
+ export {
45
+ DEFAULT_PROMOTED_ARTIFACT_RETENTION,
46
+ REJECTED_DIRNAME,
47
+ prunePromotedArtifacts,
48
+ writeRejectedCandidate
49
+ };
50
+ //# sourceMappingURL=artifact-store.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/core/artifact-store.ts"],"sourcesContent":["/**\n * Filesystem helpers around the `OptimizedPromptService` store.\n *\n * Two responsibilities, both intentionally separate from the strict per-task\n * artifact loader in `packages/core/src/services/optimized-prompt.ts`:\n *\n * 1. Persist `candidate_rejected_<timestamp>.json` files for runs the\n * promotion gate refused. These live under `<task>/rejected/` so the\n * strict artifact parser never picks them up at boot.\n * 2. Prune the per-task directory to the most recent N promoted artifacts.\n * Older `.json` files are removed so a long-running deployment doesn't\n * accumulate unbounded history. The pruning is rollback-friendly: when\n * operators want to revert, they pick from the retained N files.\n *\n * Both helpers operate on file mtime/timestamp, never on the artifact contents.\n * The strict parser owns content validation; this module is filesystem-only.\n */\n\nimport { existsSync, mkdirSync, readdirSync } from \"node:fs\";\nimport { rename, stat, unlink, writeFile } from \"node:fs/promises\";\nimport { dirname, join } from \"node:path\";\n\n/**\n * Maximum number of promoted artifacts retained per task. Older files are\n * deleted by `prunePromotedArtifacts`. Keeping 5 lines up with the W1-P3\n * rollback budget (one current + four historical fallbacks).\n */\nexport const DEFAULT_PROMOTED_ARTIFACT_RETENTION = 5;\n\n/** Subdirectory under `<storeRoot>/<task>/` for rejected candidates. */\nexport const REJECTED_DIRNAME = \"rejected\";\n\nexport interface RejectedCandidatePayload {\n /** ISO-8601 timestamp of when the candidate was rejected. */\n rejectedAt: string;\n /** Task the rejected candidate targeted. */\n task: string;\n /** Optimizer name that produced the candidate. */\n optimizer: string;\n /** The candidate prompt body that did not clear the gate. */\n candidatePrompt: string;\n /** Incumbent prompt body the gate evaluated against. 
*/\n incumbentPrompt: string;\n /** Score / margin block from the promotion gate. */\n scores: {\n incumbentMeanScore: number;\n incumbentStdDev: number;\n candidateScore: number;\n delta: number;\n promotionMargin: number;\n noiseThreshold: number;\n incumbentReseeds: number;\n examplesPerPass: number;\n incumbentScores: number[];\n };\n /** Plain-english reason from the promotion gate. */\n reason: string;\n /** Backreference to the dataset that drove the run. */\n datasetId: string;\n /** Backreference to the run id from the training orchestrator. */\n runId?: string;\n}\n\n/**\n * Write a `candidate_rejected_<timestamp>.json` file under\n * `<storeRoot>/<task>/rejected/`. Atomic (temp + rename). Returns the final\n * path. The parent directory is created if missing.\n */\nexport async function writeRejectedCandidate(\n storeRoot: string,\n task: string,\n payload: RejectedCandidatePayload,\n): Promise<string> {\n const dir = join(storeRoot, task, REJECTED_DIRNAME);\n mkdirSync(dir, { recursive: true });\n const stamp = payload.rejectedAt.replace(/[^0-9]/g, \"\");\n const finalPath = join(dir, `candidate_rejected_${stamp}.json`);\n const tempPath = `${finalPath}.tmp-${process.pid}-${Date.now()}`;\n mkdirSync(dirname(tempPath), { recursive: true });\n await writeFile(tempPath, `${JSON.stringify(payload, null, 2)}\\n`, \"utf-8\");\n await rename(tempPath, finalPath);\n return finalPath;\n}\n\n/**\n * Delete promoted artifacts older than the most recent `retain` (by mtime).\n * Only `.json` files at the top level of `<storeRoot>/<task>/` are considered;\n * the `rejected/` subdirectory is left alone.\n *\n * Returns the list of removed paths. No-op when fewer than `retain` files\n * exist. 
Errors during stat/unlink propagate — the caller decides whether to\n * fail the whole run.\n */\nexport async function prunePromotedArtifacts(\n storeRoot: string,\n task: string,\n retain: number = DEFAULT_PROMOTED_ARTIFACT_RETENTION,\n): Promise<string[]> {\n if (retain < 1) {\n throw new Error(\n `[artifact-store] prunePromotedArtifacts retain must be >= 1; got ${retain}`,\n );\n }\n const dir = join(storeRoot, task);\n if (!existsSync(dir)) return [];\n const entries = readdirSync(dir);\n const jsonFiles = entries.filter((name) => name.endsWith(\".json\"));\n if (jsonFiles.length <= retain) return [];\n // Sort by mtime descending so the newest files are first; we delete the tail.\n const withStats: Array<{ path: string; mtimeMs: number }> = [];\n for (const name of jsonFiles) {\n const path = join(dir, name);\n const stats = await stat(path);\n withStats.push({ path, mtimeMs: stats.mtimeMs });\n }\n withStats.sort((a, b) => b.mtimeMs - a.mtimeMs);\n const toRemove = withStats.slice(retain);\n const removed: string[] = [];\n for (const entry of toRemove) {\n await unlink(entry.path);\n removed.push(entry.path);\n }\n return 
removed;\n}\n"],"mappings":"AAkBA,SAAS,YAAY,WAAW,mBAAmB;AACnD,SAAS,QAAQ,MAAM,QAAQ,iBAAiB;AAChD,SAAS,SAAS,YAAY;AAOvB,MAAM,sCAAsC;AAG5C,MAAM,mBAAmB;AAsChC,eAAsB,uBACpB,WACA,MACA,SACiB;AACjB,QAAM,MAAM,KAAK,WAAW,MAAM,gBAAgB;AAClD,YAAU,KAAK,EAAE,WAAW,KAAK,CAAC;AAClC,QAAM,QAAQ,QAAQ,WAAW,QAAQ,WAAW,EAAE;AACtD,QAAM,YAAY,KAAK,KAAK,sBAAsB,KAAK,OAAO;AAC9D,QAAM,WAAW,GAAG,SAAS,QAAQ,QAAQ,GAAG,IAAI,KAAK,IAAI,CAAC;AAC9D,YAAU,QAAQ,QAAQ,GAAG,EAAE,WAAW,KAAK,CAAC;AAChD,QAAM,UAAU,UAAU,GAAG,KAAK,UAAU,SAAS,MAAM,CAAC,CAAC;AAAA,GAAM,OAAO;AAC1E,QAAM,OAAO,UAAU,SAAS;AAChC,SAAO;AACT;AAWA,eAAsB,uBACpB,WACA,MACA,SAAiB,qCACE;AACnB,MAAI,SAAS,GAAG;AACd,UAAM,IAAI;AAAA,MACR,oEAAoE,MAAM;AAAA,IAC5E;AAAA,EACF;AACA,QAAM,MAAM,KAAK,WAAW,IAAI;AAChC,MAAI,CAAC,WAAW,GAAG,EAAG,QAAO,CAAC;AAC9B,QAAM,UAAU,YAAY,GAAG;AAC/B,QAAM,YAAY,QAAQ,OAAO,CAAC,SAAS,KAAK,SAAS,OAAO,CAAC;AACjE,MAAI,UAAU,UAAU,OAAQ,QAAO,CAAC;AAExC,QAAM,YAAsD,CAAC;AAC7D,aAAW,QAAQ,WAAW;AAC5B,UAAM,OAAO,KAAK,KAAK,IAAI;AAC3B,UAAM,QAAQ,MAAM,KAAK,IAAI;AAC7B,cAAU,KAAK,EAAE,MAAM,SAAS,MAAM,QAAQ,CAAC;AAAA,EACjD;AACA,YAAU,KAAK,CAAC,GAAG,MAAM,EAAE,UAAU,EAAE,OAAO;AAC9C,QAAM,WAAW,UAAU,MAAM,MAAM;AACvC,QAAM,UAAoB,CAAC;AAC3B,aAAW,SAAS,UAAU;AAC5B,UAAM,OAAO,MAAM,IAAI;AACvB,YAAQ,KAAK,MAAM,IAAI;AAAA,EACzB;AACA,SAAO;AACT;","names":[]}
@@ -0,0 +1,102 @@
1
+ export declare const BENCHMARK_MATRIX_ARTIFACT_SCHEMA = "eliza_benchmark_matrix_artifact";
2
+ export declare const BENCHMARK_MATRIX_ARTIFACT_VERSION = 1;
3
+ export declare const ACTION_BENCHMARK_REPORT_SCHEMA = "eliza_action_selection_benchmark_report";
4
+ export declare const ACTION_SELECTION_BENCHMARK_ID = "eliza_harness_action_selection";
5
+ export declare const LOCAL_EVAL_COMPARISON_BENCHMARK_ID = "eliza_harness_local_eval_comparison";
6
+ export declare const ELIZA_ONE_MATRIX_TIERS: readonly ["0b", "2b", "4b", "9b", "27b"];
7
+ export type ElizaOneMatrixTier = (typeof ELIZA_ONE_MATRIX_TIERS)[number];
8
+ export type BenchmarkMatrixVariant = "reference" | "base" | "trained";
9
+ export interface BenchmarkMatrixRowInput {
10
+ modelId: string;
11
+ benchmark: string;
12
+ score: number;
13
+ variant: BenchmarkMatrixVariant;
14
+ tier?: string;
15
+ provider?: string;
16
+ datasetVersion?: string;
17
+ codeCommit?: string;
18
+ ts?: number | string;
19
+ metrics?: Record<string, unknown>;
20
+ raw?: Record<string, unknown>;
21
+ }
22
+ export interface BenchmarkMatrixInput {
23
+ rows: BenchmarkMatrixRowInput[];
24
+ outputDir?: string;
25
+ generatedAt?: string;
26
+ referenceModelId?: string;
27
+ source?: Record<string, unknown>;
28
+ }
29
+ export interface BenchmarkMatrixArtifactSource {
30
+ path: string;
31
+ modelId?: string;
32
+ benchmark?: string;
33
+ variant?: BenchmarkMatrixVariant;
34
+ tier?: string;
35
+ provider?: string;
36
+ datasetVersion?: string;
37
+ codeCommit?: string;
38
+ useMocks?: boolean;
39
+ }
40
+ export interface BenchmarkMatrixFromArtifactsInput {
41
+ artifacts: BenchmarkMatrixArtifactSource[];
42
+ outputDir?: string;
43
+ generatedAt?: string;
44
+ referenceModelId?: string;
45
+ source?: Record<string, unknown>;
46
+ }
47
+ export interface BenchmarkMatrixCell {
48
+ modelId: string;
49
+ benchmark: string;
50
+ score: number;
51
+ variant: BenchmarkMatrixVariant;
52
+ tier: string | null;
53
+ provider: string | null;
54
+ datasetVersion: string | null;
55
+ codeCommit: string | null;
56
+ ts: number | string | null;
57
+ metrics: Record<string, unknown>;
58
+ raw: Record<string, unknown>;
59
+ }
60
+ export interface BenchmarkMatrixComparison {
61
+ tier: string;
62
+ benchmark: string;
63
+ baseModelId: string | null;
64
+ trainedModelId: string | null;
65
+ referenceModelId: string | null;
66
+ baseScore: number | null;
67
+ trainedScore: number | null;
68
+ referenceScore: number | null;
69
+ improvementAbsolute: number | null;
70
+ improvementPercent: number | null;
71
+ trainedVsReferenceAbsolute: number | null;
72
+ trainedVsReferencePercent: number | null;
73
+ dryRun: boolean;
74
+ }
75
+ export interface BenchmarkMatrixArtifact {
76
+ schema: typeof BENCHMARK_MATRIX_ARTIFACT_SCHEMA;
77
+ version: typeof BENCHMARK_MATRIX_ARTIFACT_VERSION;
78
+ generatedAt: string;
79
+ source: Record<string, unknown>;
80
+ referenceModelId: string | null;
81
+ tiers: string[];
82
+ benchmarks: string[];
83
+ counts: {
84
+ rows: number;
85
+ comparisons: number;
86
+ tiers: number;
87
+ benchmarks: number;
88
+ };
89
+ rows: BenchmarkMatrixCell[];
90
+ comparisons: BenchmarkMatrixComparison[];
91
+ }
92
+ export interface BenchmarkMatrixArtifactResult {
93
+ outputDir: string;
94
+ artifactPath: string;
95
+ artifact: BenchmarkMatrixArtifact;
96
+ }
97
+ export declare function buildBenchmarkMatrixRowsFromArtifactPayload(payload: Record<string, unknown>, source: BenchmarkMatrixArtifactSource): BenchmarkMatrixRowInput[];
98
+ export declare function buildBenchmarkMatrixRowsFromArtifacts(artifacts: BenchmarkMatrixArtifactSource[]): Promise<BenchmarkMatrixRowInput[]>;
99
+ export declare function buildBenchmarkMatrixArtifactPayload(input: BenchmarkMatrixInput): BenchmarkMatrixArtifact;
100
+ export declare function writeBenchmarkMatrixArtifact(input: BenchmarkMatrixInput): Promise<BenchmarkMatrixArtifactResult>;
101
+ export declare function writeBenchmarkMatrixArtifactFromArtifacts(input: BenchmarkMatrixFromArtifactsInput): Promise<BenchmarkMatrixArtifactResult>;
102
+ //# sourceMappingURL=benchmark-matrix-artifact.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"benchmark-matrix-artifact.d.ts","sourceRoot":"","sources":["../../src/core/benchmark-matrix-artifact.ts"],"names":[],"mappings":"AAUA,eAAO,MAAM,gCAAgC,oCACV,CAAC;AACpC,eAAO,MAAM,iCAAiC,IAAI,CAAC;AACnD,eAAO,MAAM,8BAA8B,4CACA,CAAC;AAC5C,eAAO,MAAM,6BAA6B,mCAAmC,CAAC;AAC9E,eAAO,MAAM,kCAAkC,wCACR,CAAC;AAExC,eAAO,MAAM,sBAAsB,0CAMzB,CAAC;AAEX,MAAM,MAAM,kBAAkB,GAAG,CAAC,OAAO,sBAAsB,CAAC,CAAC,MAAM,CAAC,CAAC;AACzE,MAAM,MAAM,sBAAsB,GAAG,WAAW,GAAG,MAAM,GAAG,SAAS,CAAC;AAEtE,MAAM,WAAW,uBAAuB;IACtC,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,sBAAsB,CAAC;IAChC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,EAAE,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAClC,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAC/B;AAED,MAAM,WAAW,oBAAoB;IACnC,IAAI,EAAE,uBAAuB,EAAE,CAAC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAClC;AAED,MAAM,WAAW,6BAA6B;IAC5C,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,sBAAsB,CAAC;IACjC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,OAAO,CAAC;CACpB;AAED,MAAM,WAAW,iCAAiC;IAChD,SAAS,EAAE,6BAA6B,EAAE,CAAC;IAC3C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAClC;AAED,MAAM,WAAW,mBAAmB;IAClC,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,sBAAsB,CAAC;IAChC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,EAAE,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACjC,GAAG,EA
AE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAC9B;AAED,MAAM,WAAW,yBAAyB;IACxC,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,kBAAkB,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC,0BAA0B,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1C,yBAAyB,EAAE,MAAM,GAAG,IAAI,CAAC;IACzC,MAAM,EAAE,OAAO,CAAC;CACjB;AAED,MAAM,WAAW,uBAAuB;IACtC,MAAM,EAAE,OAAO,gCAAgC,CAAC;IAChD,OAAO,EAAE,OAAO,iCAAiC,CAAC;IAClD,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAChC,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,MAAM,EAAE;QACN,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,KAAK,EAAE,MAAM,CAAC;QACd,UAAU,EAAE,MAAM,CAAC;KACpB,CAAC;IACF,IAAI,EAAE,mBAAmB,EAAE,CAAC;IAC5B,WAAW,EAAE,yBAAyB,EAAE,CAAC;CAC1C;AAED,MAAM,WAAW,6BAA6B;IAC5C,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,uBAAuB,CAAC;CACnC;AAwXD,wBAAgB,2CAA2C,CACzD,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAChC,MAAM,EAAE,6BAA6B,GACpC,uBAAuB,EAAE,CAW3B;AAED,wBAAsB,qCAAqC,CACzD,SAAS,EAAE,6BAA6B,EAAE,GACzC,OAAO,CAAC,uBAAuB,EAAE,CAAC,CASpC;AAED,wBAAgB,mCAAmC,CACjD,KAAK,EAAE,oBAAoB,GAC1B,uBAAuB,CA6BzB;AAED,wBAAsB,4BAA4B,CAChD,KAAK,EAAE,oBAAoB,GAC1B,OAAO,CAAC,6BAA6B,CAAC,CAiBxC;AAED,wBAAsB,yCAAyC,CAC7D,KAAK,EAAE,iCAAiC,GACvC,OAAO,CAAC,6BAA6B,CAAC,CAYxC"}