@elizaos/plugin-training 2.0.3-beta.6 → 2.0.3-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (363) hide show
  1. package/dist/backends/native.d.ts +96 -0
  2. package/dist/backends/native.d.ts.map +1 -0
  3. package/dist/backends/native.js +308 -0
  4. package/dist/backends/native.js.map +1 -0
  5. package/dist/cli/train.d.ts +22 -0
  6. package/dist/cli/train.d.ts.map +1 -0
  7. package/dist/cli/train.js +219 -0
  8. package/dist/cli/train.js.map +1 -0
  9. package/dist/core/action-benchmark-runner.d.ts +55 -0
  10. package/dist/core/action-benchmark-runner.d.ts.map +1 -0
  11. package/dist/core/action-benchmark-runner.js +341 -0
  12. package/dist/core/action-benchmark-runner.js.map +1 -0
  13. package/dist/core/artifact-store.d.ts +72 -0
  14. package/dist/core/artifact-store.d.ts.map +1 -0
  15. package/dist/core/artifact-store.js +50 -0
  16. package/dist/core/artifact-store.js.map +1 -0
  17. package/dist/core/benchmark-matrix-artifact.d.ts +102 -0
  18. package/dist/core/benchmark-matrix-artifact.d.ts.map +1 -0
  19. package/dist/core/benchmark-matrix-artifact.js +381 -0
  20. package/dist/core/benchmark-matrix-artifact.js.map +1 -0
  21. package/dist/core/benchmark-vs-cerebras-runner.d.ts +37 -0
  22. package/dist/core/benchmark-vs-cerebras-runner.d.ts.map +1 -0
  23. package/dist/core/benchmark-vs-cerebras-runner.js +151 -0
  24. package/dist/core/benchmark-vs-cerebras-runner.js.map +1 -0
  25. package/dist/core/cerebras-eval-model.d.ts +54 -0
  26. package/dist/core/cerebras-eval-model.d.ts.map +1 -0
  27. package/dist/core/cerebras-eval-model.js +249 -0
  28. package/dist/core/cerebras-eval-model.js.map +1 -0
  29. package/dist/core/cli.d.ts +15 -0
  30. package/dist/core/cli.d.ts.map +1 -0
  31. package/dist/core/cli.js +1003 -0
  32. package/dist/core/cli.js.map +1 -0
  33. package/dist/core/context-audit.d.ts +51 -0
  34. package/dist/core/context-audit.d.ts.map +1 -0
  35. package/dist/core/context-audit.js +166 -0
  36. package/dist/core/context-audit.js.map +1 -0
  37. package/dist/core/context-catalog.d.ts +47 -0
  38. package/dist/core/context-catalog.d.ts.map +1 -0
  39. package/dist/core/context-catalog.js +269 -0
  40. package/dist/core/context-catalog.js.map +1 -0
  41. package/dist/core/context-types.d.ts +3 -0
  42. package/dist/core/context-types.d.ts.map +1 -0
  43. package/dist/core/context-types.js +18 -0
  44. package/dist/core/context-types.js.map +1 -0
  45. package/dist/core/dataset-generator.d.ts +135 -0
  46. package/dist/core/dataset-generator.d.ts.map +1 -0
  47. package/dist/core/dataset-generator.js +895 -0
  48. package/dist/core/dataset-generator.js.map +1 -0
  49. package/dist/core/eliza1-benchmark-recipe.d.ts +18 -0
  50. package/dist/core/eliza1-benchmark-recipe.d.ts.map +1 -0
  51. package/dist/core/eliza1-benchmark-recipe.js +64 -0
  52. package/dist/core/eliza1-benchmark-recipe.js.map +1 -0
  53. package/dist/core/eliza1-bundle-stager.d.ts +57 -0
  54. package/dist/core/eliza1-bundle-stager.d.ts.map +1 -0
  55. package/dist/core/eliza1-bundle-stager.js +149 -0
  56. package/dist/core/eliza1-bundle-stager.js.map +1 -0
  57. package/dist/core/ensure-cron-job.d.ts +53 -0
  58. package/dist/core/ensure-cron-job.d.ts.map +1 -0
  59. package/dist/core/ensure-cron-job.js +51 -0
  60. package/dist/core/ensure-cron-job.js.map +1 -0
  61. package/dist/core/eval-comparison-artifact.d.ts +72 -0
  62. package/dist/core/eval-comparison-artifact.d.ts.map +1 -0
  63. package/dist/core/eval-comparison-artifact.js +281 -0
  64. package/dist/core/eval-comparison-artifact.js.map +1 -0
  65. package/dist/core/feed-generation-runner.d.ts +37 -0
  66. package/dist/core/feed-generation-runner.d.ts.map +1 -0
  67. package/dist/core/feed-generation-runner.js +232 -0
  68. package/dist/core/feed-generation-runner.js.map +1 -0
  69. package/dist/core/html-escape.d.ts +5 -0
  70. package/dist/core/html-escape.d.ts.map +1 -0
  71. package/dist/core/html-escape.js +11 -0
  72. package/dist/core/html-escape.js.map +1 -0
  73. package/dist/core/huggingface-dataset-ingest.d.ts +52 -0
  74. package/dist/core/huggingface-dataset-ingest.d.ts.map +1 -0
  75. package/dist/core/huggingface-dataset-ingest.js +134 -0
  76. package/dist/core/huggingface-dataset-ingest.js.map +1 -0
  77. package/dist/core/index.d.ts +29 -0
  78. package/dist/core/index.d.ts.map +1 -0
  79. package/dist/core/index.js +204 -0
  80. package/dist/core/index.js.map +1 -0
  81. package/dist/core/privacy-filter.d.ts +95 -0
  82. package/dist/core/privacy-filter.d.ts.map +1 -0
  83. package/dist/core/privacy-filter.js +324 -0
  84. package/dist/core/privacy-filter.js.map +1 -0
  85. package/dist/core/promotion-gate.d.ts +117 -0
  86. package/dist/core/promotion-gate.d.ts.map +1 -0
  87. package/dist/core/promotion-gate.js +85 -0
  88. package/dist/core/promotion-gate.js.map +1 -0
  89. package/dist/core/promotion-persist.d.ts +116 -0
  90. package/dist/core/promotion-persist.d.ts.map +1 -0
  91. package/dist/core/promotion-persist.js +93 -0
  92. package/dist/core/promotion-persist.js.map +1 -0
  93. package/dist/core/prompt-compare.d.ts +99 -0
  94. package/dist/core/prompt-compare.d.ts.map +1 -0
  95. package/dist/core/prompt-compare.js +210 -0
  96. package/dist/core/prompt-compare.js.map +1 -0
  97. package/dist/core/replay-validator.d.ts +136 -0
  98. package/dist/core/replay-validator.d.ts.map +1 -0
  99. package/dist/core/replay-validator.js +312 -0
  100. package/dist/core/replay-validator.js.map +1 -0
  101. package/dist/core/roleplay-executor.d.ts +123 -0
  102. package/dist/core/roleplay-executor.d.ts.map +1 -0
  103. package/dist/core/roleplay-executor.js +675 -0
  104. package/dist/core/roleplay-executor.js.map +1 -0
  105. package/dist/core/roleplay-trajectories.d.ts +54 -0
  106. package/dist/core/roleplay-trajectories.d.ts.map +1 -0
  107. package/dist/core/roleplay-trajectories.js +88 -0
  108. package/dist/core/roleplay-trajectories.js.map +1 -0
  109. package/dist/core/scenario-blueprints.d.ts +62 -0
  110. package/dist/core/scenario-blueprints.d.ts.map +1 -0
  111. package/dist/core/scenario-blueprints.js +850 -0
  112. package/dist/core/scenario-blueprints.js.map +1 -0
  113. package/dist/core/scenario-runner.d.ts +36 -0
  114. package/dist/core/scenario-runner.d.ts.map +1 -0
  115. package/dist/core/scenario-runner.js +216 -0
  116. package/dist/core/scenario-runner.js.map +1 -0
  117. package/dist/core/skill-scoring-cron.d.ts +57 -0
  118. package/dist/core/skill-scoring-cron.d.ts.map +1 -0
  119. package/dist/core/skill-scoring-cron.js +180 -0
  120. package/dist/core/skill-scoring-cron.js.map +1 -0
  121. package/dist/core/test-trajectory-collector.d.ts +37 -0
  122. package/dist/core/test-trajectory-collector.d.ts.map +1 -0
  123. package/dist/core/test-trajectory-collector.js +225 -0
  124. package/dist/core/test-trajectory-collector.js.map +1 -0
  125. package/dist/core/track-c-queue-task.d.ts +37 -0
  126. package/dist/core/track-c-queue-task.d.ts.map +1 -0
  127. package/dist/core/track-c-queue-task.js +104 -0
  128. package/dist/core/track-c-queue-task.js.map +1 -0
  129. package/dist/core/training-analysis-index.d.ts +104 -0
  130. package/dist/core/training-analysis-index.d.ts.map +1 -0
  131. package/dist/core/training-analysis-index.js +3297 -0
  132. package/dist/core/training-analysis-index.js.map +1 -0
  133. package/dist/core/training-collection-runner.d.ts +508 -0
  134. package/dist/core/training-collection-runner.d.ts.map +1 -0
  135. package/dist/core/training-collection-runner.js +2299 -0
  136. package/dist/core/training-collection-runner.js.map +1 -0
  137. package/dist/core/training-config.d.ts +52 -0
  138. package/dist/core/training-config.d.ts.map +1 -0
  139. package/dist/core/training-config.js +117 -0
  140. package/dist/core/training-config.js.map +1 -0
  141. package/dist/core/training-orchestrator.d.ts +112 -0
  142. package/dist/core/training-orchestrator.d.ts.map +1 -0
  143. package/dist/core/training-orchestrator.js +729 -0
  144. package/dist/core/training-orchestrator.js.map +1 -0
  145. package/dist/core/training-readiness-report.d.ts +52 -0
  146. package/dist/core/training-readiness-report.d.ts.map +1 -0
  147. package/dist/core/training-readiness-report.js +765 -0
  148. package/dist/core/training-readiness-report.js.map +1 -0
  149. package/dist/core/trajectory-consumer.d.ts +15 -0
  150. package/dist/core/trajectory-consumer.d.ts.map +1 -0
  151. package/dist/core/trajectory-consumer.js +61 -0
  152. package/dist/core/trajectory-consumer.js.map +1 -0
  153. package/dist/core/trajectory-export-bundle.d.ts +95 -0
  154. package/dist/core/trajectory-export-bundle.d.ts.map +1 -0
  155. package/dist/core/trajectory-export-bundle.js +561 -0
  156. package/dist/core/trajectory-export-bundle.js.map +1 -0
  157. package/dist/core/trajectory-export-cron.d.ts +57 -0
  158. package/dist/core/trajectory-export-cron.d.ts.map +1 -0
  159. package/dist/core/trajectory-export-cron.js +170 -0
  160. package/dist/core/trajectory-export-cron.js.map +1 -0
  161. package/dist/core/trajectory-hf-upload.d.ts +50 -0
  162. package/dist/core/trajectory-hf-upload.d.ts.map +1 -0
  163. package/dist/core/trajectory-hf-upload.js +111 -0
  164. package/dist/core/trajectory-hf-upload.js.map +1 -0
  165. package/dist/core/trajectory-task-datasets.d.ts +62 -0
  166. package/dist/core/trajectory-task-datasets.d.ts.map +1 -0
  167. package/dist/core/trajectory-task-datasets.js +427 -0
  168. package/dist/core/trajectory-task-datasets.js.map +1 -0
  169. package/dist/core/wait-for-service.d.ts +25 -0
  170. package/dist/core/wait-for-service.d.ts.map +1 -0
  171. package/dist/core/wait-for-service.js +19 -0
  172. package/dist/core/wait-for-service.js.map +1 -0
  173. package/dist/core/workspace-runtime.d.ts +4 -0
  174. package/dist/core/workspace-runtime.d.ts.map +1 -0
  175. package/dist/core/workspace-runtime.js +25 -0
  176. package/dist/core/workspace-runtime.js.map +1 -0
  177. package/dist/dspy/artifact.d.ts +54 -0
  178. package/dist/dspy/artifact.d.ts.map +1 -0
  179. package/dist/dspy/artifact.js +61 -0
  180. package/dist/dspy/artifact.js.map +1 -0
  181. package/dist/dspy/chain-of-thought.d.ts +27 -0
  182. package/dist/dspy/chain-of-thought.d.ts.map +1 -0
  183. package/dist/dspy/chain-of-thought.js +43 -0
  184. package/dist/dspy/chain-of-thought.js.map +1 -0
  185. package/dist/dspy/examples.d.ts +72 -0
  186. package/dist/dspy/examples.d.ts.map +1 -0
  187. package/dist/dspy/examples.js +105 -0
  188. package/dist/dspy/examples.js.map +1 -0
  189. package/dist/dspy/index.d.ts +15 -0
  190. package/dist/dspy/index.d.ts.map +1 -0
  191. package/dist/dspy/index.js +40 -0
  192. package/dist/dspy/index.js.map +1 -0
  193. package/dist/dspy/lm-adapter.d.ts +100 -0
  194. package/dist/dspy/lm-adapter.d.ts.map +1 -0
  195. package/dist/dspy/lm-adapter.js +81 -0
  196. package/dist/dspy/lm-adapter.js.map +1 -0
  197. package/dist/dspy/optimizers/dspy-bootstrap-fewshot.d.ts +23 -0
  198. package/dist/dspy/optimizers/dspy-bootstrap-fewshot.d.ts.map +1 -0
  199. package/dist/dspy/optimizers/dspy-bootstrap-fewshot.js +85 -0
  200. package/dist/dspy/optimizers/dspy-bootstrap-fewshot.js.map +1 -0
  201. package/dist/dspy/optimizers/dspy-copro.d.ts +29 -0
  202. package/dist/dspy/optimizers/dspy-copro.d.ts.map +1 -0
  203. package/dist/dspy/optimizers/dspy-copro.js +141 -0
  204. package/dist/dspy/optimizers/dspy-copro.js.map +1 -0
  205. package/dist/dspy/optimizers/dspy-mipro.d.ts +37 -0
  206. package/dist/dspy/optimizers/dspy-mipro.d.ts.map +1 -0
  207. package/dist/dspy/optimizers/dspy-mipro.js +194 -0
  208. package/dist/dspy/optimizers/dspy-mipro.js.map +1 -0
  209. package/dist/dspy/optimizers/index.d.ts +5 -0
  210. package/dist/dspy/optimizers/index.d.ts.map +1 -0
  211. package/dist/dspy/optimizers/index.js +11 -0
  212. package/dist/dspy/optimizers/index.js.map +1 -0
  213. package/dist/dspy/optimizers/types.d.ts +39 -0
  214. package/dist/dspy/optimizers/types.d.ts.map +1 -0
  215. package/dist/dspy/optimizers/types.js +1 -0
  216. package/dist/dspy/optimizers/types.js.map +1 -0
  217. package/dist/dspy/predict.d.ts +49 -0
  218. package/dist/dspy/predict.d.ts.map +1 -0
  219. package/dist/dspy/predict.js +73 -0
  220. package/dist/dspy/predict.js.map +1 -0
  221. package/dist/dspy/signature.d.ts +88 -0
  222. package/dist/dspy/signature.d.ts.map +1 -0
  223. package/dist/dspy/signature.js +205 -0
  224. package/dist/dspy/signature.js.map +1 -0
  225. package/dist/index.d.ts +15 -0
  226. package/dist/index.d.ts.map +1 -0
  227. package/dist/index.js +15 -0
  228. package/dist/index.js.map +1 -0
  229. package/dist/optimizers/bootstrap-fewshot.d.ts +42 -0
  230. package/dist/optimizers/bootstrap-fewshot.d.ts.map +1 -0
  231. package/dist/optimizers/bootstrap-fewshot.js +92 -0
  232. package/dist/optimizers/bootstrap-fewshot.js.map +1 -0
  233. package/dist/optimizers/gepa.d.ts +63 -0
  234. package/dist/optimizers/gepa.d.ts.map +1 -0
  235. package/dist/optimizers/gepa.js +232 -0
  236. package/dist/optimizers/gepa.js.map +1 -0
  237. package/dist/optimizers/index.d.ts +7 -0
  238. package/dist/optimizers/index.d.ts.map +1 -0
  239. package/dist/optimizers/index.js +51 -0
  240. package/dist/optimizers/index.js.map +1 -0
  241. package/dist/optimizers/instruction-search.d.ts +39 -0
  242. package/dist/optimizers/instruction-search.d.ts.map +1 -0
  243. package/dist/optimizers/instruction-search.js +108 -0
  244. package/dist/optimizers/instruction-search.js.map +1 -0
  245. package/dist/optimizers/prompt-evolution.d.ts +39 -0
  246. package/dist/optimizers/prompt-evolution.d.ts.map +1 -0
  247. package/dist/optimizers/prompt-evolution.js +101 -0
  248. package/dist/optimizers/prompt-evolution.js.map +1 -0
  249. package/dist/optimizers/scoring.d.ts +139 -0
  250. package/dist/optimizers/scoring.d.ts.map +1 -0
  251. package/dist/optimizers/scoring.js +299 -0
  252. package/dist/optimizers/scoring.js.map +1 -0
  253. package/dist/optimizers/types.d.ts +105 -0
  254. package/dist/optimizers/types.d.ts.map +1 -0
  255. package/dist/optimizers/types.js +1 -0
  256. package/dist/optimizers/types.js.map +1 -0
  257. package/dist/register-runtime.d.ts +3 -0
  258. package/dist/register-runtime.d.ts.map +1 -0
  259. package/dist/register-runtime.js +60 -0
  260. package/dist/register-runtime.js.map +1 -0
  261. package/dist/register-terminal-view.d.ts +15 -0
  262. package/dist/register-terminal-view.d.ts.map +1 -0
  263. package/dist/register-terminal-view.js +31 -0
  264. package/dist/register-terminal-view.js.map +1 -0
  265. package/dist/routes/experience-routes.d.ts +21 -0
  266. package/dist/routes/experience-routes.d.ts.map +1 -0
  267. package/dist/routes/experience-routes.js +513 -0
  268. package/dist/routes/experience-routes.js.map +1 -0
  269. package/dist/routes/index.d.ts +5 -0
  270. package/dist/routes/index.d.ts.map +1 -0
  271. package/dist/routes/index.js +17 -0
  272. package/dist/routes/index.js.map +1 -0
  273. package/dist/routes/training-routes.d.ts +10 -0
  274. package/dist/routes/training-routes.d.ts.map +1 -0
  275. package/dist/routes/training-routes.js +1239 -0
  276. package/dist/routes/training-routes.js.map +1 -0
  277. package/dist/routes/training-vast-routes.d.ts +35 -0
  278. package/dist/routes/training-vast-routes.d.ts.map +1 -0
  279. package/dist/routes/training-vast-routes.js +249 -0
  280. package/dist/routes/training-vast-routes.js.map +1 -0
  281. package/dist/routes/trajectory-routes.d.ts +19 -0
  282. package/dist/routes/trajectory-routes.d.ts.map +1 -0
  283. package/dist/routes/trajectory-routes.js +1122 -0
  284. package/dist/routes/trajectory-routes.js.map +1 -0
  285. package/dist/services/index.d.ts +9 -0
  286. package/dist/services/index.d.ts.map +1 -0
  287. package/dist/services/index.js +63 -0
  288. package/dist/services/index.js.map +1 -0
  289. package/dist/services/training-backend-check.d.ts +8 -0
  290. package/dist/services/training-backend-check.d.ts.map +1 -0
  291. package/dist/services/training-backend-check.js +31 -0
  292. package/dist/services/training-backend-check.js.map +1 -0
  293. package/dist/services/training-service-like.d.ts +40 -0
  294. package/dist/services/training-service-like.d.ts.map +1 -0
  295. package/dist/services/training-service-like.js +1 -0
  296. package/dist/services/training-service-like.js.map +1 -0
  297. package/dist/services/training-service-registry.d.ts +4 -0
  298. package/dist/services/training-service-registry.d.ts.map +1 -0
  299. package/dist/services/training-service-registry.js +12 -0
  300. package/dist/services/training-service-registry.js.map +1 -0
  301. package/dist/services/training-service.d.ts +59 -0
  302. package/dist/services/training-service.d.ts.map +1 -0
  303. package/dist/services/training-service.js +154 -0
  304. package/dist/services/training-service.js.map +1 -0
  305. package/dist/services/training-trigger.d.ts +177 -0
  306. package/dist/services/training-trigger.d.ts.map +1 -0
  307. package/dist/services/training-trigger.js +300 -0
  308. package/dist/services/training-trigger.js.map +1 -0
  309. package/dist/services/training-vast-service.d.ts +149 -0
  310. package/dist/services/training-vast-service.d.ts.map +1 -0
  311. package/dist/services/training-vast-service.js +648 -0
  312. package/dist/services/training-vast-service.js.map +1 -0
  313. package/dist/services/vast-inference-stats.d.ts +37 -0
  314. package/dist/services/vast-inference-stats.d.ts.map +1 -0
  315. package/dist/services/vast-inference-stats.js +81 -0
  316. package/dist/services/vast-inference-stats.js.map +1 -0
  317. package/dist/services/vast-job-store.d.ts +74 -0
  318. package/dist/services/vast-job-store.d.ts.map +1 -0
  319. package/dist/services/vast-job-store.js +194 -0
  320. package/dist/services/vast-job-store.js.map +1 -0
  321. package/dist/services/vast-subprocess.d.ts +27 -0
  322. package/dist/services/vast-subprocess.d.ts.map +1 -0
  323. package/dist/services/vast-subprocess.js +78 -0
  324. package/dist/services/vast-subprocess.js.map +1 -0
  325. package/dist/setup-routes.d.ts +17 -0
  326. package/dist/setup-routes.d.ts.map +1 -0
  327. package/dist/setup-routes.js +319 -0
  328. package/dist/setup-routes.js.map +1 -0
  329. package/dist/ui/FineTuningSpatialView.d.ts +49 -0
  330. package/dist/ui/FineTuningSpatialView.d.ts.map +1 -0
  331. package/dist/ui/FineTuningSpatialView.js +154 -0
  332. package/dist/ui/FineTuningSpatialView.js.map +1 -0
  333. package/dist/ui/FineTuningView.d.ts +7 -0
  334. package/dist/ui/FineTuningView.d.ts.map +1 -0
  335. package/dist/ui/FineTuningView.helpers.d.ts +17 -0
  336. package/dist/ui/FineTuningView.helpers.d.ts.map +1 -0
  337. package/dist/ui/FineTuningView.helpers.js +30 -0
  338. package/dist/ui/FineTuningView.helpers.js.map +1 -0
  339. package/dist/ui/FineTuningView.interact.d.ts +2 -0
  340. package/dist/ui/FineTuningView.interact.d.ts.map +1 -0
  341. package/dist/ui/FineTuningView.interact.js +300 -0
  342. package/dist/ui/FineTuningView.interact.js.map +1 -0
  343. package/dist/ui/FineTuningView.js +4653 -0
  344. package/dist/ui/FineTuningView.js.map +1 -0
  345. package/dist/ui/fine-tuning-panels.d.ts +100 -0
  346. package/dist/ui/fine-tuning-panels.d.ts.map +1 -0
  347. package/dist/ui/fine-tuning-panels.helpers.d.ts +19 -0
  348. package/dist/ui/fine-tuning-panels.helpers.d.ts.map +1 -0
  349. package/dist/ui/fine-tuning-panels.helpers.js +77 -0
  350. package/dist/ui/fine-tuning-panels.helpers.js.map +1 -0
  351. package/dist/ui/fine-tuning-panels.js +928 -0
  352. package/dist/ui/fine-tuning-panels.js.map +1 -0
  353. package/dist/ui/index.d.ts +5 -0
  354. package/dist/ui/index.d.ts.map +1 -0
  355. package/dist/ui/index.js +5 -0
  356. package/dist/ui/index.js.map +1 -0
  357. package/dist/ui/training-view-bundle.d.ts +3 -0
  358. package/dist/ui/training-view-bundle.d.ts.map +1 -0
  359. package/dist/ui/training-view-bundle.js +7 -0
  360. package/dist/ui/training-view-bundle.js.map +1 -0
  361. package/dist/views/bundle.js +5312 -0
  362. package/dist/views/bundle.js.map +1 -0
  363. package/package.json +7 -7
@@ -0,0 +1,219 @@
1
+ import { parseArgs } from "node:util";
2
+ import { NATIVE_OPTIMIZERS, runNativeBackend } from "../backends/native.js";
3
+ import { ALL_TRAINING_TASKS } from "../core/training-config.js";
4
// Allow-lists used by parseTrainArgs to validate --backend, --task and
// --optimizer. Tasks and optimizers are derived from the imported
// ALL_TRAINING_TASKS and NATIVE_OPTIMIZERS collections.
+ const ALLOWED_BACKENDS = /* @__PURE__ */ new Set(["native"]);
5
+ const ALLOWED_TASKS = new Set(ALL_TRAINING_TASKS);
6
+ const ALLOWED_OPTIMIZERS = new Set(NATIVE_OPTIMIZERS);
// Usage text that runTrainCli writes to stdout when --help is passed.
// The --task list is interpolated from ALLOWED_TASKS, but the --optimizer
// list below is hard-coded text: it is not derived from NATIVE_OPTIMIZERS
// and has to be kept in sync with it by hand.
7
+ const HELP = `Usage:
8
+ bun run train -- --backend native --dataset <path> [options]
9
+
10
+ Options:
11
+ --backend NAME native (required)
12
+ --dataset PATH Path to eliza_native_v1 JSONL file (required)
13
+ --task NAME ${[...ALLOWED_TASKS].join(" | ")}
14
+ (includes the LifeOps per-capability tasks, e.g.
15
+ calendar_extract / schedule_plan / morning_brief)
16
+ --optimizer NAME instruction-search | prompt-evolution | gepa
17
+ | bootstrap-fewshot | dspy-bootstrap-fewshot
18
+ | dspy-copro | dspy-mipro
19
+ Defaults to instruction-search.
20
+ gepa is the formal Pareto+feedback variant (Goyal et
21
+ al. 2024); prompt-evolution is the simpler genetic
22
+ mutation variant \u2014 both stay registered.
23
+ The dspy-* variants use the native DSPy primitives
24
+ (Signature + Predict + privacy-filtered Example loader)
25
+ and emit eliza_native_v1-compatible artifacts.
26
+ --baseline PATH Path to a baseline-prompt text file. Defaults to
27
+ the first system message in request.messages.
28
+ --help Show this help text
29
+ `;
30
/**
 * Parse and validate the argument vector of the training CLI.
 *
 * Validation runs in a fixed order (help, backend, dataset, task,
 * optimizer), so the first offending flag decides which error is raised.
 *
 * @param {string[]} argv - CLI arguments without the node/script prefix.
 * @returns {"help" | {backend: string, dataset: string, task: (string|undefined), optimizer: (string|undefined), baseline: (string|undefined)}}
 *   The literal "help" when --help is present, otherwise the validated
 *   options. `task` and `optimizer` are undefined when the flag is absent;
 *   `baseline` is passed through exactly as given (not trimmed).
 * @throws {Error} When --backend is missing or not allowed, when --dataset
 *   is missing or blank, or when --task / --optimizer is supplied with a
 *   value outside its allow-list. Errors raised by `parseArgs` itself
 *   propagate unchanged.
 */
function parseTrainArgs(argv) {
  const stringOption = () => ({ type: "string" });
  const { values: flags } = parseArgs({
    args: argv,
    allowPositionals: false,
    options: {
      backend: stringOption(),
      dataset: stringOption(),
      task: stringOption(),
      optimizer: stringOption(),
      baseline: stringOption(),
      help: { type: "boolean" },
    },
  });

  // --help short-circuits: none of the other flags is inspected.
  if (flags.help) {
    return "help";
  }

  const backend = flags.backend?.trim();
  const backendIsAllowed = Boolean(backend) && ALLOWED_BACKENDS.has(backend);
  if (!backendIsAllowed) {
    throw new Error(
      `--backend is required and must be one of: ${[...ALLOWED_BACKENDS].join(", ")}`,
    );
  }

  const dataset = flags.dataset?.trim();
  if (!dataset) {
    throw new Error("--dataset <path> is required");
  }

  // Optional flags: an absent (or empty) value stays undefined; anything
  // else must be a member of the matching allow-list after trimming.
  const task = flags.task ? flags.task.trim() : undefined;
  if (task !== undefined && !ALLOWED_TASKS.has(task)) {
    throw new Error(
      `--task must be one of: ${[...ALLOWED_TASKS].join(", ")}`,
    );
  }

  const optimizer = flags.optimizer ? flags.optimizer.trim() : undefined;
  if (optimizer !== undefined && !ALLOWED_OPTIMIZERS.has(optimizer)) {
    throw new Error(
      `--optimizer must be one of: ${[...ALLOWED_OPTIMIZERS].join(", ")}`,
    );
  }

  return { backend, dataset, task, optimizer, baseline: flags.baseline };
}
82
/**
 * Resolve the training model provider from an environment map.
 *
 * `TRAIN_MODEL_PROVIDER` takes precedence over `TRAINING_PROVIDER`. A
 * variable that is unset, empty, or whitespace-only counts as absent, so an
 * empty `TRAIN_MODEL_PROVIDER=` does not mask a valid `TRAINING_PROVIDER`.
 *
 * @param {Record<string, string | undefined>} env - Environment variables.
 * @returns {string | undefined} The trimmed provider name, if any.
 */
function resolveTrainProvider(env) {
  // `||` rather than `??`: a trimmed empty string must fall through to the
  // next candidate, and `""` is not nullish.
  return (
    env.TRAIN_MODEL_PROVIDER?.trim() ||
    env.TRAINING_PROVIDER?.trim() ||
    undefined
  );
}

/**
 * Run the training CLI.
 *
 * Parses `argv`, and for the `native` backend: loads the baseline prompt,
 * checks that the provider env var selects `cerebras`, runs the optimizer via
 * `runNativeBackend`, logs the scores, and persists the optimized prompt
 * artifact under `<stateDir>/optimized-prompts`.
 *
 * @param {string[]} argv - CLI arguments without the node/script prefix.
 * @returns {Promise<number>} Process exit code: 0 on success or after
 *   printing help; 1 when the provider is not `cerebras` or the backend
 *   reports that it was not invoked.
 * @throws {Error} Propagates argument-validation errors, baseline-loading
 *   errors, helper-import failures, and errors from the backend itself.
 */
async function runTrainCli(argv) {
  const parsed = parseTrainArgs(argv);
  if (parsed === "help") {
    process.stdout.write(HELP);
    return 0;
  }
  switch (parsed.backend) {
    case "native": {
      const optimizer = parsed.optimizer ?? "instruction-search";
      const task = parsed.task ?? "should_respond";
      const baselinePrompt = await loadBaselinePrompt(parsed);
      const trainProvider = resolveTrainProvider(process.env);
      if (trainProvider !== "cerebras") {
        console.error(
          "[train] TRAIN_MODEL_PROVIDER=cerebras (or TRAINING_PROVIDER=cerebras) is required. The native backend requires the real evaluation adapter; set the env var and rerun."
        );
        return 1;
      }
      // NOTE(review): the specifier is held in a variable and points at a
      // sibling plugin's test helper (.ts); presumably this keeps bundlers
      // from resolving it statically. Confirm it exists wherever this runs.
      const helperPath = "../../../plugin-personal-assistant/test/helpers/lifeops-eval-model.ts";
      const helperModule = await import(helperPath);
      const useModel = helperModule.getTrainingUseModelAdapter();
      // Adapter handed to the backend: flattens system + user text into one
      // prompt (blank line between them) and forwards it to `useModel`.
      const adapter = {
        async complete(input) {
          const prompt = input.system
            ? `${input.system}\n\n${input.user}`
            : input.user;
          return await useModel({
            prompt,
            temperature: input.temperature,
            maxTokens: input.maxTokens
          });
        }
      };
      console.log(
        "[train] adapter: cerebras gpt-oss-120b (TRAIN_MODEL_PROVIDER=cerebras)"
      );
      const result = await runNativeBackend({
        datasetPath: parsed.dataset,
        task,
        optimizer,
        baselinePrompt,
        datasetId: parsed.dataset,
        runtime: { useModel },
        adapter
      });
      for (const note of result.notes) {
        console.log(`[train] ${note}`);
      }
      if (!result.invoked) {
        return 1;
      }
      console.log(
        `[train] native ${optimizer} task=${task} dataset=${result.datasetSize} baseline=${result.baselineScore.toFixed(3)} optimized=${result.score.toFixed(3)}`
      );
      const path = await import("node:path");
      const os = await import("node:os");
      // First non-blank of TRAINING_STATE_DIR, ELIZA_STATE_DIR, ~/.eliza.
      const stateDir =
        process.env.TRAINING_STATE_DIR?.trim() ||
        process.env.ELIZA_STATE_DIR?.trim() ||
        path.join(os.homedir(), ".eliza");
      // context_routing results are stored under the should_respond task.
      const promptTask = task === "context_routing" ? "should_respond" : task;
      const artifactPayload = {
        task: promptTask,
        optimizer,
        baseline: baselinePrompt,
        prompt: result.result.optimizedPrompt,
        baselineScore: result.baselineScore,
        score: result.score,
        datasetSize: result.datasetSize,
        datasetId: parsed.dataset,
        generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
        lineage: result.result.lineage,
        ...result.result.fewShotExamples ? { fewShotExamples: result.result.fewShotExamples } : {}
      };
      try {
        const { OptimizedPromptService } = await import("@elizaos/core");
        const service = new OptimizedPromptService();
        service.setStoreRoot(path.join(stateDir, "optimized-prompts"));
        const artifactPath = await service.setPrompt(
          promptTask,
          artifactPayload
        );
        console.log(`[train] artifact: ${artifactPath}`);
      } catch (err) {
        // Best-effort fallback for any failure above (import, construction,
        // or setPrompt): write the payload as the next v<N>.json in the
        // task directory so the run's result is not lost.
        const fs = await import("node:fs");
        const dir = path.join(stateDir, "optimized-prompts", promptTask);
        fs.mkdirSync(dir, { recursive: true });
        const existing = fs
          .readdirSync(dir)
          .filter((f) => /^v\d+\.json$/.test(f))
          .map((f) => Number.parseInt(f.replace(/^v|\.json$/g, ""), 10))
          .filter((n) => Number.isFinite(n));
        const nextVersion = (existing.length ? Math.max(...existing) : 0) + 1;
        const out = path.join(dir, `v${nextVersion}.json`);
        fs.writeFileSync(out, JSON.stringify(artifactPayload, null, 2));
        console.warn(
          `[train] OptimizedPromptService unavailable (${err?.message ?? err}); wrote raw artifact -> ${out}`
        );
      }
      return 0;
    }
    default: {
      throw new Error(`Unknown backend: ${parsed.backend}`);
    }
  }
}
177
/**
 * Load the baseline prompt for an optimization run.
 *
 * When `args.baseline` is set, the file at that path is returned verbatim.
 * Otherwise the first non-blank line of the dataset is parsed as an
 * `eliza_native_v1` row and its system prompt is used: `request.system` when
 * it is a non-empty string, else the content of the first
 * `request.messages` entry with role "system" and string content.
 *
 * @param {{baseline?: string, dataset: string}} args - Parsed CLI options.
 * @returns {Promise<string>} The baseline prompt text.
 * @throws {SyntaxError} When the first dataset row is not valid JSON.
 * @throws {Error} When the dataset has no non-blank line, the first row is
 *   not an `eliza_native_v1` document, or it carries no system prompt.
 *   File-system errors from reading either file propagate unchanged.
 */
async function loadBaselinePrompt(args) {
  const { readFile } = await import("node:fs/promises");
  if (args.baseline) {
    return await readFile(args.baseline, "utf-8");
  }

  const raw = await readFile(args.dataset, "utf-8");
  // Trimming the chosen line also strips a leading UTF-8 BOM and a trailing
  // "\r" from CRLF files, either of which would otherwise reach JSON.parse.
  const firstRow = raw
    .split("\n")
    .find((line) => line.trim().length > 0)
    ?.trim();
  if (!firstRow) {
    throw new Error(
      `[native] cannot infer baseline from empty dataset ${args.dataset}; pass --baseline <path>`
    );
  }

  let parsedJson;
  try {
    parsedJson = JSON.parse(firstRow);
  } catch (err) {
    // Keep the SyntaxError type, but add the same remediation hint the
    // other failure paths give and preserve the parser error as `cause`.
    const reason = err instanceof Error ? err.message : String(err);
    throw new SyntaxError(
      `[native] dataset first row is not valid JSON (${reason}); pass --baseline <path>`,
      { cause: err }
    );
  }
  if (!parsedJson || typeof parsedJson !== "object" || parsedJson.format !== "eliza_native_v1") {
    throw new Error(
      `[native] dataset first row is not an eliza_native_v1 document; pass --baseline <path>`
    );
  }

  const request = parsedJson.request;
  const messages = Array.isArray(request?.messages) ? request.messages : [];
  // `msg?.role` tolerates null / non-object entries in the messages array.
  const systemMsg = messages.find(
    (msg) => msg?.role === "system" && typeof msg.content === "string"
  );
  const system =
    typeof request?.system === "string" && request.system.length > 0
      ? request.system
      : systemMsg?.content;
  if (!system) {
    throw new Error(
      `[native] dataset first row has no request.system or system message; pass --baseline <path>`
    );
  }
  return system;
}
209
// Entry-point guard: run the CLI only when this module's URL matches the
// script path Node was started with (process.argv[1]). The promise's exit
// code becomes the process exit code; any rejection is printed to stderr
// and exits with code 1.
{
  const entryScript = process.argv[1];
  const entryPathname = entryScript
    ? new URL(`file://${entryScript}`).pathname
    : "";
  if (import.meta.url === `file://${entryPathname}`) {
    const exitWith = (code) => process.exit(code);
    const reportAndFail = (err) => {
      console.error(err instanceof Error ? err.message : String(err));
      process.exit(1);
    };
    runTrainCli(process.argv.slice(2)).then(exitWith).catch(reportAndFail);
  }
}
215
+ export {
216
+ parseTrainArgs,
217
+ runTrainCli
218
+ };
219
+ //# sourceMappingURL=train.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/cli/train.ts"],"sourcesContent":["/**\n * Training CLI for Eliza-native trajectory data.\n *\n * Usage:\n * bun run train -- --backend native --dataset <path> \\\n * [--task {should_respond|context_routing|action_planner|response|media_description}]\n *\n * Consumes `eliza_native_v1` model-boundary JSONL rows.\n */\n\nimport { parseArgs } from \"node:util\";\nimport { NATIVE_OPTIMIZERS, runNativeBackend } from \"../backends/native.js\";\nimport { ALL_TRAINING_TASKS } from \"../core/training-config.js\";\nimport type { TrajectoryTrainingTask } from \"../core/trajectory-task-datasets.js\";\nimport type { OptimizerName } from \"../optimizers/index.js\";\n\nconst ALLOWED_BACKENDS = new Set([\"native\"]);\nconst ALLOWED_TASKS = new Set<string>(ALL_TRAINING_TASKS);\nconst ALLOWED_OPTIMIZERS = new Set<string>(NATIVE_OPTIMIZERS);\n\nconst HELP = `Usage:\n bun run train -- --backend native --dataset <path> [options]\n\nOptions:\n --backend NAME native (required)\n --dataset PATH Path to eliza_native_v1 JSONL file (required)\n --task NAME ${[...ALLOWED_TASKS].join(\" | \")}\n (includes the LifeOps per-capability tasks, e.g.\n calendar_extract / schedule_plan / morning_brief)\n --optimizer NAME instruction-search | prompt-evolution | gepa\n | bootstrap-fewshot | dspy-bootstrap-fewshot\n | dspy-copro | dspy-mipro\n Defaults to instruction-search.\n gepa is the formal Pareto+feedback variant (Goyal et\n al. 2024); prompt-evolution is the simpler genetic\n mutation variant — both stay registered.\n The dspy-* variants use the native DSPy primitives\n (Signature + Predict + privacy-filtered Example loader)\n and emit eliza_native_v1-compatible artifacts.\n --baseline PATH Path to a baseline-prompt text file. 
Defaults to\n the first system message in request.messages.\n --help Show this help text\n`;\n\ninterface ParsedTrainArgs {\n backend: \"native\";\n dataset: string;\n task?: TrajectoryTrainingTask;\n optimizer?: OptimizerName;\n baseline?: string;\n}\n\nexport function parseTrainArgs(argv: string[]): ParsedTrainArgs | \"help\" {\n const { values } = parseArgs({\n args: argv,\n options: {\n backend: { type: \"string\" },\n dataset: { type: \"string\" },\n task: { type: \"string\" },\n optimizer: { type: \"string\" },\n baseline: { type: \"string\" },\n help: { type: \"boolean\" },\n },\n allowPositionals: false,\n });\n if (values.help) return \"help\";\n\n const backend = values.backend?.trim();\n if (!backend || !ALLOWED_BACKENDS.has(backend)) {\n throw new Error(\n `--backend is required and must be one of: ${[...ALLOWED_BACKENDS].join(\", \")}`,\n );\n }\n const dataset = values.dataset?.trim();\n if (!dataset) {\n throw new Error(\"--dataset <path> is required\");\n }\n let task: TrajectoryTrainingTask | undefined;\n if (values.task) {\n const t = values.task.trim();\n if (!ALLOWED_TASKS.has(t)) {\n throw new Error(\n `--task must be one of: ${[...ALLOWED_TASKS].join(\", \")}`,\n );\n }\n task = t as TrajectoryTrainingTask;\n }\n\n let optimizer: OptimizerName | undefined;\n if (values.optimizer) {\n const opt = values.optimizer.trim();\n if (!ALLOWED_OPTIMIZERS.has(opt)) {\n throw new Error(\n `--optimizer must be one of: ${[...ALLOWED_OPTIMIZERS].join(\", \")}`,\n );\n }\n optimizer = opt as OptimizerName;\n }\n\n return {\n backend: backend as ParsedTrainArgs[\"backend\"],\n dataset,\n task,\n optimizer,\n baseline: values.baseline,\n };\n}\n\nexport async function runTrainCli(argv: string[]): Promise<number> {\n const parsed = parseTrainArgs(argv);\n if (parsed === \"help\") {\n process.stdout.write(HELP);\n return 0;\n }\n\n switch (parsed.backend) {\n case \"native\": {\n const optimizer = parsed.optimizer ?? 
\"instruction-search\";\n const task: TrajectoryTrainingTask = parsed.task ?? \"should_respond\";\n const baselinePrompt = await loadBaselinePrompt(parsed);\n // Real-model adapter: scoring + variant generation run through the\n // Cerebras gpt-oss-120b client (lifeops-eval-model.ts). This is the path\n // `bun run lifeops:optimize` exercises against captured trajectories.\n const trainProvider =\n process.env.TRAIN_MODEL_PROVIDER?.trim() ??\n process.env.TRAINING_PROVIDER?.trim();\n if (trainProvider !== \"cerebras\") {\n console.error(\n \"[train] TRAIN_MODEL_PROVIDER=cerebras (or TRAINING_PROVIDER=cerebras) is required. \" +\n \"The native backend requires the real evaluation adapter; set the env var and rerun.\",\n );\n return 1;\n }\n // The eval helper lives in plugin-personal-assistant's test tree, which is\n // outside this package's emit rootDir. Declare the single export's real\n // signature locally so the dynamic import stays fully typed without a\n // static `typeof import()` reference dragging an out-of-rootDir file into\n // the build program (TS6059).\n interface LifeOpsEvalModelModule {\n getTrainingUseModelAdapter(): (input: {\n prompt: string;\n temperature?: number;\n maxTokens?: number;\n }) => Promise<string>;\n }\n const helperPath =\n \"../../../plugin-personal-assistant/test/helpers/lifeops-eval-model.ts\";\n const helperModule: LifeOpsEvalModelModule = await import(helperPath);\n const useModel = helperModule.getTrainingUseModelAdapter();\n const adapter = {\n async complete(input: {\n system?: string;\n user: string;\n temperature?: number;\n maxTokens?: number;\n }): Promise<string> {\n const prompt = input.system\n ? 
`${input.system}\\n\\n${input.user}`\n : input.user;\n return await useModel({\n prompt,\n temperature: input.temperature,\n maxTokens: input.maxTokens,\n });\n },\n };\n console.log(\n \"[train] adapter: cerebras gpt-oss-120b (TRAIN_MODEL_PROVIDER=cerebras)\",\n );\n const result = await runNativeBackend({\n datasetPath: parsed.dataset,\n task,\n optimizer,\n baselinePrompt,\n datasetId: parsed.dataset,\n runtime: { useModel },\n adapter,\n });\n for (const note of result.notes) console.log(`[train] ${note}`);\n if (!result.invoked) return 1;\n console.log(\n `[train] native ${optimizer} task=${task} dataset=${result.datasetSize} ` +\n `baseline=${result.baselineScore.toFixed(3)} optimized=${result.score.toFixed(3)}`,\n );\n\n // Persist the optimized prompt + lineage so the operator can inspect\n // and deploy it. Routed through `OptimizedPromptService.setPrompt` so\n // the on-disk versioning (`vN.json` + `current`/`previous` symlinks)\n // matches what the runtime trigger service writes. Keeps `rollback`\n // working regardless of which write path produced the artifact.\n const path = await import(\"node:path\");\n const os = await import(\"node:os\");\n const stateDir =\n process.env.TRAINING_STATE_DIR?.trim() ||\n process.env.ELIZA_STATE_DIR?.trim() ||\n path.join(os.homedir(), \".eliza\");\n const promptTask = task === \"context_routing\" ? \"should_respond\" : task;\n const artifactPayload = {\n task: promptTask,\n optimizer,\n baseline: baselinePrompt,\n prompt: result.result.optimizedPrompt,\n baselineScore: result.baselineScore,\n score: result.score,\n datasetSize: result.datasetSize,\n datasetId: parsed.dataset,\n generatedAt: new Date().toISOString(),\n lineage: result.result.lineage,\n ...(result.result.fewShotExamples\n ? 
{ fewShotExamples: result.result.fewShotExamples }\n : {}),\n };\n try {\n const { OptimizedPromptService } = await import(\"@elizaos/core\");\n const service = new OptimizedPromptService();\n service.setStoreRoot(path.join(stateDir, \"optimized-prompts\"));\n const artifactPath = await service.setPrompt(\n promptTask,\n artifactPayload,\n );\n console.log(`[train] artifact: ${artifactPath}`);\n } catch (err) {\n // Fallback: write the artifact directly to <stateDir>/optimized-prompts/<task>/vN.json\n // when @elizaos/core fails to load (e.g. transient drizzle-orm\n // resolution issues during cleanup). Keeps the optimizer output\n // recoverable.\n const fs = await import(\"node:fs\");\n const dir = path.join(stateDir, \"optimized-prompts\", promptTask);\n fs.mkdirSync(dir, { recursive: true });\n const existing = fs\n .readdirSync(dir)\n .filter((f) => /^v\\d+\\.json$/.test(f))\n .map((f) => Number.parseInt(f.replace(/^v|\\.json$/g, \"\"), 10))\n .filter((n) => Number.isFinite(n));\n const nextVersion = (existing.length ? Math.max(...existing) : 0) + 1;\n const out = path.join(dir, `v${nextVersion}.json`);\n fs.writeFileSync(out, JSON.stringify(artifactPayload, null, 2));\n console.warn(\n `[train] OptimizedPromptService unavailable (${(err as Error)?.message ?? 
err}); wrote raw artifact -> ${out}`,\n );\n }\n return 0;\n }\n default: {\n // Unreachable thanks to the ALLOWED_BACKENDS guard above.\n throw new Error(`Unknown backend: ${parsed.backend}`);\n }\n }\n}\n\nasync function loadBaselinePrompt(args: ParsedTrainArgs): Promise<string> {\n if (args.baseline) {\n const { readFile } = await import(\"node:fs/promises\");\n return await readFile(args.baseline, \"utf-8\");\n }\n const { readFileSync } = await import(\"node:fs\");\n const raw = readFileSync(args.dataset, \"utf-8\");\n const firstLine = raw.split(\"\\n\").find((line) => line.trim().length > 0);\n if (!firstLine) {\n throw new Error(\n `[native] cannot infer baseline from empty dataset ${args.dataset}; pass --baseline <path>`,\n );\n }\n const parsedJson: unknown = JSON.parse(firstLine);\n if (\n !parsedJson ||\n typeof parsedJson !== \"object\" ||\n (parsedJson as { format?: unknown }).format !== \"eliza_native_v1\"\n ) {\n throw new Error(\n `[native] dataset first row is not an eliza_native_v1 document; pass --baseline <path>`,\n );\n }\n const request = (\n parsedJson as { request?: { system?: unknown; messages?: unknown } }\n ).request;\n const messages = Array.isArray(request?.messages)\n ? (request.messages as Array<{ role?: string; content?: string }>)\n : [];\n const systemMsg = messages.find(\n (msg) => msg.role === \"system\" && typeof msg.content === \"string\",\n );\n const system =\n typeof request?.system === \"string\" && request.system.length > 0\n ? request.system\n : systemMsg?.content;\n if (!system) {\n throw new Error(\n `[native] dataset first row has no request.system or system message; pass --baseline <path>`,\n );\n }\n return system;\n}\n\nif (\n import.meta.url ===\n `file://${process.argv[1] ? new URL(`file://${process.argv[1]}`).pathname : \"\"}`\n) {\n runTrainCli(process.argv.slice(2))\n .then((code) => process.exit(code))\n .catch((err) => {\n console.error(err instanceof Error ? 
err.message : String(err));\n process.exit(1);\n });\n}\n"],"mappings":"AAUA,SAAS,iBAAiB;AAC1B,SAAS,mBAAmB,wBAAwB;AACpD,SAAS,0BAA0B;AAInC,MAAM,mBAAmB,oBAAI,IAAI,CAAC,QAAQ,CAAC;AAC3C,MAAM,gBAAgB,IAAI,IAAY,kBAAkB;AACxD,MAAM,qBAAqB,IAAI,IAAY,iBAAiB;AAE5D,MAAM,OAAO;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,yBAMY,CAAC,GAAG,aAAa,EAAE,KAAK,KAAK,CAAC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AA0BhD,SAAS,eAAe,MAA0C;AACvE,QAAM,EAAE,OAAO,IAAI,UAAU;AAAA,IAC3B,MAAM;AAAA,IACN,SAAS;AAAA,MACP,SAAS,EAAE,MAAM,SAAS;AAAA,MAC1B,SAAS,EAAE,MAAM,SAAS;AAAA,MAC1B,MAAM,EAAE,MAAM,SAAS;AAAA,MACvB,WAAW,EAAE,MAAM,SAAS;AAAA,MAC5B,UAAU,EAAE,MAAM,SAAS;AAAA,MAC3B,MAAM,EAAE,MAAM,UAAU;AAAA,IAC1B;AAAA,IACA,kBAAkB;AAAA,EACpB,CAAC;AACD,MAAI,OAAO,KAAM,QAAO;AAExB,QAAM,UAAU,OAAO,SAAS,KAAK;AACrC,MAAI,CAAC,WAAW,CAAC,iBAAiB,IAAI,OAAO,GAAG;AAC9C,UAAM,IAAI;AAAA,MACR,6CAA6C,CAAC,GAAG,gBAAgB,EAAE,KAAK,IAAI,CAAC;AAAA,IAC/E;AAAA,EACF;AACA,QAAM,UAAU,OAAO,SAAS,KAAK;AACrC,MAAI,CAAC,SAAS;AACZ,UAAM,IAAI,MAAM,8BAA8B;AAAA,EAChD;AACA,MAAI;AACJ,MAAI,OAAO,MAAM;AACf,UAAM,IAAI,OAAO,KAAK,KAAK;AAC3B,QAAI,CAAC,cAAc,IAAI,CAAC,GAAG;AACzB,YAAM,IAAI;AAAA,QACR,0BAA0B,CAAC,GAAG,aAAa,EAAE,KAAK,IAAI,CAAC;AAAA,MACzD;AAAA,IACF;AACA,WAAO;AAAA,EACT;AAEA,MAAI;AACJ,MAAI,OAAO,WAAW;AACpB,UAAM,MAAM,OAAO,UAAU,KAAK;AAClC,QAAI,CAAC,mBAAmB,IAAI,GAAG,GAAG;AAChC,YAAM,IAAI;AAAA,QACR,+BAA+B,CAAC,GAAG,kBAAkB,EAAE,KAAK,IAAI,CAAC;AAAA,MACnE;AAAA,IACF;AACA,gBAAY;AAAA,EACd;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,UAAU,OAAO;AAAA,EACnB;AACF;AAEA,eAAsB,YAAY,MAAiC;AACjE,QAAM,SAAS,eAAe,IAAI;AAClC,MAAI,WAAW,QAAQ;AACrB,YAAQ,OAAO,MAAM,IAAI;AACzB,WAAO;AAAA,EACT;AAEA,UAAQ,OAAO,SAAS;AAAA,IACtB,KAAK,UAAU;AACb,YAAM,YAAY,OAAO,aAAa;AACtC,YAAM,OAA+B,OAAO,QAAQ;AACpD,YAAM,iBAAiB,MAAM,mBAAmB,MAAM;AAItD,YAAM,gBACJ,QAAQ,IAAI,sBAAsB,KAAK,KACvC,QAAQ,IAAI,mBAAmB,KAAK;AACtC,UAAI,kBAAkB,YAAY;AAChC,gBAAQ;AAAA,UACN;AAAA,QAEF;AACA,eAAO;AAAA,MACT;AAaA,YAAM,aACJ;AACF,YAAM,eAAuC,MAAM,OAAO;AAC1D,YAAM,WAAW,aAAa,2BAA2B;AACzD,YAAM,UAAU;AAAA,QACd,MAAM,S
AAS,OAKK;AAClB,gBAAM,SAAS,MAAM,SACjB,GAAG,MAAM,MAAM;AAAA;AAAA,EAAO,MAAM,IAAI,KAChC,MAAM;AACV,iBAAO,MAAM,SAAS;AAAA,YACpB;AAAA,YACA,aAAa,MAAM;AAAA,YACnB,WAAW,MAAM;AAAA,UACnB,CAAC;AAAA,QACH;AAAA,MACF;AACA,cAAQ;AAAA,QACN;AAAA,MACF;AACA,YAAM,SAAS,MAAM,iBAAiB;AAAA,QACpC,aAAa,OAAO;AAAA,QACpB;AAAA,QACA;AAAA,QACA;AAAA,QACA,WAAW,OAAO;AAAA,QAClB,SAAS,EAAE,SAAS;AAAA,QACpB;AAAA,MACF,CAAC;AACD,iBAAW,QAAQ,OAAO,MAAO,SAAQ,IAAI,WAAW,IAAI,EAAE;AAC9D,UAAI,CAAC,OAAO,QAAS,QAAO;AAC5B,cAAQ;AAAA,QACN,kBAAkB,SAAS,SAAS,IAAI,YAAY,OAAO,WAAW,aACxD,OAAO,cAAc,QAAQ,CAAC,CAAC,cAAc,OAAO,MAAM,QAAQ,CAAC,CAAC;AAAA,MACpF;AAOA,YAAM,OAAO,MAAM,OAAO,WAAW;AACrC,YAAM,KAAK,MAAM,OAAO,SAAS;AACjC,YAAM,WACJ,QAAQ,IAAI,oBAAoB,KAAK,KACrC,QAAQ,IAAI,iBAAiB,KAAK,KAClC,KAAK,KAAK,GAAG,QAAQ,GAAG,QAAQ;AAClC,YAAM,aAAa,SAAS,oBAAoB,mBAAmB;AACnE,YAAM,kBAAkB;AAAA,QACtB,MAAM;AAAA,QACN;AAAA,QACA,UAAU;AAAA,QACV,QAAQ,OAAO,OAAO;AAAA,QACtB,eAAe,OAAO;AAAA,QACtB,OAAO,OAAO;AAAA,QACd,aAAa,OAAO;AAAA,QACpB,WAAW,OAAO;AAAA,QAClB,cAAa,oBAAI,KAAK,GAAE,YAAY;AAAA,QACpC,SAAS,OAAO,OAAO;AAAA,QACvB,GAAI,OAAO,OAAO,kBACd,EAAE,iBAAiB,OAAO,OAAO,gBAAgB,IACjD,CAAC;AAAA,MACP;AACA,UAAI;AACF,cAAM,EAAE,uBAAuB,IAAI,MAAM,OAAO,eAAe;AAC/D,cAAM,UAAU,IAAI,uBAAuB;AAC3C,gBAAQ,aAAa,KAAK,KAAK,UAAU,mBAAmB,CAAC;AAC7D,cAAM,eAAe,MAAM,QAAQ;AAAA,UACjC;AAAA,UACA;AAAA,QACF;AACA,gBAAQ,IAAI,qBAAqB,YAAY,EAAE;AAAA,MACjD,SAAS,KAAK;AAKZ,cAAM,KAAK,MAAM,OAAO,SAAS;AACjC,cAAM,MAAM,KAAK,KAAK,UAAU,qBAAqB,UAAU;AAC/D,WAAG,UAAU,KAAK,EAAE,WAAW,KAAK,CAAC;AACrC,cAAM,WAAW,GACd,YAAY,GAAG,EACf,OAAO,CAAC,MAAM,eAAe,KAAK,CAAC,CAAC,EACpC,IAAI,CAAC,MAAM,OAAO,SAAS,EAAE,QAAQ,eAAe,EAAE,GAAG,EAAE,CAAC,EAC5D,OAAO,CAAC,MAAM,OAAO,SAAS,CAAC,CAAC;AACnC,cAAM,eAAe,SAAS,SAAS,KAAK,IAAI,GAAG,QAAQ,IAAI,KAAK;AACpE,cAAM,MAAM,KAAK,KAAK,KAAK,IAAI,WAAW,OAAO;AACjD,WAAG,cAAc,KAAK,KAAK,UAAU,iBAAiB,MAAM,CAAC,CAAC;AAC9D,gBAAQ;AAAA,UACN,+CAAgD,KAAe,WAAW,GAAG,4BAA4B,GAAG;AAAA,QAC9G;AAAA,MACF;AACA,aAAO;AAAA,IACT;AAAA,IACA,SAAS;AAEP,YAAM,IAAI,MAAM,oBAAoB,OAAO,OAAO,EAAE;AAAA,IACtD;AAAA,EACF;AACF;AAEA,eAAe,mBAAmB,MAAwC;AACxE,MAAI,KAAK
,UAAU;AACjB,UAAM,EAAE,SAAS,IAAI,MAAM,OAAO,kBAAkB;AACpD,WAAO,MAAM,SAAS,KAAK,UAAU,OAAO;AAAA,EAC9C;AACA,QAAM,EAAE,aAAa,IAAI,MAAM,OAAO,SAAS;AAC/C,QAAM,MAAM,aAAa,KAAK,SAAS,OAAO;AAC9C,QAAM,YAAY,IAAI,MAAM,IAAI,EAAE,KAAK,CAAC,SAAS,KAAK,KAAK,EAAE,SAAS,CAAC;AACvE,MAAI,CAAC,WAAW;AACd,UAAM,IAAI;AAAA,MACR,qDAAqD,KAAK,OAAO;AAAA,IACnE;AAAA,EACF;AACA,QAAM,aAAsB,KAAK,MAAM,SAAS;AAChD,MACE,CAAC,cACD,OAAO,eAAe,YACrB,WAAoC,WAAW,mBAChD;AACA,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AACA,QAAM,UACJ,WACA;AACF,QAAM,WAAW,MAAM,QAAQ,SAAS,QAAQ,IAC3C,QAAQ,WACT,CAAC;AACL,QAAM,YAAY,SAAS;AAAA,IACzB,CAAC,QAAQ,IAAI,SAAS,YAAY,OAAO,IAAI,YAAY;AAAA,EAC3D;AACA,QAAM,SACJ,OAAO,SAAS,WAAW,YAAY,QAAQ,OAAO,SAAS,IAC3D,QAAQ,SACR,WAAW;AACjB,MAAI,CAAC,QAAQ;AACX,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AAEA,IACE,YAAY,QACZ,UAAU,QAAQ,KAAK,CAAC,IAAI,IAAI,IAAI,UAAU,QAAQ,KAAK,CAAC,CAAC,EAAE,EAAE,WAAW,EAAE,IAC9E;AACA,cAAY,QAAQ,KAAK,MAAM,CAAC,CAAC,EAC9B,KAAK,CAAC,SAAS,QAAQ,KAAK,IAAI,CAAC,EACjC,MAAM,CAAC,QAAQ;AACd,YAAQ,MAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAC9D,YAAQ,KAAK,CAAC;AAAA,EAChB,CAAC;AACL;","names":[]}
@@ -0,0 +1,55 @@
1
/** Variant label attached to a benchmark run; it is copied into the report's `source.variant`. */
+ export type ActionBenchmarkMatrixVariant = "reference" | "base" | "trained";
2
/**
 * Options for `runActionBenchmark` and its helper functions.
 * Every field is optional; string fields are generally trimmed and treated
 * as unset when blank.
 */
+ export interface ActionBenchmarkRunOptions {
3
/** Handed to `resolveWorkspaceRoot`; the benchmark runs in `<root>/packages/app-core`. */
+ workspaceRoot?: string;
4
/** Executable to spawn; falls back to `defaultBunCommand()`. */
+ bun?: string;
5
/** Directory for reports and trajectories; defaults to a timestamped folder under the training state root. */
+ outputDir?: string;
6
/** Sets `ELIZA_BENCHMARK_USE_MOCKS=1`; when unset, it is treated as true only if `dryRun` is true. */
+ useMocks?: boolean;
7
/** Unless explicitly `false`, `ELIZA_DUMP_TRAJECTORIES` and `ELIZA_TRAJECTORY_MARKDOWN` are set to "1". */
+ forceTrajectoryCapture?: boolean;
8
/** Exported as `ELIZA_BENCHMARK_FILTER`. */
+ filter?: string;
9
/** Floored and clamped to at least 1, then exported as `ELIZA_BENCHMARK_RUNS_PER_CASE`. */
+ runsPerCase?: number;
10
/** Exported as `ELIZA_BENCHMARK_PROVIDER`; the value "local-llama-cpp" also enables the local model availability check. */
+ provider?: string;
11
/** Model id recorded in the report source; the report falls back to `provider` when this is blank. */
+ modelId?: string;
12
/** Default for both `smallModel` and `largeModel`, and the id checked against the local endpoint. */
+ runtimeModel?: string;
13
/** Exported as `ELIZA_LIVE_TEST_SMALL_MODEL`. */
+ smallModel?: string;
14
/** Exported as `ELIZA_LIVE_TEST_LARGE_MODEL`. */
+ largeModel?: string;
15
/** Exported as `ELIZA_LIVE_TEST_LOCAL_LLAMA_CPP_BASE_URL`; the availability check defaults to `http://localhost:11434/v1`. */
+ baseUrl?: string;
16
/** Recorded in the report source; without it no matrix source is produced. */
+ variant?: ActionBenchmarkMatrixVariant;
17
/** Recorded in the report source. */
+ tier?: string;
18
/** Recorded in the report source. */
+ benchmark?: string;
19
/** Recorded in the report source. */
+ datasetVersion?: string;
20
/** Recorded in the report source. */
+ codeCommit?: string;
21
/** When true, placeholder report and trajectory files are written and no process is spawned. */
+ dryRun?: boolean;
22
+ }
23
/** Outcome of `runActionBenchmark`: resolved paths, the invocation, captured output and provenance. */
+ export interface ActionBenchmarkRunResult {
24
+ workspaceRoot: string;
25
/** `<workspaceRoot>/packages/app-core`, used as the working directory of the spawned process. */
+ appCoreRoot: string;
26
+ outputDir: string;
27
/** `<outputDir>/action-benchmark-report.md`. */
+ reportMarkdownPath: string;
28
/** `<outputDir>/action-benchmark-report.json`. */
+ reportJsonPath: string;
29
/** `<outputDir>/trajectories`. */
+ trajectoryDir: string;
30
/** The executable followed by its arguments. */
+ command: string[];
31
/** Only the benchmark-specific variables; the spawned process also receives `process.env`. */
+ env: Record<string, string>;
32
+ stdout: string;
33
+ stderr: string;
34
/** Always 0 on return: a non-zero exit makes `runActionBenchmark` throw instead. */
+ exitCode: number;
35
/** Provenance for the JSON report, or `null` when no model id/provider or no variant was supplied. */
+ matrixSource: {
36
/** Path of the JSON report this provenance describes. */
+ path: string;
37
+ modelId?: string;
38
+ benchmark?: string;
39
+ variant?: ActionBenchmarkMatrixVariant;
40
+ tier?: string;
41
+ provider?: string;
42
+ datasetVersion?: string;
43
+ codeCommit?: string;
44
+ useMocks?: boolean;
45
+ } | null;
46
+ }
47
/**
 * Rejects when the requested `runtimeModel` is not listed by the local model endpoint.
 * Resolves without checking when mocks are in effect, when `provider` is not
 * "local-llama-cpp", or when no `runtimeModel` is set.
 */
+ export declare function assertLocalBenchmarkModelAvailable(options: ActionBenchmarkRunOptions): Promise<void>;
48
/** Returns the fixed argument list (`x vitest run ...`) passed to the bun executable for the action-selection benchmark. */
+ export declare function buildActionBenchmarkCommand(): string[];
49
/**
 * Builds the benchmark-specific environment variables from the options and
 * the resolved report/trajectory paths. The result does not include
 * `process.env`.
 */
+ export declare function buildActionBenchmarkEnv(options: ActionBenchmarkRunOptions, resolved: {
50
+ reportMarkdownPath: string;
51
+ reportJsonPath: string;
52
+ trajectoryDir: string;
53
+ }): Record<string, string>;
54
/**
 * Runs the action-selection benchmark (or, with `dryRun`, writes placeholder
 * artifacts) and resolves with paths, invocation details and captured output.
 * Rejects when the spawned process exits with a non-zero code.
 */
+ export declare function runActionBenchmark(options?: ActionBenchmarkRunOptions): Promise<ActionBenchmarkRunResult>;
55
+ //# sourceMappingURL=action-benchmark-runner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"action-benchmark-runner.d.ts","sourceRoot":"","sources":["../../src/core/action-benchmark-runner.ts"],"names":[],"mappings":"AAMA,MAAM,MAAM,4BAA4B,GAAG,WAAW,GAAG,MAAM,GAAG,SAAS,CAAC;AAE5E,MAAM,WAAW,yBAAyB;IACxC,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,sBAAsB,CAAC,EAAE,OAAO,CAAC;IACjC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,4BAA4B,CAAC;IACvC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED,MAAM,WAAW,wBAAwB;IACvC,aAAa,EAAE,MAAM,CAAC;IACtB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,cAAc,EAAE,MAAM,CAAC;IACvB,aAAa,EAAE,MAAM,CAAC;IACtB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE;QACZ,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,OAAO,CAAC,EAAE,4BAA4B,CAAC;QACvC,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,cAAc,CAAC,EAAE,MAAM,CAAC;QACxB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,QAAQ,CAAC,EAAE,OAAO,CAAC;KACpB,GAAG,IAAI,CAAC;CACV;AAkFD,wBAAsB,kCAAkC,CACtD,OAAO,EAAE,yBAAyB,GACjC,OAAO,CAAC,IAAI,CAAC,CAaf;AAmFD,wBAAgB,2BAA2B,IAAI,MAAM,EAAE,CAatD;AAED,wBAAgB,uBAAuB,CACrC,OAAO,EAAE,yBAAyB,EAClC,QAAQ,EAAE;IACR,kBAAkB,EAAE,MAAM,CAAC;IAC3B,cAAc,EAAE,MAAM,CAAC;IACvB,aAAa,EAAE,MAAM,CAAC;CACvB,GACA,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAiCxB;AAED,wBAAsB,kBAAkB,CACtC,OAAO,GAAE,yBAA8B,GACtC,OAAO,CAAC,wBAAwB,CAAC,CA0JnC"}
@@ -0,0 +1,341 @@
1
+ import { spawn } from "node:child_process";
2
+ import { mkdir, readFile, writeFile } from "node:fs/promises";
3
+ import { join } from "node:path";
4
+ import { trainingStateRoot } from "./training-config.js";
5
+ import { defaultBunCommand, resolveWorkspaceRoot } from "./workspace-runtime.js";
6
/**
 * Turn a timestamp string into a path-friendly token by swapping every
 * ":" and "." for "-".
 *
 * @param {string} value Timestamp text, e.g. an ISO-8601 string.
 * @returns {string} The same text with colons and dots replaced by dashes.
 */
function safeTimestamp(value) {
  return value.split(/[:.]/).join("-");
}
9
/**
 * Coerce a finite number to a whole number that is at least 1.
 *
 * @param {unknown} value Candidate value.
 * @returns {number | undefined} The floored value clamped to a minimum of 1,
 *   or `undefined` when `value` is not a finite number.
 */
function positiveInt(value) {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return undefined;
  }
  const whole = Math.floor(value);
  return whole < 1 ? 1 : whole;
}
12
/**
 * Spawn a child process and buffer everything it prints.
 *
 * stdin is ignored; stdout and stderr are decoded as UTF-8 and accumulated.
 *
 * @param {string} command Executable to run.
 * @param {string[]} args Arguments for the executable.
 * @param {string} cwd Working directory of the child.
 * @param {Record<string, string | undefined>} env Environment of the child.
 * @returns {Promise<{ stdout: string, stderr: string, exitCode: number }>}
 *   Resolves when the child's streams close; a missing exit code is reported
 *   as 1. Rejects if the child emits an "error" event.
 */
function collectProcess(command, args, cwd, env) {
  return new Promise((resolvePromise, reject) => {
    const child = spawn(command, args, {
      cwd,
      env,
      stdio: ["ignore", "pipe", "pipe"],
    });
    const captured = { stdout: "", stderr: "" };
    for (const streamName of ["stdout", "stderr"]) {
      const stream = child[streamName];
      stream.setEncoding("utf-8");
      stream.on("data", (chunk) => {
        captured[streamName] += chunk;
      });
    }
    child.on("error", reject);
    child.on("close", (code) => {
      resolvePromise({
        stdout: captured.stdout,
        stderr: captured.stderr,
        exitCode: code ?? 1,
      });
    });
  });
}
35
/**
 * Normalize an optional string setting.
 *
 * @param {string | null | undefined} value Raw setting.
 * @returns {string | undefined} The trimmed text, or `undefined` when the
 *   value is nullish or blank after trimming.
 */
function stringSetting(value) {
  const trimmed = value?.trim();
  return trimmed ? trimmed : undefined;
}
38
/**
 * Build the model-listing URL for a base URL.
 *
 * @param {string} baseUrl Base URL; surrounding whitespace and any trailing
 *   slashes are dropped.
 * @returns {string} The base URL followed by `/models`.
 */
function modelListUrl(baseUrl) {
  let root = baseUrl.trim();
  while (root.endsWith("/")) {
    root = root.slice(0, -1);
  }
  return `${root}/models`;
}
42
/**
 * Decide whether a listed model id refers to the requested model.
 * The two ids match when they are equal, or when they differ only by a
 * `:latest` suffix on either side.
 *
 * @param {string} availableId Id reported by the endpoint.
 * @param {string} requestedId Id the caller asked for.
 * @returns {boolean} Whether the ids are considered the same model.
 */
function localModelIdMatches(availableId, requestedId) {
  if (availableId === requestedId) {
    return true;
  }
  if (availableId === `${requestedId}:latest`) {
    return true;
  }
  return `${availableId}:latest` === requestedId;
}
45
/**
 * Fetch the ids of the models listed by a local model endpoint.
 *
 * Requests `modelListUrl(baseUrl)` and reads the `data` array of the JSON
 * payload. Object entries contribute their `id` (or `name` when `id` is
 * nullish); other entries are used as-is. Only non-empty strings are kept.
 *
 * @param {string} baseUrl Base URL of the endpoint.
 * @returns {Promise<string[]>} The listed model ids; empty when the payload
 *   has no `data` array.
 * @throws {Error} When the endpoint cannot be reached, answers with a
 *   non-2xx status, or returns a body that is not JSON. Every message names
 *   the endpoint; the underlying error is attached as `cause`.
 */
async function localModelIds(baseUrl) {
  const endpoint = modelListUrl(baseUrl);

  let response;
  try {
    response = await fetch(endpoint);
  } catch (err) {
    // A refused connection otherwise surfaces as a bare "fetch failed".
    const detail = err instanceof Error ? err.message : String(err);
    throw new Error(`local model endpoint ${endpoint} is unreachable: ${detail}`, {
      cause: err,
    });
  }
  if (!response.ok) {
    throw new Error(
      `local model endpoint ${endpoint} returned ${response.status} ${response.statusText}`
    );
  }

  let payload;
  try {
    payload = await response.json();
  } catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    throw new Error(`local model endpoint ${endpoint} returned a non-JSON body: ${detail}`, {
      cause: err,
    });
  }

  const data =
    payload && typeof payload === "object" && Array.isArray(payload.data) ? payload.data : [];
  return data
    .map((item) => (item && typeof item === "object" ? item.id ?? item.name : item))
    .filter((id) => typeof id === "string" && id.length > 0);
}
58
/**
 * Verify that the requested runtime model is served by the local endpoint.
 *
 * The check is skipped (resolving immediately) when mocks are in effect,
 * when the provider is not "local-llama-cpp", or when no runtime model is
 * configured. The endpoint defaults to `http://localhost:11434/v1`.
 *
 * @param {object} options Benchmark run options.
 * @returns {Promise<void>}
 * @throws {Error} When the model is not among the ids the endpoint lists;
 *   errors from listing the models propagate unchanged.
 */
async function assertLocalBenchmarkModelAvailable(options) {
  if (effectiveUseMocks(options) || options.provider !== "local-llama-cpp") {
    return;
  }
  const wantedModel = stringSetting(options.runtimeModel);
  if (!wantedModel) {
    return;
  }

  const endpointBase = stringSetting(options.baseUrl) ?? "http://localhost:11434/v1";
  const listedIds = await localModelIds(endpointBase);
  const isListed = listedIds.some((id) => localModelIdMatches(id, wantedModel));
  if (!isListed) {
    const listing = listedIds.length > 0 ? listedIds.join(", ") : "none";
    throw new Error(
      `local action benchmark model "${wantedModel}" is not available at ${modelListUrl(endpointBase)}; available models: ${listing}`
    );
  }
}
72
/**
 * Resolve whether the benchmark should run against mocks.
 *
 * @param {{ useMocks?: boolean | null, dryRun?: boolean }} options Run options.
 * @returns {boolean} `options.useMocks` when it is neither null nor
 *   undefined; otherwise true only if `options.dryRun` is exactly `true`.
 */
function effectiveUseMocks(options) {
  const explicit = options.useMocks;
  if (explicit !== undefined && explicit !== null) {
    return explicit;
  }
  return options.dryRun === true;
}
75
/**
 * Describe where a JSON report came from.
 *
 * @param {string} reportJsonPath Path of the JSON report.
 * @param {object} options Benchmark run options.
 * @returns {object | null} The provenance record, or `null` when neither a
 *   model id nor a provider is set, or when no variant is set.
 */
function matrixSourceForReport(reportJsonPath, options) {
  const { variant } = options;
  // The provider doubles as the model id when no explicit id is given.
  const modelId = stringSetting(options.modelId) ?? stringSetting(options.provider);
  const hasIdentity = Boolean(modelId) && Boolean(variant);
  if (!hasIdentity) {
    return null;
  }

  const source = { path: reportJsonPath, modelId, variant };
  source.benchmark = stringSetting(options.benchmark);
  source.tier = stringSetting(options.tier);
  source.provider = stringSetting(options.provider);
  source.datasetVersion = stringSetting(options.datasetVersion);
  source.codeCommit = stringSetting(options.codeCommit);
  source.useMocks = effectiveUseMocks(options);
  return source;
}
91
/**
 * Build the single placeholder case recorded by a dry run.
 *
 * The tier defaults to "2b" and the variant to "trained"; the model id
 * falls back to the runtime model. The case is always marked as not passed
 * because no inference takes place.
 *
 * @param {object} options Benchmark run options.
 * @param {string} trajectoryDir Directory the trajectory file belongs in.
 * @returns {object} The placeholder case, including its trajectory path.
 */
function dryRunCaseSample(options, trajectoryDir) {
  const tier = stringSetting(options.tier) ?? "2b";
  const variant = stringSetting(options.variant) ?? "trained";
  const modelId = stringSetting(options.modelId) ?? stringSetting(options.runtimeModel);
  // The trajectory file is named after the case id.
  const caseId = `dry-run-${tier}-${variant}-action-selection`;
  const trajectoryPath = join(trajectoryDir, `${caseId}.json`);

  return {
    caseId,
    prompt: "Can you check my calendar?",
    expectedAction: "CHECK_RUNTIME",
    actualAction: null,
    pass: false,
    response: "Dry-run benchmark provenance sample; no model inference executed.",
    latencyMs: 0,
    trajectoryPath,
    dryRun: true,
    modelId,
    tier,
    variant,
  };
}
113
/**
 * Stamp provenance fields onto an existing JSON report, in place.
 *
 * Nothing is written when no provenance can be derived from the options or
 * when the report's top level is not a plain object. Fields already present
 * under `source` are kept unless overwritten by a provenance field.
 *
 * @param {string} reportJsonPath Path of the JSON report to rewrite.
 * @param {object} options Benchmark run options.
 * @returns {Promise<void>}
 */
async function annotateBenchmarkReportSource(reportJsonPath, options) {
  const matrixSource = matrixSourceForReport(reportJsonPath, options);
  if (!matrixSource) {
    return;
  }

  const report = JSON.parse(await readFile(reportJsonPath, "utf8"));
  const isPlainObject = Boolean(report) && typeof report === "object" && !Array.isArray(report);
  if (!isPlainObject) {
    return;
  }

  const previous = report.source;
  const previousIsObject =
    Boolean(previous) && typeof previous === "object" && !Array.isArray(previous);
  report.source = Object.assign({}, previousIsObject ? previous : {}, {
    modelId: matrixSource.modelId,
    variant: matrixSource.variant,
    benchmark: matrixSource.benchmark,
    tier: matrixSource.tier,
    provider: matrixSource.provider,
    datasetVersion: matrixSource.datasetVersion,
    codeCommit: matrixSource.codeCommit,
    useMocks: matrixSource.useMocks,
  });

  const serialized = JSON.stringify(report, null, 2);
  await writeFile(reportJsonPath, `${serialized}\n`, "utf8");
}
138
/**
 * Build the argument list for the action-selection benchmark run.
 *
 * @returns {string[]} Arguments that run vitest on the benchmark test file
 *   with the real-test config, excluding the `.git` and `.eliza` trees.
 */
function buildActionBenchmarkCommand() {
  const excludeFlags = [".git/**", ".eliza/**"].flatMap((glob) => ["--exclude", glob]);
  return [
    "x",
    "vitest",
    "run",
    "--config",
    "../test/vitest/real.config.ts",
    "test/benchmarks/action-selection.real.test.ts",
    ...excludeFlags,
  ];
}
152
/**
 * Translate run options into the environment variables the benchmark reads.
 *
 * @param {object} options Benchmark run options.
 * @param {{ reportMarkdownPath: string, reportJsonPath: string, trajectoryDir: string }} resolved
 *   Output locations for the run.
 * @returns {Record<string, string>} Benchmark-specific variables only; the
 *   caller merges them with the ambient environment.
 */
function buildActionBenchmarkEnv(options, resolved) {
  const env = {
    ELIZA_RUN_ACTION_BENCHMARK: "1",
    ELIZA_ACTION_BENCHMARK_REPORT_PATH: resolved.reportMarkdownPath,
    ELIZA_ACTION_BENCHMARK_REPORT_JSON_PATH: resolved.reportJsonPath,
    ELIZA_ACTION_BENCHMARK_TRAJECTORY_DIR: resolved.trajectoryDir,
  };

  if (effectiveUseMocks(options)) {
    env.ELIZA_BENCHMARK_USE_MOCKS = "1";
  }

  // Trajectory capture is on unless the caller explicitly opts out.
  const captureTrajectories = options.forceTrajectoryCapture !== false;
  if (captureTrajectories) {
    env.ELIZA_DUMP_TRAJECTORIES = "1";
    env.ELIZA_TRAJECTORY_MARKDOWN = "1";
  }

  const runsPerCase = positiveInt(options.runsPerCase);
  if (runsPerCase) {
    env.ELIZA_BENCHMARK_RUNS_PER_CASE = String(runsPerCase);
  }

  const filter = stringSetting(options.filter);
  if (filter) {
    env.ELIZA_BENCHMARK_FILTER = filter;
  }

  const provider = stringSetting(options.provider);
  if (provider) {
    env.ELIZA_BENCHMARK_PROVIDER = provider;
    if (provider === "local-llama-cpp") {
      env.LOCAL_LLAMA_CPP_API_KEY = process.env.LOCAL_LLAMA_CPP_API_KEY ?? "local";
    }
  }

  // The runtime model stands in for whichever size-specific model is unset.
  const runtimeModel = stringSetting(options.runtimeModel);
  const smallModel = stringSetting(options.smallModel) ?? runtimeModel;
  const largeModel = stringSetting(options.largeModel) ?? runtimeModel;
  if (smallModel) {
    env.ELIZA_LIVE_TEST_SMALL_MODEL = smallModel;
  }
  if (largeModel) {
    env.ELIZA_LIVE_TEST_LARGE_MODEL = largeModel;
  }

  const baseUrl = stringSetting(options.baseUrl);
  if (baseUrl) {
    env.ELIZA_LIVE_TEST_LOCAL_LLAMA_CPP_BASE_URL = baseUrl;
  }
  return env;
}
185
/**
 * Run the app-core action-selection benchmark, or simulate it.
 *
 * Creates the output and trajectory directories, then either:
 * - dry run: writes a placeholder trajectory, JSON report and markdown
 *   report without spawning anything; or
 * - real run: checks local model availability, spawns the benchmark in
 *   `<workspaceRoot>/packages/app-core` with the ambient environment plus
 *   the benchmark variables, and stamps provenance onto the JSON report.
 *
 * @param {object} [options] Benchmark run options.
 * @returns {Promise<object>} Resolved paths, the invocation, captured
 *   output, exit code and report provenance.
 * @throws {Error} When the spawned benchmark exits with a non-zero code;
 *   errors from the availability check and file operations propagate.
 */
async function runActionBenchmark(options = {}) {
  const workspaceRoot = resolveWorkspaceRoot(options.workspaceRoot);
  const appCoreRoot = join(workspaceRoot, "packages", "app-core");
  const stamp = safeTimestamp(new Date().toISOString());
  const outputDir =
    options.outputDir ?? join(trainingStateRoot(), "benchmarks", "action-selection", stamp);
  const reportMarkdownPath = join(outputDir, "action-benchmark-report.md");
  const reportJsonPath = join(outputDir, "action-benchmark-report.json");
  const trajectoryDir = join(outputDir, "trajectories");
  await mkdir(outputDir, { recursive: true });
  await mkdir(trajectoryDir, { recursive: true });

  const command = options.bun ?? defaultBunCommand();
  const args = buildActionBenchmarkCommand();
  const benchmarkEnv = buildActionBenchmarkEnv(options, {
    reportMarkdownPath,
    reportJsonPath,
    trajectoryDir,
  });
  const reportMatrixSource = matrixSourceForReport(reportJsonPath, options);

  // Fields shared by the dry-run and real-run results.
  const invocation = {
    workspaceRoot,
    appCoreRoot,
    outputDir,
    reportMarkdownPath,
    reportJsonPath,
    trajectoryDir,
    command: [command, ...args],
    env: benchmarkEnv,
  };
  const asJsonFile = (document) => `${JSON.stringify(document, null, 2)}\n`;

  if (options.dryRun) {
    const sample = dryRunCaseSample(options, trajectoryDir);
    const dryRunReason = "No model inference executed in dry-run mode.";

    const trajectoryDocument = {
      schema: "eliza_action_benchmark_dry_run_trajectory",
      schemaVersion: 1,
      generatedAt: new Date().toISOString(),
      source: {
        kind: "app_core_action_selection_benchmark",
        dryRun: true,
        modelId: sample.modelId,
        tier: sample.tier,
        variant: sample.variant,
      },
      caseId: sample.caseId,
      prompt: sample.prompt,
      expectedAction: sample.expectedAction,
      actualAction: sample.actualAction,
      pass: sample.pass,
      response: sample.response,
      events: [
        {
          type: "DRY_RUN_BENCHMARK_CASE",
          timestamp: new Date().toISOString(),
          data: { reason: dryRunReason },
        },
      ],
    };
    await writeFile(String(sample.trajectoryPath), asJsonFile(trajectoryDocument), "utf8");

    const reportDocument = {
      schema: "eliza_action_selection_benchmark_report",
      schemaVersion: 1,
      generatedAt: new Date().toISOString(),
      source: {
        kind: "app_core_action_selection_benchmark",
        trajectoryDir,
        reportMarkdownPath,
        modelId: reportMatrixSource?.modelId,
        variant: reportMatrixSource?.variant,
        benchmark: reportMatrixSource?.benchmark,
        tier: reportMatrixSource?.tier,
        provider: reportMatrixSource?.provider,
        datasetVersion: reportMatrixSource?.datasetVersion,
        codeCommit: reportMatrixSource?.codeCommit,
        useMocks: reportMatrixSource?.useMocks,
        dryRun: true,
      },
      summary: {
        total: 1,
        passed: 0,
        failed: 1,
        accuracy: 0,
        plannerAccuracy: 0,
        executionAccuracy: 0,
      },
      failureModes: { dry_run: 1 },
      failures: [
        {
          caseId: sample.caseId,
          failureMode: "dry_run",
          reason: dryRunReason,
        },
      ],
      results: [sample],
      dryRun: true,
    };
    await writeFile(reportJsonPath, asJsonFile(reportDocument), "utf8");

    await writeFile(
      reportMarkdownPath,
      "# Action Selection Benchmark Dry Run\n\nNo benchmark cases were executed.\n",
      "utf8"
    );

    return {
      ...invocation,
      stdout: "[DRY RUN] Would run app-core action selection benchmark.",
      stderr: "",
      exitCode: 0,
      matrixSource: reportMatrixSource,
    };
  }

  await assertLocalBenchmarkModelAvailable(options);
  const childEnv = { ...process.env, ...benchmarkEnv };
  const proc = await collectProcess(command, args, appCoreRoot, childEnv);
  if (proc.exitCode !== 0) {
    throw new Error(
      `action selection benchmark exited with code ${proc.exitCode}: ${proc.stderr || proc.stdout}`
    );
  }
  await annotateBenchmarkReportSource(reportJsonPath, options);

  return {
    ...invocation,
    stdout: proc.stdout,
    stderr: proc.stderr,
    exitCode: proc.exitCode,
    matrixSource: matrixSourceForReport(reportJsonPath, options),
  };
}
335
+ export {
336
+ assertLocalBenchmarkModelAvailable,
337
+ buildActionBenchmarkCommand,
338
+ buildActionBenchmarkEnv,
339
+ runActionBenchmark
340
+ };
341
+ //# sourceMappingURL=action-benchmark-runner.js.map