@elizaos/plugin-training 2.0.3-beta.5 → 2.0.3-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (363)
  1. package/dist/backends/native.d.ts +96 -0
  2. package/dist/backends/native.d.ts.map +1 -0
  3. package/dist/backends/native.js +308 -0
  4. package/dist/backends/native.js.map +1 -0
  5. package/dist/cli/train.d.ts +22 -0
  6. package/dist/cli/train.d.ts.map +1 -0
  7. package/dist/cli/train.js +219 -0
  8. package/dist/cli/train.js.map +1 -0
  9. package/dist/core/action-benchmark-runner.d.ts +55 -0
  10. package/dist/core/action-benchmark-runner.d.ts.map +1 -0
  11. package/dist/core/action-benchmark-runner.js +341 -0
  12. package/dist/core/action-benchmark-runner.js.map +1 -0
  13. package/dist/core/artifact-store.d.ts +72 -0
  14. package/dist/core/artifact-store.d.ts.map +1 -0
  15. package/dist/core/artifact-store.js +50 -0
  16. package/dist/core/artifact-store.js.map +1 -0
  17. package/dist/core/benchmark-matrix-artifact.d.ts +102 -0
  18. package/dist/core/benchmark-matrix-artifact.d.ts.map +1 -0
  19. package/dist/core/benchmark-matrix-artifact.js +381 -0
  20. package/dist/core/benchmark-matrix-artifact.js.map +1 -0
  21. package/dist/core/benchmark-vs-cerebras-runner.d.ts +37 -0
  22. package/dist/core/benchmark-vs-cerebras-runner.d.ts.map +1 -0
  23. package/dist/core/benchmark-vs-cerebras-runner.js +151 -0
  24. package/dist/core/benchmark-vs-cerebras-runner.js.map +1 -0
  25. package/dist/core/cerebras-eval-model.d.ts +54 -0
  26. package/dist/core/cerebras-eval-model.d.ts.map +1 -0
  27. package/dist/core/cerebras-eval-model.js +249 -0
  28. package/dist/core/cerebras-eval-model.js.map +1 -0
  29. package/dist/core/cli.d.ts +15 -0
  30. package/dist/core/cli.d.ts.map +1 -0
  31. package/dist/core/cli.js +1003 -0
  32. package/dist/core/cli.js.map +1 -0
  33. package/dist/core/context-audit.d.ts +51 -0
  34. package/dist/core/context-audit.d.ts.map +1 -0
  35. package/dist/core/context-audit.js +166 -0
  36. package/dist/core/context-audit.js.map +1 -0
  37. package/dist/core/context-catalog.d.ts +47 -0
  38. package/dist/core/context-catalog.d.ts.map +1 -0
  39. package/dist/core/context-catalog.js +269 -0
  40. package/dist/core/context-catalog.js.map +1 -0
  41. package/dist/core/context-types.d.ts +3 -0
  42. package/dist/core/context-types.d.ts.map +1 -0
  43. package/dist/core/context-types.js +18 -0
  44. package/dist/core/context-types.js.map +1 -0
  45. package/dist/core/dataset-generator.d.ts +135 -0
  46. package/dist/core/dataset-generator.d.ts.map +1 -0
  47. package/dist/core/dataset-generator.js +895 -0
  48. package/dist/core/dataset-generator.js.map +1 -0
  49. package/dist/core/eliza1-benchmark-recipe.d.ts +18 -0
  50. package/dist/core/eliza1-benchmark-recipe.d.ts.map +1 -0
  51. package/dist/core/eliza1-benchmark-recipe.js +64 -0
  52. package/dist/core/eliza1-benchmark-recipe.js.map +1 -0
  53. package/dist/core/eliza1-bundle-stager.d.ts +57 -0
  54. package/dist/core/eliza1-bundle-stager.d.ts.map +1 -0
  55. package/dist/core/eliza1-bundle-stager.js +149 -0
  56. package/dist/core/eliza1-bundle-stager.js.map +1 -0
  57. package/dist/core/ensure-cron-job.d.ts +53 -0
  58. package/dist/core/ensure-cron-job.d.ts.map +1 -0
  59. package/dist/core/ensure-cron-job.js +51 -0
  60. package/dist/core/ensure-cron-job.js.map +1 -0
  61. package/dist/core/eval-comparison-artifact.d.ts +72 -0
  62. package/dist/core/eval-comparison-artifact.d.ts.map +1 -0
  63. package/dist/core/eval-comparison-artifact.js +281 -0
  64. package/dist/core/eval-comparison-artifact.js.map +1 -0
  65. package/dist/core/feed-generation-runner.d.ts +37 -0
  66. package/dist/core/feed-generation-runner.d.ts.map +1 -0
  67. package/dist/core/feed-generation-runner.js +232 -0
  68. package/dist/core/feed-generation-runner.js.map +1 -0
  69. package/dist/core/html-escape.d.ts +5 -0
  70. package/dist/core/html-escape.d.ts.map +1 -0
  71. package/dist/core/html-escape.js +11 -0
  72. package/dist/core/html-escape.js.map +1 -0
  73. package/dist/core/huggingface-dataset-ingest.d.ts +52 -0
  74. package/dist/core/huggingface-dataset-ingest.d.ts.map +1 -0
  75. package/dist/core/huggingface-dataset-ingest.js +134 -0
  76. package/dist/core/huggingface-dataset-ingest.js.map +1 -0
  77. package/dist/core/index.d.ts +29 -0
  78. package/dist/core/index.d.ts.map +1 -0
  79. package/dist/core/index.js +204 -0
  80. package/dist/core/index.js.map +1 -0
  81. package/dist/core/privacy-filter.d.ts +95 -0
  82. package/dist/core/privacy-filter.d.ts.map +1 -0
  83. package/dist/core/privacy-filter.js +324 -0
  84. package/dist/core/privacy-filter.js.map +1 -0
  85. package/dist/core/promotion-gate.d.ts +117 -0
  86. package/dist/core/promotion-gate.d.ts.map +1 -0
  87. package/dist/core/promotion-gate.js +85 -0
  88. package/dist/core/promotion-gate.js.map +1 -0
  89. package/dist/core/promotion-persist.d.ts +116 -0
  90. package/dist/core/promotion-persist.d.ts.map +1 -0
  91. package/dist/core/promotion-persist.js +93 -0
  92. package/dist/core/promotion-persist.js.map +1 -0
  93. package/dist/core/prompt-compare.d.ts +99 -0
  94. package/dist/core/prompt-compare.d.ts.map +1 -0
  95. package/dist/core/prompt-compare.js +210 -0
  96. package/dist/core/prompt-compare.js.map +1 -0
  97. package/dist/core/replay-validator.d.ts +136 -0
  98. package/dist/core/replay-validator.d.ts.map +1 -0
  99. package/dist/core/replay-validator.js +312 -0
  100. package/dist/core/replay-validator.js.map +1 -0
  101. package/dist/core/roleplay-executor.d.ts +123 -0
  102. package/dist/core/roleplay-executor.d.ts.map +1 -0
  103. package/dist/core/roleplay-executor.js +675 -0
  104. package/dist/core/roleplay-executor.js.map +1 -0
  105. package/dist/core/roleplay-trajectories.d.ts +54 -0
  106. package/dist/core/roleplay-trajectories.d.ts.map +1 -0
  107. package/dist/core/roleplay-trajectories.js +88 -0
  108. package/dist/core/roleplay-trajectories.js.map +1 -0
  109. package/dist/core/scenario-blueprints.d.ts +62 -0
  110. package/dist/core/scenario-blueprints.d.ts.map +1 -0
  111. package/dist/core/scenario-blueprints.js +850 -0
  112. package/dist/core/scenario-blueprints.js.map +1 -0
  113. package/dist/core/scenario-runner.d.ts +36 -0
  114. package/dist/core/scenario-runner.d.ts.map +1 -0
  115. package/dist/core/scenario-runner.js +216 -0
  116. package/dist/core/scenario-runner.js.map +1 -0
  117. package/dist/core/skill-scoring-cron.d.ts +57 -0
  118. package/dist/core/skill-scoring-cron.d.ts.map +1 -0
  119. package/dist/core/skill-scoring-cron.js +180 -0
  120. package/dist/core/skill-scoring-cron.js.map +1 -0
  121. package/dist/core/test-trajectory-collector.d.ts +37 -0
  122. package/dist/core/test-trajectory-collector.d.ts.map +1 -0
  123. package/dist/core/test-trajectory-collector.js +225 -0
  124. package/dist/core/test-trajectory-collector.js.map +1 -0
  125. package/dist/core/track-c-queue-task.d.ts +37 -0
  126. package/dist/core/track-c-queue-task.d.ts.map +1 -0
  127. package/dist/core/track-c-queue-task.js +104 -0
  128. package/dist/core/track-c-queue-task.js.map +1 -0
  129. package/dist/core/training-analysis-index.d.ts +104 -0
  130. package/dist/core/training-analysis-index.d.ts.map +1 -0
  131. package/dist/core/training-analysis-index.js +3297 -0
  132. package/dist/core/training-analysis-index.js.map +1 -0
  133. package/dist/core/training-collection-runner.d.ts +508 -0
  134. package/dist/core/training-collection-runner.d.ts.map +1 -0
  135. package/dist/core/training-collection-runner.js +2299 -0
  136. package/dist/core/training-collection-runner.js.map +1 -0
  137. package/dist/core/training-config.d.ts +52 -0
  138. package/dist/core/training-config.d.ts.map +1 -0
  139. package/dist/core/training-config.js +117 -0
  140. package/dist/core/training-config.js.map +1 -0
  141. package/dist/core/training-orchestrator.d.ts +112 -0
  142. package/dist/core/training-orchestrator.d.ts.map +1 -0
  143. package/dist/core/training-orchestrator.js +729 -0
  144. package/dist/core/training-orchestrator.js.map +1 -0
  145. package/dist/core/training-readiness-report.d.ts +52 -0
  146. package/dist/core/training-readiness-report.d.ts.map +1 -0
  147. package/dist/core/training-readiness-report.js +765 -0
  148. package/dist/core/training-readiness-report.js.map +1 -0
  149. package/dist/core/trajectory-consumer.d.ts +15 -0
  150. package/dist/core/trajectory-consumer.d.ts.map +1 -0
  151. package/dist/core/trajectory-consumer.js +61 -0
  152. package/dist/core/trajectory-consumer.js.map +1 -0
  153. package/dist/core/trajectory-export-bundle.d.ts +95 -0
  154. package/dist/core/trajectory-export-bundle.d.ts.map +1 -0
  155. package/dist/core/trajectory-export-bundle.js +561 -0
  156. package/dist/core/trajectory-export-bundle.js.map +1 -0
  157. package/dist/core/trajectory-export-cron.d.ts +57 -0
  158. package/dist/core/trajectory-export-cron.d.ts.map +1 -0
  159. package/dist/core/trajectory-export-cron.js +170 -0
  160. package/dist/core/trajectory-export-cron.js.map +1 -0
  161. package/dist/core/trajectory-hf-upload.d.ts +50 -0
  162. package/dist/core/trajectory-hf-upload.d.ts.map +1 -0
  163. package/dist/core/trajectory-hf-upload.js +111 -0
  164. package/dist/core/trajectory-hf-upload.js.map +1 -0
  165. package/dist/core/trajectory-task-datasets.d.ts +62 -0
  166. package/dist/core/trajectory-task-datasets.d.ts.map +1 -0
  167. package/dist/core/trajectory-task-datasets.js +427 -0
  168. package/dist/core/trajectory-task-datasets.js.map +1 -0
  169. package/dist/core/wait-for-service.d.ts +25 -0
  170. package/dist/core/wait-for-service.d.ts.map +1 -0
  171. package/dist/core/wait-for-service.js +19 -0
  172. package/dist/core/wait-for-service.js.map +1 -0
  173. package/dist/core/workspace-runtime.d.ts +4 -0
  174. package/dist/core/workspace-runtime.d.ts.map +1 -0
  175. package/dist/core/workspace-runtime.js +25 -0
  176. package/dist/core/workspace-runtime.js.map +1 -0
  177. package/dist/dspy/artifact.d.ts +54 -0
  178. package/dist/dspy/artifact.d.ts.map +1 -0
  179. package/dist/dspy/artifact.js +61 -0
  180. package/dist/dspy/artifact.js.map +1 -0
  181. package/dist/dspy/chain-of-thought.d.ts +27 -0
  182. package/dist/dspy/chain-of-thought.d.ts.map +1 -0
  183. package/dist/dspy/chain-of-thought.js +43 -0
  184. package/dist/dspy/chain-of-thought.js.map +1 -0
  185. package/dist/dspy/examples.d.ts +72 -0
  186. package/dist/dspy/examples.d.ts.map +1 -0
  187. package/dist/dspy/examples.js +105 -0
  188. package/dist/dspy/examples.js.map +1 -0
  189. package/dist/dspy/index.d.ts +15 -0
  190. package/dist/dspy/index.d.ts.map +1 -0
  191. package/dist/dspy/index.js +40 -0
  192. package/dist/dspy/index.js.map +1 -0
  193. package/dist/dspy/lm-adapter.d.ts +100 -0
  194. package/dist/dspy/lm-adapter.d.ts.map +1 -0
  195. package/dist/dspy/lm-adapter.js +81 -0
  196. package/dist/dspy/lm-adapter.js.map +1 -0
  197. package/dist/dspy/optimizers/dspy-bootstrap-fewshot.d.ts +23 -0
  198. package/dist/dspy/optimizers/dspy-bootstrap-fewshot.d.ts.map +1 -0
  199. package/dist/dspy/optimizers/dspy-bootstrap-fewshot.js +85 -0
  200. package/dist/dspy/optimizers/dspy-bootstrap-fewshot.js.map +1 -0
  201. package/dist/dspy/optimizers/dspy-copro.d.ts +29 -0
  202. package/dist/dspy/optimizers/dspy-copro.d.ts.map +1 -0
  203. package/dist/dspy/optimizers/dspy-copro.js +141 -0
  204. package/dist/dspy/optimizers/dspy-copro.js.map +1 -0
  205. package/dist/dspy/optimizers/dspy-mipro.d.ts +37 -0
  206. package/dist/dspy/optimizers/dspy-mipro.d.ts.map +1 -0
  207. package/dist/dspy/optimizers/dspy-mipro.js +194 -0
  208. package/dist/dspy/optimizers/dspy-mipro.js.map +1 -0
  209. package/dist/dspy/optimizers/index.d.ts +5 -0
  210. package/dist/dspy/optimizers/index.d.ts.map +1 -0
  211. package/dist/dspy/optimizers/index.js +11 -0
  212. package/dist/dspy/optimizers/index.js.map +1 -0
  213. package/dist/dspy/optimizers/types.d.ts +39 -0
  214. package/dist/dspy/optimizers/types.d.ts.map +1 -0
  215. package/dist/dspy/optimizers/types.js +1 -0
  216. package/dist/dspy/optimizers/types.js.map +1 -0
  217. package/dist/dspy/predict.d.ts +49 -0
  218. package/dist/dspy/predict.d.ts.map +1 -0
  219. package/dist/dspy/predict.js +73 -0
  220. package/dist/dspy/predict.js.map +1 -0
  221. package/dist/dspy/signature.d.ts +88 -0
  222. package/dist/dspy/signature.d.ts.map +1 -0
  223. package/dist/dspy/signature.js +205 -0
  224. package/dist/dspy/signature.js.map +1 -0
  225. package/dist/index.d.ts +15 -0
  226. package/dist/index.d.ts.map +1 -0
  227. package/dist/index.js +15 -0
  228. package/dist/index.js.map +1 -0
  229. package/dist/optimizers/bootstrap-fewshot.d.ts +42 -0
  230. package/dist/optimizers/bootstrap-fewshot.d.ts.map +1 -0
  231. package/dist/optimizers/bootstrap-fewshot.js +92 -0
  232. package/dist/optimizers/bootstrap-fewshot.js.map +1 -0
  233. package/dist/optimizers/gepa.d.ts +63 -0
  234. package/dist/optimizers/gepa.d.ts.map +1 -0
  235. package/dist/optimizers/gepa.js +232 -0
  236. package/dist/optimizers/gepa.js.map +1 -0
  237. package/dist/optimizers/index.d.ts +7 -0
  238. package/dist/optimizers/index.d.ts.map +1 -0
  239. package/dist/optimizers/index.js +51 -0
  240. package/dist/optimizers/index.js.map +1 -0
  241. package/dist/optimizers/instruction-search.d.ts +39 -0
  242. package/dist/optimizers/instruction-search.d.ts.map +1 -0
  243. package/dist/optimizers/instruction-search.js +108 -0
  244. package/dist/optimizers/instruction-search.js.map +1 -0
  245. package/dist/optimizers/prompt-evolution.d.ts +39 -0
  246. package/dist/optimizers/prompt-evolution.d.ts.map +1 -0
  247. package/dist/optimizers/prompt-evolution.js +101 -0
  248. package/dist/optimizers/prompt-evolution.js.map +1 -0
  249. package/dist/optimizers/scoring.d.ts +139 -0
  250. package/dist/optimizers/scoring.d.ts.map +1 -0
  251. package/dist/optimizers/scoring.js +299 -0
  252. package/dist/optimizers/scoring.js.map +1 -0
  253. package/dist/optimizers/types.d.ts +105 -0
  254. package/dist/optimizers/types.d.ts.map +1 -0
  255. package/dist/optimizers/types.js +1 -0
  256. package/dist/optimizers/types.js.map +1 -0
  257. package/dist/register-runtime.d.ts +3 -0
  258. package/dist/register-runtime.d.ts.map +1 -0
  259. package/dist/register-runtime.js +60 -0
  260. package/dist/register-runtime.js.map +1 -0
  261. package/dist/register-terminal-view.d.ts +15 -0
  262. package/dist/register-terminal-view.d.ts.map +1 -0
  263. package/dist/register-terminal-view.js +31 -0
  264. package/dist/register-terminal-view.js.map +1 -0
  265. package/dist/routes/experience-routes.d.ts +21 -0
  266. package/dist/routes/experience-routes.d.ts.map +1 -0
  267. package/dist/routes/experience-routes.js +513 -0
  268. package/dist/routes/experience-routes.js.map +1 -0
  269. package/dist/routes/index.d.ts +5 -0
  270. package/dist/routes/index.d.ts.map +1 -0
  271. package/dist/routes/index.js +17 -0
  272. package/dist/routes/index.js.map +1 -0
  273. package/dist/routes/training-routes.d.ts +10 -0
  274. package/dist/routes/training-routes.d.ts.map +1 -0
  275. package/dist/routes/training-routes.js +1239 -0
  276. package/dist/routes/training-routes.js.map +1 -0
  277. package/dist/routes/training-vast-routes.d.ts +35 -0
  278. package/dist/routes/training-vast-routes.d.ts.map +1 -0
  279. package/dist/routes/training-vast-routes.js +249 -0
  280. package/dist/routes/training-vast-routes.js.map +1 -0
  281. package/dist/routes/trajectory-routes.d.ts +19 -0
  282. package/dist/routes/trajectory-routes.d.ts.map +1 -0
  283. package/dist/routes/trajectory-routes.js +1122 -0
  284. package/dist/routes/trajectory-routes.js.map +1 -0
  285. package/dist/services/index.d.ts +9 -0
  286. package/dist/services/index.d.ts.map +1 -0
  287. package/dist/services/index.js +63 -0
  288. package/dist/services/index.js.map +1 -0
  289. package/dist/services/training-backend-check.d.ts +8 -0
  290. package/dist/services/training-backend-check.d.ts.map +1 -0
  291. package/dist/services/training-backend-check.js +31 -0
  292. package/dist/services/training-backend-check.js.map +1 -0
  293. package/dist/services/training-service-like.d.ts +40 -0
  294. package/dist/services/training-service-like.d.ts.map +1 -0
  295. package/dist/services/training-service-like.js +1 -0
  296. package/dist/services/training-service-like.js.map +1 -0
  297. package/dist/services/training-service-registry.d.ts +4 -0
  298. package/dist/services/training-service-registry.d.ts.map +1 -0
  299. package/dist/services/training-service-registry.js +12 -0
  300. package/dist/services/training-service-registry.js.map +1 -0
  301. package/dist/services/training-service.d.ts +59 -0
  302. package/dist/services/training-service.d.ts.map +1 -0
  303. package/dist/services/training-service.js +154 -0
  304. package/dist/services/training-service.js.map +1 -0
  305. package/dist/services/training-trigger.d.ts +177 -0
  306. package/dist/services/training-trigger.d.ts.map +1 -0
  307. package/dist/services/training-trigger.js +300 -0
  308. package/dist/services/training-trigger.js.map +1 -0
  309. package/dist/services/training-vast-service.d.ts +149 -0
  310. package/dist/services/training-vast-service.d.ts.map +1 -0
  311. package/dist/services/training-vast-service.js +648 -0
  312. package/dist/services/training-vast-service.js.map +1 -0
  313. package/dist/services/vast-inference-stats.d.ts +37 -0
  314. package/dist/services/vast-inference-stats.d.ts.map +1 -0
  315. package/dist/services/vast-inference-stats.js +81 -0
  316. package/dist/services/vast-inference-stats.js.map +1 -0
  317. package/dist/services/vast-job-store.d.ts +74 -0
  318. package/dist/services/vast-job-store.d.ts.map +1 -0
  319. package/dist/services/vast-job-store.js +194 -0
  320. package/dist/services/vast-job-store.js.map +1 -0
  321. package/dist/services/vast-subprocess.d.ts +27 -0
  322. package/dist/services/vast-subprocess.d.ts.map +1 -0
  323. package/dist/services/vast-subprocess.js +78 -0
  324. package/dist/services/vast-subprocess.js.map +1 -0
  325. package/dist/setup-routes.d.ts +17 -0
  326. package/dist/setup-routes.d.ts.map +1 -0
  327. package/dist/setup-routes.js +319 -0
  328. package/dist/setup-routes.js.map +1 -0
  329. package/dist/ui/FineTuningSpatialView.d.ts +49 -0
  330. package/dist/ui/FineTuningSpatialView.d.ts.map +1 -0
  331. package/dist/ui/FineTuningSpatialView.js +154 -0
  332. package/dist/ui/FineTuningSpatialView.js.map +1 -0
  333. package/dist/ui/FineTuningView.d.ts +7 -0
  334. package/dist/ui/FineTuningView.d.ts.map +1 -0
  335. package/dist/ui/FineTuningView.helpers.d.ts +17 -0
  336. package/dist/ui/FineTuningView.helpers.d.ts.map +1 -0
  337. package/dist/ui/FineTuningView.helpers.js +30 -0
  338. package/dist/ui/FineTuningView.helpers.js.map +1 -0
  339. package/dist/ui/FineTuningView.interact.d.ts +2 -0
  340. package/dist/ui/FineTuningView.interact.d.ts.map +1 -0
  341. package/dist/ui/FineTuningView.interact.js +300 -0
  342. package/dist/ui/FineTuningView.interact.js.map +1 -0
  343. package/dist/ui/FineTuningView.js +4653 -0
  344. package/dist/ui/FineTuningView.js.map +1 -0
  345. package/dist/ui/fine-tuning-panels.d.ts +100 -0
  346. package/dist/ui/fine-tuning-panels.d.ts.map +1 -0
  347. package/dist/ui/fine-tuning-panels.helpers.d.ts +19 -0
  348. package/dist/ui/fine-tuning-panels.helpers.d.ts.map +1 -0
  349. package/dist/ui/fine-tuning-panels.helpers.js +77 -0
  350. package/dist/ui/fine-tuning-panels.helpers.js.map +1 -0
  351. package/dist/ui/fine-tuning-panels.js +928 -0
  352. package/dist/ui/fine-tuning-panels.js.map +1 -0
  353. package/dist/ui/index.d.ts +5 -0
  354. package/dist/ui/index.d.ts.map +1 -0
  355. package/dist/ui/index.js +5 -0
  356. package/dist/ui/index.js.map +1 -0
  357. package/dist/ui/training-view-bundle.d.ts +3 -0
  358. package/dist/ui/training-view-bundle.d.ts.map +1 -0
  359. package/dist/ui/training-view-bundle.js +7 -0
  360. package/dist/ui/training-view-bundle.js.map +1 -0
  361. package/dist/views/bundle.js +5312 -0
  362. package/dist/views/bundle.js.map +1 -0
  363. package/package.json +7 -7
@@ -0,0 +1,1003 @@
1
+ import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
2
+ import { mkdir, readFile, writeFile } from "node:fs/promises";
3
+ import { homedir } from "node:os";
4
+ import { join } from "node:path";
5
+ import { fileURLToPath } from "node:url";
6
+ import { parseArgs } from "node:util";
7
+ import { AGENT_CONTEXTS } from "./context-types.js";
8
+ import {
9
+ createAnthropicTeacher,
10
+ createCerebrasTeacher,
11
+ createOpenAITeacher,
12
+ exportToElizaNativeJSONL,
13
+ generateDataset
14
+ } from "./dataset-generator.js";
15
+ import {
16
+ ELIZA_ONE_BENCHMARK_TIER_LIST,
17
+ elizaOneActionBenchmarkPairs,
18
+ elizaOneBenchmarkModelId,
19
+ parseElizaOneBenchmarkTiers
20
+ } from "./eliza1-benchmark-recipe.js";
21
+ import {
22
+ comparePrompts,
23
+ formatComparisonSummary
24
+ } from "./prompt-compare.js";
25
+ import { formatQualityReport, validateDataset } from "./replay-validator.js";
26
+ import {
27
+ buildRoleplayEpisodes,
28
+ exportRoleplayEpisodes
29
+ } from "./roleplay-trajectories.js";
30
+ import { ALL_BLUEPRINTS, BLUEPRINT_STATS } from "./scenario-blueprints.js";
31
+ import {
32
+ buildTrainingCollectionPreflightWithProbes,
33
+ listTrainingCollections,
34
+ runTrainingCollection
35
+ } from "./training-collection-runner.js";
36
+ import {
37
+ buildTaskRecord
38
+ } from "./trajectory-task-datasets.js";
39
+ import { discoverWorkspaceRoot } from "./workspace-runtime.js";
/** Decision labels recognized when parsing a comma-separated decisions list. */
const AGENT_DECISIONS = [
  "RESPOND",
  "IGNORE",
  "STOP",
];
/**
 * Parses a comma-separated list of agent contexts.
 *
 * Each entry is trimmed; empty entries and entries that are not members of
 * `AGENT_CONTEXTS` are dropped without an error.
 *
 * @param {string | undefined} value - Raw comma-separated value.
 * @returns {string[] | undefined} Recognized contexts in input order, or
 *   `undefined` when `value` is falsy or no entry was recognized.
 */
function parseAgentContexts(value) {
  if (!value) {
    return undefined;
  }
  const recognized = value
    .split(",")
    .map((piece) => piece.trim())
    .filter((name) => name !== "" && AGENT_CONTEXTS.includes(name));
  return recognized.length === 0 ? undefined : recognized;
}
/**
 * Parses a comma-separated list of agent decisions.
 *
 * Each entry is trimmed; empty entries and entries that are not members of
 * `AGENT_DECISIONS` (exact, case-sensitive match) are dropped without an error.
 *
 * @param {string | undefined} value - Raw comma-separated value.
 * @returns {string[] | undefined} Recognized decisions in input order, or
 *   `undefined` when `value` is falsy or no entry was recognized.
 */
function parseAgentDecisions(value) {
  if (!value) {
    return undefined;
  }
  const accepted = value.split(",").reduce((kept, rawEntry) => {
    const candidate = rawEntry.trim();
    if (candidate && AGENT_DECISIONS.includes(candidate)) {
      kept.push(candidate);
    }
    return kept;
  }, []);
  if (accepted.length > 0) {
    return accepted;
  }
  return undefined;
}
/**
 * Parses the CLI tier-list value by delegating to
 * `parseElizaOneBenchmarkTiers`; the result is returned unchanged.
 *
 * @param {string | undefined} value - Raw CLI value.
 * @returns {*} Whatever `parseElizaOneBenchmarkTiers` returns for `value`.
 */
function parseCliTierList(value) {
  const tiers = parseElizaOneBenchmarkTiers(value);
  return tiers;
}
/**
 * Parses an optional positive integer from a string.
 *
 * Parsing uses `Number.parseInt` with radix 10, so leading digits are accepted
 * and any trailing text is ignored (e.g. "12abc" yields 12).
 *
 * @param {string | undefined} value - Raw value.
 * @returns {number | undefined} The parsed integer when it is greater than
 *   zero; `undefined` for a falsy input, an unparsable input, or a value <= 0.
 */
function optionalPositiveInteger(value) {
  if (!value) {
    return undefined;
  }
  const candidate = Number.parseInt(value, 10);
  if (!Number.isFinite(candidate) || candidate <= 0) {
    return undefined;
  }
  return candidate;
}
/**
 * Validates the `--cerebras-variants` value.
 *
 * @param {string | undefined} value - Raw CLI value.
 * @returns {"trained" | "base" | "both"} The value itself when it is one of
 *   the three accepted names; "both" when the value is falsy.
 * @throws {Error} When a non-empty value is not one of the accepted names.
 */
function parseCerebrasVariants(value) {
  switch (value) {
    case "trained":
    case "base":
    case "both":
      return value;
    default:
      break;
  }
  if (!value) {
    return "both";
  }
  throw new Error(
    `Invalid --cerebras-variants value ${JSON.stringify(value)}; expected trained, base, or both`
  );
}
/**
 * Validates the `--benchmark-variant` value.
 *
 * @param {string | undefined} value - Raw CLI value.
 * @returns {"reference" | "base" | "trained" | undefined} `undefined` when the
 *   flag was not given, otherwise the validated value.
 * @throws {Error} For any other value, including the empty string.
 */
function parseActionBenchmarkVariant(value) {
  if (value === undefined) {
    return undefined;
  }
  const allowed = ["reference", "base", "trained"];
  if (!allowed.includes(value)) {
    throw new Error(
      `Invalid --benchmark-variant value ${JSON.stringify(value)}; expected reference, base, or trained`
    );
  }
  return value;
}
/**
 * Validates the `--benchmark` value.
 *
 * @param {string | undefined} value - Raw CLI value.
 * @returns {"eliza_harness_action_selection" | "clawbench" | "hermes" | "all"}
 *   The validated value.
 * @throws {Error} For any other value, including `undefined`.
 */
function parseBenchmarkVsCerebrasBenchmark(value) {
  const known = new Set([
    "eliza_harness_action_selection",
    "clawbench",
    "hermes",
    "all",
  ]);
  if (known.has(value)) {
    return value;
  }
  throw new Error(
    `Invalid --benchmark value ${JSON.stringify(value)}; expected eliza_harness_action_selection, clawbench, hermes, or all`
  );
}
/**
 * Selects the teacher model from environment variables.
 *
 * Resolution order:
 *   1. Cerebras, when the provider (TRAIN_MODEL_PROVIDER, falling back to
 *      TRAINING_PROVIDER) is exactly "cerebras" and CEREBRAS_API_KEY is set.
 *   2. Anthropic, when ANTHROPIC_API_KEY is set.
 *   3. OpenAI, when OPENAI_API_KEY is set.
 *
 * @returns {*} The teacher object produced by the matching `create*Teacher`
 *   factory.
 * @throws {Error} When none of the above applies.
 */
function getTeacherModel() {
  // `||` rather than `??`: an empty or whitespace-only TRAIN_MODEL_PROVIDER
  // trims to "" (not nullish) and must not shadow TRAINING_PROVIDER.
  const trainProvider =
    process.env.TRAIN_MODEL_PROVIDER?.trim() ||
    process.env.TRAINING_PROVIDER?.trim() ||
    undefined;
  const cerebrasKey = process.env.CEREBRAS_API_KEY;
  if (trainProvider === "cerebras" && cerebrasKey) {
    console.log("Using Cerebras gpt-oss-120b as teacher model");
    return createCerebrasTeacher();
  }
  const anthropicKey = process.env.ANTHROPIC_API_KEY;
  const openaiKey = process.env.OPENAI_API_KEY;
  if (anthropicKey) {
    console.log("Using Anthropic Claude Sonnet 4 as teacher model");
    return createAnthropicTeacher(anthropicKey);
  }
  if (openaiKey) {
    console.log("Using OpenAI GPT-5 as teacher model");
    return createOpenAITeacher(openaiKey);
  }
  // The Cerebras key alone is not sufficient: the provider must be selected
  // too, so the message names both settings.
  throw new Error(
    "No teacher model API key found. Set CEREBRAS_API_KEY together with TRAIN_MODEL_PROVIDER=cerebras (preferred), ANTHROPIC_API_KEY, or OPENAI_API_KEY."
  );
}
120
+ async function cmdGenerate(args) {
121
+ const { values } = parseArgs({
122
+ args,
123
+ options: {
124
+ variants: { type: "string", default: "5" },
125
+ output: { type: "string", default: "./training-data" },
126
+ concurrency: { type: "string", default: "5" },
127
+ contexts: { type: "string" },
128
+ decisions: { type: "string" },
129
+ limitBlueprints: { type: "string" }
130
+ }
131
+ });
132
+ const variantsRaw = values.variants;
133
+ const outputDir = values.output;
134
+ const concurrencyRaw = values.concurrency;
135
+ if (typeof variantsRaw !== "string" || typeof outputDir !== "string" || typeof concurrencyRaw !== "string") {
136
+ throw new Error("Missing required generate options");
137
+ }
138
+ const variantsPerBlueprint = parseInt(variantsRaw, 10);
139
+ const concurrency = parseInt(concurrencyRaw, 10);
140
+ const filterContexts = parseAgentContexts(values.contexts);
141
+ const filterDecisions = parseAgentDecisions(values.decisions);
142
+ const limitBlueprints = values.limitBlueprints ? parseInt(values.limitBlueprints, 10) : void 0;
143
+ const teacher = getTeacherModel();
144
+ const blueprintCount = limitBlueprints ? Math.min(limitBlueprints, ALL_BLUEPRINTS.length) : ALL_BLUEPRINTS.length;
145
+ console.log(`
146
+ Scenario blueprints: ${ALL_BLUEPRINTS.length}`);
147
+ console.log(`Manual blueprints: ${BLUEPRINT_STATS.manualCount}`);
148
+ console.log(
149
+ `Generated blueprints: ${BLUEPRINT_STATS.totalCount - BLUEPRINT_STATS.manualCount}`
150
+ );
151
+ console.log(`Variants per blueprint: ${variantsPerBlueprint}`);
152
+ console.log(
153
+ `Expected total samples: ${blueprintCount * variantsPerBlueprint}`
154
+ );
155
+ console.log(`Output directory: ${outputDir}`);
156
+ console.log(`Teacher model: ${teacher.name}`);
157
+ console.log(`Concurrency: ${concurrency}`);
158
+ if (filterContexts)
159
+ console.log(`Filter contexts: ${filterContexts.join(", ")}`);
160
+ if (filterDecisions)
161
+ console.log(`Filter decisions: ${filterDecisions.join(", ")}`);
162
+ if (limitBlueprints) console.log(`Limit blueprints: ${limitBlueprints}`);
163
+ console.log("");
164
+ const config = {
165
+ variantsPerBlueprint,
166
+ teacher,
167
+ outputDir,
168
+ concurrency,
169
+ filterContexts,
170
+ filterDecisions,
171
+ limitBlueprints,
172
+ onProgress: (completed, total, sample) => {
173
+ const pct = (completed / total * 100).toFixed(1);
174
+ process.stdout.write(
175
+ `\r[${pct}%] ${completed}/${total} - ${sample.blueprintId} (${sample.expectedOutput.decision}/${sample.expectedOutput.primaryContext})`
176
+ );
177
+ }
178
+ };
179
+ console.log("Generating synthetic training data...\n");
180
+ const samples = await generateDataset(config);
181
+ console.log(`
182
+
183
+ Generated ${samples.length} samples.`);
184
+ console.log("\nValidating dataset...");
185
+ const report = validateDataset(samples);
186
+ console.log(formatQualityReport(report));
187
+ console.log("\nExporting to eliza_native_v1 JSONL format...");
188
+ const paths = await exportToElizaNativeJSONL(samples, outputDir);
189
+ console.log(` Combined: ${paths.combinedPath}`);
190
+ console.log(` Should-respond only: ${paths.shouldRespondPath}`);
191
+ console.log(` Context routing: ${paths.contextRoutingPath}`);
192
+ const roleplayPaths = await exportRoleplayEpisodes(
193
+ buildRoleplayEpisodes(samples),
194
+ samples,
195
+ outputDir
196
+ );
197
+ console.log(` Roleplay episodes: ${roleplayPaths.episodesPath}`);
198
+ console.log(` Roleplay manifest: ${roleplayPaths.manifestPath}`);
199
+ console.log("\nDone!");
200
+ }
/**
 * `compare` command: evaluates a baseline prompt against a variant prompt on a
 * dataset and reports the result.
 *
 * Requires `--baseline`, `--variant` and `--dataset`; prints usage and exits
 * with code 1 when any is missing. The prompts are read from the given files,
 * the model adapter wraps the teacher from `getTeacherModel`, and the summary
 * from `formatComparisonSummary` is printed. With `-o/--output` the raw result
 * is also written as JSON. Exits with code 2 when `result.passed` is falsy.
 *
 * @param {string[]} args - Command-line arguments for this sub-command.
 * @returns {Promise<void>}
 * @throws {Error} When a numeric flag cannot be parsed as a number.
 */
async function cmdCompare(args) {
  const { values } = parseArgs({
    args,
    options: {
      baseline: { type: "string" },
      variant: { type: "string" },
      dataset: { type: "string" },
      task: { type: "string" },
      scorer: { type: "string" },
      mode: { type: "string" },
      "max-examples": { type: "string" },
      tolerance: { type: "string" },
      output: { type: "string", short: "o" },
      temperature: { type: "string" },
      "max-tokens": { type: "string" },
    },
  });
  if (!values.baseline || !values.variant || !values.dataset) {
    console.error(
      "Usage: compare --baseline <prompt.txt> --variant <prompt.txt> --dataset <dataset.jsonl> [options]"
    );
    console.error("");
    console.error("Options:");
    console.error(
      " --task <task> One of: should_respond, context_routing, action_planner, response, media_description, view_context"
    );
    console.error(
      " --scorer <kind> agreement | planner_action (default: derived from --task)"
    );
    console.error(
      " --mode <mode> vs_historical (default) | pairwise"
    );
    console.error(" --max-examples N Cap evaluations (default: all)");
    console.error(
      " --tolerance N Pass threshold delta (default: 0.02)"
    );
    console.error(" --temperature N Sampling temperature (default: 0)");
    console.error(" --max-tokens N Per-completion cap (default: 512)");
    console.error(" -o, --output <path> Write JSON result to file");
    console.error("");
    console.error(
      "Requires ANTHROPIC_API_KEY or OPENAI_API_KEY for the model adapter."
    );
    process.exit(1);
  }

  // Parse numeric flags before any file or model work so a typo fails fast
  // instead of forwarding NaN to comparePrompts.
  const numericFlag = (flag, parse) => {
    const raw = values[flag];
    if (!raw) {
      return undefined;
    }
    const parsed = parse(raw);
    if (Number.isNaN(parsed)) {
      throw new Error(
        `Invalid --${flag} value ${JSON.stringify(raw)}; expected a number`
      );
    }
    return parsed;
  };
  const parseInteger = (raw) => Number.parseInt(raw, 10);
  const maxExamples = numericFlag("max-examples", parseInteger);
  const tolerance = numericFlag("tolerance", Number.parseFloat);
  const temperature = numericFlag("temperature", Number.parseFloat);
  const maxTokens = numericFlag("max-tokens", parseInteger);

  const [baselinePrompt, variantPrompt] = await Promise.all([
    readFile(values.baseline, "utf-8"),
    readFile(values.variant, "utf-8"),
  ]);
  const teacher = getTeacherModel();
  const adapter = {
    async complete(input) {
      return await teacher.generate(input.system ?? "", input.user);
    },
  };
  const task = values.task;
  const scorer = values.scorer;
  const mode = values.mode;

  console.log(
    `[compare] baseline=${values.baseline} variant=${values.variant}`
  );
  console.log(
    `[compare] dataset=${values.dataset} task=${task ?? "(any)"} mode=${mode ?? "vs_historical"}`
  );
  console.log(`[compare] adapter=${teacher.name}`);

  const result = await comparePrompts({
    baselinePrompt,
    variantPrompt,
    dataset: values.dataset,
    task,
    scorer,
    mode,
    maxExamples,
    // --tolerance was accepted and documented but never forwarded.
    // NOTE(review): assumes comparePrompts reads this option as `tolerance` —
    // confirm against prompt-compare.js.
    tolerance,
    temperature,
    maxTokens,
    adapter,
  });
  console.log("");
  console.log(formatComparisonSummary(result));
  if (values.output) {
    await writeFile(values.output, JSON.stringify(result, null, 2));
    console.log(`[compare] wrote result to ${values.output}`);
  }
  if (!result.passed) {
    process.exit(2);
  }
}
/**
 * Maps a trajectory stage to a training task name.
 *
 * Matching is case-insensitive on `stage.kind` and `stage.model.modelType`.
 * Rules are checked in order and the first match wins:
 *   1. kind "messagehandler" or modelType containing "response_handler" -> "should_respond"
 *   2. kind "planner" or modelType containing "planner" -> "action_planner"
 *   3. kind "tool" or "action" -> "response"
 *   4. modelType containing "vision" or "image" -> "media_description"
 *
 * @param {{ kind?: string, model?: { modelType?: string } }} stage - Stage record.
 * @returns {string | null} The task name, or `null` when no rule matches.
 */
function classifyStage(stage) {
  const stageKind = (stage.kind ?? "").toLowerCase();
  const stageModelType = (stage.model?.modelType ?? "").toLowerCase();
  const mentions = (needle) => stageModelType.includes(needle);

  const orderedRules = [
    ["should_respond", stageKind === "messagehandler" || mentions("response_handler")],
    ["action_planner", stageKind === "planner" || mentions("planner")],
    ["response", stageKind === "tool" || stageKind === "action"],
    ["media_description", mentions("vision") || mentions("image")],
  ];
  const firstHit = orderedRules.find(([, matched]) => matched);
  return firstHit ? firstHit[0] : null;
}
308
/**
 * Coerce a message or response payload into text.
 *
 * @param {unknown} value - Payload to convert.
 * @returns {string} The value itself when it is a string, "" for null or
 *   undefined, otherwise its JSON serialization.
 */
function stringifyContent(value) {
  if (value === null || value === void 0) {
    return "";
  }
  return typeof value === "string" ? value : JSON.stringify(value);
}
313
/**
 * Convert one recorded trajectory stage into an `eliza_native_v1` JSONL row.
 *
 * @param {object} stage - Stage parsed from a trajectory JSON file.
 * @returns {object | null} A row with `format`, `boundary`, `request` and
 *   `response`, or null when the stage has no usable messages or has neither
 *   a response nor tool calls.
 */
function stageToJsonlRow(stage) {
  const model = stage?.model;
  // Trajectory files are untrusted JSON: ignore a non-array `messages` value
  // and drop entries that are not objects rather than throwing mid-export.
  const messages = Array.isArray(model?.messages) ? model.messages.filter((m) => m !== null && typeof m === "object") : [];
  const response = model?.response;
  const toolCalls = model?.toolCalls;
  if (messages.length === 0) return null;
  if (!response && !toolCalls) return null;
  const normalizedMessages = messages.map((m) => ({
    role: m.role,
    content: stringifyContent(m.content)
  }));
  // The first system message is also surfaced as the request-level system prompt.
  const systemMsg = normalizedMessages.find((m) => m.role === "system");
  const responseText = stringifyContent(response);
  return {
    format: "eliza_native_v1",
    boundary: "vercel_ai_sdk.generateText",
    request: {
      system: systemMsg?.content ?? "",
      messages: normalizedMessages
    },
    response: toolCalls ? { text: responseText, toolCalls } : { text: responseText }
  };
}
335
/**
 * `export-trajectories` command: read recorded trajectory JSON files from
 * `<input>/<agent>/*.json`, classify every stage into a task bucket and write
 * one `<task>_trajectories.jsonl` file per bucket into the output directory.
 *
 * Input dir resolution: `--input`, then `$ELIZA_TRAJECTORY_DIR`, then
 * `<$ELIZA_STATE_DIR or ~/.eliza>/trajectories`. Output defaults to
 * `./training-data`. `--max-per-task` caps the rows kept per bucket.
 *
 * Exits the process with code 1 when the input directory is missing or
 * `--max-per-task` is not a non-negative integer.
 *
 * @param {string[]} args - CLI arguments following the command name.
 * @returns {Promise<void>}
 */
async function cmdExportTrajectories(args) {
  const { values } = parseArgs({
    args,
    options: {
      input: { type: "string", short: "i" },
      output: { type: "string", short: "o" },
      "max-per-task": { type: "string" }
    }
  });
  const inputDir = values.input ?? process.env.ELIZA_TRAJECTORY_DIR ?? join(
    process.env.ELIZA_STATE_DIR ?? join(homedir(), ".eliza"),
    "trajectories"
  );
  const outputDir = values.output ?? "./training-data";
  const cap = values["max-per-task"] ? Number.parseInt(values["max-per-task"], 10) : Number.POSITIVE_INFINITY;
  // A NaN cap makes every `length >= cap` check false, silently disabling the
  // limit the user asked for, so reject it up front.
  if (Number.isNaN(cap) || cap < 0) {
    console.error(
      `[export-trajectories] --max-per-task must be a non-negative integer, got "${values["max-per-task"]}"`
    );
    process.exit(1);
  }
  if (!existsSync(inputDir)) {
    console.error(`[export-trajectories] input dir not found: ${inputDir}`);
    process.exit(1);
  }
  await mkdir(outputDir, { recursive: true });
  console.log(`[export-trajectories] reading from ${inputDir}`);
  console.log(`[export-trajectories] writing to ${outputDir}`);
  const buckets = buildTaskRecord(() => []);
  const agentDirs = readdirSync(inputDir).filter((name) => {
    const full = join(inputDir, name);
    return statSync(full).isDirectory();
  });
  let totalTrajectories = 0;
  let totalStages = 0;
  let droppedStages = 0;
  let unreadableFiles = 0;
  for (const agentDir of agentDirs) {
    const agentPath = join(inputDir, agentDir);
    const files = readdirSync(agentPath).filter((f) => f.endsWith(".json"));
    for (const file of files) {
      let traj;
      try {
        traj = JSON.parse(
          readFileSync(join(agentPath, file), "utf-8")
        );
      } catch {
        // Best effort: one corrupt file must not abort the export, but it is
        // counted so the user learns that data was skipped.
        unreadableFiles += 1;
        continue;
      }
      // Valid JSON that is not an object (null, a number, a string) carries no stages.
      if (traj === null || typeof traj !== "object") {
        unreadableFiles += 1;
        continue;
      }
      totalTrajectories += 1;
      const stages = Array.isArray(traj.stages) ? traj.stages : [];
      for (const stage of stages) {
        totalStages += 1;
        const task = classifyStage(stage);
        if (!task) {
          droppedStages += 1;
          continue;
        }
        // Stages beyond the per-task cap are skipped without being counted as dropped.
        if (buckets[task].length >= cap) continue;
        const row = stageToJsonlRow(stage);
        if (!row) {
          droppedStages += 1;
          continue;
        }
        buckets[task].push(row);
      }
    }
  }
  for (const task of Object.keys(buckets)) {
    const path = join(outputDir, `${task}_trajectories.jsonl`);
    const lines = buckets[task].map((row) => JSON.stringify(row));
    // An empty bucket gets an empty file; a lone "\n" would be a blank JSONL record.
    const body = lines.length > 0 ? `${lines.join("\n")}\n` : "";
    await writeFile(path, body);
    console.log(
      `[export-trajectories] ${task}: wrote ${buckets[task].length} examples to ${path}`
    );
  }
  if (unreadableFiles > 0) {
    console.warn(
      `[export-trajectories] skipped ${unreadableFiles} unreadable or malformed trajectory file(s)`
    );
  }
  // The "unclassified" figure also includes classified stages that produced no row.
  console.log(
    `[export-trajectories] summary: ${totalTrajectories} trajectories, ${totalStages} stages (${droppedStages} unclassified)`
  );
}
408
/**
 * Translate `run-collection` CLI arguments into the options object consumed
 * by the training-collection runner.
 *
 * Runs are dry by default; `--live` turns dry-run off everywhere. When no
 * explicit benchmark model, runtime model or variant is given, a base/trained
 * pair is derived for a single tier (`actionBenchmarkPair`) or for every tier
 * (`actionBenchmarkPairs`).
 *
 * @param {string[]} args - CLI arguments following the command name.
 * @returns {object} Fully populated run-collection options.
 */
function buildRunCollectionOptionsFromCliArgs(args) {
  // Option-spec builders keep the long declaration table readable.
  const stringOpt = (fallback) => fallback === void 0 ? { type: "string" } : { type: "string", default: fallback };
  const boolOpt = (initial = false) => ({ type: "boolean", default: initial });
  const { values } = parseArgs({
    args,
    options: {
      output: { type: "string", short: "o" },
      "workspace-root": stringOpt(),
      tiers: stringOpt("2b"),
      benchmark: stringOpt("eliza_harness_action_selection"),
      provider: stringOpt("local-llama-cpp"),
      "base-url": stringOpt("http://localhost:11434/v1"),
      "runs-per-case": stringOpt("1"),
      "benchmark-filter": stringOpt(),
      "benchmark-model": stringOpt(),
      "benchmark-runtime-model": stringOpt(),
      "benchmark-variant": stringOpt(),
      "dataset-version": stringOpt("eliza-native-v1"),
      "hf-repo": stringOpt("elizaos/eliza-1-training"),
      "hf-revision": stringOpt("main"),
      "hf-files": stringOpt(),
      "feed-archetypes": stringOpt("trader"),
      "feed-agents": stringOpt("1"),
      "feed-ticks": stringOpt("1"),
      "feed-parallel": stringOpt("1"),
      "cerebras-max-samples": stringOpt("50"),
      "cerebras-variants": stringOpt("both"),
      scenario: stringOpt("deterministic-pr-smoke"),
      "natural-sanitized-jsonl": stringOpt(),
      "natural-raw-jsonl": stringOpt(),
      "natural-run-id": stringOpt(),
      "natural-tasks": stringOpt(),
      "include-natural-raw": boolOpt(),
      live: boolOpt(),
      "preflight-only": boolOpt(),
      "probe-endpoints": boolOpt(),
      "skip-hf": boolOpt(),
      "skip-feed": boolOpt(),
      "skip-natural": boolOpt(),
      "skip-tests": boolOpt(),
      "skip-scenarios": boolOpt(),
      "skip-action-benchmark": boolOpt(),
      "skip-cerebras": boolOpt(),
      "skip-model-registry": boolOpt(),
      "skip-bundle-stage": boolOpt(),
      "include-eval-comparison": boolOpt(),
      "skip-eval-comparison": boolOpt(),
      // Accepted on the command line but not read below; only --skip-matrix is consulted.
      "include-matrix": boolOpt(true),
      "skip-matrix": boolOpt(),
      mocks: { type: "boolean" }
    }
  });

  // Readers over the parsed values: string-or-fallback, strict boolean, comma list.
  const readString = (name, fallback = void 0) => typeof values[name] === "string" ? values[name] : fallback;
  const isOn = (name) => values[name] === true;
  const readList = (name) => {
    const raw = readString(name);
    return raw === void 0 ? void 0 : raw.split(",").map((entry) => entry.trim()).filter(Boolean);
  };

  const tiers = parseCliTierList(readString("tiers"));
  const live = isOn("live");
  const dryRun = !live;
  const benchmark = parseBenchmarkVsCerebrasBenchmark(
    readString("benchmark", "eliza_harness_action_selection")
  );
  const provider = readString("provider", "local-llama-cpp");
  const baseUrl = readString("base-url", "http://localhost:11434/v1");
  const datasetVersion = readString("dataset-version", "eliza-native-v1");
  const actionBenchmark = {
    // An explicit --mocks wins; otherwise mocks follow dry-run.
    useMocks: typeof values.mocks === "boolean" ? values.mocks : dryRun,
    forceTrajectoryCapture: true,
    provider,
    baseUrl,
    benchmark,
    datasetVersion,
    modelId: readString("benchmark-model"),
    runtimeModel: readString("benchmark-runtime-model") ?? readString("benchmark-model"),
    variant: parseActionBenchmarkVariant(readString("benchmark-variant")),
    filter: readString("benchmark-filter"),
    runsPerCase: optionalPositiveInteger(readString("runs-per-case")),
    dryRun
  };
  // Base/trained pairs are only derived when the user pinned no explicit model.
  const usesDerivedModels = actionBenchmark.modelId === void 0 && actionBenchmark.runtimeModel === void 0 && actionBenchmark.variant === void 0;
  const sanitizedJsonlPath = readString("natural-sanitized-jsonl");
  const rawJsonlPath = readString("natural-raw-jsonl");

  return {
    preflightOnly: isOn("preflight-only"),
    preflightProbe: isOn("probe-endpoints"),
    outputDir: readString("output"),
    workspaceRoot: typeof values["workspace-root"] === "string" ? values["workspace-root"] : discoverWorkspaceRoot(),
    includeHuggingFace: !isOn("skip-hf"),
    includeFeed: !isOn("skip-feed"),
    includeNaturalTrajectories: !isOn("skip-natural"),
    includeTestTrajectories: !isOn("skip-tests"),
    includeScenarios: !isOn("skip-scenarios"),
    includeEvalComparison: !isOn("skip-eval-comparison") && (dryRun || isOn("include-eval-comparison")),
    includeActionBenchmark: !isOn("skip-action-benchmark"),
    includeBenchmarkVsCerebras: !isOn("skip-cerebras"),
    includeEliza1ModelRegistry: !isOn("skip-model-registry"),
    includeEliza1BundleStage: !isOn("skip-bundle-stage"),
    includeBenchmarkMatrix: !isOn("skip-matrix"),
    naturalTrajectories: {
      sanitizedJsonlPath,
      rawJsonlPath,
      // Supplying a raw JSONL path implies copying it into the collection.
      includeRawJsonl: isOn("include-natural-raw") || rawJsonlPath !== void 0,
      tasks: readList("natural-tasks"),
      source: {
        kind: "training_collection_natural_trajectories",
        runId: readString("natural-run-id"),
        metadata: {
          cli: true,
          sanitizedJsonlPath,
          rawJsonlPath
        }
      }
    },
    huggingFace: {
      repoId: readString("hf-repo", "elizaos/eliza-1-training"),
      revision: readString("hf-revision", "main"),
      files: readList("hf-files"),
      dryRun
    },
    feed: {
      archetypes: readString("feed-archetypes", "trader"),
      numAgents: optionalPositiveInteger(readString("feed-agents")),
      ticks: optionalPositiveInteger(readString("feed-ticks")),
      parallel: optionalPositiveInteger(readString("feed-parallel")),
      cleanup: true,
      dryRun
    },
    scenarios: {
      scenario: readString("scenario"),
      exportNative: true,
      useDeterministicProxy: true,
      dryRun
    },
    evalComparison: {
      model: elizaOneBenchmarkModelId(tiers[0] ?? "2b", "base"),
      trainedModelPath: elizaOneBenchmarkModelId(tiers[0] ?? "2b", "trained"),
      backend: "cpu",
      dryRun
    },
    actionBenchmark,
    actionBenchmarkPair: tiers.length === 1 && usesDerivedModels ? {
      tier: tiers[0],
      base: {
        variant: "base",
        modelId: elizaOneBenchmarkModelId(tiers[0], "base"),
        runtimeModel: elizaOneBenchmarkModelId(tiers[0], "base")
      },
      trained: {
        variant: "trained",
        modelId: elizaOneBenchmarkModelId(tiers[0], "trained"),
        runtimeModel: elizaOneBenchmarkModelId(tiers[0], "trained")
      }
    } : void 0,
    actionBenchmarkPairs: tiers.length > 1 && usesDerivedModels ? elizaOneActionBenchmarkPairs(tiers) : void 0,
    benchmarkVsCerebras: {
      tiers: tiers.join(","),
      benchmark,
      variants: parseCerebrasVariants(readString("cerebras-variants")),
      maxSamples: optionalPositiveInteger(readString("cerebras-max-samples")) ?? 50,
      dryRun
    },
    eliza1BundleStage: {
      repoId: "elizaos/eliza-1",
      tier: tiers[0] ?? "2b",
      localDir: "/tmp/eliza-1-bundles",
      maxBytes: 8589934592,
      apply: false
    }
  };
}
585
/**
 * Render a preflight report as printable lines: one tally line followed by
 * one line per check.
 *
 * @param {{ liveRequired: boolean, checks: Array<{ id: string, status: string, detail: string, path?: string }> }} preflight
 * @returns {string[]} Lines prefixed with `[run-collection:preflight]`.
 */
function formatTrainingCollectionPreflightSummary(preflight) {
  // Tally checks by status; statuses that never occur print as 0 below.
  const tally = {};
  for (const { status } of preflight.checks) {
    tally[status] = (tally[status] ?? 0) + 1;
  }
  const liveLabel = preflight.liveRequired ? "yes" : "no";
  const header = `[run-collection:preflight] live=${liveLabel} ok=${tally.ok ?? 0} warning=${tally.warning ?? 0} missing=${tally.missing ?? 0} skipped=${tally.skipped ?? 0}`;
  const details = preflight.checks.map((check) => {
    const pathSuffix = check.path ? ` path=${check.path}` : "";
    return `[run-collection:preflight] ${check.id}=${check.status} ${check.detail}${pathSuffix}`;
  });
  return [header, ...details];
}
600
/**
 * `run-collection` command: build options from the CLI arguments, then either
 * print the preflight checks (`--preflight-only`) or run the full collection
 * and print its summary.
 *
 * @param {string[]} args - CLI arguments following the command name.
 * @returns {Promise<void>}
 */
async function cmdRunCollection(args) {
  const options = buildRunCollectionOptionsFromCliArgs(args);
  const printLines = (lines) => {
    for (const line of lines) {
      console.log(line);
    }
  };
  if (!options.preflightOnly) {
    const outcome = await runTrainingCollection(options);
    printLines(formatRunCollectionSummary(outcome));
    return;
  }
  const { workspaceRoot } = options;
  const trainingRoot = workspaceRoot ? join(workspaceRoot, "packages", "training") : void 0;
  const report = await buildTrainingCollectionPreflightWithProbes({
    options,
    workspaceRoot,
    trainingRoot
  });
  printLines(formatTrainingCollectionPreflightSummary(report));
}
618
/**
 * `list-collections` command: list saved training collection runs under
 * `--root` (at most `--limit`, default "20") and print one summary line each.
 *
 * @param {string[]} args - CLI arguments following the command name.
 * @returns {Promise<void>}
 */
async function cmdListCollections(args) {
  const optionSpec = {
    root: { type: "string" },
    limit: { type: "string", short: "n", default: "20" }
  };
  const { root, limit } = parseArgs({ args, options: optionSpec }).values;
  const listing = await listTrainingCollections({
    root,
    limit: optionalPositiveInteger(limit)
  });
  for (const line of formatListTrainingCollectionsSummary(listing)) {
    console.log(line);
  }
}
634
/**
 * Render a collection listing as printable lines: a root line, a count line,
 * then one space-separated `key=value` line per collection.
 *
 * @param {{ root: string, collections: object[] }} result - Listing to render.
 * @returns {string[]} Lines prefixed with `[list-collections]`.
 */
function formatListTrainingCollectionsSummary(result) {
  /** Build the single summary line for one collection. */
  const describeCollection = (collection) => {
    const { evals, training, dataSources, benchmarks, readiness } = collection;
    const leadComparison = evals.comparisonInventory[0];
    // Prefer the first inventory entry that names a model or variant.
    const leadModel = training.modelInventory.find(
      (entry) => entry.model || entry.variant
    ) ?? training.modelInventory[0];
    const samplesBySource = Object.entries(collection.sourceSamples ?? {
      huggingFace: [],
      feed: [],
      natural: [],
      scenarios: [],
      tests: [],
      trainingJsonl: []
    });
    const sampleCounts = samplesBySource.map(([source, samples]) => `${source}:${samples.length}`).join(",");
    // At most one example per source and four overall.
    const exampleLabels = [];
    for (const [source, samples] of samplesBySource) {
      for (const sample of samples.slice(0, 1)) {
        const id = sample.trajectoryId ?? sample.scenarioId ?? sample.title ?? "sample";
        const task = sample.task ? `:${sample.task}` : "";
        exampleLabels.push(`${source}:${id}${task}`);
      }
    }
    const sampleExamples = exampleLabels.slice(0, 4).join(",");

    const evalParts = [
      `artifacts:${evals.evalArtifacts}`,
      `comparisons:${evals.evalComparisons}`,
      `action:${evals.actionBenchmarks}`,
      `matrices:${evals.benchmarkMatrices}`
    ];
    if (leadComparison) {
      evalParts.push(
        `first:${leadComparison.baseModel ?? "base"}->${leadComparison.trainedModel ?? "trained"},improvement:${leadComparison.improvementPercent ?? "n/a"}%`
      );
    }
    const evalSummary = evalParts.join(",");

    const modelParts = [
      `runs:${training.trainingRuns}`,
      `models:${training.models}`,
      `inventory:${training.modelInventory.length}`
    ];
    if (leadModel) {
      modelParts.push(
        `first:${leadModel.tier ?? "tier"}/${leadModel.variant ?? "variant"}/${leadModel.model ?? "model"},improvement:${leadModel.evalImprovementPercent ?? "n/a"}%`
      );
    }
    const modelSummary = modelParts.join(",");

    // Only the first four readiness gaps are shown.
    let gapSummary = "none";
    if (collection.readinessGaps.length > 0) {
      gapSummary = collection.readinessGaps.slice(0, 4).map((gap) => {
        const capability = gap.recommendedCapability ? `->${gap.recommendedCapability}` : "";
        return `${gap.id}:${gap.status}${capability}${formatRecommendedParamsSuffix(gap.recommendedParams)}`;
      }).join(",");
    }

    const progress = benchmarks.baselineProgress;
    const fields = [
      `[list-collections] run=${collection.generatedAt}`,
      `readiness=${collection.readinessStatus}`,
      `ready=${readiness.ready}`,
      `partial=${readiness.partial}`,
      `missing=${readiness.missing}`,
      `artifacts=${collection.artifactCount}`,
      `sources=hf:${dataSources.huggingFaceDatasets},feed:${dataSources.feedDatasets},natural:${dataSources.naturalTrajectoryBundles},scenarios:${dataSources.scenarioRuns},native:${dataSources.scenarioNativeDatasets},tests:${dataSources.testTrajectories},jsonl:${dataSources.trainingJsonlDatasets}`,
      `benchmarks=pairs:${benchmarks.actionBenchmarkPairs},comparisons:${benchmarks.benchmarkComparisons},cases:${benchmarks.caseSamples},tiers:${benchmarks.tiers.join(",") || "none"}`,
      `baseline=established:${progress.establishedTiers.join(",") || "none"},next:${progress.nextTier ?? "none"},remaining:${progress.remainingTiers.join(",") || "none"}`,
      `evals=${evalSummary}`,
      `models=${modelSummary}`,
      `samples=${sampleCounts}${sampleExamples ? `,examples:${sampleExamples}` : ""}`,
      `artifact-links=source:${collection.sourceArtifacts.length},evidence:${collection.evidenceArtifacts.length}`,
      `gaps=${gapSummary}`,
      `output=${collection.outputDir}`,
      `readme=${collection.readmePath}`,
      `viewer=${collection.analysisIndexHtmlPath}`
    ];
    return fields.join(" ");
  };

  return [
    `[list-collections] root=${result.root}`,
    `[list-collections] count=${result.collections.length}`,
    ...result.collections.map((collection) => describeCollection(collection))
  ];
}
701
/**
 * Format a readiness gap's recommended parameters as a ` params={...}` suffix.
 *
 * @param {object | null | undefined} params - Recommended parameters, if any.
 * @returns {string} The suffix with a leading space, or "" when `params` is
 *   falsy or has no own enumerable keys.
 */
function formatRecommendedParamsSuffix(params) {
  const hasEntries = Boolean(params) && Object.keys(params).length > 0;
  return hasEntries ? ` params=${JSON.stringify(params)}` : "";
}
705
/**
 * Reduce a failed step's error to a single line of at most 220 characters.
 *
 * Whitespace runs collapse to one space. If the text contains one of the
 * known root-cause sentences (database not initialized, missing DATABASE_URL,
 * missing CEREBRAS_API_KEY), that sentence is returned instead of the start
 * of the message.
 *
 * @param {unknown} error - Error text; Error instances and other non-string
 *   values are converted to text first.
 * @returns {string} The compact message, or "failed" when there is no text.
 */
function compactStepError(error) {
  // Convert non-strings instead of letting `.replace` throw on them.
  let text = "";
  if (typeof error === "string") {
    text = error;
  } else if (error instanceof Error) {
    text = error.message;
  } else if (error !== null && error !== void 0) {
    text = String(error);
  }
  // An empty or whitespace-only message would print as a bare "id:" in the summary.
  const normalized = text.replace(/\s+/g, " ").trim() || "failed";
  const priorityPatterns = [
    /Database not initialized\.[^.]*\./,
    /DATABASE_URL is required[^.]*\./,
    /CEREBRAS_API_KEY is required[^.]*\./
  ];
  for (const pattern of priorityPatterns) {
    const match = normalized.match(pattern);
    if (match?.[0]) return match[0].slice(0, 220);
  }
  return normalized.slice(0, 220);
}
718
/**
 * Render the result of a training-collection run as printable lines.
 *
 * Covers output paths, readiness tallies, preflight tallies, data-source and
 * eval counts, benchmark and baseline progress, a few sample ids per source,
 * failed steps, and the five highest-priority readiness gaps.
 *
 * @param {object} result - Run result with `outputDir`, `manifestPath`,
 *   `readmePath`, `collectionIndex` and `manifest` (including `evidence`).
 * @returns {string[]} Lines prefixed with `[run-collection]`.
 */
function formatRunCollectionSummary(result) {
  const evidence = result.manifest.evidence;
  const readiness = evidence.benchmarkReadiness;
  const preflight = evidence.preflight ?? { liveRequired: false, checks: [] };
  const preflightCounts = preflight.checks.reduce(
    (acc, check) => {
      acc[check.status] = (acc[check.status] ?? 0) + 1;
      return acc;
    },
    {}
  );
  // Gaps whose id is listed here sort first, in this order; the rest sort by id.
  const priorityGapIds = [
    "feed_generation",
    "natural_trajectories",
    "test_trajectories",
    "smallest_model_benchmark",
    "all_eliza1_tiers_benchmark",
    "cerebras_reference",
    "base_trained_improvement",
    "all_eliza1_tier_improvements",
    "agentic_benchmarks",
    "benchmark_matrix",
    "benchmark_case_provenance",
    "eval_comparison",
    "model_tracking",
    "readable_source_samples"
  ];
  // An id with no recorded gap is reported as "ready".
  const readinessStatusFor = (id) => evidence.readinessGaps.find((gap) => gap.id === id)?.status ?? "ready";
  const comparisonInventory = evidence.benchmarks.comparisonInventory ?? [];
  const dryRunComparisons = comparisonInventory.filter(
    (comparison) => comparison.dryRun === true
  ).length;
  const liveComparisons = Math.max(
    0,
    comparisonInventory.length - dryRunComparisons
  );
  const gaps = [...evidence.readinessGaps].sort((left, right) => {
    const leftIndex = priorityGapIds.indexOf(left.id);
    const rightIndex = priorityGapIds.indexOf(right.id);
    const leftPriority = leftIndex >= 0 ? 0 : 1;
    const rightPriority = rightIndex >= 0 ? 0 : 1;
    if (leftPriority !== rightPriority) return leftPriority - rightPriority;
    if (leftIndex >= 0 && rightIndex >= 0 && leftIndex !== rightIndex) {
      return leftIndex - rightIndex;
    }
    return left.id.localeCompare(right.id);
  }).slice(0, 5);
  const sourceSamples = evidence.sourceSamples ?? {
    huggingFace: [],
    feed: [],
    natural: [],
    scenarios: [],
    tests: [],
    trainingJsonl: []
  };
  const sourceSampleEntries = Object.entries(sourceSamples);
  const sampleCounts = sourceSampleEntries.map(([source, samples]) => `${source}=${samples.length}`).join(" ");
  // At most two examples per source and five overall.
  const sampleExamples = sourceSampleEntries.flatMap(
    ([source, samples]) => samples.slice(0, 2).map((sample) => {
      // Same id fallback chain as the list-collections formatter, so a sample
      // identified only by scenarioId is not reduced to its title or "sample".
      const id = sample.trajectoryId ?? sample.scenarioId ?? sample.title ?? "sample";
      const task = sample.task ? `:${sample.task}` : "";
      return `${source}:${id}${task}`;
    })
  ).slice(0, 5).join(" ");
  const failedSteps = (result.manifest.steps ?? []).filter((step) => step.status === "failed").map((step) => `${step.id}:${compactStepError(step.error)}`);
  return [
    `[run-collection] output=${result.outputDir}`,
    `[run-collection] manifest=${result.manifestPath}`,
    `[run-collection] readme=${result.readmePath}`,
    `[run-collection] viewer=${result.manifest.analysis.indexHtmlPath}`,
    `[run-collection] collection-index=${result.collectionIndex.indexHtmlPath} json=${result.collectionIndex.indexJsonPath}`,
    `[run-collection] readiness=${result.manifest.readiness.status} ready=${result.manifest.readiness.ready} partial=${result.manifest.readiness.partial} missing=${result.manifest.readiness.missing}`,
    `[run-collection] preflight live=${preflight.liveRequired ? "yes" : "no"} ok=${preflightCounts.ok ?? 0} warning=${preflightCounts.warning ?? 0} missing=${preflightCounts.missing ?? 0} skipped=${preflightCounts.skipped ?? 0}`,
    `[run-collection] sources hf=${evidence.dataSources.huggingFaceDatasets} feed=${evidence.dataSources.feedDatasets} natural=${evidence.dataSources.naturalTrajectoryBundles} scenarios=${evidence.dataSources.scenarioRuns} scenario-native=${evidence.dataSources.scenarioNativeDatasets} tests=${evidence.dataSources.testTrajectories} jsonl=${evidence.dataSources.trainingJsonlDatasets}`,
    `[run-collection] evals artifacts=${evidence.evals.evalArtifacts} comparisons=${evidence.evals.evalComparisons} action=${evidence.evals.actionBenchmarks} matrices=${evidence.evals.benchmarkMatrices} models=${evidence.training.models} training-runs=${evidence.training.trainingRuns}`,
    `[run-collection] benchmarks pairs=${evidence.benchmarks.actionBenchmarkPairs} rows=${evidence.benchmarks.benchmarkRows} comparisons=${evidence.benchmarks.benchmarkComparisons} tiers=${evidence.benchmarks.tiers.join(",") || "none"}`,
    `[run-collection] baseline established=${evidence.benchmarks.baselineProgress.establishedTiers.join(",") || "none"} next=${evidence.benchmarks.baselineProgress.nextTier ?? "none"} remaining=${evidence.benchmarks.baselineProgress.remainingTiers.join(",") || "none"} smallest=${evidence.benchmarks.baselineProgress.smallestTierEstablished ? "yes" : "no"} all=${evidence.benchmarks.baselineProgress.allTiersEstablished ? "yes" : "no"}`,
    `[run-collection] benchmark-comparisons live=${liveComparisons} dry-run=${dryRunComparisons} improvements=${evidence.benchmarks.improvementComparisons.length}`,
    `[run-collection] benchmark-readiness smallest=${readiness.smallestTier} all-tiers=${readiness.allEliza1Tiers} improvement=${readiness.baseTrainedImprovement} all-tier-improvements=${readiness.allEliza1TierImprovements} cerebras=${readiness.cerebrasReference} cases=${readinessStatusFor("benchmark_case_provenance")}`,
    `[run-collection] source-readiness natural=${readinessStatusFor("natural_trajectories")} tests=${readinessStatusFor("test_trajectories")} readable=${readinessStatusFor("readable_source_samples")}`,
    `[run-collection] eval-readiness comparison=${readinessStatusFor("eval_comparison")} models=${readinessStatusFor("model_tracking")}`,
    `[run-collection] sample-readiness readable=${readinessStatusFor("readable_source_samples")}`,
    `[run-collection] source-samples ${sampleCounts}${sampleExamples ? ` examples=${sampleExamples}` : ""}`,
    failedSteps.length > 0 ? `[run-collection] failed-steps ${failedSteps.join(" | ")}` : "[run-collection] failed-steps none",
    gaps.length > 0 ? `[run-collection] readiness-gaps ${gaps.map(
      (gap) => `${gap.id}:${gap.status}${gap.recommendedCapability ? `->${gap.recommendedCapability}` : ""}${formatRecommendedParamsSuffix(gap.recommendedParams)}`
    ).join(" ")}` : "[run-collection] readiness-gaps none"
  ];
}
807
/**
 * `validate` command: load a generated dataset from `--input`, run it through
 * `validateDataset` and print the formatted quality report.
 *
 * Exits the process with code 1 when `--input` is not supplied.
 *
 * @param {string[]} args - CLI arguments following the command name.
 * @returns {Promise<void>}
 */
async function cmdValidate(args) {
  const parsed = parseArgs({
    args,
    options: {
      input: { type: "string", short: "i" }
    }
  });
  const inputPath = parsed.values.input;
  if (!inputPath) {
    console.error("Usage: validate --input <path-to-raw_samples.json>");
    process.exit(1);
  }
  const samples = JSON.parse(await readFile(inputPath, "utf-8"));
  console.log(`Loaded ${samples.length} samples from ${inputPath}`);
  console.log("");
  console.log(formatQualityReport(validateDataset(samples)));
}
825
// Task names accepted by the `rollback-prompt` command.
const OPTIMIZED_PROMPT_TASKS_CLI = [
  "should_respond",
  "context_routing",
  "action_planner",
  "response",
  "media_description",
  "view_context"
];

/**
 * Check whether a value is one of the accepted optimized-prompt task names.
 *
 * @param {unknown} value - Candidate task name.
 * @returns {boolean} True when `value` is listed in OPTIMIZED_PROMPT_TASKS_CLI.
 */
function isOptimizedPromptTaskCli(value) {
  return OPTIMIZED_PROMPT_TASKS_CLI.some((taskName) => taskName === value);
}
836
/**
 * `rollback-prompt` command: roll the optimized prompt for one task back via
 * `OptimizedPromptService.rollback` and print what it now points at.
 *
 * The task comes from `--task` or the first positional argument. The store
 * root is `--store-root` when given, otherwise
 * `<$ELIZA_STATE_DIR or ~/.eliza>/optimized-prompts`.
 *
 * Exits the process with code 1 on a missing or unknown task, or when the
 * rollback itself fails.
 *
 * @param {string[]} args - CLI arguments following the command name.
 * @returns {Promise<void>}
 */
async function cmdRollbackPrompt(args) {
  const { values, positionals } = parseArgs({
    args,
    options: {
      task: { type: "string" },
      "store-root": { type: "string" }
    },
    allowPositionals: true
  });
  // `||` rather than `??`: a blank --task trims to "", which must still fall
  // through to the positional argument.
  const taskName = values.task?.trim() || positionals[0]?.trim();
  if (!taskName) {
    console.error(
      `Usage: rollback-prompt <task>
task: one of ${OPTIMIZED_PROMPT_TASKS_CLI.join(", ")}`
    );
    process.exit(1);
  }
  if (!isOptimizedPromptTaskCli(taskName)) {
    console.error(
      `Unknown task "${taskName}". Must be one of: ${OPTIMIZED_PROMPT_TASKS_CLI.join(", ")}`
    );
    process.exit(1);
  }
  const { OptimizedPromptService } = await import("@elizaos/core");
  const service = new OptimizedPromptService();
  const customRoot = values["store-root"]?.trim();
  if (customRoot) {
    service.setStoreRoot(customRoot);
  } else {
    // The original checked ELIZA_STATE_DIR twice; one lookup is equivalent.
    const stateDir = process.env.ELIZA_STATE_DIR?.trim() || join(homedir(), ".eliza");
    service.setStoreRoot(join(stateDir, "optimized-prompts"));
  }
  await service.refresh();
  try {
    // "context_routing" is rolled back through the "should_respond" prompt.
    const promptTask = taskName === "context_routing" ? "should_respond" : taskName;
    const newCurrent = await service.rollback(promptTask);
    console.log(
      `[rollback-prompt] task=${taskName} now points at ${newCurrent}`
    );
  } catch (err) {
    console.error(
      `[rollback-prompt] ${err instanceof Error ? err.message : String(err)}`
    );
    process.exit(1);
  }
}
882
/**
 * CLI entry point. Takes the first process argument as the command name and
 * passes the remaining arguments to the matching `cmd*` handler. When the
 * command is missing or not recognized, the usage text is printed with
 * console.log.
 */
+ async function main() {
883
+ const args = process.argv.slice(2);
884
+ const command = args[0];
885
+ const restArgs = args.slice(1);
886
// Dispatch on the command name; every handler receives only the arguments after it.
+ switch (command) {
887
+ case "generate":
888
+ await cmdGenerate(restArgs);
889
+ break;
890
+ case "validate":
891
+ await cmdValidate(restArgs);
892
+ break;
893
+ case "compare":
894
+ await cmdCompare(restArgs);
895
+ break;
896
+ case "export-trajectories":
897
+ await cmdExportTrajectories(restArgs);
898
+ break;
899
+ case "run-collection":
900
+ await cmdRunCollection(restArgs);
901
+ break;
902
+ case "list-collections":
903
+ await cmdListCollections(restArgs);
904
+ break;
905
+ case "rollback-prompt":
906
+ await cmdRollbackPrompt(restArgs);
907
+ break;
908
// No command or an unknown one: print the usage text below.
+ default:
909
+ console.log(`Usage: cli.ts <command> [options]
910
+
911
+ Commands:
912
+ generate Generate synthetic training data
913
+ --variants N Number of variants per blueprint (default: 5)
914
+ --output DIR Output directory (default: ./training-data)
915
+ --concurrency N API call concurrency (default: 5)
916
+ --contexts X,Y Filter to specific contexts
917
+ --decisions X,Y Filter to RESPOND,IGNORE,STOP
918
+
919
+ validate Validate a generated dataset
920
+ --input PATH Path to raw_samples.json
921
+
922
+ export-trajectories Re-export raw recorded trajectories to per-task JSONL
923
+ -i, --input DIR Trajectory dir (default: $ELIZA_TRAJECTORY_DIR or ~/.eliza/trajectories)
924
+ -o, --output DIR Output dir (default: ./training-data)
925
+ --max-per-task N Cap examples per task bucket
926
+
927
+ run-collection Collect HF/feed/natural/test/scenario/eval/benchmark evidence
928
+ -o, --output DIR Output dir (default: training state collection dir)
929
+ --tiers LIST Eliza-1 benchmark tiers, comma-separated, or "all" (default: 2b)
930
+ (all expands to ${ELIZA_ONE_BENCHMARK_TIER_LIST})
931
+ --live Execute live external work instead of dry-run defaults
932
+ --preflight-only Print live-readiness checks without collecting artifacts
933
+ --probe-endpoints Probe local OpenAI-compatible endpoints during preflight
934
+ --skip-matrix Skip benchmark matrix generation
935
+ --skip-hf Skip Hugging Face ingest
936
+ --hf-files LIST Comma-separated Hugging Face dataset paths to ingest
937
+ --skip-feed Skip feed generation
938
+ --skip-natural Skip natural trajectory export
939
+ --skip-tests Skip test trajectory collection
940
+ --skip-scenarios Skip scenario trajectories
941
+ --skip-action-benchmark Skip Eliza harness action benchmark execution
942
+ --benchmark-filter LIST Comma-separated action benchmark case ids
943
+ --benchmark-model ID Run action benchmark for one explicit model id
944
+ --benchmark-runtime-model ID Served local/provider model id (defaults to --benchmark-model)
945
+ --benchmark-variant V reference, base, or trained label for the explicit model
946
+ --cerebras-max-samples N Max prompts for benchmark-vs-Cerebras (default: 50)
947
+ --cerebras-variants V Eliza variants for benchmark-vs-Cerebras: trained, base, both (default: both)
948
+ --natural-sanitized-jsonl PATH Existing sanitized app trajectory JSONL
949
+ --natural-raw-jsonl PATH Existing raw app trajectory JSONL
950
+ --natural-run-id ID Run id to record on imported natural trajectories
951
+ --natural-tasks LIST Task buckets for natural trajectory export
952
+ --include-natural-raw Copy raw natural trajectory JSONL into the collection
953
+ --skip-eval-comparison Skip dry-run local eval comparison artifact
954
+ --skip-cerebras Skip benchmark-vs-Cerebras step
955
+ --skip-model-registry Skip persisted Eliza-1 model registry manifests
956
+ --skip-bundle-stage Skip Eliza-1 bundle stage step
957
+
958
+ list-collections List saved training collection runs
959
+ --root DIR Collection root or a single collection output dir
960
+ -n, --limit N Maximum runs to print (default: 20)
961
+ Prints gaps=<id>:<status>-><capability> params={...}
962
+
963
+ compare A/B compare two prompts on a trajectory dataset
964
+ --baseline PATH Path to baseline prompt (.txt)
965
+ --variant PATH Path to variant prompt (.txt)
966
+ --dataset PATH Path to JSONL dataset (eliza_native_v1)
967
+ --task NAME should_respond | context_routing | action_planner | response | media_description | view_context
968
+ --scorer KIND agreement | planner_action (default: from --task)
969
+ --mode MODE vs_historical (default) | pairwise
970
+ --max-examples N Cap evaluations
971
+ --tolerance F Pass threshold delta (default: 0.02)
972
+ --temperature F Sampling temperature (default: 0)
973
+ --max-tokens N Per-completion cap (default: 512)
974
+ -o, --output PATH Write JSON result to file
975
+ Exits with code 2 if variant regresses beyond --tolerance.
976
+
977
+ rollback-prompt Flip the optimized-prompt 'current' and 'previous' symlinks
978
+ <task> Required positional: should_respond | context_routing |
979
+ action_planner | response | media_description |
980
+ view_context
981
+ --store-root DIR Override the optimized-prompts store root (default:
982
+ $ELIZA_STATE_DIR / ~/.eliza/optimized-prompts)
983
+
984
+ Environment:
985
+ ANTHROPIC_API_KEY Use Claude as teacher model
986
+ OPENAI_API_KEY Use GPT-5 as teacher model
987
+ `);
988
+ break;
989
+ }
990
+ }
991
// Direct-execution guard: main() is invoked only when process.argv[1] is set
// and is strictly equal to this module's own file path, obtained from
// fileURLToPath(import.meta.url). Presumably this keeps the CLI from starting
// when the module is merely imported for its exports.
// NOTE(review): the comparison is an exact string match on paths. If the
// script is launched through a symlink (e.g. a package-manager bin shim), the
// two paths may differ and main() would not run; confirm against how this
// file is actually invoked.
+ if (process.argv[1] && fileURLToPath(import.meta.url) === process.argv[1]) {
992
// A rejection from main() is logged with console.error, then the process
// exits with status code 1.
+ main().catch((err) => {
993
+ console.error(err);
994
+ process.exit(1);
995
+ });
996
+ }
997
// Named exports of this module. The four bindings listed here are not defined
// in this part of the file (presumably declared earlier in the module).
+ export {
998
+ buildRunCollectionOptionsFromCliArgs,
999
+ formatListTrainingCollectionsSummary,
1000
+ formatRunCollectionSummary,
1001
+ formatTrainingCollectionPreflightSummary
1002
+ };
1003
+ //# sourceMappingURL=cli.js.map