@elizaos/plugin-training 2.0.3-beta.5 → 2.0.3-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/backends/native.d.ts +96 -0
- package/dist/backends/native.d.ts.map +1 -0
- package/dist/backends/native.js +308 -0
- package/dist/backends/native.js.map +1 -0
- package/dist/cli/train.d.ts +22 -0
- package/dist/cli/train.d.ts.map +1 -0
- package/dist/cli/train.js +219 -0
- package/dist/cli/train.js.map +1 -0
- package/dist/core/action-benchmark-runner.d.ts +55 -0
- package/dist/core/action-benchmark-runner.d.ts.map +1 -0
- package/dist/core/action-benchmark-runner.js +341 -0
- package/dist/core/action-benchmark-runner.js.map +1 -0
- package/dist/core/artifact-store.d.ts +72 -0
- package/dist/core/artifact-store.d.ts.map +1 -0
- package/dist/core/artifact-store.js +50 -0
- package/dist/core/artifact-store.js.map +1 -0
- package/dist/core/benchmark-matrix-artifact.d.ts +102 -0
- package/dist/core/benchmark-matrix-artifact.d.ts.map +1 -0
- package/dist/core/benchmark-matrix-artifact.js +381 -0
- package/dist/core/benchmark-matrix-artifact.js.map +1 -0
- package/dist/core/benchmark-vs-cerebras-runner.d.ts +37 -0
- package/dist/core/benchmark-vs-cerebras-runner.d.ts.map +1 -0
- package/dist/core/benchmark-vs-cerebras-runner.js +151 -0
- package/dist/core/benchmark-vs-cerebras-runner.js.map +1 -0
- package/dist/core/cerebras-eval-model.d.ts +54 -0
- package/dist/core/cerebras-eval-model.d.ts.map +1 -0
- package/dist/core/cerebras-eval-model.js +249 -0
- package/dist/core/cerebras-eval-model.js.map +1 -0
- package/dist/core/cli.d.ts +15 -0
- package/dist/core/cli.d.ts.map +1 -0
- package/dist/core/cli.js +1003 -0
- package/dist/core/cli.js.map +1 -0
- package/dist/core/context-audit.d.ts +51 -0
- package/dist/core/context-audit.d.ts.map +1 -0
- package/dist/core/context-audit.js +166 -0
- package/dist/core/context-audit.js.map +1 -0
- package/dist/core/context-catalog.d.ts +47 -0
- package/dist/core/context-catalog.d.ts.map +1 -0
- package/dist/core/context-catalog.js +269 -0
- package/dist/core/context-catalog.js.map +1 -0
- package/dist/core/context-types.d.ts +3 -0
- package/dist/core/context-types.d.ts.map +1 -0
- package/dist/core/context-types.js +18 -0
- package/dist/core/context-types.js.map +1 -0
- package/dist/core/dataset-generator.d.ts +135 -0
- package/dist/core/dataset-generator.d.ts.map +1 -0
- package/dist/core/dataset-generator.js +895 -0
- package/dist/core/dataset-generator.js.map +1 -0
- package/dist/core/eliza1-benchmark-recipe.d.ts +18 -0
- package/dist/core/eliza1-benchmark-recipe.d.ts.map +1 -0
- package/dist/core/eliza1-benchmark-recipe.js +64 -0
- package/dist/core/eliza1-benchmark-recipe.js.map +1 -0
- package/dist/core/eliza1-bundle-stager.d.ts +57 -0
- package/dist/core/eliza1-bundle-stager.d.ts.map +1 -0
- package/dist/core/eliza1-bundle-stager.js +149 -0
- package/dist/core/eliza1-bundle-stager.js.map +1 -0
- package/dist/core/ensure-cron-job.d.ts +53 -0
- package/dist/core/ensure-cron-job.d.ts.map +1 -0
- package/dist/core/ensure-cron-job.js +51 -0
- package/dist/core/ensure-cron-job.js.map +1 -0
- package/dist/core/eval-comparison-artifact.d.ts +72 -0
- package/dist/core/eval-comparison-artifact.d.ts.map +1 -0
- package/dist/core/eval-comparison-artifact.js +281 -0
- package/dist/core/eval-comparison-artifact.js.map +1 -0
- package/dist/core/feed-generation-runner.d.ts +37 -0
- package/dist/core/feed-generation-runner.d.ts.map +1 -0
- package/dist/core/feed-generation-runner.js +232 -0
- package/dist/core/feed-generation-runner.js.map +1 -0
- package/dist/core/html-escape.d.ts +5 -0
- package/dist/core/html-escape.d.ts.map +1 -0
- package/dist/core/html-escape.js +11 -0
- package/dist/core/html-escape.js.map +1 -0
- package/dist/core/huggingface-dataset-ingest.d.ts +52 -0
- package/dist/core/huggingface-dataset-ingest.d.ts.map +1 -0
- package/dist/core/huggingface-dataset-ingest.js +134 -0
- package/dist/core/huggingface-dataset-ingest.js.map +1 -0
- package/dist/core/index.d.ts +29 -0
- package/dist/core/index.d.ts.map +1 -0
- package/dist/core/index.js +204 -0
- package/dist/core/index.js.map +1 -0
- package/dist/core/privacy-filter.d.ts +95 -0
- package/dist/core/privacy-filter.d.ts.map +1 -0
- package/dist/core/privacy-filter.js +324 -0
- package/dist/core/privacy-filter.js.map +1 -0
- package/dist/core/promotion-gate.d.ts +117 -0
- package/dist/core/promotion-gate.d.ts.map +1 -0
- package/dist/core/promotion-gate.js +85 -0
- package/dist/core/promotion-gate.js.map +1 -0
- package/dist/core/promotion-persist.d.ts +116 -0
- package/dist/core/promotion-persist.d.ts.map +1 -0
- package/dist/core/promotion-persist.js +93 -0
- package/dist/core/promotion-persist.js.map +1 -0
- package/dist/core/prompt-compare.d.ts +99 -0
- package/dist/core/prompt-compare.d.ts.map +1 -0
- package/dist/core/prompt-compare.js +210 -0
- package/dist/core/prompt-compare.js.map +1 -0
- package/dist/core/replay-validator.d.ts +136 -0
- package/dist/core/replay-validator.d.ts.map +1 -0
- package/dist/core/replay-validator.js +312 -0
- package/dist/core/replay-validator.js.map +1 -0
- package/dist/core/roleplay-executor.d.ts +123 -0
- package/dist/core/roleplay-executor.d.ts.map +1 -0
- package/dist/core/roleplay-executor.js +675 -0
- package/dist/core/roleplay-executor.js.map +1 -0
- package/dist/core/roleplay-trajectories.d.ts +54 -0
- package/dist/core/roleplay-trajectories.d.ts.map +1 -0
- package/dist/core/roleplay-trajectories.js +88 -0
- package/dist/core/roleplay-trajectories.js.map +1 -0
- package/dist/core/scenario-blueprints.d.ts +62 -0
- package/dist/core/scenario-blueprints.d.ts.map +1 -0
- package/dist/core/scenario-blueprints.js +850 -0
- package/dist/core/scenario-blueprints.js.map +1 -0
- package/dist/core/scenario-runner.d.ts +36 -0
- package/dist/core/scenario-runner.d.ts.map +1 -0
- package/dist/core/scenario-runner.js +216 -0
- package/dist/core/scenario-runner.js.map +1 -0
- package/dist/core/skill-scoring-cron.d.ts +57 -0
- package/dist/core/skill-scoring-cron.d.ts.map +1 -0
- package/dist/core/skill-scoring-cron.js +180 -0
- package/dist/core/skill-scoring-cron.js.map +1 -0
- package/dist/core/test-trajectory-collector.d.ts +37 -0
- package/dist/core/test-trajectory-collector.d.ts.map +1 -0
- package/dist/core/test-trajectory-collector.js +225 -0
- package/dist/core/test-trajectory-collector.js.map +1 -0
- package/dist/core/track-c-queue-task.d.ts +37 -0
- package/dist/core/track-c-queue-task.d.ts.map +1 -0
- package/dist/core/track-c-queue-task.js +104 -0
- package/dist/core/track-c-queue-task.js.map +1 -0
- package/dist/core/training-analysis-index.d.ts +104 -0
- package/dist/core/training-analysis-index.d.ts.map +1 -0
- package/dist/core/training-analysis-index.js +3297 -0
- package/dist/core/training-analysis-index.js.map +1 -0
- package/dist/core/training-collection-runner.d.ts +508 -0
- package/dist/core/training-collection-runner.d.ts.map +1 -0
- package/dist/core/training-collection-runner.js +2299 -0
- package/dist/core/training-collection-runner.js.map +1 -0
- package/dist/core/training-config.d.ts +52 -0
- package/dist/core/training-config.d.ts.map +1 -0
- package/dist/core/training-config.js +117 -0
- package/dist/core/training-config.js.map +1 -0
- package/dist/core/training-orchestrator.d.ts +112 -0
- package/dist/core/training-orchestrator.d.ts.map +1 -0
- package/dist/core/training-orchestrator.js +729 -0
- package/dist/core/training-orchestrator.js.map +1 -0
- package/dist/core/training-readiness-report.d.ts +52 -0
- package/dist/core/training-readiness-report.d.ts.map +1 -0
- package/dist/core/training-readiness-report.js +765 -0
- package/dist/core/training-readiness-report.js.map +1 -0
- package/dist/core/trajectory-consumer.d.ts +15 -0
- package/dist/core/trajectory-consumer.d.ts.map +1 -0
- package/dist/core/trajectory-consumer.js +61 -0
- package/dist/core/trajectory-consumer.js.map +1 -0
- package/dist/core/trajectory-export-bundle.d.ts +95 -0
- package/dist/core/trajectory-export-bundle.d.ts.map +1 -0
- package/dist/core/trajectory-export-bundle.js +561 -0
- package/dist/core/trajectory-export-bundle.js.map +1 -0
- package/dist/core/trajectory-export-cron.d.ts +57 -0
- package/dist/core/trajectory-export-cron.d.ts.map +1 -0
- package/dist/core/trajectory-export-cron.js +170 -0
- package/dist/core/trajectory-export-cron.js.map +1 -0
- package/dist/core/trajectory-hf-upload.d.ts +50 -0
- package/dist/core/trajectory-hf-upload.d.ts.map +1 -0
- package/dist/core/trajectory-hf-upload.js +111 -0
- package/dist/core/trajectory-hf-upload.js.map +1 -0
- package/dist/core/trajectory-task-datasets.d.ts +62 -0
- package/dist/core/trajectory-task-datasets.d.ts.map +1 -0
- package/dist/core/trajectory-task-datasets.js +427 -0
- package/dist/core/trajectory-task-datasets.js.map +1 -0
- package/dist/core/wait-for-service.d.ts +25 -0
- package/dist/core/wait-for-service.d.ts.map +1 -0
- package/dist/core/wait-for-service.js +19 -0
- package/dist/core/wait-for-service.js.map +1 -0
- package/dist/core/workspace-runtime.d.ts +4 -0
- package/dist/core/workspace-runtime.d.ts.map +1 -0
- package/dist/core/workspace-runtime.js +25 -0
- package/dist/core/workspace-runtime.js.map +1 -0
- package/dist/dspy/artifact.d.ts +54 -0
- package/dist/dspy/artifact.d.ts.map +1 -0
- package/dist/dspy/artifact.js +61 -0
- package/dist/dspy/artifact.js.map +1 -0
- package/dist/dspy/chain-of-thought.d.ts +27 -0
- package/dist/dspy/chain-of-thought.d.ts.map +1 -0
- package/dist/dspy/chain-of-thought.js +43 -0
- package/dist/dspy/chain-of-thought.js.map +1 -0
- package/dist/dspy/examples.d.ts +72 -0
- package/dist/dspy/examples.d.ts.map +1 -0
- package/dist/dspy/examples.js +105 -0
- package/dist/dspy/examples.js.map +1 -0
- package/dist/dspy/index.d.ts +15 -0
- package/dist/dspy/index.d.ts.map +1 -0
- package/dist/dspy/index.js +40 -0
- package/dist/dspy/index.js.map +1 -0
- package/dist/dspy/lm-adapter.d.ts +100 -0
- package/dist/dspy/lm-adapter.d.ts.map +1 -0
- package/dist/dspy/lm-adapter.js +81 -0
- package/dist/dspy/lm-adapter.js.map +1 -0
- package/dist/dspy/optimizers/dspy-bootstrap-fewshot.d.ts +23 -0
- package/dist/dspy/optimizers/dspy-bootstrap-fewshot.d.ts.map +1 -0
- package/dist/dspy/optimizers/dspy-bootstrap-fewshot.js +85 -0
- package/dist/dspy/optimizers/dspy-bootstrap-fewshot.js.map +1 -0
- package/dist/dspy/optimizers/dspy-copro.d.ts +29 -0
- package/dist/dspy/optimizers/dspy-copro.d.ts.map +1 -0
- package/dist/dspy/optimizers/dspy-copro.js +141 -0
- package/dist/dspy/optimizers/dspy-copro.js.map +1 -0
- package/dist/dspy/optimizers/dspy-mipro.d.ts +37 -0
- package/dist/dspy/optimizers/dspy-mipro.d.ts.map +1 -0
- package/dist/dspy/optimizers/dspy-mipro.js +194 -0
- package/dist/dspy/optimizers/dspy-mipro.js.map +1 -0
- package/dist/dspy/optimizers/index.d.ts +5 -0
- package/dist/dspy/optimizers/index.d.ts.map +1 -0
- package/dist/dspy/optimizers/index.js +11 -0
- package/dist/dspy/optimizers/index.js.map +1 -0
- package/dist/dspy/optimizers/types.d.ts +39 -0
- package/dist/dspy/optimizers/types.d.ts.map +1 -0
- package/dist/dspy/optimizers/types.js +1 -0
- package/dist/dspy/optimizers/types.js.map +1 -0
- package/dist/dspy/predict.d.ts +49 -0
- package/dist/dspy/predict.d.ts.map +1 -0
- package/dist/dspy/predict.js +73 -0
- package/dist/dspy/predict.js.map +1 -0
- package/dist/dspy/signature.d.ts +88 -0
- package/dist/dspy/signature.d.ts.map +1 -0
- package/dist/dspy/signature.js +205 -0
- package/dist/dspy/signature.js.map +1 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +15 -0
- package/dist/index.js.map +1 -0
- package/dist/optimizers/bootstrap-fewshot.d.ts +42 -0
- package/dist/optimizers/bootstrap-fewshot.d.ts.map +1 -0
- package/dist/optimizers/bootstrap-fewshot.js +92 -0
- package/dist/optimizers/bootstrap-fewshot.js.map +1 -0
- package/dist/optimizers/gepa.d.ts +63 -0
- package/dist/optimizers/gepa.d.ts.map +1 -0
- package/dist/optimizers/gepa.js +232 -0
- package/dist/optimizers/gepa.js.map +1 -0
- package/dist/optimizers/index.d.ts +7 -0
- package/dist/optimizers/index.d.ts.map +1 -0
- package/dist/optimizers/index.js +51 -0
- package/dist/optimizers/index.js.map +1 -0
- package/dist/optimizers/instruction-search.d.ts +39 -0
- package/dist/optimizers/instruction-search.d.ts.map +1 -0
- package/dist/optimizers/instruction-search.js +108 -0
- package/dist/optimizers/instruction-search.js.map +1 -0
- package/dist/optimizers/prompt-evolution.d.ts +39 -0
- package/dist/optimizers/prompt-evolution.d.ts.map +1 -0
- package/dist/optimizers/prompt-evolution.js +101 -0
- package/dist/optimizers/prompt-evolution.js.map +1 -0
- package/dist/optimizers/scoring.d.ts +139 -0
- package/dist/optimizers/scoring.d.ts.map +1 -0
- package/dist/optimizers/scoring.js +299 -0
- package/dist/optimizers/scoring.js.map +1 -0
- package/dist/optimizers/types.d.ts +105 -0
- package/dist/optimizers/types.d.ts.map +1 -0
- package/dist/optimizers/types.js +1 -0
- package/dist/optimizers/types.js.map +1 -0
- package/dist/register-runtime.d.ts +3 -0
- package/dist/register-runtime.d.ts.map +1 -0
- package/dist/register-runtime.js +60 -0
- package/dist/register-runtime.js.map +1 -0
- package/dist/register-terminal-view.d.ts +15 -0
- package/dist/register-terminal-view.d.ts.map +1 -0
- package/dist/register-terminal-view.js +31 -0
- package/dist/register-terminal-view.js.map +1 -0
- package/dist/routes/experience-routes.d.ts +21 -0
- package/dist/routes/experience-routes.d.ts.map +1 -0
- package/dist/routes/experience-routes.js +513 -0
- package/dist/routes/experience-routes.js.map +1 -0
- package/dist/routes/index.d.ts +5 -0
- package/dist/routes/index.d.ts.map +1 -0
- package/dist/routes/index.js +17 -0
- package/dist/routes/index.js.map +1 -0
- package/dist/routes/training-routes.d.ts +10 -0
- package/dist/routes/training-routes.d.ts.map +1 -0
- package/dist/routes/training-routes.js +1239 -0
- package/dist/routes/training-routes.js.map +1 -0
- package/dist/routes/training-vast-routes.d.ts +35 -0
- package/dist/routes/training-vast-routes.d.ts.map +1 -0
- package/dist/routes/training-vast-routes.js +249 -0
- package/dist/routes/training-vast-routes.js.map +1 -0
- package/dist/routes/trajectory-routes.d.ts +19 -0
- package/dist/routes/trajectory-routes.d.ts.map +1 -0
- package/dist/routes/trajectory-routes.js +1122 -0
- package/dist/routes/trajectory-routes.js.map +1 -0
- package/dist/services/index.d.ts +9 -0
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/index.js +63 -0
- package/dist/services/index.js.map +1 -0
- package/dist/services/training-backend-check.d.ts +8 -0
- package/dist/services/training-backend-check.d.ts.map +1 -0
- package/dist/services/training-backend-check.js +31 -0
- package/dist/services/training-backend-check.js.map +1 -0
- package/dist/services/training-service-like.d.ts +40 -0
- package/dist/services/training-service-like.d.ts.map +1 -0
- package/dist/services/training-service-like.js +1 -0
- package/dist/services/training-service-like.js.map +1 -0
- package/dist/services/training-service-registry.d.ts +4 -0
- package/dist/services/training-service-registry.d.ts.map +1 -0
- package/dist/services/training-service-registry.js +12 -0
- package/dist/services/training-service-registry.js.map +1 -0
- package/dist/services/training-service.d.ts +59 -0
- package/dist/services/training-service.d.ts.map +1 -0
- package/dist/services/training-service.js +154 -0
- package/dist/services/training-service.js.map +1 -0
- package/dist/services/training-trigger.d.ts +177 -0
- package/dist/services/training-trigger.d.ts.map +1 -0
- package/dist/services/training-trigger.js +300 -0
- package/dist/services/training-trigger.js.map +1 -0
- package/dist/services/training-vast-service.d.ts +149 -0
- package/dist/services/training-vast-service.d.ts.map +1 -0
- package/dist/services/training-vast-service.js +648 -0
- package/dist/services/training-vast-service.js.map +1 -0
- package/dist/services/vast-inference-stats.d.ts +37 -0
- package/dist/services/vast-inference-stats.d.ts.map +1 -0
- package/dist/services/vast-inference-stats.js +81 -0
- package/dist/services/vast-inference-stats.js.map +1 -0
- package/dist/services/vast-job-store.d.ts +74 -0
- package/dist/services/vast-job-store.d.ts.map +1 -0
- package/dist/services/vast-job-store.js +194 -0
- package/dist/services/vast-job-store.js.map +1 -0
- package/dist/services/vast-subprocess.d.ts +27 -0
- package/dist/services/vast-subprocess.d.ts.map +1 -0
- package/dist/services/vast-subprocess.js +78 -0
- package/dist/services/vast-subprocess.js.map +1 -0
- package/dist/setup-routes.d.ts +17 -0
- package/dist/setup-routes.d.ts.map +1 -0
- package/dist/setup-routes.js +319 -0
- package/dist/setup-routes.js.map +1 -0
- package/dist/ui/FineTuningSpatialView.d.ts +49 -0
- package/dist/ui/FineTuningSpatialView.d.ts.map +1 -0
- package/dist/ui/FineTuningSpatialView.js +154 -0
- package/dist/ui/FineTuningSpatialView.js.map +1 -0
- package/dist/ui/FineTuningView.d.ts +7 -0
- package/dist/ui/FineTuningView.d.ts.map +1 -0
- package/dist/ui/FineTuningView.helpers.d.ts +17 -0
- package/dist/ui/FineTuningView.helpers.d.ts.map +1 -0
- package/dist/ui/FineTuningView.helpers.js +30 -0
- package/dist/ui/FineTuningView.helpers.js.map +1 -0
- package/dist/ui/FineTuningView.interact.d.ts +2 -0
- package/dist/ui/FineTuningView.interact.d.ts.map +1 -0
- package/dist/ui/FineTuningView.interact.js +300 -0
- package/dist/ui/FineTuningView.interact.js.map +1 -0
- package/dist/ui/FineTuningView.js +4653 -0
- package/dist/ui/FineTuningView.js.map +1 -0
- package/dist/ui/fine-tuning-panels.d.ts +100 -0
- package/dist/ui/fine-tuning-panels.d.ts.map +1 -0
- package/dist/ui/fine-tuning-panels.helpers.d.ts +19 -0
- package/dist/ui/fine-tuning-panels.helpers.d.ts.map +1 -0
- package/dist/ui/fine-tuning-panels.helpers.js +77 -0
- package/dist/ui/fine-tuning-panels.helpers.js.map +1 -0
- package/dist/ui/fine-tuning-panels.js +928 -0
- package/dist/ui/fine-tuning-panels.js.map +1 -0
- package/dist/ui/index.d.ts +5 -0
- package/dist/ui/index.d.ts.map +1 -0
- package/dist/ui/index.js +5 -0
- package/dist/ui/index.js.map +1 -0
- package/dist/ui/training-view-bundle.d.ts +3 -0
- package/dist/ui/training-view-bundle.d.ts.map +1 -0
- package/dist/ui/training-view-bundle.js +7 -0
- package/dist/ui/training-view-bundle.js.map +1 -0
- package/dist/views/bundle.js +5312 -0
- package/dist/views/bundle.js.map +1 -0
- package/package.json +7 -7
package/dist/core/cli.js
ADDED
|
@@ -0,0 +1,1003 @@
|
|
|
1
|
+
import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
|
|
2
|
+
import { mkdir, readFile, writeFile } from "node:fs/promises";
|
|
3
|
+
import { homedir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import { fileURLToPath } from "node:url";
|
|
6
|
+
import { parseArgs } from "node:util";
|
|
7
|
+
import { AGENT_CONTEXTS } from "./context-types.js";
|
|
8
|
+
import {
|
|
9
|
+
createAnthropicTeacher,
|
|
10
|
+
createCerebrasTeacher,
|
|
11
|
+
createOpenAITeacher,
|
|
12
|
+
exportToElizaNativeJSONL,
|
|
13
|
+
generateDataset
|
|
14
|
+
} from "./dataset-generator.js";
|
|
15
|
+
import {
|
|
16
|
+
ELIZA_ONE_BENCHMARK_TIER_LIST,
|
|
17
|
+
elizaOneActionBenchmarkPairs,
|
|
18
|
+
elizaOneBenchmarkModelId,
|
|
19
|
+
parseElizaOneBenchmarkTiers
|
|
20
|
+
} from "./eliza1-benchmark-recipe.js";
|
|
21
|
+
import {
|
|
22
|
+
comparePrompts,
|
|
23
|
+
formatComparisonSummary
|
|
24
|
+
} from "./prompt-compare.js";
|
|
25
|
+
import { formatQualityReport, validateDataset } from "./replay-validator.js";
|
|
26
|
+
import {
|
|
27
|
+
buildRoleplayEpisodes,
|
|
28
|
+
exportRoleplayEpisodes
|
|
29
|
+
} from "./roleplay-trajectories.js";
|
|
30
|
+
import { ALL_BLUEPRINTS, BLUEPRINT_STATS } from "./scenario-blueprints.js";
|
|
31
|
+
import {
|
|
32
|
+
buildTrainingCollectionPreflightWithProbes,
|
|
33
|
+
listTrainingCollections,
|
|
34
|
+
runTrainingCollection
|
|
35
|
+
} from "./training-collection-runner.js";
|
|
36
|
+
import {
|
|
37
|
+
buildTaskRecord
|
|
38
|
+
} from "./trajectory-task-datasets.js";
|
|
39
|
+
import { discoverWorkspaceRoot } from "./workspace-runtime.js";
|
|
40
|
+
// Decision labels that parseAgentDecisions accepts; any other entry in the
// comma-separated list it receives is dropped.
const AGENT_DECISIONS = ["RESPOND", "IGNORE", "STOP"];
|
|
41
|
+
/**
 * Turn a comma-separated string into the list of entries that appear in
 * AGENT_CONTEXTS.
 *
 * Entries are trimmed; blank and unrecognised entries are dropped silently.
 *
 * @param {string | undefined} value - Raw comma-separated list.
 * @returns {string[] | undefined} The recognised entries in input order, or
 *   `undefined` when the input is empty or nothing was recognised.
 */
function parseAgentContexts(value) {
  if (!value) {
    return undefined;
  }
  const recognised = value
    .split(",")
    .map((piece) => piece.trim())
    .filter((name) => name !== "" && AGENT_CONTEXTS.includes(name));
  return recognised.length === 0 ? undefined : recognised;
}
|
|
52
|
+
/**
 * Turn a comma-separated string into the list of entries that appear in
 * AGENT_DECISIONS.
 *
 * Entries are trimmed; blank and unrecognised entries are dropped silently.
 *
 * @param {string | undefined} value - Raw comma-separated list.
 * @returns {string[] | undefined} The recognised entries in input order, or
 *   `undefined` when the input is empty or nothing was recognised.
 */
function parseAgentDecisions(value) {
  if (!value) {
    return undefined;
  }
  const recognised = value
    .split(",")
    .map((piece) => piece.trim())
    .filter((label) => label !== "" && AGENT_DECISIONS.includes(label));
  return recognised.length === 0 ? undefined : recognised;
}
|
|
63
|
+
/**
 * Parse a CLI tier-list value by delegating to parseElizaOneBenchmarkTiers.
 *
 * @param {string | undefined} value - Raw option value, passed through unchanged.
 * @returns Whatever parseElizaOneBenchmarkTiers returns for `value`.
 */
function parseCliTierList(value) {
  const tiers = parseElizaOneBenchmarkTiers(value);
  return tiers;
}
|
|
66
|
+
/**
 * Parse an optional option value as a base-10 integer greater than zero.
 *
 * Parsing follows Number.parseInt, so leading digits are accepted even when
 * followed by other characters.
 *
 * @param {string | undefined} value - Raw option value.
 * @returns {number | undefined} The parsed integer, or `undefined` when the
 *   value is missing, not numeric, or not greater than zero.
 */
function optionalPositiveInteger(value) {
  if (!value) {
    return undefined;
  }
  const candidate = Number.parseInt(value, 10);
  if (!Number.isFinite(candidate) || candidate <= 0) {
    return undefined;
  }
  return candidate;
}
|
|
71
|
+
/**
 * Validate the --cerebras-variants option.
 *
 * @param {string | undefined} value - Raw option value.
 * @returns {"trained" | "base" | "both"} The given value, or "both" when the
 *   option is missing or empty.
 * @throws {Error} When a non-empty value is not one of the accepted names.
 */
function parseCerebrasVariants(value) {
  switch (value) {
    case "trained":
    case "base":
    case "both":
      return value;
    default:
      break;
  }
  if (!value) {
    return "both";
  }
  throw new Error(
    `Invalid --cerebras-variants value ${JSON.stringify(value)}; expected trained, base, or both`
  );
}
|
|
82
|
+
/**
 * Validate the --benchmark-variant option.
 *
 * @param {string | undefined} value - Raw option value.
 * @returns {"reference" | "base" | "trained" | undefined} The given value, or
 *   `undefined` when the option was not supplied.
 * @throws {Error} When a supplied value (including an empty string) is not one
 *   of the accepted names.
 */
function parseActionBenchmarkVariant(value) {
  if (value === undefined) {
    return undefined;
  }
  const accepted = ["reference", "base", "trained"];
  if (!accepted.includes(value)) {
    throw new Error(
      `Invalid --benchmark-variant value ${JSON.stringify(value)}; expected reference, base, or trained`
    );
  }
  return value;
}
|
|
91
|
+
/**
 * Validate the --benchmark option.
 *
 * @param {string | undefined} value - Raw option value.
 * @returns {"eliza_harness_action_selection" | "clawbench" | "hermes" | "all"}
 *   The given value when it is one of the accepted names.
 * @throws {Error} When the value is missing or not one of the accepted names.
 */
function parseBenchmarkVsCerebrasBenchmark(value) {
  const accepted = new Set([
    "eliza_harness_action_selection",
    "clawbench",
    "hermes",
    "all"
  ]);
  if (!accepted.has(value)) {
    throw new Error(
      `Invalid --benchmark value ${JSON.stringify(value)}; expected eliza_harness_action_selection, clawbench, hermes, or all`
    );
  }
  return value;
}
|
|
99
|
+
/**
 * Pick the teacher model from environment variables.
 *
 * Selection order:
 *   1. Cerebras, when TRAIN_MODEL_PROVIDER (or, if that is unset or blank,
 *      TRAINING_PROVIDER) is "cerebras" and CEREBRAS_API_KEY is set.
 *   2. Anthropic, when ANTHROPIC_API_KEY is set.
 *   3. OpenAI, when OPENAI_API_KEY is set.
 *
 * @returns The teacher object built by the matching create*Teacher factory.
 * @throws {Error} When none of the combinations above is configured.
 */
function getTeacherModel() {
  // `||` rather than `??`: a TRAIN_MODEL_PROVIDER that is set but blank trims to
  // "", which is not nullish and would otherwise hide TRAINING_PROVIDER.
  const trainProvider = (
    process.env.TRAIN_MODEL_PROVIDER?.trim() ||
    process.env.TRAINING_PROVIDER?.trim() ||
    ""
  ).toLowerCase();
  const cerebrasKey = process.env.CEREBRAS_API_KEY;
  if (trainProvider === "cerebras" && cerebrasKey) {
    console.log("Using Cerebras gpt-oss-120b as teacher model");
    return createCerebrasTeacher();
  }
  const anthropicKey = process.env.ANTHROPIC_API_KEY;
  const openaiKey = process.env.OPENAI_API_KEY;
  if (anthropicKey) {
    console.log("Using Anthropic Claude Sonnet 4 as teacher model");
    return createAnthropicTeacher(anthropicKey);
  }
  if (openaiKey) {
    console.log("Using OpenAI GPT-5 as teacher model");
    return createOpenAITeacher(openaiKey);
  }
  // CEREBRAS_API_KEY alone never selects Cerebras (see the provider check
  // above), so the message names the provider variable that must accompany it.
  throw new Error(
    "No teacher model API key found. Set CEREBRAS_API_KEY together with TRAIN_MODEL_PROVIDER=cerebras (preferred), ANTHROPIC_API_KEY, or OPENAI_API_KEY."
  );
}
|
|
120
|
+
/**
 * `generate` subcommand: build a synthetic dataset with the teacher model,
 * validate it, and export it.
 *
 * Steps, in order: parse and validate options, pick the teacher via
 * getTeacherModel, print the run plan, call generateDataset, print the
 * validateDataset quality report, then write the eliza_native_v1 JSONL files
 * and the roleplay episode files into the output directory.
 *
 * Options: --variants (default "5"), --output (default "./training-data"),
 * --concurrency (default "5"), --contexts, --decisions, --limitBlueprints.
 *
 * @param {string[]} args - Command-line arguments for this subcommand.
 * @returns {Promise<void>}
 * @throws {Error} When a numeric option is not a valid integer in range, or
 *   when no teacher model is configured.
 */
async function cmdGenerate(args) {
  const { values } = parseArgs({
    args,
    options: {
      variants: { type: "string", default: "5" },
      output: { type: "string", default: "./training-data" },
      concurrency: { type: "string", default: "5" },
      contexts: { type: "string" },
      decisions: { type: "string" },
      limitBlueprints: { type: "string" }
    }
  });
  const variantsRaw = values.variants;
  const outputDir = values.output;
  const concurrencyRaw = values.concurrency;
  if (typeof variantsRaw !== "string" || typeof outputDir !== "string" || typeof concurrencyRaw !== "string") {
    throw new Error("Missing required generate options");
  }
  // Reject NaN and out-of-range counts here, before they reach the plan
  // printout or generateDataset.
  const integerOption = (flag, raw, min) => {
    const parsed = Number.parseInt(raw, 10);
    if (!Number.isInteger(parsed) || parsed < min) {
      throw new Error(
        `Invalid --${flag} value ${JSON.stringify(raw)}; expected an integer >= ${min}`
      );
    }
    return parsed;
  };
  const variantsPerBlueprint = integerOption("variants", variantsRaw, 1);
  const concurrency = integerOption("concurrency", concurrencyRaw, 1);
  // 0 stays accepted for the limit: the count below treats it as "no limit".
  const limitBlueprints = values.limitBlueprints ? integerOption("limitBlueprints", values.limitBlueprints, 0) : undefined;
  const filterContexts = parseAgentContexts(values.contexts);
  const filterDecisions = parseAgentDecisions(values.decisions);
  const teacher = getTeacherModel();
  const blueprintCount = limitBlueprints ? Math.min(limitBlueprints, ALL_BLUEPRINTS.length) : ALL_BLUEPRINTS.length;
  console.log(`\nScenario blueprints: ${ALL_BLUEPRINTS.length}`);
  console.log(`Manual blueprints: ${BLUEPRINT_STATS.manualCount}`);
  console.log(
    `Generated blueprints: ${BLUEPRINT_STATS.totalCount - BLUEPRINT_STATS.manualCount}`
  );
  console.log(`Variants per blueprint: ${variantsPerBlueprint}`);
  console.log(
    `Expected total samples: ${blueprintCount * variantsPerBlueprint}`
  );
  console.log(`Output directory: ${outputDir}`);
  console.log(`Teacher model: ${teacher.name}`);
  console.log(`Concurrency: ${concurrency}`);
  if (filterContexts) {
    console.log(`Filter contexts: ${filterContexts.join(", ")}`);
  }
  if (filterDecisions) {
    console.log(`Filter decisions: ${filterDecisions.join(", ")}`);
  }
  if (limitBlueprints) {
    console.log(`Limit blueprints: ${limitBlueprints}`);
  }
  console.log("");
  const config = {
    variantsPerBlueprint,
    teacher,
    outputDir,
    concurrency,
    filterContexts,
    filterDecisions,
    limitBlueprints,
    // "\r" rewrites the same terminal line so progress updates in place.
    onProgress: (completed, total, sample) => {
      const pct = (completed / total * 100).toFixed(1);
      process.stdout.write(
        `\r[${pct}%] ${completed}/${total} - ${sample.blueprintId} (${sample.expectedOutput.decision}/${sample.expectedOutput.primaryContext})`
      );
    }
  };
  console.log("Generating synthetic training data...\n");
  const samples = await generateDataset(config);
  console.log(`\n\nGenerated ${samples.length} samples.`);
  console.log("\nValidating dataset...");
  const report = validateDataset(samples);
  console.log(formatQualityReport(report));
  console.log("\nExporting to eliza_native_v1 JSONL format...");
  const paths = await exportToElizaNativeJSONL(samples, outputDir);
  console.log(` Combined: ${paths.combinedPath}`);
  console.log(` Should-respond only: ${paths.shouldRespondPath}`);
  console.log(` Context routing: ${paths.contextRoutingPath}`);
  const roleplayPaths = await exportRoleplayEpisodes(
    buildRoleplayEpisodes(samples),
    samples,
    outputDir
  );
  console.log(` Roleplay episodes: ${roleplayPaths.episodesPath}`);
  console.log(` Roleplay manifest: ${roleplayPaths.manifestPath}`);
  console.log("\nDone!");
}
|
|
201
|
+
/**
 * `compare` subcommand: run comparePrompts for a baseline and a variant prompt
 * file against a dataset, print the summary, and optionally write the result
 * as JSON.
 *
 * The model adapter wraps the teacher chosen by getTeacherModel.
 *
 * Exit behaviour: prints usage and exits with code 1 when --baseline,
 * --variant or --dataset is missing; exits with code 2 when the comparison
 * result is not marked as passed.
 *
 * @param {string[]} args - Command-line arguments for this subcommand.
 * @returns {Promise<void>}
 * @throws {Error} When a numeric option is not a number, when a prompt file
 *   cannot be read, or when no teacher model is configured.
 */
async function cmdCompare(args) {
  const { values } = parseArgs({
    args,
    options: {
      baseline: { type: "string" },
      variant: { type: "string" },
      dataset: { type: "string" },
      task: { type: "string" },
      scorer: { type: "string" },
      mode: { type: "string" },
      "max-examples": { type: "string" },
      tolerance: { type: "string" },
      output: { type: "string", short: "o" },
      temperature: { type: "string" },
      "max-tokens": { type: "string" }
    }
  });
  if (!values.baseline || !values.variant || !values.dataset) {
    console.error(
      "Usage: compare --baseline <prompt.txt> --variant <prompt.txt> --dataset <dataset.jsonl> [options]"
    );
    console.error("");
    console.error("Options:");
    console.error(
      " --task <task> One of: should_respond, context_routing, action_planner, response, media_description, view_context"
    );
    console.error(
      " --scorer <kind> agreement | planner_action (default: derived from --task)"
    );
    console.error(
      " --mode <mode> vs_historical (default) | pairwise"
    );
    console.error(" --max-examples N Cap evaluations (default: all)");
    console.error(
      " --tolerance N Pass threshold delta (default: 0.02)"
    );
    console.error(" --temperature N Sampling temperature (default: 0)");
    console.error(" --max-tokens N Per-completion cap (default: 512)");
    console.error(" -o, --output <path> Write JSON result to file");
    console.error("");
    console.error(
      "Requires ANTHROPIC_API_KEY or OPENAI_API_KEY for the model adapter."
    );
    process.exit(1);
  }
  // Validate numeric options before any file or model work so a typo fails
  // fast instead of passing NaN into the comparison.
  const numberOption = (flag, parse) => {
    const raw = values[flag];
    if (!raw) {
      return undefined;
    }
    const parsed = parse(raw);
    if (Number.isNaN(parsed)) {
      throw new Error(
        `Invalid --${flag} value ${JSON.stringify(raw)}; expected a number`
      );
    }
    return parsed;
  };
  const parseInteger = (raw) => Number.parseInt(raw, 10);
  const parseDecimal = (raw) => Number.parseFloat(raw);
  const maxExamples = numberOption("max-examples", parseInteger);
  const temperature = numberOption("temperature", parseDecimal);
  const maxTokens = numberOption("max-tokens", parseInteger);
  const tolerance = numberOption("tolerance", parseDecimal);
  const [baselinePrompt, variantPrompt] = await Promise.all([
    readFile(values.baseline, "utf-8"),
    readFile(values.variant, "utf-8")
  ]);
  const teacher = getTeacherModel();
  const adapter = {
    async complete(input) {
      return await teacher.generate(input.system ?? "", input.user);
    }
  };
  const task = values.task;
  const scorer = values.scorer;
  const mode = values.mode;
  console.log(
    `[compare] baseline=${values.baseline} variant=${values.variant}`
  );
  console.log(
    `[compare] dataset=${values.dataset} task=${task ?? "(any)"} mode=${mode ?? "vs_historical"}`
  );
  console.log(`[compare] adapter=${teacher.name}`);
  const result = await comparePrompts({
    baselinePrompt,
    variantPrompt,
    dataset: values.dataset,
    task,
    scorer,
    mode,
    maxExamples,
    temperature,
    maxTokens,
    // --tolerance used to be parsed and then dropped. It is forwarded only
    // when supplied, so the callee's own default still applies otherwise.
    // NOTE(review): assumes comparePrompts reads a `tolerance` option, as the
    // usage text implies — confirm against prompt-compare.js.
    ...(tolerance !== undefined ? { tolerance } : {}),
    adapter
  });
  console.log("");
  console.log(formatComparisonSummary(result));
  if (values.output) {
    await writeFile(values.output, JSON.stringify(result, null, 2));
    console.log(`[compare] wrote result to ${values.output}`);
  }
  if (!result.passed) {
    process.exit(2);
  }
}
|
|
291
|
+
/**
 * Map a trajectory stage to a training task name.
 *
 * Matching is case-insensitive on `stage.kind` and `stage.model.modelType`,
 * and the first matching rule wins:
 *   - kind "messagehandler" or modelType containing "response_handler" -> "should_respond"
 *   - kind "planner" or modelType containing "planner" -> "action_planner"
 *   - kind "tool" or "action" -> "response"
 *   - modelType containing "vision" or "image" -> "media_description"
 *
 * @param {{ kind?: string, model?: { modelType?: string } }} stage
 * @returns {string | null} The task name, or `null` when no rule matches.
 */
function classifyStage(stage) {
  const stageKind = (stage.kind ?? "").toLowerCase();
  const type = (stage.model?.modelType ?? "").toLowerCase();
  const rules = [
    ["should_respond", () => stageKind === "messagehandler" || type.includes("response_handler")],
    ["action_planner", () => stageKind === "planner" || type.includes("planner")],
    ["response", () => stageKind === "tool" || stageKind === "action"],
    ["media_description", () => type.includes("vision") || type.includes("image")]
  ];
  const match = rules.find(([, applies]) => applies());
  return match ? match[0] : null;
}
|
|
308
|
+
/**
 * Render a message/response payload as text.
 *
 * @param {*} value - Any payload.
 * @returns {string} Strings unchanged, `""` for null/undefined, otherwise
 *   the `JSON.stringify` form of the value.
 */
function stringifyContent(value) {
  if (value == null) {
    return "";
  }
  return typeof value === "string" ? value : JSON.stringify(value);
}
|
|
313
|
+
/**
 * Convert one recorded trajectory stage into an `eliza_native_v1` JSONL row.
 *
 * @param {object} stage - Stage record; `stage.model.messages`,
 *   `stage.model.response` and `stage.model.toolCalls` are read.
 * @returns {object|null} A row with `format`, `boundary`, `request` and
 *   `response`, or `null` when the stage has no messages or carries neither
 *   a response nor any tool call.
 */
function stageToJsonlRow(stage) {
  const model = stage?.model;
  // Stages are parsed from disk; anything other than a real array of
  // messages is treated as "no messages" instead of throwing in `.map`.
  const messages = Array.isArray(model?.messages) ? model.messages : [];
  if (messages.length === 0) return null;

  const response = model.response;
  const toolCalls = model.toolCalls;
  // An empty tool-call list carries nothing to learn from, so it must not
  // keep an otherwise response-less stage alive.
  const hasToolCalls = Array.isArray(toolCalls) ? toolCalls.length > 0 : Boolean(toolCalls);
  if (!response && !hasToolCalls) return null;

  const normalizedMessages = messages.map((message) => ({
    role: message?.role,
    content: stringifyContent(message?.content),
  }));
  const systemMessage = normalizedMessages.find((message) => message.role === "system");
  const text = stringifyContent(response);

  return {
    format: "eliza_native_v1",
    boundary: "vercel_ai_sdk.generateText",
    request: {
      system: systemMessage?.content ?? "",
      messages: normalizedMessages,
    },
    response: toolCalls ? { text, toolCalls } : { text },
  };
}
|
|
335
|
+
/**
 * `export-trajectories` command: read recorded trajectory JSON files from
 * `<input>/<agent>/*.json`, bucket their stages by task, and write one
 * `<task>_trajectories.jsonl` file per task into the output directory.
 *
 * Options: `-i/--input` (defaults to `$ELIZA_TRAJECTORY_DIR`, then
 * `<$ELIZA_STATE_DIR or ~/.eliza>/trajectories`), `-o/--output`
 * (defaults to `./training-data`), `--max-per-task` (per-bucket row cap).
 *
 * Exits with code 1 when the input directory is missing or the cap is not
 * a number.
 *
 * @param {string[]} args - Command-line arguments after the command name.
 * @returns {Promise<void>}
 */
async function cmdExportTrajectories(args) {
  const { values } = parseArgs({
    args,
    options: {
      input: { type: "string", short: "i" },
      output: { type: "string", short: "o" },
      "max-per-task": { type: "string" },
    },
  });
  const inputDir =
    values.input ??
    process.env.ELIZA_TRAJECTORY_DIR ??
    join(process.env.ELIZA_STATE_DIR ?? join(homedir(), ".eliza"), "trajectories");
  const outputDir = values.output ?? "./training-data";

  let cap = Number.POSITIVE_INFINITY;
  if (values["max-per-task"]) {
    cap = Number.parseInt(values["max-per-task"], 10);
    // A NaN cap would make every `length >= cap` check false and silently
    // disable the limit the user asked for.
    if (Number.isNaN(cap)) {
      console.error(
        `[export-trajectories] --max-per-task must be an integer, got "${values["max-per-task"]}"`,
      );
      process.exit(1);
    }
  }

  if (!existsSync(inputDir)) {
    console.error(`[export-trajectories] input dir not found: ${inputDir}`);
    process.exit(1);
  }
  await mkdir(outputDir, { recursive: true });
  console.log(`[export-trajectories] reading from ${inputDir}`);
  console.log(`[export-trajectories] writing to ${outputDir}`);

  const buckets = buildTaskRecord(() => []);
  const agentDirs = readdirSync(inputDir).filter((name) =>
    statSync(join(inputDir, name)).isDirectory(),
  );

  let totalTrajectories = 0;
  let totalStages = 0;
  let droppedStages = 0;
  let unreadableFiles = 0;

  for (const agentDir of agentDirs) {
    const agentPath = join(inputDir, agentDir);
    const files = readdirSync(agentPath).filter((f) => f.endsWith(".json"));
    for (const file of files) {
      let traj;
      try {
        traj = JSON.parse(readFileSync(join(agentPath, file), "utf-8"));
      } catch {
        // Best effort: one corrupt file must not abort the export, but it
        // is counted so the skip is visible in the output.
        unreadableFiles += 1;
        continue;
      }
      totalTrajectories += 1;
      // A file may legally parse to `null` or carry a non-array `stages`.
      const stages = Array.isArray(traj?.stages) ? traj.stages : [];
      for (const stage of stages) {
        totalStages += 1;
        const task = classifyStage(stage);
        if (!task) {
          droppedStages += 1;
          continue;
        }
        if (buckets[task].length >= cap) continue;
        const row = stageToJsonlRow(stage);
        if (!row) {
          droppedStages += 1;
          continue;
        }
        buckets[task].push(row);
      }
    }
  }

  for (const task of Object.keys(buckets)) {
    const path = join(outputDir, `${task}_trajectories.jsonl`);
    const lines = buckets[task].map((row) => JSON.stringify(row));
    await writeFile(path, `${lines.join("\n")}\n`);
    console.log(
      `[export-trajectories] ${task}: wrote ${buckets[task].length} examples to ${path}`,
    );
  }
  if (unreadableFiles > 0) {
    console.warn(
      `[export-trajectories] skipped ${unreadableFiles} unreadable or invalid JSON file(s)`,
    );
  }
  console.log(
    `[export-trajectories] summary: ${totalTrajectories} trajectories, ${totalStages} stages (${droppedStages} unclassified)`,
  );
}
|
|
408
|
+
/**
 * Parse the `run-collection` command line into the options object consumed
 * by the training-collection runner and its preflight.
 *
 * Unless `--live` is given, the run is a dry run and `dryRun` is passed to
 * every sub-step. `--skip-*` flags turn individual steps off.
 *
 * @param {string[]} args - Command-line arguments after the command name.
 * @returns {object} Fully populated run-collection options.
 */
function buildRunCollectionOptionsFromCliArgs(args) {
  // Option-spec builders; each call returns a fresh descriptor.
  const text = (fallback) =>
    fallback === undefined ? { type: "string" } : { type: "string", default: fallback };
  const off = () => ({ type: "boolean", default: false });

  const { values } = parseArgs({
    args,
    options: {
      output: { type: "string", short: "o" },
      "workspace-root": text(),
      tiers: text("2b"),
      benchmark: text("eliza_harness_action_selection"),
      provider: text("local-llama-cpp"),
      "base-url": text("http://localhost:11434/v1"),
      "runs-per-case": text("1"),
      "benchmark-filter": text(),
      "benchmark-model": text(),
      "benchmark-runtime-model": text(),
      "benchmark-variant": text(),
      "dataset-version": text("eliza-native-v1"),
      "hf-repo": text("elizaos/eliza-1-training"),
      "hf-revision": text("main"),
      "hf-files": text(),
      "feed-archetypes": text("trader"),
      "feed-agents": text("1"),
      "feed-ticks": text("1"),
      "feed-parallel": text("1"),
      "cerebras-max-samples": text("50"),
      "cerebras-variants": text("both"),
      scenario: text("deterministic-pr-smoke"),
      "natural-sanitized-jsonl": text(),
      "natural-raw-jsonl": text(),
      "natural-run-id": text(),
      "natural-tasks": text(),
      "include-natural-raw": off(),
      live: off(),
      "preflight-only": off(),
      "probe-endpoints": off(),
      "skip-hf": off(),
      "skip-feed": off(),
      "skip-natural": off(),
      "skip-tests": off(),
      "skip-scenarios": off(),
      "skip-action-benchmark": off(),
      "skip-cerebras": off(),
      "skip-model-registry": off(),
      "skip-bundle-stage": off(),
      "include-eval-comparison": off(),
      "skip-eval-comparison": off(),
      "include-matrix": { type: "boolean", default: true },
      "skip-matrix": off(),
      mocks: { type: "boolean" },
    },
  });

  // Typed accessors over the parsed values.
  const str = (key, fallback) => {
    const raw = values[key];
    return typeof raw === "string" ? raw : fallback;
  };
  const csv = (key) => {
    const raw = values[key];
    if (typeof raw !== "string") return undefined;
    return raw
      .split(",")
      .map((part) => part.trim())
      .filter(Boolean);
  };
  const positiveInt = (key) => optionalPositiveInteger(str(key));
  const isSet = (key) => values[key] === true;
  const notSkipped = (key) => values[key] !== true;

  const tiers = parseCliTierList(str("tiers"));
  const dryRun = !isSet("live");
  const benchmark = parseBenchmarkVsCerebrasBenchmark(
    str("benchmark", "eliza_harness_action_selection"),
  );
  const provider = str("provider", "local-llama-cpp");
  const baseUrl = str("base-url", "http://localhost:11434/v1");
  const datasetVersion = str("dataset-version", "eliza-native-v1");
  const benchmarkModel = str("benchmark-model");

  const actionBenchmark = {
    useMocks: typeof values.mocks === "boolean" ? values.mocks : dryRun,
    forceTrajectoryCapture: true,
    provider,
    baseUrl,
    benchmark,
    datasetVersion,
    modelId: benchmarkModel,
    // The runtime model falls back to the benchmark model when not given.
    runtimeModel: str("benchmark-runtime-model", benchmarkModel),
    variant: parseActionBenchmarkVariant(str("benchmark-variant")),
    filter: str("benchmark-filter"),
    runsPerCase: positiveInt("runs-per-case"),
    dryRun,
  };

  // Base/trained pairs are only generated when the user pinned no
  // model, runtime model or variant explicitly.
  const usesDefaultModels =
    actionBenchmark.modelId === undefined &&
    actionBenchmark.runtimeModel === undefined &&
    actionBenchmark.variant === undefined;
  const pairEntry = (variant) => ({
    variant,
    modelId: elizaOneBenchmarkModelId(tiers[0], variant),
    runtimeModel: elizaOneBenchmarkModelId(tiers[0], variant),
  });

  const sanitizedJsonlPath = str("natural-sanitized-jsonl");
  const rawJsonlPath = str("natural-raw-jsonl");

  return {
    preflightOnly: isSet("preflight-only"),
    preflightProbe: isSet("probe-endpoints"),
    outputDir: str("output"),
    workspaceRoot: str("workspace-root") ?? discoverWorkspaceRoot(),
    includeHuggingFace: notSkipped("skip-hf"),
    includeFeed: notSkipped("skip-feed"),
    includeNaturalTrajectories: notSkipped("skip-natural"),
    includeTestTrajectories: notSkipped("skip-tests"),
    includeScenarios: notSkipped("skip-scenarios"),
    includeEvalComparison:
      notSkipped("skip-eval-comparison") && (dryRun || isSet("include-eval-comparison")),
    includeActionBenchmark: notSkipped("skip-action-benchmark"),
    includeBenchmarkVsCerebras: notSkipped("skip-cerebras"),
    includeEliza1ModelRegistry: notSkipped("skip-model-registry"),
    includeEliza1BundleStage: notSkipped("skip-bundle-stage"),
    includeBenchmarkMatrix: notSkipped("skip-matrix"),
    naturalTrajectories: {
      sanitizedJsonlPath,
      rawJsonlPath,
      includeRawJsonl: isSet("include-natural-raw") || typeof rawJsonlPath === "string",
      tasks: csv("natural-tasks"),
      source: {
        kind: "training_collection_natural_trajectories",
        runId: str("natural-run-id"),
        metadata: {
          cli: true,
          sanitizedJsonlPath,
          rawJsonlPath,
        },
      },
    },
    huggingFace: {
      repoId: str("hf-repo", "elizaos/eliza-1-training"),
      revision: str("hf-revision", "main"),
      files: csv("hf-files"),
      dryRun,
    },
    feed: {
      archetypes: str("feed-archetypes", "trader"),
      numAgents: positiveInt("feed-agents"),
      ticks: positiveInt("feed-ticks"),
      parallel: positiveInt("feed-parallel"),
      cleanup: true,
      dryRun,
    },
    scenarios: {
      scenario: str("scenario"),
      exportNative: true,
      useDeterministicProxy: true,
      dryRun,
    },
    evalComparison: {
      model: elizaOneBenchmarkModelId(tiers[0] ?? "2b", "base"),
      trainedModelPath: elizaOneBenchmarkModelId(tiers[0] ?? "2b", "trained"),
      backend: "cpu",
      dryRun,
    },
    actionBenchmark,
    actionBenchmarkPair:
      tiers.length === 1 && usesDefaultModels
        ? {
            tier: tiers[0],
            base: pairEntry("base"),
            trained: pairEntry("trained"),
          }
        : undefined,
    actionBenchmarkPairs:
      tiers.length > 1 && usesDefaultModels ? elizaOneActionBenchmarkPairs(tiers) : undefined,
    benchmarkVsCerebras: {
      tiers: tiers.join(","),
      benchmark,
      variants: parseCerebrasVariants(str("cerebras-variants")),
      maxSamples: positiveInt("cerebras-max-samples") ?? 50,
      dryRun,
    },
    eliza1BundleStage: {
      repoId: "elizaos/eliza-1",
      tier: tiers[0] ?? "2b",
      localDir: "/tmp/eliza-1-bundles",
      maxBytes: 8589934592,
      apply: false,
    },
  };
}
|
|
585
|
+
/**
 * Render a preflight report as console lines: one totals line followed by
 * one line per check.
 *
 * @param {{liveRequired: boolean, checks: Array<{id: string, status: string, detail: string, path?: string}>}} preflight
 * @returns {string[]} Lines prefixed with `[run-collection:preflight]`.
 */
function formatTrainingCollectionPreflightSummary(preflight) {
  const tally = {};
  for (const check of preflight.checks) {
    tally[check.status] = (tally[check.status] ?? 0) + 1;
  }
  const countOf = (status) => tally[status] ?? 0;
  const liveLabel = preflight.liveRequired ? "yes" : "no";

  const lines = [
    `[run-collection:preflight] live=${liveLabel} ok=${countOf("ok")} warning=${countOf("warning")} missing=${countOf("missing")} skipped=${countOf("skipped")}`,
  ];
  for (const check of preflight.checks) {
    const pathSuffix = check.path ? ` path=${check.path}` : "";
    lines.push(
      `[run-collection:preflight] ${check.id}=${check.status} ${check.detail}${pathSuffix}`,
    );
  }
  return lines;
}
|
|
600
|
+
/**
 * `run-collection` command: either print the preflight report only
 * (`--preflight-only`) or run the full training collection and print its
 * summary.
 *
 * @param {string[]} args - Command-line arguments after the command name.
 * @returns {Promise<void>}
 */
async function cmdRunCollection(args) {
  const options = buildRunCollectionOptionsFromCliArgs(args);
  const printAll = (lines) => {
    for (const line of lines) {
      console.log(line);
    }
  };

  if (!options.preflightOnly) {
    const result = await runTrainingCollection(options);
    printAll(formatRunCollectionSummary(result));
    return;
  }

  const { workspaceRoot } = options;
  const preflight = await buildTrainingCollectionPreflightWithProbes({
    options,
    workspaceRoot,
    trainingRoot: workspaceRoot ? join(workspaceRoot, "packages", "training") : undefined,
  });
  printAll(formatTrainingCollectionPreflightSummary(preflight));
}
|
|
618
|
+
/**
 * `list-collections` command: list previous training collections under
 * `--root` (at most `-n/--limit`, default 20) and print one summary line
 * per collection.
 *
 * @param {string[]} args - Command-line arguments after the command name.
 * @returns {Promise<void>}
 */
async function cmdListCollections(args) {
  const parsed = parseArgs({
    args,
    options: {
      root: { type: "string" },
      limit: { type: "string", short: "n", default: "20" },
    },
  });
  const { root, limit } = parsed.values;
  const result = await listTrainingCollections({
    root,
    limit: optionalPositiveInteger(limit),
  });
  formatListTrainingCollectionsSummary(result).forEach((line) => {
    console.log(line);
  });
}
|
|
634
|
+
/**
 * Render the result of a collection listing as console lines: two header
 * lines (root and count) followed by one space-separated `key=value` line
 * per collection.
 *
 * @param {{root: string, collections: object[]}} result - Listing result.
 * @returns {string[]} Lines prefixed with `[list-collections]`.
 */
function formatListTrainingCollectionsSummary(result) {
  const orNone = (items) => items.join(",") || "none";

  // Counts per source plus at most one example id per source (four overall).
  const describeSamples = (collection) => {
    const entries = Object.entries(
      collection.sourceSamples ?? {
        huggingFace: [],
        feed: [],
        natural: [],
        scenarios: [],
        tests: [],
        trainingJsonl: [],
      },
    );
    const counts = entries.map(([source, samples]) => `${source}:${samples.length}`).join(",");
    const examples = [];
    for (const [source, samples] of entries) {
      for (const sample of samples.slice(0, 1)) {
        const id = sample.trajectoryId ?? sample.scenarioId ?? sample.title ?? "sample";
        const task = sample.task ? `:${sample.task}` : "";
        examples.push(`${source}:${id}${task}`);
      }
    }
    const exampleText = examples.slice(0, 4).join(",");
    return `${counts}${exampleText ? `,examples:${exampleText}` : ""}`;
  };

  const describeEvals = ({ evals }) => {
    const parts = [
      `artifacts:${evals.evalArtifacts}`,
      `comparisons:${evals.evalComparisons}`,
      `action:${evals.actionBenchmarks}`,
      `matrices:${evals.benchmarkMatrices}`,
    ];
    const first = evals.comparisonInventory[0];
    if (first) {
      parts.push(
        `first:${first.baseModel ?? "base"}->${first.trainedModel ?? "trained"},improvement:${first.improvementPercent ?? "n/a"}%`,
      );
    }
    return parts.join(",");
  };

  const describeModels = ({ training }) => {
    const inventory = training.modelInventory;
    const parts = [
      `runs:${training.trainingRuns}`,
      `models:${training.models}`,
      `inventory:${inventory.length}`,
    ];
    // Prefer the first entry that names a model or variant.
    const first = inventory.find((model) => model.model || model.variant) ?? inventory[0];
    if (first) {
      parts.push(
        `first:${first.tier ?? "tier"}/${first.variant ?? "variant"}/${first.model ?? "model"},improvement:${first.evalImprovementPercent ?? "n/a"}%`,
      );
    }
    return parts.join(",");
  };

  const describeGaps = ({ readinessGaps }) => {
    if (readinessGaps.length === 0) return "none";
    return readinessGaps
      .slice(0, 4)
      .map((gap) => {
        const capability = gap.recommendedCapability ? `->${gap.recommendedCapability}` : "";
        return `${gap.id}:${gap.status}${capability}${formatRecommendedParamsSuffix(gap.recommendedParams)}`;
      })
      .join(",");
  };

  const describeCollection = (collection) => {
    const evalSummary = describeEvals(collection);
    const modelSummary = describeModels(collection);
    const sampleSummary = describeSamples(collection);
    const gapSummary = describeGaps(collection);
    const { dataSources, benchmarks, readiness } = collection;
    const baseline = benchmarks.baselineProgress;
    return [
      `[list-collections] run=${collection.generatedAt}`,
      `readiness=${collection.readinessStatus}`,
      `ready=${readiness.ready}`,
      `partial=${readiness.partial}`,
      `missing=${readiness.missing}`,
      `artifacts=${collection.artifactCount}`,
      `sources=hf:${dataSources.huggingFaceDatasets},feed:${dataSources.feedDatasets},natural:${dataSources.naturalTrajectoryBundles},scenarios:${dataSources.scenarioRuns},native:${dataSources.scenarioNativeDatasets},tests:${dataSources.testTrajectories},jsonl:${dataSources.trainingJsonlDatasets}`,
      `benchmarks=pairs:${benchmarks.actionBenchmarkPairs},comparisons:${benchmarks.benchmarkComparisons},cases:${benchmarks.caseSamples},tiers:${orNone(benchmarks.tiers)}`,
      `baseline=established:${orNone(baseline.establishedTiers)},next:${baseline.nextTier ?? "none"},remaining:${orNone(baseline.remainingTiers)}`,
      `evals=${evalSummary}`,
      `models=${modelSummary}`,
      `samples=${sampleSummary}`,
      `artifact-links=source:${collection.sourceArtifacts.length},evidence:${collection.evidenceArtifacts.length}`,
      `gaps=${gapSummary}`,
      `output=${collection.outputDir}`,
      `readme=${collection.readmePath}`,
      `viewer=${collection.analysisIndexHtmlPath}`,
    ].join(" ");
  };

  const lines = [
    `[list-collections] root=${result.root}`,
    `[list-collections] count=${result.collections.length}`,
  ];
  for (const collection of result.collections) {
    lines.push(describeCollection(collection));
  }
  return lines;
}
|
|
701
|
+
/**
 * Build the ` params={...}` suffix appended to a readiness-gap entry.
 *
 * @param {object|null|undefined} params - Recommended parameters, if any.
 * @returns {string} `""` when there are no parameters, otherwise
 *   ` params=` followed by the JSON form of `params`.
 */
function formatRecommendedParamsSuffix(params) {
  const hasEntries = Boolean(params) && Object.keys(params).length > 0;
  return hasEntries ? ` params=${JSON.stringify(params)}` : "";
}
|
|
705
|
+
/**
 * Reduce a step's error to a single short line for the run summary.
 *
 * Whitespace is collapsed. If the text contains one of the known
 * configuration errors, that sentence alone is returned; otherwise the
 * whole text is used. The result is at most 220 characters.
 *
 * @param {*} error - Error text. Non-strings are tolerated: an object with
 *   a string `message` contributes that message, anything else is coerced
 *   with `String()`.
 * @returns {string} The compact message, or `"failed"` when there is none.
 */
function compactStepError(error) {
  let text;
  if (error == null) {
    text = "";
  } else if (typeof error === "string") {
    text = error;
  } else if (typeof error.message === "string") {
    text = error.message;
  } else {
    text = String(error);
  }
  // Fall back after trimming so a blank error does not print as nothing.
  const normalized = text.replace(/\s+/g, " ").trim() || "failed";

  // Known root causes take priority over whatever noise surrounds them.
  const priorityPatterns = [
    /Database not initialized\.[^.]*\./,
    /DATABASE_URL is required[^.]*\./,
    /CEREBRAS_API_KEY is required[^.]*\./,
  ];
  for (const pattern of priorityPatterns) {
    const match = normalized.match(pattern);
    if (match?.[0]) return match[0].slice(0, 220);
  }
  return normalized.slice(0, 220);
}
|
|
718
|
+
/**
 * Render the outcome of a training-collection run as console lines:
 * output locations, readiness, preflight totals, data-source and eval
 * counts, benchmark progress, sample examples, failed steps and the top
 * readiness gaps.
 *
 * @param {object} result - Run result; `outputDir`, `manifestPath`,
 *   `readmePath`, `collectionIndex` and `manifest` are read.
 * @returns {string[]} Lines prefixed with `[run-collection]`.
 */
function formatRunCollectionSummary(result) {
  const { manifest } = result;
  const evidence = manifest.evidence;
  const readiness = evidence.benchmarkReadiness;
  const preflight = evidence.preflight ?? { liveRequired: false, checks: [] };

  const preflightCounts = {};
  for (const check of preflight.checks) {
    preflightCounts[check.status] = (preflightCounts[check.status] ?? 0) + 1;
  }

  // Gaps listed here are shown first, in this order; the rest follow by id.
  const priorityGapIds = [
    "feed_generation",
    "natural_trajectories",
    "test_trajectories",
    "smallest_model_benchmark",
    "all_eliza1_tiers_benchmark",
    "cerebras_reference",
    "base_trained_improvement",
    "all_eliza1_tier_improvements",
    "agentic_benchmarks",
    "benchmark_matrix",
    "benchmark_case_provenance",
    "eval_comparison",
    "model_tracking",
    "readable_source_samples",
  ];
  // A capability with no recorded gap is reported as ready.
  const readinessStatusFor = (id) =>
    evidence.readinessGaps.find((gap) => gap.id === id)?.status ?? "ready";

  const comparisonInventory = evidence.benchmarks.comparisonInventory ?? [];
  const dryRunComparisons = comparisonInventory.filter(
    (comparison) => comparison.dryRun === true,
  ).length;
  const liveComparisons = Math.max(0, comparisonInventory.length - dryRunComparisons);

  const gaps = [...evidence.readinessGaps]
    .sort((left, right) => {
      const leftIndex = priorityGapIds.indexOf(left.id);
      const rightIndex = priorityGapIds.indexOf(right.id);
      const leftPriority = leftIndex >= 0 ? 0 : 1;
      const rightPriority = rightIndex >= 0 ? 0 : 1;
      if (leftPriority !== rightPriority) return leftPriority - rightPriority;
      if (leftIndex >= 0 && rightIndex >= 0 && leftIndex !== rightIndex) {
        return leftIndex - rightIndex;
      }
      return left.id.localeCompare(right.id);
    })
    .slice(0, 5);

  const sourceSamples = evidence.sourceSamples ?? {
    huggingFace: [],
    feed: [],
    natural: [],
    scenarios: [],
    tests: [],
    trainingJsonl: [],
  };
  const sourceSampleEntries = Object.entries(sourceSamples);
  const sampleCounts = sourceSampleEntries
    .map(([source, samples]) => `${source}=${samples.length}`)
    .join(" ");
  const sampleExamples = sourceSampleEntries
    .flatMap(([source, samples]) =>
      samples.slice(0, 2).map((sample) => {
        // Same id fallback chain as the list-collections summary, so a
        // scenario sample is labelled by its scenario id in both outputs.
        const id = sample.trajectoryId ?? sample.scenarioId ?? sample.title ?? "sample";
        const task = sample.task ? `:${sample.task}` : "";
        return `${source}:${id}${task}`;
      }),
    )
    .slice(0, 5)
    .join(" ");

  const failedSteps = (manifest.steps ?? [])
    .filter((step) => step.status === "failed")
    .map((step) => `${step.id}:${compactStepError(step.error)}`);

  const { dataSources, evals, training, benchmarks } = evidence;
  const baseline = benchmarks.baselineProgress;

  return [
    `[run-collection] output=${result.outputDir}`,
    `[run-collection] manifest=${result.manifestPath}`,
    `[run-collection] readme=${result.readmePath}`,
    `[run-collection] viewer=${manifest.analysis.indexHtmlPath}`,
    `[run-collection] collection-index=${result.collectionIndex.indexHtmlPath} json=${result.collectionIndex.indexJsonPath}`,
    `[run-collection] readiness=${manifest.readiness.status} ready=${manifest.readiness.ready} partial=${manifest.readiness.partial} missing=${manifest.readiness.missing}`,
    `[run-collection] preflight live=${preflight.liveRequired ? "yes" : "no"} ok=${preflightCounts.ok ?? 0} warning=${preflightCounts.warning ?? 0} missing=${preflightCounts.missing ?? 0} skipped=${preflightCounts.skipped ?? 0}`,
    `[run-collection] sources hf=${dataSources.huggingFaceDatasets} feed=${dataSources.feedDatasets} natural=${dataSources.naturalTrajectoryBundles} scenarios=${dataSources.scenarioRuns} scenario-native=${dataSources.scenarioNativeDatasets} tests=${dataSources.testTrajectories} jsonl=${dataSources.trainingJsonlDatasets}`,
    `[run-collection] evals artifacts=${evals.evalArtifacts} comparisons=${evals.evalComparisons} action=${evals.actionBenchmarks} matrices=${evals.benchmarkMatrices} models=${training.models} training-runs=${training.trainingRuns}`,
    `[run-collection] benchmarks pairs=${benchmarks.actionBenchmarkPairs} rows=${benchmarks.benchmarkRows} comparisons=${benchmarks.benchmarkComparisons} tiers=${benchmarks.tiers.join(",") || "none"}`,
    `[run-collection] baseline established=${baseline.establishedTiers.join(",") || "none"} next=${baseline.nextTier ?? "none"} remaining=${baseline.remainingTiers.join(",") || "none"} smallest=${baseline.smallestTierEstablished ? "yes" : "no"} all=${baseline.allTiersEstablished ? "yes" : "no"}`,
    `[run-collection] benchmark-comparisons live=${liveComparisons} dry-run=${dryRunComparisons} improvements=${benchmarks.improvementComparisons.length}`,
    `[run-collection] benchmark-readiness smallest=${readiness.smallestTier} all-tiers=${readiness.allEliza1Tiers} improvement=${readiness.baseTrainedImprovement} all-tier-improvements=${readiness.allEliza1TierImprovements} cerebras=${readiness.cerebrasReference} cases=${readinessStatusFor("benchmark_case_provenance")}`,
    `[run-collection] source-readiness natural=${readinessStatusFor("natural_trajectories")} tests=${readinessStatusFor("test_trajectories")} readable=${readinessStatusFor("readable_source_samples")}`,
    `[run-collection] eval-readiness comparison=${readinessStatusFor("eval_comparison")} models=${readinessStatusFor("model_tracking")}`,
    `[run-collection] sample-readiness readable=${readinessStatusFor("readable_source_samples")}`,
    `[run-collection] source-samples ${sampleCounts}${sampleExamples ? ` examples=${sampleExamples}` : ""}`,
    failedSteps.length > 0
      ? `[run-collection] failed-steps ${failedSteps.join(" | ")}`
      : "[run-collection] failed-steps none",
    gaps.length > 0
      ? `[run-collection] readiness-gaps ${gaps
          .map(
            (gap) =>
              `${gap.id}:${gap.status}${gap.recommendedCapability ? `->${gap.recommendedCapability}` : ""}${formatRecommendedParamsSuffix(gap.recommendedParams)}`,
          )
          .join(" ")}`
      : "[run-collection] readiness-gaps none",
  ];
}
|
|
807
|
+
/**
 * `validate` command: load a generated samples file and print its quality
 * report.
 *
 * Exits with code 1 when `--input` is missing, when the file is not valid
 * JSON, or when the JSON is not an array of samples.
 *
 * @param {string[]} args - Command-line arguments after the command name.
 * @returns {Promise<void>}
 */
async function cmdValidate(args) {
  const { values } = parseArgs({
    args,
    options: {
      input: { type: "string", short: "i" },
    },
  });
  if (!values.input) {
    console.error("Usage: validate --input <path-to-raw_samples.json>");
    process.exit(1);
  }

  const raw = await readFile(values.input, "utf-8");
  let samples;
  try {
    samples = JSON.parse(raw);
  } catch (err) {
    // Report a readable message instead of an uncaught SyntaxError.
    console.error(
      `[validate] ${values.input} is not valid JSON: ${err instanceof Error ? err.message : String(err)}`,
    );
    process.exit(1);
    return;
  }
  // The count below and the validator both treat the file as a list.
  if (!Array.isArray(samples)) {
    console.error(`[validate] expected a JSON array of samples in ${values.input}`);
    process.exit(1);
    return;
  }

  console.log(`Loaded ${samples.length} samples from ${values.input}`);
  console.log("");
  const report = validateDataset(samples);
  console.log(formatQualityReport(report));
}
|
|
825
|
+
/** Task names the `rollback-prompt` command accepts, in display order. */
const OPTIMIZED_PROMPT_TASKS_CLI = [
  "should_respond",
  "context_routing",
  "action_planner",
  "response",
  "media_description",
  "view_context",
];

/**
 * Check whether a value is one of the accepted prompt task names.
 *
 * @param {*} value - Candidate task name.
 * @returns {boolean} `true` when `value` is listed in `OPTIMIZED_PROMPT_TASKS_CLI`.
 */
function isOptimizedPromptTaskCli(value) {
  return OPTIMIZED_PROMPT_TASKS_CLI.some((task) => task === value);
}
|
|
836
|
+
/**
 * `rollback-prompt` command: roll the optimized prompt for a task back via
 * `OptimizedPromptService.rollback` and print what the task now points at.
 *
 * The task comes from `--task` or, failing that, the first positional
 * argument. The prompt store root is `--store-root` when given, otherwise
 * `<$ELIZA_STATE_DIR or ~/.eliza>/optimized-prompts`.
 *
 * Exits with code 1 on a missing or unknown task, or when the rollback
 * itself fails.
 *
 * @param {string[]} args - Command-line arguments after the command name.
 * @returns {Promise<void>}
 */
async function cmdRollbackPrompt(args) {
  const { values, positionals } = parseArgs({
    args,
    options: {
      task: { type: "string" },
      "store-root": { type: "string" },
    },
    allowPositionals: true,
  });
  // `||` rather than `??`: a blank `--task` must still fall back to the
  // positional argument instead of being treated as a provided name.
  const taskName = values.task?.trim() || positionals[0]?.trim();
  if (!taskName) {
    console.error(
      `Usage: rollback-prompt <task>
task: one of ${OPTIMIZED_PROMPT_TASKS_CLI.join(", ")}`,
    );
    process.exit(1);
  }
  if (!isOptimizedPromptTaskCli(taskName)) {
    console.error(
      `Unknown task "${taskName}". Must be one of: ${OPTIMIZED_PROMPT_TASKS_CLI.join(", ")}`,
    );
    process.exit(1);
  }

  const { OptimizedPromptService } = await import("@elizaos/core");
  const service = new OptimizedPromptService();
  const customRoot = values["store-root"]?.trim();
  if (customRoot) {
    service.setStoreRoot(customRoot);
  } else {
    const stateDir = process.env.ELIZA_STATE_DIR?.trim() || join(homedir(), ".eliza");
    service.setStoreRoot(join(stateDir, "optimized-prompts"));
  }
  await service.refresh();

  try {
    // `context_routing` is rolled back as the `should_respond` prompt; the
    // log line still reports the task name the user typed.
    const promptTask = taskName === "context_routing" ? "should_respond" : taskName;
    const newCurrent = await service.rollback(promptTask);
    console.log(`[rollback-prompt] task=${taskName} now points at ${newCurrent}`);
  } catch (err) {
    console.error(`[rollback-prompt] ${err instanceof Error ? err.message : String(err)}`);
    process.exit(1);
  }
}
|
|
882
|
+
/**
 * CLI entry point: reads the command name from `process.argv`, passes the
 * remaining arguments to the matching `cmd*` handler, and prints the usage
 * text when the command is missing or not recognized.
 *
 * @returns {Promise<void>} Settles once the selected handler has finished.
 */
async function main() {
  const [command, ...restArgs] = process.argv.slice(2);
  // Handlers are wrapped in thunks so that a handler is only referenced when
  // its command is the one actually selected.
  const dispatch = new Map([
    ["generate", () => cmdGenerate(restArgs)],
    ["validate", () => cmdValidate(restArgs)],
    ["compare", () => cmdCompare(restArgs)],
    ["export-trajectories", () => cmdExportTrajectories(restArgs)],
    ["run-collection", () => cmdRunCollection(restArgs)],
    ["list-collections", () => cmdListCollections(restArgs)],
    ["rollback-prompt", () => cmdRollbackPrompt(restArgs)]
  ]);
  const runSelected = dispatch.get(command);
  if (runSelected) {
    await runSelected();
    return;
  }
  // No command, or an unrecognized one: print the full usage text.
  console.log(`Usage: cli.ts <command> [options]

Commands:
generate Generate synthetic training data
--variants N Number of variants per blueprint (default: 5)
--output DIR Output directory (default: ./training-data)
--concurrency N API call concurrency (default: 5)
--contexts X,Y Filter to specific contexts
--decisions X,Y Filter to RESPOND,IGNORE,STOP

validate Validate a generated dataset
--input PATH Path to raw_samples.json

export-trajectories Re-export raw recorded trajectories to per-task JSONL
-i, --input DIR Trajectory dir (default: $ELIZA_TRAJECTORY_DIR or ~/.eliza/trajectories)
-o, --output DIR Output dir (default: ./training-data)
--max-per-task N Cap examples per task bucket

run-collection Collect HF/feed/natural/test/scenario/eval/benchmark evidence
-o, --output DIR Output dir (default: training state collection dir)
--tiers LIST Eliza-1 benchmark tiers, comma-separated, or "all" (default: 2b)
(all expands to ${ELIZA_ONE_BENCHMARK_TIER_LIST})
--live Execute live external work instead of dry-run defaults
--preflight-only Print live-readiness checks without collecting artifacts
--probe-endpoints Probe local OpenAI-compatible endpoints during preflight
--skip-matrix Skip benchmark matrix generation
--skip-hf Skip Hugging Face ingest
--hf-files LIST Comma-separated Hugging Face dataset paths to ingest
--skip-feed Skip feed generation
--skip-natural Skip natural trajectory export
--skip-tests Skip test trajectory collection
--skip-scenarios Skip scenario trajectories
--skip-action-benchmark Skip Eliza harness action benchmark execution
--benchmark-filter LIST Comma-separated action benchmark case ids
--benchmark-model ID Run action benchmark for one explicit model id
--benchmark-runtime-model ID Served local/provider model id (defaults to --benchmark-model)
--benchmark-variant V reference, base, or trained label for the explicit model
--cerebras-max-samples N Max prompts for benchmark-vs-Cerebras (default: 50)
--cerebras-variants V Eliza variants for benchmark-vs-Cerebras: trained, base, both (default: both)
--natural-sanitized-jsonl PATH Existing sanitized app trajectory JSONL
--natural-raw-jsonl PATH Existing raw app trajectory JSONL
--natural-run-id ID Run id to record on imported natural trajectories
--natural-tasks LIST Task buckets for natural trajectory export
--include-natural-raw Copy raw natural trajectory JSONL into the collection
--skip-eval-comparison Skip dry-run local eval comparison artifact
--skip-cerebras Skip benchmark-vs-Cerebras step
--skip-model-registry Skip persisted Eliza-1 model registry manifests
--skip-bundle-stage Skip Eliza-1 bundle stage step

list-collections List saved training collection runs
--root DIR Collection root or a single collection output dir
-n, --limit N Maximum runs to print (default: 20)
Prints gaps=<id>:<status>-><capability> params={...}

compare A/B compare two prompts on a trajectory dataset
--baseline PATH Path to baseline prompt (.txt)
--variant PATH Path to variant prompt (.txt)
--dataset PATH Path to JSONL dataset (eliza_native_v1)
--task NAME should_respond | context_routing | action_planner | response | media_description | view_context
--scorer KIND agreement | planner_action (default: from --task)
--mode MODE vs_historical (default) | pairwise
--max-examples N Cap evaluations
--tolerance F Pass threshold delta (default: 0.02)
--temperature F Sampling temperature (default: 0)
--max-tokens N Per-completion cap (default: 512)
-o, --output PATH Write JSON result to file
Exits with code 2 if variant regresses beyond --tolerance.

rollback-prompt Flip the optimized-prompt 'current' and 'previous' symlinks
<task> Required positional: should_respond | context_routing |
action_planner | response | media_description |
view_context
--store-root DIR Override the optimized-prompts store root (default:
$ELIZA_STATE_DIR / ~/.eliza/optimized-prompts)

Environment:
ANTHROPIC_API_KEY Use Claude as teacher model
OPENAI_API_KEY Use GPT-5 as teacher model
`);
}
|
|
991
|
+
// Run the CLI only when this module's own file path equals the script path
// in process.argv[1]; any rejection from main() is printed and the process
// exits with code 1.
// NOTE(review): the comparison is an exact string match, so it presumably
// fails when the script is launched through a symlink — confirm.
if (process.argv[1] && process.argv[1] === fileURLToPath(import.meta.url)) {
  main().then(undefined, (failure) => {
    console.error(failure);
    process.exit(1);
  });
}
|
|
997
|
+
export {
|
|
998
|
+
buildRunCollectionOptionsFromCliArgs,
|
|
999
|
+
formatListTrainingCollectionsSummary,
|
|
1000
|
+
formatRunCollectionSummary,
|
|
1001
|
+
formatTrainingCollectionPreflightSummary
|
|
1002
|
+
};
|
|
1003
|
+
//# sourceMappingURL=cli.js.map
|