@elizaos/plugin-training 2.0.3-beta.6 → 2.0.3-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/backends/native.d.ts +96 -0
- package/dist/backends/native.d.ts.map +1 -0
- package/dist/backends/native.js +308 -0
- package/dist/backends/native.js.map +1 -0
- package/dist/cli/train.d.ts +22 -0
- package/dist/cli/train.d.ts.map +1 -0
- package/dist/cli/train.js +219 -0
- package/dist/cli/train.js.map +1 -0
- package/dist/core/action-benchmark-runner.d.ts +55 -0
- package/dist/core/action-benchmark-runner.d.ts.map +1 -0
- package/dist/core/action-benchmark-runner.js +341 -0
- package/dist/core/action-benchmark-runner.js.map +1 -0
- package/dist/core/artifact-store.d.ts +72 -0
- package/dist/core/artifact-store.d.ts.map +1 -0
- package/dist/core/artifact-store.js +50 -0
- package/dist/core/artifact-store.js.map +1 -0
- package/dist/core/benchmark-matrix-artifact.d.ts +102 -0
- package/dist/core/benchmark-matrix-artifact.d.ts.map +1 -0
- package/dist/core/benchmark-matrix-artifact.js +381 -0
- package/dist/core/benchmark-matrix-artifact.js.map +1 -0
- package/dist/core/benchmark-vs-cerebras-runner.d.ts +37 -0
- package/dist/core/benchmark-vs-cerebras-runner.d.ts.map +1 -0
- package/dist/core/benchmark-vs-cerebras-runner.js +151 -0
- package/dist/core/benchmark-vs-cerebras-runner.js.map +1 -0
- package/dist/core/cerebras-eval-model.d.ts +54 -0
- package/dist/core/cerebras-eval-model.d.ts.map +1 -0
- package/dist/core/cerebras-eval-model.js +249 -0
- package/dist/core/cerebras-eval-model.js.map +1 -0
- package/dist/core/cli.d.ts +15 -0
- package/dist/core/cli.d.ts.map +1 -0
- package/dist/core/cli.js +1003 -0
- package/dist/core/cli.js.map +1 -0
- package/dist/core/context-audit.d.ts +51 -0
- package/dist/core/context-audit.d.ts.map +1 -0
- package/dist/core/context-audit.js +166 -0
- package/dist/core/context-audit.js.map +1 -0
- package/dist/core/context-catalog.d.ts +47 -0
- package/dist/core/context-catalog.d.ts.map +1 -0
- package/dist/core/context-catalog.js +269 -0
- package/dist/core/context-catalog.js.map +1 -0
- package/dist/core/context-types.d.ts +3 -0
- package/dist/core/context-types.d.ts.map +1 -0
- package/dist/core/context-types.js +18 -0
- package/dist/core/context-types.js.map +1 -0
- package/dist/core/dataset-generator.d.ts +135 -0
- package/dist/core/dataset-generator.d.ts.map +1 -0
- package/dist/core/dataset-generator.js +895 -0
- package/dist/core/dataset-generator.js.map +1 -0
- package/dist/core/eliza1-benchmark-recipe.d.ts +18 -0
- package/dist/core/eliza1-benchmark-recipe.d.ts.map +1 -0
- package/dist/core/eliza1-benchmark-recipe.js +64 -0
- package/dist/core/eliza1-benchmark-recipe.js.map +1 -0
- package/dist/core/eliza1-bundle-stager.d.ts +57 -0
- package/dist/core/eliza1-bundle-stager.d.ts.map +1 -0
- package/dist/core/eliza1-bundle-stager.js +149 -0
- package/dist/core/eliza1-bundle-stager.js.map +1 -0
- package/dist/core/ensure-cron-job.d.ts +53 -0
- package/dist/core/ensure-cron-job.d.ts.map +1 -0
- package/dist/core/ensure-cron-job.js +51 -0
- package/dist/core/ensure-cron-job.js.map +1 -0
- package/dist/core/eval-comparison-artifact.d.ts +72 -0
- package/dist/core/eval-comparison-artifact.d.ts.map +1 -0
- package/dist/core/eval-comparison-artifact.js +281 -0
- package/dist/core/eval-comparison-artifact.js.map +1 -0
- package/dist/core/feed-generation-runner.d.ts +37 -0
- package/dist/core/feed-generation-runner.d.ts.map +1 -0
- package/dist/core/feed-generation-runner.js +232 -0
- package/dist/core/feed-generation-runner.js.map +1 -0
- package/dist/core/html-escape.d.ts +5 -0
- package/dist/core/html-escape.d.ts.map +1 -0
- package/dist/core/html-escape.js +11 -0
- package/dist/core/html-escape.js.map +1 -0
- package/dist/core/huggingface-dataset-ingest.d.ts +52 -0
- package/dist/core/huggingface-dataset-ingest.d.ts.map +1 -0
- package/dist/core/huggingface-dataset-ingest.js +134 -0
- package/dist/core/huggingface-dataset-ingest.js.map +1 -0
- package/dist/core/index.d.ts +29 -0
- package/dist/core/index.d.ts.map +1 -0
- package/dist/core/index.js +204 -0
- package/dist/core/index.js.map +1 -0
- package/dist/core/privacy-filter.d.ts +95 -0
- package/dist/core/privacy-filter.d.ts.map +1 -0
- package/dist/core/privacy-filter.js +324 -0
- package/dist/core/privacy-filter.js.map +1 -0
- package/dist/core/promotion-gate.d.ts +117 -0
- package/dist/core/promotion-gate.d.ts.map +1 -0
- package/dist/core/promotion-gate.js +85 -0
- package/dist/core/promotion-gate.js.map +1 -0
- package/dist/core/promotion-persist.d.ts +116 -0
- package/dist/core/promotion-persist.d.ts.map +1 -0
- package/dist/core/promotion-persist.js +93 -0
- package/dist/core/promotion-persist.js.map +1 -0
- package/dist/core/prompt-compare.d.ts +99 -0
- package/dist/core/prompt-compare.d.ts.map +1 -0
- package/dist/core/prompt-compare.js +210 -0
- package/dist/core/prompt-compare.js.map +1 -0
- package/dist/core/replay-validator.d.ts +136 -0
- package/dist/core/replay-validator.d.ts.map +1 -0
- package/dist/core/replay-validator.js +312 -0
- package/dist/core/replay-validator.js.map +1 -0
- package/dist/core/roleplay-executor.d.ts +123 -0
- package/dist/core/roleplay-executor.d.ts.map +1 -0
- package/dist/core/roleplay-executor.js +675 -0
- package/dist/core/roleplay-executor.js.map +1 -0
- package/dist/core/roleplay-trajectories.d.ts +54 -0
- package/dist/core/roleplay-trajectories.d.ts.map +1 -0
- package/dist/core/roleplay-trajectories.js +88 -0
- package/dist/core/roleplay-trajectories.js.map +1 -0
- package/dist/core/scenario-blueprints.d.ts +62 -0
- package/dist/core/scenario-blueprints.d.ts.map +1 -0
- package/dist/core/scenario-blueprints.js +850 -0
- package/dist/core/scenario-blueprints.js.map +1 -0
- package/dist/core/scenario-runner.d.ts +36 -0
- package/dist/core/scenario-runner.d.ts.map +1 -0
- package/dist/core/scenario-runner.js +216 -0
- package/dist/core/scenario-runner.js.map +1 -0
- package/dist/core/skill-scoring-cron.d.ts +57 -0
- package/dist/core/skill-scoring-cron.d.ts.map +1 -0
- package/dist/core/skill-scoring-cron.js +180 -0
- package/dist/core/skill-scoring-cron.js.map +1 -0
- package/dist/core/test-trajectory-collector.d.ts +37 -0
- package/dist/core/test-trajectory-collector.d.ts.map +1 -0
- package/dist/core/test-trajectory-collector.js +225 -0
- package/dist/core/test-trajectory-collector.js.map +1 -0
- package/dist/core/track-c-queue-task.d.ts +37 -0
- package/dist/core/track-c-queue-task.d.ts.map +1 -0
- package/dist/core/track-c-queue-task.js +104 -0
- package/dist/core/track-c-queue-task.js.map +1 -0
- package/dist/core/training-analysis-index.d.ts +104 -0
- package/dist/core/training-analysis-index.d.ts.map +1 -0
- package/dist/core/training-analysis-index.js +3297 -0
- package/dist/core/training-analysis-index.js.map +1 -0
- package/dist/core/training-collection-runner.d.ts +508 -0
- package/dist/core/training-collection-runner.d.ts.map +1 -0
- package/dist/core/training-collection-runner.js +2299 -0
- package/dist/core/training-collection-runner.js.map +1 -0
- package/dist/core/training-config.d.ts +52 -0
- package/dist/core/training-config.d.ts.map +1 -0
- package/dist/core/training-config.js +117 -0
- package/dist/core/training-config.js.map +1 -0
- package/dist/core/training-orchestrator.d.ts +112 -0
- package/dist/core/training-orchestrator.d.ts.map +1 -0
- package/dist/core/training-orchestrator.js +729 -0
- package/dist/core/training-orchestrator.js.map +1 -0
- package/dist/core/training-readiness-report.d.ts +52 -0
- package/dist/core/training-readiness-report.d.ts.map +1 -0
- package/dist/core/training-readiness-report.js +765 -0
- package/dist/core/training-readiness-report.js.map +1 -0
- package/dist/core/trajectory-consumer.d.ts +15 -0
- package/dist/core/trajectory-consumer.d.ts.map +1 -0
- package/dist/core/trajectory-consumer.js +61 -0
- package/dist/core/trajectory-consumer.js.map +1 -0
- package/dist/core/trajectory-export-bundle.d.ts +95 -0
- package/dist/core/trajectory-export-bundle.d.ts.map +1 -0
- package/dist/core/trajectory-export-bundle.js +561 -0
- package/dist/core/trajectory-export-bundle.js.map +1 -0
- package/dist/core/trajectory-export-cron.d.ts +57 -0
- package/dist/core/trajectory-export-cron.d.ts.map +1 -0
- package/dist/core/trajectory-export-cron.js +170 -0
- package/dist/core/trajectory-export-cron.js.map +1 -0
- package/dist/core/trajectory-hf-upload.d.ts +50 -0
- package/dist/core/trajectory-hf-upload.d.ts.map +1 -0
- package/dist/core/trajectory-hf-upload.js +111 -0
- package/dist/core/trajectory-hf-upload.js.map +1 -0
- package/dist/core/trajectory-task-datasets.d.ts +62 -0
- package/dist/core/trajectory-task-datasets.d.ts.map +1 -0
- package/dist/core/trajectory-task-datasets.js +427 -0
- package/dist/core/trajectory-task-datasets.js.map +1 -0
- package/dist/core/wait-for-service.d.ts +25 -0
- package/dist/core/wait-for-service.d.ts.map +1 -0
- package/dist/core/wait-for-service.js +19 -0
- package/dist/core/wait-for-service.js.map +1 -0
- package/dist/core/workspace-runtime.d.ts +4 -0
- package/dist/core/workspace-runtime.d.ts.map +1 -0
- package/dist/core/workspace-runtime.js +25 -0
- package/dist/core/workspace-runtime.js.map +1 -0
- package/dist/dspy/artifact.d.ts +54 -0
- package/dist/dspy/artifact.d.ts.map +1 -0
- package/dist/dspy/artifact.js +61 -0
- package/dist/dspy/artifact.js.map +1 -0
- package/dist/dspy/chain-of-thought.d.ts +27 -0
- package/dist/dspy/chain-of-thought.d.ts.map +1 -0
- package/dist/dspy/chain-of-thought.js +43 -0
- package/dist/dspy/chain-of-thought.js.map +1 -0
- package/dist/dspy/examples.d.ts +72 -0
- package/dist/dspy/examples.d.ts.map +1 -0
- package/dist/dspy/examples.js +105 -0
- package/dist/dspy/examples.js.map +1 -0
- package/dist/dspy/index.d.ts +15 -0
- package/dist/dspy/index.d.ts.map +1 -0
- package/dist/dspy/index.js +40 -0
- package/dist/dspy/index.js.map +1 -0
- package/dist/dspy/lm-adapter.d.ts +100 -0
- package/dist/dspy/lm-adapter.d.ts.map +1 -0
- package/dist/dspy/lm-adapter.js +81 -0
- package/dist/dspy/lm-adapter.js.map +1 -0
- package/dist/dspy/optimizers/dspy-bootstrap-fewshot.d.ts +23 -0
- package/dist/dspy/optimizers/dspy-bootstrap-fewshot.d.ts.map +1 -0
- package/dist/dspy/optimizers/dspy-bootstrap-fewshot.js +85 -0
- package/dist/dspy/optimizers/dspy-bootstrap-fewshot.js.map +1 -0
- package/dist/dspy/optimizers/dspy-copro.d.ts +29 -0
- package/dist/dspy/optimizers/dspy-copro.d.ts.map +1 -0
- package/dist/dspy/optimizers/dspy-copro.js +141 -0
- package/dist/dspy/optimizers/dspy-copro.js.map +1 -0
- package/dist/dspy/optimizers/dspy-mipro.d.ts +37 -0
- package/dist/dspy/optimizers/dspy-mipro.d.ts.map +1 -0
- package/dist/dspy/optimizers/dspy-mipro.js +194 -0
- package/dist/dspy/optimizers/dspy-mipro.js.map +1 -0
- package/dist/dspy/optimizers/index.d.ts +5 -0
- package/dist/dspy/optimizers/index.d.ts.map +1 -0
- package/dist/dspy/optimizers/index.js +11 -0
- package/dist/dspy/optimizers/index.js.map +1 -0
- package/dist/dspy/optimizers/types.d.ts +39 -0
- package/dist/dspy/optimizers/types.d.ts.map +1 -0
- package/dist/dspy/optimizers/types.js +1 -0
- package/dist/dspy/optimizers/types.js.map +1 -0
- package/dist/dspy/predict.d.ts +49 -0
- package/dist/dspy/predict.d.ts.map +1 -0
- package/dist/dspy/predict.js +73 -0
- package/dist/dspy/predict.js.map +1 -0
- package/dist/dspy/signature.d.ts +88 -0
- package/dist/dspy/signature.d.ts.map +1 -0
- package/dist/dspy/signature.js +205 -0
- package/dist/dspy/signature.js.map +1 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +15 -0
- package/dist/index.js.map +1 -0
- package/dist/optimizers/bootstrap-fewshot.d.ts +42 -0
- package/dist/optimizers/bootstrap-fewshot.d.ts.map +1 -0
- package/dist/optimizers/bootstrap-fewshot.js +92 -0
- package/dist/optimizers/bootstrap-fewshot.js.map +1 -0
- package/dist/optimizers/gepa.d.ts +63 -0
- package/dist/optimizers/gepa.d.ts.map +1 -0
- package/dist/optimizers/gepa.js +232 -0
- package/dist/optimizers/gepa.js.map +1 -0
- package/dist/optimizers/index.d.ts +7 -0
- package/dist/optimizers/index.d.ts.map +1 -0
- package/dist/optimizers/index.js +51 -0
- package/dist/optimizers/index.js.map +1 -0
- package/dist/optimizers/instruction-search.d.ts +39 -0
- package/dist/optimizers/instruction-search.d.ts.map +1 -0
- package/dist/optimizers/instruction-search.js +108 -0
- package/dist/optimizers/instruction-search.js.map +1 -0
- package/dist/optimizers/prompt-evolution.d.ts +39 -0
- package/dist/optimizers/prompt-evolution.d.ts.map +1 -0
- package/dist/optimizers/prompt-evolution.js +101 -0
- package/dist/optimizers/prompt-evolution.js.map +1 -0
- package/dist/optimizers/scoring.d.ts +139 -0
- package/dist/optimizers/scoring.d.ts.map +1 -0
- package/dist/optimizers/scoring.js +299 -0
- package/dist/optimizers/scoring.js.map +1 -0
- package/dist/optimizers/types.d.ts +105 -0
- package/dist/optimizers/types.d.ts.map +1 -0
- package/dist/optimizers/types.js +1 -0
- package/dist/optimizers/types.js.map +1 -0
- package/dist/register-runtime.d.ts +3 -0
- package/dist/register-runtime.d.ts.map +1 -0
- package/dist/register-runtime.js +60 -0
- package/dist/register-runtime.js.map +1 -0
- package/dist/register-terminal-view.d.ts +15 -0
- package/dist/register-terminal-view.d.ts.map +1 -0
- package/dist/register-terminal-view.js +31 -0
- package/dist/register-terminal-view.js.map +1 -0
- package/dist/routes/experience-routes.d.ts +21 -0
- package/dist/routes/experience-routes.d.ts.map +1 -0
- package/dist/routes/experience-routes.js +513 -0
- package/dist/routes/experience-routes.js.map +1 -0
- package/dist/routes/index.d.ts +5 -0
- package/dist/routes/index.d.ts.map +1 -0
- package/dist/routes/index.js +17 -0
- package/dist/routes/index.js.map +1 -0
- package/dist/routes/training-routes.d.ts +10 -0
- package/dist/routes/training-routes.d.ts.map +1 -0
- package/dist/routes/training-routes.js +1239 -0
- package/dist/routes/training-routes.js.map +1 -0
- package/dist/routes/training-vast-routes.d.ts +35 -0
- package/dist/routes/training-vast-routes.d.ts.map +1 -0
- package/dist/routes/training-vast-routes.js +249 -0
- package/dist/routes/training-vast-routes.js.map +1 -0
- package/dist/routes/trajectory-routes.d.ts +19 -0
- package/dist/routes/trajectory-routes.d.ts.map +1 -0
- package/dist/routes/trajectory-routes.js +1122 -0
- package/dist/routes/trajectory-routes.js.map +1 -0
- package/dist/services/index.d.ts +9 -0
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/index.js +63 -0
- package/dist/services/index.js.map +1 -0
- package/dist/services/training-backend-check.d.ts +8 -0
- package/dist/services/training-backend-check.d.ts.map +1 -0
- package/dist/services/training-backend-check.js +31 -0
- package/dist/services/training-backend-check.js.map +1 -0
- package/dist/services/training-service-like.d.ts +40 -0
- package/dist/services/training-service-like.d.ts.map +1 -0
- package/dist/services/training-service-like.js +1 -0
- package/dist/services/training-service-like.js.map +1 -0
- package/dist/services/training-service-registry.d.ts +4 -0
- package/dist/services/training-service-registry.d.ts.map +1 -0
- package/dist/services/training-service-registry.js +12 -0
- package/dist/services/training-service-registry.js.map +1 -0
- package/dist/services/training-service.d.ts +59 -0
- package/dist/services/training-service.d.ts.map +1 -0
- package/dist/services/training-service.js +154 -0
- package/dist/services/training-service.js.map +1 -0
- package/dist/services/training-trigger.d.ts +177 -0
- package/dist/services/training-trigger.d.ts.map +1 -0
- package/dist/services/training-trigger.js +300 -0
- package/dist/services/training-trigger.js.map +1 -0
- package/dist/services/training-vast-service.d.ts +149 -0
- package/dist/services/training-vast-service.d.ts.map +1 -0
- package/dist/services/training-vast-service.js +648 -0
- package/dist/services/training-vast-service.js.map +1 -0
- package/dist/services/vast-inference-stats.d.ts +37 -0
- package/dist/services/vast-inference-stats.d.ts.map +1 -0
- package/dist/services/vast-inference-stats.js +81 -0
- package/dist/services/vast-inference-stats.js.map +1 -0
- package/dist/services/vast-job-store.d.ts +74 -0
- package/dist/services/vast-job-store.d.ts.map +1 -0
- package/dist/services/vast-job-store.js +194 -0
- package/dist/services/vast-job-store.js.map +1 -0
- package/dist/services/vast-subprocess.d.ts +27 -0
- package/dist/services/vast-subprocess.d.ts.map +1 -0
- package/dist/services/vast-subprocess.js +78 -0
- package/dist/services/vast-subprocess.js.map +1 -0
- package/dist/setup-routes.d.ts +17 -0
- package/dist/setup-routes.d.ts.map +1 -0
- package/dist/setup-routes.js +319 -0
- package/dist/setup-routes.js.map +1 -0
- package/dist/ui/FineTuningSpatialView.d.ts +49 -0
- package/dist/ui/FineTuningSpatialView.d.ts.map +1 -0
- package/dist/ui/FineTuningSpatialView.js +154 -0
- package/dist/ui/FineTuningSpatialView.js.map +1 -0
- package/dist/ui/FineTuningView.d.ts +7 -0
- package/dist/ui/FineTuningView.d.ts.map +1 -0
- package/dist/ui/FineTuningView.helpers.d.ts +17 -0
- package/dist/ui/FineTuningView.helpers.d.ts.map +1 -0
- package/dist/ui/FineTuningView.helpers.js +30 -0
- package/dist/ui/FineTuningView.helpers.js.map +1 -0
- package/dist/ui/FineTuningView.interact.d.ts +2 -0
- package/dist/ui/FineTuningView.interact.d.ts.map +1 -0
- package/dist/ui/FineTuningView.interact.js +300 -0
- package/dist/ui/FineTuningView.interact.js.map +1 -0
- package/dist/ui/FineTuningView.js +4653 -0
- package/dist/ui/FineTuningView.js.map +1 -0
- package/dist/ui/fine-tuning-panels.d.ts +100 -0
- package/dist/ui/fine-tuning-panels.d.ts.map +1 -0
- package/dist/ui/fine-tuning-panels.helpers.d.ts +19 -0
- package/dist/ui/fine-tuning-panels.helpers.d.ts.map +1 -0
- package/dist/ui/fine-tuning-panels.helpers.js +77 -0
- package/dist/ui/fine-tuning-panels.helpers.js.map +1 -0
- package/dist/ui/fine-tuning-panels.js +928 -0
- package/dist/ui/fine-tuning-panels.js.map +1 -0
- package/dist/ui/index.d.ts +5 -0
- package/dist/ui/index.d.ts.map +1 -0
- package/dist/ui/index.js +5 -0
- package/dist/ui/index.js.map +1 -0
- package/dist/ui/training-view-bundle.d.ts +3 -0
- package/dist/ui/training-view-bundle.d.ts.map +1 -0
- package/dist/ui/training-view-bundle.js +7 -0
- package/dist/ui/training-view-bundle.js.map +1 -0
- package/dist/views/bundle.js +5312 -0
- package/dist/views/bundle.js.map +1 -0
- package/package.json +7 -7
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/** Schema identifier written to the `schema` field of every eval-comparison artifact. */
export declare const EVAL_COMPARISON_ARTIFACT_SCHEMA = "eliza_eval_comparison_artifact";
|
|
2
|
+
/** Version number written to the `version` field of every eval-comparison artifact. */
export declare const EVAL_COMPARISON_ARTIFACT_VERSION = 1;
|
|
3
|
+
/** Input accepted by the artifact builder and writer. */
export interface EvalComparisonArtifactInput {
    /** Raw comparison report; it is normalized into the artifact and also kept verbatim as `raw`. */
    report: Record<string, unknown>;
    /** Path of the report file, copied into the artifact unchanged. */
    reportPath?: string;
    /** Directory the artifact is written to; a timestamped directory under the training state root is used when omitted. */
    outputDir?: string;
    /** Provenance metadata; defaults to `{ kind: "training_eval_comparison" }`. */
    source?: Record<string, unknown>;
}
|
|
9
|
+
/**
 * Options for running the local model comparison script.
 * Either `manifestPath`, or all of `model`, `trainedModelPath` and `backend`,
 * must be provided.
 */
export interface EvalComparisonRunOptions {
    /** Root of the training package; defaults to `<cwd>/packages/training`. */
    trainingRoot?: string;
    /** Python executable to launch; defaults to `python3`. */
    python?: string;
    /** Forwarded as `--manifest`; when set, `model`, `trainedModelPath` and `backend` are not forwarded. */
    manifestPath?: string;
    /** Forwarded as `--model`. */
    model?: string;
    /** Forwarded as `--trained-model-path`. */
    trainedModelPath?: string;
    /** Forwarded as `--backend`. */
    backend?: "mlx" | "cuda" | "cpu";
    /** Forwarded as `--prompt-file`. */
    promptFile?: string;
    /** Forwarded as `--max-tokens` after flooring and clamping to a minimum of 1. */
    maxTokens?: number;
    /** Forwarded as `--system-prompt`. */
    systemPrompt?: string;
    /** Report file path forwarded as `--output`; defaults to `local_model_comparison.json` inside `outputDir`. */
    outputPath?: string;
    /** Output directory; a timestamped directory under the training state root is used when omitted. */
    outputDir?: string;
    /** When true, an artifact is written from a placeholder report flagged `dry_run: true`. */
    dryRun?: boolean;
}
|
|
23
|
+
/** Normalized base-vs-trained comparison produced from a raw report. */
export interface EvalComparisonArtifact {
    schema: typeof EVAL_COMPARISON_ARTIFACT_SCHEMA;
    version: typeof EVAL_COMPARISON_ARTIFACT_VERSION;
    /** Timestamp taken from the report (`timestamp`, `generated_at` or `evaluated_at`), else the build time in ISO format. */
    generatedAt: string;
    reportPath?: string;
    /** Provenance metadata supplied by the caller, or the default source record. */
    source: Record<string, unknown>;
    /** Model identifiers extracted from the report; `null` when the report does not provide one. */
    models: {
        base: string | null;
        trained: string | null;
        backend: string | null;
    };
    /** Headline numbers; each is `null` when the report lacks the value needed to compute it. */
    metrics: {
        baseScore: number | null;
        trainedScore: number | null;
        /** `trainedScore - baseScore`. */
        improvementAbsolute: number | null;
        /** Relative change in percent; `null` when the base score is 0. */
        improvementPercent: number | null;
        baseLatencyMs: number | null;
        trainedLatencyMs: number | null;
        /** `trainedLatencyMs - baseLatencyMs`. */
        latencyDeltaMs: number | null;
        promptCount: number | null;
        distinctResponseCount: number | null;
    };
    /** Summary sections of the report as found, or `null` when absent. */
    summaries: {
        base: Record<string, unknown> | null;
        trained: Record<string, unknown> | null;
        comparison: Record<string, unknown> | null;
    };
    /** The unmodified input report. */
    raw: Record<string, unknown>;
}
|
|
52
|
+
/** Result of writing an eval-comparison artifact to disk. */
export interface EvalComparisonArtifactResult {
    /** Directory the artifact was written into. */
    outputDir: string;
    /** Full path of the written `eval-comparison.json` file. */
    artifactPath: string;
    /** The artifact payload that was serialized. */
    artifact: EvalComparisonArtifact;
}
|
|
57
|
+
/**
 * Result of a local comparison run: the written artifact plus details of the
 * invocation. NOTE(review): the code that fills these fields is not visible
 * in this excerpt; the field notes below are inferred from the names and types.
 */
export interface EvalComparisonRunResult extends EvalComparisonArtifactResult {
    /** Resolved training root used for the run. */
    trainingRoot: string;
    /** Presumably the executable followed by its arguments; confirm against the implementation. */
    command: string[];
    /** Path of the comparison report file. */
    reportPath: string;
    stdout: string;
    stderr: string;
    exitCode: number;
}
|
|
65
|
+
/**
 * Builds the argument list for `scripts/rl/compare_local_models.py`; the
 * script path is the first element. Throws when neither `manifestPath` nor
 * the full `model` / `trainedModelPath` / `backend` triple is provided.
 */
export declare function buildLocalEvalComparisonArgs(options: EvalComparisonRunOptions, resolved: {
    trainingRoot: string;
    reportPath: string;
}): string[];
|
|
69
|
+
/** Normalizes a raw comparison report into an artifact payload without writing anything to disk. */
export declare function buildEvalComparisonArtifactPayload(input: EvalComparisonArtifactInput): EvalComparisonArtifact;
|
|
70
|
+
/** Builds the artifact payload and writes it as `eval-comparison.json` inside the output directory, creating the directory if needed. */
export declare function writeEvalComparisonArtifact(input: EvalComparisonArtifactInput): Promise<EvalComparisonArtifactResult>;
|
|
71
|
+
/**
 * Resolves paths, creates the output directory and builds the comparison
 * script arguments; with `dryRun` it writes a placeholder artifact.
 * NOTE(review): the remainder of the implementation is not visible in this excerpt.
 */
export declare function runLocalEvalComparison(options: EvalComparisonRunOptions): Promise<EvalComparisonRunResult>;
|
|
72
|
+
//# sourceMappingURL=eval-comparison-artifact.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval-comparison-artifact.d.ts","sourceRoot":"","sources":["../../src/core/eval-comparison-artifact.ts"],"names":[],"mappings":"AAKA,eAAO,MAAM,+BAA+B,mCAAmC,CAAC;AAChF,eAAO,MAAM,gCAAgC,IAAI,CAAC;AAElD,MAAM,WAAW,2BAA2B;IAC1C,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAChC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAClC;AAED,MAAM,WAAW,wBAAwB;IACvC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,OAAO,CAAC,EAAE,KAAK,GAAG,MAAM,GAAG,KAAK,CAAC;IACjC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED,MAAM,WAAW,sBAAsB;IACrC,MAAM,EAAE,OAAO,+BAA+B,CAAC;IAC/C,OAAO,EAAE,OAAO,gCAAgC,CAAC;IACjD,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAChC,MAAM,EAAE;QACN,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;QACpB,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;QACvB,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;KACxB,CAAC;IACF,OAAO,EAAE;QACP,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;QACzB,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;QAC5B,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;QACnC,kBAAkB,EAAE,MAAM,GAAG,IAAI,CAAC;QAClC,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;QAC7B,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;QAChC,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;QAC9B,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;QAC3B,qBAAqB,EAAE,MAAM,GAAG,IAAI,CAAC;KACtC,CAAC;IACF,SAAS,EAAE;QACT,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;QACrC,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;QACxC,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;KAC5C,CAAC;IACF,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAC9B;AAED,MAAM,WAAW,4BAA4B;IAC3C,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,sBAAsB,CAAC;CAClC;AAED,MAAM,WAAW,uBAAwB,SAAQ,4BAA4B;IAC3E,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,UAAU,EAAE,
MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,wBAAgB,4BAA4B,CAC1C,OAAO,EAAE,wBAAwB,EACjC,QAAQ,EAAE;IAAE,YAAY,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAA;CAAE,GACrD,MAAM,EAAE,CA8BV;AAwED,wBAAgB,kCAAkC,CAChD,KAAK,EAAE,2BAA2B,GACjC,sBAAsB,CA0FxB;AAMD,wBAAsB,2BAA2B,CAC/C,KAAK,EAAE,2BAA2B,GACjC,OAAO,CAAC,4BAA4B,CAAC,CAavC;AA6BD,wBAAsB,sBAAsB,CAC1C,OAAO,EAAE,wBAAwB,GAChC,OAAO,CAAC,uBAAuB,CAAC,CAwFlC"}
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
import { spawn } from "node:child_process";
|
|
2
|
+
import { mkdir, readFile, writeFile } from "node:fs/promises";
|
|
3
|
+
import { join, resolve } from "node:path";
|
|
4
|
+
import { trainingStateRoot } from "./training-config.js";
|
|
5
|
+
// Schema identifier stamped into the `schema` field of every artifact built in this module.
const EVAL_COMPARISON_ARTIFACT_SCHEMA = "eliza_eval_comparison_artifact";
|
|
6
|
+
// Version number stamped into the `version` field of every artifact built in this module.
const EVAL_COMPARISON_ARTIFACT_VERSION = 1;
|
|
7
|
+
/**
 * Build the argument list for `scripts/rl/compare_local_models.py`.
 * The script path is the first element, so the result can be passed straight
 * to the Python executable.
 *
 * @param {object} options - Run options. Either `manifestPath`, or all of
 *   `model`, `trainedModelPath` and `backend`, must be set.
 * @param {{ trainingRoot: string, reportPath: string }} resolved - Resolved
 *   training root (used to locate the script) and report output path.
 * @returns {string[]} Script path followed by its command-line flags.
 * @throws {Error} When neither a manifest nor the full model triple is given.
 */
function buildLocalEvalComparisonArgs(options, resolved) {
  const scriptPath = join(
    resolved.trainingRoot,
    "scripts",
    "rl",
    "compare_local_models.py"
  );
  const args = [scriptPath];
  if (options.manifestPath) {
    args.push("--manifest", options.manifestPath);
  } else {
    if (!options.model || !options.trainedModelPath || !options.backend) {
      throw new Error(
        "Provide either manifestPath or model, trainedModelPath, and backend"
      );
    }
    args.push("--model", options.model);
    args.push("--trained-model-path", options.trainedModelPath);
    args.push("--backend", options.backend);
  }
  if (options.promptFile) args.push("--prompt-file", options.promptFile);
  // Number.isFinite rejects non-numbers as well as NaN/±Infinity, which would
  // otherwise be forwarded to the script as the literal text "NaN"/"Infinity".
  if (Number.isFinite(options.maxTokens)) {
    args.push(
      "--max-tokens",
      String(Math.max(1, Math.floor(options.maxTokens)))
    );
  }
  if (options.systemPrompt) args.push("--system-prompt", options.systemPrompt);
  args.push("--output", resolved.reportPath);
  return args;
}
|
|
38
|
+
/**
 * Return `value` when it is a non-null, non-array object; otherwise `null`.
 *
 * @param {unknown} value - Candidate value.
 * @returns {object | null} The same object reference, or `null`.
 */
function asRecord(value) {
  const isObjectRecord =
    value !== null && typeof value === "object" && !Array.isArray(value);
  return isObjectRecord ? value : null;
}
|
|
41
|
+
/**
 * Return the trimmed string when `value` is a string with non-whitespace
 * content; otherwise `null`.
 *
 * @param {unknown} value - Candidate value.
 * @returns {string | null} Trimmed text, or `null`.
 */
function asString(value) {
  if (typeof value !== "string") return null;
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : null;
}
|
|
44
|
+
/**
 * Return `value` when it is a finite number; otherwise `null`.
 * Numeric strings are not converted.
 *
 * @param {unknown} value - Candidate value.
 * @returns {number | null} The number, or `null`.
 */
function asNumber(value) {
  // Number.isFinite is false for every non-number, so no typeof check is needed.
  return Number.isFinite(value) ? value : null;
}
|
|
47
|
+
/**
 * Return the first finite number found in `record` under any of `keys`,
 * checked in order.
 *
 * @param {object | null} record - Object to read from; `null` yields `null`.
 * @param {string[]} keys - Property names in priority order.
 * @returns {number | null} First finite numeric value, or `null` if none.
 */
function firstNumber(record, keys) {
  if (!record) return null;
  for (const key of keys) {
    const value = asNumber(record[key]);
    if (value !== null) return value;
  }
  return null;
}
|
|
55
|
+
/**
 * Return the first non-blank string found in `record` under any of `keys`,
 * checked in order. The returned text is trimmed.
 *
 * @param {object | null} record - Object to read from; `null` yields `null`.
 * @param {string[]} keys - Property names in priority order.
 * @returns {string | null} First non-blank string value, or `null` if none.
 */
function firstString(record, keys) {
  if (!record) return null;
  for (const key of keys) {
    const value = asString(record[key]);
    if (value) return value;
  }
  return null;
}
|
|
63
|
+
/**
 * Read `report[key].summary` as an object record.
 *
 * @param {object} report - Raw comparison report.
 * @param {string} key - Name of the variant entry (for example `"base_model"`).
 * @returns {object | null} The summary object, or `null` when the variant or
 *   its summary is missing or not an object.
 */
function nestedSummary(report, key) {
  const variant = asRecord(report[key]);
  return asRecord(variant?.summary);
}
|
|
67
|
+
/**
 * Extract a model identifier from the variant entry `report[key]`.
 * The fields `model_name`, `model_ref`, `adapter_path` and `served_model_id`
 * are tried in that order.
 *
 * @param {object} report - Raw comparison report.
 * @param {string} key - Name of the variant entry.
 * @returns {string | null} First non-blank identifier, or `null`.
 */
function extractVariantModel(report, key) {
  const variant = asRecord(report[key]);
  return firstString(variant, [
    "model_name",
    "model_ref",
    "adapter_path",
    "served_model_id"
  ]);
}
|
|
76
|
+
/**
 * Round a metric to four decimal places, passing `null` through.
 *
 * @param {number | null} value - Metric value, or `null` when unavailable.
 * @returns {number | null} Rounded value, or `null`.
 */
function roundMetric(value) {
  if (value === null) return null;
  return Number(value.toFixed(4));
}
|
|
79
|
+
/**
 * Relative change from `base` to `trained`, in percent of `|base|`.
 *
 * @param {number | null} base - Baseline score.
 * @param {number | null} trained - Trained-model score.
 * @returns {number | null} Percentage change, or `null` when either score is
 *   missing or the baseline is 0 (the ratio is undefined).
 */
function improvementPercent(base, trained) {
  if (base === null || trained === null || base === 0) return null;
  const delta = trained - base;
  return delta / Math.abs(base) * 100;
}
|
|
83
|
+
/**
 * Normalize a raw comparison report into the eval-comparison artifact shape.
 * Nothing is written to disk; the only non-deterministic input is the current
 * time, used when the report carries no timestamp.
 *
 * @param {object} input - Artifact input.
 * @param {object} input.report - Raw comparison report.
 * @param {string} [input.reportPath] - Report path, copied through unchanged.
 * @param {object} [input.source] - Provenance metadata; a default record is
 *   used when omitted.
 * @returns {object} Artifact with schema/version, model identifiers, rounded
 *   metrics, the located summaries and the raw report.
 */
function buildEvalComparisonArtifactPayload(input) {
  // Candidate field names, in priority order, shared by both variants.
  const scoreKeys = [
    "avg_score",
    "score",
    "format_ok",
    "content_ok",
    "test_avg_score"
  ];
  const latencyKeys = ["avg_latency_ms", "latency_ms"];
  const promptCountKeys = ["prompt_count", "test_sample_count"];

  const report = input.report;
  // Summaries may be nested under the variant entry or sit at the top level.
  const baseSummary = nestedSummary(report, "base_model") ?? asRecord(report.base_summary);
  const trainedSummary = nestedSummary(report, "trained_model") ?? nestedSummary(report, "adapter_model") ?? asRecord(report.trained_summary) ?? asRecord(report.adapter_summary);
  const comparison = asRecord(report.comparison);

  const baseScore = firstNumber(baseSummary, scoreKeys);
  const trainedScore = firstNumber(trainedSummary, scoreKeys);
  const baseLatencyMs = firstNumber(baseSummary, latencyKeys);
  const trainedLatencyMs = firstNumber(trainedSummary, latencyKeys);
  const promptCount = firstNumber(baseSummary, promptCountKeys) ?? firstNumber(trainedSummary, promptCountKeys);
  const generatedAt = asString(report.timestamp) ?? asString(report.generated_at) ?? asString(report.evaluated_at) ?? new Date().toISOString();

  return {
    schema: EVAL_COMPARISON_ARTIFACT_SCHEMA,
    version: EVAL_COMPARISON_ARTIFACT_VERSION,
    generatedAt,
    reportPath: input.reportPath,
    source: input.source ?? { kind: "training_eval_comparison" },
    models: {
      base: extractVariantModel(report, "base_model") ?? firstString(report, ["base_model", "model", "base_model_id"]),
      trained: extractVariantModel(report, "trained_model") ?? extractVariantModel(report, "adapter_model") ?? firstString(report, [
        "trained_model",
        "adapter_model",
        "trained_model_id"
      ]),
      backend: asString(report.backend)
    },
    metrics: {
      baseScore: roundMetric(baseScore),
      trainedScore: roundMetric(trainedScore),
      improvementAbsolute: roundMetric(
        baseScore !== null && trainedScore !== null ? trainedScore - baseScore : null
      ),
      improvementPercent: roundMetric(
        improvementPercent(baseScore, trainedScore)
      ),
      baseLatencyMs: roundMetric(baseLatencyMs),
      trainedLatencyMs: roundMetric(trainedLatencyMs),
      latencyDeltaMs: roundMetric(
        baseLatencyMs !== null && trainedLatencyMs !== null ? trainedLatencyMs - baseLatencyMs : null
      ),
      promptCount: promptCount === null ? null : Math.round(promptCount),
      // firstNumber already yields null when the value is absent.
      distinctResponseCount: firstNumber(comparison, ["distinct_response_count"])
    },
    summaries: {
      base: baseSummary,
      trained: trainedSummary,
      comparison
    },
    raw: report
  };
}
|
|
152
|
+
/**
 * Turn a timestamp into a string usable as a single directory name.
 * Colons and dots are replaced with `-`, as are path separators: the value
 * can come from a report file, and `/` or `\` would otherwise create nested
 * directories.
 *
 * @param {string} value - Timestamp text, typically an ISO-8601 string.
 * @returns {string} The text with `:`, `.`, `/` and `\` replaced by `-`.
 */
function safeTimestamp(value) {
  return value.replace(/[:.\\\/]/g, "-");
}
|
|
155
|
+
/**
 * Build the artifact for `input` and write it to `eval-comparison.json`.
 * The output directory is created if it does not exist.
 *
 * @param {object} input - Artifact input; `input.outputDir` overrides the
 *   default `<training state root>/evals/<timestamp>` directory.
 * @returns {Promise<{ outputDir: string, artifactPath: string, artifact: object }>}
 *   The directory used, the file written and the serialized payload.
 */
async function writeEvalComparisonArtifact(input) {
  const artifact = buildEvalComparisonArtifactPayload(input);
  const outputDir = input.outputDir ?? join(trainingStateRoot(), "evals", safeTimestamp(artifact.generatedAt));
  await mkdir(outputDir, { recursive: true });
  const artifactPath = join(outputDir, "eval-comparison.json");
  // Pretty-printed JSON followed by a trailing newline.
  await writeFile(
    artifactPath,
    `${JSON.stringify(artifact, null, 2)}\n`,
    "utf-8"
  );
  return { outputDir, artifactPath, artifact };
}
|
|
168
|
+
/**
 * Run a child process to completion and collect its output.
 * stdin is ignored; stdout and stderr are decoded as UTF-8 and accumulated.
 *
 * @param {string} command - Executable to launch.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {string} cwd - Working directory for the child.
 * @returns {Promise<{ stdout: string, stderr: string, exitCode: number }>}
 *   Resolves when the child's streams close. A non-zero exit does not reject;
 *   the promise rejects only when the child emits an `error` event.
 */
function collectProcess(command, args, cwd) {
  return new Promise((resolvePromise, reject) => {
    const child = spawn(command, args, {
      cwd,
      stdio: ["ignore", "pipe", "pipe"]
    });
    let stdout = "";
    let stderr = "";
    child.stdout.setEncoding("utf-8");
    child.stderr.setEncoding("utf-8");
    child.stdout.on("data", (chunk) => {
      stdout += chunk;
    });
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
    });
    child.on("error", reject);
    child.on("close", (code) => {
      // `code` is null when the child was terminated by a signal; report 1.
      resolvePromise({ stdout, stderr, exitCode: code ?? 1 });
    });
  });
}
|
|
190
|
+
/**
 * Runs the local model comparison script and records its report as an
 * eval-comparison artifact.
 *
 * Steps, in order:
 *  1. Resolve the training root (default `<cwd>/packages/training`) and the
 *     output directory (default `<trainingStateRoot()>/evals/<timestamp>`),
 *     creating the output directory.
 *  2. Resolve the report path (default
 *     `<outputDir>/local_model_comparison.json`) and build the command line
 *     via `buildLocalEvalComparisonArgs`.
 *  3. With `options.dryRun` truthy: spawn nothing, write an artifact from a
 *     placeholder report and return with exit code 0.
 *  4. Otherwise run `options.python` (default "python3") in the training
 *     root, read and parse the JSON report it wrote, and write the artifact.
 *
 * @param {object} options - Run options; the fields read here are
 *   `trainingRoot`, `outputDir`, `outputPath`, `python`, `dryRun`, `backend`,
 *   `model`, `manifestPath` and `trainedModelPath`. The whole object is also
 *   handed to `buildLocalEvalComparisonArgs`.
 * @returns {Promise<object>} The result of `writeEvalComparisonArtifact`
 *   extended with `trainingRoot`, `command`, `reportPath`, `stdout`,
 *   `stderr` and `exitCode`.
 * @throws {Error} When the child process exits with a non-zero code. Errors
 *   from reading or parsing the report propagate unchanged.
 */
async function runLocalEvalComparison(options) {
  const trainingRoot = resolve(
    options.trainingRoot ?? join(process.cwd(), "packages", "training")
  );
  const outputDir =
    options.outputDir ??
    join(trainingStateRoot(), "evals", safeTimestamp(new Date().toISOString()));
  await mkdir(outputDir, { recursive: true });

  const reportPath = resolve(
    options.outputPath ?? join(outputDir, "local_model_comparison.json")
  );
  const command = options.python ?? "python3";
  const args = buildLocalEvalComparisonArgs(options, { trainingRoot, reportPath });

  // Provenance record stored in the artifact; `extra` is appended last so the
  // serialized key order matches for both the dry-run and the real run.
  const describeSource = (extra = {}) => ({
    kind: "training_local_eval_comparison",
    trainingRoot,
    manifestPath: options.manifestPath,
    model: options.model,
    trainedModelPath: options.trainedModelPath,
    backend: options.backend,
    ...extra
  });

  // Final return shape shared by both paths.
  const describeRun = (artifactResult, outcome) => ({
    ...artifactResult,
    trainingRoot,
    command: [command, ...args],
    reportPath,
    stdout: outcome.stdout,
    stderr: outcome.stderr,
    exitCode: outcome.exitCode
  });

  if (options.dryRun) {
    // No process is spawned: the artifact is built from a placeholder report.
    const placeholderReport = {
      timestamp: new Date().toISOString(),
      backend: options.backend,
      base_model: {
        model_ref: options.model ?? options.manifestPath ?? null,
        summary: {}
      },
      trained_model: {
        model_ref: options.trainedModelPath ?? options.manifestPath ?? null,
        summary: {}
      },
      comparison: {
        dry_run: true
      }
    };
    const dryRunArtifact = await writeEvalComparisonArtifact({
      report: placeholderReport,
      reportPath,
      outputDir,
      source: describeSource({ dryRun: true })
    });
    return describeRun(dryRunArtifact, {
      stdout: "[DRY RUN] Would run local eval comparison.",
      stderr: "",
      exitCode: 0
    });
  }

  const proc = await collectProcess(command, args, trainingRoot);
  if (proc.exitCode !== 0) {
    throw new Error(
      `compare_local_models.py exited with code ${proc.exitCode}: ${proc.stderr || proc.stdout}`
    );
  }

  const reportText = await readFile(reportPath, "utf-8");
  const report = JSON.parse(reportText);
  const artifactResult = await writeEvalComparisonArtifact({
    report,
    reportPath,
    outputDir,
    source: describeSource()
  });
  return describeRun(artifactResult, proc);
}
|
|
273
|
+
export {
|
|
274
|
+
EVAL_COMPARISON_ARTIFACT_SCHEMA,
|
|
275
|
+
EVAL_COMPARISON_ARTIFACT_VERSION,
|
|
276
|
+
buildEvalComparisonArtifactPayload,
|
|
277
|
+
buildLocalEvalComparisonArgs,
|
|
278
|
+
runLocalEvalComparison,
|
|
279
|
+
writeEvalComparisonArtifact
|
|
280
|
+
};
|
|
281
|
+
//# sourceMappingURL=eval-comparison-artifact.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/core/eval-comparison-artifact.ts"],"sourcesContent":["import { spawn } from \"node:child_process\";\nimport { mkdir, readFile, writeFile } from \"node:fs/promises\";\nimport { join, resolve } from \"node:path\";\nimport { trainingStateRoot } from \"./training-config.js\";\n\nexport const EVAL_COMPARISON_ARTIFACT_SCHEMA = \"eliza_eval_comparison_artifact\";\nexport const EVAL_COMPARISON_ARTIFACT_VERSION = 1;\n\nexport interface EvalComparisonArtifactInput {\n report: Record<string, unknown>;\n reportPath?: string;\n outputDir?: string;\n source?: Record<string, unknown>;\n}\n\nexport interface EvalComparisonRunOptions {\n trainingRoot?: string;\n python?: string;\n manifestPath?: string;\n model?: string;\n trainedModelPath?: string;\n backend?: \"mlx\" | \"cuda\" | \"cpu\";\n promptFile?: string;\n maxTokens?: number;\n systemPrompt?: string;\n outputPath?: string;\n outputDir?: string;\n dryRun?: boolean;\n}\n\nexport interface EvalComparisonArtifact {\n schema: typeof EVAL_COMPARISON_ARTIFACT_SCHEMA;\n version: typeof EVAL_COMPARISON_ARTIFACT_VERSION;\n generatedAt: string;\n reportPath?: string;\n source: Record<string, unknown>;\n models: {\n base: string | null;\n trained: string | null;\n backend: string | null;\n };\n metrics: {\n baseScore: number | null;\n trainedScore: number | null;\n improvementAbsolute: number | null;\n improvementPercent: number | null;\n baseLatencyMs: number | null;\n trainedLatencyMs: number | null;\n latencyDeltaMs: number | null;\n promptCount: number | null;\n distinctResponseCount: number | null;\n };\n summaries: {\n base: Record<string, unknown> | null;\n trained: Record<string, unknown> | null;\n comparison: Record<string, unknown> | null;\n };\n raw: Record<string, unknown>;\n}\n\nexport interface EvalComparisonArtifactResult {\n outputDir: string;\n artifactPath: string;\n artifact: EvalComparisonArtifact;\n}\n\nexport interface EvalComparisonRunResult extends EvalComparisonArtifactResult 
{\n trainingRoot: string;\n command: string[];\n reportPath: string;\n stdout: string;\n stderr: string;\n exitCode: number;\n}\n\nexport function buildLocalEvalComparisonArgs(\n options: EvalComparisonRunOptions,\n resolved: { trainingRoot: string; reportPath: string },\n): string[] {\n const scriptPath = join(\n resolved.trainingRoot,\n \"scripts\",\n \"rl\",\n \"compare_local_models.py\",\n );\n const args = [scriptPath];\n if (options.manifestPath) {\n args.push(\"--manifest\", options.manifestPath);\n } else {\n if (!options.model || !options.trainedModelPath || !options.backend) {\n throw new Error(\n \"Provide either manifestPath or model, trainedModelPath, and backend\",\n );\n }\n args.push(\"--model\", options.model);\n args.push(\"--trained-model-path\", options.trainedModelPath);\n args.push(\"--backend\", options.backend);\n }\n if (options.promptFile) args.push(\"--prompt-file\", options.promptFile);\n if (typeof options.maxTokens === \"number\") {\n args.push(\n \"--max-tokens\",\n String(Math.max(1, Math.floor(options.maxTokens))),\n );\n }\n if (options.systemPrompt) args.push(\"--system-prompt\", options.systemPrompt);\n args.push(\"--output\", resolved.reportPath);\n return args;\n}\n\nfunction asRecord(value: unknown): Record<string, unknown> | null {\n return value && typeof value === \"object\" && !Array.isArray(value)\n ? (value as Record<string, unknown>)\n : null;\n}\n\nfunction asString(value: unknown): string | null {\n return typeof value === \"string\" && value.trim().length > 0\n ? value.trim()\n : null;\n}\n\nfunction asNumber(value: unknown): number | null {\n return typeof value === \"number\" && Number.isFinite(value) ? 
value : null;\n}\n\nfunction firstNumber(\n record: Record<string, unknown> | null,\n keys: readonly string[],\n): number | null {\n if (!record) return null;\n for (const key of keys) {\n const value = asNumber(record[key]);\n if (value !== null) return value;\n }\n return null;\n}\n\nfunction firstString(\n record: Record<string, unknown> | null,\n keys: readonly string[],\n): string | null {\n if (!record) return null;\n for (const key of keys) {\n const value = asString(record[key]);\n if (value) return value;\n }\n return null;\n}\n\nfunction nestedSummary(\n report: Record<string, unknown>,\n key: string,\n): Record<string, unknown> | null {\n const variant = asRecord(report[key]);\n return asRecord(variant?.summary);\n}\n\nfunction extractVariantModel(\n report: Record<string, unknown>,\n key: string,\n): string | null {\n const variant = asRecord(report[key]);\n return firstString(variant, [\n \"model_name\",\n \"model_ref\",\n \"adapter_path\",\n \"served_model_id\",\n ]);\n}\n\nfunction roundMetric(value: number | null): number | null {\n return value === null ? null : Number(value.toFixed(4));\n}\n\nfunction improvementPercent(base: number | null, trained: number | null) {\n if (base === null || trained === null || base === 0) return null;\n return ((trained - base) / Math.abs(base)) * 100;\n}\n\nexport function buildEvalComparisonArtifactPayload(\n input: EvalComparisonArtifactInput,\n): EvalComparisonArtifact {\n const report = input.report;\n const baseSummary =\n nestedSummary(report, \"base_model\") ?? 
asRecord(report.base_summary);\n const trainedSummary =\n nestedSummary(report, \"trained_model\") ??\n nestedSummary(report, \"adapter_model\") ??\n asRecord(report.trained_summary) ??\n asRecord(report.adapter_summary);\n const comparison = asRecord(report.comparison);\n const baseScore = firstNumber(baseSummary, [\n \"avg_score\",\n \"score\",\n \"format_ok\",\n \"content_ok\",\n \"test_avg_score\",\n ]);\n const trainedScore = firstNumber(trainedSummary, [\n \"avg_score\",\n \"score\",\n \"format_ok\",\n \"content_ok\",\n \"test_avg_score\",\n ]);\n const baseLatencyMs = firstNumber(baseSummary, [\n \"avg_latency_ms\",\n \"latency_ms\",\n ]);\n const trainedLatencyMs = firstNumber(trainedSummary, [\n \"avg_latency_ms\",\n \"latency_ms\",\n ]);\n const promptCount =\n firstNumber(baseSummary, [\"prompt_count\", \"test_sample_count\"]) ??\n firstNumber(trainedSummary, [\"prompt_count\", \"test_sample_count\"]);\n const generatedAt =\n asString(report.timestamp) ??\n asString(report.generated_at) ??\n asString(report.evaluated_at) ??\n new Date().toISOString();\n\n return {\n schema: EVAL_COMPARISON_ARTIFACT_SCHEMA,\n version: EVAL_COMPARISON_ARTIFACT_VERSION,\n generatedAt,\n reportPath: input.reportPath,\n source: input.source ?? { kind: \"training_eval_comparison\" },\n models: {\n base:\n extractVariantModel(report, \"base_model\") ??\n firstString(report, [\"base_model\", \"model\", \"base_model_id\"]),\n trained:\n extractVariantModel(report, \"trained_model\") ??\n extractVariantModel(report, \"adapter_model\") ??\n firstString(report, [\n \"trained_model\",\n \"adapter_model\",\n \"trained_model_id\",\n ]),\n backend: asString(report.backend),\n },\n metrics: {\n baseScore: roundMetric(baseScore),\n trainedScore: roundMetric(trainedScore),\n improvementAbsolute: roundMetric(\n baseScore !== null && trainedScore !== null\n ? 
trainedScore - baseScore\n : null,\n ),\n improvementPercent: roundMetric(\n improvementPercent(baseScore, trainedScore),\n ),\n baseLatencyMs: roundMetric(baseLatencyMs),\n trainedLatencyMs: roundMetric(trainedLatencyMs),\n latencyDeltaMs: roundMetric(\n baseLatencyMs !== null && trainedLatencyMs !== null\n ? trainedLatencyMs - baseLatencyMs\n : null,\n ),\n promptCount: promptCount === null ? null : Math.round(promptCount),\n distinctResponseCount:\n firstNumber(comparison, [\"distinct_response_count\"]) ?? null,\n },\n summaries: {\n base: baseSummary,\n trained: trainedSummary,\n comparison,\n },\n raw: report,\n };\n}\n\nfunction safeTimestamp(value: string): string {\n return value.replace(/[:.]/g, \"-\");\n}\n\nexport async function writeEvalComparisonArtifact(\n input: EvalComparisonArtifactInput,\n): Promise<EvalComparisonArtifactResult> {\n const artifact = buildEvalComparisonArtifactPayload(input);\n const outputDir =\n input.outputDir ??\n join(trainingStateRoot(), \"evals\", safeTimestamp(artifact.generatedAt));\n await mkdir(outputDir, { recursive: true });\n const artifactPath = join(outputDir, \"eval-comparison.json\");\n await writeFile(\n artifactPath,\n `${JSON.stringify(artifact, null, 2)}\\n`,\n \"utf-8\",\n );\n return { outputDir, artifactPath, artifact };\n}\n\nfunction collectProcess(\n command: string,\n args: string[],\n cwd: string,\n): Promise<{ stdout: string; stderr: string; exitCode: number }> {\n return new Promise((resolvePromise, reject) => {\n const child = spawn(command, args, {\n cwd,\n stdio: [\"ignore\", \"pipe\", \"pipe\"],\n });\n let stdout = \"\";\n let stderr = \"\";\n child.stdout.setEncoding(\"utf-8\");\n child.stderr.setEncoding(\"utf-8\");\n child.stdout.on(\"data\", (chunk) => {\n stdout += chunk;\n });\n child.stderr.on(\"data\", (chunk) => {\n stderr += chunk;\n });\n child.on(\"error\", reject);\n child.on(\"close\", (code) => {\n resolvePromise({ stdout, stderr, exitCode: code ?? 
1 });\n });\n });\n}\n\nexport async function runLocalEvalComparison(\n options: EvalComparisonRunOptions,\n): Promise<EvalComparisonRunResult> {\n const trainingRoot = resolve(\n options.trainingRoot ?? join(process.cwd(), \"packages\", \"training\"),\n );\n const outputDir =\n options.outputDir ??\n join(trainingStateRoot(), \"evals\", safeTimestamp(new Date().toISOString()));\n await mkdir(outputDir, { recursive: true });\n const reportPath = resolve(\n options.outputPath ?? join(outputDir, \"local_model_comparison.json\"),\n );\n const command = options.python ?? \"python3\";\n const args = buildLocalEvalComparisonArgs(options, {\n trainingRoot,\n reportPath,\n });\n if (options.dryRun) {\n const artifactResult = await writeEvalComparisonArtifact({\n report: {\n timestamp: new Date().toISOString(),\n backend: options.backend,\n base_model: {\n model_ref: options.model ?? options.manifestPath ?? null,\n summary: {},\n },\n trained_model: {\n model_ref: options.trainedModelPath ?? options.manifestPath ?? 
null,\n summary: {},\n },\n comparison: {\n dry_run: true,\n },\n },\n reportPath,\n outputDir,\n source: {\n kind: \"training_local_eval_comparison\",\n trainingRoot,\n manifestPath: options.manifestPath,\n model: options.model,\n trainedModelPath: options.trainedModelPath,\n backend: options.backend,\n dryRun: true,\n },\n });\n return {\n ...artifactResult,\n trainingRoot,\n command: [command, ...args],\n reportPath,\n stdout: \"[DRY RUN] Would run local eval comparison.\",\n stderr: \"\",\n exitCode: 0,\n };\n }\n\n const proc = await collectProcess(command, args, trainingRoot);\n if (proc.exitCode !== 0) {\n throw new Error(\n `compare_local_models.py exited with code ${proc.exitCode}: ${proc.stderr || proc.stdout}`,\n );\n }\n const report = JSON.parse(await readFile(reportPath, \"utf-8\")) as Record<\n string,\n unknown\n >;\n const artifactResult = await writeEvalComparisonArtifact({\n report,\n reportPath,\n outputDir,\n source: {\n kind: \"training_local_eval_comparison\",\n trainingRoot,\n manifestPath: options.manifestPath,\n model: options.model,\n trainedModelPath: options.trainedModelPath,\n backend: options.backend,\n },\n });\n return {\n ...artifactResult,\n trainingRoot,\n command: [command, ...args],\n reportPath,\n stdout: proc.stdout,\n stderr: proc.stderr,\n exitCode: proc.exitCode,\n 
};\n}\n"],"mappings":"AAAA,SAAS,aAAa;AACtB,SAAS,OAAO,UAAU,iBAAiB;AAC3C,SAAS,MAAM,eAAe;AAC9B,SAAS,yBAAyB;AAE3B,MAAM,kCAAkC;AACxC,MAAM,mCAAmC;AAqEzC,SAAS,6BACd,SACA,UACU;AACV,QAAM,aAAa;AAAA,IACjB,SAAS;AAAA,IACT;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,QAAM,OAAO,CAAC,UAAU;AACxB,MAAI,QAAQ,cAAc;AACxB,SAAK,KAAK,cAAc,QAAQ,YAAY;AAAA,EAC9C,OAAO;AACL,QAAI,CAAC,QAAQ,SAAS,CAAC,QAAQ,oBAAoB,CAAC,QAAQ,SAAS;AACnE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AACA,SAAK,KAAK,WAAW,QAAQ,KAAK;AAClC,SAAK,KAAK,wBAAwB,QAAQ,gBAAgB;AAC1D,SAAK,KAAK,aAAa,QAAQ,OAAO;AAAA,EACxC;AACA,MAAI,QAAQ,WAAY,MAAK,KAAK,iBAAiB,QAAQ,UAAU;AACrE,MAAI,OAAO,QAAQ,cAAc,UAAU;AACzC,SAAK;AAAA,MACH;AAAA,MACA,OAAO,KAAK,IAAI,GAAG,KAAK,MAAM,QAAQ,SAAS,CAAC,CAAC;AAAA,IACnD;AAAA,EACF;AACA,MAAI,QAAQ,aAAc,MAAK,KAAK,mBAAmB,QAAQ,YAAY;AAC3E,OAAK,KAAK,YAAY,SAAS,UAAU;AACzC,SAAO;AACT;AAEA,SAAS,SAAS,OAAgD;AAChE,SAAO,SAAS,OAAO,UAAU,YAAY,CAAC,MAAM,QAAQ,KAAK,IAC5D,QACD;AACN;AAEA,SAAS,SAAS,OAA+B;AAC/C,SAAO,OAAO,UAAU,YAAY,MAAM,KAAK,EAAE,SAAS,IACtD,MAAM,KAAK,IACX;AACN;AAEA,SAAS,SAAS,OAA+B;AAC/C,SAAO,OAAO,UAAU,YAAY,OAAO,SAAS,KAAK,IAAI,QAAQ;AACvE;AAEA,SAAS,YACP,QACA,MACe;AACf,MAAI,CAAC,OAAQ,QAAO;AACpB,aAAW,OAAO,MAAM;AACtB,UAAM,QAAQ,SAAS,OAAO,GAAG,CAAC;AAClC,QAAI,UAAU,KAAM,QAAO;AAAA,EAC7B;AACA,SAAO;AACT;AAEA,SAAS,YACP,QACA,MACe;AACf,MAAI,CAAC,OAAQ,QAAO;AACpB,aAAW,OAAO,MAAM;AACtB,UAAM,QAAQ,SAAS,OAAO,GAAG,CAAC;AAClC,QAAI,MAAO,QAAO;AAAA,EACpB;AACA,SAAO;AACT;AAEA,SAAS,cACP,QACA,KACgC;AAChC,QAAM,UAAU,SAAS,OAAO,GAAG,CAAC;AACpC,SAAO,SAAS,SAAS,OAAO;AAClC;AAEA,SAAS,oBACP,QACA,KACe;AACf,QAAM,UAAU,SAAS,OAAO,GAAG,CAAC;AACpC,SAAO,YAAY,SAAS;AAAA,IAC1B;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AACH;AAEA,SAAS,YAAY,OAAqC;AACxD,SAAO,UAAU,OAAO,OAAO,OAAO,MAAM,QAAQ,CAAC,CAAC;AACxD;AAEA,SAAS,mBAAmB,MAAqB,SAAwB;AACvE,MAAI,SAAS,QAAQ,YAAY,QAAQ,SAAS,EAAG,QAAO;AAC5D,UAAS,UAAU,QAAQ,KAAK,IAAI,IAAI,IAAK;AAC/C;AAEO,SAAS,mCACd,OACwB;AACxB,QAAM,SAAS,MAAM;AACrB,QAAM,cACJ,cAAc,QAAQ,YAAY,KAAK,SAAS,OAAO,YAAY;AACrE,QAAM,iBACJ,cAAc,QAAQ,eAAe,KACrC,cAAc,QAAQ,eAAe,KACrC,SAAS,OAAO,eAAe,KAC/B,S
AAS,OAAO,eAAe;AACjC,QAAM,aAAa,SAAS,OAAO,UAAU;AAC7C,QAAM,YAAY,YAAY,aAAa;AAAA,IACzC;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AACD,QAAM,eAAe,YAAY,gBAAgB;AAAA,IAC/C;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AACD,QAAM,gBAAgB,YAAY,aAAa;AAAA,IAC7C;AAAA,IACA;AAAA,EACF,CAAC;AACD,QAAM,mBAAmB,YAAY,gBAAgB;AAAA,IACnD;AAAA,IACA;AAAA,EACF,CAAC;AACD,QAAM,cACJ,YAAY,aAAa,CAAC,gBAAgB,mBAAmB,CAAC,KAC9D,YAAY,gBAAgB,CAAC,gBAAgB,mBAAmB,CAAC;AACnE,QAAM,cACJ,SAAS,OAAO,SAAS,KACzB,SAAS,OAAO,YAAY,KAC5B,SAAS,OAAO,YAAY,MAC5B,oBAAI,KAAK,GAAE,YAAY;AAEzB,SAAO;AAAA,IACL,QAAQ;AAAA,IACR,SAAS;AAAA,IACT;AAAA,IACA,YAAY,MAAM;AAAA,IAClB,QAAQ,MAAM,UAAU,EAAE,MAAM,2BAA2B;AAAA,IAC3D,QAAQ;AAAA,MACN,MACE,oBAAoB,QAAQ,YAAY,KACxC,YAAY,QAAQ,CAAC,cAAc,SAAS,eAAe,CAAC;AAAA,MAC9D,SACE,oBAAoB,QAAQ,eAAe,KAC3C,oBAAoB,QAAQ,eAAe,KAC3C,YAAY,QAAQ;AAAA,QAClB;AAAA,QACA;AAAA,QACA;AAAA,MACF,CAAC;AAAA,MACH,SAAS,SAAS,OAAO,OAAO;AAAA,IAClC;AAAA,IACA,SAAS;AAAA,MACP,WAAW,YAAY,SAAS;AAAA,MAChC,cAAc,YAAY,YAAY;AAAA,MACtC,qBAAqB;AAAA,QACnB,cAAc,QAAQ,iBAAiB,OACnC,eAAe,YACf;AAAA,MACN;AAAA,MACA,oBAAoB;AAAA,QAClB,mBAAmB,WAAW,YAAY;AAAA,MAC5C;AAAA,MACA,eAAe,YAAY,aAAa;AAAA,MACxC,kBAAkB,YAAY,gBAAgB;AAAA,MAC9C,gBAAgB;AAAA,QACd,kBAAkB,QAAQ,qBAAqB,OAC3C,mBAAmB,gBACnB;AAAA,MACN;AAAA,MACA,aAAa,gBAAgB,OAAO,OAAO,KAAK,MAAM,WAAW;AAAA,MACjE,uBACE,YAAY,YAAY,CAAC,yBAAyB,CAAC,KAAK;AAAA,IAC5D;AAAA,IACA,WAAW;AAAA,MACT,MAAM;AAAA,MACN,SAAS;AAAA,MACT;AAAA,IACF;AAAA,IACA,KAAK;AAAA,EACP;AACF;AAEA,SAAS,cAAc,OAAuB;AAC5C,SAAO,MAAM,QAAQ,SAAS,GAAG;AACnC;AAEA,eAAsB,4BACpB,OACuC;AACvC,QAAM,WAAW,mCAAmC,KAAK;AACzD,QAAM,YACJ,MAAM,aACN,KAAK,kBAAkB,GAAG,SAAS,cAAc,SAAS,WAAW,CAAC;AACxE,QAAM,MAAM,WAAW,EAAE,WAAW,KAAK,CAAC;AAC1C,QAAM,eAAe,KAAK,WAAW,sBAAsB;AAC3D,QAAM;AAAA,IACJ;AAAA,IACA,GAAG,KAAK,UAAU,UAAU,MAAM,CAAC,CAAC;AAAA;AAAA,IACpC;AAAA,EACF;AACA,SAAO,EAAE,WAAW,cAAc,SAAS;AAC7C;AAEA,SAAS,eACP,SACA,MACA,KAC+D;AAC/D,SAAO,IAAI,QAAQ,CAAC,gBAAgB,WAAW;AAC7C,UAAM,QAAQ,MAAM,SAAS,MAAM;AAAA,MACjC;AAAA,MACA,OAAO,CAAC,UAAU,QAAQ,MAAM;AAAA,IAClC,CAAC;AACD,QAAI,SAAS;AACb
,QAAI,SAAS;AACb,UAAM,OAAO,YAAY,OAAO;AAChC,UAAM,OAAO,YAAY,OAAO;AAChC,UAAM,OAAO,GAAG,QAAQ,CAAC,UAAU;AACjC,gBAAU;AAAA,IACZ,CAAC;AACD,UAAM,OAAO,GAAG,QAAQ,CAAC,UAAU;AACjC,gBAAU;AAAA,IACZ,CAAC;AACD,UAAM,GAAG,SAAS,MAAM;AACxB,UAAM,GAAG,SAAS,CAAC,SAAS;AAC1B,qBAAe,EAAE,QAAQ,QAAQ,UAAU,QAAQ,EAAE,CAAC;AAAA,IACxD,CAAC;AAAA,EACH,CAAC;AACH;AAEA,eAAsB,uBACpB,SACkC;AAClC,QAAM,eAAe;AAAA,IACnB,QAAQ,gBAAgB,KAAK,QAAQ,IAAI,GAAG,YAAY,UAAU;AAAA,EACpE;AACA,QAAM,YACJ,QAAQ,aACR,KAAK,kBAAkB,GAAG,SAAS,eAAc,oBAAI,KAAK,GAAE,YAAY,CAAC,CAAC;AAC5E,QAAM,MAAM,WAAW,EAAE,WAAW,KAAK,CAAC;AAC1C,QAAM,aAAa;AAAA,IACjB,QAAQ,cAAc,KAAK,WAAW,6BAA6B;AAAA,EACrE;AACA,QAAM,UAAU,QAAQ,UAAU;AAClC,QAAM,OAAO,6BAA6B,SAAS;AAAA,IACjD;AAAA,IACA;AAAA,EACF,CAAC;AACD,MAAI,QAAQ,QAAQ;AAClB,UAAMA,kBAAiB,MAAM,4BAA4B;AAAA,MACvD,QAAQ;AAAA,QACN,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,QAClC,SAAS,QAAQ;AAAA,QACjB,YAAY;AAAA,UACV,WAAW,QAAQ,SAAS,QAAQ,gBAAgB;AAAA,UACpD,SAAS,CAAC;AAAA,QACZ;AAAA,QACA,eAAe;AAAA,UACb,WAAW,QAAQ,oBAAoB,QAAQ,gBAAgB;AAAA,UAC/D,SAAS,CAAC;AAAA,QACZ;AAAA,QACA,YAAY;AAAA,UACV,SAAS;AAAA,QACX;AAAA,MACF;AAAA,MACA;AAAA,MACA;AAAA,MACA,QAAQ;AAAA,QACN,MAAM;AAAA,QACN;AAAA,QACA,cAAc,QAAQ;AAAA,QACtB,OAAO,QAAQ;AAAA,QACf,kBAAkB,QAAQ;AAAA,QAC1B,SAAS,QAAQ;AAAA,QACjB,QAAQ;AAAA,MACV;AAAA,IACF,CAAC;AACD,WAAO;AAAA,MACL,GAAGA;AAAA,MACH;AAAA,MACA,SAAS,CAAC,SAAS,GAAG,IAAI;AAAA,MAC1B;AAAA,MACA,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,UAAU;AAAA,IACZ;AAAA,EACF;AAEA,QAAM,OAAO,MAAM,eAAe,SAAS,MAAM,YAAY;AAC7D,MAAI,KAAK,aAAa,GAAG;AACvB,UAAM,IAAI;AAAA,MACR,4CAA4C,KAAK,QAAQ,KAAK,KAAK,UAAU,KAAK,MAAM;AAAA,IAC1F;AAAA,EACF;AACA,QAAM,SAAS,KAAK,MAAM,MAAM,SAAS,YAAY,OAAO,CAAC;AAI7D,QAAM,iBAAiB,MAAM,4BAA4B;AAAA,IACvD;AAAA,IACA;AAAA,IACA;AAAA,IACA,QAAQ;AAAA,MACN,MAAM;AAAA,MACN;AAAA,MACA,cAAc,QAAQ;AAAA,MACtB,OAAO,QAAQ;AAAA,MACf,kBAAkB,QAAQ;AAAA,MAC1B,SAAS,QAAQ;AAAA,IACnB;AAAA,EACF,CAAC;AACD,SAAO;AAAA,IACL,GAAG;AAAA,IACH;AAAA,IACA,SAAS,CAAC,SAAS,GAAG,IAAI;AAAA,IAC1B;AAAA,IACA,QAAQ,KAAK;AAAA,IACb,QAAQ,KAAK;AAAA,IACb,UAAU,KAAK;AAAA,EACjB;AACF;","names":["artifactResult"]}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
 * Options accepted by `runFeedGeneration` and `buildFeedGenerationArgs`.
 * Every field is optional.
 */
export interface FeedGenerationRunOptions {
    /** Workspace root override; handed to the workspace-root resolver. */
    workspaceRoot?: string;
    /** Executable used to launch the feed CLI; a default bun command is used when omitted. */
    bun?: string;
    /** Value for `--archetypes` (comma-separated names); "trader" when empty or omitted. */
    archetypes?: string;
    /** Value for `--num-agents`; floored, clamped to at least 1, defaults to 1. */
    numAgents?: number;
    /** Value for `--ticks`; floored, clamped to at least 1, defaults to 1. */
    ticks?: number;
    /** Value for `--parallel`; floored, clamped to at least 1, defaults to 1. */
    parallel?: number;
    /** When set, passed to the CLI as `--manager-id`. */
    managerId?: string;
    /** When true, `--cleanup` is added to the CLI arguments. */
    cleanup?: boolean;
    /** When true, `--dry-run` is added to the CLI arguments. */
    dryRun?: boolean;
    /** Directory passed as `--output-dir`; a timestamped directory is used when omitted. */
    outputDir?: string;
}
|
|
13
|
+
/** Outcome of one `runFeedGeneration` call. */
export interface FeedGenerationRunResult {
    /** Resolved workspace root. */
    workspaceRoot: string;
    /** Directory the feed CLI was run from. */
    feedCliRoot: string;
    /** Directory passed to the CLI as `--output-dir`. */
    outputDir: string;
    /** Manifests discovered under `outputDir`, ordered by manifest path. */
    artifacts: FeedGenerationArtifact[];
    /** The executable followed by its arguments. */
    command: string[];
    /** Captured standard output of the CLI process. */
    stdout: string;
    /** Captured standard error of the CLI process. */
    stderr: string;
    /** Exit code of the CLI process. */
    exitCode: number;
}
|
|
23
|
+
/** Summary of one `*.manifest.json` file found in a feed generation output directory. */
export interface FeedGenerationArtifact {
    /** The manifest's `schema` value. */
    schema: string | null;
    /** Path of the manifest file this summary was read from. */
    manifestPath: string;
    /** The manifest's `exportPath`, or null when absent or blank. */
    exportPath: string | null;
    /** The manifest's `outputDir`, or null when absent or blank. */
    outputDir: string | null;
    /** The manifest's `source.kind`, or null when absent or blank. */
    sourceKind: string | null;
    /** The manifest's `counts.trajectories`, or null when not a finite number. */
    trajectories: number | null;
    /** The manifest's `source.archetypes` (or `source.archetype`), passed through unvalidated. */
    archetypes: unknown;
    /** The manifest's `generatedAt`, or null when absent or blank. */
    generatedAt: string | null;
}
|
|
33
|
+
/**
 * Builds the argument list for the feed CLI's `train parallel` command.
 *
 * @param options - Generation options; omitted numeric values default to 1
 *   and omitted archetypes default to "trader".
 * @param resolved - Already-resolved values; `outputDir` becomes `--output-dir`.
 * @returns The arguments to pass to the bun executable.
 */
export declare function buildFeedGenerationArgs(
    options: FeedGenerationRunOptions,
    resolved: { outputDir: string },
): string[];
|
|
36
|
+
/**
 * Runs the feed CLI's `train parallel` command and collects the manifests it
 * wrote to the output directory.
 *
 * @param options - Generation options; all fields are optional.
 * @returns The resolved paths, the command that was run, its captured output
 *   and the discovered artifacts. Rejects when the CLI exits non-zero.
 */
export declare function runFeedGeneration(
    options?: FeedGenerationRunOptions,
): Promise<FeedGenerationRunResult>;
|
|
37
|
+
//# sourceMappingURL=feed-generation-runner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"feed-generation-runner.d.ts","sourceRoot":"","sources":["../../src/core/feed-generation-runner.ts"],"names":[],"mappings":"AAMA,MAAM,WAAW,wBAAwB;IACvC,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,uBAAuB;IACtC,aAAa,EAAE,MAAM,CAAC;IACtB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,sBAAsB,EAAE,CAAC;IACpC,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,sBAAsB;IACrC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,UAAU,EAAE,OAAO,CAAC;IACpB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;CAC5B;AAyMD,wBAAgB,uBAAuB,CACrC,OAAO,EAAE,wBAAwB,EACjC,QAAQ,EAAE;IAAE,SAAS,EAAE,MAAM,CAAA;CAAE,GAC9B,MAAM,EAAE,CAqBV;AAED,wBAAsB,iBAAiB,CACrC,OAAO,GAAE,wBAA6B,GACrC,OAAO,CAAC,uBAAuB,CAAC,CA8BlC"}
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
import { spawn } from "node:child_process";
|
|
2
|
+
import { mkdir, readdir, readFile, writeFile } from "node:fs/promises";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { trainingStateRoot } from "./training-config.js";
|
|
5
|
+
import { defaultBunCommand, resolveWorkspaceRoot } from "./workspace-runtime.js";
|
|
6
|
+
/**
 * Turns a timestamp into text usable as a directory name by replacing each
 * ":" and "." with "-".
 *
 * @param {string} value - Timestamp text (for example an ISO-8601 string).
 * @returns {string} The text with ":" and "." replaced by "-".
 */
function safeTimestamp(value) {
  const segments = value.split(/[:.]/);
  return segments.join("-");
}
|
|
9
|
+
/**
 * Coerces a numeric option to a whole number of at least 1.
 *
 * @param {unknown} value - Candidate value.
 * @param {*} fallback - Returned as-is when `value` is not a finite number.
 * @returns {*} `value` rounded down and clamped to a minimum of 1, or
 *   `fallback` when `value` is not a finite number.
 */
function positiveInt(value, fallback) {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return fallback;
  }
  const whole = Math.floor(value);
  return whole < 1 ? 1 : whole;
}
|
|
12
|
+
/**
 * Launches `command` with `args` inside `cwd`, buffering its stdout and
 * stderr as UTF-8 text. The child's stdin is not connected.
 *
 * @param {string} command - Executable to launch.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {string} cwd - Working directory for the child process.
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number}>}
 *   Resolves on the child's "close" event, with `exitCode` defaulting to 1
 *   when no numeric code is reported. Rejects if the child emits "error".
 */
function collectProcess(command, args, cwd) {
  return new Promise((resolvePromise, reject) => {
    const outChunks = [];
    const errChunks = [];
    const spawnOptions = { cwd, stdio: ["ignore", "pipe", "pipe"] };
    const child = spawn(command, args, spawnOptions);

    child.stdout.setEncoding("utf-8");
    child.stderr.setEncoding("utf-8");
    // Chunks are coerced with String() so the joined text equals plain concatenation.
    child.stdout.on("data", (chunk) => {
      outChunks.push(String(chunk));
    });
    child.stderr.on("data", (chunk) => {
      errChunks.push(String(chunk));
    });

    child.on("error", reject);
    child.on("close", (code) => {
      resolvePromise({
        stdout: outChunks.join(""),
        stderr: errChunks.join(""),
        exitCode: code ?? 1
      });
    });
  });
}
|
|
31
|
+
/**
 * Narrows a value to a non-array object.
 *
 * @param {unknown} value - Candidate value.
 * @returns {object|null} `value` itself when it is a truthy, non-array
 *   object; otherwise null.
 */
function asRecord(value) {
  if (!value) return null;
  if (typeof value !== "object") return null;
  if (Array.isArray(value)) return null;
  return value;
}
|
|
34
|
+
/**
 * Returns the trimmed form of a string, or null when there is nothing left.
 *
 * @param {unknown} value - Candidate value.
 * @returns {string|null} The trimmed string, or null when `value` is not a
 *   string or contains only whitespace.
 */
function stringOrNull(value) {
  if (typeof value !== "string") {
    return null;
  }
  const trimmed = value.trim();
  return trimmed.length === 0 ? null : trimmed;
}
|
|
37
|
+
/**
 * Returns `value` when it is a finite number, otherwise null.
 *
 * @param {unknown} value - Candidate value.
 * @returns {number|null} The finite number, or null for anything else
 *   (including NaN, the infinities and numeric strings).
 */
function numberOrNull(value) {
  // Number.isFinite never coerces, so it is false for every non-number.
  if (Number.isFinite(value)) {
    return value;
  }
  return null;
}
|
|
40
|
+
/**
 * Splits a comma-separated archetype list into trimmed, non-empty names.
 *
 * @param {string|null|undefined} value - Comma-separated names; a nullish
 *   value is treated as "trader".
 * @returns {string[]} The names in their original order, or ["trader"] when
 *   no non-empty name remains.
 */
function parseArchetypes(value) {
  const names = [];
  for (const piece of (value ?? "trader").split(",")) {
    const name = piece.trim();
    if (name) {
      names.push(name);
    }
  }
  if (names.length === 0) {
    return ["trader"];
  }
  return names;
}
|
|
44
|
+
/**
 * Writes placeholder feed-generation output for a dry run: a JSONL file of
 * synthetic trajectories and a manifest describing it.
 *
 * One trajectory row is produced per archetype per agent (archetypes come
 * from `parseArchetypes`, agent and tick counts from `positiveInt` with a
 * default of 1). Each row carries a single "DRY_RUN" step. The files are
 * written into `input.outputDir` as "feed-dry-run-trajectories.jsonl" and
 * "feed-dry-run.manifest.json", in that order. The directory itself is not
 * created here.
 *
 * @param {{outputDir: string, options: object}} input - Destination directory
 *   and the generation options (`archetypes`, `numAgents`, `ticks`,
 *   `cleanup` are read).
 * @returns {Promise<void>}
 */
async function writeDryRunFeedGenerationArtifact(input) {
  const generatedAt = new Date().toISOString();
  const archetypes = parseArchetypes(input.options.archetypes);
  const agentsPerArchetype = positiveInt(input.options.numAgents, 1);
  const ticks = positiveInt(input.options.ticks, 1);

  // Archetype-major order, agent ordinals starting at 1.
  const trajectoryRows = [];
  for (const archetype of archetypes) {
    for (let ordinal = 1; ordinal <= agentsPerArchetype; ordinal += 1) {
      trajectoryRows.push({
        trajectory_id: `feed-dry-run-${archetype}-${ordinal}`,
        agent_id: `feed-dry-run-agent-${archetype}-${ordinal}`,
        archetype,
        scenario_id: "feed-dry-run",
        score: null,
        steps: [
          {
            action: "DRY_RUN",
            kind: "planned_tick",
            input: `${archetype} market observation for dry-run tick 1 of ${ticks}`,
            output: `planned ${archetype} feed decision`,
            tick: 1,
            ticks
          }
        ],
        reasoning: `Dry-run feed generation preview for ${archetype}.`
      });
    }
  }

  const exportPath = join(input.outputDir, "feed-dry-run-trajectories.jsonl");
  const manifestPath = join(input.outputDir, "feed-dry-run.manifest.json");

  const trajectoryIds = [];
  const agentsCreated = [];
  const jsonLines = [];
  for (const row of trajectoryRows) {
    trajectoryIds.push(row.trajectory_id);
    agentsCreated.push(row.agent_id);
    jsonLines.push(JSON.stringify(row));
  }

  const statEntries = archetypes.map((archetype) => {
    const stats = {
      agents: agentsPerArchetype,
      trajectories: agentsPerArchetype,
      avgTicksPerAgent: ticks
    };
    return [archetype, stats];
  });
  const archetypeStats = Object.fromEntries(statEntries);

  const manifest = {
    schema: "feed_parallel_generation",
    schemaVersion: 1,
    generatedAt,
    outputDir: input.outputDir,
    exportPath,
    manifestPath,
    source: {
      kind: "feed_train_parallel_generation",
      archetypes
    },
    counts: {
      agentsCreated: agentsCreated.length,
      trajectories: trajectoryIds.length,
      totalTicks: agentsCreated.length * ticks,
      errors: 0
    },
    durationMs: 0,
    cleanup: input.options.cleanup === true,
    dryRun: true,
    agentsCreated,
    trajectoryIds,
    archetypeStats,
    errors: []
  };

  await writeFile(exportPath, `${jsonLines.join("\n")}\n`, "utf8");
  await writeFile(manifestPath, `${JSON.stringify(manifest, null, 2)}\n`, "utf8");
}
|
|
127
|
+
/**
 * Recursively scans `outputDir` for files whose names end in
 * ".manifest.json" and summarises those whose `schema` is
 * "feed_parallel_generation" or "feed_training_trajectory_export".
 *
 * Directories that cannot be listed and manifests that cannot be read or
 * parsed are skipped without raising.
 *
 * @param {string} outputDir - Directory to scan.
 * @returns {Promise<object[]>} One summary per accepted manifest (`schema`,
 *   `manifestPath`, `exportPath`, `outputDir`, `sourceKind`, `trajectories`,
 *   `archetypes`, `generatedAt`), sorted by `manifestPath` using
 *   `localeCompare`.
 */
async function discoverFeedGenerationArtifacts(outputDir) {
  const acceptedSchemas = ["feed_parallel_generation", "feed_training_trajectory_export"];

  // Depth-first listing of manifest paths, in directory-entry order.
  const findManifests = async (dir) => {
    let entries;
    try {
      entries = await readdir(dir, { withFileTypes: true });
    } catch {
      return [];
    }
    const found = [];
    for (const entry of entries) {
      const entryPath = join(dir, entry.name);
      if (entry.isDirectory()) {
        for (const nested of await findManifests(entryPath)) {
          found.push(nested);
        }
      } else if (entry.isFile() && entry.name.endsWith(".manifest.json")) {
        found.push(entryPath);
      }
    }
    return found;
  };

  // Returns the summary for one manifest, or null when it is not one of ours.
  const summarize = async (manifestPath) => {
    const payload = asRecord(JSON.parse(await readFile(manifestPath, "utf8")));
    if (!payload) {
      return null;
    }
    const schema = stringOrNull(payload.schema);
    if (!acceptedSchemas.includes(schema)) {
      return null;
    }
    const counts = asRecord(payload.counts) ?? {};
    const source = asRecord(payload.source) ?? {};
    return {
      schema,
      manifestPath,
      exportPath: stringOrNull(payload.exportPath),
      outputDir: stringOrNull(payload.outputDir),
      sourceKind: stringOrNull(source.kind),
      trajectories: numberOrNull(counts.trajectories),
      archetypes: source.archetypes ?? source.archetype ?? null,
      generatedAt: stringOrNull(payload.generatedAt)
    };
  };

  const artifacts = [];
  for (const manifestPath of await findManifests(outputDir)) {
    try {
      const summary = await summarize(manifestPath);
      if (summary !== null) {
        artifacts.push(summary);
      }
    } catch {
      // Best effort: an unreadable or malformed manifest is ignored.
    }
  }

  const byManifestPath = (left, right) => left.manifestPath.localeCompare(right.manifestPath);
  return artifacts.sort(byManifestPath);
}
|
|
176
|
+
/**
 * Builds the argument list for the feed CLI's `train parallel` command.
 *
 * @param {object} options - Generation options. `archetypes` is trimmed and
 *   falls back to "trader" when empty; `numAgents`, `ticks` and `parallel`
 *   go through `positiveInt` with a default of 1; `managerId`, `cleanup` and
 *   `dryRun` add their flags only when truthy.
 * @param {{outputDir: string}} resolved - `outputDir` becomes `--output-dir`.
 * @returns {string[]} Arguments starting with "run", "src/index.ts",
 *   "train", "parallel".
 */
function buildFeedGenerationArgs(options, resolved) {
  const countFlag = (value) => String(positiveInt(value, 1));

  const args = ["run", "src/index.ts", "train", "parallel"];
  args.push("--archetypes", options.archetypes?.trim() || "trader");
  args.push("--num-agents", countFlag(options.numAgents));
  args.push("--ticks", countFlag(options.ticks));
  args.push("--parallel", countFlag(options.parallel));
  args.push("--output-dir", resolved.outputDir);

  if (options.managerId) {
    args.push("--manager-id", options.managerId);
  }
  if (options.cleanup) {
    args.push("--cleanup");
  }
  if (options.dryRun) {
    args.push("--dry-run");
  }
  return args;
}
|
|
198
|
+
/**
 * Runs the feed CLI's `train parallel` command and collects the manifests it
 * produced.
 *
 * The CLI is launched from `<workspaceRoot>/packages/feed/apps/cli` using
 * `options.bun` (or the default bun command). Output goes to
 * `options.outputDir`, defaulting to
 * `<trainingStateRoot()>/feed/parallel/<timestamp>`; the directory is created
 * first. The process is spawned even when `options.dryRun` is true (the
 * `--dry-run` flag is forwarded). If a dry run leaves no recognised manifest
 * behind, a placeholder artifact is written and discovery is repeated.
 *
 * @param {object} [options={}] - Generation options.
 * @returns {Promise<object>} `workspaceRoot`, `feedCliRoot`, `outputDir`,
 *   `artifacts`, `command`, `stdout`, `stderr` and `exitCode`.
 * @throws {Error} When the CLI exits with a non-zero code.
 */
async function runFeedGeneration(options = {}) {
  const workspaceRoot = resolveWorkspaceRoot(options.workspaceRoot);
  const feedCliRoot = join(workspaceRoot, "packages", "feed", "apps", "cli");
  const stamp = safeTimestamp(new Date().toISOString());
  const outputDir = options.outputDir ?? join(trainingStateRoot(), "feed", "parallel", stamp);
  await mkdir(outputDir, { recursive: true });

  const args = buildFeedGenerationArgs(options, { outputDir });
  const command = options.bun ?? defaultBunCommand();
  const proc = await collectProcess(command, args, feedCliRoot);
  const failed = proc.exitCode !== 0;
  if (failed) {
    throw new Error(
      `feed train parallel exited with code ${proc.exitCode}: ${proc.stderr || proc.stdout}`
    );
  }

  let artifacts = await discoverFeedGenerationArtifacts(outputDir);
  const needsPlaceholder = options.dryRun === true && artifacts.length === 0;
  if (needsPlaceholder) {
    await writeDryRunFeedGenerationArtifact({ outputDir, options });
    artifacts = await discoverFeedGenerationArtifacts(outputDir);
  }

  const { stdout, stderr, exitCode } = proc;
  return {
    workspaceRoot,
    feedCliRoot,
    outputDir,
    artifacts,
    command: [command, ...args],
    stdout,
    stderr,
    exitCode
  };
}
|
|
228
|
+
export {
|
|
229
|
+
buildFeedGenerationArgs,
|
|
230
|
+
runFeedGeneration
|
|
231
|
+
};
|
|
232
|
+
//# sourceMappingURL=feed-generation-runner.js.map
|