@elizaos/plugin-training 2.0.3-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3):
  1. package/LICENSE +21 -0
  2. package/README.md +126 -0
  3. package/package.json +109 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Shaw Walters and elizaOS Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,126 @@
1
+ # @elizaos/plugin-training
2
+
3
+ `@elizaos/plugin-training` adds fine-tuning, trajectory management, and
4
+ prompt-optimization infrastructure to an Eliza agent. Capabilities:
5
+
6
+ - **Auto-training** — `TrainingTriggerService` counts completed trajectories per
7
+ task and fires prompt optimization automatically when the configured threshold
8
+ is reached (default 100 trajectories, 12-hour cooldown).
9
+ - **Native optimizer** — in-process prompt optimization via
10
+ `instruction-search`, `prompt-evolution`, `gepa`, `bootstrap-fewshot`, and
11
+ DSPy-native variants (COPRO, MIPRO). Writes artifacts to
12
+ `<stateDir>/optimized-prompts/` for live pickup by `OptimizedPromptService`.
13
+ - **Vast.ai GPU training** — orchestrates remote training jobs via
14
+ `/api/training/vast/*` routes and the `VastTrainingService`.
15
+ - **Fine-tuning dashboard** — developer-only UI view at `/training` showing
16
+ jobs, datasets, models, evals, benchmarks, and trajectory management.
17
+ - **Data collection CLI** — collects Eliza harness benchmark evidence into
18
+ inspectable HTML+JSON run folders. The dashboard and CLI share the same APIs.
19
+
20
+ ## Data collection
21
+
22
+ Run a dry collection first. It writes artifacts, summaries, and viewers without
23
+ requiring live model endpoints:
24
+
25
+ ```bash
26
+ bun run --cwd plugins/plugin-training src/core/cli.ts run-collection \
27
+ -o /tmp/eliza-training-run \
28
+ --tiers 0_8b,2b
29
+ ```
30
+
31
+ Useful live-readiness checks:
32
+
33
+ ```bash
34
+ bun run --cwd plugins/plugin-training src/core/cli.ts run-collection \
35
+ --live \
36
+ --preflight-only \
37
+ --probe-endpoints
38
+ ```
39
+
40
+ The collection runner pulls together:
41
+
42
+ - Hugging Face Eliza-1 training data ingest.
43
+ - Feed-generated trajectories from `packages/feed`.
44
+ - Natural app trajectories from existing sanitized or raw JSONL exports.
45
+ - Scenario runner exports and native scenario trajectory JSONL.
46
+ - App-core test trajectory artifacts.
47
+ - Local base-vs-trained eval comparison artifacts.
48
+ - Eliza harness action benchmark pairs across Eliza-1 tiers.
49
+ - Benchmark matrix artifacts with Cerebras reference comparisons when enabled.
50
+ - Eliza-1 model registry and bundle-stage metadata.
51
+
52
+ ## Inputs
53
+
54
+ Natural trajectory imports can be pointed at existing files:
55
+
56
+ ```bash
57
+ bun run --cwd plugins/plugin-training src/core/cli.ts run-collection \
58
+ -o /tmp/eliza-training-run \
59
+ --natural-sanitized-jsonl /path/to/trajectories.sanitized.jsonl \
60
+ --natural-raw-jsonl /path/to/trajectories.raw.jsonl \
61
+ --natural-run-id app-run-2026-05-24
62
+ ```
63
+
64
+ Benchmark tiers accept a comma-separated list or `all`:
65
+
66
+ ```bash
67
+ --tiers all
68
+ ```
69
+
70
+ `all` expands to the Eliza-1 tier list used by the benchmark recipe.
71
+
72
+ ## Outputs
73
+
74
+ Each collection folder contains:
75
+
76
+ - `collection-manifest.json` with provenance, recipe, step results, evidence
77
+ summaries, readiness gaps, model inventory, benchmark comparisons, and source
78
+ sample previews.
79
+ - `README.md` with a markdown summary of sources, samples, models, evals,
80
+ benchmarks, readiness gaps, and step artifacts.
81
+ - `analysis/index.html` for per-run browsing of trajectories, datasets,
82
+ scenario turns, evals, benchmark rows, model stats, and collection evidence.
83
+ - A parent `collection-index.html` and `collection-index.json` that list saved
84
+ runs with source, eval, benchmark, model, readiness-gap, and viewer links.
85
+
86
+ Open the generated HTML files directly from the CLI output or from the
87
+ fine-tuning dashboard. Saved run cards expose the same source/eval/benchmark/model
88
+ artifact links as the collection index.
89
+
90
+ ## Listing saved runs
91
+
92
+ ```bash
93
+ bun run --cwd plugins/plugin-training src/core/cli.ts list-collections \
94
+ --root /tmp \
95
+ --limit 5
96
+ ```
97
+
98
+ The listing includes:
99
+
100
+ - `sources=` counts for Hugging Face, feed, natural, scenario, test, and JSONL
101
+ artifacts.
102
+ - `benchmarks=` plus baseline progression across Eliza-1 tiers.
103
+ - `evals=` with the first base-vs-trained improvement when available.
104
+ - `models=` with model inventory and first tracked model.
105
+ - `artifact-links=` counts for source and evidence links.
106
+ - `gaps=` with recommended next actions such as
107
+ `feed_generation:missing->terminal-training-feed-generate`. When an action
108
+ needs options, the summary includes `params={...}`, for example
109
+ `all_eliza1_tiers_benchmark:missing->terminal-training-run-collection params={"actionBenchmarkPairs":"all"}`.
110
+
111
+ The same recommended params are stored in `collection-manifest.json`, rendered
112
+ in `README.md`, shown in the per-run HTML viewer, surfaced in
113
+ `plugin-dash-fine-tuning`, and preserved by the `/api/training/collect` client
114
+ path. This keeps terminal, API, and dashboard continuation paths aligned.
115
+
116
+ ## Live benchmarks and evals
117
+
118
+ Dry runs prove artifact wiring and viewer coverage. Live model evaluation needs
119
+ the selected provider endpoints and secrets to be available before running with
120
+ `--live`. Use `--preflight-only --probe-endpoints` first; missing checks are also
121
+ stored in the run manifest and shown in the HTML viewers and dashboard.
122
+
123
+ The collection is Eliza-harness oriented. It does not use MMLU as the success
124
+ metric; base and trained models are compared on Eliza action/eval artifacts and
125
+ reported as percentage improvements, including Cerebras reference deltas when a
126
+ reference benchmark is present.
package/package.json ADDED
@@ -0,0 +1,109 @@
1
+ {
2
+ "name": "@elizaos/plugin-training",
3
+ "version": "2.0.3-beta.2",
4
+ "type": "module",
5
+ "main": "./dist/index.js",
6
+ "exports": {
7
+ "./package.json": "./package.json",
8
+ ".": {
9
+ "types": "./dist/index.d.ts",
10
+ "eliza-source": {
11
+ "types": "./src/index.ts",
12
+ "import": "./src/index.ts",
13
+ "default": "./src/index.ts"
14
+ },
15
+ "import": "./dist/index.js",
16
+ "default": "./dist/index.js"
17
+ },
18
+ "./setup-routes": {
19
+ "types": "./dist/setup-routes.d.ts",
20
+ "eliza-source": {
21
+ "types": "./src/setup-routes.ts",
22
+ "import": "./src/setup-routes.ts",
23
+ "default": "./src/setup-routes.ts"
24
+ },
25
+ "import": "./dist/setup-routes.js",
26
+ "default": "./dist/setup-routes.js"
27
+ },
28
+ "./*.css": "./dist/*.css",
29
+ "./*": {
30
+ "types": "./dist/*.d.ts",
31
+ "import": "./dist/*.js",
32
+ "default": "./dist/*.js"
33
+ }
34
+ },
35
+ "scripts": {
36
+ "train": "bun run src/cli/train.ts",
37
+ "collect": "bun run src/core/cli.ts run-collection",
38
+ "verify:view-switching": "bun run scripts/verify-view-switching.ts",
39
+ "gepa:view-context": "bun run scripts/gepa-view-context.ts",
40
+ "test": "vitest run",
41
+ "test:watch": "vitest",
42
+ "build": "bun run build:js && bun run build:views && bun run build:types",
43
+ "clean": "rm -rf dist",
44
+ "build:js": "tsup --config ../tsup.plugin-packages.shared.ts",
45
+ "build:views": "bunx --bun vite build --config vite.config.views.ts",
46
+ "build:types": "tsc --noCheck -p tsconfig.build.json"
47
+ },
48
+ "dependencies": {
49
+ "@elizaos/agent": "2.0.3-beta.2",
50
+ "@elizaos/core": "2.0.3-beta.2",
51
+ "@elizaos/scenario-runner": "2.0.3-beta.2",
52
+ "@elizaos/shared": "2.0.3-beta.2",
53
+ "@elizaos/ui": "2.0.3-beta.2"
54
+ },
55
+ "peerDependencies": {
56
+ "react": "*",
57
+ "react-dom": "*"
58
+ },
59
+ "agentConfig": {
60
+ "pluginType": "elizaos:plugin:1.0.0",
61
+ "pluginParameters": {}
62
+ },
63
+ "elizaos": {
64
+ "app": {
65
+ "displayName": "Fine Tuning",
66
+ "category": "tool",
67
+ "launchType": "internal-tab",
68
+ "icon": "BrainCircuit",
69
+ "heroImage": "assets/hero.png",
70
+ "capabilities": [
71
+ "training",
72
+ "fine-tuning",
73
+ "trajectories",
74
+ "datasets",
75
+ "models",
76
+ "evals",
77
+ "benchmarks",
78
+ "analysis",
79
+ "data-collection"
80
+ ],
81
+ "uiExtension": {
82
+ "detailPanelId": "plugin-dash-fine-tuning"
83
+ },
84
+ "developerOnly": true,
85
+ "visibleInAppStore": true
86
+ }
87
+ },
88
+ "publishConfig": {
89
+ "access": "public"
90
+ },
91
+ "types": "./dist/index.d.ts",
92
+ "files": [
93
+ "dist"
94
+ ],
95
+ "devDependencies": {
96
+ "@testing-library/react": "^16.3.2",
97
+ "@types/react": "^19.0.0",
98
+ "@types/react-dom": "^19.0.0",
99
+ "dotenv": "^17.2.3",
100
+ "jsdom": "^29.0.0",
101
+ "react": "19.2.5",
102
+ "react-dom": "19.2.5",
103
+ "tsup": "^8.5.1",
104
+ "typescript": "^6.0.3",
105
+ "vite": "^8.0.0",
106
+ "vitest": "^4.0.17"
107
+ },
108
+ "gitHead": "82fe0f44215954c2417328203f5bd6510985c1fc"
109
+ }