agent-curriculum 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_curriculum-0.1.0/.github/workflows/release.yml +38 -0
- agent_curriculum-0.1.0/.gitignore +15 -0
- agent_curriculum-0.1.0/LICENSE +21 -0
- agent_curriculum-0.1.0/PKG-INFO +90 -0
- agent_curriculum-0.1.0/README.md +77 -0
- agent_curriculum-0.1.0/__init__.py +0 -0
- agent_curriculum-0.1.0/pyproject.toml +17 -0
- agent_curriculum-0.1.0/src/agent_curriculum/__init__.py +3 -0
- agent_curriculum-0.1.0/src/agent_curriculum/cli.py +119 -0
- agent_curriculum-0.1.0/src/agent_curriculum/configs/stage1.yaml +9 -0
- agent_curriculum-0.1.0/src/agent_curriculum/configs/stage2.yaml +9 -0
- agent_curriculum-0.1.0/src/agent_curriculum/configs/stage3.yaml +9 -0
- agent_curriculum-0.1.0/src/agent_curriculum/configs/stage4.yaml +9 -0
- agent_curriculum-0.1.0/src/agent_curriculum/configs/stage5.yaml +9 -0
- agent_curriculum-0.1.0/src/agent_curriculum/difficulty_scorer.py +177 -0
- agent_curriculum-0.1.0/src/agent_curriculum/scheduler.py +80 -0
- agent_curriculum-0.1.0/src/agent_curriculum/stage_builder.py +197 -0
- agent_curriculum-0.1.0/src/agent_curriculum/trainer.py +124 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
name: Release to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- 'v*'
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
id-token: write
|
|
10
|
+
packages: write
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
build-and-publish:
|
|
14
|
+
name: Build and publish to PyPI
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
environment:
|
|
17
|
+
name: pypi
|
|
18
|
+
url: https://pypi.org/p/agent-curriculum
|
|
19
|
+
permissions:
|
|
20
|
+
id-token: write
|
|
21
|
+
|
|
22
|
+
steps:
|
|
23
|
+
- name: Checkout code
|
|
24
|
+
uses: actions/checkout@v4
|
|
25
|
+
|
|
26
|
+
- name: Set up Python
|
|
27
|
+
uses: actions/setup-python@v5
|
|
28
|
+
with:
|
|
29
|
+
python-version: '3.12'
|
|
30
|
+
|
|
31
|
+
- name: Install build dependencies
|
|
32
|
+
run: python -m pip install --upgrade build
|
|
33
|
+
|
|
34
|
+
- name: Build package
|
|
35
|
+
run: python -m build
|
|
36
|
+
|
|
37
|
+
- name: Publish package to PyPI
|
|
38
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 FableForge Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agent-curriculum
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Curriculum learning for agent training — difficulty-scored stages
|
|
5
|
+
License-File: LICENSE
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Requires-Dist: click>=8.0
|
|
8
|
+
Requires-Dist: numpy>=1.24
|
|
9
|
+
Requires-Dist: pydantic>=2.0
|
|
10
|
+
Requires-Dist: pyyaml>=6.0
|
|
11
|
+
Requires-Dist: rich>=13.0
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
|
|
14
|
+
# AgentCurriculum
|
|
15
|
+
|
|
16
|
+
[License: MIT](LICENSE) [Python 3.10+](https://www.python.org/downloads/) [Tests](tests/)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
Curriculum learning for coding agents — train through progressive difficulty stages.
|
|
20
|
+
|
|
21
|
+
## Overview
|
|
22
|
+
|
|
23
|
+
AgentCurriculum ranks agent traces by difficulty and builds a 5-stage training curriculum where the model learns basic tool use first, then progressively harder multi-step reasoning and error recovery.
|
|
24
|
+
|
|
25
|
+
## Stages
|
|
26
|
+
|
|
27
|
+
| Stage | Name | Difficulty | Tools | Errors | LR | LoRA r |
|
|
28
|
+
|-------|------|-----------|-------|--------|----|--------|
|
|
29
|
+
| 1 | Basic | 0.0–0.2 | <5 | 0 | 2e-4 | 64 |
|
|
30
|
+
| 2 | Intermediate | 0.2–0.4 | <15 | ≤2 | 1e-4 | 64 |
|
|
31
|
+
| 3 | Advanced | 0.4–0.6 | <30 | ≤5 | 5e-5 | 32 |
|
|
32
|
+
| 4 | Expert | 0.6–0.8 | <60 | ≤10 | 3e-5 | 32 |
|
|
33
|
+
| 5 | Master | 0.8–1.0 | <100 | ≤20 | 1e-5 | 16 |
|
|
34
|
+
|
|
35
|
+
## Installation
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install agent-curriculum
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Quick Start
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
from agent_curriculum import DifficultyScorer, StageBuilder, CurriculumTrainer
|
|
45
|
+
|
|
46
|
+
# Score traces by difficulty
|
|
47
|
+
scorer = DifficultyScorer()
|
|
48
|
+
scores = scorer.score_file("traces.jsonl")
|
|
49
|
+
|
|
50
|
+
# Build curriculum stages
|
|
51
|
+
builder = StageBuilder(scorer=scorer)
|
|
52
|
+
stages = builder.build_stages("traces.jsonl")
|
|
53
|
+
builder.generate_configs("configs/")
|
|
54
|
+
|
|
55
|
+
# Train through the curriculum
|
|
56
|
+
trainer = CurriculumTrainer(base_model="Qwen/Qwen2.5-14B")
|
|
57
|
+
results = trainer.train_curriculum("traces.jsonl", start_stage=1, end_stage=5)
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## License
|
|
61
|
+
|
|
62
|
+
MIT
|
|
63
|
+
|
|
64
|
+
## Ecosystem
|
|
65
|
+
|
|
66
|
+
Part of the [FableForge](../) ecosystem — 21 open-source projects built from 210K real agent traces:
|
|
67
|
+
|
|
68
|
+
| Project | Description |
|
|
69
|
+
| --- | --- |
|
|
70
|
+
| **[Anvil](../anvil)** | Self-verified coding agent |
|
|
71
|
+
| **[VerifyLoop](../verifyloop)** | Plan→Execute→Verify→Recover framework |
|
|
72
|
+
| **[ErrorRecovery](../error-recovery)** | Self-healing middleware (3,725 error patterns) |
|
|
73
|
+
| **[FableForge-14B](../fableforge-14b)** | The fine-tuned 14B model (4-stage training) |
|
|
74
|
+
| **[ShellWhisperer](../shell-whisperer)** | 1.5B edge agent (phone/RPi, 50ms) |
|
|
75
|
+
| **[ReasonCritic](../reason-critic)** | Verification model (130 benchmark tasks) |
|
|
76
|
+
| **[TraceCompiler](../trace-compiler)** | Compile traces → LoRA skills |
|
|
77
|
+
| **[AgentRuntime](../agent-runtime)** | Persistent agent daemon (systemd for AI) |
|
|
78
|
+
| **[AgentSwarm](../agent-swarm)** | Multi-agent from real trace transitions |
|
|
79
|
+
| **[AgentTelemetry](../agent-telemetry)** | Datadog for agents (token tracking, costs) |
|
|
80
|
+
| **[BenchAgent](../bench-agent)** | HumanEval for tool-use (107 tasks) |
|
|
81
|
+
| **[AgentDev](../agent-dev)** | VSCode extension with verification |
|
|
82
|
+
| **[TraceViz](../trace-viz)** | Trace replay visualizer (Next.js) |
|
|
83
|
+
| **[AgentSkills](../agent-skills)** | npm for agent behaviors |
|
|
84
|
+
| **[AgentCurriculum](../agent-curriculum)** | 5-stage progressive training |
|
|
85
|
+
| **[AgentFuzzer](../agent-fuzzer)** | Adversarial testing for agents |
|
|
86
|
+
| **[AgentConstitution](../agent-constitution)** | Safety guardrails from traces |
|
|
87
|
+
| **[CostOptimizer](../cost-optimizer)** | Token cost reduction (50-80%) |
|
|
88
|
+
| **[AgentProfiler](../agent-profiler)** | Behavioral fingerprinting |
|
|
89
|
+
| **[TrajectoryDistiller](../trajectory-distiller)** | Trace→training data pipeline |
|
|
90
|
+
| **[Fable5-Dataset](../fable5-dataset)** | HuggingFace dataset release |
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# AgentCurriculum
|
|
2
|
+
|
|
3
|
+
[License: MIT](LICENSE) [Python 3.10+](https://www.python.org/downloads/) [Tests](tests/)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
Curriculum learning for coding agents — train through progressive difficulty stages.
|
|
7
|
+
|
|
8
|
+
## Overview
|
|
9
|
+
|
|
10
|
+
AgentCurriculum ranks agent traces by difficulty and builds a 5-stage training curriculum where the model learns basic tool use first, then progressively harder multi-step reasoning and error recovery.
|
|
11
|
+
|
|
12
|
+
## Stages
|
|
13
|
+
|
|
14
|
+
| Stage | Name | Difficulty | Tools | Errors | LR | LoRA r |
|
|
15
|
+
|-------|------|-----------|-------|--------|----|--------|
|
|
16
|
+
| 1 | Basic | 0.0–0.2 | <5 | 0 | 2e-4 | 64 |
|
|
17
|
+
| 2 | Intermediate | 0.2–0.4 | <15 | ≤2 | 1e-4 | 64 |
|
|
18
|
+
| 3 | Advanced | 0.4–0.6 | <30 | ≤5 | 5e-5 | 32 |
|
|
19
|
+
| 4 | Expert | 0.6–0.8 | <60 | ≤10 | 3e-5 | 32 |
|
|
20
|
+
| 5 | Master | 0.8–1.0 | <100 | ≤20 | 1e-5 | 16 |
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pip install agent-curriculum
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Quick Start
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
from agent_curriculum import DifficultyScorer, StageBuilder, CurriculumTrainer
|
|
32
|
+
|
|
33
|
+
# Score traces by difficulty
|
|
34
|
+
scorer = DifficultyScorer()
|
|
35
|
+
scores = scorer.score_file("traces.jsonl")
|
|
36
|
+
|
|
37
|
+
# Build curriculum stages
|
|
38
|
+
builder = StageBuilder(scorer=scorer)
|
|
39
|
+
stages = builder.build_stages("traces.jsonl")
|
|
40
|
+
builder.generate_configs("configs/")
|
|
41
|
+
|
|
42
|
+
# Train through the curriculum
|
|
43
|
+
trainer = CurriculumTrainer(base_model="Qwen/Qwen2.5-14B")
|
|
44
|
+
results = trainer.train_curriculum("traces.jsonl", start_stage=1, end_stage=5)
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## License
|
|
48
|
+
|
|
49
|
+
MIT
|
|
50
|
+
|
|
51
|
+
## Ecosystem
|
|
52
|
+
|
|
53
|
+
Part of the [FableForge](../) ecosystem — 21 open-source projects built from 210K real agent traces:
|
|
54
|
+
|
|
55
|
+
| Project | Description |
|
|
56
|
+
| --- | --- |
|
|
57
|
+
| **[Anvil](../anvil)** | Self-verified coding agent |
|
|
58
|
+
| **[VerifyLoop](../verifyloop)** | Plan→Execute→Verify→Recover framework |
|
|
59
|
+
| **[ErrorRecovery](../error-recovery)** | Self-healing middleware (3,725 error patterns) |
|
|
60
|
+
| **[FableForge-14B](../fableforge-14b)** | The fine-tuned 14B model (4-stage training) |
|
|
61
|
+
| **[ShellWhisperer](../shell-whisperer)** | 1.5B edge agent (phone/RPi, 50ms) |
|
|
62
|
+
| **[ReasonCritic](../reason-critic)** | Verification model (130 benchmark tasks) |
|
|
63
|
+
| **[TraceCompiler](../trace-compiler)** | Compile traces → LoRA skills |
|
|
64
|
+
| **[AgentRuntime](../agent-runtime)** | Persistent agent daemon (systemd for AI) |
|
|
65
|
+
| **[AgentSwarm](../agent-swarm)** | Multi-agent from real trace transitions |
|
|
66
|
+
| **[AgentTelemetry](../agent-telemetry)** | Datadog for agents (token tracking, costs) |
|
|
67
|
+
| **[BenchAgent](../bench-agent)** | HumanEval for tool-use (107 tasks) |
|
|
68
|
+
| **[AgentDev](../agent-dev)** | VSCode extension with verification |
|
|
69
|
+
| **[TraceViz](../trace-viz)** | Trace replay visualizer (Next.js) |
|
|
70
|
+
| **[AgentSkills](../agent-skills)** | npm for agent behaviors |
|
|
71
|
+
| **[AgentCurriculum](../agent-curriculum)** | 5-stage progressive training |
|
|
72
|
+
| **[AgentFuzzer](../agent-fuzzer)** | Adversarial testing for agents |
|
|
73
|
+
| **[AgentConstitution](../agent-constitution)** | Safety guardrails from traces |
|
|
74
|
+
| **[CostOptimizer](../cost-optimizer)** | Token cost reduction (50-80%) |
|
|
75
|
+
| **[AgentProfiler](../agent-profiler)** | Behavioral fingerprinting |
|
|
76
|
+
| **[TrajectoryDistiller](../trajectory-distiller)** | Trace→training data pipeline |
|
|
77
|
+
| **[Fable5-Dataset](../fable5-dataset)** | HuggingFace dataset release |
|
|
File without changes
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "agent-curriculum"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Curriculum learning for agent training — difficulty-scored stages"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
dependencies = ["pydantic>=2.0", "pyyaml>=6.0", "numpy>=1.24", "click>=8.0", "rich>=13.0"]
|
|
12
|
+
|
|
13
|
+
[project.scripts]
|
|
14
|
+
acurriculum = "agent_curriculum.cli:cli"
|
|
15
|
+
|
|
16
|
+
[tool.hatch.build.targets.wheel]
|
|
17
|
+
packages = ["src/agent_curriculum"]
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""CLI for AgentCurriculum — curriculum learning with difficulty-scored stages."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
import click
|
|
10
|
+
from rich.console import Console
|
|
11
|
+
from rich.table import Table
|
|
12
|
+
|
|
13
|
+
from agent_curriculum.difficulty_scorer import DifficultyScorer
|
|
14
|
+
from agent_curriculum.stage_builder import StageBuilder
|
|
15
|
+
from agent_curriculum.scheduler import DEFAULT_SCHEDULE
|
|
16
|
+
|
|
17
|
+
console = Console()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@click.group()
@click.version_option(version="0.1.0")
def cli():
    """AgentCurriculum — Curriculum learning for agent training with difficulty-scored stages."""
    # Intentionally empty: this function only anchors the command group.
    # The docstring above is what click shows as the group's help text, and
    # the sub-commands attach themselves through ``@cli.command()``.
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@cli.command()
@click.argument("trace_file", type=click.Path(exists=True))
@click.option("--output", "-o", type=click.Path(), help="Output JSONL file for scores")
def score(trace_file, output):
    """Score traces by difficulty level."""
    # NOTE: the docstring above is also the command's --help text.
    #
    # Scores every trace in ``trace_file``, prints them as a table and, when
    # ``--output`` is given, also writes one JSON object per line to that file.

    # Rich style used for each difficulty level; unknown levels fall back to
    # "white". Built once instead of once per row.
    level_styles = {
        "basic": "green",
        "intermediate": "yellow",
        "advanced": "orange3",
        "expert": "red",
        "master": "bold red",
    }

    scores = DifficultyScorer().score_file(trace_file)

    table = Table(title="Difficulty Scores")
    table.add_column("Trace ID", style="cyan")
    table.add_column("Difficulty", style="bold")
    table.add_column("Level", style="magenta")
    table.add_column("Tools", justify="right")
    table.add_column("Errors", justify="right")

    for s in scores:
        level_color = level_styles.get(s.difficulty_level, "white")
        table.add_row(
            # The id comes straight from the trace JSON and may be a number;
            # Table.add_row only accepts strings/renderables, so stringify it.
            str(s.trace_id),
            f"{s.overall_difficulty:.3f}",
            f"[{level_color}]{s.difficulty_level}[/{level_color}]",
            str(s.tool_count),
            str(s.error_count),
        )

    console.print(table)

    if output:
        # JSONL: every record, including the last, ends with a newline so the
        # file can be appended to or concatenated safely.
        records = [json.dumps(s.to_dict()) + "\n" for s in scores]
        Path(output).write_text("".join(records), encoding="utf-8")
        console.print(f"[green]Scores saved to {output}[/green]")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@cli.command()
@click.argument("trace_file", type=click.Path(exists=True))
@click.option("--stages", "-s", default=5, type=int, help="Number of curriculum stages")
@click.option("--output", "-o", type=click.Path(), help="Output directory for stage files")
def build(trace_file, stages, output):
    """Build curriculum stages from a trace file."""
    # NOTE: the docstring above is also the command's --help text.
    #
    # Prints a summary table of the stages built from ``trace_file`` and, when
    # ``--output`` is given, writes one ``stage_<id>_<name>.json`` per stage
    # into that directory (created if missing).
    if stages < 1:
        raise click.BadParameter("must be at least 1", param_hint="--stages")

    # StageBuilder takes ``scorer``/``stages`` (not ``num_stages``) and its
    # file entry point is ``build_stages``; the previous call used names that
    # do not exist on the class and therefore always raised.
    stager = StageBuilder()
    # NOTE(review): ``--stages`` is honoured by keeping the first N of the
    # builder's stages; confirm this is the intended meaning of the option.
    result = stager.build_stages(trace_file)[:stages]

    table = Table(title=f"Curriculum Stages ({len(result)} stages)")
    table.add_column("Stage", style="cyan")
    table.add_column("Name", style="bold")
    table.add_column("Difficulty", style="magenta")
    table.add_column("Traces", justify="right")

    for stage in result:
        table.add_row(
            str(stage.stage_id),
            stage.name,
            f"[{stage.difficulty_range[0]:.1f} - {stage.difficulty_range[1]:.1f}]",
            str(len(stage.traces)),
        )

    console.print(table)

    if output:
        out_path = Path(output)
        out_path.mkdir(parents=True, exist_ok=True)
        for stage in result:
            stage_file = out_path / f"stage_{stage.stage_id}_{stage.name}.json"
            stage_file.write_text(json.dumps(stage.to_dict(), indent=2), encoding="utf-8")
        console.print(f"[green]Stages saved to {output}[/green]")
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@cli.command()
def schedule():
    """Show the default learning rate / batch size schedule."""
    # NOTE: the docstring above is also the command's --help text.
    table = Table(title="Default Curriculum Schedule")

    # (header, keyword options) for each column, in display order.
    column_specs = (
        ("Stage", {"style": "cyan"}),
        ("LR", {"style": "green"}),
        ("Batch", {"justify": "right"}),
        ("LoRA r", {"justify": "right"}),
        ("LoRA alpha", {"justify": "right"}),
        ("Epochs", {"justify": "right"}),
    )
    for header, options in column_specs:
        table.add_column(header, **options)

    # Attributes rendered verbatim (via str) after the stage id and the LR.
    plain_attrs = ("batch_size", "lora_r", "lora_alpha", "num_epochs")
    for entry in DEFAULT_SCHEDULE:
        table.add_row(
            str(entry.stage_id),
            f"{entry.learning_rate:.1e}",
            *(str(getattr(entry, attr)) for attr in plain_attrs),
        )

    console.print(table)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
if __name__ == "__main__":
|
|
119
|
+
cli()
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
"""Score trace difficulty based on tool count, error count, reasoning length, and session duration."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import math
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class DifficultyScore:
    """Raw metrics and derived difficulty scores for one agent trace."""

    trace_id: str
    tool_count: int = 0
    error_count: int = 0
    reasoning_length: int = 0
    session_duration: float = 0.0
    unique_tools: int = 0
    retry_count: int = 0
    branch_count: int = 0

    # Derived values
    tool_complexity: float = 0.0
    error_complexity: float = 0.0
    reasoning_complexity: float = 0.0
    duration_complexity: float = 0.0
    overall_difficulty: float = 0.0
    difficulty_level: str = "basic"

    def to_dict(self) -> dict[str, Any]:
        """Return the score as a plain dict, rounding derived floats to 3 places."""
        # Raw metrics are copied through unchanged.
        verbatim = (
            "trace_id",
            "tool_count",
            "error_count",
            "reasoning_length",
            "session_duration",
            "unique_tools",
            "retry_count",
            "branch_count",
        )
        # Derived scores are rounded for compact output.
        rounded = (
            "tool_complexity",
            "error_complexity",
            "reasoning_complexity",
            "duration_complexity",
            "overall_difficulty",
        )

        payload: dict[str, Any] = {key: getattr(self, key) for key in verbatim}
        for key in rounded:
            payload[key] = round(getattr(self, key), 3)
        payload["difficulty_level"] = self.difficulty_level
        return payload
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class DifficultyScorer:
    """Score agent traces by difficulty to build curriculum training stages.

    Six sub-scores, each normalised to the range 0-1, are combined as a
    weighted sum:

    - Tool count: more tool calls = harder (capped at ``max_tool_count``)
    - Error count: more errors = harder, saturating towards 1
    - Reasoning length: longer reasoning = harder
    - Session duration: longer sessions = harder
    - Unique tools: more distinct tool names = harder (capped at 6)
    - Branch count: more decision branches = harder (capped at 10)

    Retries are counted and reported on the score but do not feed into the
    weighted sum.
    """

    # Exclusive upper bound of each named level, in ascending order. A score
    # at or above the last bound is classified as "master".
    _LEVEL_BOUNDS = (
        (0.2, "basic"),
        (0.4, "intermediate"),
        (0.6, "advanced"),
        (0.8, "expert"),
    )

    def __init__(
        self,
        tool_weight: float = 0.25,
        error_weight: float = 0.20,
        reasoning_weight: float = 0.20,
        duration_weight: float = 0.15,
        diversity_weight: float = 0.10,
        branch_weight: float = 0.10,
        max_tool_count: int = 100,
        max_reasoning_length: int = 50000,
        max_session_duration: float = 3600.0,
    ):
        """Store the sub-score weights and the normalisation caps.

        Args:
            tool_weight: Weight of the tool-count sub-score.
            error_weight: Weight of the error-count sub-score.
            reasoning_weight: Weight of the reasoning-length sub-score.
            duration_weight: Weight of the session-duration sub-score.
            diversity_weight: Weight of the unique-tools sub-score.
            branch_weight: Weight of the branch-count sub-score.
            max_tool_count: Tool-call count that maps to a sub-score of 1.0.
            max_reasoning_length: Reasoning length that maps to 1.0.
            max_session_duration: Duration that maps to 1.0.

        The weights are used as given; they are not normalised or validated.
        """
        self.tool_weight = tool_weight
        self.error_weight = error_weight
        self.reasoning_weight = reasoning_weight
        self.duration_weight = duration_weight
        self.diversity_weight = diversity_weight
        self.branch_weight = branch_weight
        self.max_tool_count = max_tool_count
        self.max_reasoning_length = max_reasoning_length
        self.max_session_duration = max_session_duration

    @staticmethod
    def _tool_name(call: Any) -> Any:
        """Return the ``name`` of a tool-call record, or "" if it is not a dict."""
        return call.get("name", "") if isinstance(call, dict) else ""

    @staticmethod
    def _is_retry(error: Any) -> bool:
        """Tell whether an error entry (dict or plain string) describes a retry."""
        # Only dict entries have a ``type``; anything else is matched on text.
        if isinstance(error, dict) and error.get("type") == "retry":
            return True
        return "retry" in str(error).lower()

    def score_trace(self, trace: dict[str, Any]) -> DifficultyScore:
        """Score an individual trace for difficulty.

        Args:
            trace: Dictionary with trace data. The keys read are ``id`` /
                ``trace_id``, ``tool_calls``, ``errors``, ``reasoning``,
                ``duration`` / ``session_duration`` and ``branch_count``;
                all are optional.

        Returns:
            DifficultyScore with computed difficulty metrics.
        """
        trace_id = trace.get("id", trace.get("trace_id", "unknown"))
        # ``or []`` also covers an explicit JSON null, not just a missing key.
        tool_calls = trace.get("tool_calls") or []
        errors = trace.get("errors") or []
        reasoning = trace.get("reasoning", "")
        duration = trace.get("duration", trace.get("session_duration", 0.0))
        if not isinstance(duration, (int, float)):
            # null or non-numeric duration: treat as "no duration recorded".
            duration = 0.0

        # Extract metrics
        names = [self._tool_name(call) for call in tool_calls]
        tool_count = len(tool_calls)
        error_count = len(errors)
        unique_tools = len({name for name in names if name})
        retry_count = sum(1 for err in errors if self._is_retry(err))

        branch_count = trace.get("branch_count")
        if not isinstance(branch_count, (int, float)):
            # Fallback estimate: distinct tool names among the first 5 calls.
            branch_count = len(set(names[:5]))

        if isinstance(reasoning, str):
            reasoning_length = len(reasoning)
        elif isinstance(reasoning, list):
            reasoning_length = sum(len(step) for step in reasoning)
        else:
            reasoning_length = 0

        # Compute normalized sub-scores (0-1)
        tool_complexity = min(tool_count / self.max_tool_count, 1.0) if self.max_tool_count > 0 else 0.0
        # Saturating curve: 0 errors -> 0.0, rising towards 1.0 as errors grow.
        error_complexity = 1.0 - math.exp(-error_count / 3.0)
        reasoning_complexity = (
            min(reasoning_length / self.max_reasoning_length, 1.0) if self.max_reasoning_length > 0 else 0.0
        )
        # A negative duration is bad data; keep it from lowering the score.
        duration_complexity = (
            max(0.0, min(duration / self.max_session_duration, 1.0)) if self.max_session_duration > 0 else 0.0
        )
        diversity_score = min(unique_tools / 6.0, 1.0)  # 6 distinct tools -> 1.0
        branch_complexity = min(branch_count / 10.0, 1.0) if branch_count > 0 else 0.0

        # Weighted overall difficulty
        overall = (
            tool_complexity * self.tool_weight
            + error_complexity * self.error_weight
            + reasoning_complexity * self.reasoning_weight
            + duration_complexity * self.duration_weight
            + diversity_score * self.diversity_weight
            + branch_complexity * self.branch_weight
        )

        # Classify difficulty level
        level = "master"
        for upper_bound, label in self._LEVEL_BOUNDS:
            if overall < upper_bound:
                level = label
                break

        return DifficultyScore(
            trace_id=trace_id,
            tool_count=tool_count,
            error_count=error_count,
            reasoning_length=reasoning_length,
            session_duration=duration,
            unique_tools=unique_tools,
            retry_count=retry_count,
            branch_count=branch_count,
            tool_complexity=tool_complexity,
            error_complexity=error_complexity,
            reasoning_complexity=reasoning_complexity,
            duration_complexity=duration_complexity,
            overall_difficulty=overall,
            difficulty_level=level,
        )

    def score_file(self, path: str | Path) -> list[DifficultyScore]:
        """Score all traces in a JSONL file.

        Blank lines and lines that are not valid JSON are skipped.

        Args:
            path: Path to JSONL file with traces.

        Returns:
            List of DifficultyScore objects, in file order.
        """
        path = Path(path)
        scores = []
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(path, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    trace = json.loads(line)
                except json.JSONDecodeError:
                    # Deliberate best-effort: only undecodable lines are
                    # dropped. Callers that re-read the file and pair traces
                    # with scores by position depend on exactly these lines
                    # being skipped, so nothing else is filtered here.
                    continue
                scores.append(self.score_trace(trace))
        return scores
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""Learning rate and batch size scheduling per curriculum stage."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
import yaml
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class ScheduleConfig:
    """Training hyperparameters applied to a single curriculum stage."""

    stage_id: int
    learning_rate: float
    batch_size: int
    warmup_steps: int = 0
    lora_r: int = 64
    lora_alpha: int = 128
    gradient_accumulation_steps: int = 4
    weight_decay: float = 0.01
    num_epochs: int = 2

    def to_dict(self) -> dict[str, Any]:
        """Return every dataclass field as a plain ``name -> value`` mapping."""
        snapshot: dict[str, Any] = {}
        for field_name in self.__dataclass_fields__:
            snapshot[field_name] = getattr(self, field_name)
        return snapshot
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Default schedule, one entry per stage in ascending stage order. From stage 1
# to stage 5 the learning rate, batch size, LoRA rank and epoch count each
# decrease or stay level. Rows are
# (stage_id, learning_rate, batch_size, lora_r, num_epochs).
DEFAULT_SCHEDULE: list[ScheduleConfig] = [
    ScheduleConfig(
        stage_id=stage_id,
        learning_rate=learning_rate,
        batch_size=batch_size,
        lora_r=lora_r,
        num_epochs=num_epochs,
    )
    for stage_id, learning_rate, batch_size, lora_r, num_epochs in (
        (1, 2e-4, 8, 64, 3),
        (2, 1e-4, 4, 64, 2),
        (3, 5e-5, 4, 32, 2),
        (4, 3e-5, 2, 32, 2),
        (5, 1e-5, 2, 16, 1),
    )
]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class CurriculumScheduler:
    """Look up and export per-stage training hyperparameters.

    Holds a list of ScheduleConfig entries (learning rate, batch size, LoRA
    settings, ...) and resolves them by ``stage_id``. When no schedule is
    supplied, a copy of DEFAULT_SCHEDULE is used.
    """

    def __init__(self, schedule: list[ScheduleConfig] | None = None):
        """Create a scheduler.

        Args:
            schedule: Stage configs to serve. ``None`` or an empty list
                selects the default schedule.
        """
        # Copy the default so edits to ``self.schedule`` cannot leak into the
        # module-level DEFAULT_SCHEDULE shared by every other scheduler.
        self.schedule = schedule if schedule else list(DEFAULT_SCHEDULE)

    def get_config(self, stage_id: int) -> ScheduleConfig:
        """Get training config for a specific stage.

        Args:
            stage_id: Identifier of the curriculum stage to look up.

        Returns:
            ScheduleConfig for the stage (the first entry whose ``stage_id``
            matches).

        Raises:
            ValueError: If no entry in the schedule has this ``stage_id``.
        """
        for s in self.schedule:
            if s.stage_id == stage_id:
                return s
        # Report the ids that really exist; a custom schedule need not be
        # numbered 1..N, so a "1-N" range would be misleading.
        known = ", ".join(str(s.stage_id) for s in self.schedule) or "none"
        raise ValueError(f"Unknown stage {stage_id}. Available: {known}")

    def get_all_configs(self) -> list[ScheduleConfig]:
        """Get all stage configs in schedule order, as a new list."""
        return list(self.schedule)

    def save_configs(self, output_dir: str) -> None:
        """Save all stage configs as YAML files.

        Writes ``stage<id>.yaml`` for every entry into ``output_dir``,
        creating the directory (and parents) when needed.

        Args:
            output_dir: Directory that receives the YAML files.
        """
        from pathlib import Path

        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)
        for config in self.schedule:
            path = output_path / f"stage{config.stage_id}.yaml"
            # Explicit encoding keeps the output independent of the locale.
            with open(path, "w", encoding="utf-8") as f:
                yaml.dump(config.to_dict(), f, default_flow_style=False)
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""Build curriculum training stages from difficulty-scored traces."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
import yaml
|
|
11
|
+
|
|
12
|
+
from agent_curriculum.difficulty_scorer import DifficultyScore, DifficultyScorer
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class CurriculumStage:
    """One curriculum stage: its limits, training config and assigned traces."""

    stage_id: int
    name: str
    description: str
    difficulty_range: tuple[float, float]
    max_tools: int
    max_errors: int
    traces: list[dict[str, Any]] = field(default_factory=list)
    scores: list[DifficultyScore] = field(default_factory=list)
    config: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Summarise the stage as a plain dict.

        Traces are reported only as a count (``num_traces``) and the
        per-trace scores are left out.
        """
        summary: dict[str, Any] = {
            key: getattr(self, key) for key in ("stage_id", "name", "description")
        }
        # A list rather than a tuple, matching what JSON/YAML would hold.
        summary["difficulty_range"] = [*self.difficulty_range]
        summary["max_tools"] = self.max_tools
        summary["max_errors"] = self.max_errors
        summary["num_traces"] = len(self.traces)
        summary["config"] = self.config
        return summary
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Pre-defined curriculum stages, ordered from easiest to hardest; stage ids
# are assigned from 1 in row order. Each row is
# (name, description, difficulty_range, max_tools, max_errors,
#  learning_rate, batch_size, num_epochs, lora_r).
STAGE_DEFINITIONS = [
    CurriculumStage(
        stage_id=stage_id,
        name=name,
        description=description,
        difficulty_range=difficulty_range,
        max_tools=max_tools,
        max_errors=max_errors,
        config={
            "learning_rate": learning_rate,
            "batch_size": batch_size,
            "num_epochs": num_epochs,
            "lora_r": lora_r,
        },
    )
    for stage_id, (
        name,
        description,
        difficulty_range,
        max_tools,
        max_errors,
        learning_rate,
        batch_size,
        num_epochs,
        lora_r,
    ) in enumerate(
        (
            (
                "basic",
                "Basic tool use — simple read/edit/bash tasks with no errors",
                (0.0, 0.2), 5, 0, 2e-4, 8, 3, 64,
            ),
            (
                "intermediate",
                "Intermediate tasks — multi-tool sequences with simple error recovery",
                (0.2, 0.4), 15, 2, 1e-4, 4, 2, 64,
            ),
            (
                "advanced",
                "Advanced tasks — complex multi-step with error recovery",
                (0.4, 0.6), 30, 5, 5e-5, 4, 2, 32,
            ),
            (
                "expert",
                "Expert tasks — multi-session with complex error patterns",
                (0.6, 0.8), 60, 10, 3e-5, 2, 2, 32,
            ),
            (
                "master",
                "Master tasks — long-horizon planning with complex recovery",
                (0.8, 1.0), 100, 20, 1e-5, 2, 1, 16,
            ),
        ),
        start=1,
    )
]
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class StageBuilder:
    """Build training stages from difficulty-scored traces.

    Assigns traces to curriculum stages based on difficulty scores and
    generates YAML configs for each stage.
    """

    def __init__(self, scorer: DifficultyScorer | None = None, stages: list[CurriculumStage] | None = None):
        """Create a builder.

        Args:
            scorer: Object whose ``score_file(path)`` yields one score per
                trace. A default ``DifficultyScorer`` is created when omitted.
            stages: Stages to fill. Used as-is when given; when omitted (or
                empty) independent copies of ``STAGE_DEFINITIONS`` are used.
        """
        # Local import keeps this class self-contained with respect to the
        # module's import block.
        import copy

        self.scorer = scorer or DifficultyScorer()
        if stages:
            self.stages = stages
        else:
            # Deep-copy the defaults: a shallow ``**s.__dict__`` copy would
            # hand every builder the same trace/score lists and config dicts
            # as the module-level definitions, so appending traces here would
            # leak into STAGE_DEFINITIONS and into other builders.
            self.stages = [
                copy.deepcopy(s)
                if isinstance(s, CurriculumStage)
                else CurriculumStage(**copy.deepcopy(s))
                for s in STAGE_DEFINITIONS
            ]

    def build_stages(self, trace_path: str | Path) -> list[CurriculumStage]:
        """Build curriculum stages from a trace file.

        Every call starts from empty stages, so calling this twice does not
        duplicate traces. A trace goes to the first stage whose half-open
        range ``[low, high)`` contains its ``overall_difficulty``; a trace
        that matches no range (for example a score equal to the upper bound
        of the last stage) goes to the stage whose range is nearest.

        Args:
            trace_path: Path to JSONL file with agent traces.

        Returns:
            List of CurriculumStage objects with assigned traces.
        """
        trace_path = Path(trace_path)
        scores = self.scorer.score_file(trace_path)

        # Load traces; blank lines and lines that are not valid JSON are skipped.
        # NOTE(review): scores and traces are paired positionally below, which
        # assumes score_file() skips exactly the same lines - confirm against
        # the scorer implementation.
        traces: list[dict[str, Any]] = []
        with open(trace_path, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    traces.append(json.loads(line))
                except json.JSONDecodeError:
                    continue

        # Rebind (rather than clear) so lists handed out by an earlier call
        # are left intact for whoever still holds them.
        for stage in self.stages:
            stage.traces = []
            stage.scores = []

        if not self.stages:
            return self.stages

        for score, trace in zip(scores, traces):
            stage = self._closest_stage(score.overall_difficulty)
            stage.traces.append(trace)
            stage.scores.append(score)

        return self.stages

    def _closest_stage(self, difficulty: float) -> CurriculumStage:
        """Return the stage for *difficulty*, falling back to the nearest range."""
        for stage in self.stages:
            low, high = stage.difficulty_range
            if low <= difficulty < high:
                return stage

        def gap(stage: CurriculumStage) -> float:
            # Distance from the score to the stage's closed interval.
            low, high = stage.difficulty_range
            if difficulty < low:
                return low - difficulty
            if difficulty > high:
                return difficulty - high
            return 0.0

        # min() keeps the earliest stage on ties.
        return min(self.stages, key=gap)

    def generate_configs(self, output_dir: str | Path) -> list[Path]:
        """Generate YAML config files for each stage.

        One ``stage<ID>.yaml`` file is written per stage. Entries from the
        stage's own ``config`` are merged last, so they take precedence over
        the descriptive keys if a name collides.

        Args:
            output_dir: Directory to write config files (created if missing).

        Returns:
            List of paths to generated config files.
        """
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        config_paths = []

        for stage in self.stages:
            config = {
                "stage_id": stage.stage_id,
                "name": stage.name,
                "description": stage.description,
                "difficulty_range": list(stage.difficulty_range),
                "num_traces": len(stage.traces),
                **stage.config,
            }
            config_path = output_dir / f"stage{stage.stage_id}.yaml"
            with open(config_path, "w", encoding="utf-8") as f:
                yaml.dump(config, f, default_flow_style=False)
            config_paths.append(config_path)

        return config_paths
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""Train model through curriculum stages with progressive difficulty."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from agent_curriculum.stage_builder import StageBuilder, CurriculumStage
|
|
12
|
+
from agent_curriculum.difficulty_scorer import DifficultyScorer
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class TrainingResult:
    """Outcome record for one curriculum stage.

    Only the three identifying fields are required; everything else has a
    default so a result can be created before any artefacts exist.
    """

    # Identity of the stage this record belongs to.
    stage_id: int
    stage_name: str
    # How many traces were assigned to the stage.
    num_traces: int
    # Free-form state label; defaults to "configured".
    status: str = "configured"
    # Directory holding the stage's files, as a string ("" when unset).
    output_dir: str = ""
    # Arbitrary per-stage numbers; each instance gets its own dict.
    metrics: dict[str, Any] = field(default_factory=dict)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class CurriculumTrainer:
    """Walk a model through curriculum stages of increasing difficulty.

    The idea behind the curriculum is to start with easy examples (basic
    tool use) and move on to harder ones (multi-step reasoning with
    errors) so that foundational skills are in place before complex tasks
    are introduced. This class prepares the per-stage artefacts; it does
    not itself run a training loop.
    """

    def __init__(
        self,
        base_model: str = "Qwen/Qwen2.5-14B",
        output_dir: str = "output/curriculum",
        scorer: DifficultyScorer | None = None,
        builder: StageBuilder | None = None,
    ):
        """Store settings and create default collaborators where none are given.

        Args:
            base_model: Model identifier recorded in every stage config.
            output_dir: Root directory under which stage folders are created.
            scorer: Difficulty scorer; a ``DifficultyScorer`` is made if falsy.
            builder: Stage builder; a ``StageBuilder`` sharing ``scorer`` is
                made if falsy.
        """
        self.base_model = base_model
        self.output_dir = Path(output_dir)
        self.scorer = scorer if scorer else DifficultyScorer()
        self.builder = builder if builder else StageBuilder(scorer=self.scorer)
        self.results: list[TrainingResult] = []

    def train_curriculum(
        self,
        trace_path: str | Path,
        start_stage: int = 1,
        end_stage: int = 5,
    ) -> list[TrainingResult]:
        """Process every stage whose id lies in ``[start_stage, end_stage]``.

        Args:
            trace_path: Path to JSONL file with agent traces.
            start_stage: First stage id to include.
            end_stage: Last stage id to include.

        Returns:
            The list of TrainingResult objects, also kept on ``self.results``.
        """
        built = self.builder.build_stages(trace_path)

        # Previous results are discarded only once the stages were built.
        self.results = []
        for stage in built:
            if not (start_stage <= stage.stage_id <= end_stage):
                continue

            outcome = self._train_stage(stage)
            self.results.append(outcome)

            logger.info(f"Stage {stage.stage_id} ({stage.name}): {outcome.status}")
            logger.info(f" Traces: {outcome.num_traces}, Output: {outcome.output_dir}")

        return self.results

    def _train_stage(self, stage: CurriculumStage) -> TrainingResult:
        """Prepare one stage on disk and describe it.

        No training is run here: the stage's traces and its training
        configuration are written to a stage-specific folder, and a
        TrainingResult with status "configured" is returned. A production
        setup would invoke the training pipeline (Unsloth/trl) at this point.
        """
        target = self.output_dir / f"stage{stage.stage_id}_{stage.name}"
        target.mkdir(parents=True, exist_ok=True)

        # One JSON document per line.
        with open(target / "traces.jsonl", "w") as sink:
            sink.writelines(json.dumps(item) + "\n" for item in stage.traces)

        # Stage hyper-parameters first; the bookkeeping keys below win on a
        # name clash.
        settings = dict(stage.config)
        settings.update(
            base_model=self.base_model,
            stage_id=stage.stage_id,
            stage_name=stage.name,
            num_traces=len(stage.traces),
            difficulty_range=list(stage.difficulty_range),
        )
        with open(target / "training_config.json", "w") as sink:
            json.dump(settings, sink, indent=2)

        summary = {
            "num_traces": len(stage.traces),
            "difficulty_range": list(stage.difficulty_range),
        }
        for key in ("learning_rate", "batch_size", "lora_r"):
            summary[key] = stage.config.get(key)

        return TrainingResult(
            stage_id=stage.stage_id,
            stage_name=stage.name,
            num_traces=len(stage.traces),
            status="configured",
            output_dir=str(target),
            metrics=summary,
        )
|