agent-curriculum 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,38 @@
1
+ name: Release to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*'
7
+
8
+ permissions:
9
+ id-token: write
10
+ packages: write
11
+
12
+ jobs:
13
+ build-and-publish:
14
+ name: Build and publish to PyPI
15
+ runs-on: ubuntu-latest
16
+ environment:
17
+ name: pypi
18
+ url: https://pypi.org/p/agent-curriculum
19
+ permissions:
20
+ id-token: write
21
+
22
+ steps:
23
+ - name: Checkout code
24
+ uses: actions/checkout@v4
25
+
26
+ - name: Set up Python
27
+ uses: actions/setup-python@v5
28
+ with:
29
+ python-version: '3.12'
30
+
31
+ - name: Install build dependencies
32
+ run: python -m pip install --upgrade build
33
+
34
+ - name: Build package
35
+ run: python -m build
36
+
37
+ - name: Publish package to PyPI
38
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,15 @@
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .eggs/
8
+ *.egg
9
+ .pytest_cache/
10
+ .mypy_cache/
11
+ .ruff_cache/
12
+ .venv/
13
+ venv/
14
+ *.so
15
+ .env
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 FableForge Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,90 @@
1
+ Metadata-Version: 2.4
2
+ Name: agent-curriculum
3
+ Version: 0.1.0
4
+ Summary: Curriculum learning for agent training — difficulty-scored stages
5
+ License-File: LICENSE
6
+ Requires-Python: >=3.10
7
+ Requires-Dist: click>=8.0
8
+ Requires-Dist: numpy>=1.24
9
+ Requires-Dist: pydantic>=2.0
10
+ Requires-Dist: pyyaml>=6.0
11
+ Requires-Dist: rich>=13.0
12
+ Description-Content-Type: text/markdown
13
+
14
+ # AgentCurriculum
15
+
16
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE) [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/) [![Tests](https://img.shields.io/badge/tests-0-yellow.svg)](tests/)
17
+
18
+
19
+ Curriculum learning for coding agents — train through progressive difficulty stages.
20
+
21
+ ## Overview
22
+
23
+ AgentCurriculum ranks agent traces by difficulty and builds a 5-stage training curriculum where the model learns basic tool use first, then progressively harder multi-step reasoning and error recovery.
24
+
25
+ ## Stages
26
+
27
+ | Stage | Name | Difficulty | Tools | Errors | LR | LoRA r |
28
+ |-------|------|-----------|-------|--------|----|--------|
29
+ | 1 | Basic | 0.0–0.2 | <5 | 0 | 2e-4 | 64 |
30
+ | 2 | Intermediate | 0.2–0.4 | <15 | ≤2 | 1e-4 | 64 |
31
+ | 3 | Advanced | 0.4–0.6 | <30 | ≤5 | 5e-5 | 32 |
32
+ | 4 | Expert | 0.6–0.8 | <60 | ≤10 | 3e-5 | 32 |
33
+ | 5 | Master | 0.8–1.0 | <100 | ≤20 | 1e-5 | 16 |
34
+
35
+ ## Installation
36
+
37
+ ```bash
38
+ pip install agent-curriculum
39
+ ```
40
+
41
+ ## Quick Start
42
+
43
+ ```python
44
+ from agent_curriculum import DifficultyScorer, StageBuilder, CurriculumTrainer
45
+
46
+ # Score traces by difficulty
47
+ scorer = DifficultyScorer()
48
+ scores = scorer.score_file("traces.jsonl")
49
+
50
+ # Build curriculum stages
51
+ builder = StageBuilder(scorer=scorer)
52
+ stages = builder.build_stages("traces.jsonl")
53
+ builder.generate_configs("configs/")
54
+
55
+ # Train through the curriculum
56
+ trainer = CurriculumTrainer(base_model="Qwen/Qwen2.5-14B")
57
+ results = trainer.train_curriculum("traces.jsonl", start_stage=1, end_stage=5)
58
+ ```
59
+
60
+ ## License
61
+
62
+ MIT
63
+
64
+ ## Ecosystem
65
+
66
+ Part of the [FableForge](../) ecosystem — 21 open-source projects built from 210K real agent traces:
67
+
68
+ | Project | Description |
69
+ | --- | --- |
70
+ | **[Anvil](../anvil)** | Self-verified coding agent |
71
+ | **[VerifyLoop](../verifyloop)** | Plan→Execute→Verify→Recover framework |
72
+ | **[ErrorRecovery](../error-recovery)** | Self-healing middleware (3,725 error patterns) |
73
+ | **[FableForge-14B](../fableforge-14b)** | The fine-tuned 14B model (4-stage training) |
74
+ | **[ShellWhisperer](../shell-whisperer)** | 1.5B edge agent (phone/RPi, 50ms) |
75
+ | **[ReasonCritic](../reason-critic)** | Verification model (130 benchmark tasks) |
76
+ | **[TraceCompiler](../trace-compiler)** | Compile traces → LoRA skills |
77
+ | **[AgentRuntime](../agent-runtime)** | Persistent agent daemon (systemd for AI) |
78
+ | **[AgentSwarm](../agent-swarm)** | Multi-agent from real trace transitions |
79
+ | **[AgentTelemetry](../agent-telemetry)** | Datadog for agents (token tracking, costs) |
80
+ | **[BenchAgent](../bench-agent)** | HumanEval for tool-use (107 tasks) |
81
+ | **[AgentDev](../agent-dev)** | VSCode extension with verification |
82
+ | **[TraceViz](../trace-viz)** | Trace replay visualizer (Next.js) |
83
+ | **[AgentSkills](../agent-skills)** | npm for agent behaviors |
84
+ | **[AgentCurriculum](../agent-curriculum)** | 5-stage progressive training |
85
+ | **[AgentFuzzer](../agent-fuzzer)** | Adversarial testing for agents |
86
+ | **[AgentConstitution](../agent-constitution)** | Safety guardrails from traces |
87
+ | **[CostOptimizer](../cost-optimizer)** | Token cost reduction (50-80%) |
88
+ | **[AgentProfiler](../agent-profiler)** | Behavioral fingerprinting |
89
+ | **[TrajectoryDistiller](../trajectory-distiller)** | Trace→training data pipeline |
90
+ | **[Fable5-Dataset](../fable5-dataset)** | HuggingFace dataset release |
@@ -0,0 +1,77 @@
1
+ # AgentCurriculum
2
+
3
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE) [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/) [![Tests](https://img.shields.io/badge/tests-0-yellow.svg)](tests/)
4
+
5
+
6
+ Curriculum learning for coding agents — train through progressive difficulty stages.
7
+
8
+ ## Overview
9
+
10
+ AgentCurriculum ranks agent traces by difficulty and builds a 5-stage training curriculum where the model learns basic tool use first, then progressively harder multi-step reasoning and error recovery.
11
+
12
+ ## Stages
13
+
14
+ | Stage | Name | Difficulty | Tools | Errors | LR | LoRA r |
15
+ |-------|------|-----------|-------|--------|----|--------|
16
+ | 1 | Basic | 0.0–0.2 | <5 | 0 | 2e-4 | 64 |
17
+ | 2 | Intermediate | 0.2–0.4 | <15 | ≤2 | 1e-4 | 64 |
18
+ | 3 | Advanced | 0.4–0.6 | <30 | ≤5 | 5e-5 | 32 |
19
+ | 4 | Expert | 0.6–0.8 | <60 | ≤10 | 3e-5 | 32 |
20
+ | 5 | Master | 0.8–1.0 | <100 | ≤20 | 1e-5 | 16 |
21
+
22
+ ## Installation
23
+
24
+ ```bash
25
+ pip install agent-curriculum
26
+ ```
27
+
28
+ ## Quick Start
29
+
30
+ ```python
31
+ from agent_curriculum import DifficultyScorer, StageBuilder, CurriculumTrainer
32
+
33
+ # Score traces by difficulty
34
+ scorer = DifficultyScorer()
35
+ scores = scorer.score_file("traces.jsonl")
36
+
37
+ # Build curriculum stages
38
+ builder = StageBuilder(scorer=scorer)
39
+ stages = builder.build_stages("traces.jsonl")
40
+ builder.generate_configs("configs/")
41
+
42
+ # Train through the curriculum
43
+ trainer = CurriculumTrainer(base_model="Qwen/Qwen2.5-14B")
44
+ results = trainer.train_curriculum("traces.jsonl", start_stage=1, end_stage=5)
45
+ ```
46
+
47
+ ## License
48
+
49
+ MIT
50
+
51
+ ## Ecosystem
52
+
53
+ Part of the [FableForge](../) ecosystem — 21 open-source projects built from 210K real agent traces:
54
+
55
+ | Project | Description |
56
+ | --- | --- |
57
+ | **[Anvil](../anvil)** | Self-verified coding agent |
58
+ | **[VerifyLoop](../verifyloop)** | Plan→Execute→Verify→Recover framework |
59
+ | **[ErrorRecovery](../error-recovery)** | Self-healing middleware (3,725 error patterns) |
60
+ | **[FableForge-14B](../fableforge-14b)** | The fine-tuned 14B model (4-stage training) |
61
+ | **[ShellWhisperer](../shell-whisperer)** | 1.5B edge agent (phone/RPi, 50ms) |
62
+ | **[ReasonCritic](../reason-critic)** | Verification model (130 benchmark tasks) |
63
+ | **[TraceCompiler](../trace-compiler)** | Compile traces → LoRA skills |
64
+ | **[AgentRuntime](../agent-runtime)** | Persistent agent daemon (systemd for AI) |
65
+ | **[AgentSwarm](../agent-swarm)** | Multi-agent from real trace transitions |
66
+ | **[AgentTelemetry](../agent-telemetry)** | Datadog for agents (token tracking, costs) |
67
+ | **[BenchAgent](../bench-agent)** | HumanEval for tool-use (107 tasks) |
68
+ | **[AgentDev](../agent-dev)** | VSCode extension with verification |
69
+ | **[TraceViz](../trace-viz)** | Trace replay visualizer (Next.js) |
70
+ | **[AgentSkills](../agent-skills)** | npm for agent behaviors |
71
+ | **[AgentCurriculum](../agent-curriculum)** | 5-stage progressive training |
72
+ | **[AgentFuzzer](../agent-fuzzer)** | Adversarial testing for agents |
73
+ | **[AgentConstitution](../agent-constitution)** | Safety guardrails from traces |
74
+ | **[CostOptimizer](../cost-optimizer)** | Token cost reduction (50-80%) |
75
+ | **[AgentProfiler](../agent-profiler)** | Behavioral fingerprinting |
76
+ | **[TrajectoryDistiller](../trajectory-distiller)** | Trace→training data pipeline |
77
+ | **[Fable5-Dataset](../fable5-dataset)** | HuggingFace dataset release |
File without changes
@@ -0,0 +1,17 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "agent-curriculum"
7
+ version = "0.1.0"
8
+ description = "Curriculum learning for agent training — difficulty-scored stages"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ dependencies = ["pydantic>=2.0", "pyyaml>=6.0", "numpy>=1.24", "click>=8.0", "rich>=13.0"]
12
+
13
+ [project.scripts]
14
+ acurriculum = "agent_curriculum.cli:cli"
15
+
16
+ [tool.hatch.build.targets.wheel]
17
+ packages = ["src/agent_curriculum"]
@@ -0,0 +1,3 @@
1
"""AgentCurriculum — Curriculum learning for agent training with difficulty-scored stages.

The main classes are re-exported here so they can be imported directly from
the package root, as the README Quick Start does.
"""

from agent_curriculum.difficulty_scorer import DifficultyScore, DifficultyScorer
from agent_curriculum.scheduler import (
    DEFAULT_SCHEDULE,
    CurriculumScheduler,
    ScheduleConfig,
)
from agent_curriculum.stage_builder import CurriculumStage, StageBuilder

# NOTE(review): the README also imports CurriculumTrainer from the package
# root. Re-export it (and TrainingResult) from its module here as well; the
# module's file name is not visible from this file — TODO confirm and add.

__version__ = "0.1.0"

__all__ = [
    "DEFAULT_SCHEDULE",
    "CurriculumScheduler",
    "CurriculumStage",
    "DifficultyScore",
    "DifficultyScorer",
    "ScheduleConfig",
    "StageBuilder",
    "__version__",
]
@@ -0,0 +1,119 @@
1
+ """CLI for AgentCurriculum — curriculum learning with difficulty-scored stages."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import sys
7
+ from pathlib import Path
8
+
9
+ import click
10
+ from rich.console import Console
11
+ from rich.table import Table
12
+
13
+ from agent_curriculum.difficulty_scorer import DifficultyScorer
14
+ from agent_curriculum.stage_builder import StageBuilder
15
+ from agent_curriculum.scheduler import DEFAULT_SCHEDULE
16
+
17
+ # Shared Rich console that every command below prints through.
+ console = Console()
18
+
19
+
20
# Root command group: the sub-commands below attach themselves through
# ``@cli.command()``. The docstring doubles as the ``--help`` text.
@click.group()
@click.version_option(version="0.1.0")
def cli():
    """AgentCurriculum — Curriculum learning for agent training with difficulty-scored stages."""
25
+
26
+
27
@cli.command()
@click.argument("trace_file", type=click.Path(exists=True))
@click.option("--output", "-o", type=click.Path(), help="Output JSONL file for scores")
def score(trace_file, output):
    """Score traces by difficulty level."""
    # Function-scope import keeps this command self-contained; rich is already
    # imported by this module.
    from rich.markup import escape

    # Rich style used for each difficulty level; unknown levels render white.
    level_styles = {
        "basic": "green",
        "intermediate": "yellow",
        "advanced": "orange3",
        "expert": "red",
        "master": "bold red",
    }

    scores = DifficultyScorer().score_file(trace_file)

    table = Table(title="Difficulty Scores")
    table.add_column("Trace ID", style="cyan")
    table.add_column("Difficulty", style="bold")
    table.add_column("Level", style="magenta")
    table.add_column("Tools", justify="right")
    table.add_column("Errors", justify="right")

    for s in scores:
        style = level_styles.get(s.difficulty_level, "white")
        table.add_row(
            # Trace ids are taken verbatim from the input file: they may be
            # numbers (not renderable by Rich) or contain "[...]" (which Rich
            # would parse as markup), so stringify and escape them.
            escape(str(s.trace_id)),
            f"{s.overall_difficulty:.3f}",
            f"[{style}]{s.difficulty_level}[/{style}]",
            str(s.tool_count),
            str(s.error_count),
        )

    console.print(table)

    if output:
        # JSONL: one JSON object per line, every line newline-terminated.
        lines = [json.dumps(s.to_dict()) + "\n" for s in scores]
        Path(output).write_text("".join(lines), encoding="utf-8")
        console.print(f"[green]Scores saved to {output}[/green]")
58
+
59
+
60
@cli.command()
@click.argument("trace_file", type=click.Path(exists=True))
@click.option("--stages", "-s", default=5, type=int, help="Number of curriculum stages")
@click.option("--output", "-o", type=click.Path(), help="Output directory for stage files")
def build(trace_file, stages, output):
    """Build curriculum stages from a trace file."""
    if stages < 1:
        raise click.BadParameter("must be at least 1", param_hint="--stages")

    # StageBuilder takes no stage-count argument and exposes build_stages()
    # (not build_from_file()); it always assigns traces to its predefined
    # stages. --stages therefore limits how many of those stages, in order,
    # are reported and written out.
    builder = StageBuilder()
    result = builder.build_stages(trace_file)[:stages]

    table = Table(title=f"Curriculum Stages ({len(result)} stages)")
    table.add_column("Stage", style="cyan")
    table.add_column("Name", style="bold")
    table.add_column("Difficulty", style="magenta")
    table.add_column("Traces", justify="right")

    for stage in result:
        low, high = stage.difficulty_range
        table.add_row(
            str(stage.stage_id),
            stage.name,
            f"[{low:.1f} - {high:.1f}]",
            str(len(stage.traces)),
        )

    console.print(table)

    if output:
        out_path = Path(output)
        out_path.mkdir(parents=True, exist_ok=True)
        for stage in result:
            stage_file = out_path / f"stage_{stage.stage_id}_{stage.name}.json"
            stage_file.write_text(json.dumps(stage.to_dict(), indent=2), encoding="utf-8")
        console.print(f"[green]Stages saved to {output}[/green]")
92
+
93
+
94
@cli.command()
def schedule():
    """Show the default learning rate / batch size schedule."""
    # (header, add_column keyword arguments) for each column, left to right.
    columns = (
        ("Stage", {"style": "cyan"}),
        ("LR", {"style": "green"}),
        ("Batch", {"justify": "right"}),
        ("LoRA r", {"justify": "right"}),
        ("LoRA alpha", {"justify": "right"}),
        ("Epochs", {"justify": "right"}),
    )

    table = Table(title="Default Curriculum Schedule")
    for header, options in columns:
        table.add_column(header, **options)

    for entry in DEFAULT_SCHEDULE:
        cells = [
            str(entry.stage_id),
            f"{entry.learning_rate:.1e}",
            str(entry.batch_size),
            str(entry.lora_r),
            str(entry.lora_alpha),
            str(entry.num_epochs),
        ]
        table.add_row(*cells)

    console.print(table)
116
+
117
+
118
+ # Run the click command group when this module is executed as a script.
+ if __name__ == "__main__":
119
+ cli()
@@ -0,0 +1,9 @@
1
+ stage_id: 1
2
+ name: basic
3
+ description: "Basic tool use — simple read/edit/bash tasks with no errors"
4
+ difficulty_range: [0.0, 0.2]
5
+ num_traces: 0
6
+ learning_rate: 0.0002
7
+ batch_size: 8
8
+ num_epochs: 3
9
+ lora_r: 64
@@ -0,0 +1,9 @@
1
+ stage_id: 2
2
+ name: intermediate
3
+ description: "Intermediate tasks — multi-tool sequences with simple error recovery"
4
+ difficulty_range: [0.2, 0.4]
5
+ num_traces: 0
6
+ learning_rate: 0.0001
7
+ batch_size: 4
8
+ num_epochs: 2
9
+ lora_r: 64
@@ -0,0 +1,9 @@
1
+ stage_id: 3
2
+ name: advanced
3
+ description: "Advanced tasks — complex multi-step with error recovery"
4
+ difficulty_range: [0.4, 0.6]
5
+ num_traces: 0
6
+ learning_rate: 5.0e-05
7
+ batch_size: 4
8
+ num_epochs: 2
9
+ lora_r: 32
@@ -0,0 +1,9 @@
1
+ stage_id: 4
2
+ name: expert
3
+ description: "Expert tasks — multi-session with complex error patterns"
4
+ difficulty_range: [0.6, 0.8]
5
+ num_traces: 0
6
+ learning_rate: 3.0e-05
7
+ batch_size: 2
8
+ num_epochs: 2
9
+ lora_r: 32
@@ -0,0 +1,9 @@
1
+ stage_id: 5
2
+ name: master
3
+ description: "Master tasks — long-horizon planning with complex recovery"
4
+ difficulty_range: [0.8, 1.0]
5
+ num_traces: 0
6
+ learning_rate: 1.0e-05
7
+ batch_size: 2
8
+ num_epochs: 1
9
+ lora_r: 16
@@ -0,0 +1,177 @@
1
+ """Score trace difficulty based on tool count, error count, reasoning length, and session duration."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import math
7
+ from dataclasses import dataclass, field
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+
12
+ @dataclass
13
+ class DifficultyScore:
14
+ """Composite difficulty score for an agent trace."""
15
+
16
+ trace_id: str
17
+ tool_count: int = 0
18
+ error_count: int = 0
19
+ reasoning_length: int = 0
20
+ session_duration: float = 0.0
21
+ unique_tools: int = 0
22
+ retry_count: int = 0
23
+ branch_count: int = 0
24
+
25
+ # Computed scores
26
+ tool_complexity: float = 0.0
27
+ error_complexity: float = 0.0
28
+ reasoning_complexity: float = 0.0
29
+ duration_complexity: float = 0.0
30
+ overall_difficulty: float = 0.0
31
+ difficulty_level: str = "basic"
32
+
33
+ def to_dict(self) -> dict[str, Any]:
34
+ return {
35
+ "trace_id": self.trace_id,
36
+ "tool_count": self.tool_count,
37
+ "error_count": self.error_count,
38
+ "reasoning_length": self.reasoning_length,
39
+ "session_duration": self.session_duration,
40
+ "unique_tools": self.unique_tools,
41
+ "retry_count": self.retry_count,
42
+ "branch_count": self.branch_count,
43
+ "tool_complexity": round(self.tool_complexity, 3),
44
+ "error_complexity": round(self.error_complexity, 3),
45
+ "reasoning_complexity": round(self.reasoning_complexity, 3),
46
+ "duration_complexity": round(self.duration_complexity, 3),
47
+ "overall_difficulty": round(self.overall_difficulty, 3),
48
+ "difficulty_level": self.difficulty_level,
49
+ }
50
+
51
+
52
+ class DifficultyScorer:
53
+ """Score agent traces by difficulty to build curriculum training stages.
54
+
55
+ Scoring factors:
56
+ - Tool count: More tool calls = harder
57
+ - Error count: More errors = harder, but retries show recovery skill
58
+ - Reasoning length: Longer reasoning = harder
59
+ - Session duration: Longer sessions = harder
60
+ - Unique tools: More diverse tool use = harder
61
+ - Branch count: More decision branches = harder
62
+ """
63
+
64
+ def __init__(
65
+ self,
66
+ tool_weight: float = 0.25,
67
+ error_weight: float = 0.20,
68
+ reasoning_weight: float = 0.20,
69
+ duration_weight: float = 0.15,
70
+ diversity_weight: float = 0.10,
71
+ branch_weight: float = 0.10,
72
+ max_tool_count: int = 100,
73
+ max_reasoning_length: int = 50000,
74
+ max_session_duration: float = 3600.0,
75
+ ):
76
+ self.tool_weight = tool_weight
77
+ self.error_weight = error_weight
78
+ self.reasoning_weight = reasoning_weight
79
+ self.duration_weight = duration_weight
80
+ self.diversity_weight = diversity_weight
81
+ self.branch_weight = branch_weight
82
+ self.max_tool_count = max_tool_count
83
+ self.max_reasoning_length = max_reasoning_length
84
+ self.max_session_duration = max_session_duration
85
+
86
+ def score_trace(self, trace: dict[str, Any]) -> DifficultyScore:
87
+ """Score an individual trace for difficulty.
88
+
89
+ Args:
90
+ trace: Dictionary with trace data including tool_calls, errors, etc.
91
+
92
+ Returns:
93
+ DifficultyScore with computed difficulty metrics.
94
+ """
95
+ trace_id = trace.get("id", trace.get("trace_id", "unknown"))
96
+ tool_calls = trace.get("tool_calls", [])
97
+ errors = trace.get("errors", [])
98
+ reasoning = trace.get("reasoning", "")
99
+ duration = trace.get("duration", trace.get("session_duration", 0.0))
100
+
101
+ # Extract metrics
102
+ tool_count = len(tool_calls)
103
+ error_count = len(errors)
104
+ unique_tools = len(set(tc.get("name", "") for tc in tool_calls if tc.get("name")))
105
+ retry_count = sum(1 for e in errors if e.get("type") == "retry" or "retry" in str(e).lower())
106
+ branch_count = trace.get("branch_count", len(set(tc.get("name", "") for tc in tool_calls[:5])))
107
+ reasoning_length = len(reasoning) if isinstance(reasoning, str) else sum(len(r) for r in reasoning) if isinstance(reasoning, list) else 0
108
+
109
+ # Compute normalized sub-scores (0-1)
110
+ tool_complexity = min(tool_count / self.max_tool_count, 1.0) if self.max_tool_count > 0 else 0.0
111
+ error_complexity = 1.0 - math.exp(-error_count / 3.0) # exponential decay
112
+ reasoning_complexity = min(reasoning_length / self.max_reasoning_length, 1.0) if self.max_reasoning_length > 0 else 0.0
113
+ duration_complexity = min(duration / self.max_session_duration, 1.0) if self.max_session_duration > 0 else 0.0
114
+ diversity_score = min(unique_tools / 6.0, 1.0) # 6 = max unique tool types
115
+ branch_complexity = min(branch_count / 10.0, 1.0) if branch_count > 0 else 0.0
116
+
117
+ # Weighted overall difficulty
118
+ overall = (
119
+ tool_complexity * self.tool_weight
120
+ + error_complexity * self.error_weight
121
+ + reasoning_complexity * self.reasoning_weight
122
+ + duration_complexity * self.duration_weight
123
+ + diversity_score * self.diversity_weight
124
+ + branch_complexity * self.branch_weight
125
+ )
126
+
127
+ # Classify difficulty level
128
+ if overall < 0.2:
129
+ level = "basic"
130
+ elif overall < 0.4:
131
+ level = "intermediate"
132
+ elif overall < 0.6:
133
+ level = "advanced"
134
+ elif overall < 0.8:
135
+ level = "expert"
136
+ else:
137
+ level = "master"
138
+
139
+ return DifficultyScore(
140
+ trace_id=trace_id,
141
+ tool_count=tool_count,
142
+ error_count=error_count,
143
+ reasoning_length=reasoning_length,
144
+ session_duration=duration,
145
+ unique_tools=unique_tools,
146
+ retry_count=retry_count,
147
+ branch_count=branch_count,
148
+ tool_complexity=tool_complexity,
149
+ error_complexity=error_complexity,
150
+ reasoning_complexity=reasoning_complexity,
151
+ duration_complexity=duration_complexity,
152
+ overall_difficulty=overall,
153
+ difficulty_level=level,
154
+ )
155
+
156
+ def score_file(self, path: str | Path) -> list[DifficultyScore]:
157
+ """Score all traces in a JSONL file.
158
+
159
+ Args:
160
+ path: Path to JSONL file with traces.
161
+
162
+ Returns:
163
+ List of DifficultyScore objects.
164
+ """
165
+ path = Path(path)
166
+ scores = []
167
+ with open(path) as f:
168
+ for line in f:
169
+ line = line.strip()
170
+ if not line:
171
+ continue
172
+ try:
173
+ trace = json.loads(line)
174
+ scores.append(self.score_trace(trace))
175
+ except json.JSONDecodeError:
176
+ continue
177
+ return scores
@@ -0,0 +1,80 @@
1
+ """Learning rate and batch size scheduling per curriculum stage."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Any
7
+
8
+ import yaml
9
+
10
+
11
@dataclass
class ScheduleConfig:
    """Training hyper-parameters applied to one curriculum stage."""

    # Required per-stage values.
    stage_id: int
    learning_rate: float
    batch_size: int

    # Optional values with shared defaults.
    warmup_steps: int = 0
    lora_r: int = 64
    lora_alpha: int = 128
    gradient_accumulation_steps: int = 4
    weight_decay: float = 0.01
    num_epochs: int = 2

    def to_dict(self) -> dict[str, Any]:
        """Return every dataclass field as a ``{name: value}`` dict, in declaration order."""
        snapshot: dict[str, Any] = {}
        for field_name in self.__dataclass_fields__:
            snapshot[field_name] = getattr(self, field_name)
        return snapshot
27
+
28
+
29
# Default schedule: learning rate, batch size, LoRA rank and epoch count all
# shrink (or hold steady) from one stage to the next as difficulty rises.
DEFAULT_SCHEDULE: list[ScheduleConfig] = [
    ScheduleConfig(
        stage_id=stage_id,
        learning_rate=learning_rate,
        batch_size=batch_size,
        lora_r=lora_r,
        num_epochs=num_epochs,
    )
    # (stage_id, learning_rate, batch_size, lora_r, num_epochs)
    for stage_id, learning_rate, batch_size, lora_r, num_epochs in (
        (1, 2e-4, 8, 64, 3),
        (2, 1e-4, 4, 64, 2),
        (3, 5e-5, 4, 32, 2),
        (4, 3e-5, 2, 32, 2),
        (5, 1e-5, 2, 16, 1),
    )
]
37
+
38
+
39
class CurriculumScheduler:
    """Look up and export per-stage training hyper-parameters.

    Wraps a list of ScheduleConfig entries (``DEFAULT_SCHEDULE`` unless a
    non-empty custom list is given) and exposes lookup by stage id plus
    export of every entry to YAML.
    """

    def __init__(self, schedule: list[ScheduleConfig] | None = None):
        # Copy the default so changes to ``self.schedule`` never leak into the
        # shared module-level DEFAULT_SCHEDULE list.
        self.schedule = schedule or list(DEFAULT_SCHEDULE)

    def get_config(self, stage_id: int) -> ScheduleConfig:
        """Get training config for a specific stage.

        Args:
            stage_id: Id of the curriculum stage to look up.

        Returns:
            ScheduleConfig for the stage.

        Raises:
            ValueError: If no entry in the schedule has this stage_id.
        """
        for entry in self.schedule:
            if entry.stage_id == stage_id:
                return entry
        # List the ids that actually exist; a custom schedule need not be 1..N.
        available = ", ".join(str(entry.stage_id) for entry in self.schedule)
        raise ValueError(f"Unknown stage {stage_id}. Available: {available}")

    def get_all_configs(self) -> list[ScheduleConfig]:
        """Get all stage configs in order (as a new list)."""
        return list(self.schedule)

    def save_configs(self, output_dir: str) -> None:
        """Save all stage configs as YAML files named ``stage<id>.yaml``.

        Args:
            output_dir: Directory to write into; created if it does not exist.
        """
        from pathlib import Path

        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)
        for config in self.schedule:
            path = output_path / f"stage{config.stage_id}.yaml"
            with open(path, "w", encoding="utf-8") as f:
                yaml.dump(config.to_dict(), f, default_flow_style=False)
@@ -0,0 +1,197 @@
1
+ """Build curriculum training stages from difficulty-scored traces."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from dataclasses import dataclass, field
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ import yaml
11
+
12
+ from agent_curriculum.difficulty_scorer import DifficultyScore, DifficultyScorer
13
+
14
+
15
@dataclass
class CurriculumStage:
    """One stage of the curriculum: its definition plus the traces assigned to it."""

    # Stage definition.
    stage_id: int
    name: str
    description: str
    difficulty_range: tuple[float, float]
    max_tools: int
    max_errors: int

    # Filled in when traces are assigned; each instance gets its own containers.
    traces: list[dict[str, Any]] = field(default_factory=list)
    scores: list[DifficultyScore] = field(default_factory=list)
    config: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Summarize the stage as a dict; traces are reported only as a count."""
        summary: dict[str, Any] = {
            "stage_id": self.stage_id,
            "name": self.name,
            "description": self.description,
        }
        summary["difficulty_range"] = list(self.difficulty_range)
        summary["max_tools"] = self.max_tools
        summary["max_errors"] = self.max_errors
        summary["num_traces"] = len(self.traces)
        summary["config"] = self.config
        return summary
40
+
41
+
42
# Pre-defined curriculum stages, ordered from easiest to hardest. Each entry
# carries its difficulty band, tool/error limits and training hyper-parameters.
STAGE_DEFINITIONS = [
    CurriculumStage(
        stage_id=1,
        name="basic",
        description="Basic tool use — simple read/edit/bash tasks with no errors",
        difficulty_range=(0.0, 0.2),
        max_tools=5,
        max_errors=0,
        config=dict(learning_rate=2e-4, batch_size=8, num_epochs=3, lora_r=64),
    ),
    CurriculumStage(
        stage_id=2,
        name="intermediate",
        description="Intermediate tasks — multi-tool sequences with simple error recovery",
        difficulty_range=(0.2, 0.4),
        max_tools=15,
        max_errors=2,
        config=dict(learning_rate=1e-4, batch_size=4, num_epochs=2, lora_r=64),
    ),
    CurriculumStage(
        stage_id=3,
        name="advanced",
        description="Advanced tasks — complex multi-step with error recovery",
        difficulty_range=(0.4, 0.6),
        max_tools=30,
        max_errors=5,
        config=dict(learning_rate=5e-5, batch_size=4, num_epochs=2, lora_r=32),
    ),
    CurriculumStage(
        stage_id=4,
        name="expert",
        description="Expert tasks — multi-session with complex error patterns",
        difficulty_range=(0.6, 0.8),
        max_tools=60,
        max_errors=10,
        config=dict(learning_rate=3e-5, batch_size=2, num_epochs=2, lora_r=32),
    ),
    CurriculumStage(
        stage_id=5,
        name="master",
        description="Master tasks — long-horizon planning with complex recovery",
        difficulty_range=(0.8, 1.0),
        max_tools=100,
        max_errors=20,
        config=dict(learning_rate=1e-5, batch_size=2, num_epochs=1, lora_r=16),
    ),
]
115
+
116
+
117
class StageBuilder:
    """Build training stages from difficulty-scored traces.

    Assigns traces to curriculum stages based on difficulty scores and
    generates YAML configs for each stage.
    """

    def __init__(self, scorer: DifficultyScorer | None = None, stages: list[CurriculumStage] | None = None):
        self.scorer = scorer or DifficultyScorer()
        # Build independent copies of the predefined stages so that traces
        # assigned by one builder never end up in STAGE_DEFINITIONS or in
        # another builder.
        self.stages = stages or [self._fresh_stage(template) for template in STAGE_DEFINITIONS]

    @staticmethod
    def _fresh_stage(template: CurriculumStage | dict[str, Any]) -> CurriculumStage:
        """Return a new, empty stage carrying the template's definition and its own config dict."""
        if not isinstance(template, CurriculumStage):
            return CurriculumStage(**template)
        return CurriculumStage(
            stage_id=template.stage_id,
            name=template.name,
            description=template.description,
            difficulty_range=template.difficulty_range,
            max_tools=template.max_tools,
            max_errors=template.max_errors,
            config=dict(template.config),
        )

    def _stage_for(self, difficulty: float) -> CurriculumStage:
        """Return the stage whose range contains ``difficulty``, else the closest stage.

        Ranges are half-open ``[low, high)``, so a score on the top edge of
        the last stage (e.g. exactly 1.0) or outside every range falls back
        to the stage whose range lies nearest.
        """
        for stage in self.stages:
            low, high = stage.difficulty_range
            if low <= difficulty < high:
                return stage

        def distance(stage: CurriculumStage) -> float:
            low, high = stage.difficulty_range
            return low - difficulty if difficulty < low else difficulty - high

        return min(self.stages, key=distance)

    def build_stages(self, trace_path: str | Path) -> list[CurriculumStage]:
        """Build curriculum stages from a trace file.

        Each call starts from empty stages, so calling it again does not
        duplicate traces. Blank lines and lines that are not valid JSON are
        skipped; every other trace is scored and assigned to exactly one stage.

        Args:
            trace_path: Path to JSONL file with agent traces.

        Returns:
            List of CurriculumStage objects with assigned traces.
        """
        trace_path = Path(trace_path)

        # Rebind (rather than clear) so lists shared with other objects are untouched.
        for stage in self.stages:
            stage.traces = []
            stage.scores = []

        # Parse and score in a single pass so each trace stays paired with its score.
        with open(trace_path, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    trace = json.loads(line)
                except json.JSONDecodeError:
                    continue
                score = self.scorer.score_trace(trace)
                stage = self._stage_for(score.overall_difficulty)
                stage.traces.append(trace)
                stage.scores.append(score)

        return self.stages

    def generate_configs(self, output_dir: str | Path) -> list[Path]:
        """Generate YAML config files for each stage.

        Args:
            output_dir: Directory to write config files; created if missing.

        Returns:
            List of paths to generated config files.
        """
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        config_paths = []

        for stage in self.stages:
            config = {
                "stage_id": stage.stage_id,
                "name": stage.name,
                "description": stage.description,
                "difficulty_range": list(stage.difficulty_range),
                "num_traces": len(stage.traces),
                **stage.config,
            }
            config_path = output_dir / f"stage{stage.stage_id}.yaml"
            with open(config_path, "w", encoding="utf-8") as f:
                yaml.dump(config, f, default_flow_style=False)
            config_paths.append(config_path)

        return config_paths
@@ -0,0 +1,124 @@
1
+ """Train model through curriculum stages with progressive difficulty."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ from dataclasses import dataclass, field
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ from agent_curriculum.stage_builder import StageBuilder, CurriculumStage
12
+ from agent_curriculum.difficulty_scorer import DifficultyScorer
13
+
14
+ # Module-level logger, named after this module.
+ logger = logging.getLogger(__name__)
15
+
16
+
17
@dataclass
class TrainingResult:
    """Outcome record for one curriculum stage."""

    # Which stage this record describes and how many traces it received.
    stage_id: int
    stage_name: str
    num_traces: int

    # Status label, where the stage's files were written, and a free-form
    # metrics dict (a fresh dict per instance).
    status: str = "configured"
    output_dir: str = ""
    metrics: dict[str, Any] = field(default_factory=dict)
27
+
28
+
29
class CurriculumTrainer:
    """Walk a model through the curriculum stages in order of difficulty.

    Traces are split into stages by the StageBuilder; each selected stage
    then gets its own output directory holding the stage's traces and a
    training config. No training is launched here: every stage is prepared
    and reported with status "configured".
    """

    def __init__(
        self,
        base_model: str = "Qwen/Qwen2.5-14B",
        output_dir: str = "output/curriculum",
        scorer: DifficultyScorer | None = None,
        builder: StageBuilder | None = None,
    ):
        self.base_model = base_model
        self.output_dir = Path(output_dir)
        self.scorer = scorer or DifficultyScorer()
        # The default builder shares this trainer's scorer.
        self.builder = builder or StageBuilder(scorer=self.scorer)
        self.results: list[TrainingResult] = []

    def train_curriculum(
        self,
        trace_path: str | Path,
        start_stage: int = 1,
        end_stage: int = 5,
    ) -> list[TrainingResult]:
        """Prepare every stage whose id lies in ``[start_stage, end_stage]``.

        Args:
            trace_path: Path to JSONL file with agent traces.
            start_stage: First stage id to include.
            end_stage: Last stage id to include.

        Returns:
            List of TrainingResult objects, one per included stage.
        """
        self.results = []
        for stage in self.builder.build_stages(trace_path):
            if start_stage <= stage.stage_id <= end_stage:
                outcome = self._train_stage(stage)
                self.results.append(outcome)

                logger.info(f"Stage {stage.stage_id} ({stage.name}): {outcome.status}")
                logger.info(f"  Traces: {outcome.num_traces}, Output: {outcome.output_dir}")

        return self.results

    def _train_stage(self, stage: CurriculumStage) -> TrainingResult:
        """Write one stage's traces and training config to disk.

        Creates ``<output_dir>/stage<id>_<name>/`` containing ``traces.jsonl``
        and ``training_config.json``. A real training pipeline would be
        invoked from here; this implementation only prepares its inputs.
        """
        trace_total = len(stage.traces)
        difficulty_bounds = list(stage.difficulty_range)

        stage_dir = self.output_dir / f"stage{stage.stage_id}_{stage.name}"
        stage_dir.mkdir(parents=True, exist_ok=True)

        # Stage traces, one JSON object per line.
        with open(stage_dir / "traces.jsonl", "w") as traces_file:
            traces_file.writelines(json.dumps(trace) + "\n" for trace in stage.traces)

        # Stage hyper-parameters first, then run metadata (which wins on key clashes).
        config = dict(stage.config)
        config.update(
            {
                "base_model": self.base_model,
                "stage_id": stage.stage_id,
                "stage_name": stage.name,
                "num_traces": trace_total,
                "difficulty_range": difficulty_bounds,
            }
        )
        with open(stage_dir / "training_config.json", "w") as config_file:
            config_file.write(json.dumps(config, indent=2))

        metrics = {
            "num_traces": trace_total,
            "difficulty_range": list(stage.difficulty_range),
            "learning_rate": stage.config.get("learning_rate"),
            "batch_size": stage.config.get("batch_size"),
            "lora_r": stage.config.get("lora_r"),
        }
        return TrainingResult(
            stage_id=stage.stage_id,
            stage_name=stage.name,
            num_traces=trace_total,
            status="configured",
            output_dir=str(stage_dir),
            metrics=metrics,
        )