openadapt-ml 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. openadapt_ml/__init__.py +0 -0
  2. openadapt_ml/benchmarks/__init__.py +125 -0
  3. openadapt_ml/benchmarks/agent.py +825 -0
  4. openadapt_ml/benchmarks/azure.py +761 -0
  5. openadapt_ml/benchmarks/base.py +366 -0
  6. openadapt_ml/benchmarks/cli.py +884 -0
  7. openadapt_ml/benchmarks/data_collection.py +432 -0
  8. openadapt_ml/benchmarks/runner.py +381 -0
  9. openadapt_ml/benchmarks/waa.py +704 -0
  10. openadapt_ml/cloud/__init__.py +5 -0
  11. openadapt_ml/cloud/azure_inference.py +441 -0
  12. openadapt_ml/cloud/lambda_labs.py +2445 -0
  13. openadapt_ml/cloud/local.py +790 -0
  14. openadapt_ml/config.py +56 -0
  15. openadapt_ml/datasets/__init__.py +0 -0
  16. openadapt_ml/datasets/next_action.py +507 -0
  17. openadapt_ml/evals/__init__.py +23 -0
  18. openadapt_ml/evals/grounding.py +241 -0
  19. openadapt_ml/evals/plot_eval_metrics.py +174 -0
  20. openadapt_ml/evals/trajectory_matching.py +486 -0
  21. openadapt_ml/grounding/__init__.py +45 -0
  22. openadapt_ml/grounding/base.py +236 -0
  23. openadapt_ml/grounding/detector.py +570 -0
  24. openadapt_ml/ingest/__init__.py +43 -0
  25. openadapt_ml/ingest/capture.py +312 -0
  26. openadapt_ml/ingest/loader.py +232 -0
  27. openadapt_ml/ingest/synthetic.py +1102 -0
  28. openadapt_ml/models/__init__.py +0 -0
  29. openadapt_ml/models/api_adapter.py +171 -0
  30. openadapt_ml/models/base_adapter.py +59 -0
  31. openadapt_ml/models/dummy_adapter.py +42 -0
  32. openadapt_ml/models/qwen_vl.py +426 -0
  33. openadapt_ml/runtime/__init__.py +0 -0
  34. openadapt_ml/runtime/policy.py +182 -0
  35. openadapt_ml/schemas/__init__.py +53 -0
  36. openadapt_ml/schemas/sessions.py +122 -0
  37. openadapt_ml/schemas/validation.py +252 -0
  38. openadapt_ml/scripts/__init__.py +0 -0
  39. openadapt_ml/scripts/compare.py +1490 -0
  40. openadapt_ml/scripts/demo_policy.py +62 -0
  41. openadapt_ml/scripts/eval_policy.py +287 -0
  42. openadapt_ml/scripts/make_gif.py +153 -0
  43. openadapt_ml/scripts/prepare_synthetic.py +43 -0
  44. openadapt_ml/scripts/run_qwen_login_benchmark.py +192 -0
  45. openadapt_ml/scripts/train.py +174 -0
  46. openadapt_ml/training/__init__.py +0 -0
  47. openadapt_ml/training/benchmark_viewer.py +1538 -0
  48. openadapt_ml/training/shared_ui.py +157 -0
  49. openadapt_ml/training/stub_provider.py +276 -0
  50. openadapt_ml/training/trainer.py +2446 -0
  51. openadapt_ml/training/viewer.py +2970 -0
  52. openadapt_ml-0.1.0.dist-info/METADATA +818 -0
  53. openadapt_ml-0.1.0.dist-info/RECORD +55 -0
  54. openadapt_ml-0.1.0.dist-info/WHEEL +4 -0
  55. openadapt_ml-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,192 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ import argparse
5
+ import os
6
+
7
+ from openadapt_ml.config import settings
8
+ from openadapt_ml.scripts.train import main as train_main
9
+ from openadapt_ml.scripts.eval_policy import main as eval_main
10
+ from openadapt_ml.evals.plot_eval_metrics import plot_eval_metrics
11
+
12
+
13
+ def _require_env(var_name: str) -> None:
14
+ """Raise a clear error if a required API key is missing."""
15
+
16
+ # Check settings first, then fall back to os.getenv
17
+ if var_name == "ANTHROPIC_API_KEY":
18
+ key = settings.anthropic_api_key or os.getenv(var_name)
19
+ elif var_name == "OPENAI_API_KEY":
20
+ key = settings.openai_api_key or os.getenv(var_name)
21
+ else:
22
+ key = os.getenv(var_name)
23
+
24
+ if not key:
25
+ raise RuntimeError(
26
+ f"API key {var_name} is required for this benchmark but is not set. "
27
+ "Please set it in .env file, as an environment variable, or configure "
28
+ "it before including the corresponding API backend."
29
+ )
30
+
31
+
32
def run_qwen_login_benchmark(
    config_path: str,
    out_dir: str,
    include_claude: bool = False,
    include_openai: bool = False,
    skip_train: bool = False,
) -> None:
    """Run end-to-end synthetic login benchmark (train → eval base/FT → plot).

    This is a thin orchestrator over existing train/eval/plot utilities. It:
    - trains a LoRA adapter using the given config
    - evaluates the base (no LoRA) and fine-tuned models on fresh synthetic data
    - optionally evaluates the Claude / OpenAI API backends
    - writes eval JSONs and a comparison plot under the given output directory

    Args:
        config_path: Path to the YAML training/eval config file.
        out_dir: Root output directory; "eval" and "plots" subdirs are created.
        include_claude: Also evaluate the Claude API backend
            (requires ANTHROPIC_API_KEY).
        include_openai: Also evaluate the OpenAI API backend
            (requires OPENAI_API_KEY).
        skip_train: Skip LoRA training and only run evaluations.

    Raises:
        RuntimeError: If an API backend is requested but its key is not set.
    """

    config = Path(config_path)
    out_root = Path(out_dir)

    eval_dir = out_root / "eval"
    plots_dir = out_root / "plots"

    eval_dir.mkdir(parents=True, exist_ok=True)
    plots_dir.mkdir(parents=True, exist_ok=True)

    # Validate API keys up front so we fail fast, before spending time on
    # training or the local evaluations.
    if include_claude:
        _require_env("ANTHROPIC_API_KEY")
    if include_openai:
        _require_env("OPENAI_API_KEY")

    # 1) Train LoRA adapter according to config unless explicitly skipped.
    if not skip_train:
        train_main(str(config))

    metric_files: list[Path] = []
    labels: list[str] = []

    def _evaluate(backend: str, out_json: Path, label: str, *, ignore_lora: bool) -> None:
        # All backends share the same eval_main call shape; this helper keeps
        # the per-backend bookkeeping (JSON path + plot label) in one place.
        eval_main(
            config_path=str(config),
            backend=backend,
            output_json=str(out_json),
            ignore_lora=ignore_lora,
            log_samples=None,
            log_limit=None,
        )
        metric_files.append(out_json)
        labels.append(label)

    # 2) Evaluate Qwen base model (ignoring any LoRA config).
    _evaluate("qwen3", eval_dir / "eval_qwen_base.json", "Qwen3-2B base", ignore_lora=True)

    # 3) Evaluate fine-tuned Qwen model (LoRA-enabled).
    _evaluate("qwen3", eval_dir / "eval_qwen_ft.json", "Qwen3-2B FT", ignore_lora=False)

    # 4) Optionally evaluate API backends.
    if include_claude:
        _evaluate("claude", eval_dir / "eval_claude.json", "Claude Sonnet 4.5", ignore_lora=False)
    if include_openai:
        _evaluate("openai", eval_dir / "eval_gpt51.json", "GPT-5.1", ignore_lora=False)

    # 5) Plot metrics for whichever backends were evaluated.
    if include_claude or include_openai:
        plot_name = "qwen_vs_apis.png"
    else:
        plot_name = "qwen_base_vs_ft.png"

    plot_eval_metrics(
        metric_files=metric_files,
        labels=labels,
        output_path=plots_dir / plot_name,
    )
134
+
135
+
136
def main() -> None:
    """Parse CLI flags and launch the end-to-end login benchmark."""
    parser = argparse.ArgumentParser(
        description=(
            "Run the synthetic login benchmark end-to-end (train → eval base/FT → plot)."
        )
    )
    parser.add_argument(
        "--config",
        type=str,
        required=True,
        help="Path to YAML config file (e.g. configs/qwen3vl_synthetic_dev.yaml)",
    )
    parser.add_argument(
        "--out-dir",
        type=str,
        required=True,
        help=(
            "Output directory for eval JSONs and plots "
            "(e.g. experiments/qwen_login/2b_dev)"
        ),
    )
    # The boolean switches all share the same store_true shape, so register
    # them in one pass.
    for flag, help_text in (
        ("--include-claude", "Include Claude Sonnet 4.5 API backend in the evaluation."),
        ("--include-openai", "Include GPT-5.1 API backend in the evaluation."),
        ("--include-all-apis", "Shorthand to include both Claude and GPT-5.1 backends."),
        ("--skip-train", "Skip LoRA training and only run evaluations."),
    ):
        parser.add_argument(flag, action="store_true", help=help_text)

    cli = parser.parse_args()

    # --include-all-apis is sugar for enabling both API backends at once.
    want_claude = cli.include_claude or cli.include_all_apis
    want_openai = cli.include_openai or cli.include_all_apis

    run_qwen_login_benchmark(
        config_path=cli.config,
        out_dir=cli.out_dir,
        include_claude=want_claude,
        include_openai=want_openai,
        skip_train=cli.skip_train,
    )


if __name__ == "__main__":
    main()
@@ -0,0 +1,174 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import List, Optional, Dict, Any
5
+
6
+ import yaml
7
+
8
+ from openadapt_ml.datasets.next_action import NextActionDataset, build_next_action_sft_samples
9
+ from openadapt_ml.ingest.synthetic import generate_synthetic_sessions
10
+ from openadapt_ml.models.qwen_vl import QwenVLAdapter
11
+ from openadapt_ml.training.trainer import TrainingConfig, TrainingLogger, train_supervised
12
+
13
+
14
def _load_config(path: str | Path) -> dict:
    """Parse the YAML config file at *path* into a plain dict."""
    text = Path(path).read_text(encoding="utf-8")
    return yaml.safe_load(text)
17
+
18
+
19
def _load_capture_episodes(capture_path: str | Path, goal: str | None = None) -> list:
    """Load episodes from an openadapt-capture recording.

    Returns a single-element list so callers can treat one capture and a
    batch of synthetic episodes uniformly.
    """
    # Imported lazily so the module loads even without capture support.
    from openadapt_ml.ingest.capture import capture_to_episode

    return [capture_to_episode(Path(capture_path), goal=goal)]
26
+
27
+
28
def main(
    config_path: str,
    capture_path: str | None = None,
    goal: str | None = None,
    output_dir: str | None = None,
    open_dashboard: bool = False,
) -> None:
    """Train a Qwen-VL adapter from a YAML config.

    Args:
        config_path: Path to the YAML config file.
        capture_path: Optional openadapt-capture recording directory; when
            omitted, synthetic sessions are generated instead.
        goal: Optional task goal, overriding the recording's own description.
        output_dir: Directory for training logs/dashboard. Falls back to the
            config's training.output_dir, then "training_output".
        open_dashboard: Open the generated dashboard.html in a browser.
    """
    cfg = _load_config(config_path)

    model_name = cfg["model"]["name"]
    load_in_4bit = cfg["model"].get("load_in_4bit", False)
    max_pixels = cfg["model"].get("max_pixels")  # For faster training with smaller images
    min_pixels = cfg["model"].get("min_pixels")

    # LoRA config may include an optional weights_path where the trained
    # adapter should be saved. We pass a cleaned config (without
    # weights_path) to the adapter loader.
    raw_lora_cfg = cfg.get("lora")
    lora_weights_path: Optional[str] = None
    lora_cfg: Optional[Dict[str, Any]] = None
    if isinstance(raw_lora_cfg, dict):
        lora_weights_path = raw_lora_cfg.get("weights_path")
        lora_cfg = {k: v for k, v in raw_lora_cfg.items() if k != "weights_path"}
    else:
        lora_cfg = raw_lora_cfg

    # Load data - either from capture or synthetic
    use_som = cfg.get("synthetic_data", {}).get("use_som", False)

    if capture_path:
        # Load from real openadapt-capture recording
        print(f"Loading capture from: {capture_path}")
        episodes = _load_capture_episodes(capture_path, goal=goal)
        data_source = f"capture '{Path(capture_path).name}'"
    else:
        # Generate synthetic data
        synth_cfg = cfg.get("synthetic_data", {})
        num_sessions = synth_cfg.get("num_sessions", 10)
        seed = synth_cfg.get("seed")
        default_synth_dir = str(Path("synthetic") / "train")
        # BUG FIX: this previously reassigned the `output_dir` parameter,
        # silently redirecting training logs and the dashboard into the
        # synthetic-data directory. Keep the two directories distinct.
        synth_output_dir = synth_cfg.get("output_dir", default_synth_dir)
        use_som = synth_cfg.get("use_som", False)
        scenario = synth_cfg.get("scenario", "login")

        sessions = generate_synthetic_sessions(
            num_sessions=num_sessions,
            seed=seed,
            output_dir=synth_output_dir,
            use_som=use_som,
            scenario=scenario,
        )
        episodes = [ep for sess in sessions for ep in sess.episodes]
        data_source = f"synthetic '{scenario}'"

    samples = build_next_action_sft_samples(episodes, use_som=use_som)
    dataset = NextActionDataset(samples)

    # Adapter + model
    adapter = QwenVLAdapter.from_pretrained(
        model_name=model_name,
        lora_config=lora_cfg,
        load_in_4bit=load_in_4bit,
        max_pixels=max_pixels,
        min_pixels=min_pixels,
    )

    # Training config
    train_cfg_raw = cfg.get("training", {})
    # Resolve the training output directory: explicit argument > config > default.
    if output_dir is None:
        output_dir = train_cfg_raw.get("output_dir", "training_output")
    train_cfg = TrainingConfig(
        num_train_epochs=train_cfg_raw.get("num_train_epochs", 1),
        per_device_train_batch_size=train_cfg_raw.get("per_device_train_batch_size", 1),
        gradient_accumulation_steps=train_cfg_raw.get("gradient_accumulation_steps", 1),
        learning_rate=train_cfg_raw.get("learning_rate", 2e-4),
        warmup_ratio=train_cfg_raw.get("warmup_ratio", 0.03),
        weight_decay=train_cfg_raw.get("weight_decay", 0.0),
        max_grad_norm=train_cfg_raw.get("max_grad_norm", 1.0),
        logging_steps=train_cfg_raw.get("logging_steps", 10),
        lr_scheduler_type=train_cfg_raw.get("lr_scheduler_type", "linear"),
        early_stop_loss=train_cfg_raw.get("early_stop_loss", 1e-4),
        early_stop_patience=train_cfg_raw.get("early_stop_patience", 10),
        output_dir=output_dir,
        # Evaluation settings
        eval_every_epoch=train_cfg_raw.get("eval_every_epoch", True),
        eval_samples=train_cfg_raw.get("eval_samples", 3),
    )

    som_label = " (SoM mode)" if use_som else " (coordinate mode)"
    print(f"Loaded {len(episodes)} episodes and {len(samples)} SFT samples{som_label} from {data_source}.")
    print("Starting training...")

    # Get goal from episodes (for logging/viewer)
    episode_goal = episodes[0].goal if episodes else ""

    # Create logger with metadata for dashboard
    logger = TrainingLogger(
        output_dir=train_cfg.output_dir,
        config=train_cfg,
        capture_path=str(capture_path) if capture_path else "",
        config_path=str(config_path),
        goal=goal or episode_goal,  # Use explicit goal or episode goal
    )

    # Pass the first episode for periodic evaluation (if available)
    eval_episode = episodes[0] if episodes else None
    training_success = train_supervised(adapter, dataset, train_cfg, logger=logger, episode=eval_episode)

    # Persist the trained adapter if a weights_path was provided and training succeeded.
    if lora_weights_path:
        if training_success:
            save_path = Path(lora_weights_path)
            save_path.mkdir(parents=True, exist_ok=True)
            adapter.model.save_pretrained(save_path)  # type: ignore[arg-type]
            print(f"Saved LoRA adapter to {save_path}")
        else:
            print("Training aborted due to invalid loss. Skipping checkpoint save to avoid corrupted weights.")

    # Open dashboard in browser if requested
    if open_dashboard:
        import webbrowser
        dashboard_path = Path(output_dir) / "dashboard.html"
        if dashboard_path.exists():
            webbrowser.open(f"file://{dashboard_path.absolute()}")
153
+
154
+
155
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Train Qwen-VL adapter on synthetic data or openadapt-capture recordings."
    )
    # Register every option from a spec table; all are plain string flags
    # except --open, which is a boolean switch.
    for flag, kwargs in (
        ("--config", {"type": str, "required": True, "help": "Path to YAML config file."}),
        ("--capture", {"type": str, "help": "Path to openadapt-capture recording directory."}),
        ("--goal", {"type": str, "help": "Task goal/description (overrides recording's task description)."}),
        ("--output-dir", {"type": str, "help": "Output directory for logs and dashboard."}),
        ("--open", {"action": "store_true", "help": "Open training dashboard in browser."}),
    ):
        parser.add_argument(flag, **kwargs)
    cli = parser.parse_args()

    main(
        cli.config,
        capture_path=cli.capture,
        goal=cli.goal,
        output_dir=cli.output_dir,
        open_dashboard=cli.open,
    )
File without changes