openadapt-ml 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries; it is provided for informational purposes only.
Files changed (63)
  1. openadapt_ml/benchmarks/__init__.py +8 -0
  2. openadapt_ml/benchmarks/agent.py +90 -11
  3. openadapt_ml/benchmarks/azure.py +35 -6
  4. openadapt_ml/benchmarks/cli.py +4449 -201
  5. openadapt_ml/benchmarks/live_tracker.py +180 -0
  6. openadapt_ml/benchmarks/runner.py +41 -4
  7. openadapt_ml/benchmarks/viewer.py +1219 -0
  8. openadapt_ml/benchmarks/vm_monitor.py +610 -0
  9. openadapt_ml/benchmarks/waa.py +61 -4
  10. openadapt_ml/benchmarks/waa_deploy/Dockerfile +222 -0
  11. openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
  12. openadapt_ml/benchmarks/waa_deploy/api_agent.py +539 -0
  13. openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
  14. openadapt_ml/benchmarks/waa_live.py +619 -0
  15. openadapt_ml/cloud/local.py +1555 -1
  16. openadapt_ml/cloud/ssh_tunnel.py +553 -0
  17. openadapt_ml/datasets/next_action.py +87 -68
  18. openadapt_ml/evals/grounding.py +26 -8
  19. openadapt_ml/evals/trajectory_matching.py +84 -36
  20. openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
  21. openadapt_ml/experiments/demo_prompt/format_demo.py +226 -0
  22. openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
  23. openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
  24. openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
  25. openadapt_ml/experiments/demo_prompt/run_experiment.py +531 -0
  26. openadapt_ml/experiments/waa_demo/__init__.py +10 -0
  27. openadapt_ml/experiments/waa_demo/demos.py +357 -0
  28. openadapt_ml/experiments/waa_demo/runner.py +717 -0
  29. openadapt_ml/experiments/waa_demo/tasks.py +151 -0
  30. openadapt_ml/export/__init__.py +9 -0
  31. openadapt_ml/export/__main__.py +6 -0
  32. openadapt_ml/export/cli.py +89 -0
  33. openadapt_ml/export/parquet.py +265 -0
  34. openadapt_ml/ingest/__init__.py +3 -4
  35. openadapt_ml/ingest/capture.py +89 -81
  36. openadapt_ml/ingest/loader.py +116 -68
  37. openadapt_ml/ingest/synthetic.py +221 -159
  38. openadapt_ml/retrieval/README.md +226 -0
  39. openadapt_ml/retrieval/USAGE.md +391 -0
  40. openadapt_ml/retrieval/__init__.py +91 -0
  41. openadapt_ml/retrieval/demo_retriever.py +817 -0
  42. openadapt_ml/retrieval/embeddings.py +629 -0
  43. openadapt_ml/retrieval/index.py +194 -0
  44. openadapt_ml/retrieval/retriever.py +160 -0
  45. openadapt_ml/runtime/policy.py +10 -10
  46. openadapt_ml/schema/__init__.py +104 -0
  47. openadapt_ml/schema/converters.py +541 -0
  48. openadapt_ml/schema/episode.py +457 -0
  49. openadapt_ml/scripts/compare.py +26 -16
  50. openadapt_ml/scripts/eval_policy.py +4 -5
  51. openadapt_ml/scripts/prepare_synthetic.py +14 -17
  52. openadapt_ml/scripts/train.py +81 -70
  53. openadapt_ml/training/benchmark_viewer.py +3225 -0
  54. openadapt_ml/training/trainer.py +120 -363
  55. openadapt_ml/training/trl_trainer.py +354 -0
  56. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/METADATA +102 -60
  57. openadapt_ml-0.2.0.dist-info/RECORD +86 -0
  58. openadapt_ml/schemas/__init__.py +0 -53
  59. openadapt_ml/schemas/sessions.py +0 -122
  60. openadapt_ml/schemas/validation.py +0 -252
  61. openadapt_ml-0.1.0.dist-info/RECORD +0 -55
  62. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/WHEEL +0 -0
  63. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/licenses/LICENSE +0 -0
openadapt_ml/training/trl_trainer.py (new file)
@@ -0,0 +1,354 @@
+ """Simplified training using TRL SFTTrainer + Unsloth.
+
+ This module provides a minimal, efficient training path for VLMs:
+ - Unsloth for 2x speed and 50% less VRAM
+ - TRL SFTTrainer for production-grade training
+ - Direct integration with the openadapt-ml data format
+
+ Usage:
+     from openadapt_ml.training.trl_trainer import TRLTrainingConfig, train_with_trl
+
+     # Train on episodes
+     train_with_trl(
+         episodes=episodes,
+         config=TRLTrainingConfig(
+             model_name="unsloth/Qwen2.5-VL-7B-Instruct",
+             output_dir="checkpoints/my_model",
+         ),
+     )
+ """
+
+ from __future__ import annotations
+
+ import os
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional
+
+ from PIL import Image
+
+
+ @dataclass
+ class TRLTrainingConfig:
+     """Configuration for TRL-based training."""
+
+     # Model
+     model_name: str = "unsloth/Qwen2.5-VL-7B-Instruct"
+     load_in_4bit: bool = True
+     max_seq_length: int = 4096
+
+     # LoRA
+     lora_r: int = 16
+     lora_alpha: int = 32
+     lora_dropout: float = 0.0
+     finetune_vision_layers: bool = False  # Set True if grounding needs improvement
+
+     # Training
+     num_epochs: int = 3
+     batch_size: int = 1
+     gradient_accumulation_steps: int = 4
+     learning_rate: float = 2e-4
+     warmup_ratio: float = 0.03
+
+     # Output
+     output_dir: str = "checkpoints"
+     logging_steps: int = 10
+     save_strategy: str = "epoch"
+
+
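# A config-override sketch; the field names come from the dataclass above, and
# the values are illustrative (e.g. the grounding case flagged next to
# finetune_vision_layers):
cfg = TRLTrainingConfig(
    lora_r=32,
    lora_alpha=64,
    finetune_vision_layers=True,
)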
+ def _load_unsloth_model(config: TRLTrainingConfig):
+     """Load model with Unsloth optimizations.
+
+     Returns:
+         tuple: (model, tokenizer, is_unsloth) - is_unsloth indicates if Unsloth was used
+     """
+     # Check if Unsloth is explicitly disabled via environment variable
+     if os.environ.get("OPENADAPT_DISABLE_UNSLOTH", "").lower() in ("1", "true", "yes"):
+         print("Unsloth disabled via OPENADAPT_DISABLE_UNSLOTH environment variable")
+         return _load_standard_model(config)
+
+     try:
+         from unsloth import FastVisionModel
+
+         model, tokenizer = FastVisionModel.from_pretrained(
+             config.model_name,
+             load_in_4bit=config.load_in_4bit,
+             use_gradient_checkpointing="unsloth",
+             max_seq_length=config.max_seq_length,
+         )
+
+         # Apply LoRA
+         model = FastVisionModel.get_peft_model(
+             model,
+             finetune_vision_layers=config.finetune_vision_layers,
+             finetune_language_layers=True,
+             finetune_attention_modules=True,
+             finetune_mlp_modules=True,
+             r=config.lora_r,
+             lora_alpha=config.lora_alpha,
+             lora_dropout=config.lora_dropout,
+             random_state=42,
+         )
+
+         # Enable training mode
+         FastVisionModel.for_training(model)
+
+         print(f"✓ Loaded {config.model_name} with Unsloth (4-bit: {config.load_in_4bit})")
+         return model, tokenizer, True
+
+     except ImportError:
+         print("⚠ Unsloth not installed, falling back to standard transformers")
+         return _load_standard_model(config)
+
+
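# A minimal sketch of the kill switch above (assumes the module is importable);
# OPENADAPT_DISABLE_UNSLOTH is read at call time, so it only needs to be set
# before _load_unsloth_model() runs:
import os

os.environ["OPENADAPT_DISABLE_UNSLOTH"] = "1"  # or export it in the shell

from openadapt_ml.training.trl_trainer import TRLTrainingConfig, _load_unsloth_model

model, processor, is_unsloth = _load_unsloth_model(TRLTrainingConfig())
assert is_unsloth is False  # forced onto the transformers + peft fallback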
+ def _load_standard_model(config: TRLTrainingConfig):
+     """Fallback: Load model with standard transformers + peft."""
+     from transformers import AutoModelForImageTextToText, AutoProcessor
+     from peft import LoraConfig, get_peft_model
+     import torch
+
+     # Qwen2.5-VL and similar VLMs register under the image-text-to-text auto
+     # class; AutoModelForCausalLM does not resolve these architectures.
+     model = AutoModelForImageTextToText.from_pretrained(
+         config.model_name,
+         torch_dtype=torch.bfloat16,
+         device_map="auto",
+         trust_remote_code=True,
+     )
+     processor = AutoProcessor.from_pretrained(config.model_name, trust_remote_code=True)
+
+     # Apply LoRA
+     peft_config = LoraConfig(
+         r=config.lora_r,
+         lora_alpha=config.lora_alpha,
+         lora_dropout=config.lora_dropout,
+         target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
+         task_type="CAUSAL_LM",
+     )
+     model = get_peft_model(model, peft_config)
+
+     print(f"✓ Loaded {config.model_name} with standard transformers")
+     return model, processor, False
+
+
+ def _convert_samples_to_trl_format(
+     samples: List[Dict[str, Any]],
+     base_path: Optional[Path] = None,
+ ) -> List[Dict[str, Any]]:
+     """Convert openadapt-ml samples to TRL format.
+
+     The only transformation is loading image paths into PIL Images.
+
+     Args:
+         samples: List of samples from build_next_action_sft_samples()
+         base_path: Optional base path to resolve relative image paths
+
+     Returns:
+         List of samples with PIL Images instead of paths
+     """
+     trl_samples = []
+
+     for sample in samples:
+         # Load images as PIL
+         pil_images = []
+         for img_path in sample["images"]:
+             path = Path(img_path)
+             if base_path and not path.is_absolute():
+                 path = base_path / path
+
+             if path.exists():
+                 pil_images.append(Image.open(path).convert("RGB"))
+             else:
+                 print(f"⚠ Image not found: {path}")
+
+         if not pil_images:
+             continue  # Skip samples with missing images
+
+         trl_samples.append({
+             "images": pil_images,
+             "messages": sample["messages"],
+         })
+
+     return trl_samples
+
+
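# Shape of the inputs the converter above consumes (keys taken from the code;
# the message structure is an assumption based on the standard chat format,
# and the path is illustrative):
sample = {
    "images": ["screenshots/step_000.png"],  # str or Path entries
    "messages": [
        {"role": "user", "content": "..."},
        {"role": "assistant", "content": "..."},
    ],
}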
+ def train_with_trl(
+     episodes: List,
+     config: Optional[TRLTrainingConfig] = None,
+     use_som: bool = False,
+     base_path: Optional[Path] = None,
+ ) -> str:
+     """Train a VLM using TRL SFTTrainer + Unsloth.
+
+     This is the simplified training entry point that replaces the legacy
+     custom training loop. It:
+     1. Converts episodes to TRL format
+     2. Loads model with Unsloth (or fallback)
+     3. Trains with TRL's SFTTrainer
+     4. Saves LoRA adapter
+
+     Args:
+         episodes: List of Episode objects from openadapt-ml schema
+         config: Training configuration (uses defaults if None)
+         use_som: If True, use Set-of-Marks DSL instead of coordinates
+         base_path: Base path for resolving relative image paths
+
+     Returns:
+         Path to saved checkpoint
+     """
+     from datasets import Dataset
+     from openadapt_ml.datasets.next_action import build_next_action_sft_samples
+
+     config = config or TRLTrainingConfig()
+
+     # Step 1: Convert episodes to SFT samples
+     print(f"Converting {len(episodes)} episodes to training samples...")
+     raw_samples = build_next_action_sft_samples(episodes, use_som=use_som)
+     print(f" Generated {len(raw_samples)} training samples")
+
+     # Step 2: Convert to TRL format (load images as PIL)
+     print("Loading images...")
+     trl_samples = _convert_samples_to_trl_format(raw_samples, base_path)
+     print(f" Loaded {len(trl_samples)} samples with images")
+
+     if not trl_samples:
+         raise ValueError("No valid training samples after loading images")
+
+     # Step 3: Create HuggingFace Dataset
+     dataset = Dataset.from_list(trl_samples)
+
+     # Step 4: Load model with Unsloth (or fallback)
+     model, tokenizer, is_unsloth = _load_unsloth_model(config)
+
+     # Step 5: Configure and run training
+     try:
+         from trl import SFTTrainer, SFTConfig
+
+         if is_unsloth:
+             # Unsloth-specific configuration
+             from unsloth.trainer import UnslothVisionDataCollator
+
+             training_args = SFTConfig(
+                 output_dir=config.output_dir,
+                 per_device_train_batch_size=config.batch_size,
+                 gradient_accumulation_steps=config.gradient_accumulation_steps,
+                 learning_rate=config.learning_rate,
+                 num_train_epochs=config.num_epochs,
+                 warmup_ratio=config.warmup_ratio,
+                 lr_scheduler_type="cosine",
+                 logging_steps=config.logging_steps,
+                 save_strategy=config.save_strategy,
+                 # Unsloth-specific settings
+                 remove_unused_columns=False,
+                 dataset_text_field="",
+                 dataset_kwargs={"skip_prepare_dataset": True},
+             )
+
+             trainer = SFTTrainer(
+                 model=model,
+                 tokenizer=tokenizer,
+                 data_collator=UnslothVisionDataCollator(model, tokenizer),
+                 train_dataset=dataset,
+                 args=training_args,
+             )
+         else:
+             # Standard TRL configuration
+             training_args = SFTConfig(
+                 output_dir=config.output_dir,
+                 per_device_train_batch_size=config.batch_size,
+                 gradient_accumulation_steps=config.gradient_accumulation_steps,
+                 learning_rate=config.learning_rate,
+                 num_train_epochs=config.num_epochs,
+                 warmup_ratio=config.warmup_ratio,
+                 lr_scheduler_type="cosine",
+                 logging_steps=config.logging_steps,
+                 save_strategy=config.save_strategy,
+                 max_length=None,  # Critical for VLMs
+                 assistant_only_loss=True,
+             )
+
+             trainer = SFTTrainer(
+                 model=model,
+                 train_dataset=dataset,
+                 args=training_args,
+             )
+
+         print(f"\n{'='*50}")
+         print("Starting training:")
+         print(f" Model: {config.model_name}")
+         print(f" Samples: {len(trl_samples)}")
+         print(f" Epochs: {config.num_epochs}")
+         print(f" Batch size: {config.batch_size}")
+         print(f" Unsloth: {is_unsloth}")
+         print(f" Output: {config.output_dir}")
+         print(f"{'='*50}\n")
+
+         trainer.train()
+
+         # Save the LoRA adapter
+         checkpoint_path = Path(config.output_dir) / "final"
+         trainer.save_model(str(checkpoint_path))
+         print(f"\n✓ Saved checkpoint to {checkpoint_path}")
+
+         return str(checkpoint_path)
+
+     except ImportError as e:
+         raise ImportError(
+             "TRL not installed. Install with: pip install trl\n"
+             f"Original error: {e}"
+         ) from e
+
+
+ def train_from_parquet(
+     parquet_path: str,
+     config: Optional[TRLTrainingConfig] = None,
+     use_som: bool = False,
+ ) -> str:
+     """Train from a parquet file exported by openadapt-ml.
+
+     Args:
+         parquet_path: Path to parquet file with episode data
+         config: Training configuration
+         use_som: Use Set-of-Marks DSL
+
+     Returns:
+         Path to saved checkpoint
+     """
+     from openadapt_ml.export import from_parquet
+
+     print(f"Loading episodes from {parquet_path}...")
+     episodes = from_parquet(parquet_path)
+
+     base_path = Path(parquet_path).parent
+
+     return train_with_trl(
+         episodes=episodes,
+         config=config,
+         use_som=use_som,
+         base_path=base_path,
+     )
+
+
+ if __name__ == "__main__":
+     # Simple CLI for testing
+     import argparse
+
+     parser = argparse.ArgumentParser(description="Train VLM with TRL + Unsloth")
+     parser.add_argument("--parquet", required=True, help="Path to parquet file")
+     parser.add_argument("--output", default="checkpoints", help="Output directory")
+     parser.add_argument("--model", default="unsloth/Qwen2.5-VL-7B-Instruct", help="Model name")
+     parser.add_argument("--epochs", type=int, default=3, help="Number of epochs")
+     parser.add_argument("--use-som", action="store_true", help="Use Set-of-Marks DSL")
+
+     args = parser.parse_args()
+
+     config = TRLTrainingConfig(
+         model_name=args.model,
+         output_dir=args.output,
+         num_epochs=args.epochs,
+     )
+
+     checkpoint = train_from_parquet(
+         parquet_path=args.parquet,
+         config=config,
+         use_som=args.use_som,
+     )
+
+     print(f"\nTraining complete! Checkpoint: {checkpoint}")