openadapt-ml 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. openadapt_ml/baselines/__init__.py +121 -0
  2. openadapt_ml/baselines/adapter.py +185 -0
  3. openadapt_ml/baselines/cli.py +314 -0
  4. openadapt_ml/baselines/config.py +448 -0
  5. openadapt_ml/baselines/parser.py +922 -0
  6. openadapt_ml/baselines/prompts.py +787 -0
  7. openadapt_ml/benchmarks/__init__.py +13 -115
  8. openadapt_ml/benchmarks/agent.py +265 -421
  9. openadapt_ml/benchmarks/azure.py +28 -19
  10. openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
  11. openadapt_ml/benchmarks/cli.py +1722 -4847
  12. openadapt_ml/benchmarks/trace_export.py +631 -0
  13. openadapt_ml/benchmarks/viewer.py +22 -5
  14. openadapt_ml/benchmarks/vm_monitor.py +530 -29
  15. openadapt_ml/benchmarks/waa_deploy/Dockerfile +47 -53
  16. openadapt_ml/benchmarks/waa_deploy/api_agent.py +21 -20
  17. openadapt_ml/cloud/azure_inference.py +3 -5
  18. openadapt_ml/cloud/lambda_labs.py +722 -307
  19. openadapt_ml/cloud/local.py +2038 -487
  20. openadapt_ml/cloud/ssh_tunnel.py +68 -26
  21. openadapt_ml/datasets/next_action.py +40 -30
  22. openadapt_ml/evals/grounding.py +8 -3
  23. openadapt_ml/evals/plot_eval_metrics.py +15 -13
  24. openadapt_ml/evals/trajectory_matching.py +41 -26
  25. openadapt_ml/experiments/demo_prompt/format_demo.py +16 -6
  26. openadapt_ml/experiments/demo_prompt/run_experiment.py +26 -16
  27. openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
  28. openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
  29. openadapt_ml/experiments/representation_shootout/config.py +390 -0
  30. openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
  31. openadapt_ml/experiments/representation_shootout/runner.py +687 -0
  32. openadapt_ml/experiments/waa_demo/runner.py +29 -14
  33. openadapt_ml/export/parquet.py +36 -24
  34. openadapt_ml/grounding/detector.py +18 -14
  35. openadapt_ml/ingest/__init__.py +8 -6
  36. openadapt_ml/ingest/capture.py +25 -22
  37. openadapt_ml/ingest/loader.py +7 -4
  38. openadapt_ml/ingest/synthetic.py +189 -100
  39. openadapt_ml/models/api_adapter.py +14 -4
  40. openadapt_ml/models/base_adapter.py +10 -2
  41. openadapt_ml/models/providers/__init__.py +288 -0
  42. openadapt_ml/models/providers/anthropic.py +266 -0
  43. openadapt_ml/models/providers/base.py +299 -0
  44. openadapt_ml/models/providers/google.py +376 -0
  45. openadapt_ml/models/providers/openai.py +342 -0
  46. openadapt_ml/models/qwen_vl.py +46 -19
  47. openadapt_ml/perception/__init__.py +35 -0
  48. openadapt_ml/perception/integration.py +399 -0
  49. openadapt_ml/retrieval/demo_retriever.py +50 -24
  50. openadapt_ml/retrieval/embeddings.py +9 -8
  51. openadapt_ml/retrieval/retriever.py +3 -1
  52. openadapt_ml/runtime/__init__.py +50 -0
  53. openadapt_ml/runtime/policy.py +18 -5
  54. openadapt_ml/runtime/safety_gate.py +471 -0
  55. openadapt_ml/schema/__init__.py +9 -0
  56. openadapt_ml/schema/converters.py +74 -27
  57. openadapt_ml/schema/episode.py +31 -18
  58. openadapt_ml/scripts/capture_screenshots.py +530 -0
  59. openadapt_ml/scripts/compare.py +85 -54
  60. openadapt_ml/scripts/demo_policy.py +4 -1
  61. openadapt_ml/scripts/eval_policy.py +15 -9
  62. openadapt_ml/scripts/make_gif.py +1 -1
  63. openadapt_ml/scripts/prepare_synthetic.py +3 -1
  64. openadapt_ml/scripts/train.py +21 -9
  65. openadapt_ml/segmentation/README.md +920 -0
  66. openadapt_ml/segmentation/__init__.py +97 -0
  67. openadapt_ml/segmentation/adapters/__init__.py +5 -0
  68. openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
  69. openadapt_ml/segmentation/annotator.py +610 -0
  70. openadapt_ml/segmentation/cache.py +290 -0
  71. openadapt_ml/segmentation/cli.py +674 -0
  72. openadapt_ml/segmentation/deduplicator.py +656 -0
  73. openadapt_ml/segmentation/frame_describer.py +788 -0
  74. openadapt_ml/segmentation/pipeline.py +340 -0
  75. openadapt_ml/segmentation/schemas.py +622 -0
  76. openadapt_ml/segmentation/segment_extractor.py +634 -0
  77. openadapt_ml/training/azure_ops_viewer.py +1097 -0
  78. openadapt_ml/training/benchmark_viewer.py +52 -41
  79. openadapt_ml/training/shared_ui.py +7 -7
  80. openadapt_ml/training/stub_provider.py +57 -35
  81. openadapt_ml/training/trainer.py +143 -86
  82. openadapt_ml/training/trl_trainer.py +70 -21
  83. openadapt_ml/training/viewer.py +323 -108
  84. openadapt_ml/training/viewer_components.py +180 -0
  85. {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +215 -14
  86. openadapt_ml-0.2.1.dist-info/RECORD +116 -0
  87. openadapt_ml/benchmarks/base.py +0 -366
  88. openadapt_ml/benchmarks/data_collection.py +0 -432
  89. openadapt_ml/benchmarks/live_tracker.py +0 -180
  90. openadapt_ml/benchmarks/runner.py +0 -418
  91. openadapt_ml/benchmarks/waa.py +0 -761
  92. openadapt_ml/benchmarks/waa_live.py +0 -619
  93. openadapt_ml-0.2.0.dist-info/RECORD +0 -86
  94. {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
  95. {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,5 +1,12 @@
1
1
  """Benchmark viewer HTML generation.
2
2
 
3
+ .. deprecated::
4
+ This module is deprecated. Use ``openadapt_viewer`` instead::
5
+
6
+ from openadapt_viewer import generate_benchmark_viewer
7
+
8
+ The openadapt-viewer package is the canonical location for viewer code.
9
+
3
10
  This module generates a standalone HTML viewer for benchmark results,
4
11
  showing task list with pass/fail status, step-by-step replay of
5
12
  benchmark executions, screenshots, actions, and reasoning at each step.
@@ -34,6 +41,7 @@ from __future__ import annotations
34
41
  import base64
35
42
  import json
36
43
  import logging
44
+ import warnings
37
45
  from pathlib import Path
38
46
  from typing import Any
39
47
 
@@ -42,6 +50,13 @@ from openadapt_ml.training.shared_ui import (
42
50
  generate_shared_header_html as _generate_shared_header_html,
43
51
  )
44
52
 
53
+ warnings.warn(
54
+ "openadapt_ml.benchmarks.viewer is deprecated. "
55
+ "Use openadapt_viewer instead: from openadapt_viewer import generate_benchmark_viewer",
56
+ DeprecationWarning,
57
+ stacklevel=2,
58
+ )
59
+
45
60
  logger = logging.getLogger(__name__)
46
61
 
47
62
 
@@ -133,7 +148,9 @@ def load_task_results(benchmark_dir: Path) -> list[dict[str, Any]]:
133
148
  screenshots_dir = task_dir / "screenshots"
134
149
  if screenshots_dir.exists():
135
150
  screenshot_paths = sorted(screenshots_dir.glob("*.png"))
136
- task_data["screenshots"] = [str(p.relative_to(benchmark_dir)) for p in screenshot_paths]
151
+ task_data["screenshots"] = [
152
+ str(p.relative_to(benchmark_dir)) for p in screenshot_paths
153
+ ]
137
154
  else:
138
155
  task_data["screenshots"] = []
139
156
 
@@ -294,7 +311,7 @@ def _generate_benchmark_viewer_html(
294
311
  num_success = sum(1 for t in tasks if t.get("execution", {}).get("success", False))
295
312
  success_rate = (num_success / num_tasks * 100) if num_tasks > 0 else 0
296
313
 
297
- html = f'''<!DOCTYPE html>
314
+ html = f"""<!DOCTYPE html>
298
315
  <html lang="en">
299
316
  <head>
300
317
  <meta charset="UTF-8">
@@ -785,7 +802,7 @@ def _generate_benchmark_viewer_html(
785
802
  <div class="stat-label">Failed</div>
786
803
  </div>
787
804
  <div class="stat-card">
788
- <div class="stat-value {'success' if success_rate >= 50 else 'error'}">{success_rate:.1f}%</div>
805
+ <div class="stat-value {"success" if success_rate >= 50 else "error"}">{success_rate:.1f}%</div>
789
806
  <div class="stat-label">Success Rate</div>
790
807
  </div>
791
808
  </div>
@@ -838,7 +855,7 @@ def _generate_benchmark_viewer_html(
838
855
  const summary = {summary_json};
839
856
  const domainStats = {domain_stats_json};
840
857
  const tasks = {tasks_json};
841
- const embedScreenshots = {'true' if embed_screenshots else 'false'};
858
+ const embedScreenshots = {"true" if embed_screenshots else "false"};
842
859
 
843
860
  let currentTaskIndex = -1;
844
861
  let currentStepIndex = 0;
@@ -1214,6 +1231,6 @@ def _generate_benchmark_viewer_html(
1214
1231
  </script>
1215
1232
  </body>
1216
1233
  </html>
1217
- '''
1234
+ """
1218
1235
 
1219
1236
  return html