openadapt-ml 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. openadapt_ml/baselines/__init__.py +121 -0
  2. openadapt_ml/baselines/adapter.py +185 -0
  3. openadapt_ml/baselines/cli.py +314 -0
  4. openadapt_ml/baselines/config.py +448 -0
  5. openadapt_ml/baselines/parser.py +922 -0
  6. openadapt_ml/baselines/prompts.py +787 -0
  7. openadapt_ml/benchmarks/__init__.py +13 -115
  8. openadapt_ml/benchmarks/agent.py +265 -421
  9. openadapt_ml/benchmarks/azure.py +28 -19
  10. openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
  11. openadapt_ml/benchmarks/cli.py +1722 -4847
  12. openadapt_ml/benchmarks/trace_export.py +631 -0
  13. openadapt_ml/benchmarks/viewer.py +22 -5
  14. openadapt_ml/benchmarks/vm_monitor.py +530 -29
  15. openadapt_ml/benchmarks/waa_deploy/Dockerfile +47 -53
  16. openadapt_ml/benchmarks/waa_deploy/api_agent.py +21 -20
  17. openadapt_ml/cloud/azure_inference.py +3 -5
  18. openadapt_ml/cloud/lambda_labs.py +722 -307
  19. openadapt_ml/cloud/local.py +2038 -487
  20. openadapt_ml/cloud/ssh_tunnel.py +68 -26
  21. openadapt_ml/datasets/next_action.py +40 -30
  22. openadapt_ml/evals/grounding.py +8 -3
  23. openadapt_ml/evals/plot_eval_metrics.py +15 -13
  24. openadapt_ml/evals/trajectory_matching.py +41 -26
  25. openadapt_ml/experiments/demo_prompt/format_demo.py +16 -6
  26. openadapt_ml/experiments/demo_prompt/run_experiment.py +26 -16
  27. openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
  28. openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
  29. openadapt_ml/experiments/representation_shootout/config.py +390 -0
  30. openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
  31. openadapt_ml/experiments/representation_shootout/runner.py +687 -0
  32. openadapt_ml/experiments/waa_demo/runner.py +29 -14
  33. openadapt_ml/export/parquet.py +36 -24
  34. openadapt_ml/grounding/detector.py +18 -14
  35. openadapt_ml/ingest/__init__.py +8 -6
  36. openadapt_ml/ingest/capture.py +25 -22
  37. openadapt_ml/ingest/loader.py +7 -4
  38. openadapt_ml/ingest/synthetic.py +189 -100
  39. openadapt_ml/models/api_adapter.py +14 -4
  40. openadapt_ml/models/base_adapter.py +10 -2
  41. openadapt_ml/models/providers/__init__.py +288 -0
  42. openadapt_ml/models/providers/anthropic.py +266 -0
  43. openadapt_ml/models/providers/base.py +299 -0
  44. openadapt_ml/models/providers/google.py +376 -0
  45. openadapt_ml/models/providers/openai.py +342 -0
  46. openadapt_ml/models/qwen_vl.py +46 -19
  47. openadapt_ml/perception/__init__.py +35 -0
  48. openadapt_ml/perception/integration.py +399 -0
  49. openadapt_ml/retrieval/demo_retriever.py +50 -24
  50. openadapt_ml/retrieval/embeddings.py +9 -8
  51. openadapt_ml/retrieval/retriever.py +3 -1
  52. openadapt_ml/runtime/__init__.py +50 -0
  53. openadapt_ml/runtime/policy.py +18 -5
  54. openadapt_ml/runtime/safety_gate.py +471 -0
  55. openadapt_ml/schema/__init__.py +9 -0
  56. openadapt_ml/schema/converters.py +74 -27
  57. openadapt_ml/schema/episode.py +31 -18
  58. openadapt_ml/scripts/capture_screenshots.py +530 -0
  59. openadapt_ml/scripts/compare.py +85 -54
  60. openadapt_ml/scripts/demo_policy.py +4 -1
  61. openadapt_ml/scripts/eval_policy.py +15 -9
  62. openadapt_ml/scripts/make_gif.py +1 -1
  63. openadapt_ml/scripts/prepare_synthetic.py +3 -1
  64. openadapt_ml/scripts/train.py +21 -9
  65. openadapt_ml/segmentation/README.md +920 -0
  66. openadapt_ml/segmentation/__init__.py +97 -0
  67. openadapt_ml/segmentation/adapters/__init__.py +5 -0
  68. openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
  69. openadapt_ml/segmentation/annotator.py +610 -0
  70. openadapt_ml/segmentation/cache.py +290 -0
  71. openadapt_ml/segmentation/cli.py +674 -0
  72. openadapt_ml/segmentation/deduplicator.py +656 -0
  73. openadapt_ml/segmentation/frame_describer.py +788 -0
  74. openadapt_ml/segmentation/pipeline.py +340 -0
  75. openadapt_ml/segmentation/schemas.py +622 -0
  76. openadapt_ml/segmentation/segment_extractor.py +634 -0
  77. openadapt_ml/training/azure_ops_viewer.py +1097 -0
  78. openadapt_ml/training/benchmark_viewer.py +52 -41
  79. openadapt_ml/training/shared_ui.py +7 -7
  80. openadapt_ml/training/stub_provider.py +57 -35
  81. openadapt_ml/training/trainer.py +143 -86
  82. openadapt_ml/training/trl_trainer.py +70 -21
  83. openadapt_ml/training/viewer.py +323 -108
  84. openadapt_ml/training/viewer_components.py +180 -0
  85. {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.2.dist-info}/METADATA +215 -14
  86. openadapt_ml-0.2.2.dist-info/RECORD +116 -0
  87. openadapt_ml/benchmarks/base.py +0 -366
  88. openadapt_ml/benchmarks/data_collection.py +0 -432
  89. openadapt_ml/benchmarks/live_tracker.py +0 -180
  90. openadapt_ml/benchmarks/runner.py +0 -418
  91. openadapt_ml/benchmarks/waa.py +0 -761
  92. openadapt_ml/benchmarks/waa_live.py +0 -619
  93. openadapt_ml-0.2.0.dist-info/RECORD +0 -86
  94. {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.2.dist-info}/WHEEL +0 -0
  95. {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.2.dist-info}/licenses/LICENSE +0 -0
--- a/openadapt_ml/experiments/demo_prompt/format_demo.py
+++ b/openadapt_ml/experiments/demo_prompt/format_demo.py
@@ -6,7 +6,7 @@ from pathlib import Path
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
-    from openadapt_ml.schema import Action, ActionType, Episode, Step
+    from openadapt_ml.schema import Action, Episode, Step
 
 
 def format_action(action: "Action") -> str:
@@ -19,7 +19,7 @@ def format_action(action: "Action") -> str:
        String representation like "CLICK(0.5, 0.3)" or "TYPE('hello')".
    """
    # Get action type value (handle both enum and string)
-    action_type = action.type.value if hasattr(action.type, 'value') else action.type
+    action_type = action.type.value if hasattr(action.type, "value") else action.type
 
    if action_type == "click":
        if action.normalized_coordinates is not None:
@@ -53,7 +53,10 @@ def format_action(action: "Action") -> str:
        return f"SCROLL({direction})"
 
    elif action_type == "drag":
-        if action.normalized_coordinates is not None and action.normalized_end is not None:
+        if (
+            action.normalized_coordinates is not None
+            and action.normalized_end is not None
+        ):
            x, y = action.normalized_coordinates
            end_x, end_y = action.normalized_end
            return f"DRAG({x:.3f}, {y:.3f}, {end_x:.3f}, {end_y:.3f})"
@@ -112,7 +115,11 @@ def format_episode_as_demo(
        lines.append(format_step(step, i))
 
        # Optionally include screenshot reference
-        if include_screenshots and step.observation and step.observation.screenshot_path:
+        if (
+            include_screenshots
+            and step.observation
+            and step.observation.screenshot_path
+        ):
            lines.append(f" [Screenshot: {step.observation.screenshot_path}]")
 
        lines.append("")
@@ -167,9 +174,12 @@ def format_episode_verbose(
            if next_step.observation and next_step.observation.window_title:
                if (
                    not step.observation
-                    or next_step.observation.window_title != step.observation.window_title
+                    or next_step.observation.window_title
+                    != step.observation.window_title
                ):
-                    lines.append(f" [Result: Window changed to {next_step.observation.window_title}]")
+                    lines.append(
+                        f" [Result: Window changed to {next_step.observation.window_title}]"
+                    )
 
        lines.append("")
 
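For reference, a minimal usage sketch of the reformatted format_demo helpers: only format_action and its output formats ("CLICK(0.5, 0.3)", "DRAG(...)") are confirmed by the hunks above; the Action keyword arguments are assumptions for illustration.

# Hypothetical usage sketch; the Action constructor signature is assumed,
# only format_action's behaviour shown in the hunks above is confirmed.
from openadapt_ml.experiments.demo_prompt.format_demo import format_action
from openadapt_ml.schema import Action

click = Action(type="click", normalized_coordinates=(0.5, 0.3))  # assumed kwargs
drag = Action(
    type="drag",
    normalized_coordinates=(0.1, 0.2),
    normalized_end=(0.8, 0.9),
)

print(format_action(click))  # e.g. "CLICK(0.5, 0.3)" per the docstring
print(format_action(drag))   # "DRAG(0.100, 0.200, 0.800, 0.900)" per the f-string above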
--- a/openadapt_ml/experiments/demo_prompt/run_experiment.py
+++ b/openadapt_ml/experiments/demo_prompt/run_experiment.py
@@ -8,14 +8,12 @@ from __future__ import annotations
 import argparse
 import base64
 import json
-import sys
 from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
 from typing import Any
 
 from openadapt_ml.experiments.demo_prompt.format_demo import (
-    format_episode_as_demo,
    format_episode_verbose,
    generate_length_matched_control,
    get_demo_screenshot_paths,
@@ -138,14 +136,16 @@ class DemoPromptExperiment:
                if Path(path).exists():
                    with open(path, "rb") as f:
                        image_b64 = base64.b64encode(f.read()).decode("utf-8")
-                    content.append({
-                        "type": "image",
-                        "source": {
-                            "type": "base64",
-                            "media_type": "image/png",
-                            "data": image_b64,
-                        },
-                    })
+                    content.append(
+                        {
+                            "type": "image",
+                            "source": {
+                                "type": "base64",
+                                "media_type": "image/png",
+                                "data": image_b64,
+                            },
+                        }
+                    )
 
            # Add text
            content.append({"type": "text", "text": user_content})
@@ -158,7 +158,11 @@ class DemoPromptExperiment:
            )
 
            parts = getattr(response, "content", [])
-            texts = [getattr(p, "text", "") for p in parts if getattr(p, "type", "") == "text"]
+            texts = [
+                getattr(p, "text", "")
+                for p in parts
+                if getattr(p, "type", "") == "text"
+            ]
            return "\n".join([t for t in texts if t]).strip()
 
        elif self.provider == "openai":
@@ -170,10 +174,14 @@ class DemoPromptExperiment:
                if Path(path).exists():
                    with open(path, "rb") as f:
                        image_b64 = base64.b64encode(f.read()).decode("utf-8")
-                    user_content_parts.append({
-                        "type": "image_url",
-                        "image_url": {"url": f"data:image/png;base64,{image_b64}"},
-                    })
+                    user_content_parts.append(
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/png;base64,{image_b64}"
+                            },
+                        }
+                    )
 
            # Add text
            user_content_parts.append({"type": "text", "text": user_content})
@@ -446,7 +454,9 @@ def run_experiment(
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)
 
-    results_file = output_path / f"results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
+    results_file = (
+        output_path / f"results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
+    )
    with open(results_file, "w") as f:
        json.dump(
            {
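The two image-handling hunks above only reflow existing dict literals. A self-contained sketch of the provider-specific image blocks they build is below; the helper name encode_screenshot is hypothetical, while the dict layouts mirror the diff.

# Sketch of the image content blocks built in the hunks above; the helper
# name is an assumption, the dict shapes are copied from the diff.
import base64
from pathlib import Path


def encode_screenshot(path: str, provider: str) -> dict:
    """Return one provider-specific image content block for a PNG screenshot."""
    image_b64 = base64.b64encode(Path(path).read_bytes()).decode("utf-8")
    if provider == "anthropic":
        # Anthropic-style block: base64 payload with explicit media type
        return {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": "image/png",
                "data": image_b64,
            },
        }
    # OpenAI-style content part: data URL under "image_url"
    return {
        "type": "image_url",
        "image_url": {"url": f"data:image/png;base64,{image_b64}"},
    }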
--- /dev/null
+++ b/openadapt_ml/experiments/representation_shootout/__init__.py
@@ -0,0 +1,70 @@
+"""Representation Shootout Experiment.
+
+Compares three approaches for GUI action prediction under distribution drift:
+
+- Condition A: Raw Coordinates - Direct coordinate regression
+- Condition B: Coordinates + Visual Cues - Enhanced with markers and zoom
+- Condition C: Marks (Element IDs) - Element classification using SoM
+
+Usage:
+    # Run full experiment
+    python -m openadapt_ml.experiments.representation_shootout.runner run
+
+    # Run specific condition
+    python -m openadapt_ml.experiments.representation_shootout.runner run --condition marks
+
+    # Evaluate under specific drift
+    python -m openadapt_ml.experiments.representation_shootout.runner eval --drift resolution
+
+See docs/experiments/representation_shootout_design.md for full documentation.
+"""
+
+from openadapt_ml.experiments.representation_shootout.config import (
+    ConditionConfig,
+    ConditionName,
+    DriftConfig,
+    DriftType,
+    ExperimentConfig,
+    MetricName,
+)
+from openadapt_ml.experiments.representation_shootout.conditions import (
+    ConditionBase,
+    CoordsCuesCondition,
+    MarksCondition,
+    RawCoordsCondition,
+    create_condition,
+)
+from openadapt_ml.experiments.representation_shootout.evaluator import (
+    DriftEvaluator,
+    EvaluationResult,
+    compute_metrics,
+    make_recommendation,
+)
+from openadapt_ml.experiments.representation_shootout.runner import (
+    ExperimentRunner,
+    run_experiment,
+)
+
+__all__ = [
+    # Config
+    "ExperimentConfig",
+    "ConditionConfig",
+    "ConditionName",
+    "DriftConfig",
+    "DriftType",
+    "MetricName",
+    # Conditions
+    "ConditionBase",
+    "RawCoordsCondition",
+    "CoordsCuesCondition",
+    "MarksCondition",
+    "create_condition",
+    # Evaluator
+    "DriftEvaluator",
+    "EvaluationResult",
+    "compute_metrics",
+    "make_recommendation",
+    # Runner
+    "ExperimentRunner",
+    "run_experiment",
+]
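A hedged sketch of consuming the new package's public API: only the re-exported names come from the __all__ above; the call signatures are assumptions not shown in this diff.

# Assumed usage only: the names are re-exported by the new __init__.py, but the
# signatures of ExperimentConfig, create_condition, and run_experiment are guesses.
from openadapt_ml.experiments.representation_shootout import (
    ExperimentConfig,
    create_condition,
    run_experiment,
)

config = ExperimentConfig()            # assumed to be default-constructible
condition = create_condition("marks")  # assumed to accept a condition name
results = run_experiment(config)       # assumed Python entry point mirroring the CLI `run` command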