openadapt-ml 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. openadapt_ml/baselines/__init__.py +121 -0
  2. openadapt_ml/baselines/adapter.py +185 -0
  3. openadapt_ml/baselines/cli.py +314 -0
  4. openadapt_ml/baselines/config.py +448 -0
  5. openadapt_ml/baselines/parser.py +922 -0
  6. openadapt_ml/baselines/prompts.py +787 -0
  7. openadapt_ml/benchmarks/__init__.py +13 -107
  8. openadapt_ml/benchmarks/agent.py +297 -374
  9. openadapt_ml/benchmarks/azure.py +62 -24
  10. openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
  11. openadapt_ml/benchmarks/cli.py +1874 -751
  12. openadapt_ml/benchmarks/trace_export.py +631 -0
  13. openadapt_ml/benchmarks/viewer.py +1236 -0
  14. openadapt_ml/benchmarks/vm_monitor.py +1111 -0
  15. openadapt_ml/benchmarks/waa_deploy/Dockerfile +216 -0
  16. openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
  17. openadapt_ml/benchmarks/waa_deploy/api_agent.py +540 -0
  18. openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
  19. openadapt_ml/cloud/azure_inference.py +3 -5
  20. openadapt_ml/cloud/lambda_labs.py +722 -307
  21. openadapt_ml/cloud/local.py +3194 -89
  22. openadapt_ml/cloud/ssh_tunnel.py +595 -0
  23. openadapt_ml/datasets/next_action.py +125 -96
  24. openadapt_ml/evals/grounding.py +32 -9
  25. openadapt_ml/evals/plot_eval_metrics.py +15 -13
  26. openadapt_ml/evals/trajectory_matching.py +120 -57
  27. openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
  28. openadapt_ml/experiments/demo_prompt/format_demo.py +236 -0
  29. openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
  30. openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
  31. openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
  32. openadapt_ml/experiments/demo_prompt/run_experiment.py +541 -0
  33. openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
  34. openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
  35. openadapt_ml/experiments/representation_shootout/config.py +390 -0
  36. openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
  37. openadapt_ml/experiments/representation_shootout/runner.py +687 -0
  38. openadapt_ml/experiments/waa_demo/__init__.py +10 -0
  39. openadapt_ml/experiments/waa_demo/demos.py +357 -0
  40. openadapt_ml/experiments/waa_demo/runner.py +732 -0
  41. openadapt_ml/experiments/waa_demo/tasks.py +151 -0
  42. openadapt_ml/export/__init__.py +9 -0
  43. openadapt_ml/export/__main__.py +6 -0
  44. openadapt_ml/export/cli.py +89 -0
  45. openadapt_ml/export/parquet.py +277 -0
  46. openadapt_ml/grounding/detector.py +18 -14
  47. openadapt_ml/ingest/__init__.py +11 -10
  48. openadapt_ml/ingest/capture.py +97 -86
  49. openadapt_ml/ingest/loader.py +120 -69
  50. openadapt_ml/ingest/synthetic.py +344 -193
  51. openadapt_ml/models/api_adapter.py +14 -4
  52. openadapt_ml/models/base_adapter.py +10 -2
  53. openadapt_ml/models/providers/__init__.py +288 -0
  54. openadapt_ml/models/providers/anthropic.py +266 -0
  55. openadapt_ml/models/providers/base.py +299 -0
  56. openadapt_ml/models/providers/google.py +376 -0
  57. openadapt_ml/models/providers/openai.py +342 -0
  58. openadapt_ml/models/qwen_vl.py +46 -19
  59. openadapt_ml/perception/__init__.py +35 -0
  60. openadapt_ml/perception/integration.py +399 -0
  61. openadapt_ml/retrieval/README.md +226 -0
  62. openadapt_ml/retrieval/USAGE.md +391 -0
  63. openadapt_ml/retrieval/__init__.py +91 -0
  64. openadapt_ml/retrieval/demo_retriever.py +843 -0
  65. openadapt_ml/retrieval/embeddings.py +630 -0
  66. openadapt_ml/retrieval/index.py +194 -0
  67. openadapt_ml/retrieval/retriever.py +162 -0
  68. openadapt_ml/runtime/__init__.py +50 -0
  69. openadapt_ml/runtime/policy.py +27 -14
  70. openadapt_ml/runtime/safety_gate.py +471 -0
  71. openadapt_ml/schema/__init__.py +113 -0
  72. openadapt_ml/schema/converters.py +588 -0
  73. openadapt_ml/schema/episode.py +470 -0
  74. openadapt_ml/scripts/capture_screenshots.py +530 -0
  75. openadapt_ml/scripts/compare.py +102 -61
  76. openadapt_ml/scripts/demo_policy.py +4 -1
  77. openadapt_ml/scripts/eval_policy.py +19 -14
  78. openadapt_ml/scripts/make_gif.py +1 -1
  79. openadapt_ml/scripts/prepare_synthetic.py +16 -17
  80. openadapt_ml/scripts/train.py +98 -75
  81. openadapt_ml/segmentation/README.md +920 -0
  82. openadapt_ml/segmentation/__init__.py +97 -0
  83. openadapt_ml/segmentation/adapters/__init__.py +5 -0
  84. openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
  85. openadapt_ml/segmentation/annotator.py +610 -0
  86. openadapt_ml/segmentation/cache.py +290 -0
  87. openadapt_ml/segmentation/cli.py +674 -0
  88. openadapt_ml/segmentation/deduplicator.py +656 -0
  89. openadapt_ml/segmentation/frame_describer.py +788 -0
  90. openadapt_ml/segmentation/pipeline.py +340 -0
  91. openadapt_ml/segmentation/schemas.py +622 -0
  92. openadapt_ml/segmentation/segment_extractor.py +634 -0
  93. openadapt_ml/training/azure_ops_viewer.py +1097 -0
  94. openadapt_ml/training/benchmark_viewer.py +3255 -19
  95. openadapt_ml/training/shared_ui.py +7 -7
  96. openadapt_ml/training/stub_provider.py +57 -35
  97. openadapt_ml/training/trainer.py +255 -441
  98. openadapt_ml/training/trl_trainer.py +403 -0
  99. openadapt_ml/training/viewer.py +323 -108
  100. openadapt_ml/training/viewer_components.py +180 -0
  101. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +312 -69
  102. openadapt_ml-0.2.1.dist-info/RECORD +116 -0
  103. openadapt_ml/benchmarks/base.py +0 -366
  104. openadapt_ml/benchmarks/data_collection.py +0 -432
  105. openadapt_ml/benchmarks/runner.py +0 -381
  106. openadapt_ml/benchmarks/waa.py +0 -704
  107. openadapt_ml/schemas/__init__.py +0 -53
  108. openadapt_ml/schemas/sessions.py +0 -122
  109. openadapt_ml/schemas/validation.py +0 -252
  110. openadapt_ml-0.1.0.dist-info/RECORD +0 -55
  111. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
  112. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,216 @@
1
+ # =============================================================================
2
+ # WAA (Windows Agent Arena) Docker Image
3
+ # =============================================================================
4
+ #
5
+ # This image combines:
6
+ # 1. dockurr/windows:latest - Modern base that auto-downloads Windows 11
7
+ # 2. windowsarena/winarena:latest - Official WAA benchmark client and scripts
8
+ #
9
+ # The official windowsarena/winarena uses an outdated dockurr/windows (v0.00)
10
+ # that doesn't auto-download Windows. This image fixes that while keeping
11
+ # full compatibility with the official WAA benchmark.
12
+ #
13
+ # Usage:
14
+ # # Build the image
15
+ # docker build -t waa-auto:latest .
16
+ #
17
+ # # Run benchmark (after Windows is set up)
18
+ # docker run --rm --device=/dev/kvm --cap-add NET_ADMIN \
19
+ # -p 8006:8006 -p 5000:5000 -p 7200:7200 \
20
+ # -v /path/to/storage:/storage \
21
+ # -e OPENAI_API_KEY="your-key" \
22
+ # waa-auto:latest \
23
+ # "/entry.sh --start-client true --model gpt-4o --num-tasks 5"
24
+ #
25
+ # =============================================================================
26
+
27
+ FROM dockurr/windows:latest
28
+
29
+ # -----------------------------------------------------------------------------
30
+ # Copy official WAA components from windowsarena/winarena
31
+ # -----------------------------------------------------------------------------
32
+
33
+ # Copy benchmark client scripts
34
+ COPY --from=windowsarena/winarena:latest /entry.sh /entry.sh
35
+ COPY --from=windowsarena/winarena:latest /entry_setup.sh /entry_setup.sh
36
+ COPY --from=windowsarena/winarena:latest /start_client.sh /start_client.sh
37
+
38
+ # Copy the Python benchmark client code
39
+ COPY --from=windowsarena/winarena:latest /client /client
40
+
41
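+ # Copy our WAA server startup script (NOTE(review): the later COPY of /oem from windowsarena/winarena overwrites same-named files - confirm upstream ships no start_waa_server.bat)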
42
+ COPY start_waa_server.bat /oem/start_waa_server.bat
43
+
44
+ # Copy model weights (GroundingDINO, OmniParser, etc.)
45
+ COPY --from=windowsarena/winarena:latest /models /models
46
+
47
+ # Copy Windows setup scripts (install.bat, setup.ps1, etc.)
48
+ COPY --from=windowsarena/winarena:latest /oem /oem
49
+
50
+ # Copy OEM files AFTER dockurr/samba starts (which wipes /tmp/smb)
51
+ # Copy IMMEDIATELY (no delay) and SYNCHRONOUSLY (not backgrounded) to ensure
52
+ # files are available before Windows boots and runs FirstLogonCommands
53
+ RUN sed -i '/^return 0$/i cp -r /oem/* /tmp/smb/ 2>/dev/null || true' /run/samba.sh && \
54
+ echo "Inserted OEM copy before return in samba.sh"
55
+
56
+ # DO NOT replace dockurr/windows's autounattend.xml - it handles OOBE properly
57
+ # Instead, only PATCH it to add InstallFrom element (prevents "Select OS" dialog)
58
+ # This preserves dockurr/windows's native OEM mechanism
59
+ RUN for xml in /run/assets/win11x64.xml /run/assets/win11x64-enterprise-eval.xml; do \
60
+ if [ -f "$xml" ] && ! grep -q "InstallFrom" "$xml"; then \
61
+ sed -i 's|<InstallTo>|<InstallFrom>\n <MetaData wcm:action="add">\n <Key>/IMAGE/INDEX</Key>\n <Value>1</Value>\n </MetaData>\n </InstallFrom>\n <InstallTo>|' "$xml"; \
62
+ fi; \
63
+ done && echo "Added InstallFrom element for automatic image selection"
64
+
65
+ # -----------------------------------------------------------------------------
66
+ # Create start_vm.sh that uses our dockurr/windows entrypoint
67
+ # -----------------------------------------------------------------------------
68
+
69
+ RUN printf '#!/bin/bash\n/usr/bin/tini -s /run/entry.sh\n' > /start_vm.sh && chmod +x /start_vm.sh
70
+
71
+ # -----------------------------------------------------------------------------
72
+ # Patch IP addresses: official uses 20.20.20.21, dockurr/windows uses 172.30.0.2
73
+ # -----------------------------------------------------------------------------
74
+
75
+ # Patch entry scripts (must work - these files were just copied); dots are escaped so sed matches only the literal IP
76
+ RUN sed -i 's|20\.20\.20\.21|172.30.0.2|g' /entry_setup.sh && \
77
+ sed -i 's|20\.20\.20\.21|172.30.0.2|g' /entry.sh && \
78
+ sed -i 's|20\.20\.20\.21|172.30.0.2|g' /start_client.sh && \
79
+ echo "Patched entry scripts"
80
+
81
+ # Patch client Python files (dots escaped so sed matches only the literal IP, not any character)
82
+ RUN find /client -name "*.py" -exec sed -i 's|20\.20\.20\.21|172.30.0.2|g' {} \; && \
83
+ echo "Patched client Python files"
84
+
85
+ # -----------------------------------------------------------------------------
86
+ # Add API-backed agent support (Claude / OpenAI)
87
+ # NOTE: API agents (api-claude, api-openai) are run EXTERNALLY via openadapt-evals CLI
88
+ # which connects to the WAA server over SSH tunnel. No internal patching needed.
89
+ # The api_agent.py is included for reference/future use.
90
+ # -----------------------------------------------------------------------------
91
+
92
+ # Copy api_agent.py for reference (used externally by openadapt-evals)
93
+ COPY api_agent.py /client/mm_agents/api_agent.py
94
+
95
+ # -----------------------------------------------------------------------------
96
+ # Fix Windows setup for automation
97
+ # -----------------------------------------------------------------------------
98
+
99
+ # Set password for AutoLogon (Windows 11 requires password for login)
100
+ RUN sed -i 's|<Value></Value>|<Value>docker</Value>|g' /run/assets/win11x64.xml 2>/dev/null || true
101
+ RUN sed -i 's|<Value />|<Value>docker</Value>|g' /run/assets/win11x64.xml 2>/dev/null || true
102
+
103
+ # Add firewall disable and other automation commands to FirstLogonCommands
104
+ # CRITICAL: Also create a scheduled task (trigger: onlogon) so WAA server starts on EVERY logon, not just first logon
105
+ RUN if grep -q "</FirstLogonCommands>" /run/assets/win11x64.xml; then \
106
+ LAST_ORDER=$(grep -oP "Order>\K[0-9]+" /run/assets/win11x64.xml | sort -n | tail -1) && \
107
+ N1=$((LAST_ORDER + 1)) && \
108
+ N2=$((LAST_ORDER + 2)) && \
109
+ N3=$((LAST_ORDER + 3)) && \
110
+ N4=$((LAST_ORDER + 4)) && \
111
+ N5=$((LAST_ORDER + 5)) && \
112
+ N6=$((LAST_ORDER + 6)) && \
113
+ sed -i "s|</FirstLogonCommands>|\
114
+ <SynchronousCommand wcm:action=\"add\">\n\
115
+ <Order>$N1</Order>\n\
116
+ <CommandLine>netsh advfirewall set allprofiles state off</CommandLine>\n\
117
+ <Description>Disable Windows Firewall</Description>\n\
118
+ </SynchronousCommand>\n\
119
+ <SynchronousCommand wcm:action=\"add\">\n\
120
+ <Order>$N2</Order>\n\
121
+ <CommandLine>powercfg /change standby-timeout-ac 0</CommandLine>\n\
122
+ <Description>Disable sleep</Description>\n\
123
+ </SynchronousCommand>\n\
124
+ <SynchronousCommand wcm:action=\"add\">\n\
125
+ <Order>$N3</Order>\n\
126
+ <CommandLine>powercfg /change monitor-timeout-ac 0</CommandLine>\n\
127
+ <Description>Disable monitor timeout</Description>\n\
128
+ </SynchronousCommand>\n\
129
+ <SynchronousCommand wcm:action=\"add\">\n\
130
+ <Order>$N4</Order>\n\
131
+ <CommandLine>reg add \"HKLM\\\\SOFTWARE\\\\Policies\\\\Microsoft\\\\Windows\\\\Personalization\" /v NoLockScreen /t REG_DWORD /d 1 /f</CommandLine>\n\
132
+ <Description>Disable lock screen</Description>\n\
133
+ </SynchronousCommand>\n\
134
+ <SynchronousCommand wcm:action=\"add\">\n\
135
+ <Order>$N5</Order>\n\
136
+ <CommandLine>cmd /c start /wait \\\\\\\\host.lan\\\\Data\\\\install.bat</CommandLine>\n\
137
+ <Description>Run WAA setup script to install Python, Chrome, etc.</Description>\n\
138
+ </SynchronousCommand>\n\
139
+ <SynchronousCommand wcm:action=\"add\">\n\
140
+ <Order>$N6</Order>\n\
141
+ <CommandLine>schtasks /create /tn \"WAAServer\" /tr \"\\\\\\\\host.lan\\\\Data\\\\start_waa_server.bat\" /sc onlogon /rl highest /f</CommandLine>\n\
142
+ <Description>Create scheduled task for WAA server auto-start on every boot</Description>\n\
143
+ </SynchronousCommand>\n\
144
+ <SynchronousCommand wcm:action=\"add\">\n\
145
+ <Order>$((N6 + 1))</Order>\n\
146
+ <CommandLine>reg add \"HKCU\\\\SOFTWARE\\\\Microsoft\\\\Windows\\\\CurrentVersion\\\\Run\" /v WAAServer /t REG_SZ /d \"cmd /c \\\\\\\\host.lan\\\\Data\\\\start_waa_server.bat\" /f</CommandLine>\n\
147
+ <Description>Add registry entry for WAA server auto-start (backup)</Description>\n\
148
+ </SynchronousCommand>\n\
149
+ <SynchronousCommand wcm:action=\"add\">\n\
150
+ <Order>$((N6 + 2))</Order>\n\
151
+ <CommandLine>\\\\\\\\host.lan\\\\Data\\\\start_waa_server.bat</CommandLine>\n\
152
+ <Description>Start WAA server immediately</Description>\n\
153
+ </SynchronousCommand>\n\
154
+ </FirstLogonCommands>|" /run/assets/win11x64.xml; \
155
+ fi
156
+
157
+ # -----------------------------------------------------------------------------
158
+ # Copy Python 3.9 and all packages from vanilla image
159
+ # -----------------------------------------------------------------------------
160
+ # IMPORTANT: Do NOT install Python from apt or pip install packages ourselves.
161
+ # The vanilla image has Python 3.9.20 with transformers 4.46.2 which is compatible
162
+ # with GroundingDINO. Installing our own Python (3.13) with latest transformers (5.0)
163
+ # breaks the navi agent with: AttributeError: 'BertModel' has no attribute 'get_head_mask'
164
+
165
+ # Copy Python 3.9 installation from vanilla (binaries, libraries, packages)
166
+ COPY --from=windowsarena/winarena:latest /usr/local/bin/python* /usr/local/bin/
167
+ COPY --from=windowsarena/winarena:latest /usr/local/bin/pip* /usr/local/bin/
168
+ COPY --from=windowsarena/winarena:latest /usr/local/lib/python3.9 /usr/local/lib/python3.9
169
+ COPY --from=windowsarena/winarena:latest /usr/local/lib/libpython3.9.so* /usr/local/lib/
170
+ COPY --from=windowsarena/winarena:latest /usr/local/include/python3.9 /usr/local/include/python3.9
171
+
172
+ # Ensure the shared library is found
173
+ RUN ldconfig
174
+
175
+ # Create symlinks for python/pip commands
176
+ RUN ln -sf /usr/local/bin/python3.9 /usr/local/bin/python && \
177
+ ln -sf /usr/local/bin/python3.9 /usr/bin/python && \
178
+ ln -sf /usr/local/bin/python3.9 /usr/bin/python3 && \
179
+ ln -sf /usr/local/bin/pip3.9 /usr/local/bin/pip && \
180
+ ln -sf /usr/local/bin/pip3.9 /usr/bin/pip && \
181
+ ln -sf /usr/local/bin/pip3.9 /usr/bin/pip3
182
+
183
+ # Install only system dependencies that Python packages need (not Python itself)
184
+ RUN apt-get update && apt-get install -y --no-install-recommends \
185
+ tesseract-ocr \
186
+ libgl1 \
187
+ libglib2.0-0 \
188
+ libsm6 \
189
+ libxext6 \
190
+ libxrender-dev \
191
+ ffmpeg \
192
+ && rm -rf /var/lib/apt/lists/*
193
+
194
+ # Note: Playwright browsers not copied - not needed for navi agent (uses GroundingDINO)
195
+ # If needed later, install via: python -m playwright install chromium
196
+
197
+ # -----------------------------------------------------------------------------
198
+ # Environment configuration
199
+ # -----------------------------------------------------------------------------
200
+
201
+ ENV YRES="900"
202
+ ENV XRES="1440"
203
+ ENV RAM_SIZE="8G"
204
+ ENV CPU_CORES="4"
205
+ ENV DISK_SIZE="30G"
206
+ ENV VERSION="11e"
207
+ ENV ARGUMENTS="-qmp tcp:0.0.0.0:7200,server,nowait"
208
+
209
+ # Expose ports
210
+ EXPOSE 8006 5000 7200 3389
211
+
212
+ # Default entrypoint - use dockurr/windows's native entry point
213
+ # The OEM files are copied by samba.sh (patched above) when Samba starts
214
+ # dockurr/windows handles: QEMU VM startup, Samba, VNC, Windows boot
215
+ # Our patched autounattend.xml handles: FirstLogonCommands that run install.bat
216
+ ENTRYPOINT ["/usr/bin/tini", "-s", "/run/entry.sh"]
@@ -0,0 +1,10 @@
1
+ """WAA (Windows Agent Arena) deployment module.
2
+
3
+ This module contains the files used to build and populate the WAA Docker image:
4
+ - api_agent.py: API-based agent (Claude/GPT-5.1) for WAA
5
+ - Dockerfile: Custom waa-auto Docker image
6
+ """
7
+
8
+ from openadapt_ml.benchmarks.waa_deploy.api_agent import ApiAgent
9
+
10
+ __all__ = ["ApiAgent"]