openadapt-ml 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. openadapt_ml/benchmarks/__init__.py +8 -0
  2. openadapt_ml/benchmarks/agent.py +90 -11
  3. openadapt_ml/benchmarks/azure.py +35 -6
  4. openadapt_ml/benchmarks/cli.py +4449 -201
  5. openadapt_ml/benchmarks/live_tracker.py +180 -0
  6. openadapt_ml/benchmarks/runner.py +41 -4
  7. openadapt_ml/benchmarks/viewer.py +1219 -0
  8. openadapt_ml/benchmarks/vm_monitor.py +610 -0
  9. openadapt_ml/benchmarks/waa.py +61 -4
  10. openadapt_ml/benchmarks/waa_deploy/Dockerfile +222 -0
  11. openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
  12. openadapt_ml/benchmarks/waa_deploy/api_agent.py +539 -0
  13. openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
  14. openadapt_ml/benchmarks/waa_live.py +619 -0
  15. openadapt_ml/cloud/local.py +1555 -1
  16. openadapt_ml/cloud/ssh_tunnel.py +553 -0
  17. openadapt_ml/datasets/next_action.py +87 -68
  18. openadapt_ml/evals/grounding.py +26 -8
  19. openadapt_ml/evals/trajectory_matching.py +84 -36
  20. openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
  21. openadapt_ml/experiments/demo_prompt/format_demo.py +226 -0
  22. openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
  23. openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
  24. openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
  25. openadapt_ml/experiments/demo_prompt/run_experiment.py +531 -0
  26. openadapt_ml/experiments/waa_demo/__init__.py +10 -0
  27. openadapt_ml/experiments/waa_demo/demos.py +357 -0
  28. openadapt_ml/experiments/waa_demo/runner.py +717 -0
  29. openadapt_ml/experiments/waa_demo/tasks.py +151 -0
  30. openadapt_ml/export/__init__.py +9 -0
  31. openadapt_ml/export/__main__.py +6 -0
  32. openadapt_ml/export/cli.py +89 -0
  33. openadapt_ml/export/parquet.py +265 -0
  34. openadapt_ml/ingest/__init__.py +3 -4
  35. openadapt_ml/ingest/capture.py +89 -81
  36. openadapt_ml/ingest/loader.py +116 -68
  37. openadapt_ml/ingest/synthetic.py +221 -159
  38. openadapt_ml/retrieval/README.md +226 -0
  39. openadapt_ml/retrieval/USAGE.md +391 -0
  40. openadapt_ml/retrieval/__init__.py +91 -0
  41. openadapt_ml/retrieval/demo_retriever.py +817 -0
  42. openadapt_ml/retrieval/embeddings.py +629 -0
  43. openadapt_ml/retrieval/index.py +194 -0
  44. openadapt_ml/retrieval/retriever.py +160 -0
  45. openadapt_ml/runtime/policy.py +10 -10
  46. openadapt_ml/schema/__init__.py +104 -0
  47. openadapt_ml/schema/converters.py +541 -0
  48. openadapt_ml/schema/episode.py +457 -0
  49. openadapt_ml/scripts/compare.py +26 -16
  50. openadapt_ml/scripts/eval_policy.py +4 -5
  51. openadapt_ml/scripts/prepare_synthetic.py +14 -17
  52. openadapt_ml/scripts/train.py +81 -70
  53. openadapt_ml/training/benchmark_viewer.py +3225 -0
  54. openadapt_ml/training/trainer.py +120 -363
  55. openadapt_ml/training/trl_trainer.py +354 -0
  56. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/METADATA +102 -60
  57. openadapt_ml-0.2.0.dist-info/RECORD +86 -0
  58. openadapt_ml/schemas/__init__.py +0 -53
  59. openadapt_ml/schemas/sessions.py +0 -122
  60. openadapt_ml/schemas/validation.py +0 -252
  61. openadapt_ml-0.1.0.dist-info/RECORD +0 -55
  62. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/WHEEL +0 -0
  63. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,222 @@
1
+ # =============================================================================
2
+ # WAA (Windows Agent Arena) Docker Image
3
+ # =============================================================================
4
+ #
5
+ # This image combines:
6
+ # 1. dockurr/windows:latest - Modern base that auto-downloads Windows 11
7
+ # 2. windowsarena/winarena:latest - Official WAA benchmark client and scripts
8
+ #
9
+ # The official windowsarena/winarena uses an outdated dockurr/windows (v0.00)
10
+ # that doesn't auto-download Windows. This image fixes that while keeping
11
+ # full compatibility with the official WAA benchmark.
12
+ #
13
+ # Usage:
14
+ # # Build the image
15
+ # docker build -t waa-auto:latest .
16
+ #
17
+ # # Run benchmark (after Windows is set up)
18
+ # docker run --rm --device=/dev/kvm --cap-add NET_ADMIN \
19
+ # -p 8006:8006 -p 5000:5000 -p 7200:7200 \
20
+ # -v /path/to/storage:/storage \
21
+ # -e OPENAI_API_KEY="your-key" \
22
+ # waa-auto:latest \
23
+ # "/entry.sh --start-client true --model gpt-4o --num-tasks 5"
24
+ #
25
+ # =============================================================================
26
+
27
+ FROM dockurr/windows:latest
28
+
29
+ # -----------------------------------------------------------------------------
30
+ # Copy official WAA components from windowsarena/winarena
31
+ # -----------------------------------------------------------------------------
32
+
33
+ # Copy benchmark client scripts
34
+ COPY --from=windowsarena/winarena:latest /entry.sh /entry.sh
35
+ COPY --from=windowsarena/winarena:latest /entry_setup.sh /entry_setup.sh
36
+ COPY --from=windowsarena/winarena:latest /start_client.sh /start_client.sh
37
+
38
+ # Copy the Python benchmark client code
39
+ COPY --from=windowsarena/winarena:latest /client /client
40
+
41
+ # Copy our WAA server startup script
42
+ COPY start_waa_server.bat /oem/start_waa_server.bat
43
+
44
+ # Copy model weights (GroundingDINO, OmniParser, etc.)
45
+ COPY --from=windowsarena/winarena:latest /models /models
46
+
47
+ # Copy Windows setup scripts (install.bat, setup.ps1, etc.)
48
+ COPY --from=windowsarena/winarena:latest /oem /oem
49
+
50
+ # Copy OEM files AFTER dockurr/samba starts (which wipes /tmp/smb)
51
+ # Copy IMMEDIATELY (no delay) and SYNCHRONOUSLY (not backgrounded) to ensure
52
+ # files are available before Windows boots and runs FirstLogonCommands
53
+ RUN sed -i '/^return 0$/i cp -r /oem/* /tmp/smb/ 2>/dev/null || true' /run/samba.sh && \
54
+ echo "Inserted OEM copy before return in samba.sh"
55
+
56
+ # Copy unattend.xml for automated Windows installation
57
+ COPY --from=windowsarena/winarena:latest /run/assets/win11x64-enterprise-eval.xml /run/assets/win11x64.xml
58
+
59
+ # -----------------------------------------------------------------------------
60
+ # Create start_vm.sh that uses our dockurr/windows entrypoint
61
+ # -----------------------------------------------------------------------------
62
+
63
+ RUN printf '#!/bin/bash\n/usr/bin/tini -s /run/entry.sh\n' > /start_vm.sh && chmod +x /start_vm.sh
64
+
65
+ # -----------------------------------------------------------------------------
66
+ # Patch IP addresses: official uses 20.20.20.21, dockurr/windows uses 172.30.0.2
67
+ # -----------------------------------------------------------------------------
68
+
69
+ # Patch entry scripts (must work - these files were just copied); dots are escaped so only the literal IP matches
70
+ RUN sed -i 's|20\.20\.20\.21|172.30.0.2|g' /entry_setup.sh && \
71
+ sed -i 's|20\.20\.20\.21|172.30.0.2|g' /entry.sh && \
72
+ sed -i 's|20\.20\.20\.21|172.30.0.2|g' /start_client.sh && \
73
+ echo "Patched entry scripts"
74
+
75
+ # Patch client Python files (same literal-IP match as above)
76
+ RUN find /client -name "*.py" -exec sed -i 's|20\.20\.20\.21|172.30.0.2|g' {} \; && \
77
+ echo "Patched client Python files"
78
+
79
+ # -----------------------------------------------------------------------------
80
+ # Add API-backed agent support (Claude Sonnet 4.5 / GPT-5.1)
81
+ # This allows using --agent api-claude or --agent api-openai instead of navi
82
+ # -----------------------------------------------------------------------------
83
+
84
+ # Copy api_agent.py to the client mm_agents directory
85
+ COPY api_agent.py /client/mm_agents/api_agent.py
86
+
87
+ # Patch run.py to support api-claude and api-openai agents
88
+ # This adds elif blocks after the "navi" agent handling
89
+ # Using Python to insert the patch with proper indentation
90
+ RUN python3 -c "import re; \
91
+ f = open('/client/run.py', 'r'); c = f.read(); f.close(); \
92
+ patch = ''' elif cfg_args[\"agent_name\"] in [\"api-claude\", \"api-openai\"]:\n from mm_agents.api_agent import ApiAgent\n provider = \"anthropic\" if cfg_args[\"agent_name\"] == \"api-claude\" else \"openai\"\n agent = ApiAgent(provider=provider, temperature=args.temperature)\n'''; \
93
+ c = c.replace('raise ValueError(f\"Unknown agent name: {cfg_args', patch + ' raise ValueError(f\"Unknown agent name: {cfg_args'); \
94
+ f = open('/client/run.py', 'w'); f.write(c); f.close(); \
95
+ print('Patched run.py for API agents')"
96
+
97
+ # -----------------------------------------------------------------------------
98
+ # Fix Windows setup for automation
99
+ # -----------------------------------------------------------------------------
100
+
101
+ # Set password for AutoLogon (Windows 11 requires password for login)
102
+ RUN sed -i 's|<Value></Value>|<Value>docker</Value>|g' /run/assets/win11x64.xml 2>/dev/null || true
103
+ RUN sed -i 's|<Value />|<Value>docker</Value>|g' /run/assets/win11x64.xml 2>/dev/null || true
104
+
105
+ # Add firewall disable and other automation commands to FirstLogonCommands
106
+ # CRITICAL: Also create a scheduled task so WAA server starts on EVERY logon (i.e. every boot, given AutoLogon), not just first logon
107
+ RUN if grep -q "</FirstLogonCommands>" /run/assets/win11x64.xml; then \
108
+ LAST_ORDER=$(grep -oP "Order>\K[0-9]+" /run/assets/win11x64.xml | sort -n | tail -1) && \
109
+ N1=$((LAST_ORDER + 1)) && \
110
+ N2=$((LAST_ORDER + 2)) && \
111
+ N3=$((LAST_ORDER + 3)) && \
112
+ N4=$((LAST_ORDER + 4)) && \
113
+ N5=$((LAST_ORDER + 5)) && \
114
+ N6=$((LAST_ORDER + 6)) && \
115
+ sed -i "s|</FirstLogonCommands>|\
116
+ <SynchronousCommand wcm:action=\"add\">\n\
117
+ <Order>$N1</Order>\n\
118
+ <CommandLine>netsh advfirewall set allprofiles state off</CommandLine>\n\
119
+ <Description>Disable Windows Firewall</Description>\n\
120
+ </SynchronousCommand>\n\
121
+ <SynchronousCommand wcm:action=\"add\">\n\
122
+ <Order>$N2</Order>\n\
123
+ <CommandLine>powercfg /change standby-timeout-ac 0</CommandLine>\n\
124
+ <Description>Disable sleep</Description>\n\
125
+ </SynchronousCommand>\n\
126
+ <SynchronousCommand wcm:action=\"add\">\n\
127
+ <Order>$N3</Order>\n\
128
+ <CommandLine>powercfg /change monitor-timeout-ac 0</CommandLine>\n\
129
+ <Description>Disable monitor timeout</Description>\n\
130
+ </SynchronousCommand>\n\
131
+ <SynchronousCommand wcm:action=\"add\">\n\
132
+ <Order>$N4</Order>\n\
133
+ <CommandLine>reg add \"HKLM\\\\SOFTWARE\\\\Policies\\\\Microsoft\\\\Windows\\\\Personalization\" /v NoLockScreen /t REG_DWORD /d 1 /f</CommandLine>\n\
134
+ <Description>Disable lock screen</Description>\n\
135
+ </SynchronousCommand>\n\
136
+ <SynchronousCommand wcm:action=\"add\">\n\
137
+ <Order>$N5</Order>\n\
138
+ <CommandLine>cmd /c start /wait \\\\\\\\host.lan\\\\Data\\\\install.bat</CommandLine>\n\
139
+ <Description>Run WAA setup script to install Python, Chrome, etc.</Description>\n\
140
+ </SynchronousCommand>\n\
141
+ <SynchronousCommand wcm:action=\"add\">\n\
142
+ <Order>$N6</Order>\n\
143
+ <CommandLine>schtasks /create /tn \"WAAServer\" /tr \"\\\\\\\\host.lan\\\\Data\\\\start_waa_server.bat\" /sc onlogon /rl highest /f</CommandLine>\n\
144
+ <Description>Create scheduled task for WAA server auto-start on every boot</Description>\n\
145
+ </SynchronousCommand>\n\
146
+ <SynchronousCommand wcm:action=\"add\">\n\
147
+ <Order>$((N6 + 1))</Order>\n\
148
+ <CommandLine>reg add \"HKCU\\\\SOFTWARE\\\\Microsoft\\\\Windows\\\\CurrentVersion\\\\Run\" /v WAAServer /t REG_SZ /d \"cmd /c \\\\\\\\host.lan\\\\Data\\\\start_waa_server.bat\" /f</CommandLine>\n\
149
+ <Description>Add registry entry for WAA server auto-start (backup)</Description>\n\
150
+ </SynchronousCommand>\n\
151
+ <SynchronousCommand wcm:action=\"add\">\n\
152
+ <Order>$((N6 + 2))</Order>\n\
153
+ <CommandLine>\\\\\\\\host.lan\\\\Data\\\\start_waa_server.bat</CommandLine>\n\
154
+ <Description>Start WAA server immediately</Description>\n\
155
+ </SynchronousCommand>\n\
156
+ </FirstLogonCommands>|" /run/assets/win11x64.xml; \
157
+ fi
158
+
159
+ # -----------------------------------------------------------------------------
160
+ # Install Python and dependencies directly
161
+ # dockurr/windows base is Debian trixie, which ships Python 3.13
162
+ # -----------------------------------------------------------------------------
163
+
164
+ # Install Python 3 and system dependencies
165
+ RUN apt-get update && apt-get install -y --no-install-recommends \
166
+ python3 \
167
+ python3-venv \
168
+ python3-pip \
169
+ tesseract-ocr \
170
+ libgl1 \
171
+ libglib2.0-0 \
172
+ libsm6 \
173
+ libxext6 \
174
+ libxrender-dev \
175
+ ffmpeg \
176
+ && rm -rf /var/lib/apt/lists/* \
177
+ && ln -sf /usr/bin/python3 /usr/bin/python
178
+
179
+ # Install Python dependencies for WAA client
180
+ # Using --break-system-packages since we're in a container
181
+ # Full dependency list from: github.com/microsoft/WindowsAgentArena/blob/main/src/win-arena-container/client/requirements.txt
182
+ RUN pip3 install --no-cache-dir --break-system-packages \
183
+ torch torchvision --index-url https://download.pytorch.org/whl/cpu && \
184
+ pip3 install --no-cache-dir --break-system-packages \
185
+ gymnasium farama-notifications cloudpickle packaging typer rich tqdm colorama \
186
+ openai anthropic google-generativeai groq tiktoken \
187
+ pyyaml jsonschema tenacity httpx backoff toml func-timeout wrapt-timeout-decorator \
188
+ psutil pyperclip screeninfo mss pyautogui fabric \
189
+ easyocr pillow pytesseract opencv-python-headless scikit-image ImageHash \
190
+ requests flask beautifulsoup4 lxml cssselect xmltodict playwright requests-toolbelt \
191
+ pydrive openpyxl python-docx python-pptx odfpy pypdf PyPDF2 pdfplumber pymupdf borb \
192
+ xlrd xlwt xlsxwriter mammoth pdf2image \
193
+ google-api-python-client google-auth-httplib2 google-auth-oauthlib gdown \
194
+ numpy pandas scipy formulas rapidfuzz anytree addict \
195
+ transformers accelerate "timm>=0.9.0,<1.0.0" ultralytics supervision pycocotools einops \
196
+ mutagen pyacoustid chardet librosa fastdtw \
197
+ py7zr LnkParse3 \
198
+ matplotlib wandb yapf
199
+
200
+ # Install Playwright browsers
201
+ RUN playwright install chromium
202
+
203
+ # -----------------------------------------------------------------------------
204
+ # Environment configuration
205
+ # -----------------------------------------------------------------------------
206
+
207
+ ENV YRES="900"
208
+ ENV XRES="1440"
209
+ ENV RAM_SIZE="8G"
210
+ ENV CPU_CORES="4"
211
+ ENV DISK_SIZE="30G"
212
+ ENV VERSION="11e"
213
+ ENV ARGUMENTS="-qmp tcp:0.0.0.0:7200,server,nowait"
214
+
215
+ # Expose ports
216
+ EXPOSE 8006 5000 7200 3389
217
+
218
+ # Default command - run entry.sh directly (OEM files are copied by the patched /run/samba.sh; no /copy-oem.sh exists in this image)
219
+ # Use: /entry.sh --start-client true --model gpt-4o
220
+ # Or: /entry.sh --start-client false (just start Windows, no benchmark)
221
+ ENTRYPOINT ["/bin/bash", "-c"]
222
+ CMD ["/entry.sh --start-client false"]
@@ -0,0 +1,10 @@
1
+ """WAA (Windows Agent Arena) deployment module.
2
+
3
+ This module contains the files used to build and populate the WAA Docker container:
4
+ - api_agent.py: API-based agent (Claude/GPT-5.1) for WAA
5
+ - Dockerfile: Custom waa-auto Docker image
6
+ """
7
+
8
+ from openadapt_ml.benchmarks.waa_deploy.api_agent import ApiAgent
9
+
10
+ __all__ = ["ApiAgent"]