openadapt-ml 0.2.0-py3-none-any.whl → 0.2.1-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (95)
  1. openadapt_ml/baselines/__init__.py +121 -0
  2. openadapt_ml/baselines/adapter.py +185 -0
  3. openadapt_ml/baselines/cli.py +314 -0
  4. openadapt_ml/baselines/config.py +448 -0
  5. openadapt_ml/baselines/parser.py +922 -0
  6. openadapt_ml/baselines/prompts.py +787 -0
  7. openadapt_ml/benchmarks/__init__.py +13 -115
  8. openadapt_ml/benchmarks/agent.py +265 -421
  9. openadapt_ml/benchmarks/azure.py +28 -19
  10. openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
  11. openadapt_ml/benchmarks/cli.py +1722 -4847
  12. openadapt_ml/benchmarks/trace_export.py +631 -0
  13. openadapt_ml/benchmarks/viewer.py +22 -5
  14. openadapt_ml/benchmarks/vm_monitor.py +530 -29
  15. openadapt_ml/benchmarks/waa_deploy/Dockerfile +47 -53
  16. openadapt_ml/benchmarks/waa_deploy/api_agent.py +21 -20
  17. openadapt_ml/cloud/azure_inference.py +3 -5
  18. openadapt_ml/cloud/lambda_labs.py +722 -307
  19. openadapt_ml/cloud/local.py +2038 -487
  20. openadapt_ml/cloud/ssh_tunnel.py +68 -26
  21. openadapt_ml/datasets/next_action.py +40 -30
  22. openadapt_ml/evals/grounding.py +8 -3
  23. openadapt_ml/evals/plot_eval_metrics.py +15 -13
  24. openadapt_ml/evals/trajectory_matching.py +41 -26
  25. openadapt_ml/experiments/demo_prompt/format_demo.py +16 -6
  26. openadapt_ml/experiments/demo_prompt/run_experiment.py +26 -16
  27. openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
  28. openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
  29. openadapt_ml/experiments/representation_shootout/config.py +390 -0
  30. openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
  31. openadapt_ml/experiments/representation_shootout/runner.py +687 -0
  32. openadapt_ml/experiments/waa_demo/runner.py +29 -14
  33. openadapt_ml/export/parquet.py +36 -24
  34. openadapt_ml/grounding/detector.py +18 -14
  35. openadapt_ml/ingest/__init__.py +8 -6
  36. openadapt_ml/ingest/capture.py +25 -22
  37. openadapt_ml/ingest/loader.py +7 -4
  38. openadapt_ml/ingest/synthetic.py +189 -100
  39. openadapt_ml/models/api_adapter.py +14 -4
  40. openadapt_ml/models/base_adapter.py +10 -2
  41. openadapt_ml/models/providers/__init__.py +288 -0
  42. openadapt_ml/models/providers/anthropic.py +266 -0
  43. openadapt_ml/models/providers/base.py +299 -0
  44. openadapt_ml/models/providers/google.py +376 -0
  45. openadapt_ml/models/providers/openai.py +342 -0
  46. openadapt_ml/models/qwen_vl.py +46 -19
  47. openadapt_ml/perception/__init__.py +35 -0
  48. openadapt_ml/perception/integration.py +399 -0
  49. openadapt_ml/retrieval/demo_retriever.py +50 -24
  50. openadapt_ml/retrieval/embeddings.py +9 -8
  51. openadapt_ml/retrieval/retriever.py +3 -1
  52. openadapt_ml/runtime/__init__.py +50 -0
  53. openadapt_ml/runtime/policy.py +18 -5
  54. openadapt_ml/runtime/safety_gate.py +471 -0
  55. openadapt_ml/schema/__init__.py +9 -0
  56. openadapt_ml/schema/converters.py +74 -27
  57. openadapt_ml/schema/episode.py +31 -18
  58. openadapt_ml/scripts/capture_screenshots.py +530 -0
  59. openadapt_ml/scripts/compare.py +85 -54
  60. openadapt_ml/scripts/demo_policy.py +4 -1
  61. openadapt_ml/scripts/eval_policy.py +15 -9
  62. openadapt_ml/scripts/make_gif.py +1 -1
  63. openadapt_ml/scripts/prepare_synthetic.py +3 -1
  64. openadapt_ml/scripts/train.py +21 -9
  65. openadapt_ml/segmentation/README.md +920 -0
  66. openadapt_ml/segmentation/__init__.py +97 -0
  67. openadapt_ml/segmentation/adapters/__init__.py +5 -0
  68. openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
  69. openadapt_ml/segmentation/annotator.py +610 -0
  70. openadapt_ml/segmentation/cache.py +290 -0
  71. openadapt_ml/segmentation/cli.py +674 -0
  72. openadapt_ml/segmentation/deduplicator.py +656 -0
  73. openadapt_ml/segmentation/frame_describer.py +788 -0
  74. openadapt_ml/segmentation/pipeline.py +340 -0
  75. openadapt_ml/segmentation/schemas.py +622 -0
  76. openadapt_ml/segmentation/segment_extractor.py +634 -0
  77. openadapt_ml/training/azure_ops_viewer.py +1097 -0
  78. openadapt_ml/training/benchmark_viewer.py +52 -41
  79. openadapt_ml/training/shared_ui.py +7 -7
  80. openadapt_ml/training/stub_provider.py +57 -35
  81. openadapt_ml/training/trainer.py +143 -86
  82. openadapt_ml/training/trl_trainer.py +70 -21
  83. openadapt_ml/training/viewer.py +323 -108
  84. openadapt_ml/training/viewer_components.py +180 -0
  85. {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +215 -14
  86. openadapt_ml-0.2.1.dist-info/RECORD +116 -0
  87. openadapt_ml/benchmarks/base.py +0 -366
  88. openadapt_ml/benchmarks/data_collection.py +0 -432
  89. openadapt_ml/benchmarks/live_tracker.py +0 -180
  90. openadapt_ml/benchmarks/runner.py +0 -418
  91. openadapt_ml/benchmarks/waa.py +0 -761
  92. openadapt_ml/benchmarks/waa_live.py +0 -619
  93. openadapt_ml-0.2.0.dist-info/RECORD +0 -86
  94. {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
  95. {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
openadapt_ml/benchmarks/waa_deploy/Dockerfile

@@ -53,8 +53,14 @@ COPY --from=windowsarena/winarena:latest /oem /oem
 RUN sed -i '/^return 0$/i cp -r /oem/* /tmp/smb/ 2>/dev/null || true' /run/samba.sh && \
     echo "Inserted OEM copy before return in samba.sh"
 
-# Copy unattend.xml for automated Windows installation
-COPY --from=windowsarena/winarena:latest /run/assets/win11x64-enterprise-eval.xml /run/assets/win11x64.xml
+# DO NOT replace dockurr/windows's autounattend.xml - it handles OOBE properly
+# Instead, only PATCH it to add InstallFrom element (prevents "Select OS" dialog)
+# This preserves dockurr/windows's native OEM mechanism
+RUN for xml in /run/assets/win11x64.xml /run/assets/win11x64-enterprise-eval.xml; do \
+        if [ -f "$xml" ] && ! grep -q "InstallFrom" "$xml"; then \
+            sed -i 's|<InstallTo>|<InstallFrom>\n <MetaData wcm:action="add">\n <Key>/IMAGE/INDEX</Key>\n <Value>1</Value>\n </MetaData>\n </InstallFrom>\n <InstallTo>|' "$xml"; \
+        fi; \
+    done && echo "Added InstallFrom element for automatic image selection"
 
 # -----------------------------------------------------------------------------
 # Create start_vm.sh that uses our dockurr/windows entrypoint
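The sed call in the new RUN step only splices an <InstallFrom> element (selecting image index 1) in front of the existing <InstallTo> element of each unattend file. A rough Python equivalent of that substitution, for illustration only; it is not part of the image build and the exact whitespace of the injected fragment is cosmetic:

# Rough Python equivalent of the sed patch above (illustrative only).
# It injects an <InstallFrom> block selecting image index 1 ahead of <InstallTo>,
# which is what suppresses the interactive "Select OS" dialog during Windows setup.
INSTALL_FROM = (
    "<InstallFrom>\n"
    '  <MetaData wcm:action="add">\n'
    "    <Key>/IMAGE/INDEX</Key>\n"
    "    <Value>1</Value>\n"
    "  </MetaData>\n"
    "</InstallFrom>\n"
)


def patch_unattend(xml_text: str) -> str:
    # Mirrors the `grep -q InstallFrom` guard: already-patched files are left alone.
    if "InstallFrom" in xml_text:
        return xml_text
    return xml_text.replace("<InstallTo>", INSTALL_FROM + "<InstallTo>")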
@@ -77,23 +83,15 @@ RUN find /client -name "*.py" -exec sed -i 's|20.20.20.21|172.30.0.2|g' {} \; &&
     echo "Patched client Python files"
 
 # -----------------------------------------------------------------------------
-# Add API-backed agent support (Claude Sonnet 4.5 / GPT-5.1)
-# This allows using --agent api-claude or --agent api-openai instead of navi
+# Add API-backed agent support (Claude / OpenAI)
+# NOTE: API agents (api-claude, api-openai) are run EXTERNALLY via openadapt-evals CLI
+# which connects to the WAA server over SSH tunnel. No internal patching needed.
+# The api_agent.py is included for reference/future use.
 # -----------------------------------------------------------------------------
 
-# Copy api_agent.py to the client mm_agents directory
+# Copy api_agent.py for reference (used externally by openadapt-evals)
 COPY api_agent.py /client/mm_agents/api_agent.py
 
-# Patch run.py to support api-claude and api-openai agents
-# This adds elif blocks after the "navi" agent handling
-# Using Python to insert the patch with proper indentation
-RUN python3 -c "import re; \
-    f = open('/client/run.py', 'r'); c = f.read(); f.close(); \
-    patch = ''' elif cfg_args[\"agent_name\"] in [\"api-claude\", \"api-openai\"]:\n from mm_agents.api_agent import ApiAgent\n provider = \"anthropic\" if cfg_args[\"agent_name\"] == \"api-claude\" else \"openai\"\n agent = ApiAgent(provider=provider, temperature=args.temperature)\n'''; \
-    c = c.replace('raise ValueError(f\"Unknown agent name: {cfg_args', patch + ' raise ValueError(f\"Unknown agent name: {cfg_args'); \
-    f = open('/client/run.py', 'w'); f.write(c); f.close(); \
-    print('Patched run.py for API agents')"
-
 # -----------------------------------------------------------------------------
 # Fix Windows setup for automation
 # -----------------------------------------------------------------------------
@@ -157,15 +155,33 @@ RUN if grep -q "</FirstLogonCommands>" /run/assets/win11x64.xml; then \
     fi
 
 # -----------------------------------------------------------------------------
-# Install Python and dependencies directly
-# dockurr/windows base is Debian trixie which has Python 3.12
+# Copy Python 3.9 and all packages from vanilla image
 # -----------------------------------------------------------------------------
-
-# Install Python 3 and system dependencies
+# IMPORTANT: Do NOT install Python from apt or pip install packages ourselves.
+# The vanilla image has Python 3.9.20 with transformers 4.46.2 which is compatible
+# with GroundingDINO. Installing our own Python (3.13) with latest transformers (5.0)
+# breaks the navi agent with: AttributeError: 'BertModel' has no attribute 'get_head_mask'
+
+# Copy Python 3.9 installation from vanilla (binaries, libraries, packages)
+COPY --from=windowsarena/winarena:latest /usr/local/bin/python* /usr/local/bin/
+COPY --from=windowsarena/winarena:latest /usr/local/bin/pip* /usr/local/bin/
+COPY --from=windowsarena/winarena:latest /usr/local/lib/python3.9 /usr/local/lib/python3.9
+COPY --from=windowsarena/winarena:latest /usr/local/lib/libpython3.9.so* /usr/local/lib/
+COPY --from=windowsarena/winarena:latest /usr/local/include/python3.9 /usr/local/include/python3.9
+
+# Ensure the shared library is found
+RUN ldconfig
+
+# Create symlinks for python/pip commands
+RUN ln -sf /usr/local/bin/python3.9 /usr/local/bin/python && \
+    ln -sf /usr/local/bin/python3.9 /usr/bin/python && \
+    ln -sf /usr/local/bin/python3.9 /usr/bin/python3 && \
+    ln -sf /usr/local/bin/pip3.9 /usr/local/bin/pip && \
+    ln -sf /usr/local/bin/pip3.9 /usr/bin/pip && \
+    ln -sf /usr/local/bin/pip3.9 /usr/bin/pip3
+
+# Install only system dependencies that Python packages need (not Python itself)
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    python3 \
-    python3-venv \
-    python3-pip \
     tesseract-ocr \
     libgl1 \
     libglib2.0-0 \
@@ -173,32 +189,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     libxext6 \
     libxrender-dev \
     ffmpeg \
-    && rm -rf /var/lib/apt/lists/* \
-    && ln -sf /usr/bin/python3 /usr/bin/python
-
-# Install Python dependencies for WAA client
-# Using --break-system-packages since we're in a container
-# Full dependency list from: github.com/microsoft/WindowsAgentArena/blob/main/src/win-arena-container/client/requirements.txt
-RUN pip3 install --no-cache-dir --break-system-packages \
-    torch torchvision --index-url https://download.pytorch.org/whl/cpu && \
-    pip3 install --no-cache-dir --break-system-packages \
-    gymnasium farama-notifications cloudpickle packaging typer rich tqdm colorama \
-    openai anthropic google-generativeai groq tiktoken \
-    pyyaml jsonschema tenacity httpx backoff toml func-timeout wrapt-timeout-decorator \
-    psutil pyperclip screeninfo mss pyautogui fabric \
-    easyocr pillow pytesseract opencv-python-headless scikit-image ImageHash \
-    requests flask beautifulsoup4 lxml cssselect xmltodict playwright requests-toolbelt \
-    pydrive openpyxl python-docx python-pptx odfpy pypdf PyPDF2 pdfplumber pymupdf borb \
-    xlrd xlwt xlsxwriter mammoth pdf2image \
-    google-api-python-client google-auth-httplib2 google-auth-oauthlib gdown \
-    numpy pandas scipy formulas rapidfuzz anytree addict \
-    transformers accelerate "timm>=0.9.0,<1.0.0" ultralytics supervision pycocotools einops \
-    mutagen pyacoustid chardet librosa fastdtw \
-    py7zr LnkParse3 \
-    matplotlib wandb yapf
-
-# Install Playwright browsers
-RUN playwright install chromium
+    && rm -rf /var/lib/apt/lists/*
+
+# Note: Playwright browsers not copied - not needed for navi agent (uses GroundingDINO)
+# If needed later, install via: python -m playwright install chromium
 
 # -----------------------------------------------------------------------------
 # Environment configuration
@@ -215,8 +209,8 @@ ENV ARGUMENTS="-qmp tcp:0.0.0.0:7200,server,nowait"
 # Expose ports
 EXPOSE 8006 5000 7200 3389
 
-# Default entrypoint - copy OEM files then run entry.sh
-# Use: /entry.sh --start-client true --model gpt-4o
-# Or: /entry.sh --start-client false (just start Windows, no benchmark)
-ENTRYPOINT ["/bin/bash", "-c"]
-CMD ["/copy-oem.sh /entry.sh --start-client false"]
+# Default entrypoint - use dockurr/windows's native entry point
+# The OEM files are copied by samba.sh (patched above) when Samba starts
+# dockurr/windows handles: QEMU VM startup, Samba, VNC, Windows boot
+# Our patched autounattend.xml handles: FirstLogonCommands that run install.bat
+ENTRYPOINT ["/usr/bin/tini", "-s", "/run/entry.sh"]
openadapt_ml/benchmarks/waa_deploy/api_agent.py

@@ -43,7 +43,7 @@ import logging
 import os
 import re
 from io import BytesIO
-from typing import Any, Dict, List
+from typing import Dict, List
 
 from PIL import Image
 
@@ -210,6 +210,7 @@ class ApiAgent:
             )
         try:
             from anthropic import Anthropic
+
             self._client = Anthropic(api_key=self.api_key)
         except ImportError:
             raise RuntimeError(
@@ -225,6 +226,7 @@ class ApiAgent:
             )
         try:
             from openai import OpenAI
+
             self._client = OpenAI(api_key=self.api_key)
         except ImportError:
             raise RuntimeError(
@@ -240,9 +242,13 @@ class ApiAgent:
         self.memory_block_text = "# empty memory block"
         self.step_counter = 0
 
-        logger.info(f"ApiAgent initialized with provider={provider}, model={self.model}")
+        logger.info(
+            f"ApiAgent initialized with provider={provider}, model={self.model}"
+        )
         if self.demo:
-            logger.info(f"Demo trajectory provided ({len(self.demo)} chars) - will persist across all steps")
+            logger.info(
+                f"Demo trajectory provided ({len(self.demo)} chars) - will persist across all steps"
+            )
 
     def predict(self, instruction: str, obs: Dict) -> tuple:
         """Predict the next action based on observation.
@@ -325,10 +331,9 @@ class ApiAgent:
         # Add action history if enabled (enhanced: includes reasoning, not just raw actions)
         if self.use_history and self.history:
             # Use rich history with reasoning (like PC Agent-E)
-            history_entries = self.history[-self.history_cutoff:]
+            history_entries = self.history[-self.history_cutoff :]
             history_str = "\n\n".join(
-                f"[Step {i+1}] {entry}"
-                for i, entry in enumerate(history_entries)
+                f"[Step {i + 1}] {entry}" for i, entry in enumerate(history_entries)
             )
             content_parts.append(f"History of previous steps:\n{history_str}")
             logs["history_entries"] = len(history_entries)
@@ -381,14 +386,18 @@ class ApiAgent:
             actions = [code_text]
             self.prev_actions.append(code_text)
             # Store rich history with reasoning (memory + action)
-            self._add_to_history(f"Thought: {self.memory_block_text}\nAction: {code_text}")
+            self._add_to_history(
+                f"Thought: {self.memory_block_text}\nAction: {code_text}"
+            )
         else:
             # Try to extract action from response text
             action = self._parse_action_from_text(response_text, w, h)
             if action:
                 actions = [action]
                 self.prev_actions.append(action)
-                self._add_to_history(f"Thought: {self.memory_block_text}\nAction: {action}")
+                self._add_to_history(
+                    f"Thought: {self.memory_block_text}\nAction: {action}"
+                )
             else:
                 logger.warning("Could not extract action from response")
                 actions = ["# Could not parse action"]
@@ -483,33 +492,25 @@ class ApiAgent:
             Python code string or None if parsing failed.
         """
         # Try to find click coordinates
-        click_match = re.search(
-            r"click.*?(\d+)\s*,\s*(\d+)", text, re.IGNORECASE
-        )
+        click_match = re.search(r"click.*?(\d+)\s*,\s*(\d+)", text, re.IGNORECASE)
         if click_match:
             x, y = int(click_match.group(1)), int(click_match.group(2))
             return f"computer.click({x}, {y})"
 
         # Try to find type text
-        type_match = re.search(
-            r'type[:\s]+["\'](.+?)["\']', text, re.IGNORECASE
-        )
+        type_match = re.search(r'type[:\s]+["\'](.+?)["\']', text, re.IGNORECASE)
         if type_match:
             text_to_type = type_match.group(1)
             return f'computer.type("{text_to_type}")'
 
         # Try to find key press
-        key_match = re.search(
-            r"press[:\s]+(\w+)", text, re.IGNORECASE
-        )
+        key_match = re.search(r"press[:\s]+(\w+)", text, re.IGNORECASE)
         if key_match:
             key = key_match.group(1).lower()
             return f'computer.press("{key}")'
 
         # Try to find hotkey
-        hotkey_match = re.search(
-            r"hotkey[:\s]+(\w+)\s*\+\s*(\w+)", text, re.IGNORECASE
-        )
+        hotkey_match = re.search(r"hotkey[:\s]+(\w+)\s*\+\s*(\w+)", text, re.IGNORECASE)
         if hotkey_match:
             key1, key2 = hotkey_match.group(1).lower(), hotkey_match.group(2).lower()
             return f'computer.hotkey("{key1}", "{key2}")'
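The reformatted regexes above make up the agent's whole text fallback: when the model response contains no code block, it is scraped for a click, type, press, or hotkey pattern. A standalone sketch of that fallback using the same patterns, illustrative only (the real method also receives the screen width and height):

# Standalone sketch of the regex fallback shown above (same patterns, simplified wrapper).
import re
from typing import Optional


def parse_action(text: str) -> Optional[str]:
    click = re.search(r"click.*?(\d+)\s*,\s*(\d+)", text, re.IGNORECASE)
    if click:
        return f"computer.click({int(click.group(1))}, {int(click.group(2))})"
    typed = re.search(r'type[:\s]+["\'](.+?)["\']', text, re.IGNORECASE)
    if typed:
        return f'computer.type("{typed.group(1)}")'
    key = re.search(r"press[:\s]+(\w+)", text, re.IGNORECASE)
    if key:
        return f'computer.press("{key.group(1).lower()}")'
    hotkey = re.search(r"hotkey[:\s]+(\w+)\s*\+\s*(\w+)", text, re.IGNORECASE)
    if hotkey:
        return f'computer.hotkey("{hotkey.group(1).lower()}", "{hotkey.group(2).lower()}")'
    return None


print(parse_action("I will click at (412, 583)"))  # computer.click(412, 583)
print(parse_action('Type: "hello world"'))         # computer.type("hello world")
print(parse_action("Press: enter"))                # computer.press("enter")
print(parse_action("Hotkey: ctrl + s"))            # computer.hotkey("ctrl", "s")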
openadapt_ml/cloud/azure_inference.py

@@ -144,7 +144,7 @@ class AzureInferenceQueue:
         blob_name = f"checkpoints/epoch_{epoch}/{checkpoint_path.name}"
         logger.info(f"Uploading checkpoint to {blob_name}...")
 
-        checkpoint_blob_client = self.blob_service.get_blob_client(
+        self.blob_service.get_blob_client(
             container=self.checkpoints_container, blob=blob_name
         )
 
@@ -378,9 +378,7 @@ def main():
     submit_parser.add_argument(
         "--checkpoint", "-c", required=True, help="Path to checkpoint directory"
     )
-    submit_parser.add_argument(
-        "--capture", required=True, help="Path to capture data"
-    )
+    submit_parser.add_argument("--capture", required=True, help="Path to capture data")
     submit_parser.add_argument(
         "--epoch", "-e", type=int, default=0, help="Epoch number"
     )
@@ -415,7 +413,7 @@ def main():
 
     if args.command == "inference-submit":
         # Submit checkpoint for inference
-        print(f"Submitting checkpoint for inference...")
+        print("Submitting checkpoint for inference...")
         job = queue.submit_checkpoint(
             checkpoint_path=args.checkpoint,
             capture_path=args.capture,
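For context, the arguments touched in these hunks belong to the inference-submit subcommand of the module's main() CLI. A hypothetical programmatic invocation, for illustration only: the subcommand name and argument names come from the diff, while the entry point, the assumption that main() parses sys.argv, and the paths are placeholders, and a real run would additionally need the Azure configuration the queue expects:

# Hypothetical invocation of the CLI handled by main() above (paths are placeholders).
import sys

from openadapt_ml.cloud.azure_inference import main

sys.argv = [
    "azure_inference",
    "inference-submit",
    "--checkpoint", "./checkpoints/epoch_3",
    "--capture", "./captures/run_001",
    "--epoch", "3",
]
main()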