openadapt-ml 0.2.2__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/CHANGELOG.md +114 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/PKG-INFO +1 -1
- openadapt_ml-0.3.0/docs/AZURE_ML_COST_TRACKING.md +268 -0
- openadapt_ml-0.3.0/docs/AZURE_ML_LIVE_LOGGING.md +255 -0
- openadapt_ml-0.3.0/openadapt_ml/benchmarks/cli.py +8408 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/benchmarks/vm_monitor.py +6 -3
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/pyproject.toml +1 -1
- openadapt_ml-0.2.2/openadapt_ml/benchmarks/cli.py +0 -2007
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/.env.example +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/.github/workflows/release.yml +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/.github/workflows/test.yml +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/.gitignore +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/.gitmodules +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/.python-version +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/CLAUDE.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/LICENSE +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/README.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/RETRIEVAL_QUICKSTART.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/configs/qwen2_5vl_synthetic.yaml +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/configs/qwen3vl_capture.yaml +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/configs/qwen3vl_capture_4bit.yaml +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/configs/qwen3vl_capture_batched.yaml +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/configs/qwen3vl_synthetic.yaml +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/configs/qwen3vl_synthetic_coord_v2.yaml +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/configs/qwen3vl_synthetic_dev.yaml +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/configs/qwen3vl_synthetic_registration_som.yaml +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/configs/qwen3vl_synthetic_som.yaml +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/deprecated/Dockerfile.simple +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/deprecated/README.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/deprecated/docs/WAA_ACR_DESIGN.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/deprecated/docs/WAA_APPROACH_REVIEW.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/deprecated/docs/WAA_EVAL_ATTEMPTS.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/deprecated/docs/WAA_RELIABILITY_ANALYSIS.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/deprecated/docs/WINDOWS_PRODUCT_KEY_RCA.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/deprecated/docs/azure_waa_setup.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/deprecated/docs/waa_setup.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/deprecated/tmp_dockerfile_winarena.txt +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/deprecated/waa_deploy/Dockerfile +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/deprecated/waa_deploy/Dockerfile.backup +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/deprecated/waa_deploy/Dockerfile.simplified +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/deprecated/waa_deploy/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/deprecated/waa_deploy/api_agent.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/deprecated/waa_deploy/start_waa_server.bat +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/ARCHITECTURE_DECISIONS.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/AZURE_DASHBOARD_SPEC.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/CLEANUP_NOTES.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/GEMINI_GROUNDING_QUICKSTART.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/IMPLEMENTATION_SUMMARY_GEMINI_GROUNDING.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/NEXT_STEPS_GROUNDING_ARCHITECTURE.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/PRIORITY_2_COMPLETION_SUMMARY.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/PRIVACY_IMPLEMENTATION_PLAN.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/RECORD_IMPLEMENTATION_PLAN.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/REPOSITORY_HISTORY.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/REPO_CONSOLIDATION_PLAN.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/SEGMENTATION_TEST_PLAN.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/SEGMENTATION_TEST_RESULTS.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/VM_MONITOR_SCREENSHOT_IMPLEMENTATION.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/WAA_PARALLELIZATION_DESIGN.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/WAA_UNATTENDED_SCALABLE.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/analysis_jan2026.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/architecture_diagram.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/auto_shutoff_design.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/background_task_visibility.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/batching_and_schedulers.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/benchmark_integration_plan.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/benchmark_next_steps.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/benchmark_run_ui_design.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/benchmark_viewer_integration.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/benchmark_viewer_phase2.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/benchmark_viewer_ux_improvements.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/capture_format_decision.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/chrome_extension_design.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/cloud_gpu_training.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/current_state_dec2024.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/dashboard_architecture.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/demo_prompt_experiment.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/demo_retrieval_design.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/design.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/desktop_app_plan.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/early_termination.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/enterprise/COORDS_VS_MARKS_ABLATION.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/enterprise/README.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/enterprise_integration.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/eval_json_schema.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/experiments/demo_conditioned_prompting_results.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/experiments/multi_step_experiment_design.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/experiments/representation_shootout_design.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/experiments/waa_benchmark_results_jan2026.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/experiments/waa_demo_experiment_design.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/gemini_grounding.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/github_org_profile_content.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/github_org_update_plan.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/grpo_training_report.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/gui_actor_integration.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/images/benchmark_viewer.png +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/images/dashboard/training_bottom.png +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/images/dashboard/training_top.png +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/images/dashboard/viewer_bottom.png +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/images/dashboard/viewer_top.png +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/images/grounding_demo.png +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/images/grounding_demo_full.png +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/images/training-dashboard.png +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/images/viewer-comparison.png +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/infra_refactor_design.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/live_benchmark_monitoring_fix.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/live_inference_design.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/mock_adapter_evaluation_fix.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/new_openadapt_architecture.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/openadapt_capture_migration_detailed.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/openadapt_capture_migration_plan.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/openadapt_integration_plan.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/opencua_integration.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/output_artifacts_and_media.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/parallelization_implementation.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/parquet_export_design.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/perception_integration.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/prediction_loading_architecture.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/qwen3_vl_embedding_design.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/qwen3_vl_embedding_literature_review.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/qwen3_vl_embedding_research.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/qwen_login_experiment.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/research/cua_waa_comparison.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/research_thesis.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/roadmap.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/safety_gate_design.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/schema/README.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/schema/episode.schema.json +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/schema_consolidation_plan.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/screenshots/vm_monitor_dashboard_full.png +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/screenshots/vm_monitor_details.png +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/screenshots/vm_monitor_terminal.png +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/semantic_element_capture.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/set_of_marks_implementation.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/smart_mock_agent_design.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/som_implementation_verification.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/sse_architecture.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/sse_benchmark_endpoint.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/sse_frontend_integration.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/sse_quick_reference.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/sse_usage_examples.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/state_and_next_steps_qwen_login.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/stub_training_adapter.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/synthetic_login_jitter_and_ablation.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/training_feedback_ux.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/trl_unsloth_integration_analysis.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/unified_compute_architecture.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/viewer_architecture_survey.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/viewer_consolidation_design.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/viewer_eval_integration.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/viewer_layout_redesign.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/viewer_redesign_proposal.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/vision.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/vm_monitor_screenshot_analysis.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/waa_demo_recording_guide.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/waa_live_adapter_design.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/waa_network_architecture.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/waa_parallelization_plan.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/waa_speedup_options.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/waa_vanilla_automation.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/wandb_integration.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/docs/website_redesign_plan.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/examples/README.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/examples/demo_retrieval_example.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/examples/retrieval_with_capture.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/examples/sample_data.json +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/examples/test_gemini_grounding.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/examples/train_from_json.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/experiment_results/representation_shootout/results_20260116_142335.json +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/experiments/qwen_login/2b_dev/media/qwen3_2b_login_demo.gif +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/experiments/qwen_login/2b_dev/media/qwen3_2b_login_demo_session_0001.gif +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/experiments/qwen_login/2b_dev/plots/base_vs_ft.png +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/experiments/qwen_login/2b_dev/plots/qwen3_2b_base_vs_ft_hardened_v2.png +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/experiments/qwen_login/2b_dev/plots/qwen_vs_apis.png +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/experiments/qwen_login/2b_dev_fixed/plots/qwen_base_vs_ft.png +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/experiments/qwen_login/8b_hero/plots/qwen3_8b_base_vs_ft_hardened_v2.png +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/experiments/qwen_login/SOM_INVESTIGATION_REPORT.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/experiments/qwen_login/comprehensive_comparison.png +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/experiments/qwen_login/login_demo.gif +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/experiments/qwen_login/registration_demo.gif +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/experiments/qwen_login/registration_som_eval.json +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/negative_control_results/NEGATIVE_CONTROL_REPORT.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/negative_control_results/RESULTS_SUMMARY.txt +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/negative_control_results/negative_control_20251231_005135.json +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/baselines/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/baselines/adapter.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/baselines/cli.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/baselines/config.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/baselines/parser.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/baselines/prompts.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/benchmarks/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/benchmarks/agent.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/benchmarks/azure.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/benchmarks/azure_ops_tracker.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/benchmarks/trace_export.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/benchmarks/viewer.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/benchmarks/waa_deploy/Dockerfile +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/benchmarks/waa_deploy/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/benchmarks/waa_deploy/api_agent.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/cloud/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/cloud/azure_inference.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/cloud/lambda_labs.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/cloud/local.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/cloud/ssh_tunnel.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/config.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/datasets/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/datasets/next_action.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/evals/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/evals/grounding.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/evals/plot_eval_metrics.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/evals/trajectory_matching.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/experiments/demo_prompt/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/experiments/demo_prompt/format_demo.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/experiments/demo_prompt/run_experiment.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/experiments/representation_shootout/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/experiments/representation_shootout/conditions.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/experiments/representation_shootout/config.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/experiments/representation_shootout/evaluator.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/experiments/representation_shootout/runner.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/experiments/waa_demo/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/experiments/waa_demo/demos.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/experiments/waa_demo/runner.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/experiments/waa_demo/tasks.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/export/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/export/__main__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/export/cli.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/export/parquet.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/grounding/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/grounding/base.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/grounding/detector.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/ingest/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/ingest/capture.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/ingest/loader.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/ingest/synthetic.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/models/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/models/api_adapter.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/models/base_adapter.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/models/dummy_adapter.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/models/providers/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/models/providers/anthropic.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/models/providers/base.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/models/providers/google.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/models/providers/openai.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/models/qwen_vl.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/perception/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/perception/integration.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/retrieval/README.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/retrieval/USAGE.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/retrieval/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/retrieval/demo_retriever.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/retrieval/embeddings.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/retrieval/index.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/retrieval/retriever.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/runtime/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/runtime/policy.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/runtime/safety_gate.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/schema/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/schema/converters.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/schema/episode.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/scripts/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/scripts/capture_screenshots.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/scripts/compare.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/scripts/demo_policy.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/scripts/eval_policy.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/scripts/make_gif.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/scripts/prepare_synthetic.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/scripts/run_qwen_login_benchmark.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/scripts/train.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/segmentation/README.md +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/segmentation/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/segmentation/adapters/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/segmentation/adapters/capture_adapter.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/segmentation/annotator.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/segmentation/cache.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/segmentation/cli.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/segmentation/deduplicator.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/segmentation/frame_describer.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/segmentation/pipeline.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/segmentation/schemas.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/segmentation/segment_extractor.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/training/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/training/azure_ops_viewer.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/training/benchmark_viewer.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/training/shared_ui.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/training/stub_provider.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/training/trainer.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/training/trl_trainer.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/training/viewer.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/openadapt_ml/training/viewer_components.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/scripts/generate_vm_screenshots.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/scripts/generate_vm_screenshots_simple.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/scripts/p0_validate_demo_persistence.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/scripts/p1_episode_success_ab_test.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/scripts/run_demo_experiment.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/scripts/run_demo_experiment_n30.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/scripts/run_multistep_experiment.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/scripts/setup_azure.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/scripts/waa_bootstrap_helper.sh +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/scripts/waa_bootstrap_local.sh +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/benchmarks/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/benchmarks/test_api_agent.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/benchmarks/test_waa.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/integration/__init__.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/integration/test_benchmark_viewer.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/integration/test_data_collection.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/integration/test_live_eval.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/integration/test_sse_endpoint.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/test_action_parsing.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/test_api_adapter.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/test_baselines.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/test_batching.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/test_capture_adapter.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/test_demo_persistence.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/test_demo_retrieval.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/test_gemini_grounding_imports.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/test_local_cli.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/test_mock_labeling.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/test_negative_control.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/test_parquet_export.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/test_providers.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/test_retrieval.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/test_segmentation_pipeline.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/test_terminal_output.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/test_training_dummy.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/test_trl_trainer.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/test_viewer_screenshots.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/tests/test_waa_demo.py +0 -0
- {openadapt_ml-0.2.2 → openadapt_ml-0.3.0}/uv.lock +0 -0
@@ -1,6 +1,120 @@
 # CHANGELOG
 
 
+## v0.3.0 (2026-02-05)
+
+### Bug Fixes
+
+- **cli**: Improve pool-create reliability and error handling
+  ([`f23bd57`](https://github.com/OpenAdaptAI/openadapt-ml/commit/f23bd571a76c361d9e46d99820728ffdedb5cef5))
+  - Properly clean up test VM and associated resources during quota check
+  - Use sudo for docker pull (usermod not effective in same session)
+  - Add pool-cleanup command for orphaned resources
+  - Show full error messages in pool creation failures
+
+  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
+
+- **pool**: Use WAA native task distribution with --worker_id/--num_workers
+  ([`ef0d8c7`](https://github.com/OpenAdaptAI/openadapt-ml/commit/ef0d8c7ecf60b1644dbc5a40ed0a05b1b4c2f597))
+  - Fixed task distribution: WAA ignores --start_idx/--num_tasks; use the native --worker_id and --num_workers parameters instead
+  - Worker 0 gets tasks 0, N, 2N...; worker 1 gets tasks 1, N+1, 2N+1...
+  - Use vanilla windowsarena/winarena image with correct IP (20.20.20.21)
+  - Add container reuse check (skip restart if already running)
+  - Pass API key via env var instead of config file
+  - Fix QMP port exposure (7200) for QEMU control
+  - Store Windows disk on /mnt for 300GB temp storage (D8ds_v5)
+
+  Tested: 2-worker pool running 4 tasks in parallel successfully
+
+  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
+
+- **waa**: Use D4ds_v4 VM size for quota compatibility
+  ([`2a51a97`](https://github.com/OpenAdaptAI/openadapt-ml/commit/2a51a976f10db135ed79971162f5605de944dd6e))
+
+  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
+
+- **waa**: Use D8ds_v5 VM size for Azure ML workers
+  ([`71a0fdd`](https://github.com/OpenAdaptAI/openadapt-ml/commit/71a0fddfa4216a49b3a37c1ff1cc2d98d1f605a1))
+
+  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
+
+### Documentation
+
+- Add Azure ML log streaming and cost tracking guides
+  ([`59c3a3e`](https://github.com/OpenAdaptAI/openadapt-ml/commit/59c3a3ef852d747599de53fe74f660aea6d5b033))
+
+  Document the new CLI commands for:
+  - Live log streaming from Azure ML jobs
+  - Cost tracking for compute instances
+  - Teardown procedures
+
+  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
+
+### Features
+
+- **cli**: Add Azure ML log streaming, cost tracking, and teardown
+  ([`59e3cf7`](https://github.com/OpenAdaptAI/openadapt-ml/commit/59e3cf7fbdb53f07866709efd60b05a1aa511ed5))
+
+  Add comprehensive Azure ML management commands:
+  - azure-ml-stream: Stream logs from running jobs using the Python SDK with account key auth (works around DefaultAzureCredential permission issues)
+  - azure-ml-cost: Track compute instance uptime and estimated costs
+  - azure-ml-teardown: Cancel jobs and delete compute instances
+
+  Also improves:
+  - azure-ml-quota: Shows both ML Dedicated quota (what Azure ML actually uses) and regular VM quota
+  - Better error handling and logging throughout
+
+  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
+
+- **cli**: Add Azure ML status, VNC, and monitor commands
+  ([`7985cff`](https://github.com/OpenAdaptAI/openadapt-ml/commit/7985cff95cc50d0313b3f3cb8ff5a1a1de039a71))
+
+  New commands for end-to-end Azure ML automation:
+  - azure-ml-status: Show jobs and compute instances
+  - azure-ml-vnc: Set up VNC tunnel to compute instance
+  - azure-ml-monitor: Monitor jobs with auto VNC setup
+
+  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
+
+- **cli**: Add azure-ml-quota command for quota management
+  ([`eecb3a4`](https://github.com/OpenAdaptAI/openadapt-ml/commit/eecb3a461c2b5f6882755dab52f2cfb0c9a9616a))
+
+  Semi-automated quota increase workflow:
+  - Checks current quota for WAA-compatible VM families
+  - Shows which families have sufficient quota
+  - Opens Azure Portal quota page with instructions
+  - Guides user through the request process
+
+  Usage: uv run python -m openadapt_ml.benchmarks.cli azure-ml-quota
+
+  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
+
+- **cli**: Add multi-VM pool commands for parallel WAA evaluation
+  ([`005664a`](https://github.com/OpenAdaptAI/openadapt-ml/commit/005664ab0ec6c425ac9ebe1af19b23e552b9bf90))
+
+  Add pool-create, pool-wait, and pool-run commands for running WAA benchmarks across multiple VMs in parallel:
+
+  - pool-create --workers N: Create N VMs with Docker and the WAA image; parallel VM creation using ThreadPoolExecutor; auto-selects available region and VM size; configures Docker with /mnt storage; registers pool for tracking
+  - pool-wait: Wait for WAA to be ready on all workers; starts WAA containers on each worker; polls /probe endpoint until ready; configurable timeout
+  - pool-run --tasks N: Distribute tasks across pool; round-robin task distribution; parallel execution on all workers; progress tracking in registry
+
+  This enables ~5x faster benchmark completion with 5 workers, or full 154-task evaluation in ~10 min with 10+ workers.
+
+  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
+
+### Refactoring
+
+- **waa**: Update submodule with SDK v2 migration
+  ([`5080ad6`](https://github.com/OpenAdaptAI/openadapt-ml/commit/5080ad697e88ff297dfbb14f2c0756f53ebfd496))
+
+  Updates WindowsAgentArena submodule to include Azure ML SDK v2 migration that enables job submission from macOS ARM64.
+
+  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
+
+
 ## v0.2.2 (2026-01-29)
 
 ### Bug Fixes
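For orientation, the pool workflow described in the feature entries above looks roughly like this (command and flag names are taken from the changelog text; the worker and task counts are placeholders):

```bash
# Create a 5-worker VM pool with Docker and the WAA image
uv run python -m openadapt_ml.benchmarks.cli pool-create --workers 5

# Wait until the WAA /probe endpoint responds on every worker
uv run python -m openadapt_ml.benchmarks.cli pool-wait

# Distribute tasks across the pool (round-robin)
uv run python -m openadapt_ml.benchmarks.cli pool-run --tasks 154

# Check quota for WAA-compatible VM families before scaling up
uv run python -m openadapt_ml.benchmarks.cli azure-ml-quota
```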
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: openadapt-ml
-Version: 0.2.2
+Version: 0.3.0
 Summary: Model-agnostic, domain-agnostic ML engine for GUI automation agents
 Project-URL: Homepage, https://github.com/OpenAdaptAI/openadapt-ml
 Project-URL: Repository, https://github.com/OpenAdaptAI/openadapt-ml
@@ -0,0 +1,268 @@
# Azure ML Cost Tracking and Teardown

## Overview

This document describes how to track costs and perform full teardown of Azure ML resources used for WAA benchmark evaluation.

## Cost Components

### 1. Compute Instances

Azure ML Compute Instances are the primary cost driver.

| VM Size | vCPU | Memory | Price/hour |
|---------|------|--------|------------|
| Standard_D4_v3 | 4 | 16 GB | $0.19/hr |
| Standard_D8_v3 | 8 | 32 GB | $0.38/hr |
| Standard_D4s_v3 | 4 | 16 GB | $0.19/hr |
| Standard_D8s_v3 | 8 | 32 GB | $0.38/hr |
| Standard_D4ds_v5 | 4 | 16 GB | $0.42/hr |
| Standard_D8ds_v5 | 8 | 32 GB | $0.38/hr |

**Note**: Compute instances continue billing while running, even if idle. Always delete after use.

### 2. Blob Storage

Used for the golden Windows image and benchmark results.

| Component | Size | Price |
|-----------|------|-------|
| Golden image (data.img) | ~25 GB | ~$0.45/month |
| OVMF firmware | ~8 MB | negligible |
| Benchmark results | Variable | ~$0.018/GB/month |

**Blob Container**: `azureml-blobstore-84e6d3a8-2f98-4ff9-9ee1-b2f84ebc0d90`

### 3. File Share

Startup scripts and code mounted to compute instances.

| Component | Size | Price |
|-----------|------|-------|
| Startup script | ~1 KB | negligible |
| Mounted code | Variable | ~$0.06/GB/month |

**File Share**: `code-391ff5ac-6576-460f-ba4d-7e03433c68b6`

### 4. API Calls (External)

OpenAI/Anthropic API costs depend on model and task count.

| Model | Input | Output | Est. per task |
|-------|-------|--------|---------------|
| GPT-4o | $2.50/1M | $10/1M | ~$0.05 |
| GPT-4o-mini | $0.15/1M | $0.60/1M | ~$0.003 |
| Claude Sonnet 4.5 | $3/1M | $15/1M | ~$0.06 |
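For intuition, the per-task estimates above follow from simple token arithmetic. The sketch below reproduces them; the per-million prices come from the table, while the per-task token counts (~15K input, ~1K output) are illustrative assumptions, not measured values.

```python
# Rough per-task API cost estimate from the pricing table above.
PRICES = {  # model: (input $/1M tokens, output $/1M tokens)
    "gpt-4o": (2.50, 10.00),
    "gpt-4o-mini": (0.15, 0.60),
    "claude-sonnet-4.5": (3.00, 15.00),
}

def per_task_cost(model: str, input_tokens: int = 15_000, output_tokens: int = 1_000) -> float:
    """Estimate the API cost of one task given assumed token usage."""
    in_price, out_price = PRICES[model]
    return input_tokens / 1e6 * in_price + output_tokens / 1e6 * out_price

for model in PRICES:
    print(f"{model}: ~${per_task_cost(model):.3f} per task")
# gpt-4o ≈ $0.048, gpt-4o-mini ≈ $0.003, claude-sonnet-4.5 ≈ $0.060
```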
## CLI Commands

### Cost Summary

Show current Azure ML resource costs and usage.

```bash
# Show cost summary
uv run python -m openadapt_ml.benchmarks.cli run-azure-ml --cost-summary
```

Output:

```
=== Azure ML Cost Summary ===

Compute Instances:
  w0Expe02021336    STANDARD_D4_V3    Running    2.5 hrs    $0.48
  w0Expe02020148    STANDARD_D4_V3    Failed     -          -

Total compute: 2.5 hrs, ~$0.48

Blob Storage:
  storage/data.img        24.8 GB
  storage/OVMF_CODE...    3.5 MB
  storage/OVMF_VARS...    3.5 MB

Total storage: 24.81 GB, ~$0.45/month

Experiments:
  openadapt-ml: 3 runs (last 7 days)

Estimated Monthly Cost: ~$0.50 (storage only, compute billed hourly)
```

### Resource Listing

List all Azure ML resources that can be torn down.

```bash
# List all resources
uv run python -m openadapt_ml.benchmarks.cli run-azure-ml --list-resources
```

Output:

```
=== Azure ML Resources ===

Compute Instances (2):
  - w0Expe02021336    STANDARD_D4_V3    Running
  - w0Expe02020148    STANDARD_D4_V3    CreateFailed

Blob Storage (storage/ prefix):
  - data.img (24.8 GB)
  - OVMF_CODE_4M.ms.fd (3.5 MB)
  - OVMF_VARS_4M.ms.fd (3.5 MB)

File Share (Users/openadapt/):
  - compute-instance-startup.sh (1 KB)

Azure ML Jobs (last 7 days): 3
```

### Full Teardown

Delete all Azure ML resources to stop all costs. This is the recommended way to clean up after benchmarking.

```bash
# Teardown (dry run - shows what would be deleted)
uv run python -m openadapt_ml.benchmarks.cli run-azure-ml --teardown

# Teardown with confirmation
uv run python -m openadapt_ml.benchmarks.cli run-azure-ml --teardown --confirm
```

**What gets deleted:**
- All compute instances (stops compute billing)
- Golden image in blob storage (optional, use --keep-image to preserve)
- Benchmark result files in blob storage
- Startup script in file share

**What is NOT deleted:**
- Azure ML Workspace (shared resource)
- Resource Group (shared resource)
- Storage Account (just the data inside)
- Azure ML Experiments (metadata only, no cost)

Output:

```
=== Azure ML Teardown ===

Will delete:
  Compute Instances:
    - w0Expe02021336 (Running, ~$0.48 billed)
    - w0Expe02020148 (Failed)

  Blob Storage:
    - storage/data.img (24.8 GB)
    - storage/OVMF_CODE_4M.ms.fd
    - storage/OVMF_VARS_4M.ms.fd

  File Share:
    - Users/openadapt/compute-instance-startup.sh

Use --confirm to proceed with deletion.
Use --keep-image to preserve the golden image for future runs.
```

### Keeping the Golden Image

If you plan to run more benchmarks later, you can preserve the golden image to avoid re-uploading 30GB.

```bash
# Teardown but keep the golden image
uv run python -m openadapt_ml.benchmarks.cli run-azure-ml --teardown --confirm --keep-image
```

## Cost Tracking Implementation

### Approach 1: Azure CLI Queries (Implemented)

Uses `az ml compute list` and `az storage blob list` to enumerate resources and estimate costs based on known pricing.

**Pros:**
- No additional Azure permissions needed
- Real-time resource visibility
- Works with existing service principal

**Cons:**
- Estimates only (not actual billed amounts)
- Manual pricing table maintenance
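A minimal sketch of the kind of queries this approach relies on. The workspace, resource group, storage account, and container names match the resource hierarchy shown later in this document; the `--query` field names are illustrative and may need adjusting for your CLI version.

```bash
# Enumerate compute instances (the main billing driver)
az ml compute list \
  --workspace-name openadapt-ml \
  --resource-group openadapt-agents \
  --output table

# Enumerate blobs under the storage/ prefix to estimate storage size
az storage blob list \
  --account-name openadapstoraged655a89ec \
  --container-name azureml-blobstore-84e6d3a8-2f98-4ff9-9ee1-b2f84ebc0d90 \
  --prefix storage/ \
  --query "[].{name:name, bytes:properties.contentLength}" \
  --output table
```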
### Approach 2: Azure Cost Management API (Future)

Uses `az consumption usage list` to get actual billed amounts.

```bash
# Get actual usage (requires Cost Management Reader role)
az consumption usage list \
  --start-date 2026-01-01 \
  --end-date 2026-02-02 \
  --query "[?contains(resourceGroup, 'openadapt')].{Resource:resourceId, Cost:cost, Date:usageEnd}"
```

**Pros:**
- Actual billed amounts
- Historical cost tracking

**Cons:**
- Requires additional Azure role assignment
- 24-48 hour data delay

## Resource Hierarchy

```
Azure Subscription: 78add6c6-c92a-4a53-b751-eb644ac77e59
└── Resource Group: openadapt-agents
    ├── Azure ML Workspace: openadapt-ml
    │   ├── Compute Instances (BILLABLE - delete after use)
    │   │   └── w0Exp* (created by run_azure.py)
    │   └── Experiments (metadata, no cost)
    │       └── openadapt-ml
    │
    └── Storage Account: openadapstoraged655a89ec
        ├── Blob Container: azureml-blobstore-*
        │   └── storage/ (golden image, ~$0.45/month)
        │
        └── File Share: code-*
            └── Users/openadapt/ (startup script, negligible)
```

## Best Practices

### During Development

1. **Use single worker**: `--workers 1` to minimize compute costs
2. **Use gpt-4o-mini**: Cheapest OpenAI model for testing
3. **Delete immediately**: Run `--teardown --confirm` after each session

### For Full Evaluation

1. **Estimate costs first**: `--cost-summary` before running
2. **Use appropriate workers**: Match to your task count and budget
3. **Keep golden image**: `--keep-image` if running multiple evaluations
4. **Monitor progress**: Check Azure portal for stuck instances

### Cost Optimization

| Scenario | Est. Cost | Recommendation |
|----------|-----------|----------------|
| Single task test | ~$0.10 | 1 worker, gpt-4o-mini |
| 10 tasks | ~$1.50 | 1 worker, gpt-4o-mini |
| Full 154 tasks | ~$20-50 | 4-8 workers, depends on model |
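As a sanity check on the full-run estimate, here is one illustrative breakdown. The prices come from the tables above; the worker count, wall-clock time, and model choice are assumptions.

```python
# Illustrative arithmetic behind the full-run estimate (assumptions, not measurements)
compute = 8 * 0.38 * 5   # 8 workers x $0.38/hr (Standard_D8ds_v5) x ~5 hr ≈ $15.20
api = 154 * 0.05         # 154 tasks x ~$0.05/task (GPT-4o)               ≈ $7.70
print(f"~${compute + api:.0f} total")  # ≈ $23, the low end of the $20-50 range
```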
## Troubleshooting

### "Permission denied" on cost queries

The service principal may not have Cost Management Reader role. Use the resource listing approach instead.

### Compute instances stuck in "Creating"

Delete manually: `az ml compute delete --name <name> --workspace-name openadapt-ml --resource-group openadapt-agents --yes`

### Storage costs continue after teardown

Check if you used `--keep-image`. Golden image alone costs ~$0.45/month.

## References

- [Azure ML Pricing](https://azure.microsoft.com/en-us/pricing/details/machine-learning/)
- [Azure Blob Storage Pricing](https://azure.microsoft.com/en-us/pricing/details/storage/blobs/)
- [WAA Azure Setup](./VANILLA_WAA_AZURE_AUTOMATION.md)
@@ -0,0 +1,255 @@
# Azure ML Live Logging Design Document

## Problem Statement

When running WAA benchmark evaluations on Azure ML compute instances, we need to see real-time logs from inside the Docker container to:

1. Verify patches are being applied correctly (e.g., IP address fix)
2. Monitor Windows VM boot progress
3. Debug issues without waiting for job completion
4. Confirm the WAA server is responding

**Current pain point**: Jobs run for 15-30+ minutes, and we can't see what's happening inside until they complete or fail.

## Architecture

```
Azure ML Job
└── Compute Instance (Standard_D8ds_v4)
    └── Docker Container (windowsarena/winarena)
        └── run_entry.py (entry point)
            └── /entry_setup.sh
                └── QEMU (Windows 11 VM)
                    └── WAA Flask Server (port 5000)
```

## Approaches Tried

### 1. Azure ML SDK `jobs.stream()` ❌

```python
client.jobs.stream('job_name')
```

**Result**: Appears to hang indefinitely. May be waiting for job to complete before returning any output.

**Why it doesn't work**:
- `stream()` may only stream *control plane* logs (job start, provisioning), not container stdout
- Container logs need different endpoint/mechanism

### 2. Azure ML SDK `jobs.download()` ❌

```python
client.jobs.download(name='job_name', download_path='/tmp', output_name='default')
```

**Result**: Error - "Download is allowed only in states ['Completed', 'Failed', 'Canceled', ...]"

**Why it doesn't work**: By design, artifacts are only available after job termination.

### 3. SSH to Compute Instance ⚠️ (Partial)

```bash
ssh azureuser@104.43.139.26 -p 50000
docker exec <container_id> cat /path/to/logs
```

**Result**: Works but requires:
- Finding the compute instance IP (not easily available from SDK)
- Finding the SSH port (default 50000 for Azure ML)
- Finding the container ID inside the instance
- Navigating into container to find logs

**Challenge**: Compute instance IPs aren't exposed in the standard SDK. Need to use lower-level Azure APIs or parse from job metadata.
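Once connected over SSH, locating the container and tailing its output is mechanical. A minimal sketch (the `ancestor` filter assumes the `windowsarena/winarena` image named above; the log path inside the container is a placeholder):

```bash
# On the compute instance, after: ssh azureuser@<ip> -p 50000
CONTAINER_ID=$(docker ps --filter "ancestor=windowsarena/winarena" -q | head -n 1)

# Follow the container's stdout/stderr (tail -f style)
docker logs -f "$CONTAINER_ID"

# Or read specific log files written inside the container
docker exec "$CONTAINER_ID" cat /path/to/logs
```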
### 4. Reading Blob Storage Directly ⚠️ (Investigating)

Azure ML writes job artifacts to blob storage at path: `ExperimentRun/dcid.<job_name>/`

```bash
az storage blob list --container-name azureml --prefix "ExperimentRun/dcid.job_name"
```

**Result**: Logs may not be written in real-time. Standard output goes to `user_logs/std_log.txt` but sync timing is unclear.

## Potential Solutions

### Option A: Write Logs to Blob Storage in Real-Time

**Approach**: Modify `run_entry.py` to write logs directly to mounted blob storage.

```python
# In run_entry.py
import sys

# Azure ML mounts output paths - write logs there
log_path = "/mnt/azureml/cr/j/.../user_logs/live_log.txt"
with open(log_path, 'a') as f:
    f.write("Starting VM...\n")
    f.flush()  # Ensure immediate write
```

**Pros**: Uses existing infrastructure
**Cons**: Need to determine exact mount path; may still have sync delays

### Option B: Configure Application Insights/Log Analytics

**Approach**: Stream container logs to Azure Monitor.

```python
# In job environment
from applicationinsights import TelemetryClient

tc = TelemetryClient('<instrumentation_key>')
tc.track_trace('Starting VM...')
tc.flush()
```

**Pros**: Centralized logging, queryable, real-time
**Cons**: Additional Azure resource, more complexity

### Option C: HTTP Webhook for Status Updates

**Approach**: Have `run_entry.py` POST status updates to an external endpoint.

```python
import requests

def log_status(message):
    requests.post('https://your-endpoint.com/log', json={
        'job': job_name,
        'message': message,
        'timestamp': time.time()
    })
```

**Pros**: Real-time, flexible
**Cons**: Requires external endpoint, network connectivity from Azure ML

### Option D: Write to Mounted Output Directory (RECOMMENDED)

**Approach**: Azure ML automatically mounts output directories. Write logs there.

```python
# run_entry.py
import os
import sys
from datetime import datetime

# Azure ML sets this to the mounted output path
output_dir = os.environ.get('AZUREML_OUTPUT_PATH', '/mnt/azureml/outputs')
log_file = os.path.join(output_dir, 'live_log.txt')

class TeeLogger:
    def __init__(self, *files):
        self.files = files

    def write(self, data):
        for f in self.files:
            f.write(data)
            f.flush()

    def flush(self):
        for f in self.files:
            f.flush()

# Redirect stdout to both console and file
log_handle = open(log_file, 'w')
sys.stdout = TeeLogger(sys.__stdout__, log_handle)
```

Then read from blob storage:

```bash
az storage blob download \
  --account-name <storage_account> \
  --container-name azureml \
  --name "ExperimentRun/dcid.<job_name>/outputs/live_log.txt" \
  --file /tmp/live_log.txt
```

**Pros**: Uses existing infrastructure, no additional setup
**Cons**: Still have blob sync delays (typically 30-60 seconds)

### Option E: Azure ML Run Logging API

**Approach**: Use Azure ML's built-in logging that's designed for real-time streaming.

```python
from azureml.core import Run

run = Run.get_context()
run.log('status', 'Starting VM...')
run.log('ip_patch', 'Applied')
run.log('windows_boot', 'In progress')
```

Then query via SDK:

```python
run = client.runs.get(job_name)
metrics = run.get_metrics()
```

**Pros**: Built-in, designed for this use case
**Cons**: Requires azureml-core package in container; may conflict with existing setup

### Option F: Compute Instance SSH with Automation (SIMPLEST)

**Approach**: Automate the SSH process to extract container logs.

```python
def get_live_logs(job_name):
    # 1. Get compute instance from job
    job = client.jobs.get(job_name)
    compute_name = job.compute  # e.g., "w0Expe02041220"

    # 2. Get compute instance IP via Azure Resource Manager API
    # (Need to call ARM directly as SDK doesn't expose this)

    # 3. SSH and get logs
    ssh_cmd = f"ssh -p 50000 azureuser@{ip} 'docker logs $(docker ps -q)'"
    logs = subprocess.run(ssh_cmd, shell=True, capture_output=True)
    return logs.stdout
```

**Pros**: Works with existing setup, real-time
**Cons**: Requires parsing ARM API for IP; SSH key must be configured

## Recommendation

**Short-term (now)**: Use **Option F** - SSH automation. We already have SSH access working; just need to automate finding the IP and container ID.

**Medium-term**: Use **Option D** - Write to mounted output + poll blob storage. More reliable and doesn't require SSH.

**Long-term**: Use **Option E** - Azure ML Run logging. Most integrated solution.

## Implementation Plan

### Phase 1: SSH Automation (This Week)

1. Add CLI command to get compute instance IP:

   ```bash
   uv run python -m openadapt_ml.benchmarks.cli azure-logs --job quiet_pasta_byvjklj2q8
   ```

2. Implementation (see the `get_compute_ip` sketch after this list):

   ```python
   def cmd_azure_logs(job_name: str, follow: bool = False):
       """Get live logs from Azure ML job via SSH."""
       # Get job
       job = client.jobs.get(job_name)
       compute_name = job.compute

       # Get compute IP via ARM API
       ip = get_compute_ip(compute_name)

       # SSH command
       if follow:
           cmd = f"ssh -p 50000 azureuser@{ip} 'docker logs -f $(docker ps -q)'"
       else:
           cmd = f"ssh -p 50000 azureuser@{ip} 'docker logs $(docker ps -q)'"

       subprocess.run(cmd, shell=True)
   ```
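A possible sketch of the `get_compute_ip` helper used above. Because the Python SDK doesn't expose the instance IP (see the Challenge note under Approach 3), this shells out to the Azure CLI; the exact location of the IP in the response (`network_settings` vs. ARM `connectivityEndpoints`) varies by CLI/API version, so treat the field names as assumptions to verify.

```python
import json
import subprocess

RESOURCE_GROUP = "openadapt-agents"
WORKSPACE = "openadapt-ml"

def get_compute_ip(compute_name: str) -> str:
    """Best-effort lookup of a compute instance's public IP via the Azure CLI."""
    result = subprocess.run(
        [
            "az", "ml", "compute", "show",
            "--name", compute_name,
            "--workspace-name", WORKSPACE,
            "--resource-group", RESOURCE_GROUP,
            "--output", "json",
        ],
        capture_output=True, text=True, check=True,
    )
    info = json.loads(result.stdout)
    # Field layout differs across CLI versions; check the common locations.
    net = info.get("network_settings") or {}
    arm = (info.get("properties") or {}).get("connectivityEndpoints") or {}
    return net.get("public_ip_address") or arm.get("publicIpAddress") or ""
```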
### Phase 2: Blob Storage Logging (Next Week)

1. Modify `run_entry.py` to tee stdout to output directory
2. Add CLI command to poll blob storage for live logs
3. Add background agent that polls and streams logs to terminal (a sketch follows below)
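A rough sketch of the Phase 2 polling agent, assuming the Option D `TeeLogger` is writing `outputs/live_log.txt` and that the `azure-storage-blob` package is available; the container name and blob path follow the `ExperimentRun/dcid.<job_name>/` convention shown earlier and should be verified for your workspace.

```python
import time

from azure.storage.blob import BlobClient

def stream_live_log(conn_str: str, job_name: str, poll_seconds: int = 30) -> None:
    """Poll the job's live_log.txt blob and print any new bytes, tail -f style."""
    blob = BlobClient.from_connection_string(
        conn_str,
        container_name="azureml",
        blob_name=f"ExperimentRun/dcid.{job_name}/outputs/live_log.txt",
    )
    offset = 0
    while True:
        try:
            size = blob.get_blob_properties().size
            if size > offset:
                chunk = blob.download_blob(offset=offset, length=size - offset).readall()
                print(chunk.decode("utf-8", errors="replace"), end="", flush=True)
                offset = size
        except Exception:
            pass  # the blob may not exist yet while the job is still starting
        time.sleep(poll_seconds)
```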
## Open Questions

1. Does Azure ML SDK have undocumented methods for live log streaming?
2. What's the actual blob sync delay for output directories?
3. Can we use Azure SignalR for real-time log push?
4. Is there a REST API endpoint for log streaming (used by Azure Portal)?

## References

- [Azure ML Job Logging](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics)
- [Azure ML Run Context](https://learn.microsoft.com/en-us/python/api/azureml-core/azureml.core.run.run)
- [Streaming Logs in Azure ML](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-debug-visual-studio-code)

---
*Last updated: 2026-02-04*