@elizaos/sweagent-root 2.0.0-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +270 -0
- package/package.json +71 -0
- package/python/LICENSE +21 -0
- package/python/config/README.md +15 -0
- package/python/config/bash_only.yaml +222 -0
- package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
- package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
- package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
- package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
- package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
- package/python/config/coding_challenge.yaml +104 -0
- package/python/config/default.yaml +69 -0
- package/python/config/default_backticks.yaml +69 -0
- package/python/config/default_mm_no_images.yaml +82 -0
- package/python/config/default_mm_with_images.yaml +83 -0
- package/python/config/demo/default.yaml +80 -0
- package/python/config/demo/no_instructions.yaml +69 -0
- package/python/config/demo/only_bash.yaml +60 -0
- package/python/config/exotic/default_shell.yaml +52 -0
- package/python/config/exotic/windowed_replace.yaml +125 -0
- package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
- package/python/config/human/human.yaml +24 -0
- package/python/config/human/human_demo.yaml +52 -0
- package/python/config/sweagent_0_7/07.yaml +101 -0
- package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
- package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
- package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
- package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
- package/python/mlc_config.json +44 -0
- package/python/pyproject.toml +262 -0
- package/python/sweagent/__init__.py +114 -0
- package/python/sweagent/__main__.py +4 -0
- package/python/sweagent/agent/__init__.py +0 -0
- package/python/sweagent/agent/action_sampler.py +317 -0
- package/python/sweagent/agent/agents.py +1294 -0
- package/python/sweagent/agent/extra/shell_agent.py +106 -0
- package/python/sweagent/agent/history_processors.py +399 -0
- package/python/sweagent/agent/hooks/__init__.py +0 -0
- package/python/sweagent/agent/hooks/abstract.py +139 -0
- package/python/sweagent/agent/hooks/status.py +34 -0
- package/python/sweagent/agent/models.py +896 -0
- package/python/sweagent/agent/problem_statement.py +312 -0
- package/python/sweagent/agent/reviewer.py +664 -0
- package/python/sweagent/environment/__init__.py +0 -0
- package/python/sweagent/environment/hooks/__init__.py +0 -0
- package/python/sweagent/environment/hooks/abstract.py +60 -0
- package/python/sweagent/environment/hooks/status.py +28 -0
- package/python/sweagent/environment/repo.py +219 -0
- package/python/sweagent/environment/swe_env.py +276 -0
- package/python/sweagent/exceptions.py +54 -0
- package/python/sweagent/inspector/README.md +6 -0
- package/python/sweagent/inspector/__init__.py +0 -0
- package/python/sweagent/inspector/favicon.ico +0 -0
- package/python/sweagent/inspector/fileViewer.js +354 -0
- package/python/sweagent/inspector/icons/computer.png +0 -0
- package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
- package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
- package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
- package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
- package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
- package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
- package/python/sweagent/inspector/index.html +25 -0
- package/python/sweagent/inspector/server.py +354 -0
- package/python/sweagent/inspector/static.py +169 -0
- package/python/sweagent/inspector/style.css +454 -0
- package/python/sweagent/run/__init__.py +0 -0
- package/python/sweagent/run/_progress.py +158 -0
- package/python/sweagent/run/batch_instances.py +419 -0
- package/python/sweagent/run/common.py +387 -0
- package/python/sweagent/run/compare_runs.py +123 -0
- package/python/sweagent/run/extract_pred.py +19 -0
- package/python/sweagent/run/hooks/__init__.py +0 -0
- package/python/sweagent/run/hooks/abstract.py +67 -0
- package/python/sweagent/run/hooks/apply_patch.py +106 -0
- package/python/sweagent/run/hooks/open_pr.py +244 -0
- package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
- package/python/sweagent/run/inspector_cli.py +493 -0
- package/python/sweagent/run/merge_predictions.py +64 -0
- package/python/sweagent/run/quick_stats.py +96 -0
- package/python/sweagent/run/remove_unfinished.py +63 -0
- package/python/sweagent/run/rich_test.py +91 -0
- package/python/sweagent/run/run.py +147 -0
- package/python/sweagent/run/run_batch.py +442 -0
- package/python/sweagent/run/run_replay.py +219 -0
- package/python/sweagent/run/run_shell.py +155 -0
- package/python/sweagent/run/run_single.py +225 -0
- package/python/sweagent/run/run_traj_to_demo.py +85 -0
- package/python/sweagent/tools/__init__.py +0 -0
- package/python/sweagent/tools/bundle.py +57 -0
- package/python/sweagent/tools/commands.py +220 -0
- package/python/sweagent/tools/parsing.py +619 -0
- package/python/sweagent/tools/tools.py +430 -0
- package/python/sweagent/tools/utils.py +108 -0
- package/python/sweagent/types.py +102 -0
- package/python/sweagent/utils/__init__.py +0 -0
- package/python/sweagent/utils/config.py +80 -0
- package/python/sweagent/utils/files.py +27 -0
- package/python/sweagent/utils/github.py +118 -0
- package/python/sweagent/utils/jinja_warnings.py +14 -0
- package/python/sweagent/utils/log.py +175 -0
- package/python/sweagent/utils/patch_formatter.py +152 -0
- package/python/sweagent/utils/serialization.py +45 -0
- package/python/tests/__init__.py +0 -0
- package/python/tests/conftest.py +191 -0
- package/python/tests/test_agent.py +258 -0
- package/python/tests/test_batch_instance.py +43 -0
- package/python/tests/test_commands/_interactive_dummy.py +35 -0
- package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
- package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
- package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
- package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
- package/python/tests/test_data/data_sources/human_eval.json +1 -0
- package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
- package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
- package/python/tests/test_env.py +66 -0
- package/python/tests/test_env_utils.py +129 -0
- package/python/tests/test_history_processors.py +40 -0
- package/python/tests/test_models.py +23 -0
- package/python/tests/test_openai_live.py +164 -0
- package/python/tests/test_packaging.py +7 -0
- package/python/tests/test_parsing.py +131 -0
- package/python/tests/test_problem_statement_multimodal.py +111 -0
- package/python/tests/test_quick_stats.py +42 -0
- package/python/tests/test_run.py +37 -0
- package/python/tests/test_run_batch.py +110 -0
- package/python/tests/test_run_hooks.py +114 -0
- package/python/tests/test_run_replay.py +33 -0
- package/python/tests/test_run_single.py +125 -0
- package/python/tests/test_tools_command_parsing.py +193 -0
- package/python/tests/test_utils.py +15 -0
- package/python/tests/tools/__init__.py +0 -0
- package/python/tests/tools/conftest.py +12 -0
- package/python/tests/tools/test_default_utils.py +153 -0
- package/python/tests/tools/test_edit_replace.py +0 -0
- package/python/tests/tools/test_split_string.py +82 -0
- package/python/tests/utils.py +29 -0
- package/python/tools/diff_state/bin/_state_diff_state +52 -0
- package/python/tools/diff_state/config.yaml +2 -0
- package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
- package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
- package/python/tools/edit_anthropic/config.yaml +56 -0
- package/python/tools/edit_anthropic/install.sh +3 -0
- package/python/tools/filemap/bin/filemap +45 -0
- package/python/tools/filemap/config.yaml +9 -0
- package/python/tools/filemap/install.sh +2 -0
- package/python/tools/forfeit/bin/exit_forfeit +5 -0
- package/python/tools/forfeit/config.yaml +5 -0
- package/python/tools/image_tools/bin/view_image +36 -0
- package/python/tools/image_tools/config.yaml +9 -0
- package/python/tools/multilingual_setup/bin/do_nothing +2 -0
- package/python/tools/multilingual_setup/config.yaml +1 -0
- package/python/tools/multilingual_setup/install.sh +45 -0
- package/python/tools/registry/bin/_read_env +10 -0
- package/python/tools/registry/bin/_write_env +10 -0
- package/python/tools/registry/config.yaml +1 -0
- package/python/tools/registry/install.sh +6 -0
- package/python/tools/registry/lib/__init__.py +0 -0
- package/python/tools/registry/lib/registry.py +56 -0
- package/python/tools/review_on_submit_m/README.md +6 -0
- package/python/tools/review_on_submit_m/bin/submit +54 -0
- package/python/tools/review_on_submit_m/config.yaml +6 -0
- package/python/tools/review_on_submit_m/install.sh +0 -0
- package/python/tools/search/bin/find_file +31 -0
- package/python/tools/search/bin/search_dir +39 -0
- package/python/tools/search/bin/search_file +55 -0
- package/python/tools/search/config.yaml +37 -0
- package/python/tools/search/install.sh +3 -0
- package/python/tools/submit/bin/submit +17 -0
- package/python/tools/submit/config.yaml +5 -0
- package/python/tools/web_browser/bin/click_mouse +41 -0
- package/python/tools/web_browser/bin/close_site +28 -0
- package/python/tools/web_browser/bin/double_click_mouse +37 -0
- package/python/tools/web_browser/bin/drag_mouse +46 -0
- package/python/tools/web_browser/bin/execute_script_on_page +39 -0
- package/python/tools/web_browser/bin/get_console_output +48 -0
- package/python/tools/web_browser/bin/move_mouse +35 -0
- package/python/tools/web_browser/bin/navigate_back +33 -0
- package/python/tools/web_browser/bin/navigate_forward +33 -0
- package/python/tools/web_browser/bin/open_site +36 -0
- package/python/tools/web_browser/bin/press_keys_on_page +51 -0
- package/python/tools/web_browser/bin/reload_page +33 -0
- package/python/tools/web_browser/bin/run_web_browser_server +394 -0
- package/python/tools/web_browser/bin/screenshot_site +38 -0
- package/python/tools/web_browser/bin/scroll_on_page +40 -0
- package/python/tools/web_browser/bin/set_browser_window_size +40 -0
- package/python/tools/web_browser/bin/type_text +34 -0
- package/python/tools/web_browser/bin/wait_time +39 -0
- package/python/tools/web_browser/config.yaml +155 -0
- package/python/tools/web_browser/install.sh +22 -0
- package/python/tools/web_browser/lib/browser_manager.py +404 -0
- package/python/tools/web_browser/lib/web_browser_config.py +33 -0
- package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
- package/python/tools/web_browser/test_console.html +1 -0
- package/python/tools/windowed/bin/_state +25 -0
- package/python/tools/windowed/bin/create +29 -0
- package/python/tools/windowed/bin/goto +37 -0
- package/python/tools/windowed/bin/open +49 -0
- package/python/tools/windowed/bin/scroll_down +12 -0
- package/python/tools/windowed/bin/scroll_up +13 -0
- package/python/tools/windowed/config.yaml +38 -0
- package/python/tools/windowed/install.sh +15 -0
- package/python/tools/windowed/lib/__init__.py +0 -0
- package/python/tools/windowed/lib/flake8_utils.py +147 -0
- package/python/tools/windowed/lib/windowed_file.py +312 -0
- package/python/tools/windowed_edit_linting/bin/edit +128 -0
- package/python/tools/windowed_edit_linting/config.yaml +31 -0
- package/python/tools/windowed_edit_linting/install.sh +5 -0
- package/python/tools/windowed_edit_replace/bin/edit +172 -0
- package/python/tools/windowed_edit_replace/bin/insert +77 -0
- package/python/tools/windowed_edit_replace/config.yaml +60 -0
- package/python/tools/windowed_edit_replace/install.sh +5 -0
- package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
- package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
- package/python/tools/windowed_edit_rewrite/install.sh +5 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
- package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
- package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
- package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
- package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
- package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
- package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
- package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
- package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
- package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
- package/rust/Cargo.toml +100 -0
- package/rust/README.md +49 -0
- package/rust/src/agent/action_sampler.rs +130 -0
- package/rust/src/agent/agents.rs +1029 -0
- package/rust/src/agent/history_processors.rs +277 -0
- package/rust/src/agent/hooks/mod.rs +208 -0
- package/rust/src/agent/mod.rs +24 -0
- package/rust/src/agent/models.rs +837 -0
- package/rust/src/agent/problem_statement.rs +355 -0
- package/rust/src/agent/reviewer.rs +505 -0
- package/rust/src/bin/sweagent.rs +784 -0
- package/rust/src/environment/deployment.rs +631 -0
- package/rust/src/environment/hooks/mod.rs +114 -0
- package/rust/src/environment/mod.rs +16 -0
- package/rust/src/environment/repo.rs +265 -0
- package/rust/src/environment/runtime.rs +237 -0
- package/rust/src/environment/swe_env.rs +248 -0
- package/rust/src/exceptions.rs +228 -0
- package/rust/src/lib.rs +68 -0
- package/rust/src/monitoring.rs +482 -0
- package/rust/src/run/hooks/mod.rs +134 -0
- package/rust/src/run/mod.rs +12 -0
- package/rust/src/run/run_batch.rs +563 -0
- package/rust/src/run/run_single.rs +196 -0
- package/rust/src/tools/bundle.rs +224 -0
- package/rust/src/tools/commands.rs +173 -0
- package/rust/src/tools/mod.rs +295 -0
- package/rust/src/tools/parsing.rs +354 -0
- package/rust/src/tools/registry.rs +143 -0
- package/rust/src/types.rs +554 -0
- package/rust/src/utils/config.rs +105 -0
- package/rust/src/utils/files.rs +137 -0
- package/rust/src/utils/github.rs +171 -0
- package/rust/src/utils/log.rs +65 -0
- package/rust/src/utils/mod.rs +17 -0
- package/rust/src/utils/serialization.rs +181 -0
- package/rust/src/utils/template.rs +173 -0
- package/typescript/README.md +335 -0
|
@@ -0,0 +1,482 @@
|
|
|
1
|
+
//! Monitoring and alerting module for production deployments
|
|
2
|
+
//!
|
|
3
|
+
//! Provides hooks for observability, metrics collection, and alerting.
|
|
4
|
+
|
|
5
|
+
use crate::types::AgentRunResult;
|
|
6
|
+
use serde::{Deserialize, Serialize};
|
|
7
|
+
use std::collections::HashMap;
|
|
8
|
+
use std::sync::atomic::{AtomicU64, Ordering};
|
|
9
|
+
use std::sync::Arc;
|
|
10
|
+
use std::time::{Duration, Instant};
|
|
11
|
+
|
|
12
|
+
/// Running totals describing agent activity.
///
/// Every counter is an [`AtomicU64`], so the recording methods only need a shared
/// reference to the struct. `Default` starts all counters at zero.
#[derive(Debug, Default)]
pub struct AgentMetrics {
    /// Number of runs that have been started.
    pub runs_started: AtomicU64,
    /// Number of runs that finished without being classified as an error.
    pub runs_completed: AtomicU64,
    /// Number of runs that finished and were classified as an error.
    pub runs_failed: AtomicU64,
    /// Accumulated cost, stored as whole micro-dollars (1 dollar = 1,000,000).
    pub total_cost_micros: AtomicU64,
    /// Accumulated count of tokens sent.
    pub total_tokens_sent: AtomicU64,
    /// Accumulated count of tokens received.
    pub total_tokens_received: AtomicU64,
    /// Accumulated count of API calls.
    pub total_api_calls: AtomicU64,
    /// Accumulated execution time, in milliseconds.
    pub total_execution_time_ms: AtomicU64,
}
|
|
32
|
+
|
|
33
|
+
impl AgentMetrics {
|
|
34
|
+
pub fn new() -> Self {
|
|
35
|
+
Self::default()
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
pub fn record_run_start(&self) {
|
|
39
|
+
self.runs_started.fetch_add(1, Ordering::SeqCst);
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
pub fn record_run_complete(&self, result: &AgentRunResult) {
|
|
43
|
+
let is_error = result
|
|
44
|
+
.info
|
|
45
|
+
.exit_status
|
|
46
|
+
.as_ref()
|
|
47
|
+
.map(|s| s.contains("error"))
|
|
48
|
+
.unwrap_or(false);
|
|
49
|
+
|
|
50
|
+
if is_error {
|
|
51
|
+
self.runs_failed.fetch_add(1, Ordering::SeqCst);
|
|
52
|
+
} else {
|
|
53
|
+
self.runs_completed.fetch_add(1, Ordering::SeqCst);
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
// Record model stats if available
|
|
57
|
+
if let Some(ref stats) = result.info.model_stats {
|
|
58
|
+
self.total_cost_micros
|
|
59
|
+
.fetch_add((stats.instance_cost * 1_000_000.0) as u64, Ordering::SeqCst);
|
|
60
|
+
self.total_tokens_sent
|
|
61
|
+
.fetch_add(stats.tokens_sent, Ordering::SeqCst);
|
|
62
|
+
self.total_tokens_received
|
|
63
|
+
.fetch_add(stats.tokens_received, Ordering::SeqCst);
|
|
64
|
+
self.total_api_calls
|
|
65
|
+
.fetch_add(stats.api_calls, Ordering::SeqCst);
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
pub fn record_execution_time(&self, duration: Duration) {
|
|
70
|
+
self.total_execution_time_ms
|
|
71
|
+
.fetch_add(duration.as_millis() as u64, Ordering::SeqCst);
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
/// Get current metrics as a snapshot
|
|
75
|
+
pub fn snapshot(&self) -> MetricsSnapshot {
|
|
76
|
+
MetricsSnapshot {
|
|
77
|
+
runs_started: self.runs_started.load(Ordering::SeqCst),
|
|
78
|
+
runs_completed: self.runs_completed.load(Ordering::SeqCst),
|
|
79
|
+
runs_failed: self.runs_failed.load(Ordering::SeqCst),
|
|
80
|
+
total_cost: self.total_cost_micros.load(Ordering::SeqCst) as f64 / 1_000_000.0,
|
|
81
|
+
total_tokens_sent: self.total_tokens_sent.load(Ordering::SeqCst),
|
|
82
|
+
total_tokens_received: self.total_tokens_received.load(Ordering::SeqCst),
|
|
83
|
+
total_api_calls: self.total_api_calls.load(Ordering::SeqCst),
|
|
84
|
+
total_execution_time_ms: self.total_execution_time_ms.load(Ordering::SeqCst),
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
/// Immutable snapshot of metrics for reporting
|
|
90
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
91
|
+
pub struct MetricsSnapshot {
|
|
92
|
+
pub runs_started: u64,
|
|
93
|
+
pub runs_completed: u64,
|
|
94
|
+
pub runs_failed: u64,
|
|
95
|
+
pub total_cost: f64,
|
|
96
|
+
pub total_tokens_sent: u64,
|
|
97
|
+
pub total_tokens_received: u64,
|
|
98
|
+
pub total_api_calls: u64,
|
|
99
|
+
pub total_execution_time_ms: u64,
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
impl MetricsSnapshot {
|
|
103
|
+
/// Calculate success rate
|
|
104
|
+
pub fn success_rate(&self) -> f64 {
|
|
105
|
+
if self.runs_started == 0 {
|
|
106
|
+
return 0.0;
|
|
107
|
+
}
|
|
108
|
+
self.runs_completed as f64 / self.runs_started as f64
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
/// Calculate average cost per run
|
|
112
|
+
pub fn avg_cost_per_run(&self) -> f64 {
|
|
113
|
+
if self.runs_started == 0 {
|
|
114
|
+
return 0.0;
|
|
115
|
+
}
|
|
116
|
+
self.total_cost / self.runs_started as f64
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
/// Calculate average execution time
|
|
120
|
+
pub fn avg_execution_time_ms(&self) -> f64 {
|
|
121
|
+
if self.runs_started == 0 {
|
|
122
|
+
return 0.0;
|
|
123
|
+
}
|
|
124
|
+
self.total_execution_time_ms as f64 / self.runs_started as f64
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
/// Alert severity levels
|
|
129
|
+
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
|
130
|
+
#[serde(rename_all = "lowercase")]
|
|
131
|
+
pub enum AlertSeverity {
|
|
132
|
+
Info,
|
|
133
|
+
Warning,
|
|
134
|
+
Error,
|
|
135
|
+
Critical,
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
/// Alert information
|
|
139
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
140
|
+
pub struct Alert {
|
|
141
|
+
pub severity: AlertSeverity,
|
|
142
|
+
pub message: String,
|
|
143
|
+
pub timestamp: chrono::DateTime<chrono::Utc>,
|
|
144
|
+
pub context: HashMap<String, serde_json::Value>,
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
impl Alert {
|
|
148
|
+
pub fn new(severity: AlertSeverity, message: impl Into<String>) -> Self {
|
|
149
|
+
Self {
|
|
150
|
+
severity,
|
|
151
|
+
message: message.into(),
|
|
152
|
+
timestamp: chrono::Utc::now(),
|
|
153
|
+
context: HashMap::new(),
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
pub fn with_context(mut self, key: impl Into<String>, value: impl Serialize) -> Self {
|
|
158
|
+
self.context
|
|
159
|
+
.insert(key.into(), serde_json::to_value(value).unwrap_or_default());
|
|
160
|
+
self
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
/// Trait for alert handlers
|
|
165
|
+
pub trait AlertHandler: Send + Sync {
|
|
166
|
+
fn handle(&self, alert: &Alert);
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
/// Log-based alert handler (default)
|
|
170
|
+
pub struct LogAlertHandler;
|
|
171
|
+
|
|
172
|
+
impl AlertHandler for LogAlertHandler {
|
|
173
|
+
fn handle(&self, alert: &Alert) {
|
|
174
|
+
match alert.severity {
|
|
175
|
+
AlertSeverity::Info => tracing::info!(
|
|
176
|
+
message = %alert.message,
|
|
177
|
+
context = ?alert.context,
|
|
178
|
+
"Alert"
|
|
179
|
+
),
|
|
180
|
+
AlertSeverity::Warning => tracing::warn!(
|
|
181
|
+
message = %alert.message,
|
|
182
|
+
context = ?alert.context,
|
|
183
|
+
"Alert"
|
|
184
|
+
),
|
|
185
|
+
AlertSeverity::Error => tracing::error!(
|
|
186
|
+
message = %alert.message,
|
|
187
|
+
context = ?alert.context,
|
|
188
|
+
"Alert"
|
|
189
|
+
),
|
|
190
|
+
AlertSeverity::Critical => tracing::error!(
|
|
191
|
+
message = %alert.message,
|
|
192
|
+
context = ?alert.context,
|
|
193
|
+
severity = "CRITICAL",
|
|
194
|
+
"Alert"
|
|
195
|
+
),
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
/// Webhook-based alert handler for external services (Slack, PagerDuty, etc.)
|
|
201
|
+
pub struct WebhookAlertHandler {
|
|
202
|
+
url: String,
|
|
203
|
+
client: reqwest::Client,
|
|
204
|
+
min_severity: AlertSeverity,
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
impl WebhookAlertHandler {
|
|
208
|
+
pub fn new(url: impl Into<String>, min_severity: AlertSeverity) -> Self {
|
|
209
|
+
Self {
|
|
210
|
+
url: url.into(),
|
|
211
|
+
client: reqwest::Client::new(),
|
|
212
|
+
min_severity,
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
fn should_send(&self, severity: AlertSeverity) -> bool {
|
|
217
|
+
matches!(
|
|
218
|
+
(self.min_severity, severity),
|
|
219
|
+
(AlertSeverity::Critical, AlertSeverity::Critical)
|
|
220
|
+
| (AlertSeverity::Error, AlertSeverity::Critical | AlertSeverity::Error)
|
|
221
|
+
| (AlertSeverity::Warning, AlertSeverity::Critical | AlertSeverity::Error | AlertSeverity::Warning)
|
|
222
|
+
| (AlertSeverity::Info, _)
|
|
223
|
+
)
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
impl AlertHandler for WebhookAlertHandler {
|
|
228
|
+
fn handle(&self, alert: &Alert) {
|
|
229
|
+
if !self.should_send(alert.severity) {
|
|
230
|
+
return;
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
let url = self.url.clone();
|
|
234
|
+
let payload = serde_json::json!({
|
|
235
|
+
"severity": alert.severity,
|
|
236
|
+
"message": alert.message,
|
|
237
|
+
"timestamp": alert.timestamp.to_rfc3339(),
|
|
238
|
+
"context": alert.context,
|
|
239
|
+
});
|
|
240
|
+
|
|
241
|
+
let client = self.client.clone();
|
|
242
|
+
|
|
243
|
+
// Fire and forget - don't block on webhook
|
|
244
|
+
tokio::spawn(async move {
|
|
245
|
+
if let Err(e) = client.post(&url).json(&payload).send().await {
|
|
246
|
+
tracing::warn!(error = %e, "Failed to send webhook alert");
|
|
247
|
+
}
|
|
248
|
+
});
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
/// Alert thresholds for automatic alerting
|
|
253
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
254
|
+
pub struct AlertThresholds {
|
|
255
|
+
/// Alert if cost exceeds this amount
|
|
256
|
+
pub cost_limit: f64,
|
|
257
|
+
/// Alert if failure rate exceeds this percentage
|
|
258
|
+
pub failure_rate_percent: f64,
|
|
259
|
+
/// Alert if average execution time exceeds this (ms)
|
|
260
|
+
pub execution_time_ms: u64,
|
|
261
|
+
/// Alert if API calls exceed this limit
|
|
262
|
+
pub api_calls_limit: u64,
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
impl Default for AlertThresholds {
|
|
266
|
+
fn default() -> Self {
|
|
267
|
+
Self {
|
|
268
|
+
cost_limit: 100.0, // $100
|
|
269
|
+
failure_rate_percent: 20.0, // 20%
|
|
270
|
+
execution_time_ms: 600_000, // 10 minutes
|
|
271
|
+
api_calls_limit: 10_000, // 10k calls
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
/// Monitor that checks metrics against thresholds
|
|
277
|
+
pub struct MetricsMonitor {
|
|
278
|
+
metrics: Arc<AgentMetrics>,
|
|
279
|
+
thresholds: AlertThresholds,
|
|
280
|
+
handlers: Vec<Box<dyn AlertHandler>>,
|
|
281
|
+
#[allow(dead_code)]
|
|
282
|
+
last_check: std::sync::Mutex<Instant>,
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
impl MetricsMonitor {
|
|
286
|
+
pub fn new(metrics: Arc<AgentMetrics>, thresholds: AlertThresholds) -> Self {
|
|
287
|
+
Self {
|
|
288
|
+
metrics,
|
|
289
|
+
thresholds,
|
|
290
|
+
handlers: vec![Box::new(LogAlertHandler)],
|
|
291
|
+
last_check: std::sync::Mutex::new(Instant::now()),
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
pub fn add_handler(&mut self, handler: Box<dyn AlertHandler>) {
|
|
296
|
+
self.handlers.push(handler);
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
pub fn check(&self) {
|
|
300
|
+
let snapshot = self.metrics.snapshot();
|
|
301
|
+
|
|
302
|
+
// Check cost limit
|
|
303
|
+
if snapshot.total_cost > self.thresholds.cost_limit {
|
|
304
|
+
self.alert(
|
|
305
|
+
Alert::new(
|
|
306
|
+
AlertSeverity::Warning,
|
|
307
|
+
format!(
|
|
308
|
+
"Cost limit exceeded: ${:.2} > ${:.2}",
|
|
309
|
+
snapshot.total_cost, self.thresholds.cost_limit
|
|
310
|
+
),
|
|
311
|
+
)
|
|
312
|
+
.with_context("total_cost", snapshot.total_cost),
|
|
313
|
+
);
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
// Check failure rate
|
|
317
|
+
let failure_rate = if snapshot.runs_started > 0 {
|
|
318
|
+
100.0 * snapshot.runs_failed as f64 / snapshot.runs_started as f64
|
|
319
|
+
} else {
|
|
320
|
+
0.0
|
|
321
|
+
};
|
|
322
|
+
|
|
323
|
+
if failure_rate > self.thresholds.failure_rate_percent && snapshot.runs_started >= 10 {
|
|
324
|
+
self.alert(
|
|
325
|
+
Alert::new(
|
|
326
|
+
AlertSeverity::Error,
|
|
327
|
+
format!(
|
|
328
|
+
"High failure rate: {:.1}% > {:.1}%",
|
|
329
|
+
failure_rate, self.thresholds.failure_rate_percent
|
|
330
|
+
),
|
|
331
|
+
)
|
|
332
|
+
.with_context("failure_rate", failure_rate)
|
|
333
|
+
.with_context("runs_failed", snapshot.runs_failed)
|
|
334
|
+
.with_context("runs_started", snapshot.runs_started),
|
|
335
|
+
);
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
// Check API calls
|
|
339
|
+
if snapshot.total_api_calls > self.thresholds.api_calls_limit {
|
|
340
|
+
self.alert(
|
|
341
|
+
Alert::new(
|
|
342
|
+
AlertSeverity::Warning,
|
|
343
|
+
format!(
|
|
344
|
+
"API call limit exceeded: {} > {}",
|
|
345
|
+
snapshot.total_api_calls, self.thresholds.api_calls_limit
|
|
346
|
+
),
|
|
347
|
+
)
|
|
348
|
+
.with_context("api_calls", snapshot.total_api_calls),
|
|
349
|
+
);
|
|
350
|
+
}
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
fn alert(&self, alert: Alert) {
|
|
354
|
+
for handler in &self.handlers {
|
|
355
|
+
handler.handle(&alert);
|
|
356
|
+
}
|
|
357
|
+
}
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
/// Health check status
|
|
361
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
362
|
+
pub struct HealthStatus {
|
|
363
|
+
pub healthy: bool,
|
|
364
|
+
pub components: HashMap<String, ComponentHealth>,
|
|
365
|
+
pub timestamp: chrono::DateTime<chrono::Utc>,
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
369
|
+
pub struct ComponentHealth {
|
|
370
|
+
pub healthy: bool,
|
|
371
|
+
pub message: String,
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
impl HealthStatus {
|
|
375
|
+
pub fn new() -> Self {
|
|
376
|
+
Self {
|
|
377
|
+
healthy: true,
|
|
378
|
+
components: HashMap::new(),
|
|
379
|
+
timestamp: chrono::Utc::now(),
|
|
380
|
+
}
|
|
381
|
+
}
|
|
382
|
+
|
|
383
|
+
pub fn add_component(
|
|
384
|
+
&mut self,
|
|
385
|
+
name: impl Into<String>,
|
|
386
|
+
healthy: bool,
|
|
387
|
+
message: impl Into<String>,
|
|
388
|
+
) {
|
|
389
|
+
let name = name.into();
|
|
390
|
+
if !healthy {
|
|
391
|
+
self.healthy = false;
|
|
392
|
+
}
|
|
393
|
+
self.components.insert(
|
|
394
|
+
name,
|
|
395
|
+
ComponentHealth {
|
|
396
|
+
healthy,
|
|
397
|
+
message: message.into(),
|
|
398
|
+
},
|
|
399
|
+
);
|
|
400
|
+
}
|
|
401
|
+
}
|
|
402
|
+
|
|
403
|
+
impl Default for HealthStatus {
|
|
404
|
+
fn default() -> Self {
|
|
405
|
+
Self::new()
|
|
406
|
+
}
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
/// Perform a health check
|
|
410
|
+
pub async fn health_check() -> HealthStatus {
|
|
411
|
+
let mut status = HealthStatus::new();
|
|
412
|
+
|
|
413
|
+
// Check Docker availability
|
|
414
|
+
let docker_check = tokio::process::Command::new("docker")
|
|
415
|
+
.arg("info")
|
|
416
|
+
.output()
|
|
417
|
+
.await;
|
|
418
|
+
|
|
419
|
+
match docker_check {
|
|
420
|
+
Ok(output) if output.status.success() => {
|
|
421
|
+
status.add_component("docker", true, "Docker daemon is running");
|
|
422
|
+
}
|
|
423
|
+
Ok(_) => {
|
|
424
|
+
status.add_component("docker", false, "Docker daemon not responding");
|
|
425
|
+
}
|
|
426
|
+
Err(e) => {
|
|
427
|
+
status.add_component("docker", false, format!("Docker not available: {}", e));
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
// Check environment variables
|
|
432
|
+
let has_api_key =
|
|
433
|
+
std::env::var("OPENAI_API_KEY").is_ok() || std::env::var("ANTHROPIC_API_KEY").is_ok();
|
|
434
|
+
|
|
435
|
+
status.add_component(
|
|
436
|
+
"api_keys",
|
|
437
|
+
has_api_key,
|
|
438
|
+
if has_api_key {
|
|
439
|
+
"API keys configured"
|
|
440
|
+
} else {
|
|
441
|
+
"No API keys found"
|
|
442
|
+
},
|
|
443
|
+
);
|
|
444
|
+
|
|
445
|
+
status
|
|
446
|
+
}
|
|
447
|
+
|
|
448
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Counters written directly are visible in the snapshot and drive the success rate.
    #[test]
    fn test_metrics_snapshot() {
        let agent_metrics = AgentMetrics::new();
        agent_metrics.runs_started.store(10, Ordering::SeqCst);
        agent_metrics.runs_completed.store(8, Ordering::SeqCst);
        agent_metrics.runs_failed.store(2, Ordering::SeqCst);

        let snap = agent_metrics.snapshot();
        assert_eq!(snap.runs_started, 10);

        let deviation = (snap.success_rate() - 0.8).abs();
        assert!(deviation < 0.001);
    }

    /// Each `with_context` call adds one entry to the alert's context.
    #[test]
    fn test_alert_with_context() {
        let built = Alert::new(AlertSeverity::Warning, "Test alert")
            .with_context("count", 42)
            .with_context("name", "test");

        assert_eq!(built.severity, AlertSeverity::Warning);
        assert_eq!(built.context.len(), 2);
    }

    /// A new status is healthy; adding an unhealthy component flips it.
    #[test]
    fn test_health_status() {
        let mut health = HealthStatus::new();
        assert!(health.healthy);

        health.add_component("test", false, "Failed");
        assert!(!health.healthy);
    }
}
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
//! Run hooks for monitoring and extending run behavior
|
|
2
|
+
|
|
3
|
+
use crate::types::AgentRunResult;
|
|
4
|
+
use async_trait::async_trait;
|
|
5
|
+
|
|
6
|
+
/// Hook for run events
|
|
7
|
+
#[async_trait]
|
|
8
|
+
pub trait RunHook: Send + Sync {
|
|
9
|
+
/// Called when run is initialized
|
|
10
|
+
fn on_init(&mut self, _run: &dyn std::any::Any) {}
|
|
11
|
+
|
|
12
|
+
/// Called when run starts
|
|
13
|
+
fn on_start(&mut self) {}
|
|
14
|
+
|
|
15
|
+
/// Called when run ends
|
|
16
|
+
fn on_end(&mut self) {}
|
|
17
|
+
|
|
18
|
+
/// Called when an instance is skipped
|
|
19
|
+
fn on_instance_skipped(&mut self, _reason: &str) {}
|
|
20
|
+
|
|
21
|
+
/// Called when an instance starts
|
|
22
|
+
fn on_instance_start(&mut self, _index: usize, _instance_id: &str) {}
|
|
23
|
+
|
|
24
|
+
/// Called when an instance completes
|
|
25
|
+
fn on_instance_completed(&mut self, _result: &AgentRunResult) {}
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
/// Combined hook that wraps multiple hooks
|
|
29
|
+
pub struct CombinedRunHook {
|
|
30
|
+
hooks: Vec<Box<dyn RunHook>>,
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
impl CombinedRunHook {
|
|
34
|
+
pub fn new() -> Self {
|
|
35
|
+
Self { hooks: Vec::new() }
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
pub fn add_hook(&mut self, hook: Box<dyn RunHook>) {
|
|
39
|
+
self.hooks.push(hook);
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
impl Default for CombinedRunHook {
|
|
44
|
+
fn default() -> Self {
|
|
45
|
+
Self::new()
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
#[async_trait]
|
|
50
|
+
impl RunHook for CombinedRunHook {
|
|
51
|
+
fn on_init(&mut self, run: &dyn std::any::Any) {
|
|
52
|
+
for hook in &mut self.hooks {
|
|
53
|
+
hook.on_init(run);
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
fn on_start(&mut self) {
|
|
58
|
+
for hook in &mut self.hooks {
|
|
59
|
+
hook.on_start();
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
fn on_end(&mut self) {
|
|
64
|
+
for hook in &mut self.hooks {
|
|
65
|
+
hook.on_end();
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
fn on_instance_skipped(&mut self, reason: &str) {
|
|
70
|
+
for hook in &mut self.hooks {
|
|
71
|
+
hook.on_instance_skipped(reason);
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
fn on_instance_start(&mut self, index: usize, instance_id: &str) {
|
|
76
|
+
for hook in &mut self.hooks {
|
|
77
|
+
hook.on_instance_start(index, instance_id);
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
fn on_instance_completed(&mut self, result: &AgentRunResult) {
|
|
82
|
+
for hook in &mut self.hooks {
|
|
83
|
+
hook.on_instance_completed(result);
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
/// Hook to save applied patches
|
|
89
|
+
pub struct SaveApplyPatchHook {
|
|
90
|
+
pub output_dir: String,
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
impl SaveApplyPatchHook {
|
|
94
|
+
pub fn new(output_dir: impl Into<String>) -> Self {
|
|
95
|
+
Self {
|
|
96
|
+
output_dir: output_dir.into(),
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
#[async_trait]
|
|
102
|
+
impl RunHook for SaveApplyPatchHook {
|
|
103
|
+
fn on_instance_completed(&mut self, result: &AgentRunResult) {
|
|
104
|
+
if let Some(ref _submission) = result.info.submission {
|
|
105
|
+
let patch_path = std::path::Path::new(&self.output_dir).join("patches");
|
|
106
|
+
let _ = std::fs::create_dir_all(&patch_path);
|
|
107
|
+
|
|
108
|
+
// Save the patch
|
|
109
|
+
// In a full implementation, would write to file based on instance ID
|
|
110
|
+
tracing::info!(path = ?patch_path, "Would save patch");
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
/// Hook to open PRs
|
|
116
|
+
pub struct OpenPRHook {
|
|
117
|
+
pub github_token: Option<String>,
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
impl OpenPRHook {
|
|
121
|
+
pub fn new(github_token: Option<String>) -> Self {
|
|
122
|
+
Self { github_token }
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
#[async_trait]
|
|
127
|
+
impl RunHook for OpenPRHook {
|
|
128
|
+
fn on_instance_completed(&mut self, result: &AgentRunResult) {
|
|
129
|
+
if result.info.submission.is_some() && self.github_token.is_some() {
|
|
130
|
+
// In a full implementation, would create a PR using GitHub API
|
|
131
|
+
tracing::info!("Would create PR with submission");
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
//! Run module for SWE-agent
|
|
2
|
+
//!
|
|
3
|
+
//! This module provides the execution infrastructure for running agents
|
|
4
|
+
//! on problem instances.
|
|
5
|
+
|
|
6
|
+
pub mod hooks;
|
|
7
|
+
pub mod run_batch;
|
|
8
|
+
pub mod run_single;
|
|
9
|
+
|
|
10
|
+
pub use hooks::*;
|
|
11
|
+
pub use run_batch::*;
|
|
12
|
+
pub use run_single::*;
|