@elizaos/sweagent-root 2.0.0-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +270 -0
- package/package.json +71 -0
- package/python/LICENSE +21 -0
- package/python/config/README.md +15 -0
- package/python/config/bash_only.yaml +222 -0
- package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
- package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
- package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
- package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
- package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
- package/python/config/coding_challenge.yaml +104 -0
- package/python/config/default.yaml +69 -0
- package/python/config/default_backticks.yaml +69 -0
- package/python/config/default_mm_no_images.yaml +82 -0
- package/python/config/default_mm_with_images.yaml +83 -0
- package/python/config/demo/default.yaml +80 -0
- package/python/config/demo/no_instructions.yaml +69 -0
- package/python/config/demo/only_bash.yaml +60 -0
- package/python/config/exotic/default_shell.yaml +52 -0
- package/python/config/exotic/windowed_replace.yaml +125 -0
- package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
- package/python/config/human/human.yaml +24 -0
- package/python/config/human/human_demo.yaml +52 -0
- package/python/config/sweagent_0_7/07.yaml +101 -0
- package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
- package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
- package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
- package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
- package/python/mlc_config.json +44 -0
- package/python/pyproject.toml +262 -0
- package/python/sweagent/__init__.py +114 -0
- package/python/sweagent/__main__.py +4 -0
- package/python/sweagent/agent/__init__.py +0 -0
- package/python/sweagent/agent/action_sampler.py +317 -0
- package/python/sweagent/agent/agents.py +1294 -0
- package/python/sweagent/agent/extra/shell_agent.py +106 -0
- package/python/sweagent/agent/history_processors.py +399 -0
- package/python/sweagent/agent/hooks/__init__.py +0 -0
- package/python/sweagent/agent/hooks/abstract.py +139 -0
- package/python/sweagent/agent/hooks/status.py +34 -0
- package/python/sweagent/agent/models.py +896 -0
- package/python/sweagent/agent/problem_statement.py +312 -0
- package/python/sweagent/agent/reviewer.py +664 -0
- package/python/sweagent/environment/__init__.py +0 -0
- package/python/sweagent/environment/hooks/__init__.py +0 -0
- package/python/sweagent/environment/hooks/abstract.py +60 -0
- package/python/sweagent/environment/hooks/status.py +28 -0
- package/python/sweagent/environment/repo.py +219 -0
- package/python/sweagent/environment/swe_env.py +276 -0
- package/python/sweagent/exceptions.py +54 -0
- package/python/sweagent/inspector/README.md +6 -0
- package/python/sweagent/inspector/__init__.py +0 -0
- package/python/sweagent/inspector/favicon.ico +0 -0
- package/python/sweagent/inspector/fileViewer.js +354 -0
- package/python/sweagent/inspector/icons/computer.png +0 -0
- package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
- package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
- package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
- package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
- package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
- package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
- package/python/sweagent/inspector/index.html +25 -0
- package/python/sweagent/inspector/server.py +354 -0
- package/python/sweagent/inspector/static.py +169 -0
- package/python/sweagent/inspector/style.css +454 -0
- package/python/sweagent/run/__init__.py +0 -0
- package/python/sweagent/run/_progress.py +158 -0
- package/python/sweagent/run/batch_instances.py +419 -0
- package/python/sweagent/run/common.py +387 -0
- package/python/sweagent/run/compare_runs.py +123 -0
- package/python/sweagent/run/extract_pred.py +19 -0
- package/python/sweagent/run/hooks/__init__.py +0 -0
- package/python/sweagent/run/hooks/abstract.py +67 -0
- package/python/sweagent/run/hooks/apply_patch.py +106 -0
- package/python/sweagent/run/hooks/open_pr.py +244 -0
- package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
- package/python/sweagent/run/inspector_cli.py +493 -0
- package/python/sweagent/run/merge_predictions.py +64 -0
- package/python/sweagent/run/quick_stats.py +96 -0
- package/python/sweagent/run/remove_unfinished.py +63 -0
- package/python/sweagent/run/rich_test.py +91 -0
- package/python/sweagent/run/run.py +147 -0
- package/python/sweagent/run/run_batch.py +442 -0
- package/python/sweagent/run/run_replay.py +219 -0
- package/python/sweagent/run/run_shell.py +155 -0
- package/python/sweagent/run/run_single.py +225 -0
- package/python/sweagent/run/run_traj_to_demo.py +85 -0
- package/python/sweagent/tools/__init__.py +0 -0
- package/python/sweagent/tools/bundle.py +57 -0
- package/python/sweagent/tools/commands.py +220 -0
- package/python/sweagent/tools/parsing.py +619 -0
- package/python/sweagent/tools/tools.py +430 -0
- package/python/sweagent/tools/utils.py +108 -0
- package/python/sweagent/types.py +102 -0
- package/python/sweagent/utils/__init__.py +0 -0
- package/python/sweagent/utils/config.py +80 -0
- package/python/sweagent/utils/files.py +27 -0
- package/python/sweagent/utils/github.py +118 -0
- package/python/sweagent/utils/jinja_warnings.py +14 -0
- package/python/sweagent/utils/log.py +175 -0
- package/python/sweagent/utils/patch_formatter.py +152 -0
- package/python/sweagent/utils/serialization.py +45 -0
- package/python/tests/__init__.py +0 -0
- package/python/tests/conftest.py +191 -0
- package/python/tests/test_agent.py +258 -0
- package/python/tests/test_batch_instance.py +43 -0
- package/python/tests/test_commands/_interactive_dummy.py +35 -0
- package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
- package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
- package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
- package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
- package/python/tests/test_data/data_sources/human_eval.json +1 -0
- package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
- package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
- package/python/tests/test_env.py +66 -0
- package/python/tests/test_env_utils.py +129 -0
- package/python/tests/test_history_processors.py +40 -0
- package/python/tests/test_models.py +23 -0
- package/python/tests/test_openai_live.py +164 -0
- package/python/tests/test_packaging.py +7 -0
- package/python/tests/test_parsing.py +131 -0
- package/python/tests/test_problem_statement_multimodal.py +111 -0
- package/python/tests/test_quick_stats.py +42 -0
- package/python/tests/test_run.py +37 -0
- package/python/tests/test_run_batch.py +110 -0
- package/python/tests/test_run_hooks.py +114 -0
- package/python/tests/test_run_replay.py +33 -0
- package/python/tests/test_run_single.py +125 -0
- package/python/tests/test_tools_command_parsing.py +193 -0
- package/python/tests/test_utils.py +15 -0
- package/python/tests/tools/__init__.py +0 -0
- package/python/tests/tools/conftest.py +12 -0
- package/python/tests/tools/test_default_utils.py +153 -0
- package/python/tests/tools/test_edit_replace.py +0 -0
- package/python/tests/tools/test_split_string.py +82 -0
- package/python/tests/utils.py +29 -0
- package/python/tools/diff_state/bin/_state_diff_state +52 -0
- package/python/tools/diff_state/config.yaml +2 -0
- package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
- package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
- package/python/tools/edit_anthropic/config.yaml +56 -0
- package/python/tools/edit_anthropic/install.sh +3 -0
- package/python/tools/filemap/bin/filemap +45 -0
- package/python/tools/filemap/config.yaml +9 -0
- package/python/tools/filemap/install.sh +2 -0
- package/python/tools/forfeit/bin/exit_forfeit +5 -0
- package/python/tools/forfeit/config.yaml +5 -0
- package/python/tools/image_tools/bin/view_image +36 -0
- package/python/tools/image_tools/config.yaml +9 -0
- package/python/tools/multilingual_setup/bin/do_nothing +2 -0
- package/python/tools/multilingual_setup/config.yaml +1 -0
- package/python/tools/multilingual_setup/install.sh +45 -0
- package/python/tools/registry/bin/_read_env +10 -0
- package/python/tools/registry/bin/_write_env +10 -0
- package/python/tools/registry/config.yaml +1 -0
- package/python/tools/registry/install.sh +6 -0
- package/python/tools/registry/lib/__init__.py +0 -0
- package/python/tools/registry/lib/registry.py +56 -0
- package/python/tools/review_on_submit_m/README.md +6 -0
- package/python/tools/review_on_submit_m/bin/submit +54 -0
- package/python/tools/review_on_submit_m/config.yaml +6 -0
- package/python/tools/review_on_submit_m/install.sh +0 -0
- package/python/tools/search/bin/find_file +31 -0
- package/python/tools/search/bin/search_dir +39 -0
- package/python/tools/search/bin/search_file +55 -0
- package/python/tools/search/config.yaml +37 -0
- package/python/tools/search/install.sh +3 -0
- package/python/tools/submit/bin/submit +17 -0
- package/python/tools/submit/config.yaml +5 -0
- package/python/tools/web_browser/bin/click_mouse +41 -0
- package/python/tools/web_browser/bin/close_site +28 -0
- package/python/tools/web_browser/bin/double_click_mouse +37 -0
- package/python/tools/web_browser/bin/drag_mouse +46 -0
- package/python/tools/web_browser/bin/execute_script_on_page +39 -0
- package/python/tools/web_browser/bin/get_console_output +48 -0
- package/python/tools/web_browser/bin/move_mouse +35 -0
- package/python/tools/web_browser/bin/navigate_back +33 -0
- package/python/tools/web_browser/bin/navigate_forward +33 -0
- package/python/tools/web_browser/bin/open_site +36 -0
- package/python/tools/web_browser/bin/press_keys_on_page +51 -0
- package/python/tools/web_browser/bin/reload_page +33 -0
- package/python/tools/web_browser/bin/run_web_browser_server +394 -0
- package/python/tools/web_browser/bin/screenshot_site +38 -0
- package/python/tools/web_browser/bin/scroll_on_page +40 -0
- package/python/tools/web_browser/bin/set_browser_window_size +40 -0
- package/python/tools/web_browser/bin/type_text +34 -0
- package/python/tools/web_browser/bin/wait_time +39 -0
- package/python/tools/web_browser/config.yaml +155 -0
- package/python/tools/web_browser/install.sh +22 -0
- package/python/tools/web_browser/lib/browser_manager.py +404 -0
- package/python/tools/web_browser/lib/web_browser_config.py +33 -0
- package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
- package/python/tools/web_browser/test_console.html +1 -0
- package/python/tools/windowed/bin/_state +25 -0
- package/python/tools/windowed/bin/create +29 -0
- package/python/tools/windowed/bin/goto +37 -0
- package/python/tools/windowed/bin/open +49 -0
- package/python/tools/windowed/bin/scroll_down +12 -0
- package/python/tools/windowed/bin/scroll_up +13 -0
- package/python/tools/windowed/config.yaml +38 -0
- package/python/tools/windowed/install.sh +15 -0
- package/python/tools/windowed/lib/__init__.py +0 -0
- package/python/tools/windowed/lib/flake8_utils.py +147 -0
- package/python/tools/windowed/lib/windowed_file.py +312 -0
- package/python/tools/windowed_edit_linting/bin/edit +128 -0
- package/python/tools/windowed_edit_linting/config.yaml +31 -0
- package/python/tools/windowed_edit_linting/install.sh +5 -0
- package/python/tools/windowed_edit_replace/bin/edit +172 -0
- package/python/tools/windowed_edit_replace/bin/insert +77 -0
- package/python/tools/windowed_edit_replace/config.yaml +60 -0
- package/python/tools/windowed_edit_replace/install.sh +5 -0
- package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
- package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
- package/python/tools/windowed_edit_rewrite/install.sh +5 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
- package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
- package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
- package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
- package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
- package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
- package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
- package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
- package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
- package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
- package/rust/Cargo.toml +100 -0
- package/rust/README.md +49 -0
- package/rust/src/agent/action_sampler.rs +130 -0
- package/rust/src/agent/agents.rs +1029 -0
- package/rust/src/agent/history_processors.rs +277 -0
- package/rust/src/agent/hooks/mod.rs +208 -0
- package/rust/src/agent/mod.rs +24 -0
- package/rust/src/agent/models.rs +837 -0
- package/rust/src/agent/problem_statement.rs +355 -0
- package/rust/src/agent/reviewer.rs +505 -0
- package/rust/src/bin/sweagent.rs +784 -0
- package/rust/src/environment/deployment.rs +631 -0
- package/rust/src/environment/hooks/mod.rs +114 -0
- package/rust/src/environment/mod.rs +16 -0
- package/rust/src/environment/repo.rs +265 -0
- package/rust/src/environment/runtime.rs +237 -0
- package/rust/src/environment/swe_env.rs +248 -0
- package/rust/src/exceptions.rs +228 -0
- package/rust/src/lib.rs +68 -0
- package/rust/src/monitoring.rs +482 -0
- package/rust/src/run/hooks/mod.rs +134 -0
- package/rust/src/run/mod.rs +12 -0
- package/rust/src/run/run_batch.rs +563 -0
- package/rust/src/run/run_single.rs +196 -0
- package/rust/src/tools/bundle.rs +224 -0
- package/rust/src/tools/commands.rs +173 -0
- package/rust/src/tools/mod.rs +295 -0
- package/rust/src/tools/parsing.rs +354 -0
- package/rust/src/tools/registry.rs +143 -0
- package/rust/src/types.rs +554 -0
- package/rust/src/utils/config.rs +105 -0
- package/rust/src/utils/files.rs +137 -0
- package/rust/src/utils/github.rs +171 -0
- package/rust/src/utils/log.rs +65 -0
- package/rust/src/utils/mod.rs +17 -0
- package/rust/src/utils/serialization.rs +181 -0
- package/rust/src/utils/template.rs +173 -0
- package/typescript/README.md +335 -0
|
@@ -0,0 +1,1029 @@
|
|
|
1
|
+
//! Core agent implementations for SWE-agent
|
|
2
|
+
//!
|
|
3
|
+
//! This module contains the main agent types that coordinate the problem-solving loop.
|
|
4
|
+
|
|
5
|
+
use super::history_processors::{
|
|
6
|
+
create_history_processor, ChainedHistoryProcessor, HistoryProcessor, HistoryProcessorConfig,
|
|
7
|
+
};
|
|
8
|
+
use super::hooks::{AgentHook, CombinedAgentHook, QueryMessageEvent};
|
|
9
|
+
use super::models::{get_model, GlobalStats, InstanceStats, Model, ModelConfig};
|
|
10
|
+
use super::problem_statement::{
|
|
11
|
+
create_problem_statement, ProblemStatement, ProblemStatementConfig,
|
|
12
|
+
};
|
|
13
|
+
use super::reviewer::{get_retry_loop_from_config, RetryLoop, RetryLoopConfig, ReviewSubmission};
|
|
14
|
+
use crate::environment::SWEEnv;
|
|
15
|
+
use crate::exceptions::{tokens, Result, SWEAgentError};
|
|
16
|
+
use crate::tools::{ToolConfig, ToolHandler};
|
|
17
|
+
use crate::types::{
|
|
18
|
+
AgentInfo, AgentRunResult, Content, EnvironmentState, History, HistoryItem, MessageType,
|
|
19
|
+
QueryMessage, Role, StepOutput, TemplateConfig, Trajectory, TrajectoryStep,
|
|
20
|
+
};
|
|
21
|
+
use crate::utils::template::render_template;
|
|
22
|
+
use crate::VERSION;
|
|
23
|
+
use async_trait::async_trait;
|
|
24
|
+
use serde::{Deserialize, Serialize};
|
|
25
|
+
use std::collections::HashMap;
|
|
26
|
+
use std::path::{Path, PathBuf};
|
|
27
|
+
use std::sync::Arc;
|
|
28
|
+
|
|
29
|
+
/// Trait for all agent types
|
|
30
|
+
#[async_trait]
|
|
31
|
+
pub trait Agent: Send + Sync {
|
|
32
|
+
/// Add a hook to the agent
|
|
33
|
+
fn add_hook(&mut self, hook: Box<dyn AgentHook>);
|
|
34
|
+
|
|
35
|
+
/// Get trajectory data
|
|
36
|
+
fn get_trajectory_data(&self) -> TrajectoryData;
|
|
37
|
+
|
|
38
|
+
/// Run a single step
|
|
39
|
+
async fn step(&mut self) -> Result<StepOutput>;
|
|
40
|
+
|
|
41
|
+
/// Run the agent on a problem
|
|
42
|
+
async fn run(
|
|
43
|
+
&mut self,
|
|
44
|
+
env: &mut SWEEnv,
|
|
45
|
+
problem_statement: Box<dyn ProblemStatement>,
|
|
46
|
+
output_dir: &Path,
|
|
47
|
+
) -> Result<AgentRunResult>;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
/// Data from a trajectory
|
|
51
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
52
|
+
pub struct TrajectoryData {
|
|
53
|
+
pub trajectory: Trajectory,
|
|
54
|
+
pub history: History,
|
|
55
|
+
pub info: AgentInfo,
|
|
56
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
57
|
+
pub replay_config: Option<String>,
|
|
58
|
+
pub environment: String,
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
/// Configuration for the default agent
|
|
62
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
63
|
+
pub struct DefaultAgentConfig {
|
|
64
|
+
#[serde(default)]
|
|
65
|
+
pub name: String,
|
|
66
|
+
#[serde(default)]
|
|
67
|
+
pub templates: TemplateConfig,
|
|
68
|
+
#[serde(default)]
|
|
69
|
+
pub tools: ToolConfig,
|
|
70
|
+
#[serde(default)]
|
|
71
|
+
pub history_processors: Vec<HistoryProcessorConfig>,
|
|
72
|
+
#[serde(default)]
|
|
73
|
+
pub model: ModelConfig,
|
|
74
|
+
#[serde(default = "default_max_requeries")]
|
|
75
|
+
pub max_requeries: usize,
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
/// Returns the default value used for `max_requeries`.
fn default_max_requeries() -> usize {
    const DEFAULT_MAX_REQUERIES: usize = 3;
    DEFAULT_MAX_REQUERIES
}
|
|
81
|
+
|
|
82
|
+
impl Default for DefaultAgentConfig {
|
|
83
|
+
fn default() -> Self {
|
|
84
|
+
Self {
|
|
85
|
+
name: "main".to_string(),
|
|
86
|
+
templates: TemplateConfig::default(),
|
|
87
|
+
tools: ToolConfig::default(),
|
|
88
|
+
history_processors: Vec::new(),
|
|
89
|
+
model: ModelConfig::default(),
|
|
90
|
+
max_requeries: default_max_requeries(),
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
/// Default agent implementation
|
|
96
|
+
pub struct DefaultAgent {
|
|
97
|
+
pub name: String,
|
|
98
|
+
model: Box<dyn Model>,
|
|
99
|
+
templates: TemplateConfig,
|
|
100
|
+
tools: ToolHandler,
|
|
101
|
+
history_processors: Box<dyn HistoryProcessor>,
|
|
102
|
+
max_requeries: usize,
|
|
103
|
+
|
|
104
|
+
// Runtime state
|
|
105
|
+
env: Option<Arc<tokio::sync::Mutex<SWEEnv>>>,
|
|
106
|
+
problem_statement: Option<Box<dyn ProblemStatement>>,
|
|
107
|
+
traj_path: Option<PathBuf>,
|
|
108
|
+
history: History,
|
|
109
|
+
trajectory: Trajectory,
|
|
110
|
+
info: AgentInfo,
|
|
111
|
+
chook: CombinedAgentHook,
|
|
112
|
+
|
|
113
|
+
// Counters
|
|
114
|
+
n_consecutive_timeouts: usize,
|
|
115
|
+
total_execution_time: f64,
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
impl DefaultAgent {
|
|
119
|
+
pub fn new(
|
|
120
|
+
name: impl Into<String>,
|
|
121
|
+
model: Box<dyn Model>,
|
|
122
|
+
templates: TemplateConfig,
|
|
123
|
+
tools: ToolHandler,
|
|
124
|
+
history_processors: Box<dyn HistoryProcessor>,
|
|
125
|
+
max_requeries: usize,
|
|
126
|
+
) -> Self {
|
|
127
|
+
Self {
|
|
128
|
+
name: name.into(),
|
|
129
|
+
model,
|
|
130
|
+
templates,
|
|
131
|
+
tools,
|
|
132
|
+
history_processors,
|
|
133
|
+
max_requeries,
|
|
134
|
+
env: None,
|
|
135
|
+
problem_statement: None,
|
|
136
|
+
traj_path: None,
|
|
137
|
+
history: Vec::new(),
|
|
138
|
+
trajectory: Vec::new(),
|
|
139
|
+
info: AgentInfo::default(),
|
|
140
|
+
chook: CombinedAgentHook::new(),
|
|
141
|
+
n_consecutive_timeouts: 0,
|
|
142
|
+
total_execution_time: 0.0,
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
pub fn from_config(config: DefaultAgentConfig) -> Result<Self> {
|
|
147
|
+
let global_stats = Arc::new(GlobalStats::default());
|
|
148
|
+
let model = get_model(config.model, global_stats)?;
|
|
149
|
+
let tools = ToolHandler::new(config.tools)?;
|
|
150
|
+
|
|
151
|
+
let processors: Vec<Box<dyn HistoryProcessor>> = config
|
|
152
|
+
.history_processors
|
|
153
|
+
.iter()
|
|
154
|
+
.map(create_history_processor)
|
|
155
|
+
.collect();
|
|
156
|
+
|
|
157
|
+
let history_processor: Box<dyn HistoryProcessor> = if processors.is_empty() {
|
|
158
|
+
Box::new(super::history_processors::DefaultHistoryProcessor)
|
|
159
|
+
} else {
|
|
160
|
+
Box::new(ChainedHistoryProcessor::new(processors))
|
|
161
|
+
};
|
|
162
|
+
|
|
163
|
+
Ok(Self::new(
|
|
164
|
+
config.name,
|
|
165
|
+
model,
|
|
166
|
+
config.templates,
|
|
167
|
+
tools,
|
|
168
|
+
history_processor,
|
|
169
|
+
config.max_requeries,
|
|
170
|
+
))
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
/// Get processed messages for model query
|
|
174
|
+
fn get_messages(&self) -> History {
|
|
175
|
+
let filtered: History = self
|
|
176
|
+
.history
|
|
177
|
+
.iter()
|
|
178
|
+
.filter(|item| item.agent.as_deref() == Some(&self.name) || item.agent.is_none())
|
|
179
|
+
.cloned()
|
|
180
|
+
.collect();
|
|
181
|
+
|
|
182
|
+
self.history_processors.process(filtered)
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
fn append_history(&mut self, item: HistoryItem) {
|
|
186
|
+
let event = QueryMessageEvent {
|
|
187
|
+
agent: item.agent.clone().unwrap_or_default(),
|
|
188
|
+
role: format!("{:?}", item.role),
|
|
189
|
+
content: item.content.as_str(),
|
|
190
|
+
message_type: item
|
|
191
|
+
.message_type
|
|
192
|
+
.as_ref()
|
|
193
|
+
.map(|t| format!("{:?}", t))
|
|
194
|
+
.unwrap_or_default(),
|
|
195
|
+
is_demo: item.is_demo,
|
|
196
|
+
thought: item.thought.clone(),
|
|
197
|
+
action: item.action.clone(),
|
|
198
|
+
};
|
|
199
|
+
self.chook.on_query_message_added(&event);
|
|
200
|
+
self.history.push(item);
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
/// Setup the agent for a new problem instance
|
|
204
|
+
pub async fn setup(
|
|
205
|
+
&mut self,
|
|
206
|
+
env: Arc<tokio::sync::Mutex<SWEEnv>>,
|
|
207
|
+
problem_statement: Box<dyn ProblemStatement>,
|
|
208
|
+
output_dir: &Path,
|
|
209
|
+
) -> Result<()> {
|
|
210
|
+
std::fs::create_dir_all(output_dir)?;
|
|
211
|
+
|
|
212
|
+
self.problem_statement = Some(problem_statement);
|
|
213
|
+
self.env = Some(env.clone());
|
|
214
|
+
|
|
215
|
+
let ps = self.problem_statement.as_ref().unwrap();
|
|
216
|
+
let iid = ps.id();
|
|
217
|
+
tracing::info!(instance_id = iid, "Setting up agent");
|
|
218
|
+
|
|
219
|
+
self.traj_path = Some(output_dir.join(format!("{}.traj", iid)));
|
|
220
|
+
tracing::info!(path = ?self.traj_path, "Trajectory will be saved");
|
|
221
|
+
|
|
222
|
+
self.chook.on_tools_installation_started();
|
|
223
|
+
|
|
224
|
+
{
|
|
225
|
+
let mut env_guard = env.lock().await;
|
|
226
|
+
self.tools.install(&mut env_guard).await?;
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
self.chook.on_setup_attempt();
|
|
230
|
+
|
|
231
|
+
self.info = AgentInfo {
|
|
232
|
+
swe_agent_version: Some(VERSION.to_string()),
|
|
233
|
+
..Default::default()
|
|
234
|
+
};
|
|
235
|
+
|
|
236
|
+
// Add system message
|
|
237
|
+
self.add_system_message_to_history();
|
|
238
|
+
|
|
239
|
+
// Add demonstrations
|
|
240
|
+
self.add_demonstrations_to_history()?;
|
|
241
|
+
|
|
242
|
+
// Add instance template
|
|
243
|
+
let state = {
|
|
244
|
+
let env_guard = env.lock().await;
|
|
245
|
+
self.tools.get_state(&env_guard).await
|
|
246
|
+
};
|
|
247
|
+
self.add_instance_template_to_history(&state);
|
|
248
|
+
|
|
249
|
+
self.chook.on_setup_done();
|
|
250
|
+
|
|
251
|
+
Ok(())
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
fn get_format_dict(&self, extra: Option<HashMap<String, String>>) -> HashMap<String, String> {
|
|
255
|
+
let mut dict = extra.unwrap_or_default();
|
|
256
|
+
|
|
257
|
+
if let Some(ref ps) = self.problem_statement {
|
|
258
|
+
dict.insert("problem_statement".to_string(), ps.get_problem_statement());
|
|
259
|
+
for (k, v) in ps.get_extra_fields() {
|
|
260
|
+
dict.insert(k, v);
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
if let Some(ref cmd_docs) = self.tools.config.command_docs {
|
|
265
|
+
dict.insert("command_docs".to_string(), cmd_docs.clone());
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
dict
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
fn add_system_message_to_history(&mut self) {
|
|
272
|
+
let format_dict = self.get_format_dict(None);
|
|
273
|
+
let system_msg = render_template(&self.templates.system_template, &format_dict)
|
|
274
|
+
.unwrap_or_else(|_| self.templates.system_template.clone());
|
|
275
|
+
|
|
276
|
+
tracing::info!(agent = %self.name, "SYSTEM\n{}", system_msg);
|
|
277
|
+
|
|
278
|
+
self.append_history(HistoryItem {
|
|
279
|
+
role: Role::System,
|
|
280
|
+
content: Content::Text(system_msg),
|
|
281
|
+
agent: Some(self.name.clone()),
|
|
282
|
+
message_type: Some(MessageType::System),
|
|
283
|
+
..Default::default()
|
|
284
|
+
});
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
/// Adds every configured demonstration to the history, stopping at the first failure.
fn add_demonstrations_to_history(&mut self) -> Result<()> {
    // Iterate over a copy of the path list: loading a demonstration needs `&mut self`.
    let demo_paths = self.templates.demonstrations.clone();
    demo_paths
        .iter()
        .try_for_each(|path| self.add_demonstration_to_history(path))
}
|
|
293
|
+
|
|
294
|
+
fn add_demonstration_to_history(&mut self, demo_path: &str) -> Result<()> {
|
|
295
|
+
if self.templates.demonstration_template.is_none() && !self.templates.put_demos_in_history {
|
|
296
|
+
return Err(SWEAgentError::ConfigurationError(
|
|
297
|
+
"Cannot use demonstrations without demonstration_template or put_demos_in_history"
|
|
298
|
+
.to_string(),
|
|
299
|
+
));
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
tracing::info!(path = demo_path, "Loading demonstration");
|
|
303
|
+
let content = std::fs::read_to_string(demo_path)?;
|
|
304
|
+
|
|
305
|
+
// Parse demonstration (YAML or JSON)
|
|
306
|
+
let demo_history: Vec<HistoryItem> =
|
|
307
|
+
if demo_path.ends_with(".yaml") || demo_path.ends_with(".yml") {
|
|
308
|
+
let parsed: serde_yaml::Value = serde_yaml::from_str(&content)?;
|
|
309
|
+
if let Some(history) = parsed.get("history") {
|
|
310
|
+
serde_yaml::from_value(history.clone())?
|
|
311
|
+
} else {
|
|
312
|
+
Vec::new()
|
|
313
|
+
}
|
|
314
|
+
} else {
|
|
315
|
+
let parsed: serde_json::Value = serde_json::from_str(&content)?;
|
|
316
|
+
if let Some(history) = parsed.get("history") {
|
|
317
|
+
serde_json::from_value(history.clone())?
|
|
318
|
+
} else {
|
|
319
|
+
Vec::new()
|
|
320
|
+
}
|
|
321
|
+
};
|
|
322
|
+
|
|
323
|
+
if self.templates.put_demos_in_history {
|
|
324
|
+
for mut entry in demo_history {
|
|
325
|
+
if entry.role != Role::System {
|
|
326
|
+
entry.is_demo = Some(true);
|
|
327
|
+
entry.agent = Some(entry.agent.unwrap_or_else(|| self.name.clone()));
|
|
328
|
+
self.append_history(entry);
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
} else if let Some(ref template) = self.templates.demonstration_template {
|
|
332
|
+
let demo_text: String = demo_history
|
|
333
|
+
.iter()
|
|
334
|
+
.filter(|e| e.role != Role::System)
|
|
335
|
+
.map(|e| e.content.as_str())
|
|
336
|
+
.collect::<Vec<_>>()
|
|
337
|
+
.join("\n");
|
|
338
|
+
|
|
339
|
+
let mut vars = HashMap::new();
|
|
340
|
+
vars.insert("demonstration".to_string(), demo_text);
|
|
341
|
+
let demonstration = render_template(template, &vars)?;
|
|
342
|
+
|
|
343
|
+
self.append_history(HistoryItem {
|
|
344
|
+
role: Role::User,
|
|
345
|
+
content: Content::Text(demonstration),
|
|
346
|
+
agent: Some(self.name.clone()),
|
|
347
|
+
is_demo: Some(true),
|
|
348
|
+
message_type: Some(MessageType::Demonstration),
|
|
349
|
+
..Default::default()
|
|
350
|
+
});
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
Ok(())
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
fn add_instance_template_to_history(&mut self, state: &HashMap<String, String>) {
|
|
357
|
+
let format_dict = self.get_format_dict(Some(state.clone()));
|
|
358
|
+
|
|
359
|
+
let mut templates = vec![self.templates.instance_template.clone()];
|
|
360
|
+
if let Some(ref strategy) = self.templates.strategy_template {
|
|
361
|
+
templates.push(strategy.clone());
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
let message: String = templates
|
|
365
|
+
.iter()
|
|
366
|
+
.filter_map(|t| render_template(t, &format_dict).ok())
|
|
367
|
+
.collect::<Vec<_>>()
|
|
368
|
+
.join("\n");
|
|
369
|
+
|
|
370
|
+
self.append_history(HistoryItem {
|
|
371
|
+
role: Role::User,
|
|
372
|
+
content: Content::Text(message),
|
|
373
|
+
agent: Some(self.name.clone()),
|
|
374
|
+
message_type: Some(MessageType::Observation),
|
|
375
|
+
..Default::default()
|
|
376
|
+
});
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
#[allow(dead_code)]
|
|
380
|
+
fn get_trajectory(&self) -> Trajectory {
|
|
381
|
+
self.trajectory.clone()
|
|
382
|
+
}
|
|
383
|
+
|
|
384
|
+
fn save_trajectory(&self) -> Result<()> {
|
|
385
|
+
if let Some(ref path) = self.traj_path {
|
|
386
|
+
let data = self.get_trajectory_data();
|
|
387
|
+
let json = serde_json::to_string_pretty(&data)?;
|
|
388
|
+
std::fs::write(path, json)?;
|
|
389
|
+
}
|
|
390
|
+
Ok(())
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
async fn forward(&mut self, history: History) -> Result<StepOutput> {
|
|
394
|
+
if self.total_execution_time > self.tools.config.total_execution_timeout as f64 {
|
|
395
|
+
return Err(SWEAgentError::TotalExecutionTimeExceeded);
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
let mut step = StepOutput {
|
|
399
|
+
query: history
|
|
400
|
+
.iter()
|
|
401
|
+
.map(|h| QueryMessage {
|
|
402
|
+
role: h.role.clone(),
|
|
403
|
+
content: h.content.as_str(),
|
|
404
|
+
message_type: h.message_type.clone(),
|
|
405
|
+
})
|
|
406
|
+
.collect(),
|
|
407
|
+
..Default::default()
|
|
408
|
+
};
|
|
409
|
+
|
|
410
|
+
// Query model
|
|
411
|
+
self.chook.on_model_query(&history, &self.name);
|
|
412
|
+
|
|
413
|
+
let output = self.model.query(&history).await?;
|
|
414
|
+
|
|
415
|
+
step.output = output.message.clone();
|
|
416
|
+
|
|
417
|
+
// Parse thought and action
|
|
418
|
+
let (thought, action) = self.tools.parse_actions(&output)?;
|
|
419
|
+
step.thought = thought;
|
|
420
|
+
step.action = action;
|
|
421
|
+
step.thinking_blocks = output.thinking_blocks;
|
|
422
|
+
step.tool_calls = output.tool_calls.clone();
|
|
423
|
+
|
|
424
|
+
if let Some(ref tool_calls) = output.tool_calls {
|
|
425
|
+
step.tool_call_ids = Some(tool_calls.iter().map(|tc| tc.id.clone()).collect());
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
tracing::info!(
|
|
429
|
+
thought = %step.thought,
|
|
430
|
+
action = %step.action,
|
|
431
|
+
"💭 THOUGHT / 🎬 ACTION"
|
|
432
|
+
);
|
|
433
|
+
|
|
434
|
+
self.chook.on_actions_generated(&step);
|
|
435
|
+
|
|
436
|
+
self.handle_action(&mut step).await
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
async fn handle_action(&mut self, step: &mut StepOutput) -> Result<StepOutput> {
|
|
440
|
+
// Check if action is blocked
|
|
441
|
+
if self.tools.should_block_action(&step.action) {
|
|
442
|
+
return Err(SWEAgentError::BlockedAction(step.action.clone()));
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
// Handle exit command
|
|
446
|
+
if step.action.trim() == "exit" {
|
|
447
|
+
tracing::info!("Exiting agent");
|
|
448
|
+
step.done = true;
|
|
449
|
+
step.observation = "Exited".to_string();
|
|
450
|
+
step.exit_status = Some("exit_command".to_string());
|
|
451
|
+
|
|
452
|
+
if let Some(ref env) = self.env {
|
|
453
|
+
let env_guard = env.lock().await;
|
|
454
|
+
let state_map = self.tools.get_state(&env_guard).await;
|
|
455
|
+
step.state = EnvironmentState {
|
|
456
|
+
working_dir: state_map.get("working_dir").cloned(),
|
|
457
|
+
open_files: state_map
|
|
458
|
+
.get("open_files")
|
|
459
|
+
.map(|s| s.split(", ").map(String::from).collect()),
|
|
460
|
+
git_status: state_map.get("git_status").cloned(),
|
|
461
|
+
..Default::default()
|
|
462
|
+
};
|
|
463
|
+
}
|
|
464
|
+
|
|
465
|
+
return Ok(step.clone());
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
self.chook.on_action_started(step);
|
|
469
|
+
let execution_start = std::time::Instant::now();
|
|
470
|
+
|
|
471
|
+
let run_action = self.tools.guard_multiline_input(&step.action);
|
|
472
|
+
|
|
473
|
+
// Execute command
|
|
474
|
+
let observation = if let Some(ref env) = self.env {
|
|
475
|
+
let env_guard = env.lock().await;
|
|
476
|
+
match env_guard
|
|
477
|
+
.communicate(&run_action, Some(self.tools.config.execution_timeout))
|
|
478
|
+
.await
|
|
479
|
+
{
|
|
480
|
+
Ok(output) => {
|
|
481
|
+
self.n_consecutive_timeouts = 0;
|
|
482
|
+
output
|
|
483
|
+
}
|
|
484
|
+
Err(SWEAgentError::CommandTimeout { timeout, command }) => {
|
|
485
|
+
self.n_consecutive_timeouts += 1;
|
|
486
|
+
if self.n_consecutive_timeouts
|
|
487
|
+
>= self.tools.config.max_consecutive_execution_timeouts
|
|
488
|
+
{
|
|
489
|
+
return Err(SWEAgentError::CommandTimeout { timeout, command });
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
env_guard.interrupt_session().await?;
|
|
493
|
+
|
|
494
|
+
let mut vars = HashMap::new();
|
|
495
|
+
vars.insert("timeout".to_string(), timeout.to_string());
|
|
496
|
+
vars.insert("command".to_string(), command);
|
|
497
|
+
render_template(&self.templates.command_cancelled_timeout_template, &vars)?
|
|
498
|
+
}
|
|
499
|
+
Err(e) => return Err(e),
|
|
500
|
+
}
|
|
501
|
+
} else {
|
|
502
|
+
return Err(SWEAgentError::EnvironmentError(
|
|
503
|
+
"Environment not initialized".to_string(),
|
|
504
|
+
));
|
|
505
|
+
};
|
|
506
|
+
|
|
507
|
+
step.observation = observation.clone();
|
|
508
|
+
step.execution_time = execution_start.elapsed().as_secs_f64();
|
|
509
|
+
self.total_execution_time += step.execution_time;
|
|
510
|
+
|
|
511
|
+
self.chook.on_action_executed(step);
|
|
512
|
+
|
|
513
|
+
if let Some(ref env) = self.env {
|
|
514
|
+
let env_guard = env.lock().await;
|
|
515
|
+
let state_map = self.tools.get_state(&env_guard).await;
|
|
516
|
+
step.state = EnvironmentState {
|
|
517
|
+
working_dir: state_map.get("working_dir").cloned(),
|
|
518
|
+
open_files: state_map
|
|
519
|
+
.get("open_files")
|
|
520
|
+
.map(|s| s.split(", ").map(String::from).collect()),
|
|
521
|
+
git_status: state_map.get("git_status").cloned(),
|
|
522
|
+
..Default::default()
|
|
523
|
+
};
|
|
524
|
+
}
|
|
525
|
+
|
|
526
|
+
// Check for special tokens
|
|
527
|
+
if observation.contains(tokens::RETRY_WITH_OUTPUT) {
|
|
528
|
+
step.observation = observation.replace(tokens::RETRY_WITH_OUTPUT, "");
|
|
529
|
+
return Err(SWEAgentError::RetryWithOutput);
|
|
530
|
+
} else if observation.contains(tokens::RETRY_WITHOUT_OUTPUT) {
|
|
531
|
+
step.observation = observation.replace(tokens::RETRY_WITHOUT_OUTPUT, "");
|
|
532
|
+
return Err(SWEAgentError::RetryWithoutOutput);
|
|
533
|
+
} else if observation.contains(tokens::EXIT_FORFEIT) {
|
|
534
|
+
return Err(SWEAgentError::ExitForfeit);
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
self.handle_submission(step, None, false).await
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
async fn handle_submission(
|
|
541
|
+
&self,
|
|
542
|
+
step: &mut StepOutput,
|
|
543
|
+
observation: Option<&str>,
|
|
544
|
+
force_submission: bool,
|
|
545
|
+
) -> Result<StepOutput> {
|
|
546
|
+
let obs = observation.unwrap_or(&step.observation);
|
|
547
|
+
let is_submission = self.tools.check_for_submission_cmd(obs);
|
|
548
|
+
|
|
549
|
+
if is_submission || force_submission {
|
|
550
|
+
if let Some(ref env) = self.env {
|
|
551
|
+
let env_guard = env.lock().await;
|
|
552
|
+
match env_guard.read_file("/root/model.patch").await {
|
|
553
|
+
Ok(submission) => {
|
|
554
|
+
let trimmed = submission.trim();
|
|
555
|
+
if !trimmed.is_empty() {
|
|
556
|
+
step.submission = Some(submission.clone());
|
|
557
|
+
step.observation = submission;
|
|
558
|
+
}
|
|
559
|
+
|
|
560
|
+
if step.exit_status.is_none() {
|
|
561
|
+
step.exit_status = Some("submitted".to_string());
|
|
562
|
+
} else if step.submission.is_some() {
|
|
563
|
+
let status = step.exit_status.as_ref().unwrap();
|
|
564
|
+
step.exit_status = Some(format!("submitted ({})", status));
|
|
565
|
+
}
|
|
566
|
+
|
|
567
|
+
step.done = true;
|
|
568
|
+
tracing::info!(submission = ?step.submission, "Found submission");
|
|
569
|
+
}
|
|
570
|
+
Err(_) => {
|
|
571
|
+
tracing::warn!("Submission file not found");
|
|
572
|
+
}
|
|
573
|
+
}
|
|
574
|
+
}
|
|
575
|
+
}
|
|
576
|
+
|
|
577
|
+
Ok(step.clone())
|
|
578
|
+
}
|
|
579
|
+
|
|
580
|
+
fn add_step_to_trajectory(&mut self, step: &StepOutput) {
|
|
581
|
+
self.trajectory.push(TrajectoryStep::from(step));
|
|
582
|
+
}
|
|
583
|
+
|
|
584
|
+
async fn forward_with_handling(&mut self, mut history: History) -> Result<StepOutput> {
|
|
585
|
+
let mut n_format_fails = 0;
|
|
586
|
+
|
|
587
|
+
loop {
|
|
588
|
+
match self.forward(history.clone()).await {
|
|
589
|
+
Ok(step) => return Ok(step),
|
|
590
|
+
Err(e) => {
|
|
591
|
+
if e.should_exit() {
|
|
592
|
+
let mut step = StepOutput {
|
|
593
|
+
done: true,
|
|
594
|
+
thought: e.to_string(),
|
|
595
|
+
exit_status: Some(e.exit_status().to_string()),
|
|
596
|
+
..Default::default()
|
|
597
|
+
};
|
|
598
|
+
return self.attempt_autosubmission_after_error(&mut step).await;
|
|
599
|
+
}
|
|
600
|
+
|
|
601
|
+
if e.should_retry() {
|
|
602
|
+
n_format_fails += 1;
|
|
603
|
+
if n_format_fails >= self.max_requeries {
|
|
604
|
+
let mut step = StepOutput {
|
|
605
|
+
done: true,
|
|
606
|
+
thought: "Exit due to repeated format errors".to_string(),
|
|
607
|
+
exit_status: Some("exit_format".to_string()),
|
|
608
|
+
..Default::default()
|
|
609
|
+
};
|
|
610
|
+
return self.attempt_autosubmission_after_error(&mut step).await;
|
|
611
|
+
}
|
|
612
|
+
|
|
613
|
+
// Prepare requery
|
|
614
|
+
let template = &self.tools.config.format_error_template;
|
|
615
|
+
let vars = self.get_format_dict(None);
|
|
616
|
+
let error_msg = render_template(template, &vars)?;
|
|
617
|
+
|
|
618
|
+
history = self.get_messages();
|
|
619
|
+
history.push(HistoryItem {
|
|
620
|
+
role: Role::User,
|
|
621
|
+
content: Content::Text(error_msg),
|
|
622
|
+
agent: Some(self.name.clone()),
|
|
623
|
+
message_type: Some(MessageType::User),
|
|
624
|
+
..Default::default()
|
|
625
|
+
});
|
|
626
|
+
|
|
627
|
+
continue;
|
|
628
|
+
}
|
|
629
|
+
|
|
630
|
+
return Err(e);
|
|
631
|
+
}
|
|
632
|
+
}
|
|
633
|
+
}
|
|
634
|
+
}
|
|
635
|
+
|
|
636
|
+
async fn attempt_autosubmission_after_error(
|
|
637
|
+
&self,
|
|
638
|
+
step: &mut StepOutput,
|
|
639
|
+
) -> Result<StepOutput> {
|
|
640
|
+
tracing::warn!("Attempting autosubmission after error");
|
|
641
|
+
step.done = true;
|
|
642
|
+
|
|
643
|
+
if let Some(ref env) = self.env {
|
|
644
|
+
let env_guard = env.lock().await;
|
|
645
|
+
|
|
646
|
+
// Try to create submission
|
|
647
|
+
let submission_cmd = "git add -A && git diff --cached > /root/model.patch";
|
|
648
|
+
let _ = env_guard.communicate(submission_cmd, Some(30)).await;
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
self.handle_submission(step, None, true).await
|
|
652
|
+
}
|
|
653
|
+
}
|
|
654
|
+
|
|
655
|
+
#[async_trait]
|
|
656
|
+
impl Agent for DefaultAgent {
|
|
657
|
+
fn add_hook(&mut self, hook: Box<dyn AgentHook>) {
|
|
658
|
+
self.chook.add_hook(hook);
|
|
659
|
+
}
|
|
660
|
+
|
|
661
|
+
fn get_trajectory_data(&self) -> TrajectoryData {
|
|
662
|
+
TrajectoryData {
|
|
663
|
+
trajectory: self.trajectory.clone(),
|
|
664
|
+
history: self.history.clone(),
|
|
665
|
+
info: self.info.clone(),
|
|
666
|
+
replay_config: None,
|
|
667
|
+
environment: "unknown".to_string(),
|
|
668
|
+
}
|
|
669
|
+
}
|
|
670
|
+
|
|
671
|
+
async fn step(&mut self) -> Result<StepOutput> {
|
|
672
|
+
self.chook.on_step_start();
|
|
673
|
+
|
|
674
|
+
let n_step = self.trajectory.len() + 1;
|
|
675
|
+
tracing::info!(step = n_step, "Starting step");
|
|
676
|
+
|
|
677
|
+
let messages = self.get_messages();
|
|
678
|
+
let step_output = self.forward_with_handling(messages).await?;
|
|
679
|
+
|
|
680
|
+
// Add to history
|
|
681
|
+
self.append_history(HistoryItem {
|
|
682
|
+
role: Role::Assistant,
|
|
683
|
+
content: Content::Text(step_output.output.clone()),
|
|
684
|
+
thought: Some(step_output.thought.clone()),
|
|
685
|
+
action: Some(step_output.action.clone()),
|
|
686
|
+
agent: Some(self.name.clone()),
|
|
687
|
+
tool_calls: step_output.tool_calls.clone(),
|
|
688
|
+
message_type: Some(MessageType::Action),
|
|
689
|
+
thinking_blocks: step_output.thinking_blocks.clone(),
|
|
690
|
+
..Default::default()
|
|
691
|
+
});
|
|
692
|
+
|
|
693
|
+
// Add observation
|
|
694
|
+
let observation = &step_output.observation;
|
|
695
|
+
let template = if observation.trim().is_empty() {
|
|
696
|
+
self.templates
|
|
697
|
+
.next_step_no_output_template
|
|
698
|
+
.as_ref()
|
|
699
|
+
.unwrap_or(&self.templates.next_step_template)
|
|
700
|
+
} else if observation.len() > self.templates.max_observation_length {
|
|
701
|
+
&self.templates.next_step_truncated_observation_template
|
|
702
|
+
} else {
|
|
703
|
+
&self.templates.next_step_template
|
|
704
|
+
};
|
|
705
|
+
|
|
706
|
+
let mut format_dict = self.get_format_dict(None);
|
|
707
|
+
format_dict.insert("observation".to_string(), observation.clone());
|
|
708
|
+
format_dict.insert(
|
|
709
|
+
"elided_chars".to_string(),
|
|
710
|
+
(observation
|
|
711
|
+
.len()
|
|
712
|
+
.saturating_sub(self.templates.max_observation_length))
|
|
713
|
+
.to_string(),
|
|
714
|
+
);
|
|
715
|
+
format_dict.insert(
|
|
716
|
+
"max_observation_length".to_string(),
|
|
717
|
+
self.templates.max_observation_length.to_string(),
|
|
718
|
+
);
|
|
719
|
+
|
|
720
|
+
let obs_message = render_template(template, &format_dict)?;
|
|
721
|
+
|
|
722
|
+
self.append_history(HistoryItem {
|
|
723
|
+
role: Role::User,
|
|
724
|
+
content: Content::Text(obs_message),
|
|
725
|
+
agent: Some(self.name.clone()),
|
|
726
|
+
message_type: Some(MessageType::Observation),
|
|
727
|
+
tool_call_ids: step_output.tool_call_ids.clone(),
|
|
728
|
+
..Default::default()
|
|
729
|
+
});
|
|
730
|
+
|
|
731
|
+
// Update info
|
|
732
|
+
self.info.submission = step_output.submission.clone();
|
|
733
|
+
self.info.exit_status = step_output.exit_status.clone();
|
|
734
|
+
self.info.model_stats = Some(self.model.get_stats().to_model_stats());
|
|
735
|
+
|
|
736
|
+
self.add_step_to_trajectory(&step_output);
|
|
737
|
+
self.chook.on_step_done(&step_output, &self.info);
|
|
738
|
+
|
|
739
|
+
Ok(step_output)
|
|
740
|
+
}
|
|
741
|
+
|
|
742
|
+
async fn run(
|
|
743
|
+
&mut self,
|
|
744
|
+
env: &mut SWEEnv,
|
|
745
|
+
problem_statement: Box<dyn ProblemStatement>,
|
|
746
|
+
output_dir: &Path,
|
|
747
|
+
) -> Result<AgentRunResult> {
|
|
748
|
+
// Wrap env in Arc<Mutex> for shared access
|
|
749
|
+
let env_arc = Arc::new(tokio::sync::Mutex::new(std::mem::take(env)));
|
|
750
|
+
|
|
751
|
+
self.setup(env_arc.clone(), problem_statement, output_dir)
|
|
752
|
+
.await?;
|
|
753
|
+
|
|
754
|
+
self.chook.on_run_start();
|
|
755
|
+
let mut step_output = StepOutput::default();
|
|
756
|
+
|
|
757
|
+
while !step_output.done {
|
|
758
|
+
step_output = self.step().await?;
|
|
759
|
+
let _ = self.save_trajectory();
|
|
760
|
+
}
|
|
761
|
+
|
|
762
|
+
self.chook.on_run_done(&self.trajectory, &self.info);
|
|
763
|
+
tracing::info!(path = ?self.traj_path, "Trajectory saved");
|
|
764
|
+
|
|
765
|
+
// Restore env
|
|
766
|
+
let restored_env = Arc::try_unwrap(env_arc)
|
|
767
|
+
.map_err(|_| SWEAgentError::RuntimeError("Could not restore environment".to_string()))?
|
|
768
|
+
.into_inner();
|
|
769
|
+
*env = restored_env;
|
|
770
|
+
|
|
771
|
+
Ok(AgentRunResult {
|
|
772
|
+
info: self.info.clone(),
|
|
773
|
+
trajectory: self.trajectory.clone(),
|
|
774
|
+
})
|
|
775
|
+
}
|
|
776
|
+
}
|
|
777
|
+
|
|
778
|
+
/// Configuration for retry agent
|
|
779
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
780
|
+
pub struct RetryAgentConfig {
|
|
781
|
+
#[serde(default)]
|
|
782
|
+
pub name: String,
|
|
783
|
+
pub agent_configs: Vec<DefaultAgentConfig>,
|
|
784
|
+
#[serde(default)]
|
|
785
|
+
pub retry_loop: RetryLoopConfig,
|
|
786
|
+
}
|
|
787
|
+
|
|
788
|
+
/// Retry agent that tries multiple configurations
|
|
789
|
+
pub struct RetryAgent {
|
|
790
|
+
config: RetryAgentConfig,
|
|
791
|
+
hooks: Vec<Box<dyn AgentHook>>,
|
|
792
|
+
i_attempt: usize,
|
|
793
|
+
agent: Option<DefaultAgent>,
|
|
794
|
+
attempt_data: Vec<TrajectoryData>,
|
|
795
|
+
total_instance_stats: InstanceStats,
|
|
796
|
+
chook: CombinedAgentHook,
|
|
797
|
+
traj_path: Option<PathBuf>,
|
|
798
|
+
problem_statement: Option<Box<dyn ProblemStatement>>,
|
|
799
|
+
env: Option<Arc<tokio::sync::Mutex<SWEEnv>>>,
|
|
800
|
+
output_dir: Option<PathBuf>,
|
|
801
|
+
retry_loop: Option<Box<dyn RetryLoop>>,
|
|
802
|
+
}
|
|
803
|
+
|
|
804
|
+
impl RetryAgent {
|
|
805
|
+
pub fn new(config: RetryAgentConfig) -> Self {
|
|
806
|
+
Self {
|
|
807
|
+
config,
|
|
808
|
+
hooks: Vec::new(),
|
|
809
|
+
i_attempt: 0,
|
|
810
|
+
agent: None,
|
|
811
|
+
attempt_data: Vec::new(),
|
|
812
|
+
total_instance_stats: InstanceStats::default(),
|
|
813
|
+
chook: CombinedAgentHook::new(),
|
|
814
|
+
traj_path: None,
|
|
815
|
+
problem_statement: None,
|
|
816
|
+
env: None,
|
|
817
|
+
output_dir: None,
|
|
818
|
+
retry_loop: None,
|
|
819
|
+
}
|
|
820
|
+
}
|
|
821
|
+
|
|
822
|
+
pub fn from_config(config: RetryAgentConfig) -> Self {
|
|
823
|
+
Self::new(config)
|
|
824
|
+
}
|
|
825
|
+
|
|
826
|
+
fn setup_agent(&mut self) -> Result<()> {
|
|
827
|
+
let agent_config_idx = self.i_attempt % self.config.agent_configs.len();
|
|
828
|
+
let agent_config = self.config.agent_configs[agent_config_idx].clone();
|
|
829
|
+
|
|
830
|
+
self.agent = Some(DefaultAgent::from_config(agent_config)?);
|
|
831
|
+
|
|
832
|
+
// Add hooks to agent
|
|
833
|
+
if let Some(ref mut _agent) = self.agent {
|
|
834
|
+
for _hook in &self.hooks {
|
|
835
|
+
// Can't clone hooks, so we skip this
|
|
836
|
+
}
|
|
837
|
+
}
|
|
838
|
+
|
|
839
|
+
Ok(())
|
|
840
|
+
}
|
|
841
|
+
|
|
842
|
+
fn next_attempt(&mut self) -> Result<()> {
|
|
843
|
+
self.i_attempt += 1;
|
|
844
|
+
|
|
845
|
+
// Reset environment if possible
|
|
846
|
+
if let Some(ref _env) = self.env {
|
|
847
|
+
// Would call hard_reset here
|
|
848
|
+
}
|
|
849
|
+
|
|
850
|
+
self.setup_agent()
|
|
851
|
+
}
|
|
852
|
+
|
|
853
|
+
fn finalize_agent_run(&mut self) {
|
|
854
|
+
if let Some(ref agent) = self.agent {
|
|
855
|
+
self.attempt_data.push(agent.get_trajectory_data());
|
|
856
|
+
self.total_instance_stats = self.total_instance_stats.add(&agent.model.get_stats());
|
|
857
|
+
}
|
|
858
|
+
}
|
|
859
|
+
|
|
860
|
+
fn save_trajectory(&self, choose: bool) -> Result<()> {
|
|
861
|
+
if let Some(ref path) = self.traj_path {
|
|
862
|
+
let data = self.get_trajectory_data_internal(choose);
|
|
863
|
+
let json = serde_json::to_string_pretty(&data)?;
|
|
864
|
+
std::fs::write(path, json)?;
|
|
865
|
+
}
|
|
866
|
+
Ok(())
|
|
867
|
+
}
|
|
868
|
+
|
|
869
|
+
fn get_trajectory_data_internal(&self, choose: bool) -> serde_json::Value {
|
|
870
|
+
let mut data = serde_json::json!({
|
|
871
|
+
"attempts": self.attempt_data,
|
|
872
|
+
});
|
|
873
|
+
|
|
874
|
+
if choose && !self.attempt_data.is_empty() {
|
|
875
|
+
let best_idx = self
|
|
876
|
+
.retry_loop
|
|
877
|
+
.as_ref()
|
|
878
|
+
.and_then(|rl| rl.get_best())
|
|
879
|
+
.unwrap_or(0);
|
|
880
|
+
|
|
881
|
+
if best_idx < self.attempt_data.len() {
|
|
882
|
+
data["info"] = serde_json::to_value(&self.attempt_data[best_idx].info).unwrap();
|
|
883
|
+
data["info"]["best_attempt_idx"] = serde_json::Value::from(best_idx);
|
|
884
|
+
data["trajectory"] =
|
|
885
|
+
serde_json::to_value(&self.attempt_data[best_idx].trajectory).unwrap();
|
|
886
|
+
}
|
|
887
|
+
}
|
|
888
|
+
|
|
889
|
+
data
|
|
890
|
+
}
|
|
891
|
+
}
|
|
892
|
+
|
|
893
|
+
#[async_trait]
|
|
894
|
+
impl Agent for RetryAgent {
|
|
895
|
+
fn add_hook(&mut self, hook: Box<dyn AgentHook>) {
|
|
896
|
+
self.chook.add_hook(hook);
|
|
897
|
+
}
|
|
898
|
+
|
|
899
|
+
fn get_trajectory_data(&self) -> TrajectoryData {
|
|
900
|
+
if let Some(ref agent) = self.agent {
|
|
901
|
+
agent.get_trajectory_data()
|
|
902
|
+
} else if !self.attempt_data.is_empty() {
|
|
903
|
+
self.attempt_data.last().unwrap().clone()
|
|
904
|
+
} else {
|
|
905
|
+
TrajectoryData {
|
|
906
|
+
trajectory: Vec::new(),
|
|
907
|
+
history: Vec::new(),
|
|
908
|
+
info: AgentInfo::default(),
|
|
909
|
+
replay_config: None,
|
|
910
|
+
environment: "unknown".to_string(),
|
|
911
|
+
}
|
|
912
|
+
}
|
|
913
|
+
}
|
|
914
|
+
|
|
915
|
+
async fn step(&mut self) -> Result<StepOutput> {
|
|
916
|
+
if let Some(ref mut agent) = self.agent {
|
|
917
|
+
agent.step().await
|
|
918
|
+
} else {
|
|
919
|
+
Err(SWEAgentError::RuntimeError(
|
|
920
|
+
"Agent not initialized".to_string(),
|
|
921
|
+
))
|
|
922
|
+
}
|
|
923
|
+
}
|
|
924
|
+
|
|
925
|
+
async fn run(
|
|
926
|
+
&mut self,
|
|
927
|
+
env: &mut SWEEnv,
|
|
928
|
+
problem_statement: Box<dyn ProblemStatement>,
|
|
929
|
+
output_dir: &Path,
|
|
930
|
+
) -> Result<AgentRunResult> {
|
|
931
|
+
std::fs::create_dir_all(output_dir)?;
|
|
932
|
+
|
|
933
|
+
self.traj_path = Some(output_dir.join(format!("{}.traj", problem_statement.id())));
|
|
934
|
+
self.problem_statement = Some(problem_statement);
|
|
935
|
+
self.output_dir = Some(output_dir.to_path_buf());
|
|
936
|
+
|
|
937
|
+
self.retry_loop = Some(get_retry_loop_from_config(&self.config.retry_loop));
|
|
938
|
+
|
|
939
|
+
let env_arc = Arc::new(tokio::sync::Mutex::new(std::mem::take(env)));
|
|
940
|
+
self.env = Some(env_arc.clone());
|
|
941
|
+
|
|
942
|
+
self.chook.on_run_start();
|
|
943
|
+
let mut step_output = StepOutput::default();
|
|
944
|
+
|
|
945
|
+
self.setup_agent()?;
|
|
946
|
+
|
|
947
|
+
// Setup agent with environment
|
|
948
|
+
if let (Some(ref mut agent), Some(ref ps)) = (&mut self.agent, &self.problem_statement) {
|
|
949
|
+
// Clone problem statement for agent
|
|
950
|
+
let ps_clone = create_problem_statement(&ProblemStatementConfig::Text {
|
|
951
|
+
text: ps.get_problem_statement(),
|
|
952
|
+
id: ps.id().to_string(),
|
|
953
|
+
})?;
|
|
954
|
+
|
|
955
|
+
agent.setup(env_arc.clone(), ps_clone, output_dir).await?;
|
|
956
|
+
}
|
|
957
|
+
|
|
958
|
+
while !step_output.done {
|
|
959
|
+
step_output = self.step().await?;
|
|
960
|
+
let _ = self.save_trajectory(false);
|
|
961
|
+
|
|
962
|
+
if step_output.done {
|
|
963
|
+
let traj_data = self.get_trajectory_data();
|
|
964
|
+
if let Some(ref mut retry_loop) = self.retry_loop {
|
|
965
|
+
retry_loop.on_submit(ReviewSubmission {
|
|
966
|
+
trajectory: traj_data.trajectory,
|
|
967
|
+
info: traj_data.info,
|
|
968
|
+
submission: step_output.submission.clone(),
|
|
969
|
+
});
|
|
970
|
+
}
|
|
971
|
+
|
|
972
|
+
self.finalize_agent_run();
|
|
973
|
+
let _ = self.save_trajectory(false);
|
|
974
|
+
|
|
975
|
+
if let Some(ref retry_loop) = self.retry_loop {
|
|
976
|
+
if retry_loop.should_retry() {
|
|
977
|
+
self.next_attempt()?;
|
|
978
|
+
step_output.done = false;
|
|
979
|
+
}
|
|
980
|
+
}
|
|
981
|
+
}
|
|
982
|
+
}
|
|
983
|
+
|
|
984
|
+
let _ = self.save_trajectory(true);
|
|
985
|
+
|
|
986
|
+
if let Some(ref agent) = self.agent {
|
|
987
|
+
self.chook.on_run_done(&agent.trajectory, &agent.info);
|
|
988
|
+
}
|
|
989
|
+
|
|
990
|
+
tracing::info!(path = ?self.traj_path, "Trajectory saved");
|
|
991
|
+
|
|
992
|
+
// Restore env
|
|
993
|
+
let restored_env = Arc::try_unwrap(env_arc)
|
|
994
|
+
.map_err(|_| SWEAgentError::RuntimeError("Could not restore environment".to_string()))?
|
|
995
|
+
.into_inner();
|
|
996
|
+
*env = restored_env;
|
|
997
|
+
|
|
998
|
+
Ok(AgentRunResult {
|
|
999
|
+
info: self.get_trajectory_data().info,
|
|
1000
|
+
trajectory: self.get_trajectory_data().trajectory,
|
|
1001
|
+
})
|
|
1002
|
+
}
|
|
1003
|
+
}
|
|
1004
|
+
|
|
1005
|
+
/// Union type for agent configurations
|
|
1006
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
1007
|
+
#[serde(tag = "type", rename_all = "snake_case")]
|
|
1008
|
+
pub enum AgentConfig {
|
|
1009
|
+
Default(Box<DefaultAgentConfig>),
|
|
1010
|
+
Retry(RetryAgentConfig),
|
|
1011
|
+
}
|
|
1012
|
+
|
|
1013
|
+
impl Default for AgentConfig {
|
|
1014
|
+
fn default() -> Self {
|
|
1015
|
+
Self::Default(Box::default())
|
|
1016
|
+
}
|
|
1017
|
+
}
|
|
1018
|
+
|
|
1019
|
+
/// Create an agent from configuration
|
|
1020
|
+
pub fn get_agent_from_config(config: AgentConfig) -> Result<Box<dyn Agent>> {
|
|
1021
|
+
match config {
|
|
1022
|
+
AgentConfig::Default(cfg) => Ok(Box::new(DefaultAgent::from_config(*cfg)?)),
|
|
1023
|
+
AgentConfig::Retry(cfg) => Ok(Box::new(RetryAgent::from_config(cfg))),
|
|
1024
|
+
}
|
|
1025
|
+
}
|
|
1026
|
+
|
|
1027
|
+
// Re-exports for convenience
|
|
1028
|
+
pub use super::hooks::AgentHook as AbstractAgentHook;
|
|
1029
|
+
pub use super::models::Model as AbstractModel;
|