@elizaos/sweagent-root 2.0.0-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +270 -0
- package/package.json +71 -0
- package/python/LICENSE +21 -0
- package/python/config/README.md +15 -0
- package/python/config/bash_only.yaml +222 -0
- package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
- package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
- package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
- package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
- package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
- package/python/config/coding_challenge.yaml +104 -0
- package/python/config/default.yaml +69 -0
- package/python/config/default_backticks.yaml +69 -0
- package/python/config/default_mm_no_images.yaml +82 -0
- package/python/config/default_mm_with_images.yaml +83 -0
- package/python/config/demo/default.yaml +80 -0
- package/python/config/demo/no_instructions.yaml +69 -0
- package/python/config/demo/only_bash.yaml +60 -0
- package/python/config/exotic/default_shell.yaml +52 -0
- package/python/config/exotic/windowed_replace.yaml +125 -0
- package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
- package/python/config/human/human.yaml +24 -0
- package/python/config/human/human_demo.yaml +52 -0
- package/python/config/sweagent_0_7/07.yaml +101 -0
- package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
- package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
- package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
- package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
- package/python/mlc_config.json +44 -0
- package/python/pyproject.toml +262 -0
- package/python/sweagent/__init__.py +114 -0
- package/python/sweagent/__main__.py +4 -0
- package/python/sweagent/agent/__init__.py +0 -0
- package/python/sweagent/agent/action_sampler.py +317 -0
- package/python/sweagent/agent/agents.py +1294 -0
- package/python/sweagent/agent/extra/shell_agent.py +106 -0
- package/python/sweagent/agent/history_processors.py +399 -0
- package/python/sweagent/agent/hooks/__init__.py +0 -0
- package/python/sweagent/agent/hooks/abstract.py +139 -0
- package/python/sweagent/agent/hooks/status.py +34 -0
- package/python/sweagent/agent/models.py +896 -0
- package/python/sweagent/agent/problem_statement.py +312 -0
- package/python/sweagent/agent/reviewer.py +664 -0
- package/python/sweagent/environment/__init__.py +0 -0
- package/python/sweagent/environment/hooks/__init__.py +0 -0
- package/python/sweagent/environment/hooks/abstract.py +60 -0
- package/python/sweagent/environment/hooks/status.py +28 -0
- package/python/sweagent/environment/repo.py +219 -0
- package/python/sweagent/environment/swe_env.py +276 -0
- package/python/sweagent/exceptions.py +54 -0
- package/python/sweagent/inspector/README.md +6 -0
- package/python/sweagent/inspector/__init__.py +0 -0
- package/python/sweagent/inspector/favicon.ico +0 -0
- package/python/sweagent/inspector/fileViewer.js +354 -0
- package/python/sweagent/inspector/icons/computer.png +0 -0
- package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
- package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
- package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
- package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
- package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
- package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
- package/python/sweagent/inspector/index.html +25 -0
- package/python/sweagent/inspector/server.py +354 -0
- package/python/sweagent/inspector/static.py +169 -0
- package/python/sweagent/inspector/style.css +454 -0
- package/python/sweagent/run/__init__.py +0 -0
- package/python/sweagent/run/_progress.py +158 -0
- package/python/sweagent/run/batch_instances.py +419 -0
- package/python/sweagent/run/common.py +387 -0
- package/python/sweagent/run/compare_runs.py +123 -0
- package/python/sweagent/run/extract_pred.py +19 -0
- package/python/sweagent/run/hooks/__init__.py +0 -0
- package/python/sweagent/run/hooks/abstract.py +67 -0
- package/python/sweagent/run/hooks/apply_patch.py +106 -0
- package/python/sweagent/run/hooks/open_pr.py +244 -0
- package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
- package/python/sweagent/run/inspector_cli.py +493 -0
- package/python/sweagent/run/merge_predictions.py +64 -0
- package/python/sweagent/run/quick_stats.py +96 -0
- package/python/sweagent/run/remove_unfinished.py +63 -0
- package/python/sweagent/run/rich_test.py +91 -0
- package/python/sweagent/run/run.py +147 -0
- package/python/sweagent/run/run_batch.py +442 -0
- package/python/sweagent/run/run_replay.py +219 -0
- package/python/sweagent/run/run_shell.py +155 -0
- package/python/sweagent/run/run_single.py +225 -0
- package/python/sweagent/run/run_traj_to_demo.py +85 -0
- package/python/sweagent/tools/__init__.py +0 -0
- package/python/sweagent/tools/bundle.py +57 -0
- package/python/sweagent/tools/commands.py +220 -0
- package/python/sweagent/tools/parsing.py +619 -0
- package/python/sweagent/tools/tools.py +430 -0
- package/python/sweagent/tools/utils.py +108 -0
- package/python/sweagent/types.py +102 -0
- package/python/sweagent/utils/__init__.py +0 -0
- package/python/sweagent/utils/config.py +80 -0
- package/python/sweagent/utils/files.py +27 -0
- package/python/sweagent/utils/github.py +118 -0
- package/python/sweagent/utils/jinja_warnings.py +14 -0
- package/python/sweagent/utils/log.py +175 -0
- package/python/sweagent/utils/patch_formatter.py +152 -0
- package/python/sweagent/utils/serialization.py +45 -0
- package/python/tests/__init__.py +0 -0
- package/python/tests/conftest.py +191 -0
- package/python/tests/test_agent.py +258 -0
- package/python/tests/test_batch_instance.py +43 -0
- package/python/tests/test_commands/_interactive_dummy.py +35 -0
- package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
- package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
- package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
- package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
- package/python/tests/test_data/data_sources/human_eval.json +1 -0
- package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
- package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
- package/python/tests/test_env.py +66 -0
- package/python/tests/test_env_utils.py +129 -0
- package/python/tests/test_history_processors.py +40 -0
- package/python/tests/test_models.py +23 -0
- package/python/tests/test_openai_live.py +164 -0
- package/python/tests/test_packaging.py +7 -0
- package/python/tests/test_parsing.py +131 -0
- package/python/tests/test_problem_statement_multimodal.py +111 -0
- package/python/tests/test_quick_stats.py +42 -0
- package/python/tests/test_run.py +37 -0
- package/python/tests/test_run_batch.py +110 -0
- package/python/tests/test_run_hooks.py +114 -0
- package/python/tests/test_run_replay.py +33 -0
- package/python/tests/test_run_single.py +125 -0
- package/python/tests/test_tools_command_parsing.py +193 -0
- package/python/tests/test_utils.py +15 -0
- package/python/tests/tools/__init__.py +0 -0
- package/python/tests/tools/conftest.py +12 -0
- package/python/tests/tools/test_default_utils.py +153 -0
- package/python/tests/tools/test_edit_replace.py +0 -0
- package/python/tests/tools/test_split_string.py +82 -0
- package/python/tests/utils.py +29 -0
- package/python/tools/diff_state/bin/_state_diff_state +52 -0
- package/python/tools/diff_state/config.yaml +2 -0
- package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
- package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
- package/python/tools/edit_anthropic/config.yaml +56 -0
- package/python/tools/edit_anthropic/install.sh +3 -0
- package/python/tools/filemap/bin/filemap +45 -0
- package/python/tools/filemap/config.yaml +9 -0
- package/python/tools/filemap/install.sh +2 -0
- package/python/tools/forfeit/bin/exit_forfeit +5 -0
- package/python/tools/forfeit/config.yaml +5 -0
- package/python/tools/image_tools/bin/view_image +36 -0
- package/python/tools/image_tools/config.yaml +9 -0
- package/python/tools/multilingual_setup/bin/do_nothing +2 -0
- package/python/tools/multilingual_setup/config.yaml +1 -0
- package/python/tools/multilingual_setup/install.sh +45 -0
- package/python/tools/registry/bin/_read_env +10 -0
- package/python/tools/registry/bin/_write_env +10 -0
- package/python/tools/registry/config.yaml +1 -0
- package/python/tools/registry/install.sh +6 -0
- package/python/tools/registry/lib/__init__.py +0 -0
- package/python/tools/registry/lib/registry.py +56 -0
- package/python/tools/review_on_submit_m/README.md +6 -0
- package/python/tools/review_on_submit_m/bin/submit +54 -0
- package/python/tools/review_on_submit_m/config.yaml +6 -0
- package/python/tools/review_on_submit_m/install.sh +0 -0
- package/python/tools/search/bin/find_file +31 -0
- package/python/tools/search/bin/search_dir +39 -0
- package/python/tools/search/bin/search_file +55 -0
- package/python/tools/search/config.yaml +37 -0
- package/python/tools/search/install.sh +3 -0
- package/python/tools/submit/bin/submit +17 -0
- package/python/tools/submit/config.yaml +5 -0
- package/python/tools/web_browser/bin/click_mouse +41 -0
- package/python/tools/web_browser/bin/close_site +28 -0
- package/python/tools/web_browser/bin/double_click_mouse +37 -0
- package/python/tools/web_browser/bin/drag_mouse +46 -0
- package/python/tools/web_browser/bin/execute_script_on_page +39 -0
- package/python/tools/web_browser/bin/get_console_output +48 -0
- package/python/tools/web_browser/bin/move_mouse +35 -0
- package/python/tools/web_browser/bin/navigate_back +33 -0
- package/python/tools/web_browser/bin/navigate_forward +33 -0
- package/python/tools/web_browser/bin/open_site +36 -0
- package/python/tools/web_browser/bin/press_keys_on_page +51 -0
- package/python/tools/web_browser/bin/reload_page +33 -0
- package/python/tools/web_browser/bin/run_web_browser_server +394 -0
- package/python/tools/web_browser/bin/screenshot_site +38 -0
- package/python/tools/web_browser/bin/scroll_on_page +40 -0
- package/python/tools/web_browser/bin/set_browser_window_size +40 -0
- package/python/tools/web_browser/bin/type_text +34 -0
- package/python/tools/web_browser/bin/wait_time +39 -0
- package/python/tools/web_browser/config.yaml +155 -0
- package/python/tools/web_browser/install.sh +22 -0
- package/python/tools/web_browser/lib/browser_manager.py +404 -0
- package/python/tools/web_browser/lib/web_browser_config.py +33 -0
- package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
- package/python/tools/web_browser/test_console.html +1 -0
- package/python/tools/windowed/bin/_state +25 -0
- package/python/tools/windowed/bin/create +29 -0
- package/python/tools/windowed/bin/goto +37 -0
- package/python/tools/windowed/bin/open +49 -0
- package/python/tools/windowed/bin/scroll_down +12 -0
- package/python/tools/windowed/bin/scroll_up +13 -0
- package/python/tools/windowed/config.yaml +38 -0
- package/python/tools/windowed/install.sh +15 -0
- package/python/tools/windowed/lib/__init__.py +0 -0
- package/python/tools/windowed/lib/flake8_utils.py +147 -0
- package/python/tools/windowed/lib/windowed_file.py +312 -0
- package/python/tools/windowed_edit_linting/bin/edit +128 -0
- package/python/tools/windowed_edit_linting/config.yaml +31 -0
- package/python/tools/windowed_edit_linting/install.sh +5 -0
- package/python/tools/windowed_edit_replace/bin/edit +172 -0
- package/python/tools/windowed_edit_replace/bin/insert +77 -0
- package/python/tools/windowed_edit_replace/config.yaml +60 -0
- package/python/tools/windowed_edit_replace/install.sh +5 -0
- package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
- package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
- package/python/tools/windowed_edit_rewrite/install.sh +5 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
- package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
- package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
- package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
- package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
- package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
- package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
- package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
- package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
- package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
- package/rust/Cargo.toml +100 -0
- package/rust/README.md +49 -0
- package/rust/src/agent/action_sampler.rs +130 -0
- package/rust/src/agent/agents.rs +1029 -0
- package/rust/src/agent/history_processors.rs +277 -0
- package/rust/src/agent/hooks/mod.rs +208 -0
- package/rust/src/agent/mod.rs +24 -0
- package/rust/src/agent/models.rs +837 -0
- package/rust/src/agent/problem_statement.rs +355 -0
- package/rust/src/agent/reviewer.rs +505 -0
- package/rust/src/bin/sweagent.rs +784 -0
- package/rust/src/environment/deployment.rs +631 -0
- package/rust/src/environment/hooks/mod.rs +114 -0
- package/rust/src/environment/mod.rs +16 -0
- package/rust/src/environment/repo.rs +265 -0
- package/rust/src/environment/runtime.rs +237 -0
- package/rust/src/environment/swe_env.rs +248 -0
- package/rust/src/exceptions.rs +228 -0
- package/rust/src/lib.rs +68 -0
- package/rust/src/monitoring.rs +482 -0
- package/rust/src/run/hooks/mod.rs +134 -0
- package/rust/src/run/mod.rs +12 -0
- package/rust/src/run/run_batch.rs +563 -0
- package/rust/src/run/run_single.rs +196 -0
- package/rust/src/tools/bundle.rs +224 -0
- package/rust/src/tools/commands.rs +173 -0
- package/rust/src/tools/mod.rs +295 -0
- package/rust/src/tools/parsing.rs +354 -0
- package/rust/src/tools/registry.rs +143 -0
- package/rust/src/types.rs +554 -0
- package/rust/src/utils/config.rs +105 -0
- package/rust/src/utils/files.rs +137 -0
- package/rust/src/utils/github.rs +171 -0
- package/rust/src/utils/log.rs +65 -0
- package/rust/src/utils/mod.rs +17 -0
- package/rust/src/utils/serialization.rs +181 -0
- package/rust/src/utils/template.rs +173 -0
- package/typescript/README.md +335 -0
|
@@ -0,0 +1,430 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module contains the configuration for the tools that are made available to the agent.
|
|
3
|
+
|
|
4
|
+
The `ToolConfig` class is used to configure the tools that are available to the agent.
|
|
5
|
+
The `ToolHandler` class is used to handle the tools that are available to the agent.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
import json
|
|
10
|
+
import os
|
|
11
|
+
import re
|
|
12
|
+
from functools import cached_property
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from pydantic import BaseModel, Field
|
|
17
|
+
from swerex.runtime.abstract import Command as RexCommand
|
|
18
|
+
from swerex.runtime.abstract import UploadRequest
|
|
19
|
+
from typing_extensions import Self
|
|
20
|
+
|
|
21
|
+
from sweagent.environment.swe_env import SWEEnv
|
|
22
|
+
from sweagent.tools.bundle import Bundle
|
|
23
|
+
from sweagent.tools.commands import BASH_COMMAND, Command
|
|
24
|
+
from sweagent.tools.parsing import FunctionCallingParser, JsonParser, ParseFunction
|
|
25
|
+
from sweagent.tools.utils import _guard_multiline_input, generate_command_docs
|
|
26
|
+
from sweagent.utils.log import get_logger
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class ToolFilterConfig(BaseModel):
    """Filter out commands that are blocked by the environment
    (for example interactive commands like `vim`).

    The three fields below describe three independent ways of blocking:
    by prefix, by exact match, and by name-unless-regex.
    """

    blocklist_error_template: str = "Operation '{{action}}' is not supported by this environment."
    """The error template to use when a command is blocked."""

    blocklist: list[str] = [
        "vim",
        "vi",
        "emacs",
        "nano",
        "nohup",
        "gdb",
        "less",
        "tail -f",
        "python -m venv",
        "make",
    ]
    """Block any command that starts with one of these"""

    blocklist_standalone: list[str] = [
        "python",
        "python3",
        "ipython",
        "bash",
        "sh",
        "/bin/bash",
        "/bin/sh",
        "nohup",
        "vi",
        "vim",
        "emacs",
        "nano",
        "su",
    ]
    """Block any command that matches one of these exactly"""

    block_unless_regex: dict[str, str] = {
        "radare2": r"\b(?:radare2)\b.*\s+-c\s+.*",
        # Each pattern has to name the command it is keyed by. The `r2` entry
        # previously reused the `radare2` pattern, so an `r2 ... -c ...`
        # invocation could never satisfy it and was always blocked.
        "r2": r"\b(?:r2)\b.*\s+-c\s+.*",
    }
    """Block any command that matches one of these names unless it also matches the regex"""
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class ToolConfig(BaseModel):
    """Settings describing which tools the agent gets and how they are run."""

    filter: ToolFilterConfig = ToolFilterConfig()
    """Rules for rejecting commands that the environment cannot handle
    (for example interactive commands like `vim`).
    """

    bundles: list[Bundle] = Field(default_factory=list)
    """The tool bundles to load."""

    propagate_env_variables: list[str] = []
    """Names of environment variables to copy from the host into the environment
    in which the tools run, e.g. to hand over API keys.
    IMPORTANT NOTE: The value of the environment variables can be read in debug log files,
    so be careful with your API keys!
    """

    env_variables: dict[str, Any] = {
        "PAGER": "cat",
        "MANPAGER": "cat",
        "LESS": "-R",
        "PIP_PROGRESS_BAR": "off",
        "TQDM_DISABLE": "1",
        "GIT_PAGER": "cat",
    }
    """Shorthand to set environment variables for the tools, effectively
    equivalent to adding `export VARNAME=value` to the `reset_commands`.
    """

    registry_variables: dict[str, Any] = {}
    """Populate the registry with these variables. Will be written out as json in the registry file."""

    submit_command: str = "submit"
    """The command/tool to use to submit the solution."""

    parse_function: ParseFunction = Field(default_factory=FunctionCallingParser)
    """The action parser that is responsible for parsing the model output into a thought and action.
    """

    enable_bash_tool: bool = True
    """Whether to enable the bash tool in addition to the other tools specified in bundles."""

    format_error_template: str = None  # type: ignore
    """Defaults to format_error_template in ParseFunction"""

    command_docs: str = None  # type: ignore
    """Documentation text generated automatically from the loaded commands
    (filled in by `model_post_init`).
    """

    multi_line_command_endings: dict[str, str] = {}
    """Maps a command name to its `end_name` for every loaded command that has one.
    Overwritten in `model_post_init`.
    """

    submit_command_end_name: str | None = None
    """`end_name` of the submit command. Set in `model_post_init` when a loaded
    command is named like `submit_command`.
    """

    reset_commands: list[str | list[str]] = []
    """Commands to reset the environment. They will also be called when we start the environment.
    These commands are part of the environment state.
    """

    execution_timeout: int = 30
    """Timeout for executing commands in the environment"""

    install_timeout: int = 300
    """Timeout used for each of the installation commands"""

    total_execution_timeout: int = 1800
    """Timeout for executing all commands in the environment.
    Note: Does not interrupt running commands, but will stop the agent for the next step.
    """

    max_consecutive_execution_timeouts: int = 3
    """Maximum number of consecutive execution timeouts before the agent exits.
    """

    @cached_property
    def use_function_calling(self) -> bool:
        """True when the configured parser is a `FunctionCallingParser`."""
        uses_fcalling = isinstance(self.parse_function, FunctionCallingParser)
        return uses_fcalling

    @cached_property
    def state_commands(self) -> list[str]:
        """Collect the state command of every bundle that defines one.

        State commands are commands that are used to get the state of the environment
        (e.g., the current working directory).
        """
        found = []
        for bundle in self.bundles:
            if bundle.state_command:
                found.append(bundle.state_command)
        return found

    # todo: move to ToolHandler?
    @cached_property
    def commands(self) -> list[Command]:
        """Gather the command objects of all bundles (plus bash, if enabled).

        Raises:
            ValueError: If two sources define a command with the same name.
        """
        collected = []
        # Remembers where each command name was first seen, for the error message
        origin: dict[str, Path] = {}

        if self.enable_bash_tool:
            origin[BASH_COMMAND.name] = Path("<builtin>")
            collected.append(BASH_COMMAND)

        for bundle in self.bundles:
            for command in bundle.commands:
                if command.name not in origin:
                    collected.append(command)
                    origin[command.name] = bundle.path
                    continue
                first_seen = origin[command.name]
                msg = (
                    f"Tool '{command.name}' is defined multiple times:\n"
                    f" - First definition in: {first_seen}\n"
                    f" - Duplicate definition in: {bundle.path}"
                )
                raise ValueError(msg)

        return collected

    @cached_property
    def tools(self) -> list[dict]:
        """Function-calling tool description of every loaded command."""
        descriptions = []
        for command in self.commands:
            descriptions.append(command.get_function_calling_tool())
        return descriptions

    # todo: can some of these be moved to ToolHandler?
    def model_post_init(self, __context):
        """Fill in the derived fields once the model has been constructed.

        Raises:
            ValueError: If the bash tool is disabled while the parser is neither a
                `FunctionCallingParser` nor a `JsonParser` (or if loading the
                commands fails, see `commands`).
        """
        # Accessing the cached properties here populates their caches
        loaded = self.commands
        endings = {}
        for cmd in loaded:
            if cmd.end_name is not None:
                endings[cmd.name] = cmd.end_name
        _ = self.tools

        # assert not self.enable_bash_tool and parse_function is FunctionCallingParser or JsonParser
        parser_allows_no_bash = isinstance(self.parse_function, (FunctionCallingParser, JsonParser))
        if not (self.enable_bash_tool or parser_allows_no_bash):
            msg = f"Bash tool can only be disabled if {FunctionCallingParser.type} parser or {JsonParser.type} parser is used."
            raise ValueError(msg)

        self.multi_line_command_endings = endings
        self.command_docs = generate_command_docs(self.commands, [], **self.env_variables)

        if self.format_error_template is None:
            self.format_error_template = self.parse_function.format_error_template

        # Only the first command carrying the submit name is considered
        submit = next((cmd for cmd in loaded if cmd.name == self.submit_command), None)
        if submit is not None:
            self.submit_command_end_name = submit.end_name
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
class ToolHandler:
|
|
228
|
+
def __init__(self, tools: ToolConfig):
    """Set up a handler for the given tool configuration.

    This class handles most of the tool usage. It has the following responsibilities:

    - Install the tools
    - Parse commands and handle multiline commands
    - Decide if an action should be blocked
    - Get the current state of the environment

    Args:
        tools: Tool configuration; the handler keeps its own deep copy.
    """
    # A private deep copy keeps handlers in different threads from sharing state
    own_config = tools.model_copy(deep=True)
    self.config = own_config
    self.logger = get_logger("swea-tools", emoji="🧰")
    # Testing aid: when set, this is returned instead of querying the environment
    self.mock_state: dict[str, str] | None = None
    # partially initialized in `install_commands`.
    self._reset_commands = []
    self._command_patterns = self._get_command_patterns()
|
|
244
|
+
|
|
245
|
+
@classmethod
def from_config(cls, config: ToolConfig) -> Self:
    """Alternate constructor: build a handler from a tool configuration.

    Args:
        config: Passed unchanged to the class constructor.

    Returns:
        The newly created instance of ``cls``.
    """
    handler = cls(config)
    return handler
|
|
248
|
+
|
|
249
|
+
# Installation & Reset
|
|
250
|
+
# --------------------
|
|
251
|
+
|
|
252
|
+
def install(self, env: SWEEnv) -> None:
    """Install the tool commands into `env` and then reset the tool state.

    Args:
        env: Environment the tools are installed into
    """
    # Order matters: the commands have to be in place before the reset runs.
    self._install_commands(env)
    self.reset(env)
|
|
255
|
+
|
|
256
|
+
def reset(self, env: SWEEnv) -> None:
    """Bring the tool-related state of `env` back to its initial condition.

    Sets the environment variables, rewrites the registry file and the state
    file, and finally runs the reset commands.

    Args:
        env: Environment to reset
    """
    self.logger.info("Resetting tools")
    config = self.config
    configured = config.env_variables.copy()
    # Values taken from the host process; `os.getenv` yields None for
    # variables that are not set there.
    propagated = {}
    for var in config.propagate_env_variables:
        propagated[var] = os.getenv(var)
    # On key collisions the propagated value wins over the configured one.
    env_variables = configured | propagated
    env.set_env_variables(env_variables)
    registry_json = json.dumps(config.registry_variables)
    env.write_file("/root/.swe-agent-env", registry_json)
    # Start from an empty state object.
    env.write_file("/root/state.json", "{}")
    reset_script = " && ".join(self._reset_commands)
    env.communicate(reset_script, check="raise", timeout=config.install_timeout)
|
|
265
|
+
|
|
266
|
+
async def _upload_bundles(self, env: SWEEnv) -> None:
    """Upload all configured bundles to `/root/tools/<bundle name>` concurrently.

    Args:
        env: Environment whose deployment runtime receives the uploads
    """
    uploads = [
        env.deployment.runtime.upload(
            UploadRequest(source_path=bundle.path.as_posix(), target_path=f"/root/tools/{bundle.path.name}")
        )
        for bundle in self.config.bundles
    ]
    await asyncio.gather(*uploads)
|
|
275
|
+
|
|
276
|
+
async def _is_command_available(self, env, command: str, env_vars: dict[str, str]) -> None:
    """Raise if `command` cannot be located with `which` in the container.

    `bash` is never looked up.

    Args:
        env: Environment whose deployment runtime executes the lookup
        command: Name of the command to look up
        env_vars: Environment variables passed to the lookup

    Raises:
        RuntimeError: if the lookup fails for any reason
    """
    if command != "bash":
        try:
            lookup = RexCommand(command=f"which {command}", shell=True, check=True, env=env_vars)
            await env.deployment.runtime.execute(lookup)
        except Exception:
            # Every failure is reported as "not available"; the original
            # exception context is suppressed on purpose (`from None`).
            msg = f"Tool {command} is not available in the container."
            raise RuntimeError(msg) from None
|
|
286
|
+
|
|
287
|
+
async def _check_available_commands(self, env: SWEEnv, env_vars: dict[str, str]) -> None:
    """Check concurrently that every configured command can be found in the container.

    Args:
        env: Environment to check
        env_vars: Environment variables used for the lookups
    """
    checks = [self._is_command_available(env, command.name, env_vars) for command in self.config.commands]
    await asyncio.gather(*checks)
|
|
291
|
+
|
|
292
|
+
def _install_commands(self, env: SWEEnv) -> None:
    """Make sure all commands are available in the container.

    Uploads the bundles, then for every bundle puts its `bin` directory on
    `PATH`, marks its files as executable and sources its `install.sh` if the
    bundle ships one. Afterwards the original working directory is restored
    and every configured command is looked up with the resulting `PATH`.

    Args:
        env: Environment the commands are installed into
    """
    import shlex  # only needed for quoting the working directory below

    env.set_env_variables(self.config.env_variables)
    # Remember where we are: sourcing `install.sh` changes the directory.
    cwd = env.communicate("pwd", check="raise").strip()
    asyncio.run(self._upload_bundles(env))
    for bundle in self.config.bundles:
        cmds = [
            f"export PATH=/root/tools/{bundle.path.name}/bin:$PATH",
            f"chmod +x /root/tools/{bundle.path.name}/bin/*",
        ]
        if (bundle.path / "install.sh").exists():
            cmds.append(f"cd /root/tools/{bundle.path.name} && source install.sh")
        # Repeated after `install.sh`, presumably to cover files the script
        # adds to `bin` -- TODO confirm.
        cmds.append(f"chmod +x /root/tools/{bundle.path.name}/bin/*")
        env.communicate(
            " && ".join(cmds),
            check="raise",
            timeout=self.config.install_timeout,
        )
    # Quote the path so that a working directory containing spaces or shell
    # metacharacters is restored correctly.
    env.communicate(f"cd {shlex.quote(cwd)}", check="raise")
    path = env.communicate("echo $PATH", check="raise").strip()
    asyncio.run(self._check_available_commands(env, {"PATH": path}))
|
|
313
|
+
|
|
314
|
+
# Getting state
|
|
315
|
+
# -------------
|
|
316
|
+
|
|
317
|
+
def _get_state(self, env: SWEEnv) -> dict[str, str]:
    """Read and decode the state file `/root/state.json` of the environment.

    Args:
        env: Environment to read the state file from

    Returns:
        The decoded state; an empty dict if the file is missing or blank.

    Raises:
        ValueError: if the file content is not valid JSON or does not decode
            to a dictionary
    """
    try:
        raw_state = env.read_file("/root/state.json")
    except FileNotFoundError:
        self.logger.warning("State file not found, returning empty state")
        return {}
    if not raw_state.strip():
        self.logger.warning("State file is empty, returning empty state")
        return {}
    try:
        decoded = json.loads(raw_state)
    except json.JSONDecodeError as e:
        msg = f"State {raw_state!r} is not valid json. This is an internal error, please report it."
        raise ValueError(msg) from e
    if isinstance(decoded, dict):
        return decoded
    msg = f"State commands must return a dictionary. Got {decoded!r} instead."
    raise ValueError(msg)
|
|
336
|
+
|
|
337
|
+
def get_state(self, env: SWEEnv) -> dict[str, str]:
    """Execute state commands from all bundles and combine their results.

    This can be used to extract environment variables etc. from the environment.
    If `mock_state` is set, it is returned and the environment is not touched.

    Args:
        env: Environment to query

    Returns:
        The state read back from the environment after running the commands
    """
    mocked = self.mock_state
    if mocked is not None:
        return mocked

    # A failing state command only warns (check="warn"); it does not abort.
    for state_command in self.config.state_commands:
        env.communicate(state_command, check="warn")
    state = self._get_state(env)
    self.logger.debug(f"Retrieved state from environment: {state}")
    return state
|
|
349
|
+
|
|
350
|
+
# Blocking
|
|
351
|
+
# --------
|
|
352
|
+
|
|
353
|
+
def should_block_action(self, action: str) -> bool:
    """Check if the command should be blocked.

    An action is blocked if

    - it equals a `blocklist` entry or starts with one followed by whitespace
      (the entry `vim` blocks `vim foo.py` but not `vimdiff a b`),
    - it is exactly one of the `blocklist_standalone` entries, or
    - its first word is a key of `block_unless_regex` and the action does not
      match the regular expression stored under that key.

    Args:
        action: Action to check; surrounding whitespace is ignored

    Returns:
        True if the action must not be executed. Empty actions are never blocked.
    """
    action = action.strip()
    if not action:
        return False
    filter_config = self.config.filter
    # The *action* has to start with the blocklist entry. Testing it the other
    # way round blocks prefixes of an entry ("v") but lets real invocations
    # ("vim foo.py") through.
    for entry in filter_config.blocklist:
        entry = entry.strip()
        if not entry:
            continue
        # Require a word boundary after the entry so that e.g. "vi" does not
        # also block "view" or "virtualenv".
        if action == entry or (action.startswith(entry) and action[len(entry)].isspace()):
            return True
    if action in filter_config.blocklist_standalone:
        return True
    name = action.split()[0]
    if name in filter_config.block_unless_regex and not re.search(filter_config.block_unless_regex[name], action):
        return True
    return False
|
|
368
|
+
|
|
369
|
+
# Parsing & multiline commands
|
|
370
|
+
# -----------------------------
|
|
371
|
+
|
|
372
|
+
def check_for_submission_cmd(self, output: str) -> bool:
    """Tell whether `output` contains the submission marker.

    Args:
        output: Text to search

    Returns:
        True if the marker occurs anywhere in `output`, False otherwise
    """
    return r"<<SWE_AGENT_SUBMISSION>>" in output
|
|
377
|
+
|
|
378
|
+
def parse_actions(self, output: dict) -> tuple[str, str]:
    """Parse the model output into a thought and action.

    The work is delegated to the configured parse function, which receives
    the output together with the configured commands.

    Args:
        output: Model output to parse

    Returns:
        Whatever the parse function returns (thought and action)
    """
    tool_config = self.config
    parser = tool_config.parse_function
    return parser(output, tool_config.commands)
|
|
381
|
+
|
|
382
|
+
def guard_multiline_input(self, action: str) -> str:
    """Add heredoc markers to the multiline commands contained in `action`.

    Multiline commands (which are specified by an end_name) span multiple
    lines and are terminated by that end_name. The action is split by these
    commands and the first line of each one is extended with
    "<< '{end_name}'", so that the multi-line argument is sent to the command
    as a bash heredoc.

    Args:
        action: Action string to process

    Returns:
        The guarded action string
    """
    find_first_multiline_cmd = self._get_first_multiline_cmd
    return _guard_multiline_input(action, find_first_multiline_cmd)
|
|
389
|
+
|
|
390
|
+
def _get_first_multiline_cmd(self, action: str) -> re.Match | None:
|
|
391
|
+
"""Return the first match of a command pattern in the action string.
|
|
392
|
+
Where first match is defined by the start of the match.
|
|
393
|
+
|
|
394
|
+
The match object has three groups: (1) command name, (2) command arguments, (3) end name
|
|
395
|
+
"""
|
|
396
|
+
patterns = {
|
|
397
|
+
k: v
|
|
398
|
+
for k, v in self._command_patterns.items()
|
|
399
|
+
if k in self.config.multi_line_command_endings or k == self.config.submit_command
|
|
400
|
+
}
|
|
401
|
+
matches = list()
|
|
402
|
+
for _, pat in patterns.items():
|
|
403
|
+
match = pat.search(action)
|
|
404
|
+
if match:
|
|
405
|
+
matches.append(match)
|
|
406
|
+
if len(matches) == 0:
|
|
407
|
+
return None
|
|
408
|
+
matches = sorted(matches, key=lambda x: x.start())
|
|
409
|
+
return matches[0]
|
|
410
|
+
|
|
411
|
+
def _get_command_patterns(self) -> dict[str, re.Pattern]:
    """Creates regular expressions for the commands.

    Commands with an end name get a pattern that spans from the command name
    to the end name; all others get a pattern covering a single line. The
    entry of the submit command is always (re)written last with a spanning
    pattern built from `submit_command_end_name`.

    Returns:
        Mapping from command name to compiled pattern
    """
    # NOTE(review): names are interpolated into the patterns unescaped;
    # presumably they never contain regex metacharacters -- confirm.
    spanning_flags = re.DOTALL | re.MULTILINE
    patterns = {}
    for command in self.config.commands:
        if command.end_name is None:
            regex = rf"^\s*({command.name})\s*(.*?)$"
            flags = re.MULTILINE
        else:
            regex = rf"^\s*({command.name})\s*(.*?)^({command.end_name})\s*$"
            flags = spanning_flags
        patterns[command.name] = re.compile(regex, flags)
    patterns[self.config.submit_command] = re.compile(
        rf"^\s*({self.config.submit_command})\s*(.*?)^({self.config.submit_command_end_name})\s*$",
        spanning_flags,
    )
    return patterns
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from collections.abc import Callable
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from sweagent.tools.commands import Command
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _guard_multiline_input(action: str, match_fct: Callable[[str], re.Match | None]) -> str:
|
|
9
|
+
"""Split action by multiline commands, then append the first line in each multiline command with "<< '{end_name}'".
|
|
10
|
+
Multiline commands (which are specified by an end_name) are commands that span multiple lines and are terminated by a specific end_name.
|
|
11
|
+
|
|
12
|
+
Their multi-line argument is sent using a heredoc, which is a way to send a multi-line string to a command in bash.
|
|
13
|
+
"""
|
|
14
|
+
parsed_action = []
|
|
15
|
+
rem_action = action
|
|
16
|
+
while rem_action.strip():
|
|
17
|
+
first_match = match_fct(rem_action)
|
|
18
|
+
if first_match:
|
|
19
|
+
pre_action = rem_action[: first_match.start()]
|
|
20
|
+
match_action = rem_action[first_match.start() : first_match.end()]
|
|
21
|
+
rem_action = rem_action[first_match.end() :]
|
|
22
|
+
if pre_action.strip():
|
|
23
|
+
parsed_action.append(pre_action)
|
|
24
|
+
if match_action.strip():
|
|
25
|
+
eof = first_match.group(3).strip()
|
|
26
|
+
if not match_action.split("\n")[0].strip().endswith(f"<< '{eof}'"):
|
|
27
|
+
guarded_command = match_action[first_match.start() :]
|
|
28
|
+
first_line = guarded_command.split("\n")[0]
|
|
29
|
+
guarded_command = guarded_command.replace(first_line, first_line + f" << '{eof}'", 1)
|
|
30
|
+
parsed_action.append(guarded_command)
|
|
31
|
+
else:
|
|
32
|
+
parsed_action.append(match_action)
|
|
33
|
+
else:
|
|
34
|
+
parsed_action.append(rem_action)
|
|
35
|
+
rem_action = ""
|
|
36
|
+
return "\n".join(parsed_action)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _should_quote(value: Any, command: Command) -> bool:
    """Returns True if the value should be quoted, False otherwise.

    Only string values are quoted, and never for the `bash` command or for
    commands that define an `end_name`.

    Args:
        value: Argument value in question
        command: Command the value is passed to
    """
    return command.name != "bash" and isinstance(value, str) and command.end_name is None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def get_signature(cmd):
    """Generate a command signature from its arguments.

    Required arguments are rendered as `<name>`, optional ones as `[<name>]`.
    For a command with an `end_name`, the last argument is the multi-line
    body: its name is put on a line of its own, followed by the end name.

    Args:
        cmd: Command object to generate signature for

    Returns:
        Formatted signature string; just the command name if the command has
        no arguments
    """
    arguments = getattr(cmd, "arguments", None)
    if not arguments:
        # Covers a missing attribute, None and an empty list alike.
        return cmd.name

    def placeholder(argument) -> str:
        return f" <{argument.name}>" if argument.required else f" [<{argument.name}>]"

    if cmd.end_name is None:
        return cmd.name + "".join(placeholder(argument) for argument in arguments)

    *inline_arguments, body_argument = arguments
    # Arguments are objects with a `name` everywhere else in this function;
    # mappings are still accepted for backward compatibility, in which case
    # the first key is used.
    if hasattr(body_argument, "name"):
        body_name = body_argument.name
    else:
        body_name = next(iter(body_argument))
    inline = "".join(placeholder(argument) for argument in inline_arguments)
    return f"{cmd.name}{inline}\n{body_name}\n{cmd.end_name}"
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def generate_command_docs(
    commands: list[Command],
    subroutine_types,
    **kwargs,
) -> str:
    """Generate detailed command documentation.

    Every entry consists of the command name, its docstring (if any), its
    signature and, if the command has arguments, one line per argument.
    Entries are separated by an empty line.

    Args:
        commands: List of commands to document
        subroutine_types: List of subroutines to document
        **kwargs: Additional format variables for docstrings

    Returns:
        Formatted documentation string
    """
    chunks = []
    for cmd in commands + subroutine_types:
        chunks.append(f"{cmd.name}:\n")
        if cmd.docstring is not None:
            chunks.append(f" docstring: {cmd.docstring.format(**kwargs)}\n")
        # An explicitly configured signature takes precedence over a generated one.
        signature = get_signature(cmd) if cmd.signature is None else cmd.signature
        chunks.append(f" signature: {signature}\n")
        if cmd.arguments:
            chunks.append(" arguments:\n")
            for argument in cmd.arguments:
                param = argument.name
                req_string = "required" if argument.required else "optional"
                chunks.append(f" - {param} ({argument.type}) [{req_string}]: {argument.description}\n")
        chunks.append("\n")
    return "".join(chunks)
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""This file has types/dataclass definitions that are used in the SWE agent
|
|
2
|
+
for exchanging data between different modules/functions/classes.
|
|
3
|
+
They oftentimes cannot be defined in the same file where they are used
|
|
4
|
+
because of circular dependencies.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import Any, Literal
|
|
10
|
+
|
|
11
|
+
from pydantic import BaseModel
|
|
12
|
+
from typing_extensions import TypedDict
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class StepOutput(BaseModel):
    query: list[dict] = [{}]
    thought: str = ""
    action: str = ""
    output: str = ""
    observation: str = ""
    execution_time: float = 0.0
    done: bool = False
    exit_status: int | str | None = None
    submission: str | None = None
    # State of the environment at the end of the step
    state: dict[str, str] = {}
    tool_calls: list[dict[str, Any]] | None = None
    tool_call_ids: list[str] | None = None
    thinking_blocks: list[dict[str, Any]] | None = None
    extra_info: dict[str, Any] = {}

    def to_template_format_dict(self) -> dict[str, str | int | float | bool | None]:
        """Used for formatting (error) prompt templates.

        Returns:
            The dumped fields without `tool_calls`, `tool_call_ids` and
            `state`, updated with the entries of `state` (which therefore
            take precedence over fields of the same name).
        """
        skipped = ("tool_calls", "tool_call_ids", "state")
        out = {key: value for key, value in self.model_dump().items() if key not in skipped}
        out |= self.state
        return out
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class TrajectoryStep(TypedDict):
    """Dictionary layout of a single entry of a `Trajectory`."""

    action: str
    observation: str
    response: str
    state: dict[str, str]
    thought: str
    execution_time: float
    query: list[dict[str, Any]]
    extra_info: dict[str, Any]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# required fields go here
class _HistoryItem(TypedDict):
    """Keys that every history item has to provide; `HistoryItem` adds the optional ones."""

    role: str
    # Either plain text or a list of dictionaries.
    content: str | list[dict[str, Any]]
    message_type: Literal["thought", "action", "observation"]
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# see _HistoryItem for required fields
class HistoryItem(_HistoryItem, total=False):
    # `total=False`: every key declared in this class is optional.
    agent: str
    is_demo: bool
    thought: str
    action: str | None
    tool_calls: list[dict[str, str]] | None
    tool_call_ids: list[str] | None
    # The string at the end of this class presumably describes this key.
    tags: list[str]
    cache_control: dict[str, Any] | None
    thinking_blocks: list[dict[str, Any]] | None

    """HistoryProcessors can add these tags to enable special processing"""
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# A history is a list of history items.
History = list[HistoryItem]
# A trajectory is a list of trajectory steps.
Trajectory = list[TrajectoryStep]
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# todo: Make this actually have the dataclasses instead of dict versions
class AgentInfo(TypedDict, total=False):
    """Information about an agent run; every key is optional (`total=False`)."""

    # same as `APIStats` from models.py
    model_stats: dict[str, float]
    exit_status: str | None
    submission: str | None
    # same as `ReviewerResult`
    review: dict[str, Any]
    edited_files30: str
    edited_files50: str
    edited_files70: str
    # only if summarizer is used
    summarizer: dict
    swe_agent_hash: str
    swe_agent_version: str
    swe_rex_version: str
    swe_rex_hash: str
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# Pairs the `AgentInfo` of a run with its `Trajectory`.
class AgentRunResult(BaseModel):
    # Information about the run (see `AgentInfo`).
    info: AgentInfo
    # List of trajectory steps (see `TrajectoryStep`).
    trajectory: Trajectory
|
|
File without changes
|