@elizaos/sweagent-root 2.0.0-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +270 -0
- package/package.json +71 -0
- package/python/LICENSE +21 -0
- package/python/config/README.md +15 -0
- package/python/config/bash_only.yaml +222 -0
- package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
- package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
- package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
- package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
- package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
- package/python/config/coding_challenge.yaml +104 -0
- package/python/config/default.yaml +69 -0
- package/python/config/default_backticks.yaml +69 -0
- package/python/config/default_mm_no_images.yaml +82 -0
- package/python/config/default_mm_with_images.yaml +83 -0
- package/python/config/demo/default.yaml +80 -0
- package/python/config/demo/no_instructions.yaml +69 -0
- package/python/config/demo/only_bash.yaml +60 -0
- package/python/config/exotic/default_shell.yaml +52 -0
- package/python/config/exotic/windowed_replace.yaml +125 -0
- package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
- package/python/config/human/human.yaml +24 -0
- package/python/config/human/human_demo.yaml +52 -0
- package/python/config/sweagent_0_7/07.yaml +101 -0
- package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
- package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
- package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
- package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
- package/python/mlc_config.json +44 -0
- package/python/pyproject.toml +262 -0
- package/python/sweagent/__init__.py +114 -0
- package/python/sweagent/__main__.py +4 -0
- package/python/sweagent/agent/__init__.py +0 -0
- package/python/sweagent/agent/action_sampler.py +317 -0
- package/python/sweagent/agent/agents.py +1294 -0
- package/python/sweagent/agent/extra/shell_agent.py +106 -0
- package/python/sweagent/agent/history_processors.py +399 -0
- package/python/sweagent/agent/hooks/__init__.py +0 -0
- package/python/sweagent/agent/hooks/abstract.py +139 -0
- package/python/sweagent/agent/hooks/status.py +34 -0
- package/python/sweagent/agent/models.py +896 -0
- package/python/sweagent/agent/problem_statement.py +312 -0
- package/python/sweagent/agent/reviewer.py +664 -0
- package/python/sweagent/environment/__init__.py +0 -0
- package/python/sweagent/environment/hooks/__init__.py +0 -0
- package/python/sweagent/environment/hooks/abstract.py +60 -0
- package/python/sweagent/environment/hooks/status.py +28 -0
- package/python/sweagent/environment/repo.py +219 -0
- package/python/sweagent/environment/swe_env.py +276 -0
- package/python/sweagent/exceptions.py +54 -0
- package/python/sweagent/inspector/README.md +6 -0
- package/python/sweagent/inspector/__init__.py +0 -0
- package/python/sweagent/inspector/favicon.ico +0 -0
- package/python/sweagent/inspector/fileViewer.js +354 -0
- package/python/sweagent/inspector/icons/computer.png +0 -0
- package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
- package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
- package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
- package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
- package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
- package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
- package/python/sweagent/inspector/index.html +25 -0
- package/python/sweagent/inspector/server.py +354 -0
- package/python/sweagent/inspector/static.py +169 -0
- package/python/sweagent/inspector/style.css +454 -0
- package/python/sweagent/run/__init__.py +0 -0
- package/python/sweagent/run/_progress.py +158 -0
- package/python/sweagent/run/batch_instances.py +419 -0
- package/python/sweagent/run/common.py +387 -0
- package/python/sweagent/run/compare_runs.py +123 -0
- package/python/sweagent/run/extract_pred.py +19 -0
- package/python/sweagent/run/hooks/__init__.py +0 -0
- package/python/sweagent/run/hooks/abstract.py +67 -0
- package/python/sweagent/run/hooks/apply_patch.py +106 -0
- package/python/sweagent/run/hooks/open_pr.py +244 -0
- package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
- package/python/sweagent/run/inspector_cli.py +493 -0
- package/python/sweagent/run/merge_predictions.py +64 -0
- package/python/sweagent/run/quick_stats.py +96 -0
- package/python/sweagent/run/remove_unfinished.py +63 -0
- package/python/sweagent/run/rich_test.py +91 -0
- package/python/sweagent/run/run.py +147 -0
- package/python/sweagent/run/run_batch.py +442 -0
- package/python/sweagent/run/run_replay.py +219 -0
- package/python/sweagent/run/run_shell.py +155 -0
- package/python/sweagent/run/run_single.py +225 -0
- package/python/sweagent/run/run_traj_to_demo.py +85 -0
- package/python/sweagent/tools/__init__.py +0 -0
- package/python/sweagent/tools/bundle.py +57 -0
- package/python/sweagent/tools/commands.py +220 -0
- package/python/sweagent/tools/parsing.py +619 -0
- package/python/sweagent/tools/tools.py +430 -0
- package/python/sweagent/tools/utils.py +108 -0
- package/python/sweagent/types.py +102 -0
- package/python/sweagent/utils/__init__.py +0 -0
- package/python/sweagent/utils/config.py +80 -0
- package/python/sweagent/utils/files.py +27 -0
- package/python/sweagent/utils/github.py +118 -0
- package/python/sweagent/utils/jinja_warnings.py +14 -0
- package/python/sweagent/utils/log.py +175 -0
- package/python/sweagent/utils/patch_formatter.py +152 -0
- package/python/sweagent/utils/serialization.py +45 -0
- package/python/tests/__init__.py +0 -0
- package/python/tests/conftest.py +191 -0
- package/python/tests/test_agent.py +258 -0
- package/python/tests/test_batch_instance.py +43 -0
- package/python/tests/test_commands/_interactive_dummy.py +35 -0
- package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
- package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
- package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
- package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
- package/python/tests/test_data/data_sources/human_eval.json +1 -0
- package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
- package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
- package/python/tests/test_env.py +66 -0
- package/python/tests/test_env_utils.py +129 -0
- package/python/tests/test_history_processors.py +40 -0
- package/python/tests/test_models.py +23 -0
- package/python/tests/test_openai_live.py +164 -0
- package/python/tests/test_packaging.py +7 -0
- package/python/tests/test_parsing.py +131 -0
- package/python/tests/test_problem_statement_multimodal.py +111 -0
- package/python/tests/test_quick_stats.py +42 -0
- package/python/tests/test_run.py +37 -0
- package/python/tests/test_run_batch.py +110 -0
- package/python/tests/test_run_hooks.py +114 -0
- package/python/tests/test_run_replay.py +33 -0
- package/python/tests/test_run_single.py +125 -0
- package/python/tests/test_tools_command_parsing.py +193 -0
- package/python/tests/test_utils.py +15 -0
- package/python/tests/tools/__init__.py +0 -0
- package/python/tests/tools/conftest.py +12 -0
- package/python/tests/tools/test_default_utils.py +153 -0
- package/python/tests/tools/test_edit_replace.py +0 -0
- package/python/tests/tools/test_split_string.py +82 -0
- package/python/tests/utils.py +29 -0
- package/python/tools/diff_state/bin/_state_diff_state +52 -0
- package/python/tools/diff_state/config.yaml +2 -0
- package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
- package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
- package/python/tools/edit_anthropic/config.yaml +56 -0
- package/python/tools/edit_anthropic/install.sh +3 -0
- package/python/tools/filemap/bin/filemap +45 -0
- package/python/tools/filemap/config.yaml +9 -0
- package/python/tools/filemap/install.sh +2 -0
- package/python/tools/forfeit/bin/exit_forfeit +5 -0
- package/python/tools/forfeit/config.yaml +5 -0
- package/python/tools/image_tools/bin/view_image +36 -0
- package/python/tools/image_tools/config.yaml +9 -0
- package/python/tools/multilingual_setup/bin/do_nothing +2 -0
- package/python/tools/multilingual_setup/config.yaml +1 -0
- package/python/tools/multilingual_setup/install.sh +45 -0
- package/python/tools/registry/bin/_read_env +10 -0
- package/python/tools/registry/bin/_write_env +10 -0
- package/python/tools/registry/config.yaml +1 -0
- package/python/tools/registry/install.sh +6 -0
- package/python/tools/registry/lib/__init__.py +0 -0
- package/python/tools/registry/lib/registry.py +56 -0
- package/python/tools/review_on_submit_m/README.md +6 -0
- package/python/tools/review_on_submit_m/bin/submit +54 -0
- package/python/tools/review_on_submit_m/config.yaml +6 -0
- package/python/tools/review_on_submit_m/install.sh +0 -0
- package/python/tools/search/bin/find_file +31 -0
- package/python/tools/search/bin/search_dir +39 -0
- package/python/tools/search/bin/search_file +55 -0
- package/python/tools/search/config.yaml +37 -0
- package/python/tools/search/install.sh +3 -0
- package/python/tools/submit/bin/submit +17 -0
- package/python/tools/submit/config.yaml +5 -0
- package/python/tools/web_browser/bin/click_mouse +41 -0
- package/python/tools/web_browser/bin/close_site +28 -0
- package/python/tools/web_browser/bin/double_click_mouse +37 -0
- package/python/tools/web_browser/bin/drag_mouse +46 -0
- package/python/tools/web_browser/bin/execute_script_on_page +39 -0
- package/python/tools/web_browser/bin/get_console_output +48 -0
- package/python/tools/web_browser/bin/move_mouse +35 -0
- package/python/tools/web_browser/bin/navigate_back +33 -0
- package/python/tools/web_browser/bin/navigate_forward +33 -0
- package/python/tools/web_browser/bin/open_site +36 -0
- package/python/tools/web_browser/bin/press_keys_on_page +51 -0
- package/python/tools/web_browser/bin/reload_page +33 -0
- package/python/tools/web_browser/bin/run_web_browser_server +394 -0
- package/python/tools/web_browser/bin/screenshot_site +38 -0
- package/python/tools/web_browser/bin/scroll_on_page +40 -0
- package/python/tools/web_browser/bin/set_browser_window_size +40 -0
- package/python/tools/web_browser/bin/type_text +34 -0
- package/python/tools/web_browser/bin/wait_time +39 -0
- package/python/tools/web_browser/config.yaml +155 -0
- package/python/tools/web_browser/install.sh +22 -0
- package/python/tools/web_browser/lib/browser_manager.py +404 -0
- package/python/tools/web_browser/lib/web_browser_config.py +33 -0
- package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
- package/python/tools/web_browser/test_console.html +1 -0
- package/python/tools/windowed/bin/_state +25 -0
- package/python/tools/windowed/bin/create +29 -0
- package/python/tools/windowed/bin/goto +37 -0
- package/python/tools/windowed/bin/open +49 -0
- package/python/tools/windowed/bin/scroll_down +12 -0
- package/python/tools/windowed/bin/scroll_up +13 -0
- package/python/tools/windowed/config.yaml +38 -0
- package/python/tools/windowed/install.sh +15 -0
- package/python/tools/windowed/lib/__init__.py +0 -0
- package/python/tools/windowed/lib/flake8_utils.py +147 -0
- package/python/tools/windowed/lib/windowed_file.py +312 -0
- package/python/tools/windowed_edit_linting/bin/edit +128 -0
- package/python/tools/windowed_edit_linting/config.yaml +31 -0
- package/python/tools/windowed_edit_linting/install.sh +5 -0
- package/python/tools/windowed_edit_replace/bin/edit +172 -0
- package/python/tools/windowed_edit_replace/bin/insert +77 -0
- package/python/tools/windowed_edit_replace/config.yaml +60 -0
- package/python/tools/windowed_edit_replace/install.sh +5 -0
- package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
- package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
- package/python/tools/windowed_edit_rewrite/install.sh +5 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
- package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
- package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
- package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
- package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
- package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
- package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
- package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
- package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
- package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
- package/rust/Cargo.toml +100 -0
- package/rust/README.md +49 -0
- package/rust/src/agent/action_sampler.rs +130 -0
- package/rust/src/agent/agents.rs +1029 -0
- package/rust/src/agent/history_processors.rs +277 -0
- package/rust/src/agent/hooks/mod.rs +208 -0
- package/rust/src/agent/mod.rs +24 -0
- package/rust/src/agent/models.rs +837 -0
- package/rust/src/agent/problem_statement.rs +355 -0
- package/rust/src/agent/reviewer.rs +505 -0
- package/rust/src/bin/sweagent.rs +784 -0
- package/rust/src/environment/deployment.rs +631 -0
- package/rust/src/environment/hooks/mod.rs +114 -0
- package/rust/src/environment/mod.rs +16 -0
- package/rust/src/environment/repo.rs +265 -0
- package/rust/src/environment/runtime.rs +237 -0
- package/rust/src/environment/swe_env.rs +248 -0
- package/rust/src/exceptions.rs +228 -0
- package/rust/src/lib.rs +68 -0
- package/rust/src/monitoring.rs +482 -0
- package/rust/src/run/hooks/mod.rs +134 -0
- package/rust/src/run/mod.rs +12 -0
- package/rust/src/run/run_batch.rs +563 -0
- package/rust/src/run/run_single.rs +196 -0
- package/rust/src/tools/bundle.rs +224 -0
- package/rust/src/tools/commands.rs +173 -0
- package/rust/src/tools/mod.rs +295 -0
- package/rust/src/tools/parsing.rs +354 -0
- package/rust/src/tools/registry.rs +143 -0
- package/rust/src/types.rs +554 -0
- package/rust/src/utils/config.rs +105 -0
- package/rust/src/utils/files.rs +137 -0
- package/rust/src/utils/github.rs +171 -0
- package/rust/src/utils/log.rs +65 -0
- package/rust/src/utils/mod.rs +17 -0
- package/rust/src/utils/serialization.rs +181 -0
- package/rust/src/utils/template.rs +173 -0
- package/typescript/README.md +335 -0
|
@@ -0,0 +1,419 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import random
|
|
3
|
+
import re
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Literal
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
|
9
|
+
from swerex.deployment.config import (
|
|
10
|
+
DeploymentConfig,
|
|
11
|
+
DockerDeploymentConfig,
|
|
12
|
+
DummyDeploymentConfig,
|
|
13
|
+
LocalDeploymentConfig,
|
|
14
|
+
)
|
|
15
|
+
from typing_extensions import Self
|
|
16
|
+
|
|
17
|
+
from sweagent.agent.problem_statement import (
|
|
18
|
+
ProblemStatementConfig,
|
|
19
|
+
SWEBenchMultimodalProblemStatement,
|
|
20
|
+
TextProblemStatement,
|
|
21
|
+
)
|
|
22
|
+
from sweagent.environment.repo import GithubRepoConfig, LocalRepoConfig, PreExistingRepoConfig
|
|
23
|
+
from sweagent.environment.swe_env import EnvironmentConfig
|
|
24
|
+
from sweagent.utils.files import load_file
|
|
25
|
+
from sweagent.utils.log import get_logger
|
|
26
|
+
|
|
27
|
+
logger = get_logger("swea-config", emoji="🔧")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class AbstractInstanceSource(ABC):
    """Interface for objects that can supply the instances of a batch run.

    Any class that implements `get_instance_configs` adheres to this
    standard and can be used to load instances.
    """

    @abstractmethod
    def get_instance_configs(self) -> list[EnvironmentConfig]:
        """Return the instance configurations provided by this source."""
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class BatchInstance(BaseModel):
    """One entry of a batch of instances.

    Bundles the environment configuration together with the problem
    statement that belongs to it.
    """

    # Environment configuration for this instance.
    env: EnvironmentConfig
    # Problem statement paired with that environment.
    problem_statement: ProblemStatementConfig
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _slice_spec_to_slice(slice_spec: str) -> slice:
    """Convert a textual slice specification into a `slice` object.

    Accepted forms are ``stop``, ``start:stop`` and ``start:stop:step``; any
    component may be left empty, exactly as in python's list slicing
    (``list[slice]``). The empty string selects everything.

    Args:
        slice_spec: The specification, e.g. ``"10"``, ``"5:20"`` or ``"::2"``.

    Returns:
        The equivalent `slice` object.

    Raises:
        ValueError: If the specification has more than three components or
            if a non-empty component is not an integer.
    """
    if slice_spec == "":
        return slice(None)
    msg = (
        f"Invalid slice specification: {slice_spec!r}. "
        "Here's the expected format: stop or start:stop or start:stop:step "
        "(i.e., it behaves exactly like python's list slicing `list[slice]`)."
    )
    parts = slice_spec.split(":")
    if len(parts) > 3:
        raise ValueError(msg)
    try:
        values = [None if p == "" else int(p) for p in parts]
    except ValueError as err:
        # Surface the descriptive message instead of the bare int() error.
        raise ValueError(msg) from err
    # slice() accepts one, two or three positional arguments with exactly the
    # stop / start:stop / start:stop:step meaning documented above.
    return slice(*values)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _filter_batch_items(
    instances: list[BatchInstance], *, filter_: str, slice_: str = "", shuffle: bool = False
) -> list[BatchInstance]:
    """Shuffle, filter and slice a list of instances (in that order).

    Args:
        instances: The instances to select from. The list passed in is not modified.
        filter_: Regular expression; only instances whose problem statement id
            matches it (via `re.match`, i.e. anchored at the start) are kept.
        slice_: Slice specification applied after filtering. See
            `_slice_spec_to_slice` for the format. Empty means no slicing.
        shuffle: If true, sort the instances by problem statement id and then
            shuffle them with a fixed seed, so the order is reproducible and
            independent of the input order.

    Returns:
        The selected instances.
    """
    if shuffle:
        instances = sorted(instances, key=lambda x: x.problem_statement.id)
        # A dedicated generator gives the same permutation as seeding the
        # module-level one with 42, without resetting the global random state.
        random.Random(42).shuffle(instances)
    before_filter = len(instances)
    pattern = re.compile(filter_)
    instances = [instance for instance in instances if pattern.match(instance.problem_statement.id)]
    after_filter = len(instances)
    if before_filter != after_filter:
        logger.info("Instance filter: %d -> %d instances", before_filter, after_filter)
    if slice_:
        instances = instances[_slice_spec_to_slice(slice_)]
        after_slice = len(instances)
        # Compare against the post-filter count so that this message reports
        # only what the slice itself removed.
        if after_filter != after_slice:
            logger.info("Instance slice: %d -> %d instances", after_filter, after_slice)
    return instances
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class SimpleBatchInstance(BaseModel):
    """A simple way to configure a single instance in a batch of instances that all
    use similar deployment configurations.

    Predominantly used for benchmarking purposes. Assumes that the repository is already
    present in the docker container.
    """

    image_name: str
    problem_statement: str
    instance_id: str
    repo_name: str = ""
    """Specifies the repository to use. If empty, no repository is used.
    If the string contains the word "github", it is interpreted as a github repository.
    Else, if it does not contain a slash, it is interpreted as an already existing repository
    at the root of the docker container.
    Else, it is interpreted as a local repository.
    """
    base_commit: str = "HEAD"
    """Used to reset repo."""
    extra_fields: dict[str, Any] = Field(default_factory=dict)
    """Any additional data to be added to the instance.
    This data will be available when formatting prompt templates.
    """

    # Ignore instead of allow because they should be added as `extra_fields`
    model_config = ConfigDict(extra="ignore")

    def to_full_batch_instance(self, deployment: DeploymentConfig) -> BatchInstance:
        """Merge the deployment options into the `SimpleBatchInstance` object to get a full `BatchInstance`.

        Args:
            deployment: Deployment configuration shared by the batch. It is deep-copied,
                so the object passed in is never modified.

        Returns:
            The `BatchInstance` combining the deployment, the repository derived from
            `repo_name` and the problem statement.

        Raises:
            ValueError: If `deployment` is a local deployment and `image_name` is non-empty.
        """
        # Very important: Make a copy of the deployment config because it will be shared among instances!!!
        deployment = deployment.model_copy(deep=True)

        # Work on a copy so that this instance stays unchanged and the method
        # gives the same result when called more than once.
        extra_fields = dict(self.extra_fields)
        if "issue_images" in extra_fields:
            problem_statement = SWEBenchMultimodalProblemStatement(
                text=self.problem_statement,
                issue_images=extra_fields.pop("issue_images"),
                id=self.instance_id,
                extra_fields=extra_fields,
            )
        else:
            problem_statement = TextProblemStatement(
                text=self.problem_statement, id=self.instance_id, extra_fields=extra_fields
            )

        if not self.repo_name:
            repo = None
        elif "github" in self.repo_name:
            repo = GithubRepoConfig(github_url=self.repo_name, base_commit=self.base_commit)
        elif "/" not in self.repo_name:
            repo = PreExistingRepoConfig(repo_name=self.repo_name, base_commit=self.base_commit)
        else:
            repo = LocalRepoConfig(path=Path(self.repo_name), base_commit=self.base_commit)

        if isinstance(deployment, LocalDeploymentConfig):
            if self.image_name:
                msg = "Local deployment does not support image_name"
                raise ValueError(msg)
        elif not isinstance(deployment, DummyDeploymentConfig):
            # Local and dummy deployments are used as they are; every other
            # deployment gets the image of this instance.
            deployment.image = self.image_name  # type: ignore

            if isinstance(deployment, DockerDeploymentConfig) and deployment.python_standalone_dir is None:
                # Note: you can disable this by setting python_standalone_dir to ""
                deployment.python_standalone_dir = "/root"  # type: ignore

        return BatchInstance(
            env=EnvironmentConfig(deployment=deployment, repo=repo), problem_statement=problem_statement
        )

    @model_validator(mode="before")
    @classmethod
    def handle_legacy_id(cls, data):
        """Accept the legacy key `id` as an alias for `instance_id`.

        If the input is a dict that has `id` but no `instance_id`, a copy with the
        key renamed is returned. Any other input is returned unchanged.
        """
        # Handling compatibility with swe-agent <= 1.0.1
        if isinstance(data, dict) and "id" in data and "instance_id" not in data:
            # Shallow copy: do not modify the mapping owned by the caller.
            data = dict(data)
            data["instance_id"] = data.pop("id")
        return data

    # todo: Maybe populate extra fields?
    @classmethod
    def from_swe_bench(cls, instance: dict[str, Any]) -> Self:
        """Convert instances from the classical SWE-bench dataset to the `SimpleBatchInstance` format.

        Args:
            instance: A dataset row. The keys `instance_id`, `problem_statement` and
                `base_commit` are read unconditionally; `image_name` and `image_assets`
                are used if present.

        Returns:
            The converted instance, with `repo_name` set to "testbed".
        """
        iid = instance["instance_id"]
        image_name = instance.get("image_name", None)
        if image_name is None:
            # Docker doesn't allow double underscore, so we replace them with a magic token
            id_docker_compatible = iid.replace("__", "_1776_")
            image_name = f"docker.io/swebench/sweb.eval.x86_64.{id_docker_compatible}:latest".lower()
        extra_fields = {}
        if "image_assets" in instance:
            # `image_assets` is a JSON string; its "problem_statement" entry is
            # passed on as the issue images.
            issue_images = json.loads(instance["image_assets"])["problem_statement"]
            extra_fields["issue_images"] = issue_images
        return cls(
            image_name=image_name,
            problem_statement=instance["problem_statement"],
            instance_id=iid,
            repo_name="testbed",
            base_commit=instance["base_commit"],
            extra_fields=extra_fields,
        )
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
class InstancesFromFile(BaseModel, AbstractInstanceSource):
    """Instance source that reads its instances from a file."""

    path: Path
    filter: str = ".*"
    """Regular expression that the instance id has to match for the instance to be kept."""
    slice: str = ""
    """Keep only a slice of the instances; applied after `filter`.
    Allowed forms are stop or start:stop or start:stop:step
    (the same semantics as python's list slicing `list[slice]`).
    """
    shuffle: bool = False
    """Shuffle the instances; happens before filtering and slicing."""

    deployment: DeploymentConfig = Field(
        default_factory=lambda: DockerDeploymentConfig(image="python:3.11"),
        description="Deployment options.",
    )
    """Note that the image_name option is overwritten by the images specified in the task instances."""

    simple: Literal[True] = True
    """Convenience discriminator for (de)serialization/CLI. Do not change."""

    type: Literal["file"] = "file"
    """Discriminator for (de)serialization/CLI. Do not change."""

    def get_instance_configs(self) -> list[BatchInstance]:
        """Load the file, build full batch instances and apply shuffle/filter/slice."""
        raw_records = load_file(self.path)
        # Validate every record first, then expand each one with the shared deployment.
        parsed = list(map(SimpleBatchInstance.model_validate, raw_records))
        expanded = [item.to_full_batch_instance(self.deployment) for item in parsed]
        return _filter_batch_items(
            expanded,
            filter_=self.filter,
            slice_=self.slice,
            shuffle=self.shuffle,
        )

    @property
    def id(self) -> str:
        """Identifier of this source: the file name without its suffix."""
        return self.path.stem
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
class InstancesFromHuggingFace(BaseModel, AbstractInstanceSource):
    """Instance source backed by a HuggingFace dataset.

    The rows of the dataset split are validated as `SimpleBatchInstance`
    objects and expanded into full `BatchInstance` objects using `deployment`.
    """

    dataset_name: str
    """Name of the HuggingFace dataset. Same as when using `datasets.load_dataset`."""
    # Passed as the `split` argument of `datasets.load_dataset`.
    split: str = "dev"
    filter: str = ".*"
    """Regular expression to filter the instances by instance id."""
    slice: str = ""
    """Select only a slice of the instances (after filtering by `filter`).
    Possible values are stop or start:stop or start:stop:step.
    (i.e., it behaves exactly like python's list slicing `list[slice]`).
    """
    shuffle: bool = False
    """Shuffle the instances (before filtering and slicing)."""

    deployment: DeploymentConfig = Field(
        default_factory=lambda: DockerDeploymentConfig(image="python:3.11"),
    )
    """Deployment configuration. Note that the `image_name` option is overwritten by the images specified in the task instances.
    """
    type: Literal["huggingface"] = "huggingface"
    """Discriminator for (de)serialization/CLI. Do not change."""

    def get_instance_configs(self) -> list[BatchInstance]:
        """Fetch the dataset split and turn its rows into batch instances.

        `datasets` is imported lazily inside the method, so it is only needed
        when this source is actually used.

        Returns:
            The result of `_filter_batch_items` applied to the expanded
            instances with this source's `filter`, `slice` and `shuffle`.
        """
        from datasets import load_dataset

        ds: list[dict[str, Any]] = load_dataset(self.dataset_name, split=self.split)  # type: ignore
        # Validate every row first; expansion only starts once all are valid.
        simple_items: list[SimpleBatchInstance] = list(map(SimpleBatchInstance.model_validate, ds))
        expanded = [item.to_full_batch_instance(self.deployment) for item in simple_items]
        return _filter_batch_items(
            expanded,
            filter_=self.filter,
            slice_=self.slice,
            shuffle=self.shuffle,
        )

    @property
    def id(self) -> str:
        """Identifier built from the sanitized dataset name and the split.

        Only alphanumeric characters, `-` and `_` of `dataset_name` are kept.
        """
        kept_chars = [ch for ch in self.dataset_name if ch.isalnum() or ch in ("-", "_")]
        ds_name = "".join(kept_chars)
        return f"{ds_name}_{self.split}"
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
class SWEBenchInstances(BaseModel, AbstractInstanceSource):
    """Instance source backed by one of the SWE-bench datasets.

    The dataset is chosen through `subset` (or `path_override`) and `split`;
    every row is converted with `SimpleBatchInstance.from_swe_bench` and then
    expanded into a full `BatchInstance` using `deployment`.
    """

    subset: Literal["lite", "verified", "full", "multimodal", "multilingual"] = "lite"
    """Subset of swe-bench to use"""

    # IMPORTANT: Do not call this `path`, because then if people do not specify instance.type,
    # it might be resolved to ExpertInstancesFromFile or something like that.
    path_override: str | Path | None = None
    """Allow to specify a different huggingface dataset name or path to a huggingface
    dataset. This will override the automatic path set by `subset`.
    """

    split: Literal["dev", "test"] = "dev"

    deployment: DeploymentConfig = Field(
        default_factory=lambda: DockerDeploymentConfig(image="python:3.11"),
    )
    """Deployment configuration. Note that the image_name option is overwritten by the images specified in the task instances.
    """

    type: Literal["swe_bench"] = "swe_bench"
    """Discriminator for (de)serialization/CLI. Do not change."""

    filter: str = ".*"
    """Regular expression to filter the instances by instance id."""
    slice: str = ""
    """Select only a slice of the instances (after filtering by `filter`).
    Possible values are stop or start:stop or start:stop:step.
    (i.e., it behaves exactly like python's list slicing `list[slice]`).
    """
    shuffle: bool = False
    """Shuffle the instances (before filtering and slicing)."""

    evaluate: bool = False
    """Run sb-cli to evaluate"""

    def _get_dataset_path(self) -> str:
        """Resolve the dataset name or path handed to `load_dataset`.

        Returns:
            `path_override` as a string when it is set, otherwise the dataset
            name mapped to `subset`.

        Raises:
            ValueError: If `subset` has no entry in the mapping.
        """
        if self.path_override is not None:
            return str(self.path_override)

        hub_names = {
            "full": "princeton-nlp/SWE-Bench",
            "verified": "princeton-nlp/SWE-Bench_Verified",
            "lite": "princeton-nlp/SWE-Bench_Lite",
            "multimodal": "princeton-nlp/SWE-Bench_Multimodal",
            "multilingual": "swe-bench/SWE-Bench_Multilingual",
        }

        resolved = hub_names.get(self.subset)
        if resolved is None:
            msg = f"Unsupported subset: {self.subset}"
            raise ValueError(msg)
        return resolved

    def get_instance_configs(self) -> list[BatchInstance]:
        """Fetch the SWE-bench split and turn its rows into batch instances.

        When `deployment` is a `DockerDeploymentConfig`, its `platform` is set
        to "linux/amd64" in place before the instances are built.

        Returns:
            The result of `_filter_batch_items` applied to the expanded
            instances with this source's `filter`, `slice` and `shuffle`.
        """
        from datasets import load_dataset

        ds: list[dict[str, Any]] = load_dataset(self._get_dataset_path(), split=self.split)  # type: ignore

        if isinstance(self.deployment, DockerDeploymentConfig):
            self.deployment.platform = "linux/amd64"

        # Convert and expand one row at a time, in dataset order.
        expanded = []
        for row in ds:
            simple_item = SimpleBatchInstance.from_swe_bench(row)
            expanded.append(simple_item.to_full_batch_instance(self.deployment))
        return _filter_batch_items(
            expanded,
            filter_=self.filter,
            slice_=self.slice,
            shuffle=self.shuffle,
        )

    @property
    def id(self) -> str:
        """Identifier combining the `swe_bench` prefix, `subset` and `split`."""
        return f"swe_bench_{self.subset}_{self.split}"
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
class ExpertInstancesFromFile(BaseModel, AbstractInstanceSource):
    """Load instances from a file. The difference to `InstancesFromFile` is that the instances are configured as full
    `EnvironmentInstanceConfig` objects, i.e., we could specify separate deployment configurations etc.

    Every record of the file is validated directly as a `BatchInstance`.
    """

    path: Path
    filter: str = ".*"
    """Regular expression to filter the instances by instance id."""
    slice: str = ""
    """Select only a slice of the instances (after filtering by `filter`).
    Possible values are stop or start:stop or start:stop:step.
    (i.e., it behaves exactly like python's list slicing `list[slice]`).
    """
    shuffle: bool = False
    """Shuffle the instances (before filtering and slicing)."""

    type: Literal["expert_file"] = "expert_file"
    """Discriminator for (de)serialization/CLI. Do not change."""

    def get_instance_configs(self) -> list[BatchInstance]:
        """Load the records at `path` and validate each as a `BatchInstance`.

        Returns:
            The result of `_filter_batch_items` applied to the validated
            instances with this source's `filter`, `slice` and `shuffle`.
        """
        records = load_file(self.path)
        validated = list(map(BatchInstance.model_validate, records))
        return _filter_batch_items(
            validated,
            filter_=self.filter,
            slice_=self.slice,
            shuffle=self.shuffle,
        )

    @property
    def id(self) -> str:
        """Identifier of this source: the stem of `path`."""
        return self.path.stem
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
class SWESmithInstances(BaseModel, AbstractInstanceSource):
    """Instance source backed by a SWE-smith instance file.

    Each record of the file is adapted to the `SimpleBatchInstance` field
    names, validated, and expanded into a full `BatchInstance` using
    `deployment`.
    """

    path: Path

    deployment: DeploymentConfig = Field(
        default_factory=lambda: DockerDeploymentConfig(image="python:3.11"),
    )
    """Deployment configuration. Note that the image_name option is overwritten by the images specified in the task instances.
    """

    filter: str = ".*"
    """Regular expression to filter the instances by instance id."""
    slice: str = ""
    """Select only a slice of the instances (after filtering by `filter`).
    Possible values are stop or start:stop or start:stop:step.
    (i.e., it behaves exactly like python's list slicing `list[slice]`).
    """
    shuffle: bool = False
    """Shuffle the instances (before filtering and slicing)."""

    type: Literal["swesmith"] = "swesmith"
    """Discriminator for (de)serialization/CLI. Do not change."""

    def get_instance_configs(self) -> list[BatchInstance]:
        """Load the SWE-smith records at `path` and build batch instances.

        Returns:
            The result of `_filter_batch_items` applied to the expanded
            instances with this source's `filter`, `slice` and `shuffle`.

        Raises:
            KeyError: If a record lacks `instance_id` or `FAIL_TO_PASS`.
        """

        def convert_instance_dict(instance_dict: dict[str, Any]) -> dict[str, Any]:
            """Add the keys `SimpleBatchInstance` is validated from (in place) and return the dict."""
            iid = instance_dict["instance_id"]
            instance_dict["id"] = iid
            # todo: The base_commit is currently incorrect
            instance_dict["base_commit"] = iid
            instance_dict["problem_statement"] = instance_dict.get("problem_statement", "")
            instance_dict["repo_name"] = "testbed"
            instance_dict["extra_fields"] = {"fail_to_pass": instance_dict["FAIL_TO_PASS"]}
            return instance_dict

        # Adapt, validate and expand one record at a time, in file order.
        expanded = []
        for record in load_file(self.path):
            simple_item = SimpleBatchInstance.model_validate(convert_instance_dict(record))
            expanded.append(simple_item.to_full_batch_instance(self.deployment))
        return _filter_batch_items(
            expanded,
            filter_=self.filter,
            slice_=self.slice,
            shuffle=self.shuffle,
        )

    @property
    def id(self) -> str:
        """Identifier made of the `swesmith_` prefix and the stem of `path`."""
        return f"swesmith_{self.path.stem}"
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
# Union of all batch instance source models defined above; the member order is unchanged.
BatchInstanceSourceConfig = (
    InstancesFromHuggingFace
    | InstancesFromFile
    | SWEBenchInstances
    | ExpertInstancesFromFile
    | SWESmithInstances
)
|