@elizaos/sweagent-root 2.0.0-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +270 -0
- package/package.json +71 -0
- package/python/LICENSE +21 -0
- package/python/config/README.md +15 -0
- package/python/config/bash_only.yaml +222 -0
- package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
- package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
- package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
- package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
- package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
- package/python/config/coding_challenge.yaml +104 -0
- package/python/config/default.yaml +69 -0
- package/python/config/default_backticks.yaml +69 -0
- package/python/config/default_mm_no_images.yaml +82 -0
- package/python/config/default_mm_with_images.yaml +83 -0
- package/python/config/demo/default.yaml +80 -0
- package/python/config/demo/no_instructions.yaml +69 -0
- package/python/config/demo/only_bash.yaml +60 -0
- package/python/config/exotic/default_shell.yaml +52 -0
- package/python/config/exotic/windowed_replace.yaml +125 -0
- package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
- package/python/config/human/human.yaml +24 -0
- package/python/config/human/human_demo.yaml +52 -0
- package/python/config/sweagent_0_7/07.yaml +101 -0
- package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
- package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
- package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
- package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
- package/python/mlc_config.json +44 -0
- package/python/pyproject.toml +262 -0
- package/python/sweagent/__init__.py +114 -0
- package/python/sweagent/__main__.py +4 -0
- package/python/sweagent/agent/__init__.py +0 -0
- package/python/sweagent/agent/action_sampler.py +317 -0
- package/python/sweagent/agent/agents.py +1294 -0
- package/python/sweagent/agent/extra/shell_agent.py +106 -0
- package/python/sweagent/agent/history_processors.py +399 -0
- package/python/sweagent/agent/hooks/__init__.py +0 -0
- package/python/sweagent/agent/hooks/abstract.py +139 -0
- package/python/sweagent/agent/hooks/status.py +34 -0
- package/python/sweagent/agent/models.py +896 -0
- package/python/sweagent/agent/problem_statement.py +312 -0
- package/python/sweagent/agent/reviewer.py +664 -0
- package/python/sweagent/environment/__init__.py +0 -0
- package/python/sweagent/environment/hooks/__init__.py +0 -0
- package/python/sweagent/environment/hooks/abstract.py +60 -0
- package/python/sweagent/environment/hooks/status.py +28 -0
- package/python/sweagent/environment/repo.py +219 -0
- package/python/sweagent/environment/swe_env.py +276 -0
- package/python/sweagent/exceptions.py +54 -0
- package/python/sweagent/inspector/README.md +6 -0
- package/python/sweagent/inspector/__init__.py +0 -0
- package/python/sweagent/inspector/favicon.ico +0 -0
- package/python/sweagent/inspector/fileViewer.js +354 -0
- package/python/sweagent/inspector/icons/computer.png +0 -0
- package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
- package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
- package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
- package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
- package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
- package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
- package/python/sweagent/inspector/index.html +25 -0
- package/python/sweagent/inspector/server.py +354 -0
- package/python/sweagent/inspector/static.py +169 -0
- package/python/sweagent/inspector/style.css +454 -0
- package/python/sweagent/run/__init__.py +0 -0
- package/python/sweagent/run/_progress.py +158 -0
- package/python/sweagent/run/batch_instances.py +419 -0
- package/python/sweagent/run/common.py +387 -0
- package/python/sweagent/run/compare_runs.py +123 -0
- package/python/sweagent/run/extract_pred.py +19 -0
- package/python/sweagent/run/hooks/__init__.py +0 -0
- package/python/sweagent/run/hooks/abstract.py +67 -0
- package/python/sweagent/run/hooks/apply_patch.py +106 -0
- package/python/sweagent/run/hooks/open_pr.py +244 -0
- package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
- package/python/sweagent/run/inspector_cli.py +493 -0
- package/python/sweagent/run/merge_predictions.py +64 -0
- package/python/sweagent/run/quick_stats.py +96 -0
- package/python/sweagent/run/remove_unfinished.py +63 -0
- package/python/sweagent/run/rich_test.py +91 -0
- package/python/sweagent/run/run.py +147 -0
- package/python/sweagent/run/run_batch.py +442 -0
- package/python/sweagent/run/run_replay.py +219 -0
- package/python/sweagent/run/run_shell.py +155 -0
- package/python/sweagent/run/run_single.py +225 -0
- package/python/sweagent/run/run_traj_to_demo.py +85 -0
- package/python/sweagent/tools/__init__.py +0 -0
- package/python/sweagent/tools/bundle.py +57 -0
- package/python/sweagent/tools/commands.py +220 -0
- package/python/sweagent/tools/parsing.py +619 -0
- package/python/sweagent/tools/tools.py +430 -0
- package/python/sweagent/tools/utils.py +108 -0
- package/python/sweagent/types.py +102 -0
- package/python/sweagent/utils/__init__.py +0 -0
- package/python/sweagent/utils/config.py +80 -0
- package/python/sweagent/utils/files.py +27 -0
- package/python/sweagent/utils/github.py +118 -0
- package/python/sweagent/utils/jinja_warnings.py +14 -0
- package/python/sweagent/utils/log.py +175 -0
- package/python/sweagent/utils/patch_formatter.py +152 -0
- package/python/sweagent/utils/serialization.py +45 -0
- package/python/tests/__init__.py +0 -0
- package/python/tests/conftest.py +191 -0
- package/python/tests/test_agent.py +258 -0
- package/python/tests/test_batch_instance.py +43 -0
- package/python/tests/test_commands/_interactive_dummy.py +35 -0
- package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
- package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
- package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
- package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
- package/python/tests/test_data/data_sources/human_eval.json +1 -0
- package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
- package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
- package/python/tests/test_env.py +66 -0
- package/python/tests/test_env_utils.py +129 -0
- package/python/tests/test_history_processors.py +40 -0
- package/python/tests/test_models.py +23 -0
- package/python/tests/test_openai_live.py +164 -0
- package/python/tests/test_packaging.py +7 -0
- package/python/tests/test_parsing.py +131 -0
- package/python/tests/test_problem_statement_multimodal.py +111 -0
- package/python/tests/test_quick_stats.py +42 -0
- package/python/tests/test_run.py +37 -0
- package/python/tests/test_run_batch.py +110 -0
- package/python/tests/test_run_hooks.py +114 -0
- package/python/tests/test_run_replay.py +33 -0
- package/python/tests/test_run_single.py +125 -0
- package/python/tests/test_tools_command_parsing.py +193 -0
- package/python/tests/test_utils.py +15 -0
- package/python/tests/tools/__init__.py +0 -0
- package/python/tests/tools/conftest.py +12 -0
- package/python/tests/tools/test_default_utils.py +153 -0
- package/python/tests/tools/test_edit_replace.py +0 -0
- package/python/tests/tools/test_split_string.py +82 -0
- package/python/tests/utils.py +29 -0
- package/python/tools/diff_state/bin/_state_diff_state +52 -0
- package/python/tools/diff_state/config.yaml +2 -0
- package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
- package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
- package/python/tools/edit_anthropic/config.yaml +56 -0
- package/python/tools/edit_anthropic/install.sh +3 -0
- package/python/tools/filemap/bin/filemap +45 -0
- package/python/tools/filemap/config.yaml +9 -0
- package/python/tools/filemap/install.sh +2 -0
- package/python/tools/forfeit/bin/exit_forfeit +5 -0
- package/python/tools/forfeit/config.yaml +5 -0
- package/python/tools/image_tools/bin/view_image +36 -0
- package/python/tools/image_tools/config.yaml +9 -0
- package/python/tools/multilingual_setup/bin/do_nothing +2 -0
- package/python/tools/multilingual_setup/config.yaml +1 -0
- package/python/tools/multilingual_setup/install.sh +45 -0
- package/python/tools/registry/bin/_read_env +10 -0
- package/python/tools/registry/bin/_write_env +10 -0
- package/python/tools/registry/config.yaml +1 -0
- package/python/tools/registry/install.sh +6 -0
- package/python/tools/registry/lib/__init__.py +0 -0
- package/python/tools/registry/lib/registry.py +56 -0
- package/python/tools/review_on_submit_m/README.md +6 -0
- package/python/tools/review_on_submit_m/bin/submit +54 -0
- package/python/tools/review_on_submit_m/config.yaml +6 -0
- package/python/tools/review_on_submit_m/install.sh +0 -0
- package/python/tools/search/bin/find_file +31 -0
- package/python/tools/search/bin/search_dir +39 -0
- package/python/tools/search/bin/search_file +55 -0
- package/python/tools/search/config.yaml +37 -0
- package/python/tools/search/install.sh +3 -0
- package/python/tools/submit/bin/submit +17 -0
- package/python/tools/submit/config.yaml +5 -0
- package/python/tools/web_browser/bin/click_mouse +41 -0
- package/python/tools/web_browser/bin/close_site +28 -0
- package/python/tools/web_browser/bin/double_click_mouse +37 -0
- package/python/tools/web_browser/bin/drag_mouse +46 -0
- package/python/tools/web_browser/bin/execute_script_on_page +39 -0
- package/python/tools/web_browser/bin/get_console_output +48 -0
- package/python/tools/web_browser/bin/move_mouse +35 -0
- package/python/tools/web_browser/bin/navigate_back +33 -0
- package/python/tools/web_browser/bin/navigate_forward +33 -0
- package/python/tools/web_browser/bin/open_site +36 -0
- package/python/tools/web_browser/bin/press_keys_on_page +51 -0
- package/python/tools/web_browser/bin/reload_page +33 -0
- package/python/tools/web_browser/bin/run_web_browser_server +394 -0
- package/python/tools/web_browser/bin/screenshot_site +38 -0
- package/python/tools/web_browser/bin/scroll_on_page +40 -0
- package/python/tools/web_browser/bin/set_browser_window_size +40 -0
- package/python/tools/web_browser/bin/type_text +34 -0
- package/python/tools/web_browser/bin/wait_time +39 -0
- package/python/tools/web_browser/config.yaml +155 -0
- package/python/tools/web_browser/install.sh +22 -0
- package/python/tools/web_browser/lib/browser_manager.py +404 -0
- package/python/tools/web_browser/lib/web_browser_config.py +33 -0
- package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
- package/python/tools/web_browser/test_console.html +1 -0
- package/python/tools/windowed/bin/_state +25 -0
- package/python/tools/windowed/bin/create +29 -0
- package/python/tools/windowed/bin/goto +37 -0
- package/python/tools/windowed/bin/open +49 -0
- package/python/tools/windowed/bin/scroll_down +12 -0
- package/python/tools/windowed/bin/scroll_up +13 -0
- package/python/tools/windowed/config.yaml +38 -0
- package/python/tools/windowed/install.sh +15 -0
- package/python/tools/windowed/lib/__init__.py +0 -0
- package/python/tools/windowed/lib/flake8_utils.py +147 -0
- package/python/tools/windowed/lib/windowed_file.py +312 -0
- package/python/tools/windowed_edit_linting/bin/edit +128 -0
- package/python/tools/windowed_edit_linting/config.yaml +31 -0
- package/python/tools/windowed_edit_linting/install.sh +5 -0
- package/python/tools/windowed_edit_replace/bin/edit +172 -0
- package/python/tools/windowed_edit_replace/bin/insert +77 -0
- package/python/tools/windowed_edit_replace/config.yaml +60 -0
- package/python/tools/windowed_edit_replace/install.sh +5 -0
- package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
- package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
- package/python/tools/windowed_edit_rewrite/install.sh +5 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
- package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
- package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
- package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
- package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
- package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
- package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
- package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
- package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
- package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
- package/rust/Cargo.toml +100 -0
- package/rust/README.md +49 -0
- package/rust/src/agent/action_sampler.rs +130 -0
- package/rust/src/agent/agents.rs +1029 -0
- package/rust/src/agent/history_processors.rs +277 -0
- package/rust/src/agent/hooks/mod.rs +208 -0
- package/rust/src/agent/mod.rs +24 -0
- package/rust/src/agent/models.rs +837 -0
- package/rust/src/agent/problem_statement.rs +355 -0
- package/rust/src/agent/reviewer.rs +505 -0
- package/rust/src/bin/sweagent.rs +784 -0
- package/rust/src/environment/deployment.rs +631 -0
- package/rust/src/environment/hooks/mod.rs +114 -0
- package/rust/src/environment/mod.rs +16 -0
- package/rust/src/environment/repo.rs +265 -0
- package/rust/src/environment/runtime.rs +237 -0
- package/rust/src/environment/swe_env.rs +248 -0
- package/rust/src/exceptions.rs +228 -0
- package/rust/src/lib.rs +68 -0
- package/rust/src/monitoring.rs +482 -0
- package/rust/src/run/hooks/mod.rs +134 -0
- package/rust/src/run/mod.rs +12 -0
- package/rust/src/run/run_batch.rs +563 -0
- package/rust/src/run/run_single.rs +196 -0
- package/rust/src/tools/bundle.rs +224 -0
- package/rust/src/tools/commands.rs +173 -0
- package/rust/src/tools/mod.rs +295 -0
- package/rust/src/tools/parsing.rs +354 -0
- package/rust/src/tools/registry.rs +143 -0
- package/rust/src/types.rs +554 -0
- package/rust/src/utils/config.rs +105 -0
- package/rust/src/utils/files.rs +137 -0
- package/rust/src/utils/github.rs +171 -0
- package/rust/src/utils/log.rs +65 -0
- package/rust/src/utils/mod.rs +17 -0
- package/rust/src/utils/serialization.rs +181 -0
- package/rust/src/utils/template.rs +173 -0
- package/typescript/README.md +335 -0
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import subprocess
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import rich
|
|
5
|
+
import rich.markdown
|
|
6
|
+
import rich.panel
|
|
7
|
+
|
|
8
|
+
from sweagent.agent.problem_statement import ProblemStatementConfig
|
|
9
|
+
from sweagent.environment.repo import LocalRepoConfig
|
|
10
|
+
from sweagent.environment.swe_env import SWEEnv
|
|
11
|
+
from sweagent.run.common import _is_promising_patch
|
|
12
|
+
from sweagent.run.hooks.abstract import RunHook
|
|
13
|
+
from sweagent.types import AgentRunResult
|
|
14
|
+
from sweagent.utils.log import get_logger
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class SaveApplyPatchHook(RunHook):
    """This hook saves patches to a separate directory and optionally applies them to a local repository.

    Lifecycle, as implemented by the callbacks below:

    * ``on_init`` remembers the output directory of the run.
    * ``on_instance_start`` remembers the environment and the problem statement.
    * ``on_instance_completed`` writes the submission to
      ``<output_dir>/<instance_id>/<instance_id>.patch`` and, if requested,
      applies it with ``git apply`` to the local repository of the environment.
    """

    def __init__(self, apply_patch_locally: bool = False, show_success_message: bool = True):
        """
        Args:
            apply_patch_locally: If True, a promising patch is applied to the
                local repository (only when the environment's repo is a
                `LocalRepoConfig`).
            show_success_message: If True, print a panel with usage
                instructions whenever a promising patch has been saved.
        """
        self.logger = get_logger("swea-save_apply_patch", emoji="⚡️")
        self._apply_patch_locally = apply_patch_locally
        self._show_success_message = show_success_message

    def on_init(self, *, run):
        """Remember the output directory of `run`; patches are stored below it."""
        self._output_dir = Path(run.output_dir)

    def on_instance_start(self, *, index: int, env: SWEEnv, problem_statement: ProblemStatementConfig):
        """Remember the environment and problem statement of the instance being started."""
        self._env = env
        self._problem_statement = problem_statement

    def on_instance_completed(self, *, result: AgentRunResult):
        """Save the submitted patch and, if configured, apply it to the local repository.

        The patch is only applied when all of the following hold: a patch file
        was written, local application was requested, the patch is considered
        promising, and the environment's repository is a `LocalRepoConfig`.
        """
        instance_id = self._problem_statement.id
        patch_path = self._save_patch(instance_id, result.info)
        if not patch_path:
            return
        if not self._apply_patch_locally:
            return
        if not _is_promising_patch(result.info):
            return
        if self._env.repo is None:
            return
        if not isinstance(self._env.repo, LocalRepoConfig):
            # Only a repository on the local filesystem can be patched from here.
            return
        local_dir = Path(self._env.repo.path)
        self._apply_patch(patch_path, local_dir)

    @staticmethod
    def _print_patch_message(patch_output_file: Path):
        """Print a success panel plus a shell snippet showing how to inspect/apply the patch."""
        # Import explicitly: the module only imports `rich`, `rich.markdown` and
        # `rich.panel`, so `rich.console` was reachable only as a side effect
        # of those imports.
        from rich.console import Console

        console = Console()
        msg = [
            "SWE-agent has produced a patch that it believes will solve the issue you submitted!",
            "Use the code snippet below to inspect or apply it!",
        ]
        panel = rich.panel.Panel.fit(
            "\n".join(msg),
            title="🎉 Submission successful 🎉",
        )
        console.print(panel)
        content = [
            "```bash",
            "# The patch has been saved to your local filesystem at:",
            f"PATCH_FILE_PATH='{patch_output_file.resolve()}'",
            "# Inspect it:",
            'cat "${PATCH_FILE_PATH}"',
            "# Apply it to a local repository:",
            "cd <your local repo root>",
            'git apply "${PATCH_FILE_PATH}"',
            "```",
        ]
        console.print(rich.markdown.Markdown("\n".join(content)))

    def _save_patch(self, instance_id: str, info) -> Path | None:
        """Create a patch file that can be applied with `git apply`.

        Args:
            instance_id: ID of the instance; used for the directory and file name.
            info: Mapping with the run information; the patch text is read from
                its ``"submission"`` entry.

        Returns:
            The path to the patch file, if it was saved. Otherwise, returns None.
        """
        patch_output_dir = self._output_dir / instance_id
        patch_output_dir.mkdir(exist_ok=True, parents=True)
        patch_output_file = patch_output_dir / f"{instance_id}.patch"
        if info.get("submission") is None:
            self.logger.info("No patch to save.")
            return None
        model_patch = info["submission"]
        # Write as UTF-8 explicitly so the result does not depend on the
        # platform's default encoding.
        patch_output_file.write_text(model_patch, encoding="utf-8")
        if _is_promising_patch(info):
            # Only print big congratulations if we actually believe
            # the patch will solve the issue
            if self._show_success_message:
                self._print_patch_message(patch_output_file)
        return patch_output_file

    def _apply_patch(self, patch_file: Path, local_dir: Path) -> None:
        """Apply a patch to a local directory.

        Runs ``git apply`` inside `local_dir`. A failing ``git apply`` is
        logged as an error and otherwise ignored.

        Args:
            patch_file: Path of the patch file; must exist.
            local_dir: Directory in which ``git apply`` is run; must exist.
        """

        assert local_dir.is_dir()
        assert patch_file.exists()
        # The resolve() is important, because we're gonna run the cmd
        # somewhere else
        cmd = ["git", "apply", str(patch_file.resolve())]
        try:
            subprocess.run(cmd, cwd=local_dir, check=True)
        except subprocess.CalledProcessError as e:
            self.logger.error(f"Failed to apply patch {patch_file} to {local_dir}: {e}")
            return
        self.logger.info(f"Applied patch {patch_file} to {local_dir}")
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import random
|
|
3
|
+
import shlex
|
|
4
|
+
|
|
5
|
+
from ghapi.all import GhApi
|
|
6
|
+
from pydantic import BaseModel
|
|
7
|
+
|
|
8
|
+
from sweagent.environment.swe_env import SWEEnv
|
|
9
|
+
from sweagent.run.hooks.abstract import RunHook
|
|
10
|
+
from sweagent.types import AgentRunResult
|
|
11
|
+
from sweagent.utils.github import (
|
|
12
|
+
InvalidGithubURL,
|
|
13
|
+
_get_associated_commit_urls,
|
|
14
|
+
_get_gh_issue_data,
|
|
15
|
+
_parse_gh_issue_url,
|
|
16
|
+
)
|
|
17
|
+
from sweagent.utils.log import get_logger
|
|
18
|
+
|
|
19
|
+
# NOTE
|
|
20
|
+
# THE IMPLEMENTATION DETAILS HERE WILL CHANGE SOON!
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# fixme: Bring back the ability to open the PR to a fork
|
|
24
|
+
def open_pr(*, logger, token, env: SWEEnv, github_url, trajectory, _dry_run: bool = False) -> None:
    """Create PR to repository

    Commits everything in the environment's working tree onto a new branch,
    pushes that branch and opens a draft pull request that closes the issue.

    Args:
        logger: Logger used for progress and debug messages
        token: GitHub token used for the API calls (may be empty)
        env: Environment whose `communicate` method runs the git commands
        github_url: URL of the GitHub issue the PR is supposed to close
        trajectory: Trajectory of actions taken by the agent
        _dry_run: Whether to actually push anything or just simulate it.
            In a dry run the commit is created with `--allow-empty`, the push
            command is only echoed and no PR is created.

    Raises:
        ValueError: If `github_url` is not a valid GitHub issue URL.
    """

    issue_url = github_url
    logger.info("Opening PR")
    try:
        issue = _get_gh_issue_data(issue_url, token=token)
    except InvalidGithubURL as e:
        msg = "Data path must be a github issue URL if open_pr is set to True."
        raise ValueError(msg) from e
    # Random suffix so that repeated runs on the same issue get distinct branches
    branch_name = f"swe-agent-fix-#{issue.number}-" + str(random.random())[2:10]
    env.communicate(
        input="git config user.email 'noemail@swe-agent.com' && git config user.name 'SWE-agent'",
        error_msg="Failed to set git user",
        timeout=10,
        check="raise",
    )
    env.communicate(input="rm -f model.patch", error_msg="Failed to remove model patch", timeout=10, check="raise")
    env.communicate(
        input=f"git checkout -b {branch_name}", error_msg="Failed to switch to new branch", timeout=10, check="raise"
    )
    env.communicate(input="git add .", error_msg="Failed to add commits", timeout=10, check="raise")
    dry_run_flag = "--allow-empty" if _dry_run else ""
    # Issue titles are arbitrary text, so they are quoted before being put on the command line
    commit_msg = [
        shlex.quote(f"Fix: {issue.title}"),
        shlex.quote(f"Closes #{issue.number}"),
    ]
    out = env.communicate(
        input=f"git commit -m {commit_msg[0]} -m {commit_msg[1]} {dry_run_flag}",
        error_msg="Failed to commit changes",
        timeout=10,
        check="raise",
    )
    logger.debug(f"Committed changes: {out}")

    owner, repo, _ = _parse_gh_issue_url(issue_url)
    # fixme: bring this back
    # If `--repo_path` was specified with a different github URL, then the record will contain
    # the forking user
    forker = owner
    head = branch_name
    remote = "origin"
    if forker != owner:
        head = f"{forker}:{branch_name}"
        token_prefix = ""
        if token:
            token_prefix = f"{token}@"
        fork_url = f"https://{token_prefix}github.com/{forker}/{repo}.git"
        # Log the URL without the token prefix so the secret never reaches the logs
        logger.debug(f"Using fork: https://github.com/{forker}/{repo}.git")
        env.communicate(
            input=f"git remote add fork {fork_url}",
            error_msg="Failed to create new git remote",
            timeout=10,
            check="raise",
        )
        remote = "fork"
    dry_run_prefix = "echo " if _dry_run else ""
    # Fail here, like the other git steps, rather than letting a failed push
    # surface later as an API error when the PR is created.
    out = env.communicate(
        input=f"{dry_run_prefix} git push {remote} {branch_name}",
        error_msg=(
            "Failed to push branch to remote. Please check your token and permissions. "
            "You might want to push to a fork with the push_gh_repo_url option."
        ),
        timeout=10,
        check="raise",
    )
    logger.debug(f"Pushed commit to {remote=} {branch_name=}: {out}")
    body = (
        f"This is a PR opened by AI tool [SWE Agent](https://github.com/SWE-agent/SWE-agent/) "
        f"to close [#{issue.number}]({issue_url}) ({issue.title}).\n\nCloses #{issue.number}."
    )
    body += "\n\n" + format_trajectory_markdown(trajectory, char_limit=60_000)
    api = GhApi(token=token)
    default_branch = api.repos.get(owner, repo).default_branch
    if not _dry_run:
        args = dict(
            owner=owner,
            repo=repo,
            title=f"SWE-agent[bot] PR to fix: {issue.title}",
            head=head,
            base=default_branch,
            body=body,
            draft=True,
        )
        logger.debug(f"Creating PR with args: {args}")
        pr_info = api.pulls.create(**args)  # type: ignore
        logger.info(
            f"🎉 PR created as a draft at {pr_info.html_url}. Please review it carefully, push "
            "any required changes onto the branch and then click "
            "'Ready for Review' to bring it to the attention of the maintainers.",
        )
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class OpenPRConfig(BaseModel):
    # Companion option for open_pr. While this is True, the PR step is skipped
    # whenever commits already claim to fix the issue. Only switch it to False
    # if you are certain those commits are not fixes, or if the repository is
    # your own!
    skip_if_commits_reference_issue: bool = True
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class OpenPRHook(RunHook):
|
|
128
|
+
"""This hook opens a PR if the issue is solved and the user has enabled the option."""
|
|
129
|
+
|
|
130
|
+
def __init__(self, config: OpenPRConfig):
    """Keep the hook configuration and create the hook's logger.

    Args:
        config: Options controlling when a PR is opened.
    """
    hook_logger = get_logger("swea-open_pr", emoji="⚡️")
    self.logger = hook_logger
    self._config = config
|
|
133
|
+
|
|
134
|
+
def on_init(self, *, run):
    """Take the environment and problem statement from `run`, and the token from `GITHUB_TOKEN`.

    The token is the empty string when the `GITHUB_TOKEN` environment
    variable is not set.
    """
    environment = run.env
    self._env = environment
    self._token: str = os.environ.get("GITHUB_TOKEN", "")
    statement = run.problem_statement
    self._problem_statement = statement
|
|
138
|
+
|
|
139
|
+
def on_instance_completed(self, result: AgentRunResult):
|
|
140
|
+
if self.should_open_pr(result):
|
|
141
|
+
open_pr(
|
|
142
|
+
logger=self.logger,
|
|
143
|
+
token=self._token,
|
|
144
|
+
env=self._env,
|
|
145
|
+
github_url=self._problem_statement.github_url,
|
|
146
|
+
trajectory=result.trajectory,
|
|
147
|
+
)
|
|
148
|
+
|
|
149
|
+
def should_open_pr(self, result: AgentRunResult) -> bool:
|
|
150
|
+
"""Does opening a PR make sense?"""
|
|
151
|
+
if not result.info.get("submission"):
|
|
152
|
+
self.logger.info("Not opening PR because no submission was made.")
|
|
153
|
+
return False
|
|
154
|
+
if result.info.get("exit_status") != "submitted":
|
|
155
|
+
self.logger.info(
|
|
156
|
+
"Not opening PR because exit status was %s and not submitted.", result.info.get("exit_status")
|
|
157
|
+
)
|
|
158
|
+
return False
|
|
159
|
+
try:
|
|
160
|
+
issue = _get_gh_issue_data(self._problem_statement.github_url, token=self._token)
|
|
161
|
+
except InvalidGithubURL:
|
|
162
|
+
self.logger.info("Currently only GitHub is supported to open PRs to. Skipping PR creation.")
|
|
163
|
+
return False
|
|
164
|
+
if issue.state != "open":
|
|
165
|
+
self.logger.info(f"Issue is not open (state={issue.state}. Skipping PR creation.")
|
|
166
|
+
return False
|
|
167
|
+
if issue.assignee:
|
|
168
|
+
self.logger.info("Issue is already assigned. Skipping PR creation. Be nice :)")
|
|
169
|
+
return False
|
|
170
|
+
if issue.locked:
|
|
171
|
+
self.logger.info("Issue is locked. Skipping PR creation.")
|
|
172
|
+
return False
|
|
173
|
+
org, repo, issue_number = _parse_gh_issue_url(self._problem_statement.github_url)
|
|
174
|
+
associated_commits = _get_associated_commit_urls(org, repo, issue_number, token=self._token)
|
|
175
|
+
if associated_commits:
|
|
176
|
+
commit_url_strs = ", ".join(associated_commits)
|
|
177
|
+
if self._config.skip_if_commits_reference_issue:
|
|
178
|
+
self.logger.info(f"Issue already has associated commits (see {commit_url_strs}). Skipping PR creation.")
|
|
179
|
+
return False
|
|
180
|
+
else:
|
|
181
|
+
self.logger.warning(
|
|
182
|
+
"Proceeding with PR creation even though there are already commits "
|
|
183
|
+
f"({commit_url_strs}) associated with the issue. Please only do this for your own repositories "
|
|
184
|
+
"or after verifying that the existing commits do not fix the issue.",
|
|
185
|
+
)
|
|
186
|
+
return True
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _remove_triple_backticks(text: str) -> str:
|
|
190
|
+
return "\n".join(line.removeprefix("```") for line in text.splitlines())
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def format_trajectory_markdown(trajectory: list[dict[str, str]], char_limit: int | None = None):
|
|
194
|
+
"""Format a trajectory as a markdown string for use in gh PR description.
|
|
195
|
+
|
|
196
|
+
Args:
|
|
197
|
+
char_limit: If not None, truncate the trajectory to this many characters.
|
|
198
|
+
"""
|
|
199
|
+
prefix = [
|
|
200
|
+
"<details>",
|
|
201
|
+
"<summary>Thought process ('trajectory') of SWE-agent (click to expand)</summary>",
|
|
202
|
+
"",
|
|
203
|
+
"",
|
|
204
|
+
]
|
|
205
|
+
prefix_text = "\n".join(prefix)
|
|
206
|
+
suffix = [
|
|
207
|
+
"",
|
|
208
|
+
"</details>",
|
|
209
|
+
]
|
|
210
|
+
suffix_text = "\n".join(suffix)
|
|
211
|
+
|
|
212
|
+
steps = []
|
|
213
|
+
current_length = len(prefix_text) + len(suffix_text)
|
|
214
|
+
|
|
215
|
+
for i, step in enumerate(trajectory):
|
|
216
|
+
step_strs = [
|
|
217
|
+
f"**🧑🚒 Response ({i})**: ",
|
|
218
|
+
f"{step['response'].strip()}",
|
|
219
|
+
f"**👀 Observation ({i})**:",
|
|
220
|
+
"```",
|
|
221
|
+
f"{_remove_triple_backticks(step['observation']).strip()}",
|
|
222
|
+
"```",
|
|
223
|
+
]
|
|
224
|
+
step_text = "\n".join(step_strs)
|
|
225
|
+
|
|
226
|
+
# Calculate separator length (only needed for steps after the first one)
|
|
227
|
+
separator_length = 0
|
|
228
|
+
if steps:
|
|
229
|
+
separator_length = len("\n\n---\n\n")
|
|
230
|
+
|
|
231
|
+
# Check if adding this step would exceed the character limit
|
|
232
|
+
if char_limit is not None and current_length + separator_length + len(step_text) > char_limit:
|
|
233
|
+
if i > 0:
|
|
234
|
+
steps.append("\n\n... (truncated due to length limit)")
|
|
235
|
+
break
|
|
236
|
+
|
|
237
|
+
if steps:
|
|
238
|
+
steps.append("\n\n---\n\n")
|
|
239
|
+
current_length += separator_length
|
|
240
|
+
|
|
241
|
+
steps.append(step_text)
|
|
242
|
+
current_length += len(step_text)
|
|
243
|
+
|
|
244
|
+
return prefix_text + "".join(steps) + suffix_text
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""SweBench evaluation hook.
|
|
2
|
+
|
|
3
|
+
Will be automatically added to `run_batch` if `SWEBenchInstances.evaluate` is set to true
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import subprocess
|
|
7
|
+
import sys
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from threading import Lock
|
|
11
|
+
from time import time
|
|
12
|
+
|
|
13
|
+
from sweagent.run.hooks.abstract import RunHook
|
|
14
|
+
from sweagent.run.merge_predictions import merge_predictions
|
|
15
|
+
from sweagent.types import AgentRunResult
|
|
16
|
+
from sweagent.utils.log import get_logger
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class SweBenchEvaluate(RunHook):
    """Run hook that submits predictions to SWE-bench through the ``sb-cli`` tool.

    Intermediate submissions are started in the background while instances
    complete; a final blocking submission is made in `on_end`.
    """

    _SUBSET_MAP = {"lite": "swe-bench_lite", "verified": "swe-bench_verified", "multimodal": "swe-bench_multimodal"}

    def __init__(self, output_dir: Path, subset: str, split: str, continuous_submission_every: int = 0) -> None:
        """Set up the hook.

        Args:
            output_dir: Run directory holding the predictions; reports are
                written to its ``sb-cli-reports`` subdirectory.
            subset: Key into ``_SUBSET_MAP`` (``"lite"``, ``"verified"`` or
                ``"multimodal"``).
            split: Split name passed through to ``sb-cli``.
            continuous_submission_every: Minimum number of seconds between two
                intermediate submissions; ``0`` disables them.
        """
        super().__init__()
        self.output_dir = output_dir
        self.subset = subset
        self.split = split
        self.continuous_submission_every = continuous_submission_every
        self.logger = get_logger("SB-evaluate", emoji="😬")
        self.merge_lock = Lock()
        self.last_evaluation_time = time()
        self.evaluation_interval = continuous_submission_every
        self._running_calls: list[subprocess.Popen] = []
        # We need to add a suffix to the run_id to avoid collisions when you reuse the name of your run
        self._time_suffix = datetime.now().strftime("%Y%m%d%H%M%S%f")

    @property
    def run_id(self) -> str:
        """Name of the output directory plus the creation-time suffix."""
        return f"{self.output_dir.name}_{self._time_suffix}"

    def _get_sb_call(self, preds_path: Path, submit_only: bool = False) -> list[str]:
        """Build the ``sb-cli submit`` command line for ``preds_path``.

        With ``submit_only`` the flags for waiting on the evaluation, generating
        a report and verifying the submission are all set to ``0``.

        Raises:
            KeyError: If ``self.subset`` is not a key of ``_SUBSET_MAP``.
        """
        args = [
            "sb-cli",
            "submit",
            self._SUBSET_MAP[self.subset],
            self.split,
            "--predictions_path",
            str(preds_path),
            "--run_id",
            self.run_id,
            "--output_dir",
            str(self.output_dir / "sb-cli-reports"),
        ]
        if submit_only:
            args.extend(["--wait_for_evaluation", "0", "--gen_report", "0", "--verify_submission", "0"])
        return args

    def check_running_calls(self) -> None:
        """Warn if one of the running calls failed.

        Finished calls are dropped from the bookkeeping list; calls that are
        still running are kept.
        """
        # Iterate over a snapshot: removing from the list being iterated would
        # skip the element that follows each removed one.
        for call in list(self._running_calls):
            if call.poll() is None:
                continue
            if call.returncode != 0:
                raw = call.stderr.read() if call.stderr is not None else b""
                # The pipes are opened in binary mode; decode so the log shows
                # readable text instead of a bytes repr.
                message = raw.decode(errors="replace") if isinstance(raw, bytes) else raw
                self.logger.error("Failed to submit results to SweBench eval: %s", message)
            self._running_calls.remove(call)

    def on_instance_completed(self, *, result: AgentRunResult):
        """Start a background intermediate submission if the interval has elapsed.

        Does nothing when intermediate submissions are disabled (interval of 0)
        or when the last submission was less than the interval ago.
        """
        if self.evaluation_interval == 0:
            return

        current_time = time()
        if current_time - self.last_evaluation_time < self.evaluation_interval:
            return

        tmp_preds = self.output_dir / "tmppreds.json"
        with self.merge_lock:
            merge_predictions([self.output_dir], tmp_preds)
            self.last_evaluation_time = current_time

        # NOTE(review): stdout is piped but never read in this class; only
        # stderr is consumed (by `check_running_calls`).
        self._running_calls.append(
            subprocess.Popen(
                self._get_sb_call(preds_path=tmp_preds, submit_only=True),
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
        )

    def move_sb_cli_report(self) -> None:
        """Move report from `sb-cli-reports` to `results.json`."""
        output_dir = self.output_dir / "sb-cli-reports"
        if not output_dir.exists():
            self.logger.warning("No SweBench report found at %s", output_dir)
            return
        # Any previous results.json is removed before the report count is checked.
        (self.output_dir / "results.json").unlink(missing_ok=True)
        reports = list(output_dir.glob("*.json"))
        if len(reports) != 1:
            self.logger.warning("Expected 1 SweBench report at %s, found %d. Cannot rename.", output_dir, len(reports))
            return
        reports[0].rename(self.output_dir / "results.json")

    def on_end(self) -> None:
        """Submit the final ``preds.json`` and, on success, collect the report.

        A non-zero exit status of ``sb-cli`` is logged as an error and not
        re-raised.
        """
        self.logger.info("Submitting results to SWE-Bench")
        try:
            subprocess.run(
                self._get_sb_call(preds_path=self.output_dir / "preds.json"),
                check=True,
                stdout=sys.stdout,
                stderr=sys.stderr,
            )
        except subprocess.CalledProcessError as e:
            self.logger.error("Failed to submit results to SweBench eval: %s", e)
        else:
            # remove temporary predictions if they exist
            (self.output_dir / "tmppreds.json").unlink(missing_ok=True)
            self.move_sb_cli_report()
|