@elizaos/sweagent-root 2.0.0-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +270 -0
- package/package.json +71 -0
- package/python/LICENSE +21 -0
- package/python/config/README.md +15 -0
- package/python/config/bash_only.yaml +222 -0
- package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
- package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
- package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
- package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
- package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
- package/python/config/coding_challenge.yaml +104 -0
- package/python/config/default.yaml +69 -0
- package/python/config/default_backticks.yaml +69 -0
- package/python/config/default_mm_no_images.yaml +82 -0
- package/python/config/default_mm_with_images.yaml +83 -0
- package/python/config/demo/default.yaml +80 -0
- package/python/config/demo/no_instructions.yaml +69 -0
- package/python/config/demo/only_bash.yaml +60 -0
- package/python/config/exotic/default_shell.yaml +52 -0
- package/python/config/exotic/windowed_replace.yaml +125 -0
- package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
- package/python/config/human/human.yaml +24 -0
- package/python/config/human/human_demo.yaml +52 -0
- package/python/config/sweagent_0_7/07.yaml +101 -0
- package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
- package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
- package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
- package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
- package/python/mlc_config.json +44 -0
- package/python/pyproject.toml +262 -0
- package/python/sweagent/__init__.py +114 -0
- package/python/sweagent/__main__.py +4 -0
- package/python/sweagent/agent/__init__.py +0 -0
- package/python/sweagent/agent/action_sampler.py +317 -0
- package/python/sweagent/agent/agents.py +1294 -0
- package/python/sweagent/agent/extra/shell_agent.py +106 -0
- package/python/sweagent/agent/history_processors.py +399 -0
- package/python/sweagent/agent/hooks/__init__.py +0 -0
- package/python/sweagent/agent/hooks/abstract.py +139 -0
- package/python/sweagent/agent/hooks/status.py +34 -0
- package/python/sweagent/agent/models.py +896 -0
- package/python/sweagent/agent/problem_statement.py +312 -0
- package/python/sweagent/agent/reviewer.py +664 -0
- package/python/sweagent/environment/__init__.py +0 -0
- package/python/sweagent/environment/hooks/__init__.py +0 -0
- package/python/sweagent/environment/hooks/abstract.py +60 -0
- package/python/sweagent/environment/hooks/status.py +28 -0
- package/python/sweagent/environment/repo.py +219 -0
- package/python/sweagent/environment/swe_env.py +276 -0
- package/python/sweagent/exceptions.py +54 -0
- package/python/sweagent/inspector/README.md +6 -0
- package/python/sweagent/inspector/__init__.py +0 -0
- package/python/sweagent/inspector/favicon.ico +0 -0
- package/python/sweagent/inspector/fileViewer.js +354 -0
- package/python/sweagent/inspector/icons/computer.png +0 -0
- package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
- package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
- package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
- package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
- package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
- package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
- package/python/sweagent/inspector/index.html +25 -0
- package/python/sweagent/inspector/server.py +354 -0
- package/python/sweagent/inspector/static.py +169 -0
- package/python/sweagent/inspector/style.css +454 -0
- package/python/sweagent/run/__init__.py +0 -0
- package/python/sweagent/run/_progress.py +158 -0
- package/python/sweagent/run/batch_instances.py +419 -0
- package/python/sweagent/run/common.py +387 -0
- package/python/sweagent/run/compare_runs.py +123 -0
- package/python/sweagent/run/extract_pred.py +19 -0
- package/python/sweagent/run/hooks/__init__.py +0 -0
- package/python/sweagent/run/hooks/abstract.py +67 -0
- package/python/sweagent/run/hooks/apply_patch.py +106 -0
- package/python/sweagent/run/hooks/open_pr.py +244 -0
- package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
- package/python/sweagent/run/inspector_cli.py +493 -0
- package/python/sweagent/run/merge_predictions.py +64 -0
- package/python/sweagent/run/quick_stats.py +96 -0
- package/python/sweagent/run/remove_unfinished.py +63 -0
- package/python/sweagent/run/rich_test.py +91 -0
- package/python/sweagent/run/run.py +147 -0
- package/python/sweagent/run/run_batch.py +442 -0
- package/python/sweagent/run/run_replay.py +219 -0
- package/python/sweagent/run/run_shell.py +155 -0
- package/python/sweagent/run/run_single.py +225 -0
- package/python/sweagent/run/run_traj_to_demo.py +85 -0
- package/python/sweagent/tools/__init__.py +0 -0
- package/python/sweagent/tools/bundle.py +57 -0
- package/python/sweagent/tools/commands.py +220 -0
- package/python/sweagent/tools/parsing.py +619 -0
- package/python/sweagent/tools/tools.py +430 -0
- package/python/sweagent/tools/utils.py +108 -0
- package/python/sweagent/types.py +102 -0
- package/python/sweagent/utils/__init__.py +0 -0
- package/python/sweagent/utils/config.py +80 -0
- package/python/sweagent/utils/files.py +27 -0
- package/python/sweagent/utils/github.py +118 -0
- package/python/sweagent/utils/jinja_warnings.py +14 -0
- package/python/sweagent/utils/log.py +175 -0
- package/python/sweagent/utils/patch_formatter.py +152 -0
- package/python/sweagent/utils/serialization.py +45 -0
- package/python/tests/__init__.py +0 -0
- package/python/tests/conftest.py +191 -0
- package/python/tests/test_agent.py +258 -0
- package/python/tests/test_batch_instance.py +43 -0
- package/python/tests/test_commands/_interactive_dummy.py +35 -0
- package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
- package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
- package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
- package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
- package/python/tests/test_data/data_sources/human_eval.json +1 -0
- package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
- package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
- package/python/tests/test_env.py +66 -0
- package/python/tests/test_env_utils.py +129 -0
- package/python/tests/test_history_processors.py +40 -0
- package/python/tests/test_models.py +23 -0
- package/python/tests/test_openai_live.py +164 -0
- package/python/tests/test_packaging.py +7 -0
- package/python/tests/test_parsing.py +131 -0
- package/python/tests/test_problem_statement_multimodal.py +111 -0
- package/python/tests/test_quick_stats.py +42 -0
- package/python/tests/test_run.py +37 -0
- package/python/tests/test_run_batch.py +110 -0
- package/python/tests/test_run_hooks.py +114 -0
- package/python/tests/test_run_replay.py +33 -0
- package/python/tests/test_run_single.py +125 -0
- package/python/tests/test_tools_command_parsing.py +193 -0
- package/python/tests/test_utils.py +15 -0
- package/python/tests/tools/__init__.py +0 -0
- package/python/tests/tools/conftest.py +12 -0
- package/python/tests/tools/test_default_utils.py +153 -0
- package/python/tests/tools/test_edit_replace.py +0 -0
- package/python/tests/tools/test_split_string.py +82 -0
- package/python/tests/utils.py +29 -0
- package/python/tools/diff_state/bin/_state_diff_state +52 -0
- package/python/tools/diff_state/config.yaml +2 -0
- package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
- package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
- package/python/tools/edit_anthropic/config.yaml +56 -0
- package/python/tools/edit_anthropic/install.sh +3 -0
- package/python/tools/filemap/bin/filemap +45 -0
- package/python/tools/filemap/config.yaml +9 -0
- package/python/tools/filemap/install.sh +2 -0
- package/python/tools/forfeit/bin/exit_forfeit +5 -0
- package/python/tools/forfeit/config.yaml +5 -0
- package/python/tools/image_tools/bin/view_image +36 -0
- package/python/tools/image_tools/config.yaml +9 -0
- package/python/tools/multilingual_setup/bin/do_nothing +2 -0
- package/python/tools/multilingual_setup/config.yaml +1 -0
- package/python/tools/multilingual_setup/install.sh +45 -0
- package/python/tools/registry/bin/_read_env +10 -0
- package/python/tools/registry/bin/_write_env +10 -0
- package/python/tools/registry/config.yaml +1 -0
- package/python/tools/registry/install.sh +6 -0
- package/python/tools/registry/lib/__init__.py +0 -0
- package/python/tools/registry/lib/registry.py +56 -0
- package/python/tools/review_on_submit_m/README.md +6 -0
- package/python/tools/review_on_submit_m/bin/submit +54 -0
- package/python/tools/review_on_submit_m/config.yaml +6 -0
- package/python/tools/review_on_submit_m/install.sh +0 -0
- package/python/tools/search/bin/find_file +31 -0
- package/python/tools/search/bin/search_dir +39 -0
- package/python/tools/search/bin/search_file +55 -0
- package/python/tools/search/config.yaml +37 -0
- package/python/tools/search/install.sh +3 -0
- package/python/tools/submit/bin/submit +17 -0
- package/python/tools/submit/config.yaml +5 -0
- package/python/tools/web_browser/bin/click_mouse +41 -0
- package/python/tools/web_browser/bin/close_site +28 -0
- package/python/tools/web_browser/bin/double_click_mouse +37 -0
- package/python/tools/web_browser/bin/drag_mouse +46 -0
- package/python/tools/web_browser/bin/execute_script_on_page +39 -0
- package/python/tools/web_browser/bin/get_console_output +48 -0
- package/python/tools/web_browser/bin/move_mouse +35 -0
- package/python/tools/web_browser/bin/navigate_back +33 -0
- package/python/tools/web_browser/bin/navigate_forward +33 -0
- package/python/tools/web_browser/bin/open_site +36 -0
- package/python/tools/web_browser/bin/press_keys_on_page +51 -0
- package/python/tools/web_browser/bin/reload_page +33 -0
- package/python/tools/web_browser/bin/run_web_browser_server +394 -0
- package/python/tools/web_browser/bin/screenshot_site +38 -0
- package/python/tools/web_browser/bin/scroll_on_page +40 -0
- package/python/tools/web_browser/bin/set_browser_window_size +40 -0
- package/python/tools/web_browser/bin/type_text +34 -0
- package/python/tools/web_browser/bin/wait_time +39 -0
- package/python/tools/web_browser/config.yaml +155 -0
- package/python/tools/web_browser/install.sh +22 -0
- package/python/tools/web_browser/lib/browser_manager.py +404 -0
- package/python/tools/web_browser/lib/web_browser_config.py +33 -0
- package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
- package/python/tools/web_browser/test_console.html +1 -0
- package/python/tools/windowed/bin/_state +25 -0
- package/python/tools/windowed/bin/create +29 -0
- package/python/tools/windowed/bin/goto +37 -0
- package/python/tools/windowed/bin/open +49 -0
- package/python/tools/windowed/bin/scroll_down +12 -0
- package/python/tools/windowed/bin/scroll_up +13 -0
- package/python/tools/windowed/config.yaml +38 -0
- package/python/tools/windowed/install.sh +15 -0
- package/python/tools/windowed/lib/__init__.py +0 -0
- package/python/tools/windowed/lib/flake8_utils.py +147 -0
- package/python/tools/windowed/lib/windowed_file.py +312 -0
- package/python/tools/windowed_edit_linting/bin/edit +128 -0
- package/python/tools/windowed_edit_linting/config.yaml +31 -0
- package/python/tools/windowed_edit_linting/install.sh +5 -0
- package/python/tools/windowed_edit_replace/bin/edit +172 -0
- package/python/tools/windowed_edit_replace/bin/insert +77 -0
- package/python/tools/windowed_edit_replace/config.yaml +60 -0
- package/python/tools/windowed_edit_replace/install.sh +5 -0
- package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
- package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
- package/python/tools/windowed_edit_rewrite/install.sh +5 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
- package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
- package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
- package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
- package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
- package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
- package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
- package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
- package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
- package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
- package/rust/Cargo.toml +100 -0
- package/rust/README.md +49 -0
- package/rust/src/agent/action_sampler.rs +130 -0
- package/rust/src/agent/agents.rs +1029 -0
- package/rust/src/agent/history_processors.rs +277 -0
- package/rust/src/agent/hooks/mod.rs +208 -0
- package/rust/src/agent/mod.rs +24 -0
- package/rust/src/agent/models.rs +837 -0
- package/rust/src/agent/problem_statement.rs +355 -0
- package/rust/src/agent/reviewer.rs +505 -0
- package/rust/src/bin/sweagent.rs +784 -0
- package/rust/src/environment/deployment.rs +631 -0
- package/rust/src/environment/hooks/mod.rs +114 -0
- package/rust/src/environment/mod.rs +16 -0
- package/rust/src/environment/repo.rs +265 -0
- package/rust/src/environment/runtime.rs +237 -0
- package/rust/src/environment/swe_env.rs +248 -0
- package/rust/src/exceptions.rs +228 -0
- package/rust/src/lib.rs +68 -0
- package/rust/src/monitoring.rs +482 -0
- package/rust/src/run/hooks/mod.rs +134 -0
- package/rust/src/run/mod.rs +12 -0
- package/rust/src/run/run_batch.rs +563 -0
- package/rust/src/run/run_single.rs +196 -0
- package/rust/src/tools/bundle.rs +224 -0
- package/rust/src/tools/commands.rs +173 -0
- package/rust/src/tools/mod.rs +295 -0
- package/rust/src/tools/parsing.rs +354 -0
- package/rust/src/tools/registry.rs +143 -0
- package/rust/src/types.rs +554 -0
- package/rust/src/utils/config.rs +105 -0
- package/rust/src/utils/files.rs +137 -0
- package/rust/src/utils/github.rs +171 -0
- package/rust/src/utils/log.rs +65 -0
- package/rust/src/utils/mod.rs +17 -0
- package/rust/src/utils/serialization.rs +181 -0
- package/rust/src/utils/template.rs +173 -0
- package/typescript/README.md +335 -0
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import shutil
|
|
5
|
+
import subprocess
|
|
6
|
+
import sys
|
|
7
|
+
from collections.abc import Generator
|
|
8
|
+
from contextlib import contextmanager
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from types import SimpleNamespace
|
|
11
|
+
|
|
12
|
+
import pytest
|
|
13
|
+
from swerex.deployment.config import DockerDeploymentConfig, DummyDeploymentConfig
|
|
14
|
+
from swerex.runtime.abstract import ReadFileRequest, WriteFileRequest
|
|
15
|
+
|
|
16
|
+
from sweagent.agent import problem_statement as ps
|
|
17
|
+
from sweagent.environment.repo import LocalRepoConfig
|
|
18
|
+
from sweagent.environment.swe_env import EnvironmentConfig, SWEEnv
|
|
19
|
+
from sweagent.utils import github as gh
|
|
20
|
+
|
|
21
|
+
# HACK: make the repository root and the `sweagent` package directory
# importable from the tests; should be removed when we have a better solution.
_this_dir = Path(__file__).resolve().parent
root_dir = _this_dir.parent
package_dir = root_dir.joinpath("sweagent")
# Same result as inserting `root_dir` at index 0 and `package_dir` at index 1.
sys.path[:0] = [str(root_dir), str(package_dir)]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@pytest.fixture(autouse=True)
def _disable_github_api_calls(monkeypatch: pytest.MonkeyPatch):
    """Swap the GitHub issue fetcher for an offline stand-in in every test.

    Problem statements given as GitHub URLs can trigger a fetch of the issue
    text through GitHub's REST API. Rate limits and missing credentials make
    that flaky, so tests get a deterministic placeholder instead.
    """

    def fake_get_problem_statement_from_github_issue(
        owner: str, repo: str, issue_number: str, *, token: str | None = ""
    ) -> str:
        # Only the single known test issue is served; anything else is an error.
        requested = (owner.lower(), repo.lower(), issue_number)
        if requested != ("swe-agent", "test-repo", "1"):
            msg = f"Unexpected GitHub issue fetch in tests: {owner}/{repo}#{issue_number}"
            raise RuntimeError(msg)
        return "Test issue (offline fixture)\n"

    # problem_statement.py imports the function directly, so the name has to be
    # replaced there as well as on the github utility module itself.
    for module in (gh, ps):
        monkeypatch.setattr(
            module,
            "_get_problem_statement_from_github_issue",
            fake_get_problem_statement_from_github_issue,
        )
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@pytest.fixture
def test_data_path() -> Path:
    """Return the `test_data` directory next to this file; it must exist."""
    data_dir = _this_dir.joinpath("test_data")
    assert data_dir.is_dir()
    return data_dir
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@pytest.fixture
def test_trajectories_path(test_data_path) -> Path:
    """Return the `trajectories` directory inside the test data; it must exist."""
    trajectories_dir = test_data_path.joinpath("trajectories")
    assert trajectories_dir.is_dir()
    return trajectories_dir
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@pytest.fixture
def test_ctf_trajectories_path(test_data_path) -> Path:
    """Return the `trajectories/ctf` directory inside the test data; it must exist."""
    ctf_trajectories_dir = test_data_path.joinpath("trajectories", "ctf")
    assert ctf_trajectories_dir.is_dir()
    return ctf_trajectories_dir
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@pytest.fixture
def ctf_data_path(test_data_sources_path) -> Path:
    """Return the `ctf` directory inside the test data sources; it must exist."""
    ctf_dir = test_data_sources_path.joinpath("ctf")
    assert ctf_dir.is_dir()
    return ctf_dir
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@pytest.fixture
def test_data_sources_path(test_data_path) -> Path:
    """Return the `data_sources` directory inside the test data; it must exist."""
    sources_dir = test_data_path.joinpath("data_sources")
    assert sources_dir.is_dir()
    return sources_dir
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@pytest.fixture
def test_trajectory_path(test_trajectories_path) -> Path:
    """Return the path of the stored `swe-agent__test-repo-i1.traj` file; it must exist."""
    run_dir_name = "gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1"
    traj = test_trajectories_path / run_dir_name / "swe-agent__test-repo-i1.traj"
    assert traj.exists()
    return traj
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
@pytest.fixture
def test_trajectory(test_trajectory_path):
    """Return the JSON-decoded content of the file at `test_trajectory_path`."""
    raw_text = test_trajectory_path.read_text()
    return json.loads(raw_text)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@pytest.fixture(scope="module")
def test_env_args(
    tmpdir_factory,
) -> Generator[EnvironmentConfig, None, None]:
    """Yield an environment config backed by a fresh clone of the test repo.

    This will use a persistent container: the fixture is module-scoped, so the
    same config is shared by every test in a module.

    The repository is cloned from GitHub into the pytest base temp directory
    and the clone is removed again during fixture teardown.
    """
    local_repo_path = tmpdir_factory.getbasetemp() / "test-repo"
    clone_cmd = ["git", "clone", "https://github.com/swe-agent/test-repo", str(local_repo_path)]
    # check=True: a failed clone aborts the fixture instead of yielding a broken config.
    subprocess.run(clone_cmd, check=True)
    env_args = EnvironmentConfig(
        deployment=DockerDeploymentConfig(image="python:3.11"),
        repo=LocalRepoConfig(path=Path(local_repo_path)),
    )
    yield env_args
    # Teardown: drop the clone created above.
    shutil.rmtree(local_repo_path)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
@pytest.fixture
def dummy_env_args() -> EnvironmentConfig:
    """Return an environment config with a dummy deployment and no repository."""
    deployment_config = DummyDeploymentConfig()
    return EnvironmentConfig(deployment=deployment_config, repo=None)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
@pytest.fixture
def dummy_env(dummy_env_args, monkeypatch: pytest.MonkeyPatch) -> Generator[SWEEnv, None, None]:
    """Yield a started `SWEEnv` built from `dummy_env_args`, closing it afterwards.

    The runtime's `read_file` / `write_file` are replaced with coroutines that
    operate on an in-memory dict, so tests that rely on them can run
    deterministically without Docker.
    """
    env = SWEEnv.from_config(dummy_env_args)
    env.start()

    # From here on the environment is running: make sure it is closed even if
    # the remaining setup fails, since pytest skips post-yield teardown code
    # when a fixture raises before yielding.
    try:
        # Provide an in-memory filesystem for DummyDeployment.
        files: dict[str, str] = {}

        async def _read_file(request: ReadFileRequest):
            content = files.get(request.path)
            if content is None:
                raise FileNotFoundError(request.path)
            # Only the `content` attribute is provided on the fake response.
            return SimpleNamespace(content=content)

        async def _write_file(request: WriteFileRequest):
            files[request.path] = request.content

        monkeypatch.setattr(env.deployment.runtime, "read_file", _read_file)
        monkeypatch.setattr(env.deployment.runtime, "write_file", _write_file)

        yield env
    finally:
        env.close()
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
@contextmanager
def swe_env_context(env_args):
    """Start an `SWEEnv` built from `env_args` and always close it on exit.

    Closing the shell on the container is what allows the container to be
    reused afterwards.
    """
    started_env = SWEEnv.from_config(env_args)
    started_env.start()
    try:
        yield started_env
    finally:
        # Runs whether or not the `with` body raised.
        started_env.close()
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
@pytest.fixture
def swe_agent_test_repo_clone(tmp_path) -> Path:
    """Clone the swe-agent test repository into `tmp_path` and return the clone's path.

    Raises:
        subprocess.CalledProcessError: if `git clone` exits with a non-zero status.
    """
    local_repo_path = tmp_path / "test-repo"
    # Pass the destination as a string, consistent with `test_env_args`.
    clone_cmd = ["git", "clone", "https://github.com/swe-agent/test-repo", str(local_repo_path)]
    subprocess.run(clone_cmd, check=True)
    return local_repo_path
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
@pytest.fixture
def swe_agent_test_repo_traj(test_trajectories_path) -> Path:
    """Return the path of the stored `6e44b9__sweagenttestrepo-1c2844.traj` file; it must exist."""
    run_dir_name = "gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1"
    traj_file = test_trajectories_path / run_dir_name / "6e44b9__sweagenttestrepo-1c2844.traj"
    assert traj_file.is_file()
    return traj_file
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
import yaml
|
|
3
|
+
from swerex.exceptions import SwerexException
|
|
4
|
+
from swerex.runtime.abstract import Action, BashObservation, Observation
|
|
5
|
+
from swerex.runtime.dummy import DummyRuntime
|
|
6
|
+
|
|
7
|
+
from sweagent import CONFIG_DIR
|
|
8
|
+
from sweagent.agent.agents import DefaultAgent, DefaultAgentConfig
|
|
9
|
+
from sweagent.agent.models import InstantEmptySubmitModelConfig, PredeterminedTestModel
|
|
10
|
+
from sweagent.agent.problem_statement import EmptyProblemStatement, TextProblemStatement
|
|
11
|
+
from sweagent.environment.swe_env import SWEEnv
|
|
12
|
+
from sweagent.tools.parsing import FunctionCallingParser, Identity, ThoughtActionParser
|
|
13
|
+
from sweagent.tools.tools import ToolConfig
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_dummy_env(dummy_env):
    """Smoke test: requesting the `dummy_env` fixture is the whole check."""
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@pytest.fixture
def identity_agent_config():
    """Return an agent config using the instant-empty-submit model and the `Identity` parser."""
    model_config = InstantEmptySubmitModelConfig()
    tool_config = ToolConfig(parse_function=Identity())
    return DefaultAgentConfig(model=model_config, tools=tool_config)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@pytest.fixture
def thought_action_agent_config():
    """Return an agent config using the instant-empty-submit model and `ThoughtActionParser`."""
    model_config = InstantEmptySubmitModelConfig()
    tool_config = ToolConfig(parse_function=ThoughtActionParser())
    return DefaultAgentConfig(model=model_config, tools=tool_config)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@pytest.fixture
def function_calling_agent_config():
    """Return an agent config using the instant-empty-submit model and `FunctionCallingParser`."""
    model_config = InstantEmptySubmitModelConfig()
    tool_config = ToolConfig(parse_function=FunctionCallingParser())
    return DefaultAgentConfig(model=model_config, tools=tool_config)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@pytest.fixture
def default_agent_config():
    """Load the `sweagent_0_7/07.yaml` agent config with its model replaced.

    The model entry is overridden with `instant_empty_submit`, the resulting
    configuration is printed as YAML, and the `agent` section is validated
    into a `DefaultAgentConfig`.
    """
    config_file = CONFIG_DIR / "sweagent_0_7/07.yaml"
    config = yaml.safe_load(config_file.read_text())
    agent_section = config["agent"]
    agent_section["model"] = {"name": "instant_empty_submit"}
    # Dump the full (modified) config so it shows up in captured test output.
    print(yaml.dump(config))
    return DefaultAgentConfig.model_validate(agent_section)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@pytest.fixture
def default_agent(default_agent_config: DefaultAgentConfig) -> DefaultAgent:
    """Return a `DefaultAgent` built from `default_agent_config` with a fixed mock tool state."""
    agent = DefaultAgent.from_config(default_agent_config)
    mock_state = {"open_file": "asdf123", "working_dir": "/root"}
    agent.tools.mock_state = mock_state
    return agent
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@pytest.fixture
def test_agent(identity_agent_config: DefaultAgentConfig) -> DefaultAgent:
    """Return a `DefaultAgent` built from `identity_agent_config`."""
    agent = DefaultAgent.from_config(identity_agent_config)
    return agent
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@pytest.fixture
def thought_action_agent(thought_action_agent_config: DefaultAgentConfig) -> DefaultAgent:
    """Return a `DefaultAgent` built from `thought_action_agent_config`."""
    agent = DefaultAgent.from_config(thought_action_agent_config)
    return agent
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@pytest.fixture
def function_calling_agent(function_calling_agent_config: DefaultAgentConfig) -> DefaultAgent:
    """Return a `DefaultAgent` built from `function_calling_agent_config`."""
    agent = DefaultAgent.from_config(function_calling_agent_config)
    return agent
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def test_exit_cost(dummy_env: SWEEnv, test_agent: DefaultAgent, tmp_path):
    """A model scripted with ``raise_cost`` must end the run with status ``exit_cost``."""
    scripted_outputs = ["raise_cost"]
    test_agent.model = PredeterminedTestModel(scripted_outputs)  # type: ignore
    result = test_agent.run(problem_statement=EmptyProblemStatement(), env=dummy_env, output_dir=tmp_path)
    exit_status = result.info["exit_status"]  # type: ignore
    assert exit_status == "exit_cost"
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def test_exit_context(dummy_env: SWEEnv, test_agent: DefaultAgent, tmp_path):
    """A model scripted with ``raise_context`` must end the run with status ``exit_context``."""
    scripted_outputs = ["raise_context"]
    test_agent.model = PredeterminedTestModel(scripted_outputs)  # type: ignore
    result = test_agent.run(problem_statement=EmptyProblemStatement(), env=dummy_env, output_dir=tmp_path)
    exit_status = result.info["exit_status"]  # type: ignore
    assert exit_status == "exit_context"
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def test_exit_model_error(dummy_env: SWEEnv, test_agent: DefaultAgent, tmp_path):
    """A model scripted with ``raise_runtime`` must end the run with status
    ``exit_environment_error``.
    """
    scripted_outputs = ["raise_runtime"]
    test_agent.model = PredeterminedTestModel(scripted_outputs)  # type: ignore
    result = test_agent.run(problem_statement=EmptyProblemStatement(), env=dummy_env, output_dir=tmp_path)
    exit_status = result.info["exit_status"]  # type: ignore
    assert exit_status == "exit_environment_error"
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def test_exit_format(dummy_env: SWEEnv, thought_action_agent: DefaultAgent, tmp_path):
    """The thought/action agent fed the bare outputs ``a``..``d`` must exit with ``exit_format``."""
    scripted_outputs = ["a", "b", "c", "d"]
    thought_action_agent.model = PredeterminedTestModel(scripted_outputs)  # type: ignore
    result = thought_action_agent.run(
        problem_statement=EmptyProblemStatement(), env=dummy_env, output_dir=tmp_path
    )
    exit_status = result.info["exit_status"]  # type: ignore
    assert exit_status == "exit_format"
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def test_exit_blocklist(dummy_env: SWEEnv, test_agent: DefaultAgent, tmp_path):
    """A model that only emits ``vim``, ``python``, ``su`` and ``nano`` must exit with
    ``exit_format``.
    """
    scripted_outputs = ["vim", "python", "su", "nano"]
    test_agent.model = PredeterminedTestModel(scripted_outputs)  # type: ignore
    result = test_agent.run(problem_statement=EmptyProblemStatement(), env=dummy_env, output_dir=tmp_path)
    exit_status = result.info["exit_status"]  # type: ignore
    assert exit_status == "exit_format"
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class RuntimeRaisesFirst(DummyRuntime):
    """``DummyRuntime`` variant that fails when asked to run the bash command ``raise``."""

    async def run_in_session(self, action: Action) -> Observation:
        """Raise ``SwerexException`` for the bash command ``raise``; otherwise defer to the parent."""
        is_raise_command = action.action_type == "bash" and action.command == "raise"
        if not is_raise_command:
            return await super().run_in_session(action)
        raise SwerexException()
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def test_early_exit(dummy_env: SWEEnv, test_agent: DefaultAgent, tmp_path):
    """With error catching enabled, a runtime that raises on the ``raise`` command must
    end the run with status ``exit_environment_error``.
    """
    scripted_outputs = ["raise"]
    test_agent.model = PredeterminedTestModel(scripted_outputs)  # type: ignore
    test_agent._catch_errors = True
    failing_runtime = RuntimeRaisesFirst()
    dummy_env.deployment.runtime = failing_runtime  # type: ignore
    result = test_agent.run(problem_statement=EmptyProblemStatement(), env=dummy_env, output_dir=tmp_path)
    exit_status = result.info["exit_status"]  # type: ignore
    assert exit_status == "exit_environment_error"
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def test_run_step_by_step_checking_history(dummy_env: SWEEnv, default_agent: DefaultAgent, tmp_path):
    """Step the default agent manually and check messages and trajectory after each step."""
    agent = default_agent
    scripted_outputs = ["asdf", "```\nls\n```", "```\necho 'asdf'\n```", "raise_cost"]
    agent.model = PredeterminedTestModel(scripted_outputs)  # type: ignore
    agent.setup(dummy_env, TextProblemStatement(text="asdf123"))
    dummy_env.deployment.runtime.run_in_session_outputs = [  # type: ignore
        BashObservation(output="file_a file_b"),
        BashObservation(output=""),  # set last action
        BashObservation(output="asdf"),
        BashObservation(output=""),
    ]
    assert "asdf123" in agent._problem_statement.get_problem_statement()  # type: ignore

    # After setup the history holds: system template, demo, instance template.
    assert len(agent.messages) == 3
    system_prompt = agent.messages[0]["content"]
    assert "You are an autonomous programmer" in system_prompt
    demo = agent.messages[1]["content"]
    for expected_in_demo in ("demonstration", "marshmallow"):
        assert expected_in_demo in demo
    instance_template = agent.messages[2]["content"]
    assert "the following issue within our repository" in instance_template
    assert "asdf123" in instance_template
    assert len(agent.trajectory) == 0

    # First step: "asdf" is a format error, so the model is queried a second time.
    print(agent.step())
    assert len(agent.trajectory) == 2
    assert len(agent.messages) == 5  # first action performed + observation
    print(yaml.dump(agent.messages, indent=2))
    assert agent.messages[3]["content"].strip() == "```\nls\n```"
    for expected_in_observation in ("file_a file_b", "Open file: asdf123", "Current directory: /root"):
        assert expected_in_observation in agent.messages[4]["content"]

    # Second step: one more action/observation pair.
    print(agent.step())
    print(yaml.dump(agent.messages, indent=2))
    assert len(agent.trajectory) == 3
    assert len(agent.messages) == 7

    # Third step: the scripted "raise_cost" output ends the run.
    print(agent.step())
    assert len(agent.trajectory) == 4
    assert agent.info["exit_status"] == "exit_cost"  # type: ignore
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def test_run_autosubmit(dummy_env: SWEEnv, default_agent: DefaultAgent, tmp_path):
    """When the model output is ``raise_cost``, the step must finish with the patch
    content as submission and the status ``submitted (exit_cost)``.
    """
    agent = default_agent
    agent.model = PredeterminedTestModel(["raise_cost"])  # type: ignore
    agent.setup(dummy_env, EmptyProblemStatement())
    dummy_env.write_file("/root/model.patch", "mysubmission")
    submission_output = r"<<SWE_AGENT_SUBMISSION>>\nmysubmission\n<<SWE_AGENT_SUBMISSION>>"
    dummy_env.deployment.runtime.run_in_session_outputs = [  # type: ignore
        BashObservation(output=""),
        BashObservation(output=submission_output),
    ]
    step_output = agent.step()

    # State recorded on the agent itself.
    assert agent.info is not None
    assert agent.info["exit_status"] == "submitted (exit_cost)"  # type: ignore
    assert agent.info["submission"] == "mysubmission"  # type: ignore

    # State reported by the returned step output.
    assert step_output.done
    assert step_output.submission == "mysubmission"
    assert step_output.exit_status == "submitted (exit_cost)"
    assert not step_output.action
    assert "cost limit" in step_output.thought
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def test_show_no_output_template(dummy_env: SWEEnv, default_agent: DefaultAgent, tmp_path):
    """Run two steps with a custom ``next_step_no_output_template`` and an empty observation.

    Only checks that both steps complete; it does not yet verify the template text.
    """
    agent = default_agent
    agent.templates.next_step_no_output_template = "no output template"
    agent.setup(dummy_env, EmptyProblemStatement())
    scripted_outputs = ["```\nls\n```", "```\ntest\n```"]
    agent.model = PredeterminedTestModel(scripted_outputs)  # type: ignore
    empty_observation = BashObservation(output="")
    dummy_env.deployment.runtime.run_in_session_outputs = [empty_observation]  # type: ignore
    for _ in range(2):
        agent.step()
    # todo: actually test that the template is used
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def test_successful_submission(dummy_env: SWEEnv, default_agent: DefaultAgent, tmp_path):
    """A ``submit`` action must yield status ``submitted`` with the patch file content
    as both the submission and the last trajectory observation.
    """
    agent = default_agent
    agent.model = PredeterminedTestModel(["```\nsubmit\n```"])  # type: ignore
    agent.setup(dummy_env, EmptyProblemStatement())
    dummy_env.write_file("/root/model.patch", "test")
    marker_observation = BashObservation(output=r"<<SWE_AGENT_SUBMISSION>>")
    dummy_env.deployment.runtime.run_in_session_outputs = marker_observation  # type: ignore
    agent.step()
    assert agent.info["exit_status"] == "submitted"  # type: ignore
    assert agent.info["submission"] == "test"  # type: ignore
    last_trajectory_step = agent.trajectory[-1]
    assert last_trajectory_step["observation"] == "test"
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def test_human_exit(dummy_env: SWEEnv, default_agent: DefaultAgent, tmp_path):
    """An ``exit`` action must finish the step with status ``exit_command``."""
    agent = default_agent
    agent.model = PredeterminedTestModel(["```\nexit\n```"])  # type: ignore
    agent.setup(dummy_env, EmptyProblemStatement())
    step_output = agent.step()
    assert step_output.done
    assert step_output.exit_status == "exit_command"
    assert step_output.action.strip() == "exit"
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def test_function_calling(dummy_env: SWEEnv, function_calling_agent: DefaultAgent, tmp_path):
    """A model response carrying a ``bash`` tool call must be executed as the action ``ls``."""
    agent = function_calling_agent
    # Simulated valid function-call response from the model.
    bash_tool_call = {"function": {"name": "bash", "arguments": '{"command": "ls"}'}, "id": "abc123"}
    valid_response = {
        "message": "I'll list the contents of the directory",
        "tool_calls": [bash_tool_call],
    }
    agent.model = PredeterminedTestModel([valid_response])  # type: ignore
    agent.setup(dummy_env, EmptyProblemStatement())
    dummy_env.deployment.runtime.run_in_session_outputs = [  # type: ignore
        BashObservation(output="file1 file2"),
        BashObservation(output="file1 file2"),  # TODO, there's actually a bug in swe-rex, requiring two observations
    ]  # type: ignore
    step_output = agent.step()
    assert not step_output.done, "Expected not done, because we haven't submitted yet"
    assert step_output.action.strip() == "ls", "Expected the tool call to be executed"
    assert "file1 file2" in step_output.observation, "Expected the tool call to return the output of the command"
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
from swerex.deployment.config import DockerDeploymentConfig
|
|
5
|
+
|
|
6
|
+
from sweagent.agent.problem_statement import TextProblemStatement
|
|
7
|
+
from sweagent.environment.repo import PreExistingRepoConfig
|
|
8
|
+
from sweagent.run.batch_instances import BatchInstance, SimpleBatchInstance, SWEBenchInstances, _slice_spec_to_slice
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def test_simple_batch_from_swe_bench_to_full_batch_instance(test_data_sources_path):
    """Convert the first entry of ``swe-bench-dev-easy.json`` into a full batch instance
    and check its repo, deployment and problem statement.
    """
    data_file = test_data_sources_path / "swe-bench-dev-easy.json"
    sb_instance = json.loads(data_file.read_text())[0]
    simple_instance = SimpleBatchInstance.from_swe_bench(sb_instance)
    base_deployment = DockerDeploymentConfig(image="python:3.11")
    instance = simple_instance.to_full_batch_instance(base_deployment)

    repo = instance.env.repo
    assert isinstance(repo, PreExistingRepoConfig)
    assert repo.repo_name == "testbed"

    deployment = instance.env.deployment
    assert isinstance(deployment, DockerDeploymentConfig)
    assert deployment.image == "docker.io/swebench/sweb.eval.x86_64.pydicom_1776_pydicom-1458:latest"

    problem_statement = instance.problem_statement
    assert isinstance(problem_statement, TextProblemStatement)
    assert problem_statement.text == sb_instance["problem_statement"]
    assert problem_statement.id == "pydicom__pydicom-1458"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def test_slice_spec_to_slice():
    """Specs with one to three fields map to the matching ``slice``; four fields raise ``ValueError``."""
    valid_cases = [
        ("10", slice(10)),
        ("10:20", slice(10, 20)),
        ("10:20:3", slice(10, 20, 3)),
    ]
    for spec, expected in valid_cases:
        assert _slice_spec_to_slice(spec) == expected
    with pytest.raises(ValueError):
        _slice_spec_to_slice("10:20:3:4")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@pytest.mark.slow
def test_get_swe_bench_instances():
    """Every checked subset/split pair must yield a non-empty list of ``BatchInstance`` objects.

    The ``dev`` split is skipped for the subsets listed in ``subsets_without_dev``.
    """
    subsets_without_dev = ["verified", "multilingual"]
    for subset in ["lite", "verified", "full"]:
        for split in ["dev", "test"]:
            should_check = not (subset in subsets_without_dev and split == "dev")
            if should_check:
                print(subset, split)
                instance_config = SWEBenchInstances(subset=subset, split=split)  # type: ignore
                instances = instance_config.get_instance_configs()
                assert len(instances) > 0
                assert all(isinstance(instance, BatchInstance) for instance in instances)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import time
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class InteractiveDummyCommand:
    """Tiny line-based interactive program.

    Calling an instance starts a prompt loop that understands two commands:
    ``send <text>`` echoes the text, and ``stop`` ends the loop. Anything else
    is reported as an unknown command.
    """

    PROMPT = "(dummy) "

    def start(self):
        """Print the start-up message."""
        print("Started interactive dummy command")

    def send(self, input: str):
        """Echo ``input`` and then pause for half a second."""
        print(f"Received input: {input}")
        time.sleep(0.5)

    def stop(self):
        """Print the shutdown message."""
        print("Stopped interactive dummy command")

    def __call__(self):
        """Run the prompt loop until ``stop`` is entered or stdin reaches EOF."""
        self.start()
        while True:
            try:
                # The local must not be called ``input``: assigning to that name
                # would make it local to this function and the call to the
                # builtin would raise UnboundLocalError.
                line = input(self.PROMPT)
            except EOFError:
                # stdin was closed: shut down cleanly instead of crashing.
                self.stop()
                break
            cmd, _, args = line.partition(" ")
            if cmd == "stop":
                self.stop()
                break
            if cmd == "send":
                self.send(args)
            else:
                print(f"Unknown command: {cmd}")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
if __name__ == "__main__":
    # Run the interactive prompt loop when executed as a script.
    dummy_command = InteractiveDummyCommand()
    dummy_command()
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
_debug_command() {
    # Print all arguments, joined by single spaces, wrapped in the
    # <<INTERACTIVE||...||INTERACTIVE>> marker.
    # "$*" is used rather than "$@": inside a larger string "$@" expands to
    # several separate words instead of one string.
    echo "<<INTERACTIVE||$*||INTERACTIVE>>"
}
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# @yaml
|
|
7
|
+
# signature: dummy_start
|
|
8
|
+
# docstring:
|
|
9
|
+
dummy_start() {
    # Emit the session selector followed by the START marker.
    local message
    for message in "SESSION=dummy" "START"; do
        _debug_command "$message"
    done
}
|
|
13
|
+
|
|
14
|
+
# @yaml
|
|
15
|
+
# signature: dummy_stop
|
|
16
|
+
# docstring:
|
|
17
|
+
dummy_stop() {
    # Emit the session selector, the "stop" command and then the STOP marker.
    local message
    for message in "SESSION=dummy" "stop" "STOP"; do
        _debug_command "$message"
    done
}
|
|
22
|
+
|
|
23
|
+
# @yaml
|
|
24
|
+
# signature: dummy_send <input>
|
|
25
|
+
# docstring:
|
|
26
|
+
dummy_send() {
    # Emit the session selector, then a "send" command carrying all arguments.
    _debug_command "SESSION=dummy"
    # "$*" joins the arguments into one string; "send $@" would split into
    # several words when more than one argument is given.
    _debug_command "send $*"
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
system_template: |-
|
|
2
|
+
SETTING
|
|
3
|
+
|
|
4
|
+
COMMANDS:
|
|
5
|
+
{command_docs}
|
|
6
|
+
instance_template: |-
|
|
7
|
+
(Open file: {open_file})
|
|
8
|
+
(Current directory: {working_dir})
|
|
9
|
+
(Interactive session: {interactive_session})
|
|
10
|
+
bash-$
|
|
11
|
+
next_step_template: |-
|
|
12
|
+
{observation}
|
|
13
|
+
(Open file: {open_file})
|
|
14
|
+
(Current directory: {working_dir})
|
|
15
|
+
(Interactive session: {interactive_session})
|
|
16
|
+
bash-$
|
|
17
|
+
next_step_no_output_template: |-
|
|
18
|
+
Your command ran successfully and did not produce any output.
|
|
19
|
+
(Open file: {open_file})
|
|
20
|
+
(Current directory: {working_dir})
|
|
21
|
+
(Interactive session: {interactive_session})
|
|
22
|
+
bash-$
|
|
23
|
+
state_command:
  name: state
  # Prints a JSON object with the keys open_file, working_dir and interactive_session.
  code: |
    state() {
      local working_dir="$PWD";
      local open_file="n/a";
      local interactive_session="${INTERACTIVE_SESSION:-n/a}";
      # Quote CURRENT_FILE so paths containing spaces or glob characters are handled.
      if [ -n "$CURRENT_FILE" ]; then
          open_file=$(realpath "$CURRENT_FILE");
      fi

      echo '{"open_file": "'"$open_file"'", "working_dir": "'"$working_dir"'", "interactive_session": "'"$interactive_session"'"}'
    };
|
|
36
|
+
parse_function: ThoughtActionParser
|
|
37
|
+
env_variables:
|
|
38
|
+
WINDOW: 100
|
|
39
|
+
OVERLAP: 2
|
|
40
|
+
CURRENT_LINE: 0
|
|
41
|
+
CURRENT_FILE: ''
|
|
42
|
+
SEARCH_RESULTS: ()
|
|
43
|
+
SEARCH_FILES: ()
|
|
44
|
+
SEARCH_INDEX: 0
|
|
45
|
+
command_files:
|
|
46
|
+
- config/commands/defaults.sh
|
|
47
|
+
- tests/test_commands/interactive_dummy_wrapper.sh
|
|
48
|
+
- tests/test_commands/_interactive_dummy.py
|
|
49
|
+
parse_command: ParseCommandDetailed
|
|
50
|
+
history_processors:
|
|
51
|
+
- type: last_n_observations
|
|
52
|
+
n: 5
|
|
53
|
+
blocklist:
|
|
54
|
+
- vim
|
|
55
|
+
- vi
|
|
56
|
+
- emacs
|
|
57
|
+
- nano
|
|
58
|
+
- nohup
|
|
59
|
+
- gdb
|
|
60
|
+
blocklist_error_template: |
|
|
61
|
+
Interactive operation '{name}' is not supported by this environment.
|
|
62
|
+
Please consider using one of the interactive commands available to you in this environment.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
FROM ubuntu:14.04
# MAINTAINER is deprecated; the maintainer is recorded as a label instead.
LABEL maintainer="unknown"
LABEL Description="CSAW 2016 LCG" VERSION='1.0'

# Installation
RUN dpkg --add-architecture i386
# Update and install in one layer so a cached "apt-get update" layer is never
# reused with a changed package list.
RUN apt-get update && apt-get upgrade -y && apt-get install -y build-essential socat

# User
RUN adduser --disabled-password --gecos '' katy
RUN chown -R root:katy /home/katy/
RUN chmod 750 /home/katy

# Copy the server file into the user's home directory
WORKDIR /home/katy/
COPY server /home/katy

# Run the program with socat, listening on TCP port 4242
CMD su katy -c "socat TCP-LISTEN:4242,reuseaddr,fork EXEC:/home/katy/server"
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Title
|
|
2
|
+
Katy
|
|
3
|
+
|
|
4
|
+
# Description
|
|
5
|
+
hi every1 im new!!!!!!! holds up spork my name is katy but u can call me t3h PeNgU1N oF d00m!!!!!!!! lol…as u can see im very random!!!! thats why i came here, 2 meet random ppl like me _… im 13 years old (im mature 4 my age tho!!) i like 2 watch invader zim w/ my girlfreind (im bi if u dont like it deal w/it) its our favorite tv show!!! bcuz its SOOOO random!!!! shes random 2 of course but i want 2 meet more random ppl =) like they say the more the merrier!!!! lol…neways i hope 2 make alot of freinds here so give me lots of commentses!!!!
|
|
6
|
+
DOOOOOMMMM!!!!!!!!!!!!!!!! <--- me bein random again _^ hehe…toodles!!!!!
|
|
7
|
+
|
|
8
|
+
love and waffles,
|
|
9
|
+
|
|
10
|
+
t3h PeNgU1N oF d00m
|
|
11
|
+
|
|
12
|
+
# Points
|
|
13
|
+
50
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "Katy",
|
|
3
|
+
"category": "crypto",
|
|
4
|
+
"description": "hi every1 im new!!!!!!! holds up spork my name is katy but u can call me t3h PeNgU1N oF d00m!!!!!!!! lol…as u can see im very random!!!! thats why i came here, 2 meet random ppl like me _… im 13 years old (im mature 4 my age tho!!) i like 2 watch invader zim w/ my girlfreind (im bi if u dont like it deal w/it) its our favorite tv show!!! bcuz its SOOOO random!!!! shes random 2 of course but i want 2 meet more random ppl =) like they say the more the merrier!!!! lol…neways i hope 2 make alot of freinds here so give me lots of commentses!!!! DOOOOOMMMM!!!!!!!!!!!!!!!! <--- me bein random again _^ hehe…toodles!!!!! love and waffles, t3h PeNgU1N oF d00m nc crypto.chal.csaw.io 4242",
|
|
5
|
+
"box": "crypto.chal.csaw.io",
|
|
6
|
+
"files":[
|
|
7
|
+
"release"
|
|
8
|
+
],
|
|
9
|
+
"compose": true,
|
|
10
|
+
"internal_port": 4242,
|
|
11
|
+
"flag": "flag{xxxxxxxxxxxxxx}"
|
|
12
|
+
}
|