@elizaos/sweagent-root 2.0.0-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +270 -0
- package/package.json +71 -0
- package/python/LICENSE +21 -0
- package/python/config/README.md +15 -0
- package/python/config/bash_only.yaml +222 -0
- package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
- package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
- package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
- package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
- package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
- package/python/config/coding_challenge.yaml +104 -0
- package/python/config/default.yaml +69 -0
- package/python/config/default_backticks.yaml +69 -0
- package/python/config/default_mm_no_images.yaml +82 -0
- package/python/config/default_mm_with_images.yaml +83 -0
- package/python/config/demo/default.yaml +80 -0
- package/python/config/demo/no_instructions.yaml +69 -0
- package/python/config/demo/only_bash.yaml +60 -0
- package/python/config/exotic/default_shell.yaml +52 -0
- package/python/config/exotic/windowed_replace.yaml +125 -0
- package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
- package/python/config/human/human.yaml +24 -0
- package/python/config/human/human_demo.yaml +52 -0
- package/python/config/sweagent_0_7/07.yaml +101 -0
- package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
- package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
- package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
- package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
- package/python/mlc_config.json +44 -0
- package/python/pyproject.toml +262 -0
- package/python/sweagent/__init__.py +114 -0
- package/python/sweagent/__main__.py +4 -0
- package/python/sweagent/agent/__init__.py +0 -0
- package/python/sweagent/agent/action_sampler.py +317 -0
- package/python/sweagent/agent/agents.py +1294 -0
- package/python/sweagent/agent/extra/shell_agent.py +106 -0
- package/python/sweagent/agent/history_processors.py +399 -0
- package/python/sweagent/agent/hooks/__init__.py +0 -0
- package/python/sweagent/agent/hooks/abstract.py +139 -0
- package/python/sweagent/agent/hooks/status.py +34 -0
- package/python/sweagent/agent/models.py +896 -0
- package/python/sweagent/agent/problem_statement.py +312 -0
- package/python/sweagent/agent/reviewer.py +664 -0
- package/python/sweagent/environment/__init__.py +0 -0
- package/python/sweagent/environment/hooks/__init__.py +0 -0
- package/python/sweagent/environment/hooks/abstract.py +60 -0
- package/python/sweagent/environment/hooks/status.py +28 -0
- package/python/sweagent/environment/repo.py +219 -0
- package/python/sweagent/environment/swe_env.py +276 -0
- package/python/sweagent/exceptions.py +54 -0
- package/python/sweagent/inspector/README.md +6 -0
- package/python/sweagent/inspector/__init__.py +0 -0
- package/python/sweagent/inspector/favicon.ico +0 -0
- package/python/sweagent/inspector/fileViewer.js +354 -0
- package/python/sweagent/inspector/icons/computer.png +0 -0
- package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
- package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
- package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
- package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
- package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
- package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
- package/python/sweagent/inspector/index.html +25 -0
- package/python/sweagent/inspector/server.py +354 -0
- package/python/sweagent/inspector/static.py +169 -0
- package/python/sweagent/inspector/style.css +454 -0
- package/python/sweagent/run/__init__.py +0 -0
- package/python/sweagent/run/_progress.py +158 -0
- package/python/sweagent/run/batch_instances.py +419 -0
- package/python/sweagent/run/common.py +387 -0
- package/python/sweagent/run/compare_runs.py +123 -0
- package/python/sweagent/run/extract_pred.py +19 -0
- package/python/sweagent/run/hooks/__init__.py +0 -0
- package/python/sweagent/run/hooks/abstract.py +67 -0
- package/python/sweagent/run/hooks/apply_patch.py +106 -0
- package/python/sweagent/run/hooks/open_pr.py +244 -0
- package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
- package/python/sweagent/run/inspector_cli.py +493 -0
- package/python/sweagent/run/merge_predictions.py +64 -0
- package/python/sweagent/run/quick_stats.py +96 -0
- package/python/sweagent/run/remove_unfinished.py +63 -0
- package/python/sweagent/run/rich_test.py +91 -0
- package/python/sweagent/run/run.py +147 -0
- package/python/sweagent/run/run_batch.py +442 -0
- package/python/sweagent/run/run_replay.py +219 -0
- package/python/sweagent/run/run_shell.py +155 -0
- package/python/sweagent/run/run_single.py +225 -0
- package/python/sweagent/run/run_traj_to_demo.py +85 -0
- package/python/sweagent/tools/__init__.py +0 -0
- package/python/sweagent/tools/bundle.py +57 -0
- package/python/sweagent/tools/commands.py +220 -0
- package/python/sweagent/tools/parsing.py +619 -0
- package/python/sweagent/tools/tools.py +430 -0
- package/python/sweagent/tools/utils.py +108 -0
- package/python/sweagent/types.py +102 -0
- package/python/sweagent/utils/__init__.py +0 -0
- package/python/sweagent/utils/config.py +80 -0
- package/python/sweagent/utils/files.py +27 -0
- package/python/sweagent/utils/github.py +118 -0
- package/python/sweagent/utils/jinja_warnings.py +14 -0
- package/python/sweagent/utils/log.py +175 -0
- package/python/sweagent/utils/patch_formatter.py +152 -0
- package/python/sweagent/utils/serialization.py +45 -0
- package/python/tests/__init__.py +0 -0
- package/python/tests/conftest.py +191 -0
- package/python/tests/test_agent.py +258 -0
- package/python/tests/test_batch_instance.py +43 -0
- package/python/tests/test_commands/_interactive_dummy.py +35 -0
- package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
- package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
- package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
- package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
- package/python/tests/test_data/data_sources/human_eval.json +1 -0
- package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
- package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
- package/python/tests/test_env.py +66 -0
- package/python/tests/test_env_utils.py +129 -0
- package/python/tests/test_history_processors.py +40 -0
- package/python/tests/test_models.py +23 -0
- package/python/tests/test_openai_live.py +164 -0
- package/python/tests/test_packaging.py +7 -0
- package/python/tests/test_parsing.py +131 -0
- package/python/tests/test_problem_statement_multimodal.py +111 -0
- package/python/tests/test_quick_stats.py +42 -0
- package/python/tests/test_run.py +37 -0
- package/python/tests/test_run_batch.py +110 -0
- package/python/tests/test_run_hooks.py +114 -0
- package/python/tests/test_run_replay.py +33 -0
- package/python/tests/test_run_single.py +125 -0
- package/python/tests/test_tools_command_parsing.py +193 -0
- package/python/tests/test_utils.py +15 -0
- package/python/tests/tools/__init__.py +0 -0
- package/python/tests/tools/conftest.py +12 -0
- package/python/tests/tools/test_default_utils.py +153 -0
- package/python/tests/tools/test_edit_replace.py +0 -0
- package/python/tests/tools/test_split_string.py +82 -0
- package/python/tests/utils.py +29 -0
- package/python/tools/diff_state/bin/_state_diff_state +52 -0
- package/python/tools/diff_state/config.yaml +2 -0
- package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
- package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
- package/python/tools/edit_anthropic/config.yaml +56 -0
- package/python/tools/edit_anthropic/install.sh +3 -0
- package/python/tools/filemap/bin/filemap +45 -0
- package/python/tools/filemap/config.yaml +9 -0
- package/python/tools/filemap/install.sh +2 -0
- package/python/tools/forfeit/bin/exit_forfeit +5 -0
- package/python/tools/forfeit/config.yaml +5 -0
- package/python/tools/image_tools/bin/view_image +36 -0
- package/python/tools/image_tools/config.yaml +9 -0
- package/python/tools/multilingual_setup/bin/do_nothing +2 -0
- package/python/tools/multilingual_setup/config.yaml +1 -0
- package/python/tools/multilingual_setup/install.sh +45 -0
- package/python/tools/registry/bin/_read_env +10 -0
- package/python/tools/registry/bin/_write_env +10 -0
- package/python/tools/registry/config.yaml +1 -0
- package/python/tools/registry/install.sh +6 -0
- package/python/tools/registry/lib/__init__.py +0 -0
- package/python/tools/registry/lib/registry.py +56 -0
- package/python/tools/review_on_submit_m/README.md +6 -0
- package/python/tools/review_on_submit_m/bin/submit +54 -0
- package/python/tools/review_on_submit_m/config.yaml +6 -0
- package/python/tools/review_on_submit_m/install.sh +0 -0
- package/python/tools/search/bin/find_file +31 -0
- package/python/tools/search/bin/search_dir +39 -0
- package/python/tools/search/bin/search_file +55 -0
- package/python/tools/search/config.yaml +37 -0
- package/python/tools/search/install.sh +3 -0
- package/python/tools/submit/bin/submit +17 -0
- package/python/tools/submit/config.yaml +5 -0
- package/python/tools/web_browser/bin/click_mouse +41 -0
- package/python/tools/web_browser/bin/close_site +28 -0
- package/python/tools/web_browser/bin/double_click_mouse +37 -0
- package/python/tools/web_browser/bin/drag_mouse +46 -0
- package/python/tools/web_browser/bin/execute_script_on_page +39 -0
- package/python/tools/web_browser/bin/get_console_output +48 -0
- package/python/tools/web_browser/bin/move_mouse +35 -0
- package/python/tools/web_browser/bin/navigate_back +33 -0
- package/python/tools/web_browser/bin/navigate_forward +33 -0
- package/python/tools/web_browser/bin/open_site +36 -0
- package/python/tools/web_browser/bin/press_keys_on_page +51 -0
- package/python/tools/web_browser/bin/reload_page +33 -0
- package/python/tools/web_browser/bin/run_web_browser_server +394 -0
- package/python/tools/web_browser/bin/screenshot_site +38 -0
- package/python/tools/web_browser/bin/scroll_on_page +40 -0
- package/python/tools/web_browser/bin/set_browser_window_size +40 -0
- package/python/tools/web_browser/bin/type_text +34 -0
- package/python/tools/web_browser/bin/wait_time +39 -0
- package/python/tools/web_browser/config.yaml +155 -0
- package/python/tools/web_browser/install.sh +22 -0
- package/python/tools/web_browser/lib/browser_manager.py +404 -0
- package/python/tools/web_browser/lib/web_browser_config.py +33 -0
- package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
- package/python/tools/web_browser/test_console.html +1 -0
- package/python/tools/windowed/bin/_state +25 -0
- package/python/tools/windowed/bin/create +29 -0
- package/python/tools/windowed/bin/goto +37 -0
- package/python/tools/windowed/bin/open +49 -0
- package/python/tools/windowed/bin/scroll_down +12 -0
- package/python/tools/windowed/bin/scroll_up +13 -0
- package/python/tools/windowed/config.yaml +38 -0
- package/python/tools/windowed/install.sh +15 -0
- package/python/tools/windowed/lib/__init__.py +0 -0
- package/python/tools/windowed/lib/flake8_utils.py +147 -0
- package/python/tools/windowed/lib/windowed_file.py +312 -0
- package/python/tools/windowed_edit_linting/bin/edit +128 -0
- package/python/tools/windowed_edit_linting/config.yaml +31 -0
- package/python/tools/windowed_edit_linting/install.sh +5 -0
- package/python/tools/windowed_edit_replace/bin/edit +172 -0
- package/python/tools/windowed_edit_replace/bin/insert +77 -0
- package/python/tools/windowed_edit_replace/config.yaml +60 -0
- package/python/tools/windowed_edit_replace/install.sh +5 -0
- package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
- package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
- package/python/tools/windowed_edit_rewrite/install.sh +5 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
- package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
- package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
- package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
- package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
- package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
- package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
- package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
- package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
- package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
- package/rust/Cargo.toml +100 -0
- package/rust/README.md +49 -0
- package/rust/src/agent/action_sampler.rs +130 -0
- package/rust/src/agent/agents.rs +1029 -0
- package/rust/src/agent/history_processors.rs +277 -0
- package/rust/src/agent/hooks/mod.rs +208 -0
- package/rust/src/agent/mod.rs +24 -0
- package/rust/src/agent/models.rs +837 -0
- package/rust/src/agent/problem_statement.rs +355 -0
- package/rust/src/agent/reviewer.rs +505 -0
- package/rust/src/bin/sweagent.rs +784 -0
- package/rust/src/environment/deployment.rs +631 -0
- package/rust/src/environment/hooks/mod.rs +114 -0
- package/rust/src/environment/mod.rs +16 -0
- package/rust/src/environment/repo.rs +265 -0
- package/rust/src/environment/runtime.rs +237 -0
- package/rust/src/environment/swe_env.rs +248 -0
- package/rust/src/exceptions.rs +228 -0
- package/rust/src/lib.rs +68 -0
- package/rust/src/monitoring.rs +482 -0
- package/rust/src/run/hooks/mod.rs +134 -0
- package/rust/src/run/mod.rs +12 -0
- package/rust/src/run/run_batch.rs +563 -0
- package/rust/src/run/run_single.rs +196 -0
- package/rust/src/tools/bundle.rs +224 -0
- package/rust/src/tools/commands.rs +173 -0
- package/rust/src/tools/mod.rs +295 -0
- package/rust/src/tools/parsing.rs +354 -0
- package/rust/src/tools/registry.rs +143 -0
- package/rust/src/types.rs +554 -0
- package/rust/src/utils/config.rs +105 -0
- package/rust/src/utils/files.rs +137 -0
- package/rust/src/utils/github.rs +171 -0
- package/rust/src/utils/log.rs +65 -0
- package/rust/src/utils/mod.rs +17 -0
- package/rust/src/utils/serialization.rs +181 -0
- package/rust/src/utils/template.rs +173 -0
- package/typescript/README.md +335 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Shaw Walters and elizaOS Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
# SWE-agent
|
|
2
|
+
|
|
3
|
+
> AI-powered Software Engineering Agent with Python, TypeScript, and Rust implementations
|
|
4
|
+
|
|
5
|
+
[elizaOS on GitHub](https://github.com/elizaos/eliza)
|
|
6
|
+
[License: MIT](LICENSE)
|
|
7
|
+
|
|
8
|
+
SWE-agent is an autonomous software engineering agent that can solve coding tasks, fix bugs, and implement features across codebases. This package provides production-ready implementations in three languages with full feature parity.
|
|
9
|
+
|
|
10
|
+
## Features
|
|
11
|
+
|
|
12
|
+
- **Multi-language Support**: Full implementations in Python, TypeScript, and Rust
|
|
13
|
+
- **Flexible Deployment**: Docker-based isolation or local execution
|
|
14
|
+
- **Multiple LLM Backends**: OpenAI, Anthropic Claude, Google Gemini, and more via LiteLLM
|
|
15
|
+
- **Batch Processing**: Run on multiple problem instances in parallel
|
|
16
|
+
- **Configurable Tools**: Extensible command set for code editing, search, and navigation
|
|
17
|
+
- **History Processing**: Intelligent context management for long interactions
|
|
18
|
+
- **Retry Mechanisms**: Smart retry loops with reviewers and choosers
|
|
19
|
+
|
|
20
|
+
## Installation
|
|
21
|
+
|
|
22
|
+
### TypeScript
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
npm install @elizaos/sweagent
|
|
26
|
+
# or
|
|
27
|
+
bun add @elizaos/sweagent
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
### Python
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pip install sweagent
|
|
34
|
+
# or using uv
|
|
35
|
+
uv pip install sweagent
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### Rust
|
|
39
|
+
|
|
40
|
+
Add to your `Cargo.toml`:
|
|
41
|
+
|
|
42
|
+
```toml
|
|
43
|
+
[dependencies]
|
|
44
|
+
elizaos-sweagent = "1.1.0"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Quick Start
|
|
48
|
+
|
|
49
|
+
### TypeScript
|
|
50
|
+
|
|
51
|
+
```typescript
|
|
52
|
+
import { RunSingle, RunSingleConfig } from '@elizaos/sweagent';
|
|
53
|
+
|
|
54
|
+
const config: RunSingleConfig = {
|
|
55
|
+
agent: {
|
|
56
|
+
model: { name: 'claude-3-5-sonnet-20241022' }
|
|
57
|
+
},
|
|
58
|
+
env: {
|
|
59
|
+
repo: { github_url: 'https://github.com/owner/repo' }
|
|
60
|
+
},
|
|
61
|
+
problem_statement: {
|
|
62
|
+
type: 'text',
|
|
63
|
+
text: 'Fix the bug in main.py',
|
|
64
|
+
id: 'bug-123'
|
|
65
|
+
}
|
|
66
|
+
};
|
|
67
|
+
|
|
68
|
+
const runner = RunSingle.fromConfig(config);
|
|
69
|
+
const result = await runner.run();
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Python
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from sweagent.run.run_single import run
|
|
76
|
+
|
|
77
|
+
result = run(
|
|
78
|
+
model_name="claude-3-5-sonnet-20241022",
|
|
79
|
+
problem_statement="Fix the bug in main.py",
|
|
80
|
+
repo="https://github.com/owner/repo"
|
|
81
|
+
)
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### Rust
|
|
85
|
+
|
|
86
|
+
```rust
|
|
87
|
+
use elizaos_sweagent::run::{RunSingle, RunSingleConfig};
|
|
88
|
+
|
|
89
|
+
#[tokio::main]
|
|
90
|
+
async fn main() -> anyhow::Result<()> {
|
|
91
|
+
let config = RunSingleConfig::default();
|
|
92
|
+
let mut runner = RunSingle::from_config(config)?;
|
|
93
|
+
let result = runner.run().await?;
|
|
94
|
+
Ok(())
|
|
95
|
+
}
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### CLI
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
# TypeScript CLI
|
|
102
|
+
npx sweagent run --agent.model.name=gpt-4 --problem_statement.path=issue.md
|
|
103
|
+
|
|
104
|
+
# Python CLI
|
|
105
|
+
sweagent run --agent.model.name gpt-4 --problem_statement.path issue.md
|
|
106
|
+
|
|
107
|
+
# Rust CLI
|
|
108
|
+
cargo run --bin sweagent -- run --agent.model.name=gpt-4 --problem_statement.path=issue.md
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## Configuration
|
|
112
|
+
|
|
113
|
+
SWE-agent uses YAML configuration files. A typical configuration looks like:
|
|
114
|
+
|
|
115
|
+
```yaml
|
|
116
|
+
agent:
|
|
117
|
+
model:
|
|
118
|
+
name: claude-3-5-sonnet-20241022
|
|
119
|
+
per_instance_cost_limit: 3.0
|
|
120
|
+
templates:
|
|
121
|
+
system_template: |
|
|
122
|
+
You are a software engineering expert...
|
|
123
|
+
|
|
124
|
+
env:
|
|
125
|
+
deployment:
|
|
126
|
+
type: docker
|
|
127
|
+
image: python:3.11
|
|
128
|
+
repo:
|
|
129
|
+
type: github
|
|
130
|
+
github_url: https://github.com/owner/repo
|
|
131
|
+
|
|
132
|
+
tools:
|
|
133
|
+
execution_timeout: 500
|
|
134
|
+
submit_command: submit
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
See the `config/` directory (shipped as `python/config/` in the published package) for example configurations.
|
|
138
|
+
|
|
139
|
+
## Project Structure
|
|
140
|
+
|
|
141
|
+
```
|
|
142
|
+
packages/sweagent/
|
|
143
|
+
├── python/ # Python implementation
|
|
144
|
+
│ ├── sweagent/ # Core package
|
|
145
|
+
│ └── tests/ # Python tests
|
|
146
|
+
├── typescript/ # TypeScript implementation
|
|
147
|
+
│ ├── src/ # Source code
|
|
148
|
+
│ └── tests/ # TypeScript tests
|
|
149
|
+
├── rust/ # Rust implementation
|
|
150
|
+
│ ├── src/ # Source code
|
|
151
|
+
│ └── tests/ # Rust tests
|
|
152
|
+
├── config/ # Shared YAML configurations
|
|
153
|
+
├── scripts/ # Build and test scripts
|
|
154
|
+
└── package.json # Root orchestration
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## Development
|
|
158
|
+
|
|
159
|
+
### Running Tests
|
|
160
|
+
|
|
161
|
+
All tests can be run from the root:
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
# Run all tests (TypeScript, Python, Rust)
|
|
165
|
+
bun run test
|
|
166
|
+
|
|
167
|
+
# Individual language tests
|
|
168
|
+
bun run test:ts
|
|
169
|
+
bun run test:python
|
|
170
|
+
bun run test:rust
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
Or use the test script directly:
|
|
174
|
+
|
|
175
|
+
```bash
|
|
176
|
+
./scripts/run-all-tests.sh
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
### Building
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
# Build all implementations
|
|
183
|
+
bun run build
|
|
184
|
+
|
|
185
|
+
# Individual builds
|
|
186
|
+
bun run build:ts
|
|
187
|
+
bun run build:rust
|
|
188
|
+
bun run build:python
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
### Linting
|
|
192
|
+
|
|
193
|
+
```bash
|
|
194
|
+
# Lint all
|
|
195
|
+
bun run lint
|
|
196
|
+
|
|
197
|
+
# Individual
|
|
198
|
+
bun run lint:check # TypeScript
|
|
199
|
+
bun run lint:rust
|
|
200
|
+
bun run lint:python
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
## Architecture
|
|
204
|
+
|
|
205
|
+
### Agent
|
|
206
|
+
|
|
207
|
+
The agent module handles the core problem-solving loop:
|
|
208
|
+
|
|
209
|
+
- **DefaultAgent**: Main agent implementation with configurable models and tools
|
|
210
|
+
- **RetryAgent**: Wrapper that retries with different configurations
|
|
211
|
+
- **Models**: LiteLLM, Human, Replay, InstantEmptySubmit models
|
|
212
|
+
- **History Processors**: Context management for conversation history
|
|
213
|
+
|
|
214
|
+
### Environment
|
|
215
|
+
|
|
216
|
+
The environment module provides isolated execution:
|
|
217
|
+
|
|
218
|
+
- **SWEEnv**: Main environment orchestrator
|
|
219
|
+
- **Deployment**: Docker-based or mock deployments
|
|
220
|
+
- **Repo**: GitHub, local, or pre-existing repository sources
|
|
221
|
+
|
|
222
|
+
### Tools
|
|
223
|
+
|
|
224
|
+
The tools module provides command parsing and execution:
|
|
225
|
+
|
|
226
|
+
- **Bundle**: Tool definitions with arguments and documentation
|
|
227
|
+
- **Parsing**: Multiple parse functions (ThoughtAction, XML, JSON, FunctionCalling)
|
|
228
|
+
- **Registry**: Tool registration and discovery
|
|
229
|
+
|
|
230
|
+
### Run
|
|
231
|
+
|
|
232
|
+
The run module handles execution:
|
|
233
|
+
|
|
234
|
+
- **RunSingle**: Single instance runner
|
|
235
|
+
- **RunBatch**: Batch processing with parallelism
|
|
236
|
+
- **Hooks**: Extension points for monitoring and actions
|
|
237
|
+
|
|
238
|
+
## API Reference
|
|
239
|
+
|
|
240
|
+
### TypeScript
|
|
241
|
+
|
|
242
|
+
See TypeScript type definitions in `typescript/src/index.ts`.
|
|
243
|
+
|
|
244
|
+
### Python
|
|
245
|
+
|
|
246
|
+
See Python docstrings and type hints in `python/sweagent/`.
|
|
247
|
+
|
|
248
|
+
### Rust
|
|
249
|
+
|
|
250
|
+
Generate docs with:
|
|
251
|
+
|
|
252
|
+
```bash
|
|
253
|
+
cd rust && cargo doc --open
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
## Contributing
|
|
257
|
+
|
|
258
|
+
1. Fork the repository
|
|
259
|
+
2. Create a feature branch
|
|
260
|
+
3. Make your changes ensuring parity across all three implementations
|
|
261
|
+
4. Add tests for new functionality
|
|
262
|
+
5. Submit a pull request
|
|
263
|
+
|
|
264
|
+
## License
|
|
265
|
+
|
|
266
|
+
MIT License - see [LICENSE](LICENSE) for details.
|
|
267
|
+
|
|
268
|
+
## Acknowledgments
|
|
269
|
+
|
|
270
|
+
Based on the original [SWE-agent](https://github.com/princeton-nlp/SWE-agent) by Princeton NLP.
|
package/package.json
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@elizaos/sweagent-root",
|
|
3
|
+
"private": false,
|
|
4
|
+
"version": "2.0.0-alpha",
|
|
5
|
+
"description": "SWE-agent: AI software engineering agent with Python, TypeScript, and Rust implementations",
|
|
6
|
+
"type": "module",
|
|
7
|
+
"main": "typescript/dist/index.js",
|
|
8
|
+
"types": "typescript/dist/index.d.ts",
|
|
9
|
+
"exports": {
|
|
10
|
+
"./package.json": "./package.json",
|
|
11
|
+
".": {
|
|
12
|
+
"types": "./typescript/dist/index.d.ts",
|
|
13
|
+
"import": "./typescript/dist/index.js",
|
|
14
|
+
"default": "./typescript/dist/index.js"
|
|
15
|
+
},
|
|
16
|
+
"./rust": {
|
|
17
|
+
"import": "./rust/pkg/node/elizaos_sweagent.js"
|
|
18
|
+
}
|
|
19
|
+
},
|
|
20
|
+
"files": [
|
|
21
|
+
"typescript/dist",
|
|
22
|
+
"README.md",
|
|
23
|
+
"python",
|
|
24
|
+
"rust/src",
|
|
25
|
+
"rust/Cargo.toml",
|
|
26
|
+
"rust/pkg"
|
|
27
|
+
],
|
|
28
|
+
"keywords": [
|
|
29
|
+
"software-engineering",
|
|
30
|
+
"agent",
|
|
31
|
+
"ai",
|
|
32
|
+
"automation",
|
|
33
|
+
"development-tools",
|
|
34
|
+
"swe-agent",
|
|
35
|
+
"elizaos"
|
|
36
|
+
],
|
|
37
|
+
"author": "elizaOS",
|
|
38
|
+
"license": "MIT",
|
|
39
|
+
"repository": {
|
|
40
|
+
"type": "git",
|
|
41
|
+
"url": "git+https://github.com/elizaos/eliza.git"
|
|
42
|
+
},
|
|
43
|
+
"scripts": {
|
|
44
|
+
"build": "bun run build:ts && bun run build:rust && bun run build:python",
|
|
45
|
+
"build:ts": "cd typescript && bun run build",
|
|
46
|
+
"build:rust": "test -d rust && cd rust && cargo build --release || echo 'Rust build skipped - no rust directory'",
|
|
47
|
+
"build:python": "test -d python && cd python && (python3 -m build 2>/dev/null || pyproject-build) || echo 'Python build skipped - no python directory'",
|
|
48
|
+
"dev": "cd typescript && bun --hot build.ts",
|
|
49
|
+
"test": "bun run test:ts && bun run test:rust && bun run test:python",
|
|
50
|
+
"test:ts": "cd typescript && bun run build && vitest run",
|
|
51
|
+
"test:rust": "test -d rust && cd rust && cargo test || echo 'Rust tests skipped'",
|
|
52
|
+
"test:python": "test -d python && cd python && pytest -p no:anchorpy --asyncio-mode=auto",
|
|
53
|
+
"typecheck": "tsc --noEmit -p typescript/tsconfig.json",
|
|
54
|
+
"lint": "bunx @biomejs/biome check --write ./typescript",
|
|
55
|
+
"lint:check": "bunx @biomejs/biome check ./typescript",
|
|
56
|
+
"lint:rust": "test -d rust && cd rust && cargo clippy --all-targets --fix --allow-dirty --allow-staged -- -D warnings && cargo fmt || echo 'Rust lint skipped'",
|
|
57
|
+
"lint:python": "test -d python && cd python && ruff check --fix . && ruff format . || echo 'Python lint skipped'",
|
|
58
|
+
"clean": "rm -rf typescript/dist python/dist rust/target .turbo node_modules"
|
|
59
|
+
},
|
|
60
|
+
"devDependencies": {
|
|
61
|
+
"@biomejs/biome": "^2.3.11",
|
|
62
|
+
"@types/bun": "^1.3.5",
|
|
63
|
+
"@types/node": "^25.0.3",
|
|
64
|
+
"typescript": "^5.9.3",
|
|
65
|
+
"vitest": "^4.0.17"
|
|
66
|
+
},
|
|
67
|
+
"publishConfig": {
|
|
68
|
+
"access": "public"
|
|
69
|
+
},
|
|
70
|
+
"gitHead": "53fe0ff5652913ee99f49d363262dc5291993e26"
|
|
71
|
+
}
|
package/python/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 John Yang, Carlos E. Jimenez, Alexander Wettig, Shunyu Yao, Karthik Narasimhan, Ofir Press
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
* Default config: `default.yaml`
|
|
2
|
+
* `benchmarks`: Configs that were used for SWE-bench submissions
|
|
3
|
+
* `sweagent_0_7`: Configs from SWE-agent 0.7, similar to the one used in the paper
|
|
4
|
+
* `exotic`: Various specific configurations that might be more of niche interest
|
|
5
|
+
* `human`: Demo/debug configs in which a human types the commands; they run without an LM
|
|
6
|
+
* `demo`: Configs for demonstrations/talks
|
|
7
|
+
* Configs for running with SWE-smith are at https://github.com/SWE-bench/SWE-smith/blob/main/agent/swesmith_infer.yaml
|
|
8
|
+
|
|
9
|
+
🔗 Tutorial on [adding custom tools](https://swe-agent.com/latest/usage/adding_custom_tools/)
|
|
10
|
+
🔗 For more information on config files, visit [our documentation website][docs].
|
|
11
|
+
|
|
12
|
+
You can also find the corresponding markdown files in the [`docs/` folder][source].
|
|
13
|
+
|
|
14
|
+
[docs]: https://swe-agent.com/latest/config/config
|
|
15
|
+
[source]: https://github.com/SWE-agent/SWE-agent/tree/main/docs
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
# This is a very basic, stripped-down config that should be compatible with any instruction-following LM
|
|
2
|
+
agent:
|
|
3
|
+
type: default
|
|
4
|
+
templates:
|
|
5
|
+
system_template: |-
|
|
6
|
+
You are a helpful assistant that can interact multiple times with a computer shell to solve programming tasks.
|
|
7
|
+
You operate in a REPL (Read-Eval-Print Loop) environment where you must issue exactly ONE command at a time.
|
|
8
|
+
Your response must contain exactly ONE bash code block with ONE command (or commands connected with && or ||).
|
|
9
|
+
|
|
10
|
+
Include a THOUGHT section before your command where you explain your reasoning process.
|
|
11
|
+
Format your response as:
|
|
12
|
+
|
|
13
|
+
THOUGHT: Your reasoning and analysis here
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
your_command_here
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Failure to follow these rules will cause your response to be rejected.
|
|
20
|
+
instance_template: |-
|
|
21
|
+
<uploaded_files>
|
|
22
|
+
{{working_dir}}
|
|
23
|
+
</uploaded_files>
|
|
24
|
+
|
|
25
|
+
<pr_description>
|
|
26
|
+
I've uploaded a python code repository in the directory {{working_dir}}.
|
|
27
|
+
Consider the following PR description:
|
|
28
|
+
{{problem_statement}}
|
|
29
|
+
</pr_description>
|
|
30
|
+
|
|
31
|
+
<instructions>
|
|
32
|
+
# Task Instructions
|
|
33
|
+
|
|
34
|
+
## Overview
|
|
35
|
+
You're a software engineer interacting continuously with a computer shell in a REPL (Read-Eval-Print Loop) environment.
|
|
36
|
+
You'll be helping implement necessary changes to meet requirements in the PR description.
|
|
37
|
+
Your task is specifically to make changes to non-test files in the {{working_dir}} directory in order to fix the issue described in the PR description in a way that is general and consistent with the codebase.
|
|
38
|
+
|
|
39
|
+
IMPORTANT: This is an interactive process where you will think and issue ONE command, see its result, then think and issue your next command.
|
|
40
|
+
|
|
41
|
+
For each response:
|
|
42
|
+
1. Include a THOUGHT section explaining your reasoning and what you're trying to accomplish
|
|
43
|
+
2. Provide exactly ONE bash command to execute
|
|
44
|
+
|
|
45
|
+
## Important Boundaries
|
|
46
|
+
- MODIFY: Regular source code files in {{working_dir}}
|
|
47
|
+
- DO NOT MODIFY: Tests, configuration files (pyproject.toml, setup.cfg, etc.)
|
|
48
|
+
|
|
49
|
+
## Recommended Workflow
|
|
50
|
+
1. Analyze the codebase by finding and reading relevant files
|
|
51
|
+
2. Create a script to reproduce the issue
|
|
52
|
+
3. Edit the source code to resolve the issue
|
|
53
|
+
4. Verify your fix works by running your script again
|
|
54
|
+
5. Test edge cases to ensure your fix is robust
|
|
55
|
+
|
|
56
|
+
## Command Execution Rules
|
|
57
|
+
You are operating in a REPL (Read-Eval-Print Loop) environment where:
|
|
58
|
+
1. You write a single command
|
|
59
|
+
2. The system executes that command
|
|
60
|
+
3. You see the result
|
|
61
|
+
4. You write your next command
|
|
62
|
+
|
|
63
|
+
Each response should include:
|
|
64
|
+
1. A **THOUGHT** section where you explain your reasoning and plan
|
|
65
|
+
2. A single bash code block with your command
|
|
66
|
+
|
|
67
|
+
Format your responses like this:
|
|
68
|
+
```
|
|
69
|
+
THOUGHT: Here I explain my reasoning process, analysis of the current situation,
|
|
70
|
+
and what I'm trying to accomplish with the command below.
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
your_command_here
|
|
74
|
+
```
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Commands must be specified in a single bash code block:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
your_command_here
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
**CRITICAL REQUIREMENTS:**
|
|
84
|
+
- Your response SHOULD include a THOUGHT section explaining your reasoning
|
|
85
|
+
- Your response MUST include EXACTLY ONE bash code block
|
|
86
|
+
- This bash block MUST contain EXACTLY ONE command (or a set of commands connected with && or ||)
|
|
87
|
+
- If you include zero or multiple bash blocks, or no command at all, YOUR RESPONSE WILL FAIL
|
|
88
|
+
- Do NOT try to run multiple independent commands in separate blocks in one response
|
|
89
|
+
|
|
90
|
+
Example of a CORRECT response:
|
|
91
|
+
<example_response>
|
|
92
|
+
THOUGHT: I need to understand the structure of the repository first. Let me check what files are in the current directory to get a better understanding of the codebase.
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
ls -la
|
|
96
|
+
```
|
|
97
|
+
</example_response>
|
|
98
|
+
|
|
99
|
+
Example of an INCORRECT response:
|
|
100
|
+
<example_response>
|
|
101
|
+
THOUGHT: I need to examine the codebase and then look at a specific file. I'll run multiple commands to do this.
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
ls -la
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Now I'll read the file:
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
cat file.txt
|
|
111
|
+
```
|
|
112
|
+
</example_response>
|
|
113
|
+
|
|
114
|
+
If you need to run multiple commands, either:
|
|
115
|
+
1. Combine them in one block using && or ||
|
|
116
|
+
```bash
|
|
117
|
+
command1 && command2 || echo "Error occurred"
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
2. Wait for the first command to complete, see its output, then issue the next command in your following response.
|
|
121
|
+
|
|
122
|
+
## Environment Details
|
|
123
|
+
- You have a full Linux shell environment
|
|
124
|
+
- Always use non-interactive flags (-y, -f) for commands
|
|
125
|
+
- Avoid interactive tools like vi, nano, or any that require user input
|
|
126
|
+
- If a command isn't available, you can install it
|
|
127
|
+
|
|
128
|
+
## Useful Command Examples
|
|
129
|
+
|
|
130
|
+
### Create a new file:
|
|
131
|
+
```bash
|
|
132
|
+
cat <<'EOF' > newfile.py
|
|
133
|
+
import numpy as np
|
|
134
|
+
hello = "world"
|
|
135
|
+
print(hello)
|
|
136
|
+
EOF
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### Edit files with sed:
|
|
140
|
+
```bash
|
|
141
|
+
# Replace all occurrences
|
|
142
|
+
sed -i 's/old_string/new_string/g' filename.py
|
|
143
|
+
|
|
144
|
+
# Replace only first occurrence
|
|
145
|
+
sed -i 's/old_string/new_string/' filename.py
|
|
146
|
+
|
|
147
|
+
# Replace first occurrence on line 1
|
|
148
|
+
sed -i '1s/old_string/new_string/' filename.py
|
|
149
|
+
|
|
150
|
+
# Replace all occurrences in lines 1-10
|
|
151
|
+
sed -i '1,10s/old_string/new_string/g' filename.py
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### View file content:
|
|
155
|
+
```bash
|
|
156
|
+
# View specific lines with numbers
|
|
157
|
+
nl -ba filename.py | sed -n '10,20p'
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### Any other command you want to run
|
|
161
|
+
```bash
|
|
162
|
+
anything
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
## Submission
|
|
166
|
+
When you've completed your changes or can't make further progress:
|
|
167
|
+
```bash
|
|
168
|
+
submit
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
We'll automatically save your work and have maintainers evaluate it.
|
|
172
|
+
</instructions>
|
|
173
|
+
next_step_template: |-
|
|
174
|
+
<observation>
|
|
175
|
+
{{observation}}
|
|
176
|
+
</observation>
|
|
177
|
+
next_step_no_output_template: |-
|
|
178
|
+
<warning>
|
|
179
|
+
Your last command ran successfully and did not produce any output.
|
|
180
|
+
</warning>
|
|
181
|
+
max_observation_length: 10_000
|
|
182
|
+
next_step_truncated_observation_template: |-
|
|
183
|
+
<warning>
|
|
184
|
+
The output of your last command was too long.
|
|
185
|
+
Please try a different command that produces less output.
|
|
186
|
+
If you're looking at a file you can try use head, tail or sed to view a smaller number of lines selectively.
|
|
187
|
+
If you're using grep or find and it produced too much output, you can use a more selective search pattern.
|
|
188
|
+
If you really need to see something from the full command's output, you can redirect output to a file and then search in that file.
|
|
189
|
+
</warning>
|
|
190
|
+
|
|
191
|
+
<observation_head>
|
|
192
|
+
{{observation[ : max_observation_length // 2]}}
|
|
193
|
+
</observation_head>
|
|
194
|
+
|
|
195
|
+
<elided_chars>
|
|
196
|
+
{{elided_chars}} characters elided
|
|
197
|
+
</elided_chars>
|
|
198
|
+
|
|
199
|
+
<observation_tail>
|
|
200
|
+
{{observation[- max_observation_length // 2:]}}
|
|
201
|
+
</observation_tail>
|
|
202
|
+
command_cancelled_timeout_template: |-
|
|
203
|
+
<warning>
|
|
204
|
+
The command '{{command}}' was cancelled because it took more than {{timeout}} seconds to complete.
|
|
205
|
+
It may have been waiting for user input or otherwise blocked.
|
|
206
|
+
Please try a different command.
|
|
207
|
+
</warning>
|
|
208
|
+
tools:
|
|
209
|
+
execution_timeout: 60
|
|
210
|
+
bundles:
|
|
211
|
+
- path: tools/submit
|
|
212
|
+
parse_function:
|
|
213
|
+
type: single_bash_code_block
|
|
214
|
+
model:
|
|
215
|
+
per_instance_cost_limit: 3
|
|
216
|
+
per_instance_call_limit: 250
|
|
217
|
+
total_cost_limit: 1500.0
|
|
218
|
+
temperature: 0.0
|
|
219
|
+
delay: 0.0
|
|
220
|
+
retry:
|
|
221
|
+
retries: 6
|
|
222
|
+
max_wait: 30
|