@elizaos/sweagent-root 2.0.0-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (323) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +270 -0
  3. package/package.json +71 -0
  4. package/python/LICENSE +21 -0
  5. package/python/config/README.md +15 -0
  6. package/python/config/bash_only.yaml +222 -0
  7. package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
  8. package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
  9. package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
  10. package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
  11. package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
  12. package/python/config/coding_challenge.yaml +104 -0
  13. package/python/config/default.yaml +69 -0
  14. package/python/config/default_backticks.yaml +69 -0
  15. package/python/config/default_mm_no_images.yaml +82 -0
  16. package/python/config/default_mm_with_images.yaml +83 -0
  17. package/python/config/demo/default.yaml +80 -0
  18. package/python/config/demo/no_instructions.yaml +69 -0
  19. package/python/config/demo/only_bash.yaml +60 -0
  20. package/python/config/exotic/default_shell.yaml +52 -0
  21. package/python/config/exotic/windowed_replace.yaml +125 -0
  22. package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
  23. package/python/config/human/human.yaml +24 -0
  24. package/python/config/human/human_demo.yaml +52 -0
  25. package/python/config/sweagent_0_7/07.yaml +101 -0
  26. package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
  27. package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
  28. package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
  29. package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
  30. package/python/mlc_config.json +44 -0
  31. package/python/pyproject.toml +262 -0
  32. package/python/sweagent/__init__.py +114 -0
  33. package/python/sweagent/__main__.py +4 -0
  34. package/python/sweagent/agent/__init__.py +0 -0
  35. package/python/sweagent/agent/action_sampler.py +317 -0
  36. package/python/sweagent/agent/agents.py +1294 -0
  37. package/python/sweagent/agent/extra/shell_agent.py +106 -0
  38. package/python/sweagent/agent/history_processors.py +399 -0
  39. package/python/sweagent/agent/hooks/__init__.py +0 -0
  40. package/python/sweagent/agent/hooks/abstract.py +139 -0
  41. package/python/sweagent/agent/hooks/status.py +34 -0
  42. package/python/sweagent/agent/models.py +896 -0
  43. package/python/sweagent/agent/problem_statement.py +312 -0
  44. package/python/sweagent/agent/reviewer.py +664 -0
  45. package/python/sweagent/environment/__init__.py +0 -0
  46. package/python/sweagent/environment/hooks/__init__.py +0 -0
  47. package/python/sweagent/environment/hooks/abstract.py +60 -0
  48. package/python/sweagent/environment/hooks/status.py +28 -0
  49. package/python/sweagent/environment/repo.py +219 -0
  50. package/python/sweagent/environment/swe_env.py +276 -0
  51. package/python/sweagent/exceptions.py +54 -0
  52. package/python/sweagent/inspector/README.md +6 -0
  53. package/python/sweagent/inspector/__init__.py +0 -0
  54. package/python/sweagent/inspector/favicon.ico +0 -0
  55. package/python/sweagent/inspector/fileViewer.js +354 -0
  56. package/python/sweagent/inspector/icons/computer.png +0 -0
  57. package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
  58. package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
  59. package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
  60. package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
  61. package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
  62. package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
  63. package/python/sweagent/inspector/index.html +25 -0
  64. package/python/sweagent/inspector/server.py +354 -0
  65. package/python/sweagent/inspector/static.py +169 -0
  66. package/python/sweagent/inspector/style.css +454 -0
  67. package/python/sweagent/run/__init__.py +0 -0
  68. package/python/sweagent/run/_progress.py +158 -0
  69. package/python/sweagent/run/batch_instances.py +419 -0
  70. package/python/sweagent/run/common.py +387 -0
  71. package/python/sweagent/run/compare_runs.py +123 -0
  72. package/python/sweagent/run/extract_pred.py +19 -0
  73. package/python/sweagent/run/hooks/__init__.py +0 -0
  74. package/python/sweagent/run/hooks/abstract.py +67 -0
  75. package/python/sweagent/run/hooks/apply_patch.py +106 -0
  76. package/python/sweagent/run/hooks/open_pr.py +244 -0
  77. package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
  78. package/python/sweagent/run/inspector_cli.py +493 -0
  79. package/python/sweagent/run/merge_predictions.py +64 -0
  80. package/python/sweagent/run/quick_stats.py +96 -0
  81. package/python/sweagent/run/remove_unfinished.py +63 -0
  82. package/python/sweagent/run/rich_test.py +91 -0
  83. package/python/sweagent/run/run.py +147 -0
  84. package/python/sweagent/run/run_batch.py +442 -0
  85. package/python/sweagent/run/run_replay.py +219 -0
  86. package/python/sweagent/run/run_shell.py +155 -0
  87. package/python/sweagent/run/run_single.py +225 -0
  88. package/python/sweagent/run/run_traj_to_demo.py +85 -0
  89. package/python/sweagent/tools/__init__.py +0 -0
  90. package/python/sweagent/tools/bundle.py +57 -0
  91. package/python/sweagent/tools/commands.py +220 -0
  92. package/python/sweagent/tools/parsing.py +619 -0
  93. package/python/sweagent/tools/tools.py +430 -0
  94. package/python/sweagent/tools/utils.py +108 -0
  95. package/python/sweagent/types.py +102 -0
  96. package/python/sweagent/utils/__init__.py +0 -0
  97. package/python/sweagent/utils/config.py +80 -0
  98. package/python/sweagent/utils/files.py +27 -0
  99. package/python/sweagent/utils/github.py +118 -0
  100. package/python/sweagent/utils/jinja_warnings.py +14 -0
  101. package/python/sweagent/utils/log.py +175 -0
  102. package/python/sweagent/utils/patch_formatter.py +152 -0
  103. package/python/sweagent/utils/serialization.py +45 -0
  104. package/python/tests/__init__.py +0 -0
  105. package/python/tests/conftest.py +191 -0
  106. package/python/tests/test_agent.py +258 -0
  107. package/python/tests/test_batch_instance.py +43 -0
  108. package/python/tests/test_commands/_interactive_dummy.py +35 -0
  109. package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
  110. package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
  111. package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
  112. package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
  113. package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
  114. package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
  115. package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
  116. package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
  117. package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
  118. package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
  119. package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
  120. package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
  121. package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
  122. package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
  123. package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
  124. package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
  125. package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
  126. package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
  127. package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
  128. package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
  129. package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
  130. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
  131. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
  132. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
  133. package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
  134. package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
  135. package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
  136. package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
  137. package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
  138. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
  139. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
  140. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
  141. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
  142. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
  143. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
  144. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
  145. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
  146. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
  147. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
  148. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
  149. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
  150. package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
  151. package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
  152. package/python/tests/test_data/data_sources/human_eval.json +1 -0
  153. package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
  154. package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
  155. package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
  156. package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
  157. package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
  158. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
  159. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
  160. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
  161. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
  162. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
  163. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
  164. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
  165. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
  166. package/python/tests/test_env.py +66 -0
  167. package/python/tests/test_env_utils.py +129 -0
  168. package/python/tests/test_history_processors.py +40 -0
  169. package/python/tests/test_models.py +23 -0
  170. package/python/tests/test_openai_live.py +164 -0
  171. package/python/tests/test_packaging.py +7 -0
  172. package/python/tests/test_parsing.py +131 -0
  173. package/python/tests/test_problem_statement_multimodal.py +111 -0
  174. package/python/tests/test_quick_stats.py +42 -0
  175. package/python/tests/test_run.py +37 -0
  176. package/python/tests/test_run_batch.py +110 -0
  177. package/python/tests/test_run_hooks.py +114 -0
  178. package/python/tests/test_run_replay.py +33 -0
  179. package/python/tests/test_run_single.py +125 -0
  180. package/python/tests/test_tools_command_parsing.py +193 -0
  181. package/python/tests/test_utils.py +15 -0
  182. package/python/tests/tools/__init__.py +0 -0
  183. package/python/tests/tools/conftest.py +12 -0
  184. package/python/tests/tools/test_default_utils.py +153 -0
  185. package/python/tests/tools/test_edit_replace.py +0 -0
  186. package/python/tests/tools/test_split_string.py +82 -0
  187. package/python/tests/utils.py +29 -0
  188. package/python/tools/diff_state/bin/_state_diff_state +52 -0
  189. package/python/tools/diff_state/config.yaml +2 -0
  190. package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
  191. package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
  192. package/python/tools/edit_anthropic/config.yaml +56 -0
  193. package/python/tools/edit_anthropic/install.sh +3 -0
  194. package/python/tools/filemap/bin/filemap +45 -0
  195. package/python/tools/filemap/config.yaml +9 -0
  196. package/python/tools/filemap/install.sh +2 -0
  197. package/python/tools/forfeit/bin/exit_forfeit +5 -0
  198. package/python/tools/forfeit/config.yaml +5 -0
  199. package/python/tools/image_tools/bin/view_image +36 -0
  200. package/python/tools/image_tools/config.yaml +9 -0
  201. package/python/tools/multilingual_setup/bin/do_nothing +2 -0
  202. package/python/tools/multilingual_setup/config.yaml +1 -0
  203. package/python/tools/multilingual_setup/install.sh +45 -0
  204. package/python/tools/registry/bin/_read_env +10 -0
  205. package/python/tools/registry/bin/_write_env +10 -0
  206. package/python/tools/registry/config.yaml +1 -0
  207. package/python/tools/registry/install.sh +6 -0
  208. package/python/tools/registry/lib/__init__.py +0 -0
  209. package/python/tools/registry/lib/registry.py +56 -0
  210. package/python/tools/review_on_submit_m/README.md +6 -0
  211. package/python/tools/review_on_submit_m/bin/submit +54 -0
  212. package/python/tools/review_on_submit_m/config.yaml +6 -0
  213. package/python/tools/review_on_submit_m/install.sh +0 -0
  214. package/python/tools/search/bin/find_file +31 -0
  215. package/python/tools/search/bin/search_dir +39 -0
  216. package/python/tools/search/bin/search_file +55 -0
  217. package/python/tools/search/config.yaml +37 -0
  218. package/python/tools/search/install.sh +3 -0
  219. package/python/tools/submit/bin/submit +17 -0
  220. package/python/tools/submit/config.yaml +5 -0
  221. package/python/tools/web_browser/bin/click_mouse +41 -0
  222. package/python/tools/web_browser/bin/close_site +28 -0
  223. package/python/tools/web_browser/bin/double_click_mouse +37 -0
  224. package/python/tools/web_browser/bin/drag_mouse +46 -0
  225. package/python/tools/web_browser/bin/execute_script_on_page +39 -0
  226. package/python/tools/web_browser/bin/get_console_output +48 -0
  227. package/python/tools/web_browser/bin/move_mouse +35 -0
  228. package/python/tools/web_browser/bin/navigate_back +33 -0
  229. package/python/tools/web_browser/bin/navigate_forward +33 -0
  230. package/python/tools/web_browser/bin/open_site +36 -0
  231. package/python/tools/web_browser/bin/press_keys_on_page +51 -0
  232. package/python/tools/web_browser/bin/reload_page +33 -0
  233. package/python/tools/web_browser/bin/run_web_browser_server +394 -0
  234. package/python/tools/web_browser/bin/screenshot_site +38 -0
  235. package/python/tools/web_browser/bin/scroll_on_page +40 -0
  236. package/python/tools/web_browser/bin/set_browser_window_size +40 -0
  237. package/python/tools/web_browser/bin/type_text +34 -0
  238. package/python/tools/web_browser/bin/wait_time +39 -0
  239. package/python/tools/web_browser/config.yaml +155 -0
  240. package/python/tools/web_browser/install.sh +22 -0
  241. package/python/tools/web_browser/lib/browser_manager.py +404 -0
  242. package/python/tools/web_browser/lib/web_browser_config.py +33 -0
  243. package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
  244. package/python/tools/web_browser/test_console.html +1 -0
  245. package/python/tools/windowed/bin/_state +25 -0
  246. package/python/tools/windowed/bin/create +29 -0
  247. package/python/tools/windowed/bin/goto +37 -0
  248. package/python/tools/windowed/bin/open +49 -0
  249. package/python/tools/windowed/bin/scroll_down +12 -0
  250. package/python/tools/windowed/bin/scroll_up +13 -0
  251. package/python/tools/windowed/config.yaml +38 -0
  252. package/python/tools/windowed/install.sh +15 -0
  253. package/python/tools/windowed/lib/__init__.py +0 -0
  254. package/python/tools/windowed/lib/flake8_utils.py +147 -0
  255. package/python/tools/windowed/lib/windowed_file.py +312 -0
  256. package/python/tools/windowed_edit_linting/bin/edit +128 -0
  257. package/python/tools/windowed_edit_linting/config.yaml +31 -0
  258. package/python/tools/windowed_edit_linting/install.sh +5 -0
  259. package/python/tools/windowed_edit_replace/bin/edit +172 -0
  260. package/python/tools/windowed_edit_replace/bin/insert +77 -0
  261. package/python/tools/windowed_edit_replace/config.yaml +60 -0
  262. package/python/tools/windowed_edit_replace/install.sh +5 -0
  263. package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
  264. package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
  265. package/python/tools/windowed_edit_rewrite/install.sh +5 -0
  266. package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
  267. package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
  268. package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
  269. package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
  270. package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
  271. package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
  272. package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
  273. package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
  274. package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
  275. package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
  276. package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
  277. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
  278. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  279. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  280. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
  281. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
  282. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
  283. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  284. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  285. package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
  286. package/rust/Cargo.toml +100 -0
  287. package/rust/README.md +49 -0
  288. package/rust/src/agent/action_sampler.rs +130 -0
  289. package/rust/src/agent/agents.rs +1029 -0
  290. package/rust/src/agent/history_processors.rs +277 -0
  291. package/rust/src/agent/hooks/mod.rs +208 -0
  292. package/rust/src/agent/mod.rs +24 -0
  293. package/rust/src/agent/models.rs +837 -0
  294. package/rust/src/agent/problem_statement.rs +355 -0
  295. package/rust/src/agent/reviewer.rs +505 -0
  296. package/rust/src/bin/sweagent.rs +784 -0
  297. package/rust/src/environment/deployment.rs +631 -0
  298. package/rust/src/environment/hooks/mod.rs +114 -0
  299. package/rust/src/environment/mod.rs +16 -0
  300. package/rust/src/environment/repo.rs +265 -0
  301. package/rust/src/environment/runtime.rs +237 -0
  302. package/rust/src/environment/swe_env.rs +248 -0
  303. package/rust/src/exceptions.rs +228 -0
  304. package/rust/src/lib.rs +68 -0
  305. package/rust/src/monitoring.rs +482 -0
  306. package/rust/src/run/hooks/mod.rs +134 -0
  307. package/rust/src/run/mod.rs +12 -0
  308. package/rust/src/run/run_batch.rs +563 -0
  309. package/rust/src/run/run_single.rs +196 -0
  310. package/rust/src/tools/bundle.rs +224 -0
  311. package/rust/src/tools/commands.rs +173 -0
  312. package/rust/src/tools/mod.rs +295 -0
  313. package/rust/src/tools/parsing.rs +354 -0
  314. package/rust/src/tools/registry.rs +143 -0
  315. package/rust/src/types.rs +554 -0
  316. package/rust/src/utils/config.rs +105 -0
  317. package/rust/src/utils/files.rs +137 -0
  318. package/rust/src/utils/github.rs +171 -0
  319. package/rust/src/utils/log.rs +65 -0
  320. package/rust/src/utils/mod.rs +17 -0
  321. package/rust/src/utils/serialization.rs +181 -0
  322. package/rust/src/utils/template.rs +173 -0
  323. package/typescript/README.md +335 -0
@@ -0,0 +1,1294 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import copy
5
+ import json
6
+ import logging
7
+ import time
8
+ from pathlib import Path, PurePosixPath
9
+ from typing import Annotated, Any, Literal
10
+
11
+ import yaml
12
+ from jinja2 import Template
13
+ from pydantic import BaseModel, ConfigDict, Field, model_validator
14
+ from simple_parsing.helpers.fields import field
15
+ from swerex.exceptions import BashIncorrectSyntaxError, CommandTimeoutError, SwerexException
16
+ from tenacity import RetryError
17
+ from typing_extensions import Self
18
+ from unidiff import UnidiffParseError
19
+
20
+ from sweagent import __version__, get_agent_commit_hash, get_rex_commit_hash, get_rex_version
21
+ from sweagent.agent.action_sampler import AbstractActionSampler, ActionSamplerConfig
22
+ from sweagent.agent.history_processors import DefaultHistoryProcessor, HistoryProcessor
23
+ from sweagent.agent.hooks.abstract import AbstractAgentHook, CombinedAgentHook
24
+ from sweagent.agent.models import (
25
+ AbstractModel,
26
+ HumanModel,
27
+ HumanThoughtModel,
28
+ InstanceStats,
29
+ ModelConfig,
30
+ get_model,
31
+ )
32
+ from sweagent.agent.problem_statement import ProblemStatement, ProblemStatementConfig
33
+ from sweagent.agent.reviewer import (
34
+ ChooserRetryLoop,
35
+ RetryLoopConfig,
36
+ ReviewSubmission,
37
+ ScoreRetryLoop,
38
+ get_retry_loop_from_config,
39
+ )
40
+ from sweagent.environment.swe_env import SWEEnv
41
+ from sweagent.exceptions import (
42
+ ContentPolicyViolationError,
43
+ ContextWindowExceededError,
44
+ CostLimitExceededError,
45
+ FormatError,
46
+ TotalCostLimitExceededError,
47
+ )
48
+ from sweagent.tools.parsing import (
49
+ ActionOnlyParser,
50
+ ThoughtActionParser,
51
+ )
52
+ from sweagent.tools.tools import ToolConfig, ToolHandler
53
+ from sweagent.types import AgentInfo, AgentRunResult, StepOutput, Trajectory, TrajectoryStep
54
+ from sweagent.utils.config import _convert_paths_to_abspath, _strip_abspath_from_dict
55
+ from sweagent.utils.jinja_warnings import _warn_probably_wrong_jinja_syntax
56
+ from sweagent.utils.log import get_logger
57
+ from sweagent.utils.patch_formatter import PatchFormatter
58
+
59
+
60
class TemplateConfig(BaseModel):
    """This configuration is used to define almost all message templates that are
    formatted by the agent and sent to the LM.

    Every field whose name ends in ``_template`` is passed through
    `_warn_probably_wrong_jinja_syntax` after validation.
    """

    system_template: str = ""
    instance_template: str = ""
    next_step_template: str = "Observation: {{observation}}"

    next_step_truncated_observation_template: str = (
        "Observation: {{observation[:max_observation_length]}}<response clipped>"
        "<NOTE>Observations should not exceeded {{max_observation_length}} characters. "
        "{{elided_chars}} characters were elided. Please try a different command that produces less output "
        "or use head/tail/grep/redirect the output to a file. Do not use interactive pagers.</NOTE>"
    )
    """Message template for when the agent's observation was truncated.
    Available variables: `observation`, `max_observation_length`, `elided_chars`
    """

    max_observation_length: int = 100_000
    """Truncate observation to this length if it exceeds it.
    This is measured in characters, i.e., as `len(observation)`.
    """

    next_step_no_output_template: str = None  # type: ignore
    """Template for the next step when the last output was empty. Defaults to next_step_template."""

    strategy_template: str | None = None
    demonstration_template: str | None = None

    demonstrations: list[Path] = field(default_factory=list)
    """Paths to demonstrations. If path is not absolute, it is assumed to be
    relative to the SWE_AGENT_CONFIG_ROOT (if set) or the SWE-agent repository root
    """

    put_demos_in_history: bool = False
    """If True, add demonstration to history instead of as a single message"""

    disable_image_processing: bool = False
    """If True, disable image processing for multimodal problem statements (i.e. SWEBenchMultimodalProblemStatement).
    """

    shell_check_error_template: str = (
        "Your bash command contained syntax errors and was NOT executed. "
        "Please fix the syntax errors and try again. This can be the result "
        "of not adhering to the syntax for multi-line commands. Here is the output of `bash -n`:\n"
        "{{bash_stdout}}\n{{bash_stderr}}"
    )
    """Message template for when the agent's bash command contains syntax errors.
    Available variables: `bash_stdout`, `bash_stderr`
    """

    command_cancelled_timeout_template: str = (
        "The command '{{command}}' was cancelled because it took more than {{timeout}} seconds. "
        "Please try a different command that completes more quickly. "
        "Note: A common source of this error is if the command is interactive or requires user input "
        "(it is impossible to receive user input in the current environment, so the command will never complete)."
    )
    """Message template for when the agent's command was cancelled because it took too long.
    Available variables: `timeout`, `command`
    """

    def model_post_init(self, __context):
        """Normalize demonstration paths and fill in the no-output template default."""
        self.demonstrations = _convert_paths_to_abspath(self.demonstrations)
        # The field's declared default is None (hence the `type: ignore` above);
        # fall back to the regular next-step template when it was not set.
        if self.next_step_no_output_template is None:
            self.next_step_no_output_template = self.next_step_template

    @model_validator(mode="after")
    def validate_template_jinja_syntax(self) -> Self:
        """Run the jinja-syntax warning helper over every ``*_template`` field.

        Returns:
            The validated instance, unchanged.
        """
        # Read the field registry from the class: accessing `model_fields` on an
        # instance is deprecated in recent pydantic 2.x releases. The loop variable
        # is deliberately not called `field`, to avoid shadowing the imported helper.
        for field_name in type(self).model_fields:
            if field_name.endswith("_template"):
                _warn_probably_wrong_jinja_syntax(getattr(self, field_name))
        return self

    @model_validator(mode="after")
    def warnings(self) -> Self:
        """Log warnings about likely misconfiguration; never rejects the config.

        Returns:
            The validated instance, unchanged.
        """
        logger = get_logger("swea-config", emoji="🔧")
        if self.put_demos_in_history and self.demonstration_template is not None:
            logger.warning("demonstration_template is ignored when put_demos_in_history is True")
        if not self.system_template or not self.instance_template:
            logger.warning(
                "system_template/instance_template is not set, using empty string. Perhaps you were"
                " overwriting the default config? See https://swe-agent.com/latest/usage/cl_tutorial/"
                " for more information. Note: You can ignore this warning in human mode."
            )
        return self
147
+
148
+
149
class DefaultAgentConfig(BaseModel):
    """Settings that determine how a default agent behaves."""

    # pydantic config: unknown keys are rejected
    model_config = ConfigDict(extra="forbid")

    name: str = "main"
    templates: TemplateConfig = Field(default_factory=TemplateConfig)
    tools: ToolConfig = Field(default_factory=ToolConfig)
    history_processors: list[HistoryProcessor] = Field(
        default_factory=lambda: [DefaultHistoryProcessor()],
    )
    model: ModelConfig = Field(description="Model options.")

    max_requeries: int = 3
    """Upper bound on how often the model is queried again after an error
    (for example a formatting error, a blocked action, or a bash syntax error).
    """
    action_sampler: ActionSamplerConfig | None = None

    # Tag value that identifies this config variant.
    type: Literal["default"] = "default"
168
+
169
+
170
class ShellAgentConfig(BaseModel):
    """Settings for the agent variant selected by ``type == "shell"``."""

    # pydantic config: unknown keys are rejected
    model_config = ConfigDict(extra="forbid")

    name: str = "main"
    templates: TemplateConfig = Field(default_factory=TemplateConfig)
    tools: ToolConfig = Field(default_factory=ToolConfig)
    history_processors: list[HistoryProcessor] = Field(
        default_factory=lambda: [DefaultHistoryProcessor()],
    )
    model: ModelConfig = Field(description="Model options.")

    max_requeries: int = 3
    """Upper bound on how often the model is queried again after an error
    (for example a formatting error, a blocked action, or a bash syntax error).
    """

    # Tag value that identifies this config variant.
    type: Literal["shell"] = "shell"
186
+
187
+
188
class RetryAgentConfig(BaseModel):
    """Settings for the agent variant selected by ``type == "retry"``.

    Holds a list of default-agent configs together with a retry-loop config.
    """

    # pydantic config: unknown keys are rejected
    model_config = ConfigDict(extra="forbid")

    name: str = "retry_main"
    agent_configs: list[DefaultAgentConfig]
    retry_loop: RetryLoopConfig
    # Tag value that identifies this config variant.
    type: Literal["retry"] = "retry"
194
+
195
+
196
# Any of the agent config models. With `union_mode="left_to_right"` pydantic tries the
# members in the order listed and accepts the first one that validates. Each member
# declares a distinct `type` literal and forbids extra keys.
AgentConfig = Annotated[DefaultAgentConfig | RetryAgentConfig | ShellAgentConfig, Field(union_mode="left_to_right")]
197
+
198
+
199
class _BlockedActionError(Exception):
    """Raised when the agent's action is blocked.

    NOTE(review): raise/catch sites are outside this excerpt. The ``max_requeries``
    documentation lists "a blocked action" among the errors that lead to a requery,
    so this is presumably handled by requerying the model — confirm.
    """
201
+
202
+
203
class _RetryWithOutput(Exception):
    """Used for internal control flow.

    NOTE(review): presumably associated with ``RETRY_WITH_OUTPUT_TOKEN``; the
    raise/catch sites are not visible here — confirm.
    """
205
+
206
+
207
class _RetryWithoutOutput(Exception):
    """Used for internal control flow.

    NOTE(review): presumably associated with ``RETRY_WITHOUT_OUTPUT_TOKEN``; the
    raise/catch sites are not visible here — confirm.
    """
209
+
210
+
211
class _ExitForfeit(Exception):
    """Used for internal control flow.

    NOTE(review): presumably associated with ``EXIT_FORFEIT_TOKEN``; the
    raise/catch sites are not visible here — confirm.
    """
213
+
214
+
215
class _TotalExecutionTimeExceeded(Exception):
    """Used for internal control flow.

    NOTE(review): judging by the name, signals that an overall execution-time
    budget was exceeded; the limit and the raise site are not visible here — confirm.
    """
217
+
218
+
219
# Sentinel marker strings.
# NOTE(review): presumably these are looked for in command output and mapped to the
# _RetryWithOutput / _RetryWithoutOutput / _ExitForfeit control-flow exceptions;
# the code that checks for them is outside this excerpt — confirm.
RETRY_WITH_OUTPUT_TOKEN = "###SWE-AGENT-RETRY-WITH-OUTPUT###"
RETRY_WITHOUT_OUTPUT_TOKEN = "###SWE-AGENT-RETRY-WITHOUT-OUTPUT###"
EXIT_FORFEIT_TOKEN = "###SWE-AGENT-EXIT-FORFEIT###"
222
+
223
+
224
class AbstractAgent:
    """Interface that the agent classes in this module implement.

    All methods are stubs without behaviour; subclasses provide the real logic.
    """

    def __init__(self, *args, **kwargs):
        # Annotation-only statements: they document the attributes that
        # implementations are expected to expose, but assign nothing.
        model: AbstractModel
        replay_config: BaseModel | None
        logger: logging.Logger

    @classmethod
    def from_config(cls, config: AgentConfig) -> Self:
        """Create an agent from its configuration object."""

    def add_hook(self, hook: AbstractAgentHook) -> None:
        """Register a hook with the agent."""

    def get_trajectory_data(self) -> dict[str, Any]:
        """Return the data that is written to the trajectory file."""

    def step(self) -> StepOutput:
        """Perform one step and return its output."""

    def run(self, *args, **kwargs) -> AgentRunResult:
        """Run the agent on a problem instance."""
240
+
241
+
242
def get_agent_from_config(config: AgentConfig) -> AbstractAgent:
    """Instantiate the agent class that matches `config.type`.

    Args:
        config: One of the agent configurations; its `type` field selects the class.

    Returns:
        The agent created by the `from_config` constructor of the selected class.

    Raises:
        ValueError: If `config.type` is none of "default", "retry" or "shell".
    """
    agent_type = config.type
    if agent_type == "default":
        return DefaultAgent.from_config(config)
    if agent_type == "retry":
        return RetryAgent.from_config(config)
    if agent_type == "shell":
        # Need to defer import to avoid circular dependency
        from sweagent.agent.extra.shell_agent import ShellAgent

        return ShellAgent.from_config(config)
    msg = f"Unknown agent type: {config.type}"
    raise ValueError(msg)
255
+
256
+
257
class RetryAgent(AbstractAgent):
    """Agent that solves an instance through repeated attempts of `DefaultAgent` sub-agents.

    Every finished attempt is handed to a retry loop (created from
    `config.retry_loop`). The retry loop decides whether another attempt is
    started and, at the end, which attempt is reported as the best one.
    """

    def __init__(self, config: RetryAgentConfig):
        # Always copy config to avoid shared state between different instances
        self.config = config.model_copy(deep=True)
        self._hooks = []
        self._i_attempt = 0
        self.logger = get_logger("swea-agent", emoji="🤠")
        self._agent: DefaultAgent | None = None
        self._attempt_data: list[dict[str, Any]] = []
        # Note that _total_instance_attempt_stats only accumulates the stats of the
        # sub-agents, not of the reviewer. Use self._total_instance_stats for the total stats.
        self._total_instance_attempt_stats = InstanceStats()
        self._chook = CombinedAgentHook()
        # The following attributes are populated by `setup`
        self._traj_path: Path | None = None
        self._problem_statement: ProblemStatement | None = None
        self._env: SWEEnv | None = None
        self._output_dir: Path | None = None
        self._rloop: ScoreRetryLoop | ChooserRetryLoop | None = None

    @property
    def _total_instance_stats(self) -> InstanceStats:
        """Stats of all sub-agent attempts plus the stats of the retry loop's review model."""
        assert self._rloop is not None
        return self._total_instance_attempt_stats + self._rloop.review_model_stats

    @classmethod
    def from_config(cls, config: RetryAgentConfig) -> Self:
        """Create the agent from its configuration."""
        return cls(config)

    def add_hook(self, hook: AbstractAgentHook) -> None:
        """Register `hook` on this agent; it is also attached to every sub-agent created later."""
        self._chook.add_hook(hook)
        self._hooks.append(hook)

    def setup(
        self, env: SWEEnv, problem_statement: ProblemStatement | ProblemStatementConfig, output_dir: Path = Path(".")
    ) -> None:
        """Setup the retry agent for a new problem instance.
        This is mostly a bookkeeping step.

        Args:
            env: Environment that all attempts run in.
            problem_statement: Problem to solve; its `id` names the trajectory file.
            output_dir: Directory that receives the trajectory file and the attempt sub-directories.
        """
        # Reset *all* per-instance bookkeeping. A fresh retry loop is created below and
        # numbers its submissions from zero, so attempts left over from an earlier
        # instance would make the chosen index point at the wrong entry.
        self._i_attempt = 0
        self._attempt_data = []
        self._agent = None
        self._total_instance_attempt_stats = InstanceStats()
        self._problem_statement = problem_statement
        self._traj_path = output_dir / (self._problem_statement.id + ".traj")
        self._env = env
        self._output_dir = output_dir
        self._rloop = get_retry_loop_from_config(self.config.retry_loop, problem_statement=problem_statement)

    def _setup_agent(self) -> AbstractAgent:
        """Setup the agent for the current attempt."""
        # todo: Could select "best" agent config based on previous attempts if I run > number of set up configs
        agent_config = self.config.agent_configs[self._i_attempt % len(self.config.agent_configs)].model_copy(deep=True)
        # Never let a single attempt spend more than what is left of the total budget
        remaining_budget = self.config.retry_loop.cost_limit - self._total_instance_stats.instance_cost
        if remaining_budget < agent_config.model.per_instance_cost_limit:
            self.logger.debug("Setting agent per-attempt cost limit to remaining budget: %s", remaining_budget)
            agent_config.model.per_instance_cost_limit = remaining_budget
        self._agent = DefaultAgent.from_config(agent_config)
        for hook in self._hooks:
            self._agent.add_hook(hook)
        assert self._output_dir is not None
        sub_agent_output_dir = self._output_dir / f"attempt_{self._i_attempt}"
        assert self._problem_statement is not None
        assert self._env is not None
        self._agent.setup(env=self._env, problem_statement=self._problem_statement, output_dir=sub_agent_output_dir)
        return self._agent

    def _next_attempt(self) -> None:
        """Prepare for the next attempt: Reset the environment and setup the next agent."""
        assert self._env is not None
        self._i_attempt += 1
        self._env.hard_reset()
        self._setup_agent()

    def step(self) -> StepOutput:
        """Step the agent of the current attempt.
        Attempt autosubmit if an error occurs (though all errors should already be handled by the attempt agent).

        Raises:
            TotalCostLimitExceededError: Re-raised unchanged so that everything stops.
        """
        assert self._agent is not None
        # Failsafe cost check, this should not actually happen, because the sub-agent should have already been
        # initialized with the correct cost limit to not exceed the total cost limit. Using factor of 1.1, because
        # sub-agent might only catch the cost limit after attempting.
        # The chained comparison also requires the limit to be positive.
        if self._total_instance_stats.instance_cost > 1.1 * self.config.retry_loop.cost_limit > 0:
            msg = "Total instance cost exceeded cost limit. This should not happen, please report this. Triggering autosubmit."
            self.logger.critical(msg)
            return self._agent.attempt_autosubmission_after_error(step=StepOutput())
        try:
            step = self._agent.step()
        except TotalCostLimitExceededError:
            # Need to make sure that this error causes everything to stop
            raise
        except Exception as e:
            msg = "Error in agent step: %s. This really shouldn't happen, please report this. Triggering autosubmit."
            self.logger.critical(msg, e, exc_info=True)
            step = self._agent.attempt_autosubmission_after_error(step=StepOutput())
        return step

    def _finalize_agent_run(self) -> None:
        """Add the agent results to our list of results"""
        assert self._agent is not None
        self._agent.save_trajectory()
        self._attempt_data.append(self._agent.get_trajectory_data())
        self._total_instance_attempt_stats += self._agent.model.stats

    def get_trajectory_data(self, choose: bool) -> dict[str, Any]:
        """Get all data that we save in .traj files.

        Args:
            choose: If True, ask the retry loop for the best attempt and merge a copy of
                that attempt's data (with aggregated stats) into the top level of the result.

        Returns:
            A dict with the key "attempts" and, if `choose` is True, the keys of the
            chosen attempt.
        """
        assert self._rloop is not None

        data = {
            "attempts": self._attempt_data,
        }

        if choose:
            try:
                best_attempt_idx = self._rloop.get_best()
            except TotalCostLimitExceededError:
                raise
            except Exception as e:
                self.logger.critical(f"Error getting best attempt index: {e}. Setting to 0.", exc_info=True)
                best_attempt_idx = 0
            # Copy, so that the updates of data["info"] below leave the stored attempt untouched
            data |= copy.deepcopy(self._attempt_data[best_attempt_idx])  # type: ignore
            data["info"]["best_attempt_idx"] = best_attempt_idx
            data["info"]["rloop_model_stats"] = self._rloop.review_model_stats.model_dump()
            # Overwrite model stats with total stats
            data["info"]["model_stats"] = self._total_instance_stats.model_dump()
            if isinstance(self._rloop, ChooserRetryLoop):
                data["info"]["chooser"] = (
                    self._rloop._chooser_output.model_dump() if self._rloop._chooser_output else {}
                )
        return data

    def save_trajectory(self, choose: bool) -> None:
        """Write the data of `get_trajectory_data(choose=choose)` as JSON to the trajectory file."""
        data = self.get_trajectory_data(choose=choose)
        assert self._traj_path is not None
        self._traj_path.write_text(json.dumps(data, indent=2))

    def run(
        self,
        env: SWEEnv,
        problem_statement: ProblemStatement | ProblemStatementConfig,
        output_dir: Path = Path("."),
    ) -> AgentRunResult:
        """Run the agent on a problem instance. This method contains the
        main loop that repeatedly calls `self.step` until the retry loop
        requests no further attempt.

        Args:
            env: The environment to run the agent on.
            problem_statement: The problem statement to run the agent on.
            output_dir: Directory to save the trajectory to

        Returns:
            The info and trajectory of the attempt chosen by the retry loop.
        """
        output_dir.mkdir(parents=True, exist_ok=True)
        self.setup(env=env, problem_statement=problem_statement, output_dir=output_dir)
        assert self._rloop is not None

        # Run action/observation loop
        self._chook.on_run_start()
        step_output = StepOutput()
        self._setup_agent()
        assert self._agent is not None
        while not step_output.done:
            step_output = self.step()
            self.save_trajectory(choose=False)
            if step_output.done:
                # The attempt is finished: hand it to the retry loop
                self._rloop.on_submit(
                    ReviewSubmission(
                        trajectory=self._agent.trajectory,
                        info=self._agent.info,
                        model_stats=self._agent.model.stats,
                    )
                )
                if isinstance(self._rloop, ScoreRetryLoop):
                    self._agent.info["review"] = self._rloop.reviews[-1].model_dump()  # type: ignore
                self._finalize_agent_run()
                self.save_trajectory(choose=False)
                if self._rloop.retry():
                    assert self._env is not None
                    self._next_attempt()
                    # Keep looping with the freshly set up sub-agent
                    step_output.done = False
        self.save_trajectory(choose=True)  # call again after we finalized
        self._chook.on_run_done(trajectory=self._agent.trajectory, info=self._agent.info)

        self.logger.info("Trajectory saved to %s", self._traj_path)

        # Here we want to return the "global" information (e.g., submission should
        # be the best submission instead of the last one, etc.), so we use the same
        # data that was written to the traj file
        data = self.get_trajectory_data(choose=True)
        return AgentRunResult(info=data["info"], trajectory=data["trajectory"])
441
+
442
+
443
+ class DefaultAgent(AbstractAgent):
444
def __init__(
    self,
    *,
    templates: TemplateConfig,
    tools: ToolHandler,
    history_processors: list[HistoryProcessor],
    model: AbstractModel,
    max_requeries: int = 3,
    name: str = "main",
    _catch_errors: bool = True,
    _always_require_zero_exit_code: bool = False,
    action_sampler_config: ActionSamplerConfig | None = None,
):
    """The agent handles the behaviour of the model and how it interacts with the environment.

    To run the agent, either call `self.run` or `self.setup` and then `self.step` in a loop.

    Args:
        templates: Prompt templates.
        tools: Tool handler; its parse function is replaced when a human model is used.
        history_processors: Callables applied in order to the history (see `messages`).
        model: Model that is queried for actions.
        max_requeries: Stored as `self.max_requeries`.
        name: Name of the agent; history entries are tagged with it.
        _catch_errors: Stored as `self._catch_errors`.
        _always_require_zero_exit_code: If True, commands are run with `check="raise"`.
        action_sampler_config: If given, used to create the action sampler.
    """
    self._catch_errors = _catch_errors
    self._always_require_zero_exit_code = _always_require_zero_exit_code
    self.name = name
    self.model = model
    self.templates = templates
    self.tools = tools
    # Human models come with their own fixed output format
    if isinstance(self.model, HumanThoughtModel):
        self.tools.config.parse_function = ThoughtActionParser()
    elif isinstance(self.model, HumanModel):
        self.tools.config.parse_function = ActionOnlyParser()
    self.history_processors = history_processors
    self.max_requeries = max_requeries
    self.logger = get_logger("swea-agent", emoji="🤠")

    # Populated by `setup`
    self._env: SWEEnv | None = None
    self._problem_statement: ProblemStatement | ProblemStatementConfig | None = None
    self.traj_path: Path | None = None

    # History, trajectory and info together record how the agent solved the problem
    self.history = []
    self._trajectory = []
    self.info = AgentInfo()

    self._chook = CombinedAgentHook()

    # This can be set to a RunSingleConfig from the Run instance whenever possible.
    # It can be used to replay the agent's trajectory in an environment.
    self._replay_config: BaseModel | None = None

    self._action_sampler: AbstractActionSampler | None = None
    if action_sampler_config is not None:
        self._action_sampler = action_sampler_config.get(self.model, self.tools)

    # Number of timeouts in a row; `handle_action` compares it against
    # `tools.config.max_consecutive_execution_timeouts`.
    self._n_consecutive_timeouts = 0
    # Accumulated execution time of all actions
    self._total_execution_time = 0.0
500
+
501
@classmethod
def from_config(cls, config: DefaultAgentConfig) -> Self:
    """Create an agent from a `DefaultAgentConfig`.

    The configuration is deep-copied first: it lives on as a property in the
    model, the tools, etc., and all agents must stay completely independent.
    """
    own_config = config.model_copy(deep=True)
    agent_model = get_model(own_config.model, own_config.tools)
    tool_handler = ToolHandler(own_config.tools)
    return cls(
        templates=own_config.templates,
        tools=tool_handler,
        history_processors=own_config.history_processors,
        model=agent_model,
        max_requeries=own_config.max_requeries,
        action_sampler_config=own_config.action_sampler,
    )
515
+
516
def add_hook(self, hook: AbstractAgentHook) -> None:
    """Initialize `hook` with this agent, then register it with the combined hook."""
    # The hook gets to see the agent before it receives any event
    hook.on_init(agent=self)
    self._chook.add_hook(hook)
520
+
521
+ # Properties
522
+ # ----------
523
+
524
@property
def trajectory(self) -> Trajectory:
    """The trajectory recorded so far (the stored list itself, not a copy)."""
    return self._trajectory
527
+
528
@property
def replay_config(self) -> BaseModel | None:
    """Configuration that can be used to replay the trajectory, or None if not set."""
    return self._replay_config

@replay_config.setter
def replay_config(self, value: BaseModel):
    """Store `value` as a `RunSingleConfig` that was re-validated from its dumped form.

    The dump is passed through `_strip_abspath_from_dict` before validation.
    """
    # Do import here to avoid circular dependency
    from sweagent.run.run_single import RunSingleConfig

    dumped = _strip_abspath_from_dict(value.model_dump())
    self._replay_config = RunSingleConfig.model_validate(dumped)
538
+
539
@property
def messages(self) -> list[dict[str, Any]]:
    """Return the history of the agent for this attempt since the last reset,
    processed through all history processors.

    Only entries whose "agent" field equals `self.name` are considered. The
    processors are applied one after another, each one receiving the output
    of the previous one.
    """
    own_entries = []
    for entry in self.history:
        if entry["agent"] == self.name:  # type: ignore
            own_entries.append(entry)

    processed = own_entries
    for process in self.history_processors:
        processed = process(processed)

    return processed  # type: ignore
552
+
553
+ # Methods
554
+ # -------
555
+
556
def _append_history(self, item: dict[str, Any]) -> None:
    """Append `item` to the history, after notifying the hooks about it.

    The fields of `item` are passed to the hooks as keyword arguments.
    """
    self._chook.on_query_message_added(**item)
    self.history.append(item)  # type: ignore
560
+
561
def setup(
    self,
    env: SWEEnv,
    problem_statement: ProblemStatement | ProblemStatementConfig,
    output_dir: Path = Path("."),
) -> None:
    """Setup the agent for a new instance. This includes
    formatting the system message and adding demonstrations to the history.

    This method is called by `self.run`.

    Args:
        env: Environment the agent acts in.
        problem_statement: Problem to solve; its `id` names the trajectory file.
        output_dir: Directory for the trajectory file; created if it does not exist.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    # apply template configuration to multimodal problem statements
    if hasattr(problem_statement, "type") and problem_statement.type == "swe_bench_multimodal":
        from sweagent.agent.problem_statement import SWEBenchMultimodalProblemStatement

        # apply the global disable_image_processing setting if it's not explicitly set
        if (
            isinstance(problem_statement, SWEBenchMultimodalProblemStatement)
            and not problem_statement.disable_image_processing
            and self.templates.disable_image_processing
        ):
            problem_statement.disable_image_processing = True

    self._problem_statement = problem_statement
    self._env = env
    instance_id = self._problem_statement.id
    self.logger.info("Setting up agent for instance %s", instance_id)

    # Save/reset some attributes
    self.traj_path = output_dir / (instance_id + ".traj")
    self.logger.info("Trajectory will be saved to %s", self.traj_path)

    self._chook.on_tools_installation_started()
    self.tools.install(self._env)
    self._chook.on_setup_attempt()

    # Start with a fresh info object that records the versions in use
    self.info = AgentInfo()
    self.info["swe_agent_hash"] = get_agent_commit_hash()
    self.info["swe_agent_version"] = __version__
    self.info["swe_rex_version"] = get_rex_version()
    self.info["swe_rex_hash"] = get_rex_commit_hash()

    assert self._env is not None
    assert self._problem_statement is not None
    env_problem_statement = self._problem_statement.get_problem_statement_for_env()
    self._env.set_env_variables({"PROBLEM_STATEMENT": env_problem_statement})

    # Initial history: system prompt, demonstrations, instance template
    self.add_system_message_to_history()
    self.add_demonstrations_to_history()
    self.add_instance_template_to_history(state=self.tools.get_state(self._env))
    self._chook.on_setup_done()
607
+
608
def add_system_message_to_history(self) -> None:
    """Render the system template and append it to the history as the system prompt."""
    assert self._problem_statement is not None
    format_dict = self._get_format_dict()
    system_msg = Template(self.templates.system_template).render(**format_dict)
    self.logger.info(f"SYSTEM ({self.name})\n{system_msg}")
    system_entry = {
        "role": "system",
        "content": system_msg,
        "agent": self.name,
        "message_type": "system_prompt",
    }
    self._append_history(system_entry)
616
+
617
def add_demonstrations_to_history(self) -> None:
    """Add every demonstration listed in the templates to the history, in order."""
    for path in self.templates.demonstrations:
        self._add_demonstration_to_history(path)
621
+
622
def _add_demonstration_to_history(self, demonstration_path: Path) -> None:
    """Load demonstration from disk and add to history

    Files with the suffix `.yaml` or `.yml` are parsed as YAML, all other files
    as JSON. In both cases the top-level key "history" holds the messages.

    Args:
        demonstration_path: Location of the demonstration file (a `Path` or a plain string).

    Raises:
        ValueError: If there is neither a demonstration template nor
            `put_demos_in_history=True`.
    """
    if self.templates.demonstration_template is None and not self.templates.put_demos_in_history:
        msg = "Cannot use demonstrations without a demonstration template or put_demos_in_history=True"
        raise ValueError(msg)

    # Load history
    self.logger.info(f"DEMONSTRATION: {demonstration_path}")
    # Normalize once so that plain string paths work for reading *and* for the suffix check
    demo_file = Path(demonstration_path)
    # JSON and YAML documents are UTF-8; do not depend on the locale's default encoding
    _demo_text = demo_file.read_text(encoding="utf-8")
    if demo_file.suffix.lower() in (".yaml", ".yml"):
        demo_history = yaml.safe_load(_demo_text)["history"]
    else:
        demo_history = json.loads(_demo_text)["history"]

    if self.templates.put_demos_in_history:
        # Add demonstrations to history step-by-step
        for entry in demo_history:
            if entry["role"] != "system":
                entry["is_demo"] = True
                self._append_history(entry)
    else:
        # Add demonstration as single message to history
        demo_history = [entry for entry in demo_history if entry["role"] != "system"]
        demo_message = "\n".join([entry["content"] for entry in demo_history])
        assert self.templates.demonstration_template is not None
        demonstration = Template(self.templates.demonstration_template).render(demonstration=demo_message)
        self._append_history(
            {
                "agent": self.name,
                "content": demonstration,
                "is_demo": True,
                "role": "user",
                "message_type": "demonstration",
            },
        )
657
+
658
def _get_format_dict(self, **kwargs) -> dict[str, Any]:
    """Get the dictionary of key value pairs used to format the templates

    The result combines the command docs, the tools' environment variables, the
    given keyword arguments, the problem statement, the repository name (empty
    string without a repository) and the extra fields of the problem statement.

    Args:
        **kwargs: additional keyword arguments to be added to the format dictionary

    Raises:
        TypeError: If the same key is supplied by more than one of the sources.
    """
    assert self._problem_statement is not None
    assert self._env is not None
    tool_config = self.tools.config
    problem_text = self._problem_statement.get_problem_statement()
    repo = self._env.repo
    repo_name = "" if repo is None else repo.repo_name
    extra_fields = self._problem_statement.get_extra_fields()
    # A `dict(...)` call (not a dict display) on purpose: duplicate keys raise
    # instead of silently overriding each other.
    return dict(
        command_docs=tool_config.command_docs,
        **tool_config.env_variables,
        **kwargs,
        problem_statement=problem_text,
        repo=repo_name,
        **extra_fields,
    )
674
+
675
def _add_templated_messages_to_history(
    self, templates: list[str], tool_call_ids: list[str] | None = None, **kwargs: str | int | None
) -> None:
    """Populate selected template(s) with information (e.g., issue, arguments, state)
    and add to history.

    The rendered templates are joined with newlines into a single message. It is
    added with role "user", or with role "tool" if `tool_call_ids` is given.

    Args:
        templates: templates to populate and add to history
        tool_call_ids: tool call ids to be added to the history
        **kwargs: keyword arguments to be passed to the templates (in addition to the
            ones in `self._get_format_dict`)
    """
    format_dict = self._get_format_dict(**kwargs)
    try:
        messages = [Template(template).render(**format_dict) for template in templates]
    except KeyError:
        # Help with debugging the template before passing the error on
        self.logger.debug("The following keys are available: %s", format_dict.keys())
        raise

    message = "\n".join(messages)

    # We disable syntax highlighting here, because some inputs can lead to a complete cross-thread
    # freeze in the agent. See https://github.com/SWE-agent/SWE-agent/issues/901 .
    self.logger.info(f"🤖 MODEL INPUT\n{message}", extra={"highlighter": None})
    history_item: dict[str, Any] = dict(
        role="user",
        content=message,
        agent=self.name,
        message_type="observation",
    )
    if tool_call_ids:
        assert len(tool_call_ids) == 1, "This should be ensured by the FunctionCalling parse method"
        history_item["role"] = "tool"
        history_item["tool_call_ids"] = tool_call_ids
    self._append_history(history_item)
713
+
714
def add_step_to_history(self, step: StepOutput) -> None:
    """Adds a step (command that was run and output) to the model history

    First the assistant message (model output, thought, action, tool calls) is
    appended. Then the observation is added through one of three templates:
    the "no output" template for blank observations, the "truncated" template
    for observations longer than `templates.max_observation_length`, and the
    standard template otherwise.
    """
    assistant_entry = {
        "role": "assistant",
        "content": step.output,
        "thought": step.thought,
        "action": step.action,
        "agent": self.name,
        "tool_calls": step.tool_calls,
        "message_type": "action",
        "thinking_blocks": step.thinking_blocks,
    }
    self._append_history(assistant_entry)

    observation = step.observation
    limit = self.templates.max_observation_length
    elided_chars = 0
    if not observation.strip():
        chosen_template = self.templates.next_step_no_output_template
    elif len(observation) > limit:
        chosen_template = self.templates.next_step_truncated_observation_template
        # Number of characters beyond the limit; passed on to the template
        elided_chars = len(observation) - limit
    else:
        chosen_template = self.templates.next_step_template

    self._add_templated_messages_to_history(
        [chosen_template],
        observation=observation,
        elided_chars=elided_chars,
        max_observation_length=limit,
        tool_call_ids=step.tool_call_ids,
        **step.state,
    )
747
+
748
def add_instance_template_to_history(self, state: dict[str, str]) -> None:
    """Add the instance template (plus the strategy template, if there is one) to the history.

    This must be called at the start of a new attempt: the last history entry
    has to be the system message or a demonstration.

    Args:
        state: State values that are made available to the templates.
    """
    last_entry = self.history[-1]
    assert last_entry["role"] == "system" or last_entry.get("is_demo", False)

    templates: list[str] = [self.templates.instance_template]
    strategy = self.templates.strategy_template
    if strategy is not None:
        templates.append(strategy)

    self._add_templated_messages_to_history(templates, **state)  # type: ignore
761
+
762
def get_trajectory_data(self) -> dict[str, Any]:
    """Get all data that we save in .traj files.

    Returns:
        A dict with deep copies of the trajectory, the history and the info,
        the replay config as JSON string (or None) and the environment name.
    """
    assert self._env is not None
    # The deepcopy here is important because else the
    # data["info"]["model_stats"] update will create havoc!
    snapshot = copy.deepcopy(
        dict(
            trajectory=self.trajectory,
            history=self.history,
            info=self.info,
        )
    )
    if self.replay_config is not None:
        snapshot["replay_config"] = self.replay_config.model_dump_json()
    else:
        snapshot["replay_config"] = None
    snapshot["environment"] = self._env.name
    return snapshot
778
+
779
def save_trajectory(
    self,
) -> None:
    """Write the result of `get_trajectory_data` as indented JSON to `self.traj_path`."""
    trajectory_data = self.get_trajectory_data()
    assert self.traj_path is not None
    serialized = json.dumps(trajectory_data, indent=2)
    self.traj_path.write_text(serialized)
788
+
789
def get_model_requery_history(
    self, error_template: str, *, output: str, **kwargs: str | int | float | bool | None
) -> list[dict[str, str]]:
    """Build the history for asking the model to correct itself after one of these errors:

    1. Malformatted output (could not parse action)
    2. Blocked action (command is on the blocklist)
    3. Bash command syntax error

    At the time this function is called, the proposed action and observation are not part of the history
    yet.

    This function does not modify `self.history` and does not query the model itself: the
    two extra messages only exist in the returned list. If the model is able to correct
    itself, the records of the mistakes will therefore not be part of the history.

    Args:
        error_template: error template
        output: model output
        **kwargs: keyword arguments to be passed to the error template; values from
            `self._get_format_dict` take precedence on name clashes

    Returns:
        The processed messages of the agent, followed by the faulty model output
        (as assistant message) and the rendered error template (as user message).
    """
    format_dict = {**kwargs, **self._get_format_dict()}
    error_template = Template(error_template).render(**format_dict)

    self.logger.warning(f"{error_template}")

    faulty_output_entry = {
        "role": "assistant",
        "content": output,
        "agent": self.name,
        "message_type": "assistant",
    }
    error_entry = {
        "role": "user",
        "content": error_template,
        "agent": self.name,
        "message_type": "user",
    }
    return self.messages + [faulty_output_entry, error_entry]
822
+
823
def attempt_autosubmission_after_error(self, step: StepOutput) -> StepOutput:
    """For most exceptions, we attempt to still extract the patch and submit that.
    This means we send the `submit` command to the runtime and parse the output.

    If the runtime is no longer alive, the diff stored in the state of the last
    trajectory step is used instead (if there is one).

    Args:
        step: Step to start from; it is copied, not modified.

    Returns:
        A copy of `step` with `done=True` and, if a patch could be obtained, the submission.
    """
    self.logger.warning("Attempting autosubmission after error")
    result = step.model_copy(deep=True)
    result.done = True
    assert self._env is not None

    runtime_alive = asyncio.run(self._env.deployment.is_alive(timeout=10))
    if not runtime_alive:
        # The agent is dead. This is very bad. Maybe we can take a 'diff' that was saved
        # for a previous step? (if running with diff in tools)
        self.logger.error("Runtime is no longer alive")
        try:
            last_trajectory_step = self.trajectory[-1]
        except IndexError:
            self.logger.info("No last trajectory step to extract patch from")
            return result
        last_state = last_trajectory_step["state"]
        if "diff" not in last_state:
            self.logger.info("No diff in last trajectory step state, cannot autosubmit")
            return result
        self.logger.info("Using diff from last trajectory step to autosubmit")
        result.submission = last_state["diff"]
        if not result.submission:
            self.logger.info("Diff from last traj step empty.")
            return result
        result.observation = "Environment died unexpectedly. Exited (autosubmitted)"
        result.exit_status = f"submitted ({result.exit_status})"
        return result

    # Let us manually run the submission command and collect the output
    repo = self._env.repo
    repo_name = "/" if repo is None else f"/{repo.repo_name}"
    submission_command = "git add -A && git diff --cached > /root/model.patch"
    self.logger.info("Executing submission command %s in %s", submission_command, repo_name)
    try:
        self._env.execute_command(submission_command, check=True, cwd=repo_name)
    except Exception as e:
        self.logger.error("Failed to execute submission command, got %s", e)
    # There's still hope for the submission, because the `/root/model.patch` file might have been
    # generated by the state command
    result = self.handle_submission(result, observation="", force_submission=True)
    if result.submission:
        self.logger.info("Exiting with autosubmission")
        result.observation = "Exited (autosubmitted)"
    return result
869
+
870
def handle_submission(self, step: StepOutput, *, observation="", force_submission: bool = False) -> StepOutput:
    """Check if there was a submission in the observation and handle it.

    On a submission the patch is read from `/root/model.patch`. If the file
    cannot be read, the copied step is returned without further changes.

    Args:
        step: Step to check; it is copied, not modified.
        observation: If specified, will use this rather than `step.observation`
        force_submission: If True, will always submit even if no submission is found

    Returns:
        step: step with submission and observation updated (if submission was found)
    """
    result = step.model_copy(deep=True)
    assert self.tools is not None
    is_submission = self.tools.check_for_submission_cmd(observation or result.observation)
    if not (is_submission or force_submission):
        return result

    assert self._env is not None
    try:
        submission = self._env.read_file("/root/model.patch", encoding="utf-8", errors="backslashreplace")
    except FileNotFoundError:
        self.logger.warning("Submission file not found, no submission was made")
        return result
    except Exception as e:
        self.logger.exception("Failed to read submission file, got %s", e)
        return result

    # A patch consisting only of whitespace does not count as a submission
    result.submission = submission if submission.strip() != "" else None
    result.observation = submission
    if not result.exit_status:
        result.exit_status = "submitted"
    elif result.submission:
        # Keep the earlier exit status visible
        result.exit_status = f"submitted ({result.exit_status})"
    result.done = True
    self.logger.info(f"Found submission: {submission}")
    return result
906
+
907
def _get_edited_files_with_context(self, patch: str) -> dict[str, str]:
    """Get the edited files with context from the patch

    Args:
        patch: The patch to format.

    Returns:
        A dict with the keys `edited_files30`, `edited_files50` and `edited_files70`
        (the number is the context length passed to the patch formatter). All values
        are a placeholder text if there is no repository, the patch is empty or the
        patch cannot be parsed.
    """
    assert self._env is not None
    formatter = None
    try:
        if self._env.repo is not None and patch:
            formatter = PatchFormatter(
                patch,
                read_method=lambda path: self._env.read_file(  # type: ignore[attr-defined]
                    PurePosixPath("/") / self._env.repo.repo_name / path  # type: ignore[attr-defined]
                ),
            )
    except UnidiffParseError:
        self.logger.error("Failed to parse patch with unidiff. Some variables will be empty.")
        formatter = None

    # We still need to populate the variables
    placeholder = "Empty. No edited files found."
    return {
        f"edited_files{context_length}": (
            placeholder
            if formatter is None
            else formatter.get_files_str(original=False, context_length=context_length)
        )
        for context_length in (30, 50, 70)
    }
935
+
936
def handle_action(self, step: StepOutput) -> StepOutput:
    """Runs an action proposed by the agent in the environment and returns the corresponding output.

    Args:
        step: Step whose `action` is executed. Observation, execution time and state
            are written to this object.

    Returns:
        The step as returned by `self.handle_submission`; for the `exit` action the
        step itself, marked as done.

    Raises:
        _BlockedActionError: If the tools report the action as blocked.
        CommandTimeoutError: If the limit of consecutive timeouts is reached
            (re-raised; it is also in flight when interrupting the session fails).
        _RetryWithOutput, _RetryWithoutOutput, _ExitForfeit: If the observation
            contains the corresponding token.
    """
    if self.tools.should_block_action(step.action):
        raise _BlockedActionError()

    if step.action.strip() == "exit":
        self.logger.info("Exiting agent")
        step.done = True
        step.observation = "Exited"
        step.exit_status = "exit_command"
        assert self._env is not None
        step.state = self.tools.get_state(env=self._env)  # for history
        return step

    assert self._env is not None
    self._chook.on_action_started(step=step)
    started_at = time.perf_counter()

    def _book_execution_time() -> None:
        # Record the duration on the step and add it to the running total
        step.execution_time = time.perf_counter() - started_at
        self._total_execution_time += step.execution_time

    run_action: str = self.tools.guard_multiline_input(step.action).strip()
    check_mode = "raise" if self._always_require_zero_exit_code else "ignore"
    try:
        step.observation = self._env.communicate(
            input=run_action,
            timeout=self.tools.config.execution_timeout,
            check=check_mode,
        )
    except CommandTimeoutError:
        self._n_consecutive_timeouts += 1
        if self._n_consecutive_timeouts >= self.tools.config.max_consecutive_execution_timeouts:
            msg = "Exiting agent due to too many consecutive execution timeouts"
            self.logger.critical(msg)
            _book_execution_time()
            raise
        try:
            self._env.interrupt_session()
        except Exception as f:
            self.logger.exception("Failed to interrupt session after command timeout: %s", f, exc_info=True)
            _book_execution_time()
            raise
        # The session could be interrupted: tell the model about the timeout
        step.observation = Template(self.templates.command_cancelled_timeout_template).render(
            **self._get_format_dict(),
            timeout=self.tools.config.execution_timeout,
            command=run_action,
        )
    else:
        self._n_consecutive_timeouts = 0
    _book_execution_time()
    self._chook.on_action_executed(step=step)
    step.state = self.tools.get_state(env=self._env)

    # Special tokens in the observation steer the control flow
    if RETRY_WITH_OUTPUT_TOKEN in step.observation:
        step.observation = step.observation.replace(RETRY_WITH_OUTPUT_TOKEN, "")
        raise _RetryWithOutput()
    if RETRY_WITHOUT_OUTPUT_TOKEN in step.observation:
        step.observation = step.observation.replace(RETRY_WITHOUT_OUTPUT_TOKEN, "")
        raise _RetryWithoutOutput()
    if EXIT_FORFEIT_TOKEN in step.observation:
        raise _ExitForfeit()

    return self.handle_submission(step)
1005
+
1006
def forward(self, history: list[dict[str, str]]) -> StepOutput:
    """Query the model once and execute the resulting action, without error recovery.

    The total execution time budget is checked first; if it is already
    exceeded, `_TotalExecutionTimeExceeded` is raised before any step object
    exists. Every exception raised after that point is re-raised with the
    partially filled `StepOutput` attached as its `step` attribute.

    Args:
        history: history to query the model with

    Returns:
        step_output: whatever `self.handle_action` returns for the parsed action
    """
    if self._total_execution_time > self.tools.config.total_execution_timeout:
        raise _TotalExecutionTimeExceeded()

    # The step object is filled in piece by piece so that, when something
    # fails midway, the error handlers still see everything gathered so far
    # (e.g. the raw model output needed to format a requery template).
    step = StepOutput()
    step.query = copy.deepcopy(history)
    try:
        self._chook.on_model_query(messages=history, agent=self.name)
        # todo: Add all options to the extra info
        sampler = self._action_sampler
        if sampler is None:
            output = self.model.query(history)  # type: ignore
        else:
            assert self._problem_statement is not None
            best = sampler.get_action(
                problem_statement=self._problem_statement,
                trajectory=self.trajectory,
                history=history,
            )
            output = best.completion
            # todo: Handle history and trajectory
            step.extra_info.update(best.extra_info)

        step.output = output["message"]
        # todo: Can't I override the parser in __init__?
        step.thought, step.action = self.tools.parse_actions(output)
        step.thinking_blocks = output.get("thinking_blocks", [])

        tool_calls = output.get("tool_calls")
        if tool_calls is not None:
            step.tool_call_ids = [call["id"] for call in tool_calls]
            step.tool_calls = tool_calls

        self.logger.info(f"💭 THOUGHT\n{step.thought}\n\n🎬 ACTION\n{step.action.strip()}")
        self._chook.on_actions_generated(step=step)
        return self.handle_action(step)
    except Exception as e:
        if step.thought == "" and step.action == "":
            # Nothing was parsed (parsing failed or no action was included).
            # Fall back to the raw output so trajectory viewers still have
            # something to display for this step.
            step.thought = step.output
        # Hand the partially filled step to whoever catches the exception.
        e.step = step  # type: ignore
        raise
1061
+
1062
def forward_with_handling(self, history: list[dict[str, str]]) -> StepOutput:
    """Forward the model and handle errors, requerying the model if we can.
    For example, if the model outputs a bash command that has syntax errors,
    we will not execute it but requery the model for a corrected command.

    Format, blocklist, content-policy and bash-syntax errors each count
    towards `self.max_requeries`; once that many have occurred, the method
    gives up and attempts an autosubmission with exit status "exit_format".
    `_RetryWithOutput` / `_RetryWithoutOutput` trigger another attempt
    without counting towards that limit.

    Note: This will update the trajectory, but not the history.

    Args:
        history: history to forward

    Returns:
        step_output: step output

    Raises:
        KeyboardInterrupt, EOFError, TotalCostLimitExceededError: re-raised
            unchanged. Every other exception ends in an autosubmission attempt.
    """

    def handle_error_with_autosubmission(exit_status: str, message: str) -> StepOutput:
        """Attempts to autosubmit (extract patch from the environment) and stops the loop."""
        self.logger.warning(message)
        return self.attempt_autosubmission_after_error(
            StepOutput(
                thought=message,
                exit_status=exit_status,
                output=message,
                done=True,
            )
        )

    def handle_error_with_retry(exception: Exception, template: str, n_requeries: int) -> list[dict[str, str]]:
        """Record the failed step in the trajectory and build the history for the requery."""
        self.logger.warning("Requerying model after %s (%dth requery)", type(exception).__name__, n_requeries)
        # `forward` attaches the partially filled step to the exception;
        # fall back to an empty step if the exception carries none.
        step: StepOutput = getattr(exception, "step", StepOutput())
        self.add_step_to_trajectory(step)
        exception_message = getattr(exception, "message", "")
        if not exception_message:
            try:
                exception_message = exception.args[0]
            except (IndexError, AttributeError):
                # No usable message; keep the empty default.
                pass
        return self.get_model_requery_history(
            error_template=template,
            **step.to_template_format_dict(),
            **getattr(exception, "extra_info", {}),
            exception_message=exception_message,
        )

    n_format_fails = 0
    while n_format_fails < self.max_requeries:
        try:
            return self.forward(history)

        # Errors that are raised

        except KeyboardInterrupt:
            raise
        except EOFError:
            raise

        # Errors that cause requery

        except FormatError as e:
            n_format_fails += 1
            history = handle_error_with_retry(
                exception=e, template=self.tools.config.format_error_template, n_requeries=n_format_fails
            )
        except _BlockedActionError as e:
            n_format_fails += 1
            history = handle_error_with_retry(
                exception=e, template=self.tools.config.filter.blocklist_error_template, n_requeries=n_format_fails
            )
        except ContentPolicyViolationError:
            self.logger.warning("Content policy violation, trying to resample")
            n_format_fails += 1
            # Try if simply resampling helps here: history is left unchanged.
        except BashIncorrectSyntaxError as e:
            n_format_fails += 1
            history = handle_error_with_retry(
                exception=e,
                template=self.templates.shell_check_error_template,
                n_requeries=n_format_fails,
            )
        except _RetryWithOutput as e:
            # Not counted as a format failure.
            history = handle_error_with_retry(
                exception=e,
                template=self.templates.next_step_template,
                n_requeries=n_format_fails,
            )
        except _RetryWithoutOutput:
            # Requery with the same template as the last step
            pass

        # Errors that cause exit

        except _ExitForfeit:
            self.logger.info("Exiting due to forfeit")
            return handle_error_with_autosubmission(
                "exit_forfeit",
                "Exiting due to forfeit",
            )

        except _TotalExecutionTimeExceeded:
            self.logger.exception("Exiting due to total execution time exceeded", exc_info=True)
            return handle_error_with_autosubmission(
                "exit_total_execution_time",
                "Exit due to total execution time exceeded",
            )

        except CommandTimeoutError:
            self.logger.exception("Exiting due to multiple consecutive command timeouts", exc_info=True)
            return handle_error_with_autosubmission(
                "exit_command_timeout",
                "Exit due to multiple consecutive command timeouts",
            )

        except ContextWindowExceededError:
            return handle_error_with_autosubmission(
                "exit_context",
                "Exit due to context window",
            )
        except TotalCostLimitExceededError:
            raise
        except CostLimitExceededError:
            return handle_error_with_autosubmission(
                "exit_cost",
                "Exit due to cost limit",
            )
        except RetryError as e:
            self.logger.exception(f"Exiting due to retry error: {e}", exc_info=True)
            return handle_error_with_autosubmission(
                "exit_api",
                f"Exit due to retry error: {e}",
            )
        except SwerexException as e:
            self.logger.exception(f"Exiting due to environment error: {e}", exc_info=True)
            return handle_error_with_autosubmission(
                "exit_environment_error",
                f"Exit due to environment error: {e}",
            )
        except RuntimeError as e:
            self.logger.exception(f"Exiting due to runtime error: {e}", exc_info=True)
            return handle_error_with_autosubmission(
                "exit_error",
                f"Exit due to runtime error: {e}",
            )
        except Exception as e:
            self.logger.exception(f"Exiting due to unknown error: {e}", exc_info=True)
            return handle_error_with_autosubmission(
                "exit_error",
                f"Exit due to unknown error: {e}",
            )

    # The requery budget is exhausted. No exception is active at this point,
    # so log with `error` rather than `exception`: the latter would append a
    # meaningless "NoneType: None" traceback to the log record.
    self.logger.error("Exit due to repeated format/blocklist/bash syntax errors")
    return handle_error_with_autosubmission(
        "exit_format",
        "Exit due to repeated format/blocklist/bash syntax errors",
    )
1219
+
1220
def add_step_to_trajectory(self, step: StepOutput) -> None:
    """Append a `TrajectoryStep` built from the given step to `self.trajectory`.

    Args:
        step: step whose action, observation, output, thought, execution
            time, state, query and extra info are recorded
    """
    record = {
        "action": step.action,
        "observation": step.observation,
        # The raw model output is stored under the "response" key.
        "response": step.output,
        "thought": step.thought,
        "execution_time": step.execution_time,
        "state": step.state,
        "query": step.query,
        "extra_info": step.extra_info,
    }
    self.trajectory.append(TrajectoryStep(record))
1234
+
1235
def step(self) -> StepOutput:
    """Run a single agent step: `self.forward_with_handling` plus bookkeeping.

    After the model has been forwarded, this method (in order):

    1. Adds the step to the message history
    2. Updates the info dictionary (submission, exit status, edited files
       context, model stats)
    3. Adds the step to the trajectory

    The step-start hook fires before anything else and the step-done hook
    fires last.

    Returns:
        step_output: step output (same as the output of `self.forward_with_handling`)
    """
    assert self._env is not None
    self._chook.on_step_start()

    # Step numbers are 1-based and derived from the trajectory length so far.
    rule = "=" * 25
    self.logger.info(f"{rule} STEP {len(self.trajectory) + 1} {rule}")

    result = self.forward_with_handling(self.messages)
    self.add_step_to_history(result)

    submission = result.submission
    self.info["submission"] = submission
    self.info["exit_status"] = result.exit_status  # type: ignore
    # An absent submission is treated as an empty patch.
    edited_files = self._get_edited_files_with_context(patch=submission or "")
    self.info.update(edited_files)  # type: ignore
    self.info["model_stats"] = self.model.stats.model_dump()

    self.add_step_to_trajectory(result)

    self._chook.on_step_done(step=result, info=self.info)
    return result
1264
+
1265
def run(
    self,
    env: SWEEnv,
    problem_statement: ProblemStatement | ProblemStatementConfig,
    output_dir: Path = Path("."),
) -> AgentRunResult:
    """Run the agent on a problem instance.

    Calls `self.setup`, then repeatedly calls `self.step` (saving the
    trajectory after every step) until a step reports that it is done.

    Args:
        env: The environment to run the agent on.
        problem_statement: The problem to work on; forwarded to `self.setup`.
        output_dir: Output directory; forwarded to `self.setup`.

    Returns:
        An `AgentRunResult` built from the "info" and "trajectory" entries
        of `self.get_trajectory_data()`.
    """
    self.setup(env=env, problem_statement=problem_statement, output_dir=output_dir)

    # Action/observation loop
    self._chook.on_run_start()
    latest = StepOutput()
    while not latest.done:
        latest = self.step()
        self.save_trajectory()
    self._chook.on_run_done(trajectory=self.trajectory, info=self.info)

    self.logger.info("Trajectory saved to %s", self.traj_path)

    # The returned values are the "global" information (e.g., submission should
    # be the best submission instead of the last one, etc.), so they are taken
    # from the trajectory data rather than from the last step.
    data = self.get_trajectory_data()
    run_info = data["info"]
    run_trajectory = data["trajectory"]
    return AgentRunResult(info=run_info, trajectory=run_trajectory)