@elizaos/sweagent-root 2.0.0-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (323) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +270 -0
  3. package/package.json +71 -0
  4. package/python/LICENSE +21 -0
  5. package/python/config/README.md +15 -0
  6. package/python/config/bash_only.yaml +222 -0
  7. package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
  8. package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
  9. package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
  10. package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
  11. package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
  12. package/python/config/coding_challenge.yaml +104 -0
  13. package/python/config/default.yaml +69 -0
  14. package/python/config/default_backticks.yaml +69 -0
  15. package/python/config/default_mm_no_images.yaml +82 -0
  16. package/python/config/default_mm_with_images.yaml +83 -0
  17. package/python/config/demo/default.yaml +80 -0
  18. package/python/config/demo/no_instructions.yaml +69 -0
  19. package/python/config/demo/only_bash.yaml +60 -0
  20. package/python/config/exotic/default_shell.yaml +52 -0
  21. package/python/config/exotic/windowed_replace.yaml +125 -0
  22. package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
  23. package/python/config/human/human.yaml +24 -0
  24. package/python/config/human/human_demo.yaml +52 -0
  25. package/python/config/sweagent_0_7/07.yaml +101 -0
  26. package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
  27. package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
  28. package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
  29. package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
  30. package/python/mlc_config.json +44 -0
  31. package/python/pyproject.toml +262 -0
  32. package/python/sweagent/__init__.py +114 -0
  33. package/python/sweagent/__main__.py +4 -0
  34. package/python/sweagent/agent/__init__.py +0 -0
  35. package/python/sweagent/agent/action_sampler.py +317 -0
  36. package/python/sweagent/agent/agents.py +1294 -0
  37. package/python/sweagent/agent/extra/shell_agent.py +106 -0
  38. package/python/sweagent/agent/history_processors.py +399 -0
  39. package/python/sweagent/agent/hooks/__init__.py +0 -0
  40. package/python/sweagent/agent/hooks/abstract.py +139 -0
  41. package/python/sweagent/agent/hooks/status.py +34 -0
  42. package/python/sweagent/agent/models.py +896 -0
  43. package/python/sweagent/agent/problem_statement.py +312 -0
  44. package/python/sweagent/agent/reviewer.py +664 -0
  45. package/python/sweagent/environment/__init__.py +0 -0
  46. package/python/sweagent/environment/hooks/__init__.py +0 -0
  47. package/python/sweagent/environment/hooks/abstract.py +60 -0
  48. package/python/sweagent/environment/hooks/status.py +28 -0
  49. package/python/sweagent/environment/repo.py +219 -0
  50. package/python/sweagent/environment/swe_env.py +276 -0
  51. package/python/sweagent/exceptions.py +54 -0
  52. package/python/sweagent/inspector/README.md +6 -0
  53. package/python/sweagent/inspector/__init__.py +0 -0
  54. package/python/sweagent/inspector/favicon.ico +0 -0
  55. package/python/sweagent/inspector/fileViewer.js +354 -0
  56. package/python/sweagent/inspector/icons/computer.png +0 -0
  57. package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
  58. package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
  59. package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
  60. package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
  61. package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
  62. package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
  63. package/python/sweagent/inspector/index.html +25 -0
  64. package/python/sweagent/inspector/server.py +354 -0
  65. package/python/sweagent/inspector/static.py +169 -0
  66. package/python/sweagent/inspector/style.css +454 -0
  67. package/python/sweagent/run/__init__.py +0 -0
  68. package/python/sweagent/run/_progress.py +158 -0
  69. package/python/sweagent/run/batch_instances.py +419 -0
  70. package/python/sweagent/run/common.py +387 -0
  71. package/python/sweagent/run/compare_runs.py +123 -0
  72. package/python/sweagent/run/extract_pred.py +19 -0
  73. package/python/sweagent/run/hooks/__init__.py +0 -0
  74. package/python/sweagent/run/hooks/abstract.py +67 -0
  75. package/python/sweagent/run/hooks/apply_patch.py +106 -0
  76. package/python/sweagent/run/hooks/open_pr.py +244 -0
  77. package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
  78. package/python/sweagent/run/inspector_cli.py +493 -0
  79. package/python/sweagent/run/merge_predictions.py +64 -0
  80. package/python/sweagent/run/quick_stats.py +96 -0
  81. package/python/sweagent/run/remove_unfinished.py +63 -0
  82. package/python/sweagent/run/rich_test.py +91 -0
  83. package/python/sweagent/run/run.py +147 -0
  84. package/python/sweagent/run/run_batch.py +442 -0
  85. package/python/sweagent/run/run_replay.py +219 -0
  86. package/python/sweagent/run/run_shell.py +155 -0
  87. package/python/sweagent/run/run_single.py +225 -0
  88. package/python/sweagent/run/run_traj_to_demo.py +85 -0
  89. package/python/sweagent/tools/__init__.py +0 -0
  90. package/python/sweagent/tools/bundle.py +57 -0
  91. package/python/sweagent/tools/commands.py +220 -0
  92. package/python/sweagent/tools/parsing.py +619 -0
  93. package/python/sweagent/tools/tools.py +430 -0
  94. package/python/sweagent/tools/utils.py +108 -0
  95. package/python/sweagent/types.py +102 -0
  96. package/python/sweagent/utils/__init__.py +0 -0
  97. package/python/sweagent/utils/config.py +80 -0
  98. package/python/sweagent/utils/files.py +27 -0
  99. package/python/sweagent/utils/github.py +118 -0
  100. package/python/sweagent/utils/jinja_warnings.py +14 -0
  101. package/python/sweagent/utils/log.py +175 -0
  102. package/python/sweagent/utils/patch_formatter.py +152 -0
  103. package/python/sweagent/utils/serialization.py +45 -0
  104. package/python/tests/__init__.py +0 -0
  105. package/python/tests/conftest.py +191 -0
  106. package/python/tests/test_agent.py +258 -0
  107. package/python/tests/test_batch_instance.py +43 -0
  108. package/python/tests/test_commands/_interactive_dummy.py +35 -0
  109. package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
  110. package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
  111. package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
  112. package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
  113. package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
  114. package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
  115. package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
  116. package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
  117. package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
  118. package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
  119. package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
  120. package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
  121. package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
  122. package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
  123. package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
  124. package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
  125. package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
  126. package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
  127. package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
  128. package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
  129. package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
  130. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
  131. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
  132. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
  133. package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
  134. package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
  135. package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
  136. package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
  137. package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
  138. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
  139. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
  140. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
  141. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
  142. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
  143. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
  144. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
  145. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
  146. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
  147. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
  148. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
  149. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
  150. package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
  151. package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
  152. package/python/tests/test_data/data_sources/human_eval.json +1 -0
  153. package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
  154. package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
  155. package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
  156. package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
  157. package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
  158. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
  159. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
  160. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
  161. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
  162. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
  163. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
  164. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
  165. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
  166. package/python/tests/test_env.py +66 -0
  167. package/python/tests/test_env_utils.py +129 -0
  168. package/python/tests/test_history_processors.py +40 -0
  169. package/python/tests/test_models.py +23 -0
  170. package/python/tests/test_openai_live.py +164 -0
  171. package/python/tests/test_packaging.py +7 -0
  172. package/python/tests/test_parsing.py +131 -0
  173. package/python/tests/test_problem_statement_multimodal.py +111 -0
  174. package/python/tests/test_quick_stats.py +42 -0
  175. package/python/tests/test_run.py +37 -0
  176. package/python/tests/test_run_batch.py +110 -0
  177. package/python/tests/test_run_hooks.py +114 -0
  178. package/python/tests/test_run_replay.py +33 -0
  179. package/python/tests/test_run_single.py +125 -0
  180. package/python/tests/test_tools_command_parsing.py +193 -0
  181. package/python/tests/test_utils.py +15 -0
  182. package/python/tests/tools/__init__.py +0 -0
  183. package/python/tests/tools/conftest.py +12 -0
  184. package/python/tests/tools/test_default_utils.py +153 -0
  185. package/python/tests/tools/test_edit_replace.py +0 -0
  186. package/python/tests/tools/test_split_string.py +82 -0
  187. package/python/tests/utils.py +29 -0
  188. package/python/tools/diff_state/bin/_state_diff_state +52 -0
  189. package/python/tools/diff_state/config.yaml +2 -0
  190. package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
  191. package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
  192. package/python/tools/edit_anthropic/config.yaml +56 -0
  193. package/python/tools/edit_anthropic/install.sh +3 -0
  194. package/python/tools/filemap/bin/filemap +45 -0
  195. package/python/tools/filemap/config.yaml +9 -0
  196. package/python/tools/filemap/install.sh +2 -0
  197. package/python/tools/forfeit/bin/exit_forfeit +5 -0
  198. package/python/tools/forfeit/config.yaml +5 -0
  199. package/python/tools/image_tools/bin/view_image +36 -0
  200. package/python/tools/image_tools/config.yaml +9 -0
  201. package/python/tools/multilingual_setup/bin/do_nothing +2 -0
  202. package/python/tools/multilingual_setup/config.yaml +1 -0
  203. package/python/tools/multilingual_setup/install.sh +45 -0
  204. package/python/tools/registry/bin/_read_env +10 -0
  205. package/python/tools/registry/bin/_write_env +10 -0
  206. package/python/tools/registry/config.yaml +1 -0
  207. package/python/tools/registry/install.sh +6 -0
  208. package/python/tools/registry/lib/__init__.py +0 -0
  209. package/python/tools/registry/lib/registry.py +56 -0
  210. package/python/tools/review_on_submit_m/README.md +6 -0
  211. package/python/tools/review_on_submit_m/bin/submit +54 -0
  212. package/python/tools/review_on_submit_m/config.yaml +6 -0
  213. package/python/tools/review_on_submit_m/install.sh +0 -0
  214. package/python/tools/search/bin/find_file +31 -0
  215. package/python/tools/search/bin/search_dir +39 -0
  216. package/python/tools/search/bin/search_file +55 -0
  217. package/python/tools/search/config.yaml +37 -0
  218. package/python/tools/search/install.sh +3 -0
  219. package/python/tools/submit/bin/submit +17 -0
  220. package/python/tools/submit/config.yaml +5 -0
  221. package/python/tools/web_browser/bin/click_mouse +41 -0
  222. package/python/tools/web_browser/bin/close_site +28 -0
  223. package/python/tools/web_browser/bin/double_click_mouse +37 -0
  224. package/python/tools/web_browser/bin/drag_mouse +46 -0
  225. package/python/tools/web_browser/bin/execute_script_on_page +39 -0
  226. package/python/tools/web_browser/bin/get_console_output +48 -0
  227. package/python/tools/web_browser/bin/move_mouse +35 -0
  228. package/python/tools/web_browser/bin/navigate_back +33 -0
  229. package/python/tools/web_browser/bin/navigate_forward +33 -0
  230. package/python/tools/web_browser/bin/open_site +36 -0
  231. package/python/tools/web_browser/bin/press_keys_on_page +51 -0
  232. package/python/tools/web_browser/bin/reload_page +33 -0
  233. package/python/tools/web_browser/bin/run_web_browser_server +394 -0
  234. package/python/tools/web_browser/bin/screenshot_site +38 -0
  235. package/python/tools/web_browser/bin/scroll_on_page +40 -0
  236. package/python/tools/web_browser/bin/set_browser_window_size +40 -0
  237. package/python/tools/web_browser/bin/type_text +34 -0
  238. package/python/tools/web_browser/bin/wait_time +39 -0
  239. package/python/tools/web_browser/config.yaml +155 -0
  240. package/python/tools/web_browser/install.sh +22 -0
  241. package/python/tools/web_browser/lib/browser_manager.py +404 -0
  242. package/python/tools/web_browser/lib/web_browser_config.py +33 -0
  243. package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
  244. package/python/tools/web_browser/test_console.html +1 -0
  245. package/python/tools/windowed/bin/_state +25 -0
  246. package/python/tools/windowed/bin/create +29 -0
  247. package/python/tools/windowed/bin/goto +37 -0
  248. package/python/tools/windowed/bin/open +49 -0
  249. package/python/tools/windowed/bin/scroll_down +12 -0
  250. package/python/tools/windowed/bin/scroll_up +13 -0
  251. package/python/tools/windowed/config.yaml +38 -0
  252. package/python/tools/windowed/install.sh +15 -0
  253. package/python/tools/windowed/lib/__init__.py +0 -0
  254. package/python/tools/windowed/lib/flake8_utils.py +147 -0
  255. package/python/tools/windowed/lib/windowed_file.py +312 -0
  256. package/python/tools/windowed_edit_linting/bin/edit +128 -0
  257. package/python/tools/windowed_edit_linting/config.yaml +31 -0
  258. package/python/tools/windowed_edit_linting/install.sh +5 -0
  259. package/python/tools/windowed_edit_replace/bin/edit +172 -0
  260. package/python/tools/windowed_edit_replace/bin/insert +77 -0
  261. package/python/tools/windowed_edit_replace/config.yaml +60 -0
  262. package/python/tools/windowed_edit_replace/install.sh +5 -0
  263. package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
  264. package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
  265. package/python/tools/windowed_edit_rewrite/install.sh +5 -0
  266. package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
  267. package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
  268. package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
  269. package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
  270. package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
  271. package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
  272. package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
  273. package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
  274. package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
  275. package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
  276. package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
  277. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
  278. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  279. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  280. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
  281. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
  282. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
  283. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  284. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  285. package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
  286. package/rust/Cargo.toml +100 -0
  287. package/rust/README.md +49 -0
  288. package/rust/src/agent/action_sampler.rs +130 -0
  289. package/rust/src/agent/agents.rs +1029 -0
  290. package/rust/src/agent/history_processors.rs +277 -0
  291. package/rust/src/agent/hooks/mod.rs +208 -0
  292. package/rust/src/agent/mod.rs +24 -0
  293. package/rust/src/agent/models.rs +837 -0
  294. package/rust/src/agent/problem_statement.rs +355 -0
  295. package/rust/src/agent/reviewer.rs +505 -0
  296. package/rust/src/bin/sweagent.rs +784 -0
  297. package/rust/src/environment/deployment.rs +631 -0
  298. package/rust/src/environment/hooks/mod.rs +114 -0
  299. package/rust/src/environment/mod.rs +16 -0
  300. package/rust/src/environment/repo.rs +265 -0
  301. package/rust/src/environment/runtime.rs +237 -0
  302. package/rust/src/environment/swe_env.rs +248 -0
  303. package/rust/src/exceptions.rs +228 -0
  304. package/rust/src/lib.rs +68 -0
  305. package/rust/src/monitoring.rs +482 -0
  306. package/rust/src/run/hooks/mod.rs +134 -0
  307. package/rust/src/run/mod.rs +12 -0
  308. package/rust/src/run/run_batch.rs +563 -0
  309. package/rust/src/run/run_single.rs +196 -0
  310. package/rust/src/tools/bundle.rs +224 -0
  311. package/rust/src/tools/commands.rs +173 -0
  312. package/rust/src/tools/mod.rs +295 -0
  313. package/rust/src/tools/parsing.rs +354 -0
  314. package/rust/src/tools/registry.rs +143 -0
  315. package/rust/src/types.rs +554 -0
  316. package/rust/src/utils/config.rs +105 -0
  317. package/rust/src/utils/files.rs +137 -0
  318. package/rust/src/utils/github.rs +171 -0
  319. package/rust/src/utils/log.rs +65 -0
  320. package/rust/src/utils/mod.rs +17 -0
  321. package/rust/src/utils/serialization.rs +181 -0
  322. package/rust/src/utils/template.rs +173 -0
  323. package/typescript/README.md +335 -0
@@ -0,0 +1,1029 @@
1
+ //! Core agent implementations for SWE-agent
2
+ //!
3
+ //! This module contains the main agent types that coordinate the problem-solving loop.
4
+
5
+ use super::history_processors::{
6
+ create_history_processor, ChainedHistoryProcessor, HistoryProcessor, HistoryProcessorConfig,
7
+ };
8
+ use super::hooks::{AgentHook, CombinedAgentHook, QueryMessageEvent};
9
+ use super::models::{get_model, GlobalStats, InstanceStats, Model, ModelConfig};
10
+ use super::problem_statement::{
11
+ create_problem_statement, ProblemStatement, ProblemStatementConfig,
12
+ };
13
+ use super::reviewer::{get_retry_loop_from_config, RetryLoop, RetryLoopConfig, ReviewSubmission};
14
+ use crate::environment::SWEEnv;
15
+ use crate::exceptions::{tokens, Result, SWEAgentError};
16
+ use crate::tools::{ToolConfig, ToolHandler};
17
+ use crate::types::{
18
+ AgentInfo, AgentRunResult, Content, EnvironmentState, History, HistoryItem, MessageType,
19
+ QueryMessage, Role, StepOutput, TemplateConfig, Trajectory, TrajectoryStep,
20
+ };
21
+ use crate::utils::template::render_template;
22
+ use crate::VERSION;
23
+ use async_trait::async_trait;
24
+ use serde::{Deserialize, Serialize};
25
+ use std::collections::HashMap;
26
+ use std::path::{Path, PathBuf};
27
+ use std::sync::Arc;
28
+
29
+ /// Trait for all agent types
30
+ #[async_trait]
31
+ pub trait Agent: Send + Sync {
32
+ /// Add a hook to the agent
33
+ fn add_hook(&mut self, hook: Box<dyn AgentHook>);
34
+
35
+ /// Get trajectory data
36
+ fn get_trajectory_data(&self) -> TrajectoryData;
37
+
38
+ /// Run a single step
39
+ async fn step(&mut self) -> Result<StepOutput>;
40
+
41
+ /// Run the agent on a problem
42
+ async fn run(
43
+ &mut self,
44
+ env: &mut SWEEnv,
45
+ problem_statement: Box<dyn ProblemStatement>,
46
+ output_dir: &Path,
47
+ ) -> Result<AgentRunResult>;
48
+ }
49
+
50
+ /// Data from a trajectory
51
+ #[derive(Debug, Clone, Serialize, Deserialize)]
52
+ pub struct TrajectoryData {
53
+ pub trajectory: Trajectory,
54
+ pub history: History,
55
+ pub info: AgentInfo,
56
+ #[serde(skip_serializing_if = "Option::is_none")]
57
+ pub replay_config: Option<String>,
58
+ pub environment: String,
59
+ }
60
+
61
+ /// Configuration for the default agent
62
+ #[derive(Debug, Clone, Serialize, Deserialize)]
63
+ pub struct DefaultAgentConfig {
64
+ #[serde(default)]
65
+ pub name: String,
66
+ #[serde(default)]
67
+ pub templates: TemplateConfig,
68
+ #[serde(default)]
69
+ pub tools: ToolConfig,
70
+ #[serde(default)]
71
+ pub history_processors: Vec<HistoryProcessorConfig>,
72
+ #[serde(default)]
73
+ pub model: ModelConfig,
74
+ #[serde(default = "default_max_requeries")]
75
+ pub max_requeries: usize,
76
+ }
77
+
78
/// Returns the default value for `DefaultAgentConfig::max_requeries`.
fn default_max_requeries() -> usize {
    const DEFAULT_MAX_REQUERIES: usize = 3;
    DEFAULT_MAX_REQUERIES
}
81
+
82
+ impl Default for DefaultAgentConfig {
83
+ fn default() -> Self {
84
+ Self {
85
+ name: "main".to_string(),
86
+ templates: TemplateConfig::default(),
87
+ tools: ToolConfig::default(),
88
+ history_processors: Vec::new(),
89
+ model: ModelConfig::default(),
90
+ max_requeries: default_max_requeries(),
91
+ }
92
+ }
93
+ }
94
+
95
+ /// Default agent implementation
96
+ pub struct DefaultAgent {
97
+ pub name: String,
98
+ model: Box<dyn Model>,
99
+ templates: TemplateConfig,
100
+ tools: ToolHandler,
101
+ history_processors: Box<dyn HistoryProcessor>,
102
+ max_requeries: usize,
103
+
104
+ // Runtime state
105
+ env: Option<Arc<tokio::sync::Mutex<SWEEnv>>>,
106
+ problem_statement: Option<Box<dyn ProblemStatement>>,
107
+ traj_path: Option<PathBuf>,
108
+ history: History,
109
+ trajectory: Trajectory,
110
+ info: AgentInfo,
111
+ chook: CombinedAgentHook,
112
+
113
+ // Counters
114
+ n_consecutive_timeouts: usize,
115
+ total_execution_time: f64,
116
+ }
117
+
118
+ impl DefaultAgent {
119
+ pub fn new(
120
+ name: impl Into<String>,
121
+ model: Box<dyn Model>,
122
+ templates: TemplateConfig,
123
+ tools: ToolHandler,
124
+ history_processors: Box<dyn HistoryProcessor>,
125
+ max_requeries: usize,
126
+ ) -> Self {
127
+ Self {
128
+ name: name.into(),
129
+ model,
130
+ templates,
131
+ tools,
132
+ history_processors,
133
+ max_requeries,
134
+ env: None,
135
+ problem_statement: None,
136
+ traj_path: None,
137
+ history: Vec::new(),
138
+ trajectory: Vec::new(),
139
+ info: AgentInfo::default(),
140
+ chook: CombinedAgentHook::new(),
141
+ n_consecutive_timeouts: 0,
142
+ total_execution_time: 0.0,
143
+ }
144
+ }
145
+
146
+ pub fn from_config(config: DefaultAgentConfig) -> Result<Self> {
147
+ let global_stats = Arc::new(GlobalStats::default());
148
+ let model = get_model(config.model, global_stats)?;
149
+ let tools = ToolHandler::new(config.tools)?;
150
+
151
+ let processors: Vec<Box<dyn HistoryProcessor>> = config
152
+ .history_processors
153
+ .iter()
154
+ .map(create_history_processor)
155
+ .collect();
156
+
157
+ let history_processor: Box<dyn HistoryProcessor> = if processors.is_empty() {
158
+ Box::new(super::history_processors::DefaultHistoryProcessor)
159
+ } else {
160
+ Box::new(ChainedHistoryProcessor::new(processors))
161
+ };
162
+
163
+ Ok(Self::new(
164
+ config.name,
165
+ model,
166
+ config.templates,
167
+ tools,
168
+ history_processor,
169
+ config.max_requeries,
170
+ ))
171
+ }
172
+
173
+ /// Get processed messages for model query
174
+ fn get_messages(&self) -> History {
175
+ let filtered: History = self
176
+ .history
177
+ .iter()
178
+ .filter(|item| item.agent.as_deref() == Some(&self.name) || item.agent.is_none())
179
+ .cloned()
180
+ .collect();
181
+
182
+ self.history_processors.process(filtered)
183
+ }
184
+
185
+ fn append_history(&mut self, item: HistoryItem) {
186
+ let event = QueryMessageEvent {
187
+ agent: item.agent.clone().unwrap_or_default(),
188
+ role: format!("{:?}", item.role),
189
+ content: item.content.as_str(),
190
+ message_type: item
191
+ .message_type
192
+ .as_ref()
193
+ .map(|t| format!("{:?}", t))
194
+ .unwrap_or_default(),
195
+ is_demo: item.is_demo,
196
+ thought: item.thought.clone(),
197
+ action: item.action.clone(),
198
+ };
199
+ self.chook.on_query_message_added(&event);
200
+ self.history.push(item);
201
+ }
202
+
203
+ /// Setup the agent for a new problem instance
204
+ pub async fn setup(
205
+ &mut self,
206
+ env: Arc<tokio::sync::Mutex<SWEEnv>>,
207
+ problem_statement: Box<dyn ProblemStatement>,
208
+ output_dir: &Path,
209
+ ) -> Result<()> {
210
+ std::fs::create_dir_all(output_dir)?;
211
+
212
+ self.problem_statement = Some(problem_statement);
213
+ self.env = Some(env.clone());
214
+
215
+ let ps = self.problem_statement.as_ref().unwrap();
216
+ let iid = ps.id();
217
+ tracing::info!(instance_id = iid, "Setting up agent");
218
+
219
+ self.traj_path = Some(output_dir.join(format!("{}.traj", iid)));
220
+ tracing::info!(path = ?self.traj_path, "Trajectory will be saved");
221
+
222
+ self.chook.on_tools_installation_started();
223
+
224
+ {
225
+ let mut env_guard = env.lock().await;
226
+ self.tools.install(&mut env_guard).await?;
227
+ }
228
+
229
+ self.chook.on_setup_attempt();
230
+
231
+ self.info = AgentInfo {
232
+ swe_agent_version: Some(VERSION.to_string()),
233
+ ..Default::default()
234
+ };
235
+
236
+ // Add system message
237
+ self.add_system_message_to_history();
238
+
239
+ // Add demonstrations
240
+ self.add_demonstrations_to_history()?;
241
+
242
+ // Add instance template
243
+ let state = {
244
+ let env_guard = env.lock().await;
245
+ self.tools.get_state(&env_guard).await
246
+ };
247
+ self.add_instance_template_to_history(&state);
248
+
249
+ self.chook.on_setup_done();
250
+
251
+ Ok(())
252
+ }
253
+
254
+ fn get_format_dict(&self, extra: Option<HashMap<String, String>>) -> HashMap<String, String> {
255
+ let mut dict = extra.unwrap_or_default();
256
+
257
+ if let Some(ref ps) = self.problem_statement {
258
+ dict.insert("problem_statement".to_string(), ps.get_problem_statement());
259
+ for (k, v) in ps.get_extra_fields() {
260
+ dict.insert(k, v);
261
+ }
262
+ }
263
+
264
+ if let Some(ref cmd_docs) = self.tools.config.command_docs {
265
+ dict.insert("command_docs".to_string(), cmd_docs.clone());
266
+ }
267
+
268
+ dict
269
+ }
270
+
271
+ fn add_system_message_to_history(&mut self) {
272
+ let format_dict = self.get_format_dict(None);
273
+ let system_msg = render_template(&self.templates.system_template, &format_dict)
274
+ .unwrap_or_else(|_| self.templates.system_template.clone());
275
+
276
+ tracing::info!(agent = %self.name, "SYSTEM\n{}", system_msg);
277
+
278
+ self.append_history(HistoryItem {
279
+ role: Role::System,
280
+ content: Content::Text(system_msg),
281
+ agent: Some(self.name.clone()),
282
+ message_type: Some(MessageType::System),
283
+ ..Default::default()
284
+ });
285
+ }
286
+
287
+ fn add_demonstrations_to_history(&mut self) -> Result<()> {
288
+ for demo_path in &self.templates.demonstrations.clone() {
289
+ self.add_demonstration_to_history(demo_path)?;
290
+ }
291
+ Ok(())
292
+ }
293
+
294
+ fn add_demonstration_to_history(&mut self, demo_path: &str) -> Result<()> {
295
+ if self.templates.demonstration_template.is_none() && !self.templates.put_demos_in_history {
296
+ return Err(SWEAgentError::ConfigurationError(
297
+ "Cannot use demonstrations without demonstration_template or put_demos_in_history"
298
+ .to_string(),
299
+ ));
300
+ }
301
+
302
+ tracing::info!(path = demo_path, "Loading demonstration");
303
+ let content = std::fs::read_to_string(demo_path)?;
304
+
305
+ // Parse demonstration (YAML or JSON)
306
+ let demo_history: Vec<HistoryItem> =
307
+ if demo_path.ends_with(".yaml") || demo_path.ends_with(".yml") {
308
+ let parsed: serde_yaml::Value = serde_yaml::from_str(&content)?;
309
+ if let Some(history) = parsed.get("history") {
310
+ serde_yaml::from_value(history.clone())?
311
+ } else {
312
+ Vec::new()
313
+ }
314
+ } else {
315
+ let parsed: serde_json::Value = serde_json::from_str(&content)?;
316
+ if let Some(history) = parsed.get("history") {
317
+ serde_json::from_value(history.clone())?
318
+ } else {
319
+ Vec::new()
320
+ }
321
+ };
322
+
323
+ if self.templates.put_demos_in_history {
324
+ for mut entry in demo_history {
325
+ if entry.role != Role::System {
326
+ entry.is_demo = Some(true);
327
+ entry.agent = Some(entry.agent.unwrap_or_else(|| self.name.clone()));
328
+ self.append_history(entry);
329
+ }
330
+ }
331
+ } else if let Some(ref template) = self.templates.demonstration_template {
332
+ let demo_text: String = demo_history
333
+ .iter()
334
+ .filter(|e| e.role != Role::System)
335
+ .map(|e| e.content.as_str())
336
+ .collect::<Vec<_>>()
337
+ .join("\n");
338
+
339
+ let mut vars = HashMap::new();
340
+ vars.insert("demonstration".to_string(), demo_text);
341
+ let demonstration = render_template(template, &vars)?;
342
+
343
+ self.append_history(HistoryItem {
344
+ role: Role::User,
345
+ content: Content::Text(demonstration),
346
+ agent: Some(self.name.clone()),
347
+ is_demo: Some(true),
348
+ message_type: Some(MessageType::Demonstration),
349
+ ..Default::default()
350
+ });
351
+ }
352
+
353
+ Ok(())
354
+ }
355
+
356
+ fn add_instance_template_to_history(&mut self, state: &HashMap<String, String>) {
357
+ let format_dict = self.get_format_dict(Some(state.clone()));
358
+
359
+ let mut templates = vec![self.templates.instance_template.clone()];
360
+ if let Some(ref strategy) = self.templates.strategy_template {
361
+ templates.push(strategy.clone());
362
+ }
363
+
364
+ let message: String = templates
365
+ .iter()
366
+ .filter_map(|t| render_template(t, &format_dict).ok())
367
+ .collect::<Vec<_>>()
368
+ .join("\n");
369
+
370
+ self.append_history(HistoryItem {
371
+ role: Role::User,
372
+ content: Content::Text(message),
373
+ agent: Some(self.name.clone()),
374
+ message_type: Some(MessageType::Observation),
375
+ ..Default::default()
376
+ });
377
+ }
378
+
379
+ #[allow(dead_code)]
380
+ fn get_trajectory(&self) -> Trajectory {
381
+ self.trajectory.clone()
382
+ }
383
+
384
+ fn save_trajectory(&self) -> Result<()> {
385
+ if let Some(ref path) = self.traj_path {
386
+ let data = self.get_trajectory_data();
387
+ let json = serde_json::to_string_pretty(&data)?;
388
+ std::fs::write(path, json)?;
389
+ }
390
+ Ok(())
391
+ }
392
+
393
+ async fn forward(&mut self, history: History) -> Result<StepOutput> {
394
+ if self.total_execution_time > self.tools.config.total_execution_timeout as f64 {
395
+ return Err(SWEAgentError::TotalExecutionTimeExceeded);
396
+ }
397
+
398
+ let mut step = StepOutput {
399
+ query: history
400
+ .iter()
401
+ .map(|h| QueryMessage {
402
+ role: h.role.clone(),
403
+ content: h.content.as_str(),
404
+ message_type: h.message_type.clone(),
405
+ })
406
+ .collect(),
407
+ ..Default::default()
408
+ };
409
+
410
+ // Query model
411
+ self.chook.on_model_query(&history, &self.name);
412
+
413
+ let output = self.model.query(&history).await?;
414
+
415
+ step.output = output.message.clone();
416
+
417
+ // Parse thought and action
418
+ let (thought, action) = self.tools.parse_actions(&output)?;
419
+ step.thought = thought;
420
+ step.action = action;
421
+ step.thinking_blocks = output.thinking_blocks;
422
+ step.tool_calls = output.tool_calls.clone();
423
+
424
+ if let Some(ref tool_calls) = output.tool_calls {
425
+ step.tool_call_ids = Some(tool_calls.iter().map(|tc| tc.id.clone()).collect());
426
+ }
427
+
428
+ tracing::info!(
429
+ thought = %step.thought,
430
+ action = %step.action,
431
+ "💭 THOUGHT / 🎬 ACTION"
432
+ );
433
+
434
+ self.chook.on_actions_generated(&step);
435
+
436
+ self.handle_action(&mut step).await
437
+ }
438
+
439
+ async fn handle_action(&mut self, step: &mut StepOutput) -> Result<StepOutput> {
440
+ // Check if action is blocked
441
+ if self.tools.should_block_action(&step.action) {
442
+ return Err(SWEAgentError::BlockedAction(step.action.clone()));
443
+ }
444
+
445
+ // Handle exit command
446
+ if step.action.trim() == "exit" {
447
+ tracing::info!("Exiting agent");
448
+ step.done = true;
449
+ step.observation = "Exited".to_string();
450
+ step.exit_status = Some("exit_command".to_string());
451
+
452
+ if let Some(ref env) = self.env {
453
+ let env_guard = env.lock().await;
454
+ let state_map = self.tools.get_state(&env_guard).await;
455
+ step.state = EnvironmentState {
456
+ working_dir: state_map.get("working_dir").cloned(),
457
+ open_files: state_map
458
+ .get("open_files")
459
+ .map(|s| s.split(", ").map(String::from).collect()),
460
+ git_status: state_map.get("git_status").cloned(),
461
+ ..Default::default()
462
+ };
463
+ }
464
+
465
+ return Ok(step.clone());
466
+ }
467
+
468
+ self.chook.on_action_started(step);
469
+ let execution_start = std::time::Instant::now();
470
+
471
+ let run_action = self.tools.guard_multiline_input(&step.action);
472
+
473
+ // Execute command
474
+ let observation = if let Some(ref env) = self.env {
475
+ let env_guard = env.lock().await;
476
+ match env_guard
477
+ .communicate(&run_action, Some(self.tools.config.execution_timeout))
478
+ .await
479
+ {
480
+ Ok(output) => {
481
+ self.n_consecutive_timeouts = 0;
482
+ output
483
+ }
484
+ Err(SWEAgentError::CommandTimeout { timeout, command }) => {
485
+ self.n_consecutive_timeouts += 1;
486
+ if self.n_consecutive_timeouts
487
+ >= self.tools.config.max_consecutive_execution_timeouts
488
+ {
489
+ return Err(SWEAgentError::CommandTimeout { timeout, command });
490
+ }
491
+
492
+ env_guard.interrupt_session().await?;
493
+
494
+ let mut vars = HashMap::new();
495
+ vars.insert("timeout".to_string(), timeout.to_string());
496
+ vars.insert("command".to_string(), command);
497
+ render_template(&self.templates.command_cancelled_timeout_template, &vars)?
498
+ }
499
+ Err(e) => return Err(e),
500
+ }
501
+ } else {
502
+ return Err(SWEAgentError::EnvironmentError(
503
+ "Environment not initialized".to_string(),
504
+ ));
505
+ };
506
+
507
+ step.observation = observation.clone();
508
+ step.execution_time = execution_start.elapsed().as_secs_f64();
509
+ self.total_execution_time += step.execution_time;
510
+
511
+ self.chook.on_action_executed(step);
512
+
513
+ if let Some(ref env) = self.env {
514
+ let env_guard = env.lock().await;
515
+ let state_map = self.tools.get_state(&env_guard).await;
516
+ step.state = EnvironmentState {
517
+ working_dir: state_map.get("working_dir").cloned(),
518
+ open_files: state_map
519
+ .get("open_files")
520
+ .map(|s| s.split(", ").map(String::from).collect()),
521
+ git_status: state_map.get("git_status").cloned(),
522
+ ..Default::default()
523
+ };
524
+ }
525
+
526
+ // Check for special tokens
527
+ if observation.contains(tokens::RETRY_WITH_OUTPUT) {
528
+ step.observation = observation.replace(tokens::RETRY_WITH_OUTPUT, "");
529
+ return Err(SWEAgentError::RetryWithOutput);
530
+ } else if observation.contains(tokens::RETRY_WITHOUT_OUTPUT) {
531
+ step.observation = observation.replace(tokens::RETRY_WITHOUT_OUTPUT, "");
532
+ return Err(SWEAgentError::RetryWithoutOutput);
533
+ } else if observation.contains(tokens::EXIT_FORFEIT) {
534
+ return Err(SWEAgentError::ExitForfeit);
535
+ }
536
+
537
+ self.handle_submission(step, None, false).await
538
+ }
539
+
540
+ async fn handle_submission(
541
+ &self,
542
+ step: &mut StepOutput,
543
+ observation: Option<&str>,
544
+ force_submission: bool,
545
+ ) -> Result<StepOutput> {
546
+ let obs = observation.unwrap_or(&step.observation);
547
+ let is_submission = self.tools.check_for_submission_cmd(obs);
548
+
549
+ if is_submission || force_submission {
550
+ if let Some(ref env) = self.env {
551
+ let env_guard = env.lock().await;
552
+ match env_guard.read_file("/root/model.patch").await {
553
+ Ok(submission) => {
554
+ let trimmed = submission.trim();
555
+ if !trimmed.is_empty() {
556
+ step.submission = Some(submission.clone());
557
+ step.observation = submission;
558
+ }
559
+
560
+ if step.exit_status.is_none() {
561
+ step.exit_status = Some("submitted".to_string());
562
+ } else if step.submission.is_some() {
563
+ let status = step.exit_status.as_ref().unwrap();
564
+ step.exit_status = Some(format!("submitted ({})", status));
565
+ }
566
+
567
+ step.done = true;
568
+ tracing::info!(submission = ?step.submission, "Found submission");
569
+ }
570
+ Err(_) => {
571
+ tracing::warn!("Submission file not found");
572
+ }
573
+ }
574
+ }
575
+ }
576
+
577
+ Ok(step.clone())
578
+ }
579
+
580
+ fn add_step_to_trajectory(&mut self, step: &StepOutput) {
581
+ self.trajectory.push(TrajectoryStep::from(step));
582
+ }
583
+
584
+ async fn forward_with_handling(&mut self, mut history: History) -> Result<StepOutput> {
585
+ let mut n_format_fails = 0;
586
+
587
+ loop {
588
+ match self.forward(history.clone()).await {
589
+ Ok(step) => return Ok(step),
590
+ Err(e) => {
591
+ if e.should_exit() {
592
+ let mut step = StepOutput {
593
+ done: true,
594
+ thought: e.to_string(),
595
+ exit_status: Some(e.exit_status().to_string()),
596
+ ..Default::default()
597
+ };
598
+ return self.attempt_autosubmission_after_error(&mut step).await;
599
+ }
600
+
601
+ if e.should_retry() {
602
+ n_format_fails += 1;
603
+ if n_format_fails >= self.max_requeries {
604
+ let mut step = StepOutput {
605
+ done: true,
606
+ thought: "Exit due to repeated format errors".to_string(),
607
+ exit_status: Some("exit_format".to_string()),
608
+ ..Default::default()
609
+ };
610
+ return self.attempt_autosubmission_after_error(&mut step).await;
611
+ }
612
+
613
+ // Prepare requery
614
+ let template = &self.tools.config.format_error_template;
615
+ let vars = self.get_format_dict(None);
616
+ let error_msg = render_template(template, &vars)?;
617
+
618
+ history = self.get_messages();
619
+ history.push(HistoryItem {
620
+ role: Role::User,
621
+ content: Content::Text(error_msg),
622
+ agent: Some(self.name.clone()),
623
+ message_type: Some(MessageType::User),
624
+ ..Default::default()
625
+ });
626
+
627
+ continue;
628
+ }
629
+
630
+ return Err(e);
631
+ }
632
+ }
633
+ }
634
+ }
635
+
636
+ async fn attempt_autosubmission_after_error(
637
+ &self,
638
+ step: &mut StepOutput,
639
+ ) -> Result<StepOutput> {
640
+ tracing::warn!("Attempting autosubmission after error");
641
+ step.done = true;
642
+
643
+ if let Some(ref env) = self.env {
644
+ let env_guard = env.lock().await;
645
+
646
+ // Try to create submission
647
+ let submission_cmd = "git add -A && git diff --cached > /root/model.patch";
648
+ let _ = env_guard.communicate(submission_cmd, Some(30)).await;
649
+ }
650
+
651
+ self.handle_submission(step, None, true).await
652
+ }
653
+ }
654
+
655
+ #[async_trait]
656
+ impl Agent for DefaultAgent {
657
+ fn add_hook(&mut self, hook: Box<dyn AgentHook>) {
658
+ self.chook.add_hook(hook);
659
+ }
660
+
661
+ fn get_trajectory_data(&self) -> TrajectoryData {
662
+ TrajectoryData {
663
+ trajectory: self.trajectory.clone(),
664
+ history: self.history.clone(),
665
+ info: self.info.clone(),
666
+ replay_config: None,
667
+ environment: "unknown".to_string(),
668
+ }
669
+ }
670
+
671
+ async fn step(&mut self) -> Result<StepOutput> {
672
+ self.chook.on_step_start();
673
+
674
+ let n_step = self.trajectory.len() + 1;
675
+ tracing::info!(step = n_step, "Starting step");
676
+
677
+ let messages = self.get_messages();
678
+ let step_output = self.forward_with_handling(messages).await?;
679
+
680
+ // Add to history
681
+ self.append_history(HistoryItem {
682
+ role: Role::Assistant,
683
+ content: Content::Text(step_output.output.clone()),
684
+ thought: Some(step_output.thought.clone()),
685
+ action: Some(step_output.action.clone()),
686
+ agent: Some(self.name.clone()),
687
+ tool_calls: step_output.tool_calls.clone(),
688
+ message_type: Some(MessageType::Action),
689
+ thinking_blocks: step_output.thinking_blocks.clone(),
690
+ ..Default::default()
691
+ });
692
+
693
+ // Add observation
694
+ let observation = &step_output.observation;
695
+ let template = if observation.trim().is_empty() {
696
+ self.templates
697
+ .next_step_no_output_template
698
+ .as_ref()
699
+ .unwrap_or(&self.templates.next_step_template)
700
+ } else if observation.len() > self.templates.max_observation_length {
701
+ &self.templates.next_step_truncated_observation_template
702
+ } else {
703
+ &self.templates.next_step_template
704
+ };
705
+
706
+ let mut format_dict = self.get_format_dict(None);
707
+ format_dict.insert("observation".to_string(), observation.clone());
708
+ format_dict.insert(
709
+ "elided_chars".to_string(),
710
+ (observation
711
+ .len()
712
+ .saturating_sub(self.templates.max_observation_length))
713
+ .to_string(),
714
+ );
715
+ format_dict.insert(
716
+ "max_observation_length".to_string(),
717
+ self.templates.max_observation_length.to_string(),
718
+ );
719
+
720
+ let obs_message = render_template(template, &format_dict)?;
721
+
722
+ self.append_history(HistoryItem {
723
+ role: Role::User,
724
+ content: Content::Text(obs_message),
725
+ agent: Some(self.name.clone()),
726
+ message_type: Some(MessageType::Observation),
727
+ tool_call_ids: step_output.tool_call_ids.clone(),
728
+ ..Default::default()
729
+ });
730
+
731
+ // Update info
732
+ self.info.submission = step_output.submission.clone();
733
+ self.info.exit_status = step_output.exit_status.clone();
734
+ self.info.model_stats = Some(self.model.get_stats().to_model_stats());
735
+
736
+ self.add_step_to_trajectory(&step_output);
737
+ self.chook.on_step_done(&step_output, &self.info);
738
+
739
+ Ok(step_output)
740
+ }
741
+
742
+ async fn run(
743
+ &mut self,
744
+ env: &mut SWEEnv,
745
+ problem_statement: Box<dyn ProblemStatement>,
746
+ output_dir: &Path,
747
+ ) -> Result<AgentRunResult> {
748
+ // Wrap env in Arc<Mutex> for shared access
749
+ let env_arc = Arc::new(tokio::sync::Mutex::new(std::mem::take(env)));
750
+
751
+ self.setup(env_arc.clone(), problem_statement, output_dir)
752
+ .await?;
753
+
754
+ self.chook.on_run_start();
755
+ let mut step_output = StepOutput::default();
756
+
757
+ while !step_output.done {
758
+ step_output = self.step().await?;
759
+ let _ = self.save_trajectory();
760
+ }
761
+
762
+ self.chook.on_run_done(&self.trajectory, &self.info);
763
+ tracing::info!(path = ?self.traj_path, "Trajectory saved");
764
+
765
+ // Restore env
766
+ let restored_env = Arc::try_unwrap(env_arc)
767
+ .map_err(|_| SWEAgentError::RuntimeError("Could not restore environment".to_string()))?
768
+ .into_inner();
769
+ *env = restored_env;
770
+
771
+ Ok(AgentRunResult {
772
+ info: self.info.clone(),
773
+ trajectory: self.trajectory.clone(),
774
+ })
775
+ }
776
+ }
777
+
778
+ /// Configuration for retry agent
779
+ #[derive(Debug, Clone, Serialize, Deserialize)]
780
+ pub struct RetryAgentConfig {
781
+ #[serde(default)]
782
+ pub name: String,
783
+ pub agent_configs: Vec<DefaultAgentConfig>,
784
+ #[serde(default)]
785
+ pub retry_loop: RetryLoopConfig,
786
+ }
787
+
788
+ /// Retry agent that tries multiple configurations
789
+ pub struct RetryAgent {
790
+ config: RetryAgentConfig,
791
+ hooks: Vec<Box<dyn AgentHook>>,
792
+ i_attempt: usize,
793
+ agent: Option<DefaultAgent>,
794
+ attempt_data: Vec<TrajectoryData>,
795
+ total_instance_stats: InstanceStats,
796
+ chook: CombinedAgentHook,
797
+ traj_path: Option<PathBuf>,
798
+ problem_statement: Option<Box<dyn ProblemStatement>>,
799
+ env: Option<Arc<tokio::sync::Mutex<SWEEnv>>>,
800
+ output_dir: Option<PathBuf>,
801
+ retry_loop: Option<Box<dyn RetryLoop>>,
802
+ }
803
+
804
+ impl RetryAgent {
805
+ pub fn new(config: RetryAgentConfig) -> Self {
806
+ Self {
807
+ config,
808
+ hooks: Vec::new(),
809
+ i_attempt: 0,
810
+ agent: None,
811
+ attempt_data: Vec::new(),
812
+ total_instance_stats: InstanceStats::default(),
813
+ chook: CombinedAgentHook::new(),
814
+ traj_path: None,
815
+ problem_statement: None,
816
+ env: None,
817
+ output_dir: None,
818
+ retry_loop: None,
819
+ }
820
+ }
821
+
822
+ pub fn from_config(config: RetryAgentConfig) -> Self {
823
+ Self::new(config)
824
+ }
825
+
826
+ fn setup_agent(&mut self) -> Result<()> {
827
+ let agent_config_idx = self.i_attempt % self.config.agent_configs.len();
828
+ let agent_config = self.config.agent_configs[agent_config_idx].clone();
829
+
830
+ self.agent = Some(DefaultAgent::from_config(agent_config)?);
831
+
832
+ // Add hooks to agent
833
+ if let Some(ref mut _agent) = self.agent {
834
+ for _hook in &self.hooks {
835
+ // Can't clone hooks, so we skip this
836
+ }
837
+ }
838
+
839
+ Ok(())
840
+ }
841
+
842
+ fn next_attempt(&mut self) -> Result<()> {
843
+ self.i_attempt += 1;
844
+
845
+ // Reset environment if possible
846
+ if let Some(ref _env) = self.env {
847
+ // Would call hard_reset here
848
+ }
849
+
850
+ self.setup_agent()
851
+ }
852
+
853
+ fn finalize_agent_run(&mut self) {
854
+ if let Some(ref agent) = self.agent {
855
+ self.attempt_data.push(agent.get_trajectory_data());
856
+ self.total_instance_stats = self.total_instance_stats.add(&agent.model.get_stats());
857
+ }
858
+ }
859
+
860
+ fn save_trajectory(&self, choose: bool) -> Result<()> {
861
+ if let Some(ref path) = self.traj_path {
862
+ let data = self.get_trajectory_data_internal(choose);
863
+ let json = serde_json::to_string_pretty(&data)?;
864
+ std::fs::write(path, json)?;
865
+ }
866
+ Ok(())
867
+ }
868
+
869
+ fn get_trajectory_data_internal(&self, choose: bool) -> serde_json::Value {
870
+ let mut data = serde_json::json!({
871
+ "attempts": self.attempt_data,
872
+ });
873
+
874
+ if choose && !self.attempt_data.is_empty() {
875
+ let best_idx = self
876
+ .retry_loop
877
+ .as_ref()
878
+ .and_then(|rl| rl.get_best())
879
+ .unwrap_or(0);
880
+
881
+ if best_idx < self.attempt_data.len() {
882
+ data["info"] = serde_json::to_value(&self.attempt_data[best_idx].info).unwrap();
883
+ data["info"]["best_attempt_idx"] = serde_json::Value::from(best_idx);
884
+ data["trajectory"] =
885
+ serde_json::to_value(&self.attempt_data[best_idx].trajectory).unwrap();
886
+ }
887
+ }
888
+
889
+ data
890
+ }
891
+ }
892
+
893
+ #[async_trait]
894
+ impl Agent for RetryAgent {
895
+ fn add_hook(&mut self, hook: Box<dyn AgentHook>) {
896
+ self.chook.add_hook(hook);
897
+ }
898
+
899
+ fn get_trajectory_data(&self) -> TrajectoryData {
900
+ if let Some(ref agent) = self.agent {
901
+ agent.get_trajectory_data()
902
+ } else if !self.attempt_data.is_empty() {
903
+ self.attempt_data.last().unwrap().clone()
904
+ } else {
905
+ TrajectoryData {
906
+ trajectory: Vec::new(),
907
+ history: Vec::new(),
908
+ info: AgentInfo::default(),
909
+ replay_config: None,
910
+ environment: "unknown".to_string(),
911
+ }
912
+ }
913
+ }
914
+
915
+ async fn step(&mut self) -> Result<StepOutput> {
916
+ if let Some(ref mut agent) = self.agent {
917
+ agent.step().await
918
+ } else {
919
+ Err(SWEAgentError::RuntimeError(
920
+ "Agent not initialized".to_string(),
921
+ ))
922
+ }
923
+ }
924
+
925
+ async fn run(
926
+ &mut self,
927
+ env: &mut SWEEnv,
928
+ problem_statement: Box<dyn ProblemStatement>,
929
+ output_dir: &Path,
930
+ ) -> Result<AgentRunResult> {
931
+ std::fs::create_dir_all(output_dir)?;
932
+
933
+ self.traj_path = Some(output_dir.join(format!("{}.traj", problem_statement.id())));
934
+ self.problem_statement = Some(problem_statement);
935
+ self.output_dir = Some(output_dir.to_path_buf());
936
+
937
+ self.retry_loop = Some(get_retry_loop_from_config(&self.config.retry_loop));
938
+
939
+ let env_arc = Arc::new(tokio::sync::Mutex::new(std::mem::take(env)));
940
+ self.env = Some(env_arc.clone());
941
+
942
+ self.chook.on_run_start();
943
+ let mut step_output = StepOutput::default();
944
+
945
+ self.setup_agent()?;
946
+
947
+ // Setup agent with environment
948
+ if let (Some(ref mut agent), Some(ref ps)) = (&mut self.agent, &self.problem_statement) {
949
+ // Clone problem statement for agent
950
+ let ps_clone = create_problem_statement(&ProblemStatementConfig::Text {
951
+ text: ps.get_problem_statement(),
952
+ id: ps.id().to_string(),
953
+ })?;
954
+
955
+ agent.setup(env_arc.clone(), ps_clone, output_dir).await?;
956
+ }
957
+
958
+ while !step_output.done {
959
+ step_output = self.step().await?;
960
+ let _ = self.save_trajectory(false);
961
+
962
+ if step_output.done {
963
+ let traj_data = self.get_trajectory_data();
964
+ if let Some(ref mut retry_loop) = self.retry_loop {
965
+ retry_loop.on_submit(ReviewSubmission {
966
+ trajectory: traj_data.trajectory,
967
+ info: traj_data.info,
968
+ submission: step_output.submission.clone(),
969
+ });
970
+ }
971
+
972
+ self.finalize_agent_run();
973
+ let _ = self.save_trajectory(false);
974
+
975
+ if let Some(ref retry_loop) = self.retry_loop {
976
+ if retry_loop.should_retry() {
977
+ self.next_attempt()?;
978
+ step_output.done = false;
979
+ }
980
+ }
981
+ }
982
+ }
983
+
984
+ let _ = self.save_trajectory(true);
985
+
986
+ if let Some(ref agent) = self.agent {
987
+ self.chook.on_run_done(&agent.trajectory, &agent.info);
988
+ }
989
+
990
+ tracing::info!(path = ?self.traj_path, "Trajectory saved");
991
+
992
+ // Restore env
993
+ let restored_env = Arc::try_unwrap(env_arc)
994
+ .map_err(|_| SWEAgentError::RuntimeError("Could not restore environment".to_string()))?
995
+ .into_inner();
996
+ *env = restored_env;
997
+
998
+ Ok(AgentRunResult {
999
+ info: self.get_trajectory_data().info,
1000
+ trajectory: self.get_trajectory_data().trajectory,
1001
+ })
1002
+ }
1003
+ }
1004
+
1005
+ /// Union type for agent configurations
1006
+ #[derive(Debug, Clone, Serialize, Deserialize)]
1007
+ #[serde(tag = "type", rename_all = "snake_case")]
1008
+ pub enum AgentConfig {
1009
+ Default(Box<DefaultAgentConfig>),
1010
+ Retry(RetryAgentConfig),
1011
+ }
1012
+
1013
+ impl Default for AgentConfig {
1014
+ fn default() -> Self {
1015
+ Self::Default(Box::default())
1016
+ }
1017
+ }
1018
+
1019
+ /// Create an agent from configuration
1020
+ pub fn get_agent_from_config(config: AgentConfig) -> Result<Box<dyn Agent>> {
1021
+ match config {
1022
+ AgentConfig::Default(cfg) => Ok(Box::new(DefaultAgent::from_config(*cfg)?)),
1023
+ AgentConfig::Retry(cfg) => Ok(Box::new(RetryAgent::from_config(cfg))),
1024
+ }
1025
+ }
1026
+
1027
+ // Re-exports for convenience
1028
+ pub use super::hooks::AgentHook as AbstractAgentHook;
1029
+ pub use super::models::Model as AbstractModel;