@elizaos/sweagent-root 2.0.0-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (323) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +270 -0
  3. package/package.json +71 -0
  4. package/python/LICENSE +21 -0
  5. package/python/config/README.md +15 -0
  6. package/python/config/bash_only.yaml +222 -0
  7. package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
  8. package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
  9. package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
  10. package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
  11. package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
  12. package/python/config/coding_challenge.yaml +104 -0
  13. package/python/config/default.yaml +69 -0
  14. package/python/config/default_backticks.yaml +69 -0
  15. package/python/config/default_mm_no_images.yaml +82 -0
  16. package/python/config/default_mm_with_images.yaml +83 -0
  17. package/python/config/demo/default.yaml +80 -0
  18. package/python/config/demo/no_instructions.yaml +69 -0
  19. package/python/config/demo/only_bash.yaml +60 -0
  20. package/python/config/exotic/default_shell.yaml +52 -0
  21. package/python/config/exotic/windowed_replace.yaml +125 -0
  22. package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
  23. package/python/config/human/human.yaml +24 -0
  24. package/python/config/human/human_demo.yaml +52 -0
  25. package/python/config/sweagent_0_7/07.yaml +101 -0
  26. package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
  27. package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
  28. package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
  29. package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
  30. package/python/mlc_config.json +44 -0
  31. package/python/pyproject.toml +262 -0
  32. package/python/sweagent/__init__.py +114 -0
  33. package/python/sweagent/__main__.py +4 -0
  34. package/python/sweagent/agent/__init__.py +0 -0
  35. package/python/sweagent/agent/action_sampler.py +317 -0
  36. package/python/sweagent/agent/agents.py +1294 -0
  37. package/python/sweagent/agent/extra/shell_agent.py +106 -0
  38. package/python/sweagent/agent/history_processors.py +399 -0
  39. package/python/sweagent/agent/hooks/__init__.py +0 -0
  40. package/python/sweagent/agent/hooks/abstract.py +139 -0
  41. package/python/sweagent/agent/hooks/status.py +34 -0
  42. package/python/sweagent/agent/models.py +896 -0
  43. package/python/sweagent/agent/problem_statement.py +312 -0
  44. package/python/sweagent/agent/reviewer.py +664 -0
  45. package/python/sweagent/environment/__init__.py +0 -0
  46. package/python/sweagent/environment/hooks/__init__.py +0 -0
  47. package/python/sweagent/environment/hooks/abstract.py +60 -0
  48. package/python/sweagent/environment/hooks/status.py +28 -0
  49. package/python/sweagent/environment/repo.py +219 -0
  50. package/python/sweagent/environment/swe_env.py +276 -0
  51. package/python/sweagent/exceptions.py +54 -0
  52. package/python/sweagent/inspector/README.md +6 -0
  53. package/python/sweagent/inspector/__init__.py +0 -0
  54. package/python/sweagent/inspector/favicon.ico +0 -0
  55. package/python/sweagent/inspector/fileViewer.js +354 -0
  56. package/python/sweagent/inspector/icons/computer.png +0 -0
  57. package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
  58. package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
  59. package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
  60. package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
  61. package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
  62. package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
  63. package/python/sweagent/inspector/index.html +25 -0
  64. package/python/sweagent/inspector/server.py +354 -0
  65. package/python/sweagent/inspector/static.py +169 -0
  66. package/python/sweagent/inspector/style.css +454 -0
  67. package/python/sweagent/run/__init__.py +0 -0
  68. package/python/sweagent/run/_progress.py +158 -0
  69. package/python/sweagent/run/batch_instances.py +419 -0
  70. package/python/sweagent/run/common.py +387 -0
  71. package/python/sweagent/run/compare_runs.py +123 -0
  72. package/python/sweagent/run/extract_pred.py +19 -0
  73. package/python/sweagent/run/hooks/__init__.py +0 -0
  74. package/python/sweagent/run/hooks/abstract.py +67 -0
  75. package/python/sweagent/run/hooks/apply_patch.py +106 -0
  76. package/python/sweagent/run/hooks/open_pr.py +244 -0
  77. package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
  78. package/python/sweagent/run/inspector_cli.py +493 -0
  79. package/python/sweagent/run/merge_predictions.py +64 -0
  80. package/python/sweagent/run/quick_stats.py +96 -0
  81. package/python/sweagent/run/remove_unfinished.py +63 -0
  82. package/python/sweagent/run/rich_test.py +91 -0
  83. package/python/sweagent/run/run.py +147 -0
  84. package/python/sweagent/run/run_batch.py +442 -0
  85. package/python/sweagent/run/run_replay.py +219 -0
  86. package/python/sweagent/run/run_shell.py +155 -0
  87. package/python/sweagent/run/run_single.py +225 -0
  88. package/python/sweagent/run/run_traj_to_demo.py +85 -0
  89. package/python/sweagent/tools/__init__.py +0 -0
  90. package/python/sweagent/tools/bundle.py +57 -0
  91. package/python/sweagent/tools/commands.py +220 -0
  92. package/python/sweagent/tools/parsing.py +619 -0
  93. package/python/sweagent/tools/tools.py +430 -0
  94. package/python/sweagent/tools/utils.py +108 -0
  95. package/python/sweagent/types.py +102 -0
  96. package/python/sweagent/utils/__init__.py +0 -0
  97. package/python/sweagent/utils/config.py +80 -0
  98. package/python/sweagent/utils/files.py +27 -0
  99. package/python/sweagent/utils/github.py +118 -0
  100. package/python/sweagent/utils/jinja_warnings.py +14 -0
  101. package/python/sweagent/utils/log.py +175 -0
  102. package/python/sweagent/utils/patch_formatter.py +152 -0
  103. package/python/sweagent/utils/serialization.py +45 -0
  104. package/python/tests/__init__.py +0 -0
  105. package/python/tests/conftest.py +191 -0
  106. package/python/tests/test_agent.py +258 -0
  107. package/python/tests/test_batch_instance.py +43 -0
  108. package/python/tests/test_commands/_interactive_dummy.py +35 -0
  109. package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
  110. package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
  111. package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
  112. package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
  113. package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
  114. package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
  115. package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
  116. package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
  117. package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
  118. package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
  119. package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
  120. package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
  121. package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
  122. package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
  123. package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
  124. package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
  125. package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
  126. package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
  127. package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
  128. package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
  129. package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
  130. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
  131. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
  132. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
  133. package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
  134. package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
  135. package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
  136. package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
  137. package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
  138. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
  139. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
  140. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
  141. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
  142. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
  143. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
  144. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
  145. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
  146. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
  147. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
  148. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
  149. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
  150. package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
  151. package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
  152. package/python/tests/test_data/data_sources/human_eval.json +1 -0
  153. package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
  154. package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
  155. package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
  156. package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
  157. package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
  158. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
  159. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
  160. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
  161. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
  162. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
  163. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
  164. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
  165. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
  166. package/python/tests/test_env.py +66 -0
  167. package/python/tests/test_env_utils.py +129 -0
  168. package/python/tests/test_history_processors.py +40 -0
  169. package/python/tests/test_models.py +23 -0
  170. package/python/tests/test_openai_live.py +164 -0
  171. package/python/tests/test_packaging.py +7 -0
  172. package/python/tests/test_parsing.py +131 -0
  173. package/python/tests/test_problem_statement_multimodal.py +111 -0
  174. package/python/tests/test_quick_stats.py +42 -0
  175. package/python/tests/test_run.py +37 -0
  176. package/python/tests/test_run_batch.py +110 -0
  177. package/python/tests/test_run_hooks.py +114 -0
  178. package/python/tests/test_run_replay.py +33 -0
  179. package/python/tests/test_run_single.py +125 -0
  180. package/python/tests/test_tools_command_parsing.py +193 -0
  181. package/python/tests/test_utils.py +15 -0
  182. package/python/tests/tools/__init__.py +0 -0
  183. package/python/tests/tools/conftest.py +12 -0
  184. package/python/tests/tools/test_default_utils.py +153 -0
  185. package/python/tests/tools/test_edit_replace.py +0 -0
  186. package/python/tests/tools/test_split_string.py +82 -0
  187. package/python/tests/utils.py +29 -0
  188. package/python/tools/diff_state/bin/_state_diff_state +52 -0
  189. package/python/tools/diff_state/config.yaml +2 -0
  190. package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
  191. package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
  192. package/python/tools/edit_anthropic/config.yaml +56 -0
  193. package/python/tools/edit_anthropic/install.sh +3 -0
  194. package/python/tools/filemap/bin/filemap +45 -0
  195. package/python/tools/filemap/config.yaml +9 -0
  196. package/python/tools/filemap/install.sh +2 -0
  197. package/python/tools/forfeit/bin/exit_forfeit +5 -0
  198. package/python/tools/forfeit/config.yaml +5 -0
  199. package/python/tools/image_tools/bin/view_image +36 -0
  200. package/python/tools/image_tools/config.yaml +9 -0
  201. package/python/tools/multilingual_setup/bin/do_nothing +2 -0
  202. package/python/tools/multilingual_setup/config.yaml +1 -0
  203. package/python/tools/multilingual_setup/install.sh +45 -0
  204. package/python/tools/registry/bin/_read_env +10 -0
  205. package/python/tools/registry/bin/_write_env +10 -0
  206. package/python/tools/registry/config.yaml +1 -0
  207. package/python/tools/registry/install.sh +6 -0
  208. package/python/tools/registry/lib/__init__.py +0 -0
  209. package/python/tools/registry/lib/registry.py +56 -0
  210. package/python/tools/review_on_submit_m/README.md +6 -0
  211. package/python/tools/review_on_submit_m/bin/submit +54 -0
  212. package/python/tools/review_on_submit_m/config.yaml +6 -0
  213. package/python/tools/review_on_submit_m/install.sh +0 -0
  214. package/python/tools/search/bin/find_file +31 -0
  215. package/python/tools/search/bin/search_dir +39 -0
  216. package/python/tools/search/bin/search_file +55 -0
  217. package/python/tools/search/config.yaml +37 -0
  218. package/python/tools/search/install.sh +3 -0
  219. package/python/tools/submit/bin/submit +17 -0
  220. package/python/tools/submit/config.yaml +5 -0
  221. package/python/tools/web_browser/bin/click_mouse +41 -0
  222. package/python/tools/web_browser/bin/close_site +28 -0
  223. package/python/tools/web_browser/bin/double_click_mouse +37 -0
  224. package/python/tools/web_browser/bin/drag_mouse +46 -0
  225. package/python/tools/web_browser/bin/execute_script_on_page +39 -0
  226. package/python/tools/web_browser/bin/get_console_output +48 -0
  227. package/python/tools/web_browser/bin/move_mouse +35 -0
  228. package/python/tools/web_browser/bin/navigate_back +33 -0
  229. package/python/tools/web_browser/bin/navigate_forward +33 -0
  230. package/python/tools/web_browser/bin/open_site +36 -0
  231. package/python/tools/web_browser/bin/press_keys_on_page +51 -0
  232. package/python/tools/web_browser/bin/reload_page +33 -0
  233. package/python/tools/web_browser/bin/run_web_browser_server +394 -0
  234. package/python/tools/web_browser/bin/screenshot_site +38 -0
  235. package/python/tools/web_browser/bin/scroll_on_page +40 -0
  236. package/python/tools/web_browser/bin/set_browser_window_size +40 -0
  237. package/python/tools/web_browser/bin/type_text +34 -0
  238. package/python/tools/web_browser/bin/wait_time +39 -0
  239. package/python/tools/web_browser/config.yaml +155 -0
  240. package/python/tools/web_browser/install.sh +22 -0
  241. package/python/tools/web_browser/lib/browser_manager.py +404 -0
  242. package/python/tools/web_browser/lib/web_browser_config.py +33 -0
  243. package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
  244. package/python/tools/web_browser/test_console.html +1 -0
  245. package/python/tools/windowed/bin/_state +25 -0
  246. package/python/tools/windowed/bin/create +29 -0
  247. package/python/tools/windowed/bin/goto +37 -0
  248. package/python/tools/windowed/bin/open +49 -0
  249. package/python/tools/windowed/bin/scroll_down +12 -0
  250. package/python/tools/windowed/bin/scroll_up +13 -0
  251. package/python/tools/windowed/config.yaml +38 -0
  252. package/python/tools/windowed/install.sh +15 -0
  253. package/python/tools/windowed/lib/__init__.py +0 -0
  254. package/python/tools/windowed/lib/flake8_utils.py +147 -0
  255. package/python/tools/windowed/lib/windowed_file.py +312 -0
  256. package/python/tools/windowed_edit_linting/bin/edit +128 -0
  257. package/python/tools/windowed_edit_linting/config.yaml +31 -0
  258. package/python/tools/windowed_edit_linting/install.sh +5 -0
  259. package/python/tools/windowed_edit_replace/bin/edit +172 -0
  260. package/python/tools/windowed_edit_replace/bin/insert +77 -0
  261. package/python/tools/windowed_edit_replace/config.yaml +60 -0
  262. package/python/tools/windowed_edit_replace/install.sh +5 -0
  263. package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
  264. package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
  265. package/python/tools/windowed_edit_rewrite/install.sh +5 -0
  266. package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
  267. package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
  268. package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
  269. package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
  270. package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
  271. package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
  272. package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
  273. package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
  274. package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
  275. package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
  276. package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
  277. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
  278. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  279. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  280. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
  281. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
  282. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
  283. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  284. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  285. package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
  286. package/rust/Cargo.toml +100 -0
  287. package/rust/README.md +49 -0
  288. package/rust/src/agent/action_sampler.rs +130 -0
  289. package/rust/src/agent/agents.rs +1029 -0
  290. package/rust/src/agent/history_processors.rs +277 -0
  291. package/rust/src/agent/hooks/mod.rs +208 -0
  292. package/rust/src/agent/mod.rs +24 -0
  293. package/rust/src/agent/models.rs +837 -0
  294. package/rust/src/agent/problem_statement.rs +355 -0
  295. package/rust/src/agent/reviewer.rs +505 -0
  296. package/rust/src/bin/sweagent.rs +784 -0
  297. package/rust/src/environment/deployment.rs +631 -0
  298. package/rust/src/environment/hooks/mod.rs +114 -0
  299. package/rust/src/environment/mod.rs +16 -0
  300. package/rust/src/environment/repo.rs +265 -0
  301. package/rust/src/environment/runtime.rs +237 -0
  302. package/rust/src/environment/swe_env.rs +248 -0
  303. package/rust/src/exceptions.rs +228 -0
  304. package/rust/src/lib.rs +68 -0
  305. package/rust/src/monitoring.rs +482 -0
  306. package/rust/src/run/hooks/mod.rs +134 -0
  307. package/rust/src/run/mod.rs +12 -0
  308. package/rust/src/run/run_batch.rs +563 -0
  309. package/rust/src/run/run_single.rs +196 -0
  310. package/rust/src/tools/bundle.rs +224 -0
  311. package/rust/src/tools/commands.rs +173 -0
  312. package/rust/src/tools/mod.rs +295 -0
  313. package/rust/src/tools/parsing.rs +354 -0
  314. package/rust/src/tools/registry.rs +143 -0
  315. package/rust/src/types.rs +554 -0
  316. package/rust/src/utils/config.rs +105 -0
  317. package/rust/src/utils/files.rs +137 -0
  318. package/rust/src/utils/github.rs +171 -0
  319. package/rust/src/utils/log.rs +65 -0
  320. package/rust/src/utils/mod.rs +17 -0
  321. package/rust/src/utils/serialization.rs +181 -0
  322. package/rust/src/utils/template.rs +173 -0
  323. package/typescript/README.md +335 -0
@@ -0,0 +1,106 @@
1
+ import subprocess
2
+ from pathlib import Path
3
+
4
+ import rich
5
+ import rich.markdown
6
+ import rich.panel
7
+
8
+ from sweagent.agent.problem_statement import ProblemStatementConfig
9
+ from sweagent.environment.repo import LocalRepoConfig
10
+ from sweagent.environment.swe_env import SWEEnv
11
+ from sweagent.run.common import _is_promising_patch
12
+ from sweagent.run.hooks.abstract import RunHook
13
+ from sweagent.types import AgentRunResult
14
+ from sweagent.utils.log import get_logger
15
+
16
+
17
class SaveApplyPatchHook(RunHook):
    """Run hook that saves the submitted patch and optionally applies it locally.

    After each instance the ``submission`` entry of the result info is written to
    ``<output_dir>/<instance_id>/<instance_id>.patch``. When ``apply_patch_locally``
    is enabled, the patch is considered promising and the environment's repository
    is a ``LocalRepoConfig``, the saved patch is also applied to that local
    checkout with ``git apply``.
    """

    def __init__(self, apply_patch_locally: bool = False, show_success_message: bool = True):
        """Configure the hook.

        Args:
            apply_patch_locally: Whether to apply the saved patch to the local
                repository once the instance has completed.
            show_success_message: Whether to print usage instructions to the
                console when a promising patch has been saved.
        """
        self.logger = get_logger("swea-save_apply_patch", emoji="⚡️")
        self._apply_patch_locally = apply_patch_locally
        self._show_success_message = show_success_message

    def on_init(self, *, run):
        """Remember the output directory of the run this hook is attached to."""
        self._output_dir = Path(run.output_dir)

    def on_instance_start(self, *, index: int, env: SWEEnv, problem_statement: ProblemStatementConfig):
        """Keep the environment and problem statement for use on completion."""
        self._env = env
        self._problem_statement = problem_statement

    def on_instance_completed(self, *, result: AgentRunResult):
        """Save the patch and, if configured and sensible, apply it locally."""
        instance_id = self._problem_statement.id
        patch_path = self._save_patch(instance_id, result.info)
        if not patch_path:
            # Nothing was submitted, so there is nothing to apply either.
            return
        if not self._apply_patch_locally:
            return
        if not _is_promising_patch(result.info):
            return
        repo = self._env.repo
        # Only a local checkout can be patched in place; this also covers
        # the case where no repository is configured at all (repo is None).
        if repo is None or not isinstance(repo, LocalRepoConfig):
            return
        self._apply_patch(patch_path, Path(repo.path))

    @staticmethod
    def _print_patch_message(patch_output_file: Path):
        """Print a success panel and a shell snippet showing how to use the patch.

        Args:
            patch_output_file: Location of the saved patch; shown as an absolute path.
        """
        # Imported explicitly: the module-level imports only cover rich,
        # rich.markdown and rich.panel, so `rich.console` would otherwise be
        # reachable only as a side effect of those imports.
        from rich.console import Console

        console = Console()
        msg = [
            "SWE-agent has produced a patch that it believes will solve the issue you submitted!",
            "Use the code snippet below to inspect or apply it!",
        ]
        panel = rich.panel.Panel.fit(
            "\n".join(msg),
            title="🎉 Submission successful 🎉",
        )
        console.print(panel)
        content = [
            "```bash",
            "# The patch has been saved to your local filesystem at:",
            f"PATCH_FILE_PATH='{patch_output_file.resolve()}'",
            "# Inspect it:",
            'cat "${PATCH_FILE_PATH}"',
            "# Apply it to a local repository:",
            "cd <your local repo root>",
            'git apply "${PATCH_FILE_PATH}"',
            "```",
        ]
        console.print(rich.markdown.Markdown("\n".join(content)))

    def _save_patch(self, instance_id: str, info) -> Path | None:
        """Write the submitted patch to a file that can be applied with `git apply`.

        Args:
            instance_id: Used for both the sub-directory and the file name.
            info: Result info of the run; its ``submission`` entry holds the patch text.

        Returns:
            The path to the patch file, if it was saved. Otherwise, returns None.
        """
        patch_output_dir = self._output_dir / instance_id
        patch_output_dir.mkdir(exist_ok=True, parents=True)
        patch_output_file = patch_output_dir / f"{instance_id}.patch"
        if info.get("submission") is None:
            self.logger.info("No patch to save.")
            return None
        model_patch = info["submission"]
        # Write UTF-8 explicitly so the result does not depend on the
        # platform's locale encoding when the patch has non-ASCII characters.
        patch_output_file.write_text(model_patch, encoding="utf-8")
        if _is_promising_patch(info):
            # Only print big congratulations if we actually believe
            # the patch will solve the issue
            if self._show_success_message:
                self._print_patch_message(patch_output_file)
        return patch_output_file

    def _apply_patch(self, patch_file: Path, local_dir: Path) -> None:
        """Apply a patch to a local directory.

        A failing ``git apply`` is logged as an error and not re-raised.

        Args:
            patch_file: Patch to apply.
            local_dir: Directory in which ``git apply`` is executed.

        Raises:
            NotADirectoryError: If ``local_dir`` is not an existing directory.
            FileNotFoundError: If ``patch_file`` does not exist.
        """
        # Explicit checks instead of `assert`, which is removed under `python -O`.
        if not local_dir.is_dir():
            msg = f"Cannot apply patch: {local_dir} is not a directory"
            raise NotADirectoryError(msg)
        if not patch_file.exists():
            msg = f"Cannot apply patch: {patch_file} does not exist"
            raise FileNotFoundError(msg)
        # The resolve() is important, because we're gonna run the cmd
        # somewhere else
        cmd = ["git", "apply", str(patch_file.resolve())]
        try:
            subprocess.run(cmd, cwd=local_dir, check=True)
        except subprocess.CalledProcessError as e:
            self.logger.error(f"Failed to apply patch {patch_file} to {local_dir}: {e}")
            return
        self.logger.info(f"Applied patch {patch_file} to {local_dir}")
@@ -0,0 +1,244 @@
1
+ import os
2
+ import random
3
+ import shlex
4
+
5
+ from ghapi.all import GhApi
6
+ from pydantic import BaseModel
7
+
8
+ from sweagent.environment.swe_env import SWEEnv
9
+ from sweagent.run.hooks.abstract import RunHook
10
+ from sweagent.types import AgentRunResult
11
+ from sweagent.utils.github import (
12
+ InvalidGithubURL,
13
+ _get_associated_commit_urls,
14
+ _get_gh_issue_data,
15
+ _parse_gh_issue_url,
16
+ )
17
+ from sweagent.utils.log import get_logger
18
+
19
+ # NOTE
20
+ # THE IMPLEMENTATION DETAILS HERE WILL CHANGE SOON!
21
+
22
+
23
+ # fixme: Bring back the ability to open the PR to a fork
24
def open_pr(*, logger, token, env: SWEEnv, github_url, trajectory, _dry_run: bool = False) -> None:
    """Create PR to repository

    Commits the current state of the working tree inside ``env`` on a fresh
    branch, pushes that branch and opens a draft pull request that references
    the issue at ``github_url``.

    Args:
        logger: Logger used for progress and debug output.
        token: GitHub token passed to the issue lookup and to ``GhApi``.
        env: Environment whose ``communicate`` method runs the git commands.
        github_url: URL of the GitHub issue the PR should close.
        trajectory: Trajectory of actions taken by the agent
        _dry_run: Whether to actually push anything or just simulate it

    Raises:
        ValueError: If ``github_url`` is not a valid GitHub issue URL.
    """

    issue_url = github_url
    logger.info("Opening PR")
    try:
        issue = _get_gh_issue_data(issue_url, token=token)
    except InvalidGithubURL as e:
        msg = "Data path must be a github issue URL if open_pr is set to True."
        raise ValueError(msg) from e
    # Random digits keep repeated runs for the same issue from reusing a branch name.
    branch_name = f"swe-agent-fix-#{issue.number}-" + str(random.random())[2:10]
    env.communicate(
        input="git config user.email 'noemail@swe-agent.com' && git config user.name 'SWE-agent'",
        error_msg="Failed to set git user",
        timeout=10,
        check="raise",
    )
    env.communicate(input="rm -f model.patch", error_msg="Failed to remove model patch", timeout=10, check="raise")
    env.communicate(
        input=f"git checkout -b {branch_name}", error_msg="Failed to switch to new branch", timeout=10, check="raise"
    )
    env.communicate(input="git add .", error_msg="Failed to add commits", timeout=10, check="raise")
    # In a dry run there may be nothing staged, so let git create an empty commit.
    dry_run_flag = "--allow-empty" if _dry_run else ""
    # The issue title is free text, so it is quoted before going into the shell command.
    commit_msg = [
        shlex.quote(f"Fix: {issue.title}"),
        shlex.quote(f"Closes #{issue.number}"),
    ]
    out = env.communicate(
        input=f"git commit -m {commit_msg[0]} -m {commit_msg[1]} {dry_run_flag}",
        error_msg="Failed to commit changes",
        timeout=10,
        check="raise",
    )
    logger.debug(f"Committed changes: {out}")

    owner, repo, _ = _parse_gh_issue_url(issue_url)
    # fixme: bring this back
    # If `--repo_path` was specified with a different github URL, then the record will contain
    # the forking user
    forker = owner
    head = branch_name
    remote = "origin"
    # Since `forker` is set to `owner` just above, this branch is currently never taken.
    if forker != owner:
        head = f"{forker}:{branch_name}"
        token_prefix = ""
        if token:
            token_prefix = f"{token}@"
        fork_url = f"https://{token_prefix}github.com/{forker}/{repo}.git"
        # Log the URL without the embedded token so the credential never reaches the logs.
        logger.debug(f"Using fork: https://github.com/{forker}/{repo}.git")
        env.communicate(
            input=f"git remote add fork {fork_url}",
            error_msg="Failed to create new git remote",
            timeout=10,
        )
        remote = "fork"
    # In a dry run the push command is only echoed, never executed.
    dry_run_prefix = "echo " if _dry_run else ""
    # Like the earlier commands, ask for an error on failure so the hint in
    # `error_msg` is surfaced instead of continuing to the PR API call with a
    # branch that was never pushed.
    # NOTE(review): assumes `check` defaults to a non-raising mode - confirm
    # against SWEEnv.communicate.
    out = env.communicate(
        input=f"{dry_run_prefix} git push {remote} {branch_name}",
        error_msg=(
            "Failed to push branch to remote. Please check your token and permissions. "
            "You might want to push to a fork with the push_gh_repo_url option."
        ),
        timeout=10,
        check="raise",
    )
    logger.debug(f"Pushed commit to {remote=} {branch_name=}: {out}")
    body = (
        f"This is a PR opened by AI tool [SWE Agent](https://github.com/SWE-agent/SWE-agent/) "
        f"to close [#{issue.number}]({issue_url}) ({issue.title}).\n\nCloses #{issue.number}."
    )
    # NOTE(review): `format_trajectory_markdown` is not among the imports
    # visible here; presumably it is defined further down in this module.
    body += "\n\n" + format_trajectory_markdown(trajectory, char_limit=60_000)
    api = GhApi(token=token)
    default_branch = api.repos.get(owner, repo).default_branch
    if not _dry_run:
        args = dict(
            owner=owner,
            repo=repo,
            title=f"SWE-agent[bot] PR to fix: {issue.title}",
            head=head,
            base=default_branch,
            body=body,
            draft=True,
        )
        logger.debug(f"Creating PR with args: {args}")
        pr_info = api.pulls.create(**args)  # type: ignore
        logger.info(
            f"🎉 PR created as a draft at {pr_info.html_url}. Please review it carefully, push "
            "any required changes onto the branch and then click "
            "'Ready for Review' to bring it to the attention of the maintainers.",
        )
118
+
119
+
120
class OpenPRConfig(BaseModel):
    # Used together with open_pr. While this is True (the default), the PR is
    # skipped whenever commits that claim to fix the issue already exist.
    # Switch it off only when you are certain those commits are not fixes,
    # or when the repository is your own.
    skip_if_commits_reference_issue: bool = True
125
+
126
+
127
+ class OpenPRHook(RunHook):
128
+ """This hook opens a PR if the issue is solved and the user has enabled the option."""
129
+
130
def __init__(self, config: OpenPRConfig):
    """Keep the PR configuration and set up the hook's logger.

    Args:
        config: Options controlling when a PR is opened.
    """
    self._config = config
    self.logger = get_logger("swea-open_pr", emoji="⚡️")
133
+
134
def on_init(self, *, run):
    """Take the environment, GitHub token and problem statement from the run.

    The token is read from the ``GITHUB_TOKEN`` environment variable and
    falls back to an empty string when the variable is not set.
    """
    self._env = run.env
    github_token = os.environ.get("GITHUB_TOKEN", "")
    self._token: str = github_token
    self._problem_statement = run.problem_statement
138
+
139
+ def on_instance_completed(self, result: AgentRunResult):
140
+ if self.should_open_pr(result):
141
+ open_pr(
142
+ logger=self.logger,
143
+ token=self._token,
144
+ env=self._env,
145
+ github_url=self._problem_statement.github_url,
146
+ trajectory=result.trajectory,
147
+ )
148
+
149
+ def should_open_pr(self, result: AgentRunResult) -> bool:
150
+ """Does opening a PR make sense?"""
151
+ if not result.info.get("submission"):
152
+ self.logger.info("Not opening PR because no submission was made.")
153
+ return False
154
+ if result.info.get("exit_status") != "submitted":
155
+ self.logger.info(
156
+ "Not opening PR because exit status was %s and not submitted.", result.info.get("exit_status")
157
+ )
158
+ return False
159
+ try:
160
+ issue = _get_gh_issue_data(self._problem_statement.github_url, token=self._token)
161
+ except InvalidGithubURL:
162
+ self.logger.info("Currently only GitHub is supported to open PRs to. Skipping PR creation.")
163
+ return False
164
+ if issue.state != "open":
165
+ self.logger.info(f"Issue is not open (state={issue.state}. Skipping PR creation.")
166
+ return False
167
+ if issue.assignee:
168
+ self.logger.info("Issue is already assigned. Skipping PR creation. Be nice :)")
169
+ return False
170
+ if issue.locked:
171
+ self.logger.info("Issue is locked. Skipping PR creation.")
172
+ return False
173
+ org, repo, issue_number = _parse_gh_issue_url(self._problem_statement.github_url)
174
+ associated_commits = _get_associated_commit_urls(org, repo, issue_number, token=self._token)
175
+ if associated_commits:
176
+ commit_url_strs = ", ".join(associated_commits)
177
+ if self._config.skip_if_commits_reference_issue:
178
+ self.logger.info(f"Issue already has associated commits (see {commit_url_strs}). Skipping PR creation.")
179
+ return False
180
+ else:
181
+ self.logger.warning(
182
+ "Proceeding with PR creation even though there are already commits "
183
+ f"({commit_url_strs}) associated with the issue. Please only do this for your own repositories "
184
+ "or after verifying that the existing commits do not fix the issue.",
185
+ )
186
+ return True
187
+
188
+
189
+ def _remove_triple_backticks(text: str) -> str:
190
+ return "\n".join(line.removeprefix("```") for line in text.splitlines())
191
+
192
+
193
+ def format_trajectory_markdown(trajectory: list[dict[str, str]], char_limit: int | None = None):
194
+ """Format a trajectory as a markdown string for use in gh PR description.
195
+
196
+ Args:
197
+ char_limit: If not None, truncate the trajectory to this many characters.
198
+ """
199
+ prefix = [
200
+ "<details>",
201
+ "<summary>Thought process ('trajectory') of SWE-agent (click to expand)</summary>",
202
+ "",
203
+ "",
204
+ ]
205
+ prefix_text = "\n".join(prefix)
206
+ suffix = [
207
+ "",
208
+ "</details>",
209
+ ]
210
+ suffix_text = "\n".join(suffix)
211
+
212
+ steps = []
213
+ current_length = len(prefix_text) + len(suffix_text)
214
+
215
+ for i, step in enumerate(trajectory):
216
+ step_strs = [
217
+ f"**🧑‍🚒 Response ({i})**: ",
218
+ f"{step['response'].strip()}",
219
+ f"**👀‍ Observation ({i})**:",
220
+ "```",
221
+ f"{_remove_triple_backticks(step['observation']).strip()}",
222
+ "```",
223
+ ]
224
+ step_text = "\n".join(step_strs)
225
+
226
+ # Calculate separator length (only needed for steps after the first one)
227
+ separator_length = 0
228
+ if steps:
229
+ separator_length = len("\n\n---\n\n")
230
+
231
+ # Check if adding this step would exceed the character limit
232
+ if char_limit is not None and current_length + separator_length + len(step_text) > char_limit:
233
+ if i > 0:
234
+ steps.append("\n\n... (truncated due to length limit)")
235
+ break
236
+
237
+ if steps:
238
+ steps.append("\n\n---\n\n")
239
+ current_length += separator_length
240
+
241
+ steps.append(step_text)
242
+ current_length += len(step_text)
243
+
244
+ return prefix_text + "".join(steps) + suffix_text
@@ -0,0 +1,113 @@
1
+ """SweBench evaluation hook.
2
+
3
+ Will be automatically added to `run_batch` if `SWEBenchInstances.evaluate` is set to true
4
+ """
5
+
6
+ import subprocess
7
+ import sys
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from threading import Lock
11
+ from time import time
12
+
13
+ from sweagent.run.hooks.abstract import RunHook
14
+ from sweagent.run.merge_predictions import merge_predictions
15
+ from sweagent.types import AgentRunResult
16
+ from sweagent.utils.log import get_logger
17
+
18
+
19
class SweBenchEvaluate(RunHook):
    """Run hook that submits predictions to SWE-bench through the ``sb-cli`` command.

    While instances complete, merged intermediate predictions can be submitted
    in the background at a configurable interval; at the end of the run the
    final ``preds.json`` is submitted and the resulting report is moved to
    ``results.json`` in the output directory.
    """

    _SUBSET_MAP = {"lite": "swe-bench_lite", "verified": "swe-bench_verified", "multimodal": "swe-bench_multimodal"}

    def __init__(self, output_dir: Path, subset: str, split: str, continuous_submission_every: int = 0) -> None:
        """Set up the hook.

        Args:
            output_dir: Directory holding the predictions; reports are written below it.
            subset: Key into `_SUBSET_MAP` selecting the ``sb-cli`` subset name.
            split: Split name passed through to ``sb-cli``.
            continuous_submission_every: Minimum number of seconds between
                intermediate submissions; ``0`` disables them.
        """
        super().__init__()
        self.output_dir = output_dir
        self.subset = subset
        self.split = split
        self.continuous_submission_every = continuous_submission_every
        self.logger = get_logger("SB-evaluate", emoji="😬")
        self.merge_lock = Lock()
        self.last_evaluation_time = time()
        self.evaluation_interval = continuous_submission_every
        # Background `sb-cli` processes started for intermediate submissions.
        self._running_calls = []
        # We need to add a suffix to the run_id to avoid collisions when you reuse the name of your run
        self._time_suffix = datetime.now().strftime("%Y%m%d%H%M%S%f")

    @property
    def run_id(self) -> str:
        """Run identifier: the output directory name plus the creation timestamp."""
        return f"{self.output_dir.name}_{self._time_suffix}"

    def _get_sb_call(self, preds_path: Path, submit_only: bool = False) -> list[str]:
        """Build the ``sb-cli submit`` command line for ``preds_path``.

        Args:
            preds_path: Predictions file to submit.
            submit_only: If True, pass ``0`` for ``--wait_for_evaluation``,
                ``--gen_report`` and ``--verify_submission``.

        Raises:
            KeyError: If ``self.subset`` is not a key of `_SUBSET_MAP`.
        """
        args = [
            "sb-cli",
            "submit",
            self._SUBSET_MAP[self.subset],
            self.split,
            "--predictions_path",
            str(preds_path),
            "--run_id",
            self.run_id,
            "--output_dir",
            str(self.output_dir / "sb-cli-reports"),
        ]
        if submit_only:
            args.extend(["--wait_for_evaluation", "0", "--gen_report", "0", "--verify_submission", "0"])
        return args

    def check_running_calls(self) -> None:
        """Warn if one of the running calls failed.

        Finished processes are dropped from the list of running calls.
        """
        # Iterate over a snapshot: removing from the list that is being
        # iterated would skip the element following every removed one.
        for call in list(self._running_calls):
            if call.poll() is not None:
                if call.returncode != 0:
                    self.logger.error("Failed to submit results to SweBench eval: %s", call.stderr.read())
                self._running_calls.remove(call)

    def on_instance_completed(self, *, result: AgentRunResult):
        """Submit merged intermediate predictions if the submission interval has elapsed."""
        if self.evaluation_interval == 0:
            return

        current_time = time()
        if current_time - self.last_evaluation_time < self.evaluation_interval:
            return

        with self.merge_lock:
            merge_predictions([self.output_dir], self.output_dir / "tmppreds.json")
            self.last_evaluation_time = current_time

        # NOTE(review): stdout/stderr are piped but only stderr is read, and only
        # in check_running_calls; a very chatty child could block on a full pipe.
        self._running_calls.append(
            subprocess.Popen(
                self._get_sb_call(preds_path=self.output_dir / "tmppreds.json", submit_only=True),
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
        )

    def move_sb_cli_report(self) -> None:
        """Move report from `sb-cli-reports` to `results.json`."""
        output_dir = self.output_dir / "sb-cli-reports"
        if not output_dir.exists():
            self.logger.warning("No SweBench report found at %s", output_dir)
            return
        # Any previous results file is removed before the report count is checked.
        (self.output_dir / "results.json").unlink(missing_ok=True)
        reports = list(output_dir.glob("*.json"))
        if len(reports) != 1:
            self.logger.warning("Expected 1 SweBench report at %s, found %d. Cannot rename.", output_dir, len(reports))
            return
        reports[0].rename(self.output_dir / "results.json")

    def on_end(self) -> None:
        """Submit the final ``preds.json`` and, on success, clean up and collect the report."""
        self.logger.info("Submitting results to SWE-Bench")
        try:
            subprocess.run(
                self._get_sb_call(preds_path=self.output_dir / "preds.json"),
                check=True,
                stdout=sys.stdout,
                stderr=sys.stderr,
            )
        except subprocess.CalledProcessError as e:
            self.logger.error("Failed to submit results to SweBench eval: %s", e)
        else:
            # remove temporary predictions if they exist
            if (self.output_dir / "tmppreds.json").exists():
                (self.output_dir / "tmppreds.json").unlink()
            # NOTE(review): placed in the success branch; the source's indentation was lost — confirm.
            self.move_sb_cli_report()