@elizaos/sweagent-root 2.0.0-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (323) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +270 -0
  3. package/package.json +71 -0
  4. package/python/LICENSE +21 -0
  5. package/python/config/README.md +15 -0
  6. package/python/config/bash_only.yaml +222 -0
  7. package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
  8. package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
  9. package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
  10. package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
  11. package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
  12. package/python/config/coding_challenge.yaml +104 -0
  13. package/python/config/default.yaml +69 -0
  14. package/python/config/default_backticks.yaml +69 -0
  15. package/python/config/default_mm_no_images.yaml +82 -0
  16. package/python/config/default_mm_with_images.yaml +83 -0
  17. package/python/config/demo/default.yaml +80 -0
  18. package/python/config/demo/no_instructions.yaml +69 -0
  19. package/python/config/demo/only_bash.yaml +60 -0
  20. package/python/config/exotic/default_shell.yaml +52 -0
  21. package/python/config/exotic/windowed_replace.yaml +125 -0
  22. package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
  23. package/python/config/human/human.yaml +24 -0
  24. package/python/config/human/human_demo.yaml +52 -0
  25. package/python/config/sweagent_0_7/07.yaml +101 -0
  26. package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
  27. package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
  28. package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
  29. package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
  30. package/python/mlc_config.json +44 -0
  31. package/python/pyproject.toml +262 -0
  32. package/python/sweagent/__init__.py +114 -0
  33. package/python/sweagent/__main__.py +4 -0
  34. package/python/sweagent/agent/__init__.py +0 -0
  35. package/python/sweagent/agent/action_sampler.py +317 -0
  36. package/python/sweagent/agent/agents.py +1294 -0
  37. package/python/sweagent/agent/extra/shell_agent.py +106 -0
  38. package/python/sweagent/agent/history_processors.py +399 -0
  39. package/python/sweagent/agent/hooks/__init__.py +0 -0
  40. package/python/sweagent/agent/hooks/abstract.py +139 -0
  41. package/python/sweagent/agent/hooks/status.py +34 -0
  42. package/python/sweagent/agent/models.py +896 -0
  43. package/python/sweagent/agent/problem_statement.py +312 -0
  44. package/python/sweagent/agent/reviewer.py +664 -0
  45. package/python/sweagent/environment/__init__.py +0 -0
  46. package/python/sweagent/environment/hooks/__init__.py +0 -0
  47. package/python/sweagent/environment/hooks/abstract.py +60 -0
  48. package/python/sweagent/environment/hooks/status.py +28 -0
  49. package/python/sweagent/environment/repo.py +219 -0
  50. package/python/sweagent/environment/swe_env.py +276 -0
  51. package/python/sweagent/exceptions.py +54 -0
  52. package/python/sweagent/inspector/README.md +6 -0
  53. package/python/sweagent/inspector/__init__.py +0 -0
  54. package/python/sweagent/inspector/favicon.ico +0 -0
  55. package/python/sweagent/inspector/fileViewer.js +354 -0
  56. package/python/sweagent/inspector/icons/computer.png +0 -0
  57. package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
  58. package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
  59. package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
  60. package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
  61. package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
  62. package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
  63. package/python/sweagent/inspector/index.html +25 -0
  64. package/python/sweagent/inspector/server.py +354 -0
  65. package/python/sweagent/inspector/static.py +169 -0
  66. package/python/sweagent/inspector/style.css +454 -0
  67. package/python/sweagent/run/__init__.py +0 -0
  68. package/python/sweagent/run/_progress.py +158 -0
  69. package/python/sweagent/run/batch_instances.py +419 -0
  70. package/python/sweagent/run/common.py +387 -0
  71. package/python/sweagent/run/compare_runs.py +123 -0
  72. package/python/sweagent/run/extract_pred.py +19 -0
  73. package/python/sweagent/run/hooks/__init__.py +0 -0
  74. package/python/sweagent/run/hooks/abstract.py +67 -0
  75. package/python/sweagent/run/hooks/apply_patch.py +106 -0
  76. package/python/sweagent/run/hooks/open_pr.py +244 -0
  77. package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
  78. package/python/sweagent/run/inspector_cli.py +493 -0
  79. package/python/sweagent/run/merge_predictions.py +64 -0
  80. package/python/sweagent/run/quick_stats.py +96 -0
  81. package/python/sweagent/run/remove_unfinished.py +63 -0
  82. package/python/sweagent/run/rich_test.py +91 -0
  83. package/python/sweagent/run/run.py +147 -0
  84. package/python/sweagent/run/run_batch.py +442 -0
  85. package/python/sweagent/run/run_replay.py +219 -0
  86. package/python/sweagent/run/run_shell.py +155 -0
  87. package/python/sweagent/run/run_single.py +225 -0
  88. package/python/sweagent/run/run_traj_to_demo.py +85 -0
  89. package/python/sweagent/tools/__init__.py +0 -0
  90. package/python/sweagent/tools/bundle.py +57 -0
  91. package/python/sweagent/tools/commands.py +220 -0
  92. package/python/sweagent/tools/parsing.py +619 -0
  93. package/python/sweagent/tools/tools.py +430 -0
  94. package/python/sweagent/tools/utils.py +108 -0
  95. package/python/sweagent/types.py +102 -0
  96. package/python/sweagent/utils/__init__.py +0 -0
  97. package/python/sweagent/utils/config.py +80 -0
  98. package/python/sweagent/utils/files.py +27 -0
  99. package/python/sweagent/utils/github.py +118 -0
  100. package/python/sweagent/utils/jinja_warnings.py +14 -0
  101. package/python/sweagent/utils/log.py +175 -0
  102. package/python/sweagent/utils/patch_formatter.py +152 -0
  103. package/python/sweagent/utils/serialization.py +45 -0
  104. package/python/tests/__init__.py +0 -0
  105. package/python/tests/conftest.py +191 -0
  106. package/python/tests/test_agent.py +258 -0
  107. package/python/tests/test_batch_instance.py +43 -0
  108. package/python/tests/test_commands/_interactive_dummy.py +35 -0
  109. package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
  110. package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
  111. package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
  112. package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
  113. package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
  114. package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
  115. package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
  116. package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
  117. package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
  118. package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
  119. package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
  120. package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
  121. package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
  122. package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
  123. package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
  124. package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
  125. package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
  126. package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
  127. package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
  128. package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
  129. package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
  130. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
  131. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
  132. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
  133. package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
  134. package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
  135. package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
  136. package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
  137. package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
  138. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
  139. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
  140. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
  141. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
  142. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
  143. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
  144. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
  145. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
  146. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
  147. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
  148. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
  149. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
  150. package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
  151. package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
  152. package/python/tests/test_data/data_sources/human_eval.json +1 -0
  153. package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
  154. package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
  155. package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
  156. package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
  157. package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
  158. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
  159. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
  160. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
  161. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
  162. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
  163. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
  164. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
  165. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
  166. package/python/tests/test_env.py +66 -0
  167. package/python/tests/test_env_utils.py +129 -0
  168. package/python/tests/test_history_processors.py +40 -0
  169. package/python/tests/test_models.py +23 -0
  170. package/python/tests/test_openai_live.py +164 -0
  171. package/python/tests/test_packaging.py +7 -0
  172. package/python/tests/test_parsing.py +131 -0
  173. package/python/tests/test_problem_statement_multimodal.py +111 -0
  174. package/python/tests/test_quick_stats.py +42 -0
  175. package/python/tests/test_run.py +37 -0
  176. package/python/tests/test_run_batch.py +110 -0
  177. package/python/tests/test_run_hooks.py +114 -0
  178. package/python/tests/test_run_replay.py +33 -0
  179. package/python/tests/test_run_single.py +125 -0
  180. package/python/tests/test_tools_command_parsing.py +193 -0
  181. package/python/tests/test_utils.py +15 -0
  182. package/python/tests/tools/__init__.py +0 -0
  183. package/python/tests/tools/conftest.py +12 -0
  184. package/python/tests/tools/test_default_utils.py +153 -0
  185. package/python/tests/tools/test_edit_replace.py +0 -0
  186. package/python/tests/tools/test_split_string.py +82 -0
  187. package/python/tests/utils.py +29 -0
  188. package/python/tools/diff_state/bin/_state_diff_state +52 -0
  189. package/python/tools/diff_state/config.yaml +2 -0
  190. package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
  191. package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
  192. package/python/tools/edit_anthropic/config.yaml +56 -0
  193. package/python/tools/edit_anthropic/install.sh +3 -0
  194. package/python/tools/filemap/bin/filemap +45 -0
  195. package/python/tools/filemap/config.yaml +9 -0
  196. package/python/tools/filemap/install.sh +2 -0
  197. package/python/tools/forfeit/bin/exit_forfeit +5 -0
  198. package/python/tools/forfeit/config.yaml +5 -0
  199. package/python/tools/image_tools/bin/view_image +36 -0
  200. package/python/tools/image_tools/config.yaml +9 -0
  201. package/python/tools/multilingual_setup/bin/do_nothing +2 -0
  202. package/python/tools/multilingual_setup/config.yaml +1 -0
  203. package/python/tools/multilingual_setup/install.sh +45 -0
  204. package/python/tools/registry/bin/_read_env +10 -0
  205. package/python/tools/registry/bin/_write_env +10 -0
  206. package/python/tools/registry/config.yaml +1 -0
  207. package/python/tools/registry/install.sh +6 -0
  208. package/python/tools/registry/lib/__init__.py +0 -0
  209. package/python/tools/registry/lib/registry.py +56 -0
  210. package/python/tools/review_on_submit_m/README.md +6 -0
  211. package/python/tools/review_on_submit_m/bin/submit +54 -0
  212. package/python/tools/review_on_submit_m/config.yaml +6 -0
  213. package/python/tools/review_on_submit_m/install.sh +0 -0
  214. package/python/tools/search/bin/find_file +31 -0
  215. package/python/tools/search/bin/search_dir +39 -0
  216. package/python/tools/search/bin/search_file +55 -0
  217. package/python/tools/search/config.yaml +37 -0
  218. package/python/tools/search/install.sh +3 -0
  219. package/python/tools/submit/bin/submit +17 -0
  220. package/python/tools/submit/config.yaml +5 -0
  221. package/python/tools/web_browser/bin/click_mouse +41 -0
  222. package/python/tools/web_browser/bin/close_site +28 -0
  223. package/python/tools/web_browser/bin/double_click_mouse +37 -0
  224. package/python/tools/web_browser/bin/drag_mouse +46 -0
  225. package/python/tools/web_browser/bin/execute_script_on_page +39 -0
  226. package/python/tools/web_browser/bin/get_console_output +48 -0
  227. package/python/tools/web_browser/bin/move_mouse +35 -0
  228. package/python/tools/web_browser/bin/navigate_back +33 -0
  229. package/python/tools/web_browser/bin/navigate_forward +33 -0
  230. package/python/tools/web_browser/bin/open_site +36 -0
  231. package/python/tools/web_browser/bin/press_keys_on_page +51 -0
  232. package/python/tools/web_browser/bin/reload_page +33 -0
  233. package/python/tools/web_browser/bin/run_web_browser_server +394 -0
  234. package/python/tools/web_browser/bin/screenshot_site +38 -0
  235. package/python/tools/web_browser/bin/scroll_on_page +40 -0
  236. package/python/tools/web_browser/bin/set_browser_window_size +40 -0
  237. package/python/tools/web_browser/bin/type_text +34 -0
  238. package/python/tools/web_browser/bin/wait_time +39 -0
  239. package/python/tools/web_browser/config.yaml +155 -0
  240. package/python/tools/web_browser/install.sh +22 -0
  241. package/python/tools/web_browser/lib/browser_manager.py +404 -0
  242. package/python/tools/web_browser/lib/web_browser_config.py +33 -0
  243. package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
  244. package/python/tools/web_browser/test_console.html +1 -0
  245. package/python/tools/windowed/bin/_state +25 -0
  246. package/python/tools/windowed/bin/create +29 -0
  247. package/python/tools/windowed/bin/goto +37 -0
  248. package/python/tools/windowed/bin/open +49 -0
  249. package/python/tools/windowed/bin/scroll_down +12 -0
  250. package/python/tools/windowed/bin/scroll_up +13 -0
  251. package/python/tools/windowed/config.yaml +38 -0
  252. package/python/tools/windowed/install.sh +15 -0
  253. package/python/tools/windowed/lib/__init__.py +0 -0
  254. package/python/tools/windowed/lib/flake8_utils.py +147 -0
  255. package/python/tools/windowed/lib/windowed_file.py +312 -0
  256. package/python/tools/windowed_edit_linting/bin/edit +128 -0
  257. package/python/tools/windowed_edit_linting/config.yaml +31 -0
  258. package/python/tools/windowed_edit_linting/install.sh +5 -0
  259. package/python/tools/windowed_edit_replace/bin/edit +172 -0
  260. package/python/tools/windowed_edit_replace/bin/insert +77 -0
  261. package/python/tools/windowed_edit_replace/config.yaml +60 -0
  262. package/python/tools/windowed_edit_replace/install.sh +5 -0
  263. package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
  264. package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
  265. package/python/tools/windowed_edit_rewrite/install.sh +5 -0
  266. package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
  267. package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
  268. package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
  269. package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
  270. package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
  271. package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
  272. package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
  273. package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
  274. package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
  275. package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
  276. package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
  277. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
  278. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  279. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  280. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
  281. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
  282. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
  283. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  284. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  285. package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
  286. package/rust/Cargo.toml +100 -0
  287. package/rust/README.md +49 -0
  288. package/rust/src/agent/action_sampler.rs +130 -0
  289. package/rust/src/agent/agents.rs +1029 -0
  290. package/rust/src/agent/history_processors.rs +277 -0
  291. package/rust/src/agent/hooks/mod.rs +208 -0
  292. package/rust/src/agent/mod.rs +24 -0
  293. package/rust/src/agent/models.rs +837 -0
  294. package/rust/src/agent/problem_statement.rs +355 -0
  295. package/rust/src/agent/reviewer.rs +505 -0
  296. package/rust/src/bin/sweagent.rs +784 -0
  297. package/rust/src/environment/deployment.rs +631 -0
  298. package/rust/src/environment/hooks/mod.rs +114 -0
  299. package/rust/src/environment/mod.rs +16 -0
  300. package/rust/src/environment/repo.rs +265 -0
  301. package/rust/src/environment/runtime.rs +237 -0
  302. package/rust/src/environment/swe_env.rs +248 -0
  303. package/rust/src/exceptions.rs +228 -0
  304. package/rust/src/lib.rs +68 -0
  305. package/rust/src/monitoring.rs +482 -0
  306. package/rust/src/run/hooks/mod.rs +134 -0
  307. package/rust/src/run/mod.rs +12 -0
  308. package/rust/src/run/run_batch.rs +563 -0
  309. package/rust/src/run/run_single.rs +196 -0
  310. package/rust/src/tools/bundle.rs +224 -0
  311. package/rust/src/tools/commands.rs +173 -0
  312. package/rust/src/tools/mod.rs +295 -0
  313. package/rust/src/tools/parsing.rs +354 -0
  314. package/rust/src/tools/registry.rs +143 -0
  315. package/rust/src/types.rs +554 -0
  316. package/rust/src/utils/config.rs +105 -0
  317. package/rust/src/utils/files.rs +137 -0
  318. package/rust/src/utils/github.rs +171 -0
  319. package/rust/src/utils/log.rs +65 -0
  320. package/rust/src/utils/mod.rs +17 -0
  321. package/rust/src/utils/serialization.rs +181 -0
  322. package/rust/src/utils/template.rs +173 -0
  323. package/typescript/README.md +335 -0
@@ -0,0 +1,442 @@
1
+ """
2
+ Run on a batch of instances/issues, e.g., SWE-bench.
3
+
4
+ [cyan][bold]=== BASIC OPTIONS ===[/bold][/cyan]
5
+
6
+ -h --help Show help text and exit
7
+ --help_option Print specific help text and exit
8
+
9
+ [cyan][bold]=== EXAMPLES ===[/bold][/cyan]
10
+
11
+ Basic usage: Run over a [bold][cyan]SWE-bench lite[/bold][/cyan][green]:
12
+
13
+ sweagent run-batch \\
14
+ --instances.type swe_bench \\ # configure instances
15
+ --instances.subset lite \\
16
+ --instances.split dev \\
17
+ --instances.slice :50 \\ # first 50 instances
18
+ --instances.shuffle=True \\ # shuffle instances (with fixed seed)
19
+ --config config/default.yaml \\
20
+ --agent.model.name gpt-4o # configure model
21
+ [/green]
22
+
23
+ [cyan][bold]=== LOADING INSTANCES ===[/bold][/cyan]
24
+
25
+ [cyan][bold]From a file[/bold][/cyan] [green]--instances.type file --instances.path /path/to/file[/green].
26
+ [cyan][bold]From huggingface[/bold][/cyan] [green]--instances.type huggingface --instances.dataset_name=SWE_Bench_lite --instances.split=dev[/green].
27
+
28
+ All instance specifications support the [green]filter[/green], [green]slice[/green], and [green]shuffle[/green] options.
29
+ With [green]filter[/green], you can select specific instances, e.g., [green]--instances.filter='instance_id_1|instance_id_2'[/green].
30
+ """
31
+
32
+ import getpass
33
+ import json
34
+ import logging
35
+ import random
36
+ import sys
37
+ import time
38
+ import traceback
39
+ from concurrent.futures import ThreadPoolExecutor, as_completed
40
+ from contextlib import ExitStack
41
+ from pathlib import Path
42
+ from typing import Self
43
+
44
+ import yaml
45
+ from pydantic import Field, model_validator
46
+ from pydantic_settings import BaseSettings, SettingsConfigDict
47
+ from rich.live import Live
48
+ from swerex.deployment.hooks.status import SetStatusDeploymentHook
49
+
50
+ from sweagent import TRAJECTORY_DIR
51
+ from sweagent.agent.agents import AgentConfig, get_agent_from_config
52
+ from sweagent.agent.hooks.status import SetStatusAgentHook
53
+ from sweagent.environment.hooks.status import SetStatusEnvironmentHook
54
+ from sweagent.environment.swe_env import SWEEnv
55
+ from sweagent.exceptions import ModelConfigurationError, TotalCostLimitExceededError
56
+ from sweagent.run._progress import RunBatchProgressManager
57
+ from sweagent.run.batch_instances import BatchInstance, BatchInstanceSourceConfig, SWEBenchInstances
58
+ from sweagent.run.common import BasicCLI, ConfigHelper, save_predictions
59
+ from sweagent.run.hooks.abstract import CombinedRunHooks, RunHook
60
+ from sweagent.run.hooks.apply_patch import SaveApplyPatchHook
61
+ from sweagent.run.merge_predictions import merge_predictions
62
+ from sweagent.run.run_single import RunSingleConfig
63
+ from sweagent.types import AgentRunResult
64
+ from sweagent.utils.config import load_environment_variables
65
+ from sweagent.utils.log import (
66
+ add_file_handler,
67
+ add_logger_names_to_stream_handlers,
68
+ get_logger,
69
+ register_thread_name,
70
+ remove_file_handler,
71
+ set_stream_handler_levels,
72
+ )
73
+
74
+
75
class RunBatchConfig(BaseSettings, cli_implicit_flags=False):
    # Settings for a batch run: which instances to run, which agent to use,
    # where to write results, and how the run is parallelised.
    instances: BatchInstanceSourceConfig = Field(description="Instances to run.")
    agent: AgentConfig = Field(description="Agent options.")
    output_dir: Path = Field(default=Path("DEFAULT"), description="Output directory.")
    suffix: str = ""
    """Suffix to add to the output directory. Only used if `output_dir` is `DEFAULT`."""
    raise_exceptions: bool = False
    """Raise exceptions instead of skipping instances."""
    redo_existing: bool = False
    """Do not skip instances that already have a trajectory."""
    env_var_path: Path | None = None
    """Path to a .env file to load environment variables from."""
    num_workers: int = Field(default=1)
    """Number of parallel workers to use."""
    random_delay_multiplier: float = 0.3
    """We will wait for a random amount of time between 0 and `random_delay_multiplier`
    times the number of workers at the start of each instance. This is to avoid any
    potential race condition or issues with bottlenecks, e.g., when running on a platform
    with few CPUs that cannot handle the startup of all containers in time.
    """
    progress_bar: bool = True
    """Whether to show a progress bar. Progress bar is never shown for human models.
    Progress bar is always shown for multi-worker runs.
    """

    # pydantic config
    model_config = SettingsConfigDict(extra="forbid", env_prefix="SWE_AGENT_")

    def set_default_output_dir(self) -> None:
        """Replace the ``DEFAULT`` placeholder in ``output_dir`` with a derived path.

        The derived path is
        ``TRAJECTORY_DIR / <user> / "<config stem>__<model id>___<source id><suffix>"``.
        Does nothing if ``output_dir`` was set to anything other than ``DEFAULT``.
        """
        # Needs to be called explicitly, because self._config_files will be setup
        # post-init.
        if self.output_dir == Path("DEFAULT"):
            user_id = getpass.getuser()
            source_id = self.instances.id
            try:
                model_id = self.agent.model.id  # type: ignore[attr-defined]
            except AttributeError:
                # Agent configs without a `model.id` still get a usable directory name.
                model_id = "unknown"
            config_file = getattr(self, "_config_files", ["no_config"])[0]
            if config_file != "no_config":
                config_file = Path(config_file).stem
            suffix = f"__{self.suffix}" if self.suffix else ""
            self.output_dir = TRAJECTORY_DIR / user_id / f"{config_file}__{model_id}___{source_id}{suffix}"

    @model_validator(mode="after")
    def evaluate_and_redo_existing(self) -> Self:
        """Reject configs that enable both SWE-bench evaluation and ``redo_existing``.

        Only applies when ``instances`` is a ``SWEBenchInstances``; any other
        instance source passes through unchanged.

        Raises:
            ValueError: If ``instances.evaluate`` and ``redo_existing`` are both set.
        """
        if not isinstance(self.instances, SWEBenchInstances):
            return self
        if self.instances.evaluate and self.redo_existing:
            # Adjacent literals are concatenated verbatim, so every fragment that
            # is followed by another one must end with a space.
            msg = (
                "Cannot evaluate and redo existing at the same time. This would cause invalid results, because "
                "after the first merge_preds gives you a preds.json, this file would be submitted to SB-CLI, causing "
                "evaluation of old instances, which could then not be overwritten by the new ones."
            )
            raise ValueError(msg)
        return self
131
+
132
+
133
class _BreakLoop(Exception):
    """Internal control-flow signal used to stop iterating over instances.

    Not an error condition: it is caught inside this module and is not meant
    to reach callers.
    """
135
+
136
+
137
+ class RunBatch:
138
def __init__(
    self,
    instances: list[BatchInstance],
    agent_config: AgentConfig,
    *,
    output_dir: Path = Path("."),
    hooks: list[RunHook] | None = None,
    raise_exceptions: bool = False,
    redo_existing: bool = False,
    num_workers: int = 1,
    progress_bar: bool = True,
    random_delay_multiplier: float = 0.3,
):
    """Note: When initializing this class, make sure to add the hooks that are required by your actions.
    See `from_config` for an example.

    Args:
        instances: Instances to run.
        agent_config: Configuration of the agent used for every instance.
        output_dir: Directory that receives the log file and the exit status report.
        hooks: If not specified, the default hooks will be used.
        raise_exceptions: Raise exceptions instead of skipping instances.
        redo_existing: Do not skip instances that already have a trajectory.
        num_workers: Number of parallel workers to use. Default is 1 (sequential execution).
            Capped at the number of instances.
        progress_bar: Whether to show a progress bar. Progress bar is never shown for human models.
            Progress bar is always shown for multi-worker runs.
        random_delay_multiplier: We will wait for a random amount of time between 0 and `random_delay_multiplier`
            times the number of workers at the start of each instance. This is to avoid any
            potential race conditions.

    Raises:
        ValueError: If a human model is combined with more than one worker.
    """
    # `_model_id` reads `self.agent_config` and falls back to "unknown" on
    # AttributeError, so the config must be stored BEFORE the guard below;
    # otherwise the guard always sees "unknown" and never fires.
    self.agent_config = agent_config
    if self._model_id in ["human", "human_thought"] and num_workers > 1:
        msg = "Cannot run with human model in parallel"
        raise ValueError(msg)

    self.logger = get_logger("swea-run", emoji="🏃")
    add_file_handler(
        output_dir / "run_batch.log",
        id_="progress",
        filter=lambda name: "swea-run" in name or "config" in name,
    )
    self.instances = instances
    self.output_dir = output_dir
    self._raise_exceptions = raise_exceptions
    self._chooks = CombinedRunHooks()
    self._redo_existing = redo_existing
    # Never start more workers than there are instances to process.
    self._num_workers = min(num_workers, len(instances))
    for hook in hooks or [SaveApplyPatchHook(show_success_message=False)]:
        self.add_hook(hook)
    self._progress_manager = RunBatchProgressManager(
        num_instances=len(instances), yaml_report_path=output_dir / "run_batch_exit_statuses.yaml"
    )
    self._show_progress_bar = progress_bar
    self._random_delay_multiplier = random_delay_multiplier
187
+
188
@property
def _model_id(self) -> str:
    """Return the configured model's id, or ``"unknown"`` if it cannot be read.

    Any ``AttributeError`` raised while resolving ``self.agent_config.model.id``
    is turned into the ``"unknown"`` fallback.
    """
    try:
        resolved = self.agent_config.model.id  # type: ignore[attr-defined]
    except AttributeError:
        resolved = "unknown"
    return resolved
194
+
195
@classmethod
def from_config(cls, config: RunBatchConfig) -> Self:
    """Build a runner from a ``RunBatchConfig``.

    Loads environment variables, resolves and creates the output directory,
    writes the config to ``run_batch.config.yaml``, loads the instances and
    constructs the runner. When the instance source is a ``SWEBenchInstances``
    with ``evaluate`` set, a ``SweBenchEvaluate`` hook is added as well.

    Raises:
        ValueError: If the instance source yields no instances.
    """
    load_environment_variables(config.env_var_path)
    config.set_default_output_dir()
    out_dir = config.output_dir
    out_dir.mkdir(parents=True, exist_ok=True)
    # NOTE(review): this YAML-dumps the JSON *string*, so the file holds one
    # YAML string scalar rather than a mapping -- confirm that is intended.
    config_dump = yaml.dump(config.model_dump_json(), indent=2)
    (out_dir / "run_batch.config.yaml").write_text(config_dump)

    logger = get_logger("run", emoji="🏃")
    logger.debug("Loading instances from %s", f"{config.instances!r}")
    loaded = config.instances.get_instance_configs()
    logger.info("Loaded %d instances", len(loaded))
    if not loaded:
        msg = (
            "No instances to run. Here are a few things to check:\n"
            "- With huggingface data: Check that you have the right split (test or dev)\n"
            "- Check your filter does not exclude all instances (check the info log messages)"
        )
        raise ValueError(msg)
    logger.debug("The first instance is %s", f"{loaded[0]!r}")

    runner = cls(
        instances=loaded,
        agent_config=config.agent,
        output_dir=out_dir,
        raise_exceptions=config.raise_exceptions,
        redo_existing=config.redo_existing,
        num_workers=config.num_workers,
        progress_bar=config.progress_bar,
        random_delay_multiplier=config.random_delay_multiplier,
    )

    source = config.instances
    if isinstance(source, SWEBenchInstances) and source.evaluate:
        # Imported lazily so the evaluation hook is only loaded when requested.
        from sweagent.run.hooks.swe_bench_evaluate import SweBenchEvaluate

        evaluate_hook = SweBenchEvaluate(
            output_dir=out_dir,
            subset=source.subset,
            split=source.split,
            continuous_submission_every=30,
        )
        runner.add_hook(evaluate_hook)
    return runner
235
+
236
def add_hook(self, hook: RunHook) -> None:
    """Register a run hook.

    The hook's ``on_init`` is called with this run first, then the hook is
    added to the combined hooks object ``self._chooks``.
    """
    hook.on_init(run=self)
    self._chooks.add_hook(hook)
239
+
240
def main(self) -> None:
    """Run all instances, merge their predictions into ``preds.json`` and fire the start/end hooks."""
    self.logger.info("Starting run. Find output files at %s", self.output_dir)
    self._chooks.on_start()

    # More than one worker means the thread-pool path; otherwise run sequentially.
    run_all = self.main_multi_worker if self._num_workers > 1 else self.main_single_worker
    run_all()

    # Each instance writes into its own sub-directory named after the problem statement id.
    instance_dirs = [self.output_dir / instance.problem_statement.id for instance in self.instances]
    merge_predictions(instance_dirs, self.output_dir / "preds.json")

    self._chooks.on_end()
255
+
256
def main_single_worker(self) -> None:
    """Run the instances one after another in the calling thread.

    The live progress display is only shown when the model is not one of the
    human models and the progress bar is enabled. A ``_BreakLoop`` raised by
    ``run_instance`` stops the iteration.
    """
    with ExitStack() as stack:
        # Conditionally add progress bar
        is_human_model = self._model_id in ["human", "human_thought"]
        if not is_human_model and self._show_progress_bar:
            stack.enter_context(Live(self._progress_manager.render_group))
        for instance in self.instances:
            try:
                self.run_instance(instance)
            except _BreakLoop:
                self.logger.info("Stopping loop over instances")
                break
267
+
268
def main_multi_worker(self) -> None:
    """Run all instances concurrently on a thread pool with ``self._num_workers`` workers.

    Every instance is submitted as ``self.run_instance``. A ``KeyboardInterrupt``
    or ``_BreakLoop`` raised while collecting results cancels the futures that
    have not started yet. The progress report is printed in every case.
    """
    add_logger_names_to_stream_handlers()
    # Set all stream handlers to WARNING and set everything where we want to have
    # more verbosity explicitly
    set_stream_handler_levels(logging.WARNING)
    self.logger.setLevel(logging.TRACE)  # type: ignore

    with Live(self._progress_manager.render_group):
        with ThreadPoolExecutor(max_workers=self._num_workers) as executor:
            futures = [executor.submit(self.run_instance, instance) for instance in self.instances]
            try:
                # result() re-raises in this thread whatever run_instance raised in the worker
                for future in as_completed(futures):
                    future.result()
            except (KeyboardInterrupt, _BreakLoop):
                # NOTE: the message only mentions a keyboard interrupt, but this branch
                # is also taken when a worker raised _BreakLoop.
                msg = (
                    "Received keyboard interrupt, waiting for running instances "
                    "to finish, but cancelled everything else"
                )
                self.logger.info(msg)
                executor.shutdown(wait=False, cancel_futures=True)
            finally:
                self._progress_manager.print_report()
290
+
291
def run_instance(self, instance: BatchInstance) -> None:
    """Run a single instance with logging, progress bookkeeping and the exception policy.

    Raises:
        _BreakLoop: On ``KeyboardInterrupt``; also on ``SystemExit``,
            ``ModelConfigurationError`` or ``TotalCostLimitExceededError`` when
            ``self._raise_exceptions`` is false.
        Exception: Any exception from ``_run_instance`` is re-raised unchanged when
            ``self._raise_exceptions`` is true; otherwise ordinary exceptions are
            logged and swallowed.
    """
    self.logger.info("Running on instance %s", instance.problem_statement.id)
    register_thread_name(instance.problem_statement.id)
    self._add_instance_log_file_handlers(instance.problem_statement.id, multi_worker=self._num_workers > 1)
    # Let's add some randomness to avoid any potential race conditions or thundering herd
    # (the delay is zero with a single worker because of the `_num_workers - 1` factor)
    if self._progress_manager.n_completed < self._num_workers:
        time.sleep(random.random() * self._random_delay_multiplier * (self._num_workers - 1))

    self._progress_manager.on_instance_start(instance.problem_statement.id)

    # should_skip returns the previous exit status (truthy) when the instance was already run
    if previous_exit_status := self.should_skip(instance):
        self._progress_manager.on_instance_end(
            instance.problem_statement.id, exit_status=f"skipped ({previous_exit_status})"
        )
        self._remove_instance_log_file_handlers(instance.problem_statement.id)
        return

    # Either catch and silence exception, or raise _BreakLoop to stop the loop
    # over the instances
    try:
        result = self._run_instance(instance)
    except KeyboardInterrupt:
        raise _BreakLoop
    except (SystemExit, ModelConfigurationError, TotalCostLimitExceededError) as e:
        if self._raise_exceptions:
            raise
        self.logger.critical(f"❌ Exiting because {e.__class__.__name__} was called")
        raise _BreakLoop
    except Exception as e:
        self.logger.error(traceback.format_exc())
        self.logger.error(f"❌ Failed on {instance.problem_statement.id}: {e}")
        self._progress_manager.on_uncaught_exception(instance.problem_statement.id, e)
        if self._raise_exceptions:
            raise
    else:
        # Only reached when _run_instance returned normally
        self._progress_manager.on_instance_end(
            instance.problem_statement.id, exit_status=result.info.get("exit_status", "unknown_exit")
        )
    finally:
        # Runs on success, on swallowed errors and while an exception propagates
        self._progress_manager.update_exit_status_table()
        self._remove_instance_log_file_handlers(instance.problem_statement.id)
332
+
333
def _run_instance(self, instance: BatchInstance) -> AgentRunResult:
    """Set up agent and environment for one instance, run the agent and save its prediction.

    Writes ``<id>.config.yaml`` into the instance output directory, attaches the
    status hooks, starts the environment, runs the agent, and always closes the
    environment. Exceptions are logged to the agent logger and re-raised.

    Returns:
        The result returned by ``agent.run``.
    """
    output_dir = Path(self.output_dir) / instance.problem_statement.id
    output_dir.mkdir(parents=True, exist_ok=True)
    # NOTE(review): this mutates the agent config shared by all instances, while
    # main_multi_worker calls run_instance from several threads — confirm that a
    # concurrent overwrite of `name` cannot leak into another instance's agent.
    self.agent_config.name = f"{instance.problem_statement.id}"
    agent = get_agent_from_config(self.agent_config)
    single_run_replay_config = RunSingleConfig(
        agent=self.agent_config,
        problem_statement=instance.problem_statement,
        env=instance.env,
    )
    # The JSON dump of the replay config is written as a single YAML string scalar
    (output_dir / f"{instance.problem_statement.id}.config.yaml").write_text(
        yaml.dump(single_run_replay_config.model_dump_json(), indent=2)
    )
    agent.replay_config = single_run_replay_config  # type: ignore[attr-defined]
    agent.add_hook(SetStatusAgentHook(instance.problem_statement.id, self._progress_manager.update_instance_status))
    self._progress_manager.update_instance_status(instance.problem_statement.id, "Starting environment")
    instance.env.name = f"{instance.problem_statement.id}"
    env = SWEEnv.from_config(instance.env)
    env.add_hook(
        SetStatusEnvironmentHook(instance.problem_statement.id, self._progress_manager.update_instance_status)
    )
    env.deployment.add_hook(
        SetStatusDeploymentHook(instance.problem_statement.id, self._progress_manager.update_instance_status)
    )
    try:
        env.start()
        # Note: the hook is always called with index=0, regardless of the instance position
        self._chooks.on_instance_start(index=0, env=env, problem_statement=instance.problem_statement)
        result = agent.run(
            problem_statement=instance.problem_statement,
            env=env,
            output_dir=output_dir,
        )
    except Exception:
        # The actual handling is happening in `run_instance`, but we need to make sure that
        # we log it to the agent specific logger as well
        agent.logger.error(traceback.format_exc())  # type: ignore[attr-defined]
        raise
    finally:
        env.close()
    # Only reached when agent.run returned normally
    save_predictions(self.output_dir, instance.problem_statement.id, result)
    self._chooks.on_instance_completed(result=result)
    return result
375
+
376
def should_skip(self, instance: BatchInstance) -> bool | str:
    """Decide whether an instance already has a usable trajectory.

    Returns:
        ``False`` when the instance has to be (re)run, otherwise the exit status
        stored in the existing trajectory file.

    Empty, unreadable or incomplete trajectory files (no exit status, or
    ``"early_exit"``) are deleted so that the instance is run again.
    """
    if self._redo_existing:
        return False

    # Check if there's an existing trajectory for this instance
    instance_id = instance.problem_statement.id
    log_path = self.output_dir / instance_id / (instance_id + ".traj")
    if not log_path.exists():
        return False

    content = log_path.read_text()
    if not content.strip():
        self.logger.warning("Found empty trajectory: %s. Removing.", log_path)
        log_path.unlink()
        return False

    try:
        exit_status = json.loads(content)["info"].get("exit_status", None)
        # A missing or "early_exit" status marks an incomplete run that must be redone
        if exit_status in (None, "early_exit"):
            self.logger.warning(f"Found existing trajectory with no exit status: {log_path}. Removing.")
            log_path.unlink()
            return False
    except Exception as e:
        # If we can't check the trajectory, we will redo it
        self.logger.error(f"Failed to check existing trajectory: {log_path}: {e}. Removing.")
        log_path.unlink()
        return False

    # A complete trajectory exists: skip and report its exit status
    self.logger.info(f"⏭️ Skipping existing trajectory: {log_path}")
    return exit_status
410
+
411
def _add_instance_log_file_handlers(self, instance_id: str, multi_worker: bool = False) -> None:
    """Attach one log file handler per level (trace, debug, info) for an instance.

    The files are created as ``<output_dir>/<instance_id>/<instance_id>.<level>.log``
    and registered under the id ``<instance_id>-<level>``.

    Args:
        instance_id: Id of the instance; used for the directory, the file names
            and the handler ids.
        multi_worker: When true, the instance id is passed as the handler filter;
            otherwise an empty string is passed.
    """
    # The file name is built in one step: running str.format over a template that
    # already contains the instance id would break on ids containing "{" or "}".
    name_filter = instance_id if multi_worker else ""
    instance_dir = self.output_dir / instance_id
    for level in ("trace", "debug", "info"):
        add_file_handler(
            instance_dir / f"{instance_id}.{level}.log",
            filter=name_filter,
            level=level,
            id_=f"{instance_id}-{level}",
        )
421
+
422
def _remove_instance_log_file_handlers(self, instance_id: str) -> None:
    """Remove the trace/debug/info file handlers registered under ``<instance_id>-<level>``."""
    for level in ["trace", "debug", "info"]:
        remove_file_handler(f"{instance_id}-{level}")
425
+
426
+
427
def run_from_config(config: RunBatchConfig):
    """Build a ``RunBatch`` from the given config and run it."""
    RunBatch.from_config(config).main()
429
+
430
+
431
def run_from_cli(args: list[str] | None = None):
    """Parse command line arguments into a ``RunBatchConfig`` and run the batch.

    Args:
        args: Argument list to parse; defaults to ``sys.argv[1:]`` when ``None``.
    """
    cli_args = sys.argv[1:] if args is None else args
    assert __doc__ is not None
    # The module docstring is the head of the help text, followed by the generated option help.
    options_header = "\n[cyan][bold]=== ALL THE OPTIONS ===[/bold][/cyan]\n\n"
    help_text = __doc__ + options_header + ConfigHelper().get_help(RunBatchConfig)  # type: ignore
    config = BasicCLI(RunBatchConfig, help_text=help_text).get_config(cli_args)  # type: ignore
    run_from_config(config)
439
+
440
+
441
# Script entry point: parse the command line and run the batch.
if __name__ == "__main__":
    run_from_cli()
@@ -0,0 +1,219 @@
1
+ """[cyan][bold]Replay a trajectory file.[/bold][/cyan]
2
+
3
+ [cyan][bold]=== DESCRIPTION ===[/bold][/cyan]
4
+
5
+ We will take all actions in the trajectory and execute them in an environment.
6
+
7
+ This has two main use cases:
8
+
9
+ 1. Create a demo from a yaml file containing actions (can also be created from a trajectory file with [green]sweagent run traj-to-demo[/green]).
10
+ [green]run-replay[/green] will execute the actions to get the environment output and produce a full trajectory to be used as a demo.
11
+ 2. Debugging and testing of tools and environment behavior.
12
+
13
+ [cyan][bold]=== EXAMPLES ===[/bold][/cyan]
14
+
15
+ Replay a trajectory file:
16
+
17
+ [green]sweagent run replay --traj_path mytraj.traj[/green]
18
+
19
+ Replay a demo file:
20
+
21
+ [green]sweagent run replay --traj_path mydemo.demo.yaml[/green]
22
+ """
23
+
24
+ import json
25
+ import sys
26
+ import tempfile
27
+ from getpass import getuser
28
+ from pathlib import Path
29
+ from typing import Any
30
+
31
+ import yaml
32
+ from pydantic_settings import BaseSettings, SettingsConfigDict
33
+ from swerex.deployment.abstract import AbstractDeployment
34
+ from swerex.deployment.config import DeploymentConfig, get_deployment
35
+ from typing_extensions import Self
36
+
37
+ from sweagent.agent.agents import DefaultAgent
38
+ from sweagent.agent.models import ReplayModelConfig
39
+ from sweagent.environment.swe_env import SWEEnv
40
+ from sweagent.run.common import BasicCLI, ConfigHelper
41
+ from sweagent.run.run_single import RunSingle, RunSingleConfig
42
+ from sweagent.utils.config import load_environment_variables
43
+ from sweagent.utils.log import get_logger
44
+
45
+
46
class RunReplayConfig(BaseSettings, cli_implicit_flags=False):
    # Settings for replaying a trajectory file; extra fields are forbidden and
    # environment variables use the SWE_AGENT_ prefix (see model_config below).
    traj_path: Path
    deployment: DeploymentConfig | None = None
    """Override the deployment in the trajectory."""
    # Path("DEFAULT") is a placeholder that model_post_init replaces
    output_dir: Path = Path("DEFAULT")
    env_var_path: Path | None = None
    """Path to a .env file to load environment variables from."""
    update_config: list[Path] = []
    """Additional config files to merge with the replay config."""

    # pydantic config
    model_config = SettingsConfigDict(extra="forbid", env_prefix="SWE_AGENT_")

    def model_post_init(self, __context: Any) -> None:
        # Resolve the placeholder to <cwd>/trajectories/<user>/replay___<traj stem>,
        # then make sure the output directory exists.
        if self.output_dir == Path("DEFAULT"):
            user_id = getuser()
            self.output_dir = Path.cwd() / "trajectories" / user_id / f"replay___{self.traj_path.stem}"
        self.output_dir.mkdir(parents=True, exist_ok=True)
64
+
65
+
66
class RunReplay:
    """Replay the assistant actions recorded in a trajectory or demo file.

    The actions are extracted from the trajectory history and written to a JSON
    file whose path is handed to ``ReplayModelConfig``; the run itself is
    delegated to ``RunSingle``.
    """

    def __init__(
        self,
        *,
        traj_path: Path,
        deployment: AbstractDeployment | None,
        output_dir: Path,
        update_config: list[Path] | None = None,
        _catch_errors: bool = False,
        _require_zero_exit_code: bool = False,
    ):
        """Load the trajectory and derive the replay configuration from it.

        Args:
            traj_path: Trajectory file; parsed as YAML when the suffix is ``.yaml``,
                otherwise as JSON.
            deployment: Deployment to use; when ``None`` it is created from the
                deployment config stored in the trajectory.
            output_dir: Directory passed on to ``RunSingle``.
            update_config: YAML files merged on top of the replay config.
            _catch_errors: Copied to the agent's ``_catch_errors``.
            _require_zero_exit_code: Copied to the agent's
                ``_always_require_zero_exit_code``.

        Raises:
            ValueError: If the trajectory has no ``replay_config`` entry.
        """
        self.traj_path = traj_path
        self.output_dir = output_dir
        # Keep the temporary file on disk (delete=False). Taking only `.name` from a
        # NamedTemporaryFile that is dropped at once deletes the file and leaves a
        # bare name that something else could claim before the actions are written.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as actions_file:
            self._replay_action_trajs_path = Path(actions_file.name)
        self.logger = get_logger("swea-run", emoji="🏃")
        self._catch_errors = _catch_errors
        self._require_zero_exit_code = _require_zero_exit_code
        self._update_config = update_config if update_config is not None else []

        if traj_path.suffix == ".yaml":
            self._traj_data = yaml.safe_load(traj_path.read_text())
        else:
            self._traj_data = json.loads(traj_path.read_text())
        self.config = self._get_config_from_agent(self._traj_data)

        if deployment is None:
            self.deployment = get_deployment(self.config.env.deployment)
        else:
            self.deployment = deployment

    def _get_config_from_agent(self, traj_data):
        """Build the ``RunSingleConfig`` for the replay from the trajectory data.

        The stored ``replay_config`` (decoded from JSON first when it is a string)
        is validated, the update config files are merged on top of it, and the
        agent model is replaced by a ``ReplayModelConfig`` pointing at the actions file.

        Raises:
            ValueError: If ``traj_data`` has no ``replay_config`` key.
        """
        # Only the lookup is guarded, so that a KeyError raised further down is not
        # misreported as a missing replay config.
        try:
            replay_config = traj_data["replay_config"]
        except KeyError as e:
            msg = "Replay config not found in trajectory. Are you running on an old trajectory?"
            raise ValueError(msg) from e
        if isinstance(replay_config, str):
            replay_config = json.loads(replay_config)
            traj_data["replay_config"] = replay_config
        config = RunSingleConfig.model_validate(replay_config)

        # Merge any additional config files
        for config_path in self._update_config:
            # An empty YAML file parses to None; treat it as "no overrides"
            update_data = yaml.safe_load(config_path.read_text()) or {}
            # Store the current model config before merging
            current_model = config.agent.model
            # Convert the merged data back to a RunSingleConfig
            config_dict = config.model_dump(mode="json")
            merged_dict = config_dict | update_data

            # Ensure agent.model is preserved if not explicitly updated
            if "agent" in merged_dict and "model" not in merged_dict["agent"]:
                merged_dict["agent"]["model"] = current_model.model_dump(mode="json")

            config = RunSingleConfig.model_validate(merged_dict)

        config.agent.model = ReplayModelConfig(replay_path=self._replay_action_trajs_path)
        return config

    @property
    def instance_id(self) -> str:
        """Stem of the trajectory file name; used as the key of the actions file."""
        return Path(self.traj_path).stem

    @classmethod
    def from_config(cls, config: RunReplayConfig, **kwargs) -> Self:
        """Create a ``RunReplay`` from a ``RunReplayConfig``; ``kwargs`` go to ``__init__``."""
        load_environment_variables(config.env_var_path)
        return cls(
            traj_path=config.traj_path,
            deployment=get_deployment(config.deployment) if config.deployment else None,
            output_dir=config.output_dir,
            update_config=config.update_config,
            **kwargs,
        )

    def _create_actions_file(self) -> None:
        """Write the assistant actions of the trajectory to the replay actions file.

        The file contains ``{instance_id: [action, ...]}`` where every action holds
        the assistant ``message`` and, with function calling, its ``tool_calls``.

        Raises:
            ValueError: If the trajectory contains tool calls but the config does not
                use function calling, if function calling is configured but an
                assistant item has no tool calls, or if there are no actions at all.
        """
        history = self._traj_data["history"]
        # Verify config compatibility with tool calls
        has_tool_calls = any(
            "tool_calls" in item and item["tool_calls"] is not None
            for item in history
            if item["role"] == "assistant"
        )

        agent_config = self.config.agent
        parse_function = agent_config.tools.parse_function.type
        use_function_calling = parse_function == "function_calling"

        if has_tool_calls and not use_function_calling:
            msg = (
                "Trajectory contains tool calls but config is not set up for function calling. "
                "Check that the config you want to use has agent.tools.parse_function.type set to 'function_calling'."
            )
            raise ValueError(msg)
        actions = []
        for ix, item in enumerate(history):
            if item["role"] != "assistant":
                continue
            action = {"message": item["content"]}
            if use_function_calling:
                # Raised explicitly instead of asserted: asserts are stripped under -O
                if "tool_calls" not in item or item["tool_calls"] is None:
                    msg = (
                        f"Config is set to use `function_calling` but trajectory item {ix} is missing a tool call "
                        f"or has tool_calls set to None"
                    )
                    raise ValueError(msg)
                action["tool_calls"] = item["tool_calls"]
            actions.append(action)
        if len(actions) == 0:
            msg = "No actions found in trajectory"
            raise ValueError(msg)
        self._replay_action_trajs_path.write_text(json.dumps({self.instance_id: actions}))

    def _get_env(self) -> SWEEnv:
        """Create the environment from the deployment and the repo of the replay config."""
        return SWEEnv(
            deployment=self.deployment,
            repo=self.config.env.repo,
            post_startup_commands=[],
        )

    def _get_agent(self) -> DefaultAgent:
        """Create the agent from the replay config and apply the error-handling flags."""
        agent = DefaultAgent.from_config(self.config.agent)
        agent._catch_errors = self._catch_errors
        agent._always_require_zero_exit_code = self._require_zero_exit_code
        return agent

    def _get_run_single(self) -> RunSingle:
        """Assemble the ``RunSingle`` that performs the replay."""
        return RunSingle(
            self._get_env(),
            self._get_agent(),
            problem_statement=self.config.problem_statement,
            output_dir=Path(self.output_dir),
        )

    def main(self):
        """Write the actions file, attach the replay config to the agent and run."""
        self._create_actions_file()
        run_single = self._get_run_single()
        run_single.agent.replay_config = RunSingleConfig(
            agent=self.config.agent,
            problem_statement=run_single.problem_statement,
            env=self.config.env,
        )
        run_single.run()
203
+
204
+
205
def run_from_config(config: RunReplayConfig):
    """Build a ``RunReplay`` from the given config and run it."""
    RunReplay.from_config(config).main()
207
+
208
+
209
def run_from_cli(args: list[str] | None = None):
    """Parse command line arguments into a ``RunReplayConfig`` and run the replay.

    Args:
        args: Argument list to parse; defaults to ``sys.argv[1:]`` when ``None``.
    """
    cli_args = sys.argv[1:] if args is None else args
    # The module docstring is the head of the help text, followed by the generated option help.
    options_header = "\n[cyan][bold]=== ALL THE OPTIONS ===[/bold][/cyan]\n\n"
    help_text = __doc__ + options_header + ConfigHelper().get_help(RunReplayConfig)  # type: ignore
    cli = BasicCLI(RunReplayConfig, help_text=help_text, default_settings=False)
    run_from_config(cli.get_config(cli_args))  # type: ignore
216
+
217
+
218
# Script entry point: parse the command line and run the replay.
if __name__ == "__main__":
    run_from_cli()