@elizaos/sweagent-root 2.0.0-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (323) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +270 -0
  3. package/package.json +71 -0
  4. package/python/LICENSE +21 -0
  5. package/python/config/README.md +15 -0
  6. package/python/config/bash_only.yaml +222 -0
  7. package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
  8. package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
  9. package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
  10. package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
  11. package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
  12. package/python/config/coding_challenge.yaml +104 -0
  13. package/python/config/default.yaml +69 -0
  14. package/python/config/default_backticks.yaml +69 -0
  15. package/python/config/default_mm_no_images.yaml +82 -0
  16. package/python/config/default_mm_with_images.yaml +83 -0
  17. package/python/config/demo/default.yaml +80 -0
  18. package/python/config/demo/no_instructions.yaml +69 -0
  19. package/python/config/demo/only_bash.yaml +60 -0
  20. package/python/config/exotic/default_shell.yaml +52 -0
  21. package/python/config/exotic/windowed_replace.yaml +125 -0
  22. package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
  23. package/python/config/human/human.yaml +24 -0
  24. package/python/config/human/human_demo.yaml +52 -0
  25. package/python/config/sweagent_0_7/07.yaml +101 -0
  26. package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
  27. package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
  28. package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
  29. package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
  30. package/python/mlc_config.json +44 -0
  31. package/python/pyproject.toml +262 -0
  32. package/python/sweagent/__init__.py +114 -0
  33. package/python/sweagent/__main__.py +4 -0
  34. package/python/sweagent/agent/__init__.py +0 -0
  35. package/python/sweagent/agent/action_sampler.py +317 -0
  36. package/python/sweagent/agent/agents.py +1294 -0
  37. package/python/sweagent/agent/extra/shell_agent.py +106 -0
  38. package/python/sweagent/agent/history_processors.py +399 -0
  39. package/python/sweagent/agent/hooks/__init__.py +0 -0
  40. package/python/sweagent/agent/hooks/abstract.py +139 -0
  41. package/python/sweagent/agent/hooks/status.py +34 -0
  42. package/python/sweagent/agent/models.py +896 -0
  43. package/python/sweagent/agent/problem_statement.py +312 -0
  44. package/python/sweagent/agent/reviewer.py +664 -0
  45. package/python/sweagent/environment/__init__.py +0 -0
  46. package/python/sweagent/environment/hooks/__init__.py +0 -0
  47. package/python/sweagent/environment/hooks/abstract.py +60 -0
  48. package/python/sweagent/environment/hooks/status.py +28 -0
  49. package/python/sweagent/environment/repo.py +219 -0
  50. package/python/sweagent/environment/swe_env.py +276 -0
  51. package/python/sweagent/exceptions.py +54 -0
  52. package/python/sweagent/inspector/README.md +6 -0
  53. package/python/sweagent/inspector/__init__.py +0 -0
  54. package/python/sweagent/inspector/favicon.ico +0 -0
  55. package/python/sweagent/inspector/fileViewer.js +354 -0
  56. package/python/sweagent/inspector/icons/computer.png +0 -0
  57. package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
  58. package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
  59. package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
  60. package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
  61. package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
  62. package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
  63. package/python/sweagent/inspector/index.html +25 -0
  64. package/python/sweagent/inspector/server.py +354 -0
  65. package/python/sweagent/inspector/static.py +169 -0
  66. package/python/sweagent/inspector/style.css +454 -0
  67. package/python/sweagent/run/__init__.py +0 -0
  68. package/python/sweagent/run/_progress.py +158 -0
  69. package/python/sweagent/run/batch_instances.py +419 -0
  70. package/python/sweagent/run/common.py +387 -0
  71. package/python/sweagent/run/compare_runs.py +123 -0
  72. package/python/sweagent/run/extract_pred.py +19 -0
  73. package/python/sweagent/run/hooks/__init__.py +0 -0
  74. package/python/sweagent/run/hooks/abstract.py +67 -0
  75. package/python/sweagent/run/hooks/apply_patch.py +106 -0
  76. package/python/sweagent/run/hooks/open_pr.py +244 -0
  77. package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
  78. package/python/sweagent/run/inspector_cli.py +493 -0
  79. package/python/sweagent/run/merge_predictions.py +64 -0
  80. package/python/sweagent/run/quick_stats.py +96 -0
  81. package/python/sweagent/run/remove_unfinished.py +63 -0
  82. package/python/sweagent/run/rich_test.py +91 -0
  83. package/python/sweagent/run/run.py +147 -0
  84. package/python/sweagent/run/run_batch.py +442 -0
  85. package/python/sweagent/run/run_replay.py +219 -0
  86. package/python/sweagent/run/run_shell.py +155 -0
  87. package/python/sweagent/run/run_single.py +225 -0
  88. package/python/sweagent/run/run_traj_to_demo.py +85 -0
  89. package/python/sweagent/tools/__init__.py +0 -0
  90. package/python/sweagent/tools/bundle.py +57 -0
  91. package/python/sweagent/tools/commands.py +220 -0
  92. package/python/sweagent/tools/parsing.py +619 -0
  93. package/python/sweagent/tools/tools.py +430 -0
  94. package/python/sweagent/tools/utils.py +108 -0
  95. package/python/sweagent/types.py +102 -0
  96. package/python/sweagent/utils/__init__.py +0 -0
  97. package/python/sweagent/utils/config.py +80 -0
  98. package/python/sweagent/utils/files.py +27 -0
  99. package/python/sweagent/utils/github.py +118 -0
  100. package/python/sweagent/utils/jinja_warnings.py +14 -0
  101. package/python/sweagent/utils/log.py +175 -0
  102. package/python/sweagent/utils/patch_formatter.py +152 -0
  103. package/python/sweagent/utils/serialization.py +45 -0
  104. package/python/tests/__init__.py +0 -0
  105. package/python/tests/conftest.py +191 -0
  106. package/python/tests/test_agent.py +258 -0
  107. package/python/tests/test_batch_instance.py +43 -0
  108. package/python/tests/test_commands/_interactive_dummy.py +35 -0
  109. package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
  110. package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
  111. package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
  112. package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
  113. package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
  114. package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
  115. package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
  116. package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
  117. package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
  118. package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
  119. package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
  120. package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
  121. package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
  122. package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
  123. package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
  124. package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
  125. package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
  126. package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
  127. package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
  128. package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
  129. package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
  130. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
  131. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
  132. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
  133. package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
  134. package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
  135. package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
  136. package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
  137. package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
  138. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
  139. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
  140. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
  141. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
  142. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
  143. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
  144. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
  145. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
  146. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
  147. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
  148. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
  149. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
  150. package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
  151. package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
  152. package/python/tests/test_data/data_sources/human_eval.json +1 -0
  153. package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
  154. package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
  155. package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
  156. package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
  157. package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
  158. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
  159. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
  160. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
  161. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
  162. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
  163. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
  164. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
  165. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
  166. package/python/tests/test_env.py +66 -0
  167. package/python/tests/test_env_utils.py +129 -0
  168. package/python/tests/test_history_processors.py +40 -0
  169. package/python/tests/test_models.py +23 -0
  170. package/python/tests/test_openai_live.py +164 -0
  171. package/python/tests/test_packaging.py +7 -0
  172. package/python/tests/test_parsing.py +131 -0
  173. package/python/tests/test_problem_statement_multimodal.py +111 -0
  174. package/python/tests/test_quick_stats.py +42 -0
  175. package/python/tests/test_run.py +37 -0
  176. package/python/tests/test_run_batch.py +110 -0
  177. package/python/tests/test_run_hooks.py +114 -0
  178. package/python/tests/test_run_replay.py +33 -0
  179. package/python/tests/test_run_single.py +125 -0
  180. package/python/tests/test_tools_command_parsing.py +193 -0
  181. package/python/tests/test_utils.py +15 -0
  182. package/python/tests/tools/__init__.py +0 -0
  183. package/python/tests/tools/conftest.py +12 -0
  184. package/python/tests/tools/test_default_utils.py +153 -0
  185. package/python/tests/tools/test_edit_replace.py +0 -0
  186. package/python/tests/tools/test_split_string.py +82 -0
  187. package/python/tests/utils.py +29 -0
  188. package/python/tools/diff_state/bin/_state_diff_state +52 -0
  189. package/python/tools/diff_state/config.yaml +2 -0
  190. package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
  191. package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
  192. package/python/tools/edit_anthropic/config.yaml +56 -0
  193. package/python/tools/edit_anthropic/install.sh +3 -0
  194. package/python/tools/filemap/bin/filemap +45 -0
  195. package/python/tools/filemap/config.yaml +9 -0
  196. package/python/tools/filemap/install.sh +2 -0
  197. package/python/tools/forfeit/bin/exit_forfeit +5 -0
  198. package/python/tools/forfeit/config.yaml +5 -0
  199. package/python/tools/image_tools/bin/view_image +36 -0
  200. package/python/tools/image_tools/config.yaml +9 -0
  201. package/python/tools/multilingual_setup/bin/do_nothing +2 -0
  202. package/python/tools/multilingual_setup/config.yaml +1 -0
  203. package/python/tools/multilingual_setup/install.sh +45 -0
  204. package/python/tools/registry/bin/_read_env +10 -0
  205. package/python/tools/registry/bin/_write_env +10 -0
  206. package/python/tools/registry/config.yaml +1 -0
  207. package/python/tools/registry/install.sh +6 -0
  208. package/python/tools/registry/lib/__init__.py +0 -0
  209. package/python/tools/registry/lib/registry.py +56 -0
  210. package/python/tools/review_on_submit_m/README.md +6 -0
  211. package/python/tools/review_on_submit_m/bin/submit +54 -0
  212. package/python/tools/review_on_submit_m/config.yaml +6 -0
  213. package/python/tools/review_on_submit_m/install.sh +0 -0
  214. package/python/tools/search/bin/find_file +31 -0
  215. package/python/tools/search/bin/search_dir +39 -0
  216. package/python/tools/search/bin/search_file +55 -0
  217. package/python/tools/search/config.yaml +37 -0
  218. package/python/tools/search/install.sh +3 -0
  219. package/python/tools/submit/bin/submit +17 -0
  220. package/python/tools/submit/config.yaml +5 -0
  221. package/python/tools/web_browser/bin/click_mouse +41 -0
  222. package/python/tools/web_browser/bin/close_site +28 -0
  223. package/python/tools/web_browser/bin/double_click_mouse +37 -0
  224. package/python/tools/web_browser/bin/drag_mouse +46 -0
  225. package/python/tools/web_browser/bin/execute_script_on_page +39 -0
  226. package/python/tools/web_browser/bin/get_console_output +48 -0
  227. package/python/tools/web_browser/bin/move_mouse +35 -0
  228. package/python/tools/web_browser/bin/navigate_back +33 -0
  229. package/python/tools/web_browser/bin/navigate_forward +33 -0
  230. package/python/tools/web_browser/bin/open_site +36 -0
  231. package/python/tools/web_browser/bin/press_keys_on_page +51 -0
  232. package/python/tools/web_browser/bin/reload_page +33 -0
  233. package/python/tools/web_browser/bin/run_web_browser_server +394 -0
  234. package/python/tools/web_browser/bin/screenshot_site +38 -0
  235. package/python/tools/web_browser/bin/scroll_on_page +40 -0
  236. package/python/tools/web_browser/bin/set_browser_window_size +40 -0
  237. package/python/tools/web_browser/bin/type_text +34 -0
  238. package/python/tools/web_browser/bin/wait_time +39 -0
  239. package/python/tools/web_browser/config.yaml +155 -0
  240. package/python/tools/web_browser/install.sh +22 -0
  241. package/python/tools/web_browser/lib/browser_manager.py +404 -0
  242. package/python/tools/web_browser/lib/web_browser_config.py +33 -0
  243. package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
  244. package/python/tools/web_browser/test_console.html +1 -0
  245. package/python/tools/windowed/bin/_state +25 -0
  246. package/python/tools/windowed/bin/create +29 -0
  247. package/python/tools/windowed/bin/goto +37 -0
  248. package/python/tools/windowed/bin/open +49 -0
  249. package/python/tools/windowed/bin/scroll_down +12 -0
  250. package/python/tools/windowed/bin/scroll_up +13 -0
  251. package/python/tools/windowed/config.yaml +38 -0
  252. package/python/tools/windowed/install.sh +15 -0
  253. package/python/tools/windowed/lib/__init__.py +0 -0
  254. package/python/tools/windowed/lib/flake8_utils.py +147 -0
  255. package/python/tools/windowed/lib/windowed_file.py +312 -0
  256. package/python/tools/windowed_edit_linting/bin/edit +128 -0
  257. package/python/tools/windowed_edit_linting/config.yaml +31 -0
  258. package/python/tools/windowed_edit_linting/install.sh +5 -0
  259. package/python/tools/windowed_edit_replace/bin/edit +172 -0
  260. package/python/tools/windowed_edit_replace/bin/insert +77 -0
  261. package/python/tools/windowed_edit_replace/config.yaml +60 -0
  262. package/python/tools/windowed_edit_replace/install.sh +5 -0
  263. package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
  264. package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
  265. package/python/tools/windowed_edit_rewrite/install.sh +5 -0
  266. package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
  267. package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
  268. package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
  269. package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
  270. package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
  271. package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
  272. package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
  273. package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
  274. package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
  275. package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
  276. package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
  277. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
  278. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  279. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  280. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
  281. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
  282. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
  283. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  284. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  285. package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
  286. package/rust/Cargo.toml +100 -0
  287. package/rust/README.md +49 -0
  288. package/rust/src/agent/action_sampler.rs +130 -0
  289. package/rust/src/agent/agents.rs +1029 -0
  290. package/rust/src/agent/history_processors.rs +277 -0
  291. package/rust/src/agent/hooks/mod.rs +208 -0
  292. package/rust/src/agent/mod.rs +24 -0
  293. package/rust/src/agent/models.rs +837 -0
  294. package/rust/src/agent/problem_statement.rs +355 -0
  295. package/rust/src/agent/reviewer.rs +505 -0
  296. package/rust/src/bin/sweagent.rs +784 -0
  297. package/rust/src/environment/deployment.rs +631 -0
  298. package/rust/src/environment/hooks/mod.rs +114 -0
  299. package/rust/src/environment/mod.rs +16 -0
  300. package/rust/src/environment/repo.rs +265 -0
  301. package/rust/src/environment/runtime.rs +237 -0
  302. package/rust/src/environment/swe_env.rs +248 -0
  303. package/rust/src/exceptions.rs +228 -0
  304. package/rust/src/lib.rs +68 -0
  305. package/rust/src/monitoring.rs +482 -0
  306. package/rust/src/run/hooks/mod.rs +134 -0
  307. package/rust/src/run/mod.rs +12 -0
  308. package/rust/src/run/run_batch.rs +563 -0
  309. package/rust/src/run/run_single.rs +196 -0
  310. package/rust/src/tools/bundle.rs +224 -0
  311. package/rust/src/tools/commands.rs +173 -0
  312. package/rust/src/tools/mod.rs +295 -0
  313. package/rust/src/tools/parsing.rs +354 -0
  314. package/rust/src/tools/registry.rs +143 -0
  315. package/rust/src/types.rs +554 -0
  316. package/rust/src/utils/config.rs +105 -0
  317. package/rust/src/utils/files.rs +137 -0
  318. package/rust/src/utils/github.rs +171 -0
  319. package/rust/src/utils/log.rs +65 -0
  320. package/rust/src/utils/mod.rs +17 -0
  321. package/rust/src/utils/serialization.rs +181 -0
  322. package/rust/src/utils/template.rs +173 -0
  323. package/typescript/README.md +335 -0
@@ -0,0 +1,419 @@
1
+ import json
2
+ import random
3
+ import re
4
+ from abc import ABC, abstractmethod
5
+ from pathlib import Path
6
+ from typing import Any, Literal
7
+
8
+ from pydantic import BaseModel, ConfigDict, Field, model_validator
9
+ from swerex.deployment.config import (
10
+ DeploymentConfig,
11
+ DockerDeploymentConfig,
12
+ DummyDeploymentConfig,
13
+ LocalDeploymentConfig,
14
+ )
15
+ from typing_extensions import Self
16
+
17
+ from sweagent.agent.problem_statement import (
18
+ ProblemStatementConfig,
19
+ SWEBenchMultimodalProblemStatement,
20
+ TextProblemStatement,
21
+ )
22
+ from sweagent.environment.repo import GithubRepoConfig, LocalRepoConfig, PreExistingRepoConfig
23
+ from sweagent.environment.swe_env import EnvironmentConfig
24
+ from sweagent.utils.files import load_file
25
+ from sweagent.utils.log import get_logger
26
+
27
+ logger = get_logger("swea-config", emoji="🔧")
28
+
29
+
30
class AbstractInstanceSource(ABC):
    """Anything that adheres to this standard can be used to load instances."""

    @abstractmethod
    def get_instance_configs(self) -> "list[BatchInstance]":
        """Return the instances provided by this source.

        The return type is written as a forward reference because
        `BatchInstance` is defined further down in this module. The concrete
        sources in this module all return `list[BatchInstance]`.
        """
        ...
35
+
36
+
37
class BatchInstance(BaseModel):
    """One fully specified entry of a batch.

    Bundles the environment configuration with the problem statement that
    belongs to it.
    """

    env: EnvironmentConfig
    problem_statement: ProblemStatementConfig
44
+
45
+
46
+ def _slice_spec_to_slice(slice_spec: str) -> slice:
47
+ if slice_spec == "":
48
+ return slice(None)
49
+ parts = slice_spec.split(":")
50
+ values = [None if p == "" else int(p) for p in parts]
51
+ if len(parts) == 1:
52
+ return slice(values[0])
53
+ if len(parts) == 2:
54
+ return slice(values[0], values[1])
55
+ if len(parts) == 3:
56
+ return slice(values[0], values[1], values[2])
57
+ msg = (
58
+ f"Invalid slice specification: {slice_spec!r}. "
59
+ "Here's the expected format: stop or start:stop or start:stop:step "
60
+ "(i.e., it behaves exactly like python's list slicing `list[slice]`)."
61
+ )
62
+ raise ValueError(msg)
63
+
64
+
65
def _filter_batch_items(
    instances: list[BatchInstance], *, filter_: str, slice_: str = "", shuffle: bool = False
) -> list[BatchInstance]:
    """Optionally shuffle, then filter and slice a list of batch instances.

    Args:
        instances: The instances to select from. The input list is not modified.
        filter_: Regular expression that is matched (with `re.match`, i.e.
            anchored at the start) against each instance's problem statement id.
        slice_: Slice specification applied after filtering; see
            `_slice_spec_to_slice`. An empty string disables slicing.
        shuffle: If true, shuffle deterministically before filtering and slicing.

    Returns:
        The selected instances.
    """
    if shuffle:
        # Sort first so that the shuffled order depends only on the ids and
        # not on the order in which the instances were supplied.
        instances = sorted(instances, key=lambda x: x.problem_statement.id)
        # A dedicated generator seeded with 42 yields the same permutation as
        # seeding the module-level generator, but does not reset the global
        # random state for the rest of the process.
        random.Random(42).shuffle(instances)
    before_filter = len(instances)
    pattern = re.compile(filter_)
    instances = [instance for instance in instances if pattern.match(instance.problem_statement.id)]
    after_filter = len(instances)
    if before_filter != after_filter:
        logger.info("Instance filter: %d -> %d instances", before_filter, after_filter)
    if slice_:
        instances = instances[_slice_spec_to_slice(slice_)]
        after_slice = len(instances)
        # Report the effect of the slice relative to the filtered list, which
        # is what the slice was actually applied to.
        if after_filter != after_slice:
            logger.info("Instance slice: %d -> %d instances", after_filter, after_slice)
    return instances
83
+
84
+
85
class SimpleBatchInstance(BaseModel):
    """A simple way to configure a single instance in a batch of instances that all
    use similar deployment configurations.

    Predominantly used for benchmarking purposes. Assumes that the repository is already
    present in the docker container.
    """

    image_name: str
    problem_statement: str
    instance_id: str
    repo_name: str = ""
    """Specifies the repository to use. If empty, no repository is used.
    If the string does not contain a slash, it is interpreted as an already existing repository at the root
    of the docker container. If it contains the word "github", it is interpreted as a github repository.
    Else, it is interpreted as a local repository.
    """
    base_commit: str = "HEAD"
    """Used to reset repo."""
    extra_fields: dict[str, Any] = Field(default_factory=dict)
    """Any additional data to be added to the instance.
    This data will be available when formatting prompt templates.
    """

    # Ignore instead of allow because they should be added as `extra_fields`
    model_config = ConfigDict(extra="ignore")

    def to_full_batch_instance(self, deployment: DeploymentConfig) -> BatchInstance:
        """Merge the deployment options into the `SimpleBatchInstance` object to get a full `BatchInstance`.

        Args:
            deployment: Deployment configuration shared by the batch. It is
                deep-copied, so the object passed in is never modified.

        Returns:
            A `BatchInstance` combining the environment and the problem statement.

        Raises:
            ValueError: If `deployment` is a local deployment but this
                instance specifies an `image_name`.
        """
        # Very important: Make a copy of the deployment config because it will be shared among instances!!!
        deployment = deployment.model_copy(deep=True)

        # Work on a shallow copy so that removing `issue_images` below does not
        # alter this instance; otherwise a second call would silently produce
        # a text-only problem statement.
        extra_fields = dict(self.extra_fields)
        if "issue_images" in extra_fields:
            issue_images = extra_fields.pop("issue_images")
            problem_statement = SWEBenchMultimodalProblemStatement(
                text=self.problem_statement,
                issue_images=issue_images,
                id=self.instance_id,
                extra_fields=extra_fields,
            )
        else:
            problem_statement = TextProblemStatement(
                text=self.problem_statement, id=self.instance_id, extra_fields=extra_fields
            )

        # The "github" check comes first, so a github URL is never treated as
        # a local path even though it contains slashes.
        if not self.repo_name:
            repo = None
        elif "github" in self.repo_name:
            repo = GithubRepoConfig(github_url=self.repo_name, base_commit=self.base_commit)
        elif "/" not in self.repo_name:
            repo = PreExistingRepoConfig(repo_name=self.repo_name, base_commit=self.base_commit)
        else:
            repo = LocalRepoConfig(path=Path(self.repo_name), base_commit=self.base_commit)
        if isinstance(deployment, LocalDeploymentConfig):
            if self.image_name:
                msg = "Local deployment does not support image_name"
                raise ValueError(msg)
            return BatchInstance(
                env=EnvironmentConfig(deployment=deployment, repo=repo), problem_statement=problem_statement
            )
        if isinstance(deployment, DummyDeploymentConfig):
            return BatchInstance(
                env=EnvironmentConfig(deployment=deployment, repo=repo), problem_statement=problem_statement
            )

        # Every remaining deployment type gets its image set from the instance.
        deployment.image = self.image_name  # type: ignore

        if isinstance(deployment, DockerDeploymentConfig) and deployment.python_standalone_dir is None:
            # Note: you can disable this by setting python_standalone_dir to ""
            deployment.python_standalone_dir = "/root"  # type: ignore

        return BatchInstance(
            env=EnvironmentConfig(deployment=deployment, repo=repo), problem_statement=problem_statement
        )

    @model_validator(mode="before")
    @classmethod
    def handle_legacy_id(cls, data):
        """Accept the legacy `id` key as an alias for `instance_id`.

        The rename is applied to a shallow copy so that the caller's input
        dictionary is left untouched.
        """
        # Handling compatibility with swe-agent <= 1.0.1
        if isinstance(data, dict) and "id" in data and "instance_id" not in data:
            data = dict(data)
            data["instance_id"] = data.pop("id")
        return data

    # todo: Maybe populate extra fields?
    @classmethod
    def from_swe_bench(cls, instance: dict[str, Any]) -> Self:
        """Convert instances from the classical SWE-bench dataset to the `SimpleBatchInstance` format.

        Args:
            instance: A dataset row. Reads `instance_id`, `problem_statement`
                and `base_commit`, plus the optional `image_name` and
                `image_assets` (a JSON string) keys.

        Returns:
            The corresponding `SimpleBatchInstance`, with `repo_name` set to "testbed".
        """
        iid = instance["instance_id"]
        image_name = instance.get("image_name", None)
        if image_name is None:
            # Docker doesn't allow double underscore, so we replace them with a magic token
            id_docker_compatible = iid.replace("__", "_1776_")
            image_name = f"docker.io/swebench/sweb.eval.x86_64.{id_docker_compatible}:latest".lower()
        extra_fields = {}
        if "image_assets" in instance:
            issue_images = json.loads(instance["image_assets"])["problem_statement"]
            extra_fields["issue_images"] = issue_images
        return cls(
            image_name=image_name,
            problem_statement=instance["problem_statement"],
            instance_id=iid,
            repo_name="testbed",
            base_commit=instance["base_commit"],
            extra_fields=extra_fields,
        )
191
+
192
+
193
class InstancesFromFile(BaseModel, AbstractInstanceSource):
    """Instance source that reads its instances from a file on disk."""

    path: Path
    filter: str = ".*"
    """Regular expression to filter the instances by instance id."""
    slice: str = ""
    """Select only a slice of the instances (after filtering by `filter`).
    Possible values are stop or start:stop or start:stop:step
    (i.e., it behaves exactly like python's list slicing `list[slice]`).
    """
    shuffle: bool = False
    """Shuffle the instances (before filtering and slicing)."""

    deployment: DeploymentConfig = Field(
        default_factory=lambda: DockerDeploymentConfig(image="python:3.11"),
        description="Deployment options.",
    )
    """Note that the image_name option is overwritten by the images specified in the task instances."""

    simple: Literal[True] = True
    """Convenience discriminator for (de)serialization/CLI. Do not change."""

    type: Literal["file"] = "file"
    """Discriminator for (de)serialization/CLI. Do not change."""

    def get_instance_configs(self) -> list[BatchInstance]:
        """Load the file, build full batch instances and apply shuffle/filter/slice."""
        raw_entries = load_file(self.path)
        # Validate every entry before converting any of them.
        validated = [SimpleBatchInstance.model_validate(entry) for entry in raw_entries]
        expanded = [item.to_full_batch_instance(self.deployment) for item in validated]
        return _filter_batch_items(
            expanded,
            filter_=self.filter,
            slice_=self.slice,
            shuffle=self.shuffle,
        )

    @property
    def id(self) -> str:
        """Identifier of this source: the file name without its suffix."""
        return self.path.stem
228
+
229
+
230
class InstancesFromHuggingFace(BaseModel, AbstractInstanceSource):
    """Load instances from HuggingFace.

    The rows of the dataset split are validated as `SimpleBatchInstance`
    objects and expanded into full `BatchInstance` objects using `deployment`.
    """

    dataset_name: str
    """Name of the HuggingFace dataset. Same as when using `datasets.load_dataset`."""
    split: str = "dev"
    filter: str = ".*"
    """Regular expression to filter the instances by instance id."""
    slice: str = ""
    """Select only a slice of the instances (after filtering by `filter`).
    Possible values are stop or start:stop or start:stop:step.
    (i.e., it behaves exactly like python's list slicing `list[slice]`).
    """
    shuffle: bool = False
    """Shuffle the instances (before filtering and slicing)."""

    deployment: DeploymentConfig = Field(
        default_factory=lambda: DockerDeploymentConfig(image="python:3.11"),
    )
    """Deployment configuration. Note that the `image_name` option is overwritten by the images specified in the task instances.
    """
    type: Literal["huggingface"] = "huggingface"
    """Discriminator for (de)serialization/CLI. Do not change."""

    def get_instance_configs(self) -> list[BatchInstance]:
        """Return the batch instances contained in the configured dataset split.

        `datasets` is imported lazily inside this method, so the package is
        only required when this source is actually used.
        """
        from datasets import load_dataset

        rows: list[dict[str, Any]] = load_dataset(self.dataset_name, split=self.split)  # type: ignore
        # Validate every row first, then expand with the shared deployment.
        validated: list[SimpleBatchInstance] = list(map(SimpleBatchInstance.model_validate, rows))
        expanded = [entry.to_full_batch_instance(self.deployment) for entry in validated]
        return _filter_batch_items(
            expanded,
            filter_=self.filter,
            slice_=self.slice,
            shuffle=self.shuffle,
        )

    @property
    def id(self) -> str:
        """Identifier built from the dataset name and the split.

        Only alphanumeric characters, `-` and `_` of `dataset_name` are kept;
        every other character (e.g. `/`) is dropped.
        """
        kept_chars = [char for char in self.dataset_name if char.isalnum() or char in ("-", "_")]
        ds_name = "".join(kept_chars)
        return f"{ds_name}_{self.split}"
266
+
267
+
268
class SWEBenchInstances(BaseModel, AbstractInstanceSource):
    """Load instances from SWE-bench.

    The dataset is chosen through `subset` unless `path_override` is given.
    """

    subset: Literal["lite", "verified", "full", "multimodal", "multilingual"] = "lite"
    """Subset of swe-bench to use"""

    # IMPORTANT: This field must not be named `path`. If it were, configs that omit
    # `type` might be resolved to ExpertInstancesFromFile or something like that.
    path_override: str | Path | None = None
    """Allow to specify a different huggingface dataset name or path to a huggingface
    dataset. This will override the automatic path set by `subset`.
    """

    split: Literal["dev", "test"] = "dev"

    deployment: DeploymentConfig = Field(
        default_factory=lambda: DockerDeploymentConfig(image="python:3.11"),
    )
    """Deployment configuration. Note that the image_name option is overwritten by the images specified in the task instances.
    """

    type: Literal["swe_bench"] = "swe_bench"
    """Discriminator for (de)serialization/CLI. Do not change."""

    filter: str = ".*"
    """Regular expression to filter the instances by instance id."""
    slice: str = ""
    """Select only a slice of the instances (after filtering by `filter`).
    Possible values are stop or start:stop or start:stop:step.
    (i.e., it behaves exactly like python's list slicing `list[slice]`).
    """
    shuffle: bool = False
    """Shuffle the instances (before filtering and slicing)."""

    evaluate: bool = False
    """Run sb-cli to evaluate"""

    def _get_dataset_path(self) -> str:
        """Return the dataset name/path to pass to `load_dataset`.

        `path_override` wins when it is set; otherwise `subset` is looked up
        in a fixed table of dataset names.

        Raises:
            ValueError: If `subset` has no entry in the table.
        """
        override = self.path_override
        if override is not None:
            return str(override)

        known_datasets = {
            "full": "princeton-nlp/SWE-Bench",
            "verified": "princeton-nlp/SWE-Bench_Verified",
            "lite": "princeton-nlp/SWE-Bench_Lite",
            "multimodal": "princeton-nlp/SWE-Bench_Multimodal",
            "multilingual": "swe-bench/SWE-Bench_Multilingual",
        }
        dataset_path = known_datasets.get(self.subset)
        if dataset_path is None:
            msg = f"Unsupported subset: {self.subset}"
            raise ValueError(msg)
        return dataset_path

    def get_instance_configs(self) -> list[BatchInstance]:
        """Return the batch instances of the selected SWE-bench dataset split.

        Side effect: when `deployment` is a `DockerDeploymentConfig`, its
        `platform` is set to `"linux/amd64"` in place before the instances
        are built.
        """
        from datasets import load_dataset

        rows: list[dict[str, Any]] = load_dataset(self._get_dataset_path(), split=self.split)  # type: ignore

        deployment = self.deployment
        if isinstance(deployment, DockerDeploymentConfig):
            # Mutates the config object held by this source.
            deployment.platform = "linux/amd64"

        def to_batch_instance(row: dict[str, Any]) -> BatchInstance:
            # Convert one dataset row into a simple instance, then expand it.
            simple = SimpleBatchInstance.from_swe_bench(row)
            return simple.to_full_batch_instance(self.deployment)

        expanded = [to_batch_instance(row) for row in rows]
        return _filter_batch_items(
            expanded,
            filter_=self.filter,
            slice_=self.slice,
            shuffle=self.shuffle,
        )

    @property
    def id(self) -> str:
        """Identifier combining the fixed `swe_bench` prefix, the subset and the split."""
        subset, split = self.subset, self.split
        return f"swe_bench_{subset}_{split}"
338
+
339
+
340
class ExpertInstancesFromFile(BaseModel, AbstractInstanceSource):
    """Load instances from a file. The difference to `InstancesFromFile` is that the instances are configured as full
    `EnvironmentInstanceConfig` objects, i.e., we could specify separate deployment configurations etc.

    Each entry of the file is validated directly as a `BatchInstance`; this
    source has no shared `deployment` field.
    """

    path: Path
    filter: str = ".*"
    """Regular expression to filter the instances by instance id."""
    slice: str = ""
    """Select only a slice of the instances (after filtering by `filter`).
    Possible values are stop or start:stop or start:stop:step.
    (i.e., it behaves exactly like python's list slicing `list[slice]`).
    """
    shuffle: bool = False
    """Shuffle the instances (before filtering and slicing)."""

    type: Literal["expert_file"] = "expert_file"
    """Discriminator for (de)serialization/CLI. Do not change."""

    def get_instance_configs(self) -> list[BatchInstance]:
        """Return the fully specified batch instances stored in the file at `path`."""
        raw_entries = load_file(self.path)
        validated = list(map(BatchInstance.model_validate, raw_entries))
        return _filter_batch_items(
            validated,
            filter_=self.filter,
            slice_=self.slice,
            shuffle=self.shuffle,
        )

    @property
    def id(self) -> str:
        """Identifier of this source: the file name of `path` without its suffix."""
        source_path = self.path
        return source_path.stem
367
+
368
+
369
class SWESmithInstances(BaseModel, AbstractInstanceSource):
    """Load instances from SWE-smith.

    Each entry of the file is adapted to the keys used by `SimpleBatchInstance`
    before validation (see `get_instance_configs`).
    """

    path: Path

    deployment: DeploymentConfig = Field(
        default_factory=lambda: DockerDeploymentConfig(image="python:3.11"),
    )
    """Deployment configuration. Note that the image_name option is overwritten by the images specified in the task instances.
    """

    filter: str = ".*"
    """Regular expression to filter the instances by instance id."""
    slice: str = ""
    """Select only a slice of the instances (after filtering by `filter`).
    Possible values are stop or start:stop or start:stop:step.
    (i.e., it behaves exactly like python's list slicing `list[slice]`).
    """
    shuffle: bool = False
    """Shuffle the instances (before filtering and slicing)."""

    type: Literal["swesmith"] = "swesmith"
    """Discriminator for (de)serialization/CLI. Do not change."""

    def get_instance_configs(self) -> list[BatchInstance]:
        """Return the batch instances described by the SWE-smith file at `path`.

        Every entry must provide `instance_id` and `FAIL_TO_PASS`; a missing
        key raises `KeyError`. `problem_statement` defaults to an empty string.
        """

        def convert_instance_dict(instance_dict: dict[str, Any]) -> dict[str, Any]:
            # Rewrites the entry in place and returns the same dict object.
            instance_id = instance_dict["instance_id"]
            instance_dict.update(
                {
                    "id": instance_id,
                    # todo: The base_commit is currently incorrect
                    "base_commit": instance_id,
                    "problem_statement": instance_dict.get("problem_statement", ""),
                    "repo_name": "testbed",
                    "extra_fields": {"fail_to_pass": instance_dict["FAIL_TO_PASS"]},
                }
            )
            return instance_dict

        def to_batch_instance(raw: dict[str, Any]) -> BatchInstance:
            # Adapt, validate and expand a single entry.
            simple = SimpleBatchInstance.model_validate(convert_instance_dict(raw))
            return simple.to_full_batch_instance(self.deployment)

        raw_entries = load_file(self.path)
        expanded = [to_batch_instance(raw) for raw in raw_entries]
        return _filter_batch_items(
            expanded,
            filter_=self.filter,
            slice_=self.slice,
            shuffle=self.shuffle,
        )

    @property
    def id(self) -> str:
        """Identifier: the `swesmith_` prefix followed by the file name of `path` without its suffix."""
        stem = self.path.stem
        return f"swesmith_{stem}"
415
+
416
+
417
# Union of every instance-source config model defined above. Each member
# declares its own `type` literal, documented as the discriminator for
# (de)serialization/CLI. The member order is kept unchanged on purpose.
BatchInstanceSourceConfig = (
    InstancesFromHuggingFace
    | InstancesFromFile
    | SWEBenchInstances
    | ExpertInstancesFromFile
    | SWESmithInstances
)