@elizaos/sweagent-root 2.0.0-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (323) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +270 -0
  3. package/package.json +71 -0
  4. package/python/LICENSE +21 -0
  5. package/python/config/README.md +15 -0
  6. package/python/config/bash_only.yaml +222 -0
  7. package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
  8. package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
  9. package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
  10. package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
  11. package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
  12. package/python/config/coding_challenge.yaml +104 -0
  13. package/python/config/default.yaml +69 -0
  14. package/python/config/default_backticks.yaml +69 -0
  15. package/python/config/default_mm_no_images.yaml +82 -0
  16. package/python/config/default_mm_with_images.yaml +83 -0
  17. package/python/config/demo/default.yaml +80 -0
  18. package/python/config/demo/no_instructions.yaml +69 -0
  19. package/python/config/demo/only_bash.yaml +60 -0
  20. package/python/config/exotic/default_shell.yaml +52 -0
  21. package/python/config/exotic/windowed_replace.yaml +125 -0
  22. package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
  23. package/python/config/human/human.yaml +24 -0
  24. package/python/config/human/human_demo.yaml +52 -0
  25. package/python/config/sweagent_0_7/07.yaml +101 -0
  26. package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
  27. package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
  28. package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
  29. package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
  30. package/python/mlc_config.json +44 -0
  31. package/python/pyproject.toml +262 -0
  32. package/python/sweagent/__init__.py +114 -0
  33. package/python/sweagent/__main__.py +4 -0
  34. package/python/sweagent/agent/__init__.py +0 -0
  35. package/python/sweagent/agent/action_sampler.py +317 -0
  36. package/python/sweagent/agent/agents.py +1294 -0
  37. package/python/sweagent/agent/extra/shell_agent.py +106 -0
  38. package/python/sweagent/agent/history_processors.py +399 -0
  39. package/python/sweagent/agent/hooks/__init__.py +0 -0
  40. package/python/sweagent/agent/hooks/abstract.py +139 -0
  41. package/python/sweagent/agent/hooks/status.py +34 -0
  42. package/python/sweagent/agent/models.py +896 -0
  43. package/python/sweagent/agent/problem_statement.py +312 -0
  44. package/python/sweagent/agent/reviewer.py +664 -0
  45. package/python/sweagent/environment/__init__.py +0 -0
  46. package/python/sweagent/environment/hooks/__init__.py +0 -0
  47. package/python/sweagent/environment/hooks/abstract.py +60 -0
  48. package/python/sweagent/environment/hooks/status.py +28 -0
  49. package/python/sweagent/environment/repo.py +219 -0
  50. package/python/sweagent/environment/swe_env.py +276 -0
  51. package/python/sweagent/exceptions.py +54 -0
  52. package/python/sweagent/inspector/README.md +6 -0
  53. package/python/sweagent/inspector/__init__.py +0 -0
  54. package/python/sweagent/inspector/favicon.ico +0 -0
  55. package/python/sweagent/inspector/fileViewer.js +354 -0
  56. package/python/sweagent/inspector/icons/computer.png +0 -0
  57. package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
  58. package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
  59. package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
  60. package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
  61. package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
  62. package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
  63. package/python/sweagent/inspector/index.html +25 -0
  64. package/python/sweagent/inspector/server.py +354 -0
  65. package/python/sweagent/inspector/static.py +169 -0
  66. package/python/sweagent/inspector/style.css +454 -0
  67. package/python/sweagent/run/__init__.py +0 -0
  68. package/python/sweagent/run/_progress.py +158 -0
  69. package/python/sweagent/run/batch_instances.py +419 -0
  70. package/python/sweagent/run/common.py +387 -0
  71. package/python/sweagent/run/compare_runs.py +123 -0
  72. package/python/sweagent/run/extract_pred.py +19 -0
  73. package/python/sweagent/run/hooks/__init__.py +0 -0
  74. package/python/sweagent/run/hooks/abstract.py +67 -0
  75. package/python/sweagent/run/hooks/apply_patch.py +106 -0
  76. package/python/sweagent/run/hooks/open_pr.py +244 -0
  77. package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
  78. package/python/sweagent/run/inspector_cli.py +493 -0
  79. package/python/sweagent/run/merge_predictions.py +64 -0
  80. package/python/sweagent/run/quick_stats.py +96 -0
  81. package/python/sweagent/run/remove_unfinished.py +63 -0
  82. package/python/sweagent/run/rich_test.py +91 -0
  83. package/python/sweagent/run/run.py +147 -0
  84. package/python/sweagent/run/run_batch.py +442 -0
  85. package/python/sweagent/run/run_replay.py +219 -0
  86. package/python/sweagent/run/run_shell.py +155 -0
  87. package/python/sweagent/run/run_single.py +225 -0
  88. package/python/sweagent/run/run_traj_to_demo.py +85 -0
  89. package/python/sweagent/tools/__init__.py +0 -0
  90. package/python/sweagent/tools/bundle.py +57 -0
  91. package/python/sweagent/tools/commands.py +220 -0
  92. package/python/sweagent/tools/parsing.py +619 -0
  93. package/python/sweagent/tools/tools.py +430 -0
  94. package/python/sweagent/tools/utils.py +108 -0
  95. package/python/sweagent/types.py +102 -0
  96. package/python/sweagent/utils/__init__.py +0 -0
  97. package/python/sweagent/utils/config.py +80 -0
  98. package/python/sweagent/utils/files.py +27 -0
  99. package/python/sweagent/utils/github.py +118 -0
  100. package/python/sweagent/utils/jinja_warnings.py +14 -0
  101. package/python/sweagent/utils/log.py +175 -0
  102. package/python/sweagent/utils/patch_formatter.py +152 -0
  103. package/python/sweagent/utils/serialization.py +45 -0
  104. package/python/tests/__init__.py +0 -0
  105. package/python/tests/conftest.py +191 -0
  106. package/python/tests/test_agent.py +258 -0
  107. package/python/tests/test_batch_instance.py +43 -0
  108. package/python/tests/test_commands/_interactive_dummy.py +35 -0
  109. package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
  110. package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
  111. package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
  112. package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
  113. package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
  114. package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
  115. package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
  116. package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
  117. package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
  118. package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
  119. package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
  120. package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
  121. package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
  122. package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
  123. package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
  124. package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
  125. package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
  126. package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
  127. package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
  128. package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
  129. package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
  130. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
  131. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
  132. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
  133. package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
  134. package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
  135. package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
  136. package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
  137. package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
  138. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
  139. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
  140. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
  141. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
  142. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
  143. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
  144. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
  145. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
  146. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
  147. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
  148. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
  149. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
  150. package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
  151. package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
  152. package/python/tests/test_data/data_sources/human_eval.json +1 -0
  153. package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
  154. package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
  155. package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
  156. package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
  157. package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
  158. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
  159. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
  160. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
  161. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
  162. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
  163. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
  164. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
  165. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
  166. package/python/tests/test_env.py +66 -0
  167. package/python/tests/test_env_utils.py +129 -0
  168. package/python/tests/test_history_processors.py +40 -0
  169. package/python/tests/test_models.py +23 -0
  170. package/python/tests/test_openai_live.py +164 -0
  171. package/python/tests/test_packaging.py +7 -0
  172. package/python/tests/test_parsing.py +131 -0
  173. package/python/tests/test_problem_statement_multimodal.py +111 -0
  174. package/python/tests/test_quick_stats.py +42 -0
  175. package/python/tests/test_run.py +37 -0
  176. package/python/tests/test_run_batch.py +110 -0
  177. package/python/tests/test_run_hooks.py +114 -0
  178. package/python/tests/test_run_replay.py +33 -0
  179. package/python/tests/test_run_single.py +125 -0
  180. package/python/tests/test_tools_command_parsing.py +193 -0
  181. package/python/tests/test_utils.py +15 -0
  182. package/python/tests/tools/__init__.py +0 -0
  183. package/python/tests/tools/conftest.py +12 -0
  184. package/python/tests/tools/test_default_utils.py +153 -0
  185. package/python/tests/tools/test_edit_replace.py +0 -0
  186. package/python/tests/tools/test_split_string.py +82 -0
  187. package/python/tests/utils.py +29 -0
  188. package/python/tools/diff_state/bin/_state_diff_state +52 -0
  189. package/python/tools/diff_state/config.yaml +2 -0
  190. package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
  191. package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
  192. package/python/tools/edit_anthropic/config.yaml +56 -0
  193. package/python/tools/edit_anthropic/install.sh +3 -0
  194. package/python/tools/filemap/bin/filemap +45 -0
  195. package/python/tools/filemap/config.yaml +9 -0
  196. package/python/tools/filemap/install.sh +2 -0
  197. package/python/tools/forfeit/bin/exit_forfeit +5 -0
  198. package/python/tools/forfeit/config.yaml +5 -0
  199. package/python/tools/image_tools/bin/view_image +36 -0
  200. package/python/tools/image_tools/config.yaml +9 -0
  201. package/python/tools/multilingual_setup/bin/do_nothing +2 -0
  202. package/python/tools/multilingual_setup/config.yaml +1 -0
  203. package/python/tools/multilingual_setup/install.sh +45 -0
  204. package/python/tools/registry/bin/_read_env +10 -0
  205. package/python/tools/registry/bin/_write_env +10 -0
  206. package/python/tools/registry/config.yaml +1 -0
  207. package/python/tools/registry/install.sh +6 -0
  208. package/python/tools/registry/lib/__init__.py +0 -0
  209. package/python/tools/registry/lib/registry.py +56 -0
  210. package/python/tools/review_on_submit_m/README.md +6 -0
  211. package/python/tools/review_on_submit_m/bin/submit +54 -0
  212. package/python/tools/review_on_submit_m/config.yaml +6 -0
  213. package/python/tools/review_on_submit_m/install.sh +0 -0
  214. package/python/tools/search/bin/find_file +31 -0
  215. package/python/tools/search/bin/search_dir +39 -0
  216. package/python/tools/search/bin/search_file +55 -0
  217. package/python/tools/search/config.yaml +37 -0
  218. package/python/tools/search/install.sh +3 -0
  219. package/python/tools/submit/bin/submit +17 -0
  220. package/python/tools/submit/config.yaml +5 -0
  221. package/python/tools/web_browser/bin/click_mouse +41 -0
  222. package/python/tools/web_browser/bin/close_site +28 -0
  223. package/python/tools/web_browser/bin/double_click_mouse +37 -0
  224. package/python/tools/web_browser/bin/drag_mouse +46 -0
  225. package/python/tools/web_browser/bin/execute_script_on_page +39 -0
  226. package/python/tools/web_browser/bin/get_console_output +48 -0
  227. package/python/tools/web_browser/bin/move_mouse +35 -0
  228. package/python/tools/web_browser/bin/navigate_back +33 -0
  229. package/python/tools/web_browser/bin/navigate_forward +33 -0
  230. package/python/tools/web_browser/bin/open_site +36 -0
  231. package/python/tools/web_browser/bin/press_keys_on_page +51 -0
  232. package/python/tools/web_browser/bin/reload_page +33 -0
  233. package/python/tools/web_browser/bin/run_web_browser_server +394 -0
  234. package/python/tools/web_browser/bin/screenshot_site +38 -0
  235. package/python/tools/web_browser/bin/scroll_on_page +40 -0
  236. package/python/tools/web_browser/bin/set_browser_window_size +40 -0
  237. package/python/tools/web_browser/bin/type_text +34 -0
  238. package/python/tools/web_browser/bin/wait_time +39 -0
  239. package/python/tools/web_browser/config.yaml +155 -0
  240. package/python/tools/web_browser/install.sh +22 -0
  241. package/python/tools/web_browser/lib/browser_manager.py +404 -0
  242. package/python/tools/web_browser/lib/web_browser_config.py +33 -0
  243. package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
  244. package/python/tools/web_browser/test_console.html +1 -0
  245. package/python/tools/windowed/bin/_state +25 -0
  246. package/python/tools/windowed/bin/create +29 -0
  247. package/python/tools/windowed/bin/goto +37 -0
  248. package/python/tools/windowed/bin/open +49 -0
  249. package/python/tools/windowed/bin/scroll_down +12 -0
  250. package/python/tools/windowed/bin/scroll_up +13 -0
  251. package/python/tools/windowed/config.yaml +38 -0
  252. package/python/tools/windowed/install.sh +15 -0
  253. package/python/tools/windowed/lib/__init__.py +0 -0
  254. package/python/tools/windowed/lib/flake8_utils.py +147 -0
  255. package/python/tools/windowed/lib/windowed_file.py +312 -0
  256. package/python/tools/windowed_edit_linting/bin/edit +128 -0
  257. package/python/tools/windowed_edit_linting/config.yaml +31 -0
  258. package/python/tools/windowed_edit_linting/install.sh +5 -0
  259. package/python/tools/windowed_edit_replace/bin/edit +172 -0
  260. package/python/tools/windowed_edit_replace/bin/insert +77 -0
  261. package/python/tools/windowed_edit_replace/config.yaml +60 -0
  262. package/python/tools/windowed_edit_replace/install.sh +5 -0
  263. package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
  264. package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
  265. package/python/tools/windowed_edit_rewrite/install.sh +5 -0
  266. package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
  267. package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
  268. package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
  269. package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
  270. package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
  271. package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
  272. package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
  273. package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
  274. package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
  275. package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
  276. package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
  277. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
  278. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  279. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  280. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
  281. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
  282. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
  283. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  284. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  285. package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
  286. package/rust/Cargo.toml +100 -0
  287. package/rust/README.md +49 -0
  288. package/rust/src/agent/action_sampler.rs +130 -0
  289. package/rust/src/agent/agents.rs +1029 -0
  290. package/rust/src/agent/history_processors.rs +277 -0
  291. package/rust/src/agent/hooks/mod.rs +208 -0
  292. package/rust/src/agent/mod.rs +24 -0
  293. package/rust/src/agent/models.rs +837 -0
  294. package/rust/src/agent/problem_statement.rs +355 -0
  295. package/rust/src/agent/reviewer.rs +505 -0
  296. package/rust/src/bin/sweagent.rs +784 -0
  297. package/rust/src/environment/deployment.rs +631 -0
  298. package/rust/src/environment/hooks/mod.rs +114 -0
  299. package/rust/src/environment/mod.rs +16 -0
  300. package/rust/src/environment/repo.rs +265 -0
  301. package/rust/src/environment/runtime.rs +237 -0
  302. package/rust/src/environment/swe_env.rs +248 -0
  303. package/rust/src/exceptions.rs +228 -0
  304. package/rust/src/lib.rs +68 -0
  305. package/rust/src/monitoring.rs +482 -0
  306. package/rust/src/run/hooks/mod.rs +134 -0
  307. package/rust/src/run/mod.rs +12 -0
  308. package/rust/src/run/run_batch.rs +563 -0
  309. package/rust/src/run/run_single.rs +196 -0
  310. package/rust/src/tools/bundle.rs +224 -0
  311. package/rust/src/tools/commands.rs +173 -0
  312. package/rust/src/tools/mod.rs +295 -0
  313. package/rust/src/tools/parsing.rs +354 -0
  314. package/rust/src/tools/registry.rs +143 -0
  315. package/rust/src/types.rs +554 -0
  316. package/rust/src/utils/config.rs +105 -0
  317. package/rust/src/utils/files.rs +137 -0
  318. package/rust/src/utils/github.rs +171 -0
  319. package/rust/src/utils/log.rs +65 -0
  320. package/rust/src/utils/mod.rs +17 -0
  321. package/rust/src/utils/serialization.rs +181 -0
  322. package/rust/src/utils/template.rs +173 -0
  323. package/typescript/README.md +335 -0
@@ -0,0 +1,188 @@
1
+ # Used for our SWE-Bench lite benchmark submission from 12 Feb 2025
2
+ # Used together with swe-agent as
3
+ # sweagent run-batch --num_workers=12 --instances.type=swe_bench --instances.subset=lite --instances.split=test
4
+ # --instances.shuffle=True --instances.evaluate=True --instances.deployment.docker_args=--memory=10g --config config/retry_heavy_v3.yaml
5
+ # This template is heavily inspired by Anthropic's computer use demo
6
+ agent:
7
+ type: retry
8
+ agent_configs:
9
+ # +filemap
10
+ - type: default
11
+ model: &model
12
+ name: claude-3-7-sonnet-latest
13
+ api_key: $CLAUDE_API_KEY_ROTATION
14
+ per_instance_cost_limit: 1.5
15
+ per_instance_call_limit: 75
16
+ total_cost_limit: 1000.0
17
+ temperature: 0.0
18
+ delay: 1.0
19
+ templates:
20
+ system_template: &system_template |-
21
+ You are a helpful assistant that can interact with a computer to solve tasks.
22
+ instance_template: &instance_template |-
23
+ <uploaded_files>
24
+ {{working_dir}}
25
+ </uploaded_files>
26
+ I've uploaded a python code repository in the directory {{working_dir}}. Consider the following PR description:
27
+
28
+ <pr_description>
29
+ {{problem_statement}}
30
+ </pr_description>
31
+
32
+ Can you help me implement the necessary changes to the repository so that the requirements specified in the <pr_description> are met?
33
+ I've already taken care of all changes to any of the test files described in the <pr_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!
34
+ Your task is to make the minimal changes to non-tests files in the {{working_dir}} directory to ensure the <pr_description> is satisfied.
35
+ Follow these steps to resolve the issue:
36
+ 1. As a first step, it might be a good idea to find and read code relevant to the <pr_description>
37
+ 2. Create a script to reproduce the error and execute it with `python <filename.py>` using the bash tool, to confirm the error
38
+ 3. Edit the sourcecode of the repo to resolve the issue
39
+ 4. Rerun your reproduce script and confirm that the error is fixed!
40
+ 5. Think about edgecases and make sure your fix handles them as well
41
+ Your thinking should be thorough and so it's fine if it's very long.
42
+ next_step_template: &next_step_no_diff |-
43
+ OBSERVATION:
44
+ {{observation}}
45
+ next_step_no_output_template: &next_step_no_output_no_diff |-
46
+ Your last command ran successfully and did not produce any output.
47
+ tools:
48
+ execution_timeout: &execution_timeout 300
49
+ bundles: &vanilla_bundles
50
+ - path: tools/registry
51
+ - path: tools/edit_anthropic
52
+ - path: tools/review_on_submit_m
53
+ - path: tools/diff_state
54
+ enable_bash_tool: true
55
+ parse_function: &parse_function
56
+ type: function_calling
57
+ registry_variables:
58
+ USE_FILEMAP: 'true'
59
+ SUBMIT_REVIEW_MESSAGES: &submit_review_messages
60
+ - |
61
+ Thank you for your work on this issue. Please carefully follow the steps below to help review your changes.
62
+
63
+ 1. If you made any changes to your code after running the reproduction script, please run the reproduction script again.
64
+ If the reproduction script is failing, please revisit your changes and make sure they are correct.
65
+ If you have already removed your reproduction script, please ignore this step.
66
+ 2. Remove your reproduction script (if you haven't done so already).
67
+ 3. If you have modified any TEST files, please revert them to the state they had before you started fixing the issue.
68
+ You can do this with `git checkout -- /path/to/test/file.py`. Use below <diff> to find the files you need to revert.
69
+ 4. Run the submit command again to confirm.
70
+
71
+ Here is a list of all of your changes:
72
+
73
+ <diff>
74
+ {{diff}}
75
+ </diff>
76
+ history_processors: &vanilla_history_processors
77
+ - type: cache_control
78
+ last_n_messages: 2
79
+ # vanilla anthropic
80
+ - type: default
81
+ model: *model
82
+ templates:
83
+ system_template: *system_template
84
+ instance_template: *instance_template
85
+ next_step_template: *next_step_no_diff
86
+ next_step_no_output_template: *next_step_no_output_no_diff
87
+ tools:
88
+ execution_timeout: *execution_timeout
89
+ bundles: *vanilla_bundles
90
+ enable_bash_tool: true
91
+ parse_function: *parse_function
92
+ registry_variables:
93
+ SUBMIT_REVIEW_MESSAGES: *submit_review_messages
94
+ history_processors: *vanilla_history_processors
95
+ # + state
96
+ - type: default
97
+ model: *model
98
+ templates:
99
+ system_template: *system_template
100
+ instance_template: *instance_template
101
+ next_step_template: &next_step_with_diff |-
102
+ {% if diff %}
103
+ <diff>
104
+ Your cumulative changes so far:
105
+ {{diff}}
106
+ </diff>
107
+
108
+ {% endif %}
109
+ The observation from the last command:
110
+ {{observation}}
111
+ next_step_no_output_template: &next_step_no_output_with_diff |-
112
+ {% if diff %}
113
+ <diff>
114
+ Your cumulative changes so far:
115
+ {{diff}}
116
+ </diff>
117
+ {% endif %}
118
+
119
+ Your last command ran successfully and did not produce any output.
120
+ tools:
121
+ execution_timeout: *execution_timeout
122
+ bundles: *vanilla_bundles
123
+ enable_bash_tool: true
124
+ parse_function: *parse_function
125
+ registry_variables:
126
+ SUBMIT_REVIEW_MESSAGES: *submit_review_messages
127
+ history_processors: &diff_history_processors
128
+ - type: remove_regex
129
+ keep_last: 2
130
+ remove:
131
+ - "<diff>.*</diff>"
132
+ - type: cache_control
133
+ last_n_messages: 2
134
+ last_n_messages_offset: 2
135
+ retry_loop:
136
+ type: chooser
137
+ cost_limit: 6.0
138
+ max_attempts: 10
139
+ min_budget_for_new_attempt: 1.0
140
+ chooser:
141
+ system_template: |
142
+ You are an expert software engineer reviewing code. Your thinking is very thorough, so it is ok if its very long.
143
+ instance_template: |
144
+ You will be given a problem statement and a list of patch submissions.
145
+
146
+ Pick the most reasonable patch.
147
+ The patch should solve the problem described in the problem statement in a way that is consistent with the rest of the codebase and the conventions of the codebase.
148
+
149
+ Note: Disregard all testing code in the patch, as testing was already done in a separate step.
150
+ Having a test in the patch does not make it any better.
151
+
152
+ <IMPORTANT>The last line of your response should be the index of the patch you chose.
153
+ You must choose a single index no matter what. If you cannot decide between two or more
154
+ submissions, choose the first one of these.
155
+ </IMPORTANT>
156
+
157
+ Problem statement:
158
+ {{problem_statement}}
159
+
160
+ Submissions:
161
+ {% for submission in submissions %}
162
+ Submission {{loop.index0}}:
163
+
164
+ {{submission}}
165
+
166
+ {% endfor %}
167
+
168
+ <IMPORTANT>The last line of your response should be the index of the patch you chose without any other text.</IMPORTANT>
169
+ submission_template: |
170
+ Patch:
171
+
172
+ ```python
173
+ {{submission}}
174
+ ```
175
+
176
+ The final edited file with 30 lines of context:
177
+
178
+ ```python
179
+ {{edited_files30}}
180
+ ```
181
+ max_len_submission: &chooser_max_len_submission 5000
182
+ model: &chooser_model
183
+ name: o1
184
+ top_p: null
185
+ temperature: 1.
186
+ per_instance_cost_limit: 30
187
+ completion_kwargs:
188
+ reasoning_effort: "high"
@@ -0,0 +1,75 @@
1
+ # This template is heavily inspired by anthropic and openhands
2
+ # For running on lite:
3
+ # sweagent run-batch --num_workers=20 --instances.type=swe_bench --instances.subset=lite --instances.split=test --instances.shuffle=True --instances.evaluate=True --instances.deployment.docker_args='--memory=10g' --config config/benchmarks/250225_anthropic_filemap_simple_review.yaml
4
+ # For running on test:
5
+
6
+ agent:
7
+ type: default
8
+ templates:
9
+ system_template: |-
10
+ You are a helpful assistant that can interact with a computer to solve tasks.
11
+ instance_template: |-
12
+ <uploaded_files>
13
+ {{working_dir}}
14
+ </uploaded_files>
15
+ I've uploaded a python code repository in the directory {{working_dir}}. Consider the following PR description:
16
+
17
+ <pr_description>
18
+ {{problem_statement}}
19
+ </pr_description>
20
+
21
+ Can you help me implement the necessary changes to the repository so that the requirements specified in the <pr_description> are met?
22
+ I've already taken care of all changes to any of the test files described in the <pr_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!
23
+ Your task is to make the minimal changes to non-tests files in the {{working_dir}} directory to ensure the <pr_description> is satisfied.
24
+ Follow these steps to resolve the issue:
25
+ 1. As a first step, it might be a good idea to find and read code relevant to the <pr_description>
26
+ 2. Create a script to reproduce the error and execute it with `python <filename.py>` using the bash tool, to confirm the error
27
+ 3. Edit the sourcecode of the repo to resolve the issue
28
+ 4. Rerun your reproduce script and confirm that the error is fixed!
29
+ 5. Think about edgecases and make sure your fix handles them as well
30
+ Your thinking should be thorough and so it's fine if it's very long.
31
+ next_step_template: |-
32
+ OBSERVATION:
33
+ {{observation}}
34
+ next_step_no_output_template: |-
35
+ Your last command ran successfully and did not produce any output.
36
+ tools:
37
+ execution_timeout: 300
38
+ bundles:
39
+ - path: tools/registry
40
+ - path: tools/edit_anthropic
41
+ - path: tools/review_on_submit_m
42
+ - path: tools/diff_state
43
+ enable_bash_tool: true
44
+ parse_function:
45
+ type: function_calling
46
+ registry_variables:
47
+ USE_FILEMAP: 'true'
48
+ SUBMIT_REVIEW_MESSAGES:
49
+ - |
50
+ Thank you for your work on this issue. Please carefully follow the steps below to help review your changes.
51
+
52
+ 1. If you made any changes to your code after running the reproduction script, please run the reproduction script again.
53
+ If the reproduction script is failing, please revisit your changes and make sure they are correct.
54
+ If you have already removed your reproduction script, please ignore this step.
55
+ 2. Remove your reproduction script (if you haven't done so already).
56
+ 3. If you have modified any TEST files, please revert them to the state they had before you started fixing the issue.
57
+ You can do this with `git checkout -- /path/to/test/file.py`. Use below <diff> to find the files you need to revert.
58
+ 4. Run the submit command again to confirm.
59
+
60
+ Here is a list of all of your changes:
61
+
62
+ <diff>
63
+ {{diff}}
64
+ </diff>
65
+ history_processors:
66
+ - type: cache_control
67
+ last_n_messages: 2
68
+ model:
69
+ name: claude-3-7-sonnet-20250219
70
+ api_key: $CLAUDE_API_KEY_ROTATION
71
+ per_instance_cost_limit: 2
72
+ per_instance_call_limit: 150
73
+ total_cost_limit: 1000.0
74
+ temperature: 0.0
75
+ delay: 0.0
@@ -0,0 +1,92 @@
1
+ # This template only features minor adaptations from the 250225 config.
2
+ # For running on verified (the subset configured under `instances` below):
3
+ # sweagent run-batch --config config/benchmarks/250522_anthropic_filemap_simple_review.yaml --num_workers=20
4
+ # To fully reproduce, please run from the submissions/250522-sonnet-4-sbv branch
5
+ # For running on test:
6
+ random_delay_multiplier: 1.0
7
+ instances:
8
+ type: swe_bench
9
+ subset: verified
10
+ split: test
11
+ shuffle: true
12
+ evaluate: true
13
+ deployment:
14
+ type: docker
15
+ docker_args:
16
+ - '--memory=10g'
17
+ agent:
18
+ type: default
19
+ templates:
20
+ system_template: |-
21
+ You are a helpful assistant that can interact with a computer to solve tasks.
22
+ instance_template: |-
23
+ <uploaded_files>
24
+ {{working_dir}}
25
+ </uploaded_files>
26
+ I've uploaded a python code repository in the directory {{working_dir}}. Consider the following PR description:
27
+
28
+ <pr_description>
29
+ {{problem_statement}}
30
+ </pr_description>
31
+
32
+ Can you help me implement the necessary changes to the repository so that the requirements specified in the <pr_description> are met?
33
+ I've already taken care of all changes to any of the test files described in the <pr_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!
34
+ Your task is to make the minimal changes to non-tests files in the {{working_dir}} directory to ensure the <pr_description> is satisfied.
35
+ Follow these steps to resolve the issue:
36
+ 1. As a first step, it might be a good idea to find and read code relevant to the <pr_description>
37
+ 2. Create a script to reproduce the error and execute it with `python <filename.py>` using the bash tool, to confirm the error
38
+ 3. Edit the sourcecode of the repo to resolve the issue
39
+ 4. Rerun your reproduce script and confirm that the error is fixed!
40
+ 5. Think about edgecases and make sure your fix handles them as well
41
+ Your thinking should be thorough and so it's fine if it's very long.
42
+ next_step_template: |-
43
+ OBSERVATION:
44
+ {{observation}}
45
+ next_step_no_output_template: |-
46
+ Your last command ran successfully and did not produce any output.
47
+ tools:
48
+ execution_timeout: 300
49
+ bundles:
50
+ - path: tools/registry
51
+ - path: tools/edit_anthropic
52
+ - path: tools/review_on_submit_m
53
+ - path: tools/diff_state
54
+ enable_bash_tool: true
55
+ parse_function:
56
+ type: function_calling
57
+ env_variables:
58
+ PAGER: cat
59
+ MANPAGER: cat
60
+ LESS: -R
61
+ PIP_PROGRESS_BAR: 'off'
62
+ TQDM_DISABLE: '1'
63
+ registry_variables:
64
+ USE_FILEMAP: 'true'
65
+ SUBMIT_REVIEW_MESSAGES:
66
+ - |
67
+ Thank you for your work on this issue. Please carefully follow the steps below to help review your changes.
68
+
69
+ 1. If you made any changes to your code after running the reproduction script, please run the reproduction script again.
70
+ If the reproduction script is failing, please revisit your changes and make sure they are correct.
71
+ If you have already removed your reproduction script, please ignore this step.
72
+ 2. Remove your reproduction script (if you haven't done so already).
73
+ 3. If you have modified any TEST files, please revert them to the state they had before you started fixing the issue.
74
+ You can do this with `git checkout -- /path/to/test/file.py`. Use below <diff> to find the files you need to revert.
75
+ 4. Run the submit command again to confirm.
76
+
77
+ Here is a list of all of your changes:
78
+
79
+ <diff>
80
+ {{diff}}
81
+ </diff>
82
+ history_processors:
83
+ - type: cache_control
84
+ last_n_messages: 2
85
+ model:
86
+ name: claude-sonnet-4-20250514
87
+ api_key: $CLAUDE_API_KEY_ROTATION
88
+ per_instance_cost_limit: 3
89
+ per_instance_call_limit: 150
90
+ total_cost_limit: 1000.0
91
+ temperature: 0.0
92
+ delay: 0.0
@@ -0,0 +1,93 @@
1
+ # Same as the 250522 config, but targets the lite subset with a $5 limit/instance, per_instance_call_limit set to 0, and the anthropic-beta output-128k header
2
+ # For running on lite:
3
+ # sweagent run-batch --config config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml --num_workers=20
4
+ # To fully reproduce, please run from the submissions/250526-sonnet-4-sbl branch
5
+ # For running on test:
6
+ random_delay_multiplier: 1.0
7
+ instances:
8
+ type: swe_bench
9
+ subset: lite
10
+ split: test
11
+ shuffle: true
12
+ evaluate: true
13
+ deployment:
14
+ type: docker
15
+ docker_args:
16
+ - '--memory=10g'
17
+ agent:
18
+ type: default
19
+ templates:
20
+ system_template: |-
21
+ You are a helpful assistant that can interact with a computer to solve tasks.
22
+ instance_template: |-
23
+ <uploaded_files>
24
+ {{working_dir}}
25
+ </uploaded_files>
26
+ I've uploaded a python code repository in the directory {{working_dir}}. Consider the following PR description:
27
+
28
+ <pr_description>
29
+ {{problem_statement}}
30
+ </pr_description>
31
+
32
+ Can you help me implement the necessary changes to the repository so that the requirements specified in the <pr_description> are met?
33
+ I've already taken care of all changes to any of the test files described in the <pr_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!
34
+ Your task is to make the minimal changes to non-tests files in the {{working_dir}} directory to ensure the <pr_description> is satisfied.
35
+ Follow these steps to resolve the issue:
36
+ 1. As a first step, it might be a good idea to find and read code relevant to the <pr_description>
37
+ 2. Create a script to reproduce the error and execute it with `python <filename.py>` using the bash tool, to confirm the error
38
+ 3. Edit the sourcecode of the repo to resolve the issue
39
+ 4. Rerun your reproduce script and confirm that the error is fixed!
40
+ 5. Think about edgecases and make sure your fix handles them as well
41
+ Your thinking should be thorough and so it's fine if it's very long.
42
+ next_step_template: |-
43
+ OBSERVATION:
44
+ {{observation}}
45
+ next_step_no_output_template: |-
46
+ Your last command ran successfully and did not produce any output.
47
+ tools:
48
+ execution_timeout: 300
49
+ bundles:
50
+ - path: tools/registry
51
+ - path: tools/edit_anthropic
52
+ - path: tools/review_on_submit_m
53
+ - path: tools/diff_state
54
+ enable_bash_tool: true
55
+ parse_function:
56
+ type: function_calling
57
+ env_variables:
58
+ PAGER: cat
59
+ MANPAGER: cat
60
+ LESS: -R
61
+ PIP_PROGRESS_BAR: 'off'
62
+ TQDM_DISABLE: '1'
63
+ registry_variables:
64
+ USE_FILEMAP: 'true'
65
+ SUBMIT_REVIEW_MESSAGES:
66
+ - |
67
+ Thank you for your work on this issue. Please carefully follow the steps below to help review your changes.
68
+
69
+ 1. If you made any changes to your code after running the reproduction script, please run the reproduction script again.
70
+ If the reproduction script is failing, please revisit your changes and make sure they are correct.
71
+ If you have already removed your reproduction script, please ignore this step.
72
+ 2. Remove your reproduction script (if you haven't done so already).
73
+ 3. If you have modified any TEST files, please revert them to the state they had before you started fixing the issue.
74
+ You can do this with `git checkout -- /path/to/test/file.py`. Use below <diff> to find the files you need to revert.
75
+ 4. Run the submit command again to confirm.
76
+
77
+ Here is a list of all of your changes:
78
+
79
+ <diff>
80
+ {{diff}}
81
+ </diff>
82
+ history_processors:
83
+ - type: cache_control
84
+ last_n_messages: 2
85
+ model:
86
+ name: claude-sonnet-4-20250514
87
+ api_key: $CLAUDE_API_KEY_ROTATION
88
+ per_instance_cost_limit: 5
89
+ per_instance_call_limit: 0
90
+ total_cost_limit: 1000.0
91
+ temperature: 0.0
92
+ delay: 0.0
93
+ completion_kwargs: {'extra_headers': {'anthropic-beta': 'output-128k-2025-02-19'}}
@@ -0,0 +1,66 @@
1
+ # This template is heavily inspired by anthropic, but you can use it with any LM. It is almost
2
+ # identical to anthropic_filemap.yaml, but it removes python-specific language
3
+ # and adds the multilingual_setup tool to support evaluation on the Multilingual dataset.
4
+ agent:
5
+ type: default
6
+ templates:
7
+ system_template: |-
8
+ You are a helpful assistant that can interact with a computer to solve tasks.
9
+ instance_template: |-
10
+ <uploaded_files>
11
+ {{working_dir}}
12
+ </uploaded_files>
13
+ I've uploaded a code repository in the directory {{working_dir}}. Consider the following PR description:
14
+
15
+ <pr_description>
16
+ {{problem_statement}}
17
+ </pr_description>
18
+
19
+ Can you help me implement the necessary changes to the repository so that the requirements specified in the <pr_description> are met?
20
+ I've already taken care of all changes to any of the test files described in the <pr_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!
21
+ Your task is to make the minimal changes to non-tests files in the {{working_dir}} directory to ensure the <pr_description> is satisfied.
22
+ Follow these steps to resolve the issue:
23
+ 1. As a first step, it might be a good idea to find and read code relevant to the <pr_description>
24
+ 2. Create a script to reproduce the error and execute it using the bash tool, to confirm the error
25
+ 3. Edit the sourcecode of the repo to resolve the issue
26
+ 4. Rerun your reproduce script and confirm that the error is fixed!
27
+ 5. Think about edgecases and make sure your fix handles them as well
28
+ Your thinking should be thorough and so it's fine if it's very long.
29
+ next_step_template: |-
30
+ OBSERVATION:
31
+ {{observation}}
32
+ next_step_no_output_template: |-
33
+ Your command ran successfully and did not produce any output.
34
+ tools:
35
+ execution_timeout: 300
36
+ bundles:
37
+ - path: tools/multilingual_setup
38
+ - path: tools/registry
39
+ - path: tools/edit_anthropic
40
+ - path: tools/review_on_submit_m
41
+ - path: tools/diff_state
42
+ enable_bash_tool: true
43
+ parse_function:
44
+ type: function_calling
45
+ registry_variables:
46
+ USE_FILEMAP: 'true'
47
+ SUBMIT_REVIEW_MESSAGES:
48
+ - |
49
+ Thank you for your work on this issue. Please carefully follow the steps below to help review your changes.
50
+
51
+ 1. If you made any changes to your code after running the reproduction script, please run the reproduction script again.
52
+ If the reproduction script is failing, please revisit your changes and make sure they are correct.
53
+ If you have already removed your reproduction script, please ignore this step.
54
+ 2. Remove your reproduction script (if you haven't done so already).
55
+ 3. If you have modified any TEST files, please revert them to the state they had before you started fixing the issue.
56
+ You can do this with `git checkout -- /path/to/test/file`. Use below <diff> to find the files you need to revert.
57
+ 4. Run the submit command again to confirm.
58
+
59
+ Here is a list of all of your changes:
60
+
61
+ <diff>
62
+ {{diff}}
63
+ </diff>
64
+ history_processors:
65
+ - type: cache_control
66
+ last_n_messages: 2
@@ -0,0 +1,104 @@
1
+ # This is the template you should use when using SWE-agent to solve a coding challenge (e.g. LeetCode).
2
+ # It also shows how to repurpose the agent to do tasks different from software engineering.
3
+ agent:
4
+ templates:
5
+ system_template: |-
6
+ SETTING: You are an autonomous programmer, and you're working directly in the command line with a special interface.
7
+
8
+ The special interface consists of a file editor that shows you {{WINDOW}} lines of a file at a time.
9
+ In addition to typical bash commands, you can also use the following commands to help you navigate and edit files.
10
+
11
+ COMMANDS:
12
+ {{command_docs}}
13
+
14
+ Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION.
15
+ If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
16
+
17
+ RESPONSE FORMAT:
18
+ Your shell prompt is formatted as follows:
19
+ (Open file: <path>) <cwd> $
20
+
21
+ You need to format your output using two fields; discussion and command.
22
+ Your output should always include _one_ discussion and _one_ command field EXACTLY as in the following example:
23
+ DISCUSSION
24
+ First I'll start by using ls to see what files are in the current directory. Then maybe we can look at some relevant files to see what they look like.
25
+ ```
26
+ ls -a
27
+ ```
28
+
29
+ You should only include a *SINGLE* command in the command section and then wait for a response from the shell before continuing with more discussion and commands. Everything you include in the DISCUSSION section will be saved for future reference.
30
+ If you'd like to issue two commands at once, PLEASE DO NOT DO THAT! Please instead first submit just the first command, and then after receiving a response you'll be able to issue the second command.
31
+ You're free to use any other bash commands you want (e.g. find, grep, cat, ls, cd) in addition to the special commands listed above.
32
+ However, the environment does NOT support interactive session commands (e.g. python, vim), so please do not invoke them.
33
+ instance_template: |-
34
+ We're currently attempting to solve the following problem:
35
+ ISSUE:
36
+ {{issue}}
37
+
38
+ INSTRUCTIONS:
39
+ Now, you're going to solve this issue on your own. Your terminal session has started and you're in the repository's root directory. You can use any bash commands or the special interface to help you. Edit all the files you need to and run any checks or tests that you want.
40
+ Remember, YOU CAN ONLY ENTER ONE COMMAND AT A TIME. You should always wait for feedback after every command.
41
+ When you're satisfied with all of the changes you've made, you can submit your changes to the code base by simply running the submit command.
42
+ Note however that you cannot use any interactive session commands (e.g. python, vim) in this environment, but you can write scripts and run them. E.g. you can write a python script and then run it with `python <script_name>.py`.
43
+
44
+ NOTE ABOUT THE EDIT COMMAND: Indentation really matters! When editing a file, make sure to insert appropriate indentation before each line!
45
+
46
+ IMPORTANT TIPS:
47
+ 1. Write your solution in main.py. Always test your code thoroughly before submitting, and if any of the tests fail, try to fix the code before continuing.
48
+
49
+ 2. If you run a command and it doesn't work, try running a different command. A command that did not work once will not work the second time unless you modify it!
50
+
51
+ 3. If you open a file and need to get to an area around a specific line that is not in the first 100 lines, say line 583, don't just use the scroll_down command multiple times. Instead, use the goto 583 command. It's much quicker.
52
+
53
+ 4. Always make sure to look at the currently open file and the current working directory (which appears right after the currently open file). The currently open file might be in a different directory than the working directory! Note that some commands, such as 'create', open files, so they might change the current open file.
54
+
55
+ 5. When editing files, it is easy to accidentally specify a wrong line number or to write code with incorrect indentation. Always check the code after you issue an edit to make sure that it reflects what you wanted to accomplish. If it didn't, issue another command to fix it.
56
+
57
+ (Open file: {{open_file}})
58
+ (Current directory: {{working_dir}})
59
+ bash-$
60
+ next_step_template: |-
61
+ {{observation}}
62
+ (Open file: {{open_file}})
63
+ (Current directory: {{working_dir}})
64
+ bash-$
65
+ next_step_no_output_template: |-
66
+ Your command ran successfully and did not produce any output.
67
+ (Open file: {{open_file}})
68
+ (Current directory: {{working_dir}})
69
+ bash-$
70
+ demonstration_template: |
71
+ Here is a demonstration of how to correctly accomplish this task.
72
+ It is included to show you how to correctly use the interface.
73
+ You do not need to follow exactly what is done in the demonstration.
74
+ --- DEMONSTRATION ---
75
+ {{demonstration}}
76
+ --- END OF DEMONSTRATION ---
77
+ demonstrations:
78
+ - trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj
79
+ tools:
80
+ env_variables:
81
+ WINDOW: 100
82
+ OVERLAP: 2
83
+ CURRENT_LINE: 0
84
+ CURRENT_FILE: ""
85
+ SEARCH_RESULTS: ()
86
+ SEARCH_FILES: ()
87
+ SEARCH_INDEX: 0
88
+ PAGER: cat
89
+ MANPAGER: cat
90
+ LESS: -R
91
+ PIP_PROGRESS_BAR: 'off'
92
+ TQDM_DISABLE: '1'
93
+ GIT_PAGER: cat
94
+ bundles:
95
+ - path: tools/registry
96
+ - path: tools/windowed
97
+ - path: tools/search
98
+ - path: tools/windowed_edit_replace
99
+ - path: tools/submit
100
+ parse_function:
101
+ type: thought_action
102
+ history_processors:
103
+ - type: last_n_observations
104
+ n: 5