@elizaos/sweagent-root 2.0.0-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (323) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +270 -0
  3. package/package.json +71 -0
  4. package/python/LICENSE +21 -0
  5. package/python/config/README.md +15 -0
  6. package/python/config/bash_only.yaml +222 -0
  7. package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
  8. package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
  9. package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
  10. package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
  11. package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
  12. package/python/config/coding_challenge.yaml +104 -0
  13. package/python/config/default.yaml +69 -0
  14. package/python/config/default_backticks.yaml +69 -0
  15. package/python/config/default_mm_no_images.yaml +82 -0
  16. package/python/config/default_mm_with_images.yaml +83 -0
  17. package/python/config/demo/default.yaml +80 -0
  18. package/python/config/demo/no_instructions.yaml +69 -0
  19. package/python/config/demo/only_bash.yaml +60 -0
  20. package/python/config/exotic/default_shell.yaml +52 -0
  21. package/python/config/exotic/windowed_replace.yaml +125 -0
  22. package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
  23. package/python/config/human/human.yaml +24 -0
  24. package/python/config/human/human_demo.yaml +52 -0
  25. package/python/config/sweagent_0_7/07.yaml +101 -0
  26. package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
  27. package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
  28. package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
  29. package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
  30. package/python/mlc_config.json +44 -0
  31. package/python/pyproject.toml +262 -0
  32. package/python/sweagent/__init__.py +114 -0
  33. package/python/sweagent/__main__.py +4 -0
  34. package/python/sweagent/agent/__init__.py +0 -0
  35. package/python/sweagent/agent/action_sampler.py +317 -0
  36. package/python/sweagent/agent/agents.py +1294 -0
  37. package/python/sweagent/agent/extra/shell_agent.py +106 -0
  38. package/python/sweagent/agent/history_processors.py +399 -0
  39. package/python/sweagent/agent/hooks/__init__.py +0 -0
  40. package/python/sweagent/agent/hooks/abstract.py +139 -0
  41. package/python/sweagent/agent/hooks/status.py +34 -0
  42. package/python/sweagent/agent/models.py +896 -0
  43. package/python/sweagent/agent/problem_statement.py +312 -0
  44. package/python/sweagent/agent/reviewer.py +664 -0
  45. package/python/sweagent/environment/__init__.py +0 -0
  46. package/python/sweagent/environment/hooks/__init__.py +0 -0
  47. package/python/sweagent/environment/hooks/abstract.py +60 -0
  48. package/python/sweagent/environment/hooks/status.py +28 -0
  49. package/python/sweagent/environment/repo.py +219 -0
  50. package/python/sweagent/environment/swe_env.py +276 -0
  51. package/python/sweagent/exceptions.py +54 -0
  52. package/python/sweagent/inspector/README.md +6 -0
  53. package/python/sweagent/inspector/__init__.py +0 -0
  54. package/python/sweagent/inspector/favicon.ico +0 -0
  55. package/python/sweagent/inspector/fileViewer.js +354 -0
  56. package/python/sweagent/inspector/icons/computer.png +0 -0
  57. package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
  58. package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
  59. package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
  60. package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
  61. package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
  62. package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
  63. package/python/sweagent/inspector/index.html +25 -0
  64. package/python/sweagent/inspector/server.py +354 -0
  65. package/python/sweagent/inspector/static.py +169 -0
  66. package/python/sweagent/inspector/style.css +454 -0
  67. package/python/sweagent/run/__init__.py +0 -0
  68. package/python/sweagent/run/_progress.py +158 -0
  69. package/python/sweagent/run/batch_instances.py +419 -0
  70. package/python/sweagent/run/common.py +387 -0
  71. package/python/sweagent/run/compare_runs.py +123 -0
  72. package/python/sweagent/run/extract_pred.py +19 -0
  73. package/python/sweagent/run/hooks/__init__.py +0 -0
  74. package/python/sweagent/run/hooks/abstract.py +67 -0
  75. package/python/sweagent/run/hooks/apply_patch.py +106 -0
  76. package/python/sweagent/run/hooks/open_pr.py +244 -0
  77. package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
  78. package/python/sweagent/run/inspector_cli.py +493 -0
  79. package/python/sweagent/run/merge_predictions.py +64 -0
  80. package/python/sweagent/run/quick_stats.py +96 -0
  81. package/python/sweagent/run/remove_unfinished.py +63 -0
  82. package/python/sweagent/run/rich_test.py +91 -0
  83. package/python/sweagent/run/run.py +147 -0
  84. package/python/sweagent/run/run_batch.py +442 -0
  85. package/python/sweagent/run/run_replay.py +219 -0
  86. package/python/sweagent/run/run_shell.py +155 -0
  87. package/python/sweagent/run/run_single.py +225 -0
  88. package/python/sweagent/run/run_traj_to_demo.py +85 -0
  89. package/python/sweagent/tools/__init__.py +0 -0
  90. package/python/sweagent/tools/bundle.py +57 -0
  91. package/python/sweagent/tools/commands.py +220 -0
  92. package/python/sweagent/tools/parsing.py +619 -0
  93. package/python/sweagent/tools/tools.py +430 -0
  94. package/python/sweagent/tools/utils.py +108 -0
  95. package/python/sweagent/types.py +102 -0
  96. package/python/sweagent/utils/__init__.py +0 -0
  97. package/python/sweagent/utils/config.py +80 -0
  98. package/python/sweagent/utils/files.py +27 -0
  99. package/python/sweagent/utils/github.py +118 -0
  100. package/python/sweagent/utils/jinja_warnings.py +14 -0
  101. package/python/sweagent/utils/log.py +175 -0
  102. package/python/sweagent/utils/patch_formatter.py +152 -0
  103. package/python/sweagent/utils/serialization.py +45 -0
  104. package/python/tests/__init__.py +0 -0
  105. package/python/tests/conftest.py +191 -0
  106. package/python/tests/test_agent.py +258 -0
  107. package/python/tests/test_batch_instance.py +43 -0
  108. package/python/tests/test_commands/_interactive_dummy.py +35 -0
  109. package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
  110. package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
  111. package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
  112. package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
  113. package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
  114. package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
  115. package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
  116. package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
  117. package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
  118. package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
  119. package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
  120. package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
  121. package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
  122. package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
  123. package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
  124. package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
  125. package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
  126. package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
  127. package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
  128. package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
  129. package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
  130. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
  131. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
  132. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
  133. package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
  134. package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
  135. package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
  136. package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
  137. package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
  138. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
  139. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
  140. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
  141. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
  142. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
  143. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
  144. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
  145. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
  146. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
  147. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
  148. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
  149. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
  150. package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
  151. package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
  152. package/python/tests/test_data/data_sources/human_eval.json +1 -0
  153. package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
  154. package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
  155. package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
  156. package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
  157. package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
  158. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
  159. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
  160. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
  161. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
  162. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
  163. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
  164. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
  165. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
  166. package/python/tests/test_env.py +66 -0
  167. package/python/tests/test_env_utils.py +129 -0
  168. package/python/tests/test_history_processors.py +40 -0
  169. package/python/tests/test_models.py +23 -0
  170. package/python/tests/test_openai_live.py +164 -0
  171. package/python/tests/test_packaging.py +7 -0
  172. package/python/tests/test_parsing.py +131 -0
  173. package/python/tests/test_problem_statement_multimodal.py +111 -0
  174. package/python/tests/test_quick_stats.py +42 -0
  175. package/python/tests/test_run.py +37 -0
  176. package/python/tests/test_run_batch.py +110 -0
  177. package/python/tests/test_run_hooks.py +114 -0
  178. package/python/tests/test_run_replay.py +33 -0
  179. package/python/tests/test_run_single.py +125 -0
  180. package/python/tests/test_tools_command_parsing.py +193 -0
  181. package/python/tests/test_utils.py +15 -0
  182. package/python/tests/tools/__init__.py +0 -0
  183. package/python/tests/tools/conftest.py +12 -0
  184. package/python/tests/tools/test_default_utils.py +153 -0
  185. package/python/tests/tools/test_edit_replace.py +0 -0
  186. package/python/tests/tools/test_split_string.py +82 -0
  187. package/python/tests/utils.py +29 -0
  188. package/python/tools/diff_state/bin/_state_diff_state +52 -0
  189. package/python/tools/diff_state/config.yaml +2 -0
  190. package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
  191. package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
  192. package/python/tools/edit_anthropic/config.yaml +56 -0
  193. package/python/tools/edit_anthropic/install.sh +3 -0
  194. package/python/tools/filemap/bin/filemap +45 -0
  195. package/python/tools/filemap/config.yaml +9 -0
  196. package/python/tools/filemap/install.sh +2 -0
  197. package/python/tools/forfeit/bin/exit_forfeit +5 -0
  198. package/python/tools/forfeit/config.yaml +5 -0
  199. package/python/tools/image_tools/bin/view_image +36 -0
  200. package/python/tools/image_tools/config.yaml +9 -0
  201. package/python/tools/multilingual_setup/bin/do_nothing +2 -0
  202. package/python/tools/multilingual_setup/config.yaml +1 -0
  203. package/python/tools/multilingual_setup/install.sh +45 -0
  204. package/python/tools/registry/bin/_read_env +10 -0
  205. package/python/tools/registry/bin/_write_env +10 -0
  206. package/python/tools/registry/config.yaml +1 -0
  207. package/python/tools/registry/install.sh +6 -0
  208. package/python/tools/registry/lib/__init__.py +0 -0
  209. package/python/tools/registry/lib/registry.py +56 -0
  210. package/python/tools/review_on_submit_m/README.md +6 -0
  211. package/python/tools/review_on_submit_m/bin/submit +54 -0
  212. package/python/tools/review_on_submit_m/config.yaml +6 -0
  213. package/python/tools/review_on_submit_m/install.sh +0 -0
  214. package/python/tools/search/bin/find_file +31 -0
  215. package/python/tools/search/bin/search_dir +39 -0
  216. package/python/tools/search/bin/search_file +55 -0
  217. package/python/tools/search/config.yaml +37 -0
  218. package/python/tools/search/install.sh +3 -0
  219. package/python/tools/submit/bin/submit +17 -0
  220. package/python/tools/submit/config.yaml +5 -0
  221. package/python/tools/web_browser/bin/click_mouse +41 -0
  222. package/python/tools/web_browser/bin/close_site +28 -0
  223. package/python/tools/web_browser/bin/double_click_mouse +37 -0
  224. package/python/tools/web_browser/bin/drag_mouse +46 -0
  225. package/python/tools/web_browser/bin/execute_script_on_page +39 -0
  226. package/python/tools/web_browser/bin/get_console_output +48 -0
  227. package/python/tools/web_browser/bin/move_mouse +35 -0
  228. package/python/tools/web_browser/bin/navigate_back +33 -0
  229. package/python/tools/web_browser/bin/navigate_forward +33 -0
  230. package/python/tools/web_browser/bin/open_site +36 -0
  231. package/python/tools/web_browser/bin/press_keys_on_page +51 -0
  232. package/python/tools/web_browser/bin/reload_page +33 -0
  233. package/python/tools/web_browser/bin/run_web_browser_server +394 -0
  234. package/python/tools/web_browser/bin/screenshot_site +38 -0
  235. package/python/tools/web_browser/bin/scroll_on_page +40 -0
  236. package/python/tools/web_browser/bin/set_browser_window_size +40 -0
  237. package/python/tools/web_browser/bin/type_text +34 -0
  238. package/python/tools/web_browser/bin/wait_time +39 -0
  239. package/python/tools/web_browser/config.yaml +155 -0
  240. package/python/tools/web_browser/install.sh +22 -0
  241. package/python/tools/web_browser/lib/browser_manager.py +404 -0
  242. package/python/tools/web_browser/lib/web_browser_config.py +33 -0
  243. package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
  244. package/python/tools/web_browser/test_console.html +1 -0
  245. package/python/tools/windowed/bin/_state +25 -0
  246. package/python/tools/windowed/bin/create +29 -0
  247. package/python/tools/windowed/bin/goto +37 -0
  248. package/python/tools/windowed/bin/open +49 -0
  249. package/python/tools/windowed/bin/scroll_down +12 -0
  250. package/python/tools/windowed/bin/scroll_up +13 -0
  251. package/python/tools/windowed/config.yaml +38 -0
  252. package/python/tools/windowed/install.sh +15 -0
  253. package/python/tools/windowed/lib/__init__.py +0 -0
  254. package/python/tools/windowed/lib/flake8_utils.py +147 -0
  255. package/python/tools/windowed/lib/windowed_file.py +312 -0
  256. package/python/tools/windowed_edit_linting/bin/edit +128 -0
  257. package/python/tools/windowed_edit_linting/config.yaml +31 -0
  258. package/python/tools/windowed_edit_linting/install.sh +5 -0
  259. package/python/tools/windowed_edit_replace/bin/edit +172 -0
  260. package/python/tools/windowed_edit_replace/bin/insert +77 -0
  261. package/python/tools/windowed_edit_replace/config.yaml +60 -0
  262. package/python/tools/windowed_edit_replace/install.sh +5 -0
  263. package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
  264. package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
  265. package/python/tools/windowed_edit_rewrite/install.sh +5 -0
  266. package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
  267. package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
  268. package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
  269. package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
  270. package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
  271. package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
  272. package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
  273. package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
  274. package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
  275. package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
  276. package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
  277. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
  278. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  279. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  280. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
  281. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
  282. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
  283. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  284. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  285. package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
  286. package/rust/Cargo.toml +100 -0
  287. package/rust/README.md +49 -0
  288. package/rust/src/agent/action_sampler.rs +130 -0
  289. package/rust/src/agent/agents.rs +1029 -0
  290. package/rust/src/agent/history_processors.rs +277 -0
  291. package/rust/src/agent/hooks/mod.rs +208 -0
  292. package/rust/src/agent/mod.rs +24 -0
  293. package/rust/src/agent/models.rs +837 -0
  294. package/rust/src/agent/problem_statement.rs +355 -0
  295. package/rust/src/agent/reviewer.rs +505 -0
  296. package/rust/src/bin/sweagent.rs +784 -0
  297. package/rust/src/environment/deployment.rs +631 -0
  298. package/rust/src/environment/hooks/mod.rs +114 -0
  299. package/rust/src/environment/mod.rs +16 -0
  300. package/rust/src/environment/repo.rs +265 -0
  301. package/rust/src/environment/runtime.rs +237 -0
  302. package/rust/src/environment/swe_env.rs +248 -0
  303. package/rust/src/exceptions.rs +228 -0
  304. package/rust/src/lib.rs +68 -0
  305. package/rust/src/monitoring.rs +482 -0
  306. package/rust/src/run/hooks/mod.rs +134 -0
  307. package/rust/src/run/mod.rs +12 -0
  308. package/rust/src/run/run_batch.rs +563 -0
  309. package/rust/src/run/run_single.rs +196 -0
  310. package/rust/src/tools/bundle.rs +224 -0
  311. package/rust/src/tools/commands.rs +173 -0
  312. package/rust/src/tools/mod.rs +295 -0
  313. package/rust/src/tools/parsing.rs +354 -0
  314. package/rust/src/tools/registry.rs +143 -0
  315. package/rust/src/types.rs +554 -0
  316. package/rust/src/utils/config.rs +105 -0
  317. package/rust/src/utils/files.rs +137 -0
  318. package/rust/src/utils/github.rs +171 -0
  319. package/rust/src/utils/log.rs +65 -0
  320. package/rust/src/utils/mod.rs +17 -0
  321. package/rust/src/utils/serialization.rs +181 -0
  322. package/rust/src/utils/template.rs +173 -0
  323. package/typescript/README.md +335 -0
@@ -0,0 +1,295 @@
1
+ //! Tools module for SWE-agent
2
+ //!
3
+ //! This module provides tool handling, command parsing, and action processing.
4
+
5
+ pub mod bundle;
6
+ pub mod commands;
7
+ pub mod parsing;
8
+ pub mod registry;
9
+
10
+ pub use bundle::*;
11
+ pub use commands::*;
12
+ pub use parsing::*;
13
+ pub use registry::*;
14
+
15
+ use crate::environment::SWEEnv;
16
+ use crate::exceptions::Result;
17
+ use crate::types::ModelOutput;
18
+ use regex::Regex;
19
+ use serde::{Deserialize, Serialize};
20
+ use std::collections::HashMap;
21
+
22
+ /// Configuration for tool filtering
23
+ #[derive(Debug, Clone, Default, Serialize, Deserialize)]
24
+ pub struct ToolFilterConfig {
25
+ #[serde(default)]
26
+ pub blocklist_error_template: String,
27
+ #[serde(default)]
28
+ pub blocklist: Vec<String>,
29
+ #[serde(default)]
30
+ pub blocklist_standalone: Vec<String>,
31
+ #[serde(default)]
32
+ pub block_unless_regex: HashMap<String, String>,
33
+ }
34
+
35
+ /// Configuration for tools
36
+ #[derive(Debug, Clone, Serialize, Deserialize)]
37
+ pub struct ToolConfig {
38
+ #[serde(default)]
39
+ pub commands: Vec<BundleConfig>,
40
+ #[serde(skip_serializing_if = "Option::is_none")]
41
+ pub parse_function: Option<ParseFunctionConfig>,
42
+ #[serde(default = "default_execution_timeout")]
43
+ pub execution_timeout: u64,
44
+ #[serde(default = "default_max_consecutive_timeouts")]
45
+ pub max_consecutive_execution_timeouts: usize,
46
+ #[serde(default = "default_total_execution_timeout")]
47
+ pub total_execution_timeout: u64,
48
+ #[serde(default = "default_submit_command")]
49
+ pub submit_command: String,
50
+ #[serde(default)]
51
+ pub use_function_calling: bool,
52
+ #[serde(skip_serializing_if = "Option::is_none")]
53
+ pub filter: Option<ToolFilterConfig>,
54
+ #[serde(default = "default_format_error_template")]
55
+ pub format_error_template: String,
56
+ #[serde(skip_serializing_if = "Option::is_none")]
57
+ pub command_docs: Option<String>,
58
+ #[serde(default)]
59
+ pub env_variables: HashMap<String, String>,
60
+ }
61
+
62
/// Serde default for `ToolConfig::execution_timeout`.
fn default_execution_timeout() -> u64 {
    const EXECUTION_TIMEOUT: u64 = 500;
    EXECUTION_TIMEOUT
}
65
+
66
/// Serde default for `ToolConfig::max_consecutive_execution_timeouts`.
fn default_max_consecutive_timeouts() -> usize {
    const MAX_CONSECUTIVE_TIMEOUTS: usize = 3;
    MAX_CONSECUTIVE_TIMEOUTS
}
69
+
70
/// Serde default for `ToolConfig::total_execution_timeout`.
fn default_total_execution_timeout() -> u64 {
    const TOTAL_EXECUTION_TIMEOUT: u64 = 7200;
    TOTAL_EXECUTION_TIMEOUT
}
73
+
74
/// Serde default for `ToolConfig::submit_command`.
fn default_submit_command() -> String {
    String::from("submit")
}
77
+
78
/// Serde default for `ToolConfig::format_error_template`.
fn default_format_error_template() -> String {
    String::from("Invalid format. Please use the correct format for actions.")
}
81
+
82
+ impl Default for ToolConfig {
83
+ fn default() -> Self {
84
+ Self {
85
+ commands: Vec::new(),
86
+ parse_function: None,
87
+ execution_timeout: default_execution_timeout(),
88
+ max_consecutive_execution_timeouts: default_max_consecutive_timeouts(),
89
+ total_execution_timeout: default_total_execution_timeout(),
90
+ submit_command: default_submit_command(),
91
+ use_function_calling: false,
92
+ filter: None,
93
+ format_error_template: default_format_error_template(),
94
+ command_docs: None,
95
+ env_variables: HashMap::new(),
96
+ }
97
+ }
98
+ }
99
+
100
+ /// Tool handler for managing agent tools
101
+ pub struct ToolHandler {
102
+ pub config: ToolConfig,
103
+ bundles: Vec<Bundle>,
104
+ parser: Box<dyn ParseFunction>,
105
+ multiline_commands: HashMap<String, String>,
106
+ }
107
+
108
+ impl ToolHandler {
109
+ pub fn new(config: ToolConfig) -> Result<Self> {
110
+ let bundles: Vec<Bundle> = config
111
+ .commands
112
+ .iter()
113
+ .map(create_bundle)
114
+ .collect::<Result<Vec<_>>>()?;
115
+
116
+ let parser = config
117
+ .parse_function
118
+ .as_ref()
119
+ .map(create_parser)
120
+ .unwrap_or_else(|| Box::new(ThoughtActionParser::new()));
121
+
122
+ let mut multiline_commands = HashMap::new();
123
+ for bundle in &bundles {
124
+ if let Some(ref end_name) = bundle.end_name {
125
+ multiline_commands.insert(bundle.name.clone(), end_name.clone());
126
+ }
127
+ }
128
+
129
+ Ok(Self {
130
+ config,
131
+ bundles,
132
+ parser,
133
+ multiline_commands,
134
+ })
135
+ }
136
+
137
+ /// Install tools in the environment
138
+ pub async fn install(&self, env: &mut SWEEnv) -> Result<()> {
139
+ // Set environment variables
140
+ if !self.config.env_variables.is_empty() {
141
+ env.set_env_variables(self.config.env_variables.clone())
142
+ .await?;
143
+ }
144
+
145
+ // Install each bundle
146
+ let cwd = env.communicate("pwd", Some(5)).await?;
147
+
148
+ for bundle in &self.bundles {
149
+ if let Some(ref install_script) = bundle.install_script {
150
+ env.communicate(install_script, Some(300)).await?;
151
+ }
152
+ }
153
+
154
+ // Return to original directory
155
+ env.communicate(&format!("cd {}", cwd.trim()), Some(5))
156
+ .await?;
157
+
158
+ Ok(())
159
+ }
160
+
161
+ /// Get current state from environment
162
+ pub async fn get_state(&self, env: &SWEEnv) -> HashMap<String, String> {
163
+ let mut state = HashMap::new();
164
+
165
+ if let Some(cwd) = env.get_cwd() {
166
+ state.insert("working_dir".to_string(), cwd);
167
+ }
168
+
169
+ let open_files = env.get_open_files();
170
+ if !open_files.is_empty() {
171
+ state.insert("open_files".to_string(), open_files.join(", "));
172
+ }
173
+
174
+ if let Ok(git_status) = env.get_git_status().await {
175
+ state.insert("git_status".to_string(), git_status);
176
+ }
177
+
178
+ state
179
+ }
180
+
181
+ /// Parse thought and action from model output
182
+ pub fn parse_actions(&self, output: &ModelOutput) -> Result<(String, String)> {
183
+ self.parser.parse(&output.message, &self.bundles, true)
184
+ }
185
+
186
+ /// Check if an action should be blocked
187
+ pub fn should_block_action(&self, action: &str) -> bool {
188
+ let action = action.trim();
189
+ if action.is_empty() {
190
+ return false;
191
+ }
192
+
193
+ if let Some(ref filter) = self.config.filter {
194
+ // Check blocklist
195
+ for blocked in &filter.blocklist {
196
+ if action.starts_with(blocked) {
197
+ return true;
198
+ }
199
+ }
200
+
201
+ // Check standalone blocklist
202
+ if filter.blocklist_standalone.contains(&action.to_string()) {
203
+ return true;
204
+ }
205
+
206
+ // Check block unless regex
207
+ let command_name = action.split_whitespace().next().unwrap_or("");
208
+ if let Some(pattern) = filter.block_unless_regex.get(command_name) {
209
+ if let Ok(re) = Regex::new(pattern) {
210
+ if !re.is_match(action) {
211
+ return true;
212
+ }
213
+ }
214
+ }
215
+ }
216
+
217
+ false
218
+ }
219
+
220
+ /// Check if observation contains submission command
221
+ pub fn check_for_submission_cmd(&self, observation: &str) -> bool {
222
+ observation.contains(crate::exceptions::tokens::SUBMISSION_MARKER)
223
+ }
224
+
225
+ /// Guard multiline input with heredoc syntax
226
+ pub fn guard_multiline_input(&self, action: &str) -> String {
227
+ for (cmd_name, end_name) in &self.multiline_commands {
228
+ let pattern = format!(r"^{}\b", regex::escape(cmd_name));
229
+ if let Ok(re) = Regex::new(&pattern) {
230
+ if re.is_match(action) {
231
+ // Check if already has heredoc syntax
232
+ if !action.contains("<<") {
233
+ let lines: Vec<&str> = action.lines().collect();
234
+ if lines.len() > 1 {
235
+ let mut guarded_lines = Vec::new();
236
+ guarded_lines.push(format!("{} << '{}'", lines[0], end_name));
237
+ guarded_lines.extend(lines[1..].iter().map(|s| s.to_string()));
238
+ guarded_lines.push(end_name.clone());
239
+ return guarded_lines.join("\n");
240
+ }
241
+ }
242
+ break;
243
+ }
244
+ }
245
+ }
246
+
247
+ action.to_string()
248
+ }
249
+ }
250
+
251
#[cfg(test)]
mod tests {
    use super::*;

    /// A handler built from the default configuration has no filter and
    /// therefore blocks nothing.
    #[test]
    fn test_tool_handler_creation() {
        let config = ToolConfig::default();
        let handler = ToolHandler::new(config).unwrap();
        assert!(!handler.should_block_action("ls -la"));
        assert!(!handler.should_block_action(""));
    }

    /// Prefix entries block by prefix; standalone entries block only on an
    /// exact (trimmed) match.
    #[test]
    fn test_should_block_action() {
        let config = ToolConfig {
            filter: Some(ToolFilterConfig {
                blocklist: vec!["rm -rf".to_string()],
                blocklist_standalone: vec!["exit".to_string()],
                ..Default::default()
            }),
            ..Default::default()
        };
        let handler = ToolHandler::new(config).unwrap();

        assert!(handler.should_block_action("rm -rf /"));
        assert!(!handler.should_block_action("rm file.txt"));

        // Standalone entries match the whole action, surrounding whitespace aside.
        assert!(handler.should_block_action("exit"));
        assert!(handler.should_block_action("  exit  "));
        assert!(!handler.should_block_action("exit 1"));
    }

    /// Multiline input for a command with an end marker is wrapped in a
    /// quoted heredoc; single-line input is returned untouched.
    #[test]
    fn test_guard_multiline_input() {
        let config = ToolConfig {
            commands: vec![BundleConfig {
                name: "edit".to_string(),
                end_name: Some("ENDEDIT".to_string()),
                ..Default::default()
            }],
            ..Default::default()
        };
        let handler = ToolHandler::new(config).unwrap();

        let multiline = "edit file.txt\nline1\nline2";
        let guarded = handler.guard_multiline_input(multiline);
        assert!(guarded.contains("<<"));
        assert!(guarded.contains("ENDEDIT"));
        assert_eq!(
            guarded,
            "edit file.txt << 'ENDEDIT'\nline1\nline2\nENDEDIT"
        );

        // A single-line action needs no heredoc.
        assert_eq!(handler.guard_multiline_input("edit file.txt"), "edit file.txt");

        // Commands without an end marker are never rewritten.
        assert_eq!(handler.guard_multiline_input("ls\npwd"), "ls\npwd");
    }
}
@@ -0,0 +1,354 @@
1
+ //! Action parsing implementations
2
+
3
+ use super::Bundle;
4
+ use crate::exceptions::{Result, SWEAgentError};
5
+ use regex::Regex;
6
+ use serde::{Deserialize, Serialize};
7
+
8
+ /// Trait for parsing model output into thought and action
9
+ pub trait ParseFunction: Send + Sync {
10
+ fn parse(&self, output: &str, bundles: &[Bundle], strict: bool) -> Result<(String, String)>;
11
+ }
12
+
13
+ /// Thought and action parser (default)
14
+ pub struct ThoughtActionParser {
15
+ thought_pattern: Regex,
16
+ action_pattern: Regex,
17
+ }
18
+
19
+ impl ThoughtActionParser {
20
+ pub fn new() -> Self {
21
+ Self {
22
+ thought_pattern: Regex::new(r"(?s)^(.*?)```").unwrap(),
23
+ action_pattern: Regex::new(r"(?s)```(?:\w+)?\n?(.*?)```").unwrap(),
24
+ }
25
+ }
26
+ }
27
+
28
+ impl Default for ThoughtActionParser {
29
+ fn default() -> Self {
30
+ Self::new()
31
+ }
32
+ }
33
+
34
+ impl ParseFunction for ThoughtActionParser {
35
+ fn parse(&self, output: &str, _bundles: &[Bundle], strict: bool) -> Result<(String, String)> {
36
+ // Extract thought (everything before the code block)
37
+ let thought = self
38
+ .thought_pattern
39
+ .captures(output)
40
+ .and_then(|c| c.get(1))
41
+ .map(|m| m.as_str().trim().to_string())
42
+ .unwrap_or_default();
43
+
44
+ // Extract action (content of the code block)
45
+ let action = self
46
+ .action_pattern
47
+ .captures(output)
48
+ .and_then(|c| c.get(1))
49
+ .map(|m| m.as_str().trim().to_string());
50
+
51
+ match action {
52
+ Some(a) => Ok((thought, a)),
53
+ None => {
54
+ if strict {
55
+ Err(SWEAgentError::FormatError(
56
+ "Could not find action in code block".to_string(),
57
+ ))
58
+ } else {
59
+ Ok((output.to_string(), String::new()))
60
+ }
61
+ }
62
+ }
63
+ }
64
+ }
65
+
66
+ /// Action-only parser (no thought required)
67
+ pub struct ActionOnlyParser {
68
+ action_pattern: Regex,
69
+ }
70
+
71
+ impl ActionOnlyParser {
72
+ pub fn new() -> Self {
73
+ Self {
74
+ action_pattern: Regex::new(r"(?s)```(?:\w+)?\n?(.*?)```").unwrap(),
75
+ }
76
+ }
77
+ }
78
+
79
+ impl Default for ActionOnlyParser {
80
+ fn default() -> Self {
81
+ Self::new()
82
+ }
83
+ }
84
+
85
+ impl ParseFunction for ActionOnlyParser {
86
+ fn parse(&self, output: &str, _bundles: &[Bundle], strict: bool) -> Result<(String, String)> {
87
+ let action = self
88
+ .action_pattern
89
+ .captures(output)
90
+ .and_then(|c| c.get(1))
91
+ .map(|m| m.as_str().trim().to_string());
92
+
93
+ match action {
94
+ Some(a) => Ok((String::new(), a)),
95
+ None => {
96
+ if strict {
97
+ Err(SWEAgentError::FormatError(
98
+ "Could not find action in code block".to_string(),
99
+ ))
100
+ } else {
101
+ // Treat the entire output as the action
102
+ Ok((String::new(), output.trim().to_string()))
103
+ }
104
+ }
105
+ }
106
+ }
107
+ }
108
+
109
+ /// XML-style thought action parser
110
+ pub struct XmlThoughtActionParser {
111
+ thought_pattern: Regex,
112
+ action_pattern: Regex,
113
+ }
114
+
115
+ impl XmlThoughtActionParser {
116
+ pub fn new() -> Self {
117
+ Self {
118
+ thought_pattern: Regex::new(r"(?s)<thought>(.*?)</thought>").unwrap(),
119
+ action_pattern: Regex::new(r"(?s)<action>(.*?)</action>").unwrap(),
120
+ }
121
+ }
122
+ }
123
+
124
+ impl Default for XmlThoughtActionParser {
125
+ fn default() -> Self {
126
+ Self::new()
127
+ }
128
+ }
129
+
130
+ impl ParseFunction for XmlThoughtActionParser {
131
+ fn parse(&self, output: &str, _bundles: &[Bundle], strict: bool) -> Result<(String, String)> {
132
+ let thought = self
133
+ .thought_pattern
134
+ .captures(output)
135
+ .and_then(|c| c.get(1))
136
+ .map(|m| m.as_str().trim().to_string())
137
+ .unwrap_or_default();
138
+
139
+ let action = self
140
+ .action_pattern
141
+ .captures(output)
142
+ .and_then(|c| c.get(1))
143
+ .map(|m| m.as_str().trim().to_string());
144
+
145
+ match action {
146
+ Some(a) => Ok((thought, a)),
147
+ None => {
148
+ if strict {
149
+ Err(SWEAgentError::FormatError(
150
+ "Could not find action in <action> tags".to_string(),
151
+ ))
152
+ } else {
153
+ Ok((output.to_string(), String::new()))
154
+ }
155
+ }
156
+ }
157
+ }
158
+ }
159
+
160
+ /// Function calling parser for OpenAI-style function calls
161
+ pub struct FunctionCallingParser;
162
+
163
+ impl FunctionCallingParser {
164
+ pub fn new() -> Self {
165
+ Self
166
+ }
167
+ }
168
+
169
+ impl Default for FunctionCallingParser {
170
+ fn default() -> Self {
171
+ Self::new()
172
+ }
173
+ }
174
+
175
+ impl ParseFunction for FunctionCallingParser {
176
+ fn parse(&self, output: &str, bundles: &[Bundle], strict: bool) -> Result<(String, String)> {
177
+ // For function calling, we expect the output to be JSON or contain tool calls
178
+ // This is a simplified implementation
179
+
180
+ // Try to parse as JSON tool call
181
+ if let Ok(json) = serde_json::from_str::<serde_json::Value>(output) {
182
+ if let Some(name) = json.get("name").and_then(|n| n.as_str()) {
183
+ let args = json
184
+ .get("arguments")
185
+ .map(|a| a.to_string())
186
+ .unwrap_or_default();
187
+ return Ok((String::new(), format!("{} {}", name, args)));
188
+ }
189
+ }
190
+
191
+ // Fall back to thought action parsing
192
+ ThoughtActionParser::new().parse(output, bundles, strict)
193
+ }
194
+ }
195
+
196
+ /// JSON parser for structured output
197
+ pub struct JsonParser;
198
+
199
+ impl JsonParser {
200
+ pub fn new() -> Self {
201
+ Self
202
+ }
203
+ }
204
+
205
+ impl Default for JsonParser {
206
+ fn default() -> Self {
207
+ Self::new()
208
+ }
209
+ }
210
+
211
+ impl ParseFunction for JsonParser {
212
+ fn parse(&self, output: &str, _bundles: &[Bundle], strict: bool) -> Result<(String, String)> {
213
+ // Try to find JSON in the output
214
+ let json_pattern = Regex::new(r"(?s)\{.*\}").unwrap();
215
+
216
+ if let Some(json_match) = json_pattern.find(output) {
217
+ let json_str = json_match.as_str();
218
+ if let Ok(json) = serde_json::from_str::<serde_json::Value>(json_str) {
219
+ let thought = json
220
+ .get("thought")
221
+ .and_then(|t| t.as_str())
222
+ .unwrap_or("")
223
+ .to_string();
224
+ let action = json
225
+ .get("action")
226
+ .and_then(|a| a.as_str())
227
+ .unwrap_or("")
228
+ .to_string();
229
+ return Ok((thought, action));
230
+ }
231
+ }
232
+
233
+ if strict {
234
+ Err(SWEAgentError::FormatError(
235
+ "Could not parse JSON output".to_string(),
236
+ ))
237
+ } else {
238
+ Ok((output.to_string(), String::new()))
239
+ }
240
+ }
241
+ }
242
+
243
+ /// Identity parser that returns output as-is
244
+ pub struct IdentityParser;
245
+
246
+ impl IdentityParser {
247
+ pub fn new() -> Self {
248
+ Self
249
+ }
250
+ }
251
+
252
+ impl Default for IdentityParser {
253
+ fn default() -> Self {
254
+ Self::new()
255
+ }
256
+ }
257
+
258
+ impl ParseFunction for IdentityParser {
259
+ fn parse(&self, output: &str, _bundles: &[Bundle], _strict: bool) -> Result<(String, String)> {
260
+ Ok((String::new(), output.to_string()))
261
+ }
262
+ }
263
+
264
+ /// Configuration for parse functions
265
+ #[derive(Debug, Clone, Default, Serialize, Deserialize)]
266
+ #[serde(rename_all = "snake_case")]
267
+ pub enum ParseFunctionConfig {
268
+ #[default]
269
+ ThoughtAction,
270
+ ActionOnly,
271
+ XmlThoughtAction,
272
+ FunctionCalling,
273
+ Json,
274
+ Identity,
275
+ }
276
+
277
+ /// Create a parser from configuration
278
+ pub fn create_parser(config: &ParseFunctionConfig) -> Box<dyn ParseFunction> {
279
+ match config {
280
+ ParseFunctionConfig::ThoughtAction => Box::new(ThoughtActionParser::new()),
281
+ ParseFunctionConfig::ActionOnly => Box::new(ActionOnlyParser::new()),
282
+ ParseFunctionConfig::XmlThoughtAction => Box::new(XmlThoughtActionParser::new()),
283
+ ParseFunctionConfig::FunctionCalling => Box::new(FunctionCallingParser::new()),
284
+ ParseFunctionConfig::Json => Box::new(JsonParser::new()),
285
+ ParseFunctionConfig::Identity => Box::new(IdentityParser::new()),
286
+ }
287
+ }
288
+
289
+ /// Get a parser by name string
290
+ pub fn get_parser(name: &str) -> Box<dyn ParseFunction> {
291
+ match name {
292
+ "thought_action" => Box::new(ThoughtActionParser::new()),
293
+ "action_only" => Box::new(ActionOnlyParser::new()),
294
+ "xml_thought_action" => Box::new(XmlThoughtActionParser::new()),
295
+ "function_calling" => Box::new(FunctionCallingParser::new()),
296
+ "json" => Box::new(JsonParser::new()),
297
+ "identity" => Box::new(IdentityParser::new()),
298
+ _ => Box::new(ThoughtActionParser::new()),
299
+ }
300
+ }
301
+
302
#[cfg(test)]
mod tests {
    use super::*;

    /// Text before the fence is the thought; the fenced body is the action.
    #[test]
    fn test_thought_action_parser() {
        let parser = ThoughtActionParser::new();
        let output = "Let me check the file.\n\n```\ncat file.txt\n```";
        let (thought, action) = parser.parse(output, &[], true).unwrap();

        assert_eq!(thought, "Let me check the file.");
        assert_eq!(action, "cat file.txt");
    }

    /// The language tag on the fence line is not part of the action.
    #[test]
    fn test_action_only_parser() {
        let parser = ActionOnlyParser::new();
        let output = "```bash\nls -la\n```";
        let (thought, action) = parser.parse(output, &[], true).unwrap();

        assert_eq!(thought, "");
        assert_eq!(action, "ls -la");
    }

    /// Without a fence, lenient mode uses the whole trimmed output as the action.
    #[test]
    fn test_action_only_parser_non_strict_fallback() {
        let parser = ActionOnlyParser::new();
        let (thought, action) = parser.parse("  ls -la  ", &[], false).unwrap();

        assert_eq!(thought, "");
        assert_eq!(action, "ls -la");
    }

    #[test]
    fn test_xml_parser() {
        let parser = XmlThoughtActionParser::new();
        let output = "<thought>Checking files</thought><action>ls</action>";
        let (thought, action) = parser.parse(output, &[], true).unwrap();

        assert_eq!(thought, "Checking files");
        assert_eq!(action, "ls");
    }

    /// A JSON tool call becomes `<name> <compact JSON arguments>`.
    #[test]
    fn test_function_calling_parser_tool_call() {
        let parser = FunctionCallingParser::new();
        let output = r#"{"name": "ls", "arguments": {"path": "."}}"#;
        let (thought, action) = parser.parse(output, &[], true).unwrap();

        assert_eq!(thought, "");
        assert_eq!(action, r#"ls {"path":"."}"#);
    }

    /// Non-JSON output is handled like the thought/action format.
    #[test]
    fn test_function_calling_parser_fallback() {
        let parser = FunctionCallingParser::new();
        let output = "Listing.\n\n```\nls\n```";
        let (thought, action) = parser.parse(output, &[], true).unwrap();

        assert_eq!(thought, "Listing.");
        assert_eq!(action, "ls");
    }

    /// The JSON object may be surrounded by other text.
    #[test]
    fn test_json_parser() {
        let parser = JsonParser::new();
        let output = r#"Result: {"thought": "Checking files", "action": "ls"}"#;
        let (thought, action) = parser.parse(output, &[], true).unwrap();

        assert_eq!(thought, "Checking files");
        assert_eq!(action, "ls");
    }

    #[test]
    fn test_json_parser_strict_mode_failure() {
        let parser = JsonParser::new();

        assert!(parser.parse("no json here", &[], true).is_err());
    }

    /// The identity parser neither trims nor fails, even in strict mode.
    #[test]
    fn test_identity_parser() {
        let parser = IdentityParser::new();
        let (thought, action) = parser.parse("  raw output ", &[], true).unwrap();

        assert_eq!(thought, "");
        assert_eq!(action, "  raw output ");
    }

    /// An unknown name yields the thought/action parser, whose lenient fallback puts the
    /// whole output into the thought.
    #[test]
    fn test_get_parser_unknown_name_falls_back() {
        let parser = get_parser("does_not_exist");
        let (thought, action) = parser.parse("plain text", &[], false).unwrap();

        assert_eq!(thought, "plain text");
        assert_eq!(action, "");
    }

    #[test]
    fn test_strict_mode_failure() {
        let parser = ThoughtActionParser::new();
        let output = "No code block here";
        let result = parser.parse(output, &[], true);

        assert!(result.is_err());
    }

    #[test]
    fn test_non_strict_mode() {
        let parser = ThoughtActionParser::new();
        let output = "No code block here";
        let (thought, action) = parser.parse(output, &[], false).unwrap();

        assert_eq!(thought, "No code block here");
        assert_eq!(action, "");
    }
}