@elizaos/sweagent-root 2.0.0-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (323) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +270 -0
  3. package/package.json +71 -0
  4. package/python/LICENSE +21 -0
  5. package/python/config/README.md +15 -0
  6. package/python/config/bash_only.yaml +222 -0
  7. package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
  8. package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
  9. package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
  10. package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
  11. package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
  12. package/python/config/coding_challenge.yaml +104 -0
  13. package/python/config/default.yaml +69 -0
  14. package/python/config/default_backticks.yaml +69 -0
  15. package/python/config/default_mm_no_images.yaml +82 -0
  16. package/python/config/default_mm_with_images.yaml +83 -0
  17. package/python/config/demo/default.yaml +80 -0
  18. package/python/config/demo/no_instructions.yaml +69 -0
  19. package/python/config/demo/only_bash.yaml +60 -0
  20. package/python/config/exotic/default_shell.yaml +52 -0
  21. package/python/config/exotic/windowed_replace.yaml +125 -0
  22. package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
  23. package/python/config/human/human.yaml +24 -0
  24. package/python/config/human/human_demo.yaml +52 -0
  25. package/python/config/sweagent_0_7/07.yaml +101 -0
  26. package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
  27. package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
  28. package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
  29. package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
  30. package/python/mlc_config.json +44 -0
  31. package/python/pyproject.toml +262 -0
  32. package/python/sweagent/__init__.py +114 -0
  33. package/python/sweagent/__main__.py +4 -0
  34. package/python/sweagent/agent/__init__.py +0 -0
  35. package/python/sweagent/agent/action_sampler.py +317 -0
  36. package/python/sweagent/agent/agents.py +1294 -0
  37. package/python/sweagent/agent/extra/shell_agent.py +106 -0
  38. package/python/sweagent/agent/history_processors.py +399 -0
  39. package/python/sweagent/agent/hooks/__init__.py +0 -0
  40. package/python/sweagent/agent/hooks/abstract.py +139 -0
  41. package/python/sweagent/agent/hooks/status.py +34 -0
  42. package/python/sweagent/agent/models.py +896 -0
  43. package/python/sweagent/agent/problem_statement.py +312 -0
  44. package/python/sweagent/agent/reviewer.py +664 -0
  45. package/python/sweagent/environment/__init__.py +0 -0
  46. package/python/sweagent/environment/hooks/__init__.py +0 -0
  47. package/python/sweagent/environment/hooks/abstract.py +60 -0
  48. package/python/sweagent/environment/hooks/status.py +28 -0
  49. package/python/sweagent/environment/repo.py +219 -0
  50. package/python/sweagent/environment/swe_env.py +276 -0
  51. package/python/sweagent/exceptions.py +54 -0
  52. package/python/sweagent/inspector/README.md +6 -0
  53. package/python/sweagent/inspector/__init__.py +0 -0
  54. package/python/sweagent/inspector/favicon.ico +0 -0
  55. package/python/sweagent/inspector/fileViewer.js +354 -0
  56. package/python/sweagent/inspector/icons/computer.png +0 -0
  57. package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
  58. package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
  59. package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
  60. package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
  61. package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
  62. package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
  63. package/python/sweagent/inspector/index.html +25 -0
  64. package/python/sweagent/inspector/server.py +354 -0
  65. package/python/sweagent/inspector/static.py +169 -0
  66. package/python/sweagent/inspector/style.css +454 -0
  67. package/python/sweagent/run/__init__.py +0 -0
  68. package/python/sweagent/run/_progress.py +158 -0
  69. package/python/sweagent/run/batch_instances.py +419 -0
  70. package/python/sweagent/run/common.py +387 -0
  71. package/python/sweagent/run/compare_runs.py +123 -0
  72. package/python/sweagent/run/extract_pred.py +19 -0
  73. package/python/sweagent/run/hooks/__init__.py +0 -0
  74. package/python/sweagent/run/hooks/abstract.py +67 -0
  75. package/python/sweagent/run/hooks/apply_patch.py +106 -0
  76. package/python/sweagent/run/hooks/open_pr.py +244 -0
  77. package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
  78. package/python/sweagent/run/inspector_cli.py +493 -0
  79. package/python/sweagent/run/merge_predictions.py +64 -0
  80. package/python/sweagent/run/quick_stats.py +96 -0
  81. package/python/sweagent/run/remove_unfinished.py +63 -0
  82. package/python/sweagent/run/rich_test.py +91 -0
  83. package/python/sweagent/run/run.py +147 -0
  84. package/python/sweagent/run/run_batch.py +442 -0
  85. package/python/sweagent/run/run_replay.py +219 -0
  86. package/python/sweagent/run/run_shell.py +155 -0
  87. package/python/sweagent/run/run_single.py +225 -0
  88. package/python/sweagent/run/run_traj_to_demo.py +85 -0
  89. package/python/sweagent/tools/__init__.py +0 -0
  90. package/python/sweagent/tools/bundle.py +57 -0
  91. package/python/sweagent/tools/commands.py +220 -0
  92. package/python/sweagent/tools/parsing.py +619 -0
  93. package/python/sweagent/tools/tools.py +430 -0
  94. package/python/sweagent/tools/utils.py +108 -0
  95. package/python/sweagent/types.py +102 -0
  96. package/python/sweagent/utils/__init__.py +0 -0
  97. package/python/sweagent/utils/config.py +80 -0
  98. package/python/sweagent/utils/files.py +27 -0
  99. package/python/sweagent/utils/github.py +118 -0
  100. package/python/sweagent/utils/jinja_warnings.py +14 -0
  101. package/python/sweagent/utils/log.py +175 -0
  102. package/python/sweagent/utils/patch_formatter.py +152 -0
  103. package/python/sweagent/utils/serialization.py +45 -0
  104. package/python/tests/__init__.py +0 -0
  105. package/python/tests/conftest.py +191 -0
  106. package/python/tests/test_agent.py +258 -0
  107. package/python/tests/test_batch_instance.py +43 -0
  108. package/python/tests/test_commands/_interactive_dummy.py +35 -0
  109. package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
  110. package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
  111. package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
  112. package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
  113. package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
  114. package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
  115. package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
  116. package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
  117. package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
  118. package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
  119. package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
  120. package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
  121. package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
  122. package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
  123. package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
  124. package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
  125. package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
  126. package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
  127. package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
  128. package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
  129. package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
  130. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
  131. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
  132. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
  133. package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
  134. package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
  135. package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
  136. package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
  137. package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
  138. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
  139. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
  140. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
  141. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
  142. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
  143. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
  144. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
  145. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
  146. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
  147. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
  148. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
  149. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
  150. package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
  151. package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
  152. package/python/tests/test_data/data_sources/human_eval.json +1 -0
  153. package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
  154. package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
  155. package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
  156. package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
  157. package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
  158. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
  159. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
  160. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
  161. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
  162. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
  163. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
  164. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
  165. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
  166. package/python/tests/test_env.py +66 -0
  167. package/python/tests/test_env_utils.py +129 -0
  168. package/python/tests/test_history_processors.py +40 -0
  169. package/python/tests/test_models.py +23 -0
  170. package/python/tests/test_openai_live.py +164 -0
  171. package/python/tests/test_packaging.py +7 -0
  172. package/python/tests/test_parsing.py +131 -0
  173. package/python/tests/test_problem_statement_multimodal.py +111 -0
  174. package/python/tests/test_quick_stats.py +42 -0
  175. package/python/tests/test_run.py +37 -0
  176. package/python/tests/test_run_batch.py +110 -0
  177. package/python/tests/test_run_hooks.py +114 -0
  178. package/python/tests/test_run_replay.py +33 -0
  179. package/python/tests/test_run_single.py +125 -0
  180. package/python/tests/test_tools_command_parsing.py +193 -0
  181. package/python/tests/test_utils.py +15 -0
  182. package/python/tests/tools/__init__.py +0 -0
  183. package/python/tests/tools/conftest.py +12 -0
  184. package/python/tests/tools/test_default_utils.py +153 -0
  185. package/python/tests/tools/test_edit_replace.py +0 -0
  186. package/python/tests/tools/test_split_string.py +82 -0
  187. package/python/tests/utils.py +29 -0
  188. package/python/tools/diff_state/bin/_state_diff_state +52 -0
  189. package/python/tools/diff_state/config.yaml +2 -0
  190. package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
  191. package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
  192. package/python/tools/edit_anthropic/config.yaml +56 -0
  193. package/python/tools/edit_anthropic/install.sh +3 -0
  194. package/python/tools/filemap/bin/filemap +45 -0
  195. package/python/tools/filemap/config.yaml +9 -0
  196. package/python/tools/filemap/install.sh +2 -0
  197. package/python/tools/forfeit/bin/exit_forfeit +5 -0
  198. package/python/tools/forfeit/config.yaml +5 -0
  199. package/python/tools/image_tools/bin/view_image +36 -0
  200. package/python/tools/image_tools/config.yaml +9 -0
  201. package/python/tools/multilingual_setup/bin/do_nothing +2 -0
  202. package/python/tools/multilingual_setup/config.yaml +1 -0
  203. package/python/tools/multilingual_setup/install.sh +45 -0
  204. package/python/tools/registry/bin/_read_env +10 -0
  205. package/python/tools/registry/bin/_write_env +10 -0
  206. package/python/tools/registry/config.yaml +1 -0
  207. package/python/tools/registry/install.sh +6 -0
  208. package/python/tools/registry/lib/__init__.py +0 -0
  209. package/python/tools/registry/lib/registry.py +56 -0
  210. package/python/tools/review_on_submit_m/README.md +6 -0
  211. package/python/tools/review_on_submit_m/bin/submit +54 -0
  212. package/python/tools/review_on_submit_m/config.yaml +6 -0
  213. package/python/tools/review_on_submit_m/install.sh +0 -0
  214. package/python/tools/search/bin/find_file +31 -0
  215. package/python/tools/search/bin/search_dir +39 -0
  216. package/python/tools/search/bin/search_file +55 -0
  217. package/python/tools/search/config.yaml +37 -0
  218. package/python/tools/search/install.sh +3 -0
  219. package/python/tools/submit/bin/submit +17 -0
  220. package/python/tools/submit/config.yaml +5 -0
  221. package/python/tools/web_browser/bin/click_mouse +41 -0
  222. package/python/tools/web_browser/bin/close_site +28 -0
  223. package/python/tools/web_browser/bin/double_click_mouse +37 -0
  224. package/python/tools/web_browser/bin/drag_mouse +46 -0
  225. package/python/tools/web_browser/bin/execute_script_on_page +39 -0
  226. package/python/tools/web_browser/bin/get_console_output +48 -0
  227. package/python/tools/web_browser/bin/move_mouse +35 -0
  228. package/python/tools/web_browser/bin/navigate_back +33 -0
  229. package/python/tools/web_browser/bin/navigate_forward +33 -0
  230. package/python/tools/web_browser/bin/open_site +36 -0
  231. package/python/tools/web_browser/bin/press_keys_on_page +51 -0
  232. package/python/tools/web_browser/bin/reload_page +33 -0
  233. package/python/tools/web_browser/bin/run_web_browser_server +394 -0
  234. package/python/tools/web_browser/bin/screenshot_site +38 -0
  235. package/python/tools/web_browser/bin/scroll_on_page +40 -0
  236. package/python/tools/web_browser/bin/set_browser_window_size +40 -0
  237. package/python/tools/web_browser/bin/type_text +34 -0
  238. package/python/tools/web_browser/bin/wait_time +39 -0
  239. package/python/tools/web_browser/config.yaml +155 -0
  240. package/python/tools/web_browser/install.sh +22 -0
  241. package/python/tools/web_browser/lib/browser_manager.py +404 -0
  242. package/python/tools/web_browser/lib/web_browser_config.py +33 -0
  243. package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
  244. package/python/tools/web_browser/test_console.html +1 -0
  245. package/python/tools/windowed/bin/_state +25 -0
  246. package/python/tools/windowed/bin/create +29 -0
  247. package/python/tools/windowed/bin/goto +37 -0
  248. package/python/tools/windowed/bin/open +49 -0
  249. package/python/tools/windowed/bin/scroll_down +12 -0
  250. package/python/tools/windowed/bin/scroll_up +13 -0
  251. package/python/tools/windowed/config.yaml +38 -0
  252. package/python/tools/windowed/install.sh +15 -0
  253. package/python/tools/windowed/lib/__init__.py +0 -0
  254. package/python/tools/windowed/lib/flake8_utils.py +147 -0
  255. package/python/tools/windowed/lib/windowed_file.py +312 -0
  256. package/python/tools/windowed_edit_linting/bin/edit +128 -0
  257. package/python/tools/windowed_edit_linting/config.yaml +31 -0
  258. package/python/tools/windowed_edit_linting/install.sh +5 -0
  259. package/python/tools/windowed_edit_replace/bin/edit +172 -0
  260. package/python/tools/windowed_edit_replace/bin/insert +77 -0
  261. package/python/tools/windowed_edit_replace/config.yaml +60 -0
  262. package/python/tools/windowed_edit_replace/install.sh +5 -0
  263. package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
  264. package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
  265. package/python/tools/windowed_edit_rewrite/install.sh +5 -0
  266. package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
  267. package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
  268. package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
  269. package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
  270. package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
  271. package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
  272. package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
  273. package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
  274. package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
  275. package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
  276. package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
  277. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
  278. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  279. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  280. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
  281. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
  282. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
  283. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  284. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  285. package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
  286. package/rust/Cargo.toml +100 -0
  287. package/rust/README.md +49 -0
  288. package/rust/src/agent/action_sampler.rs +130 -0
  289. package/rust/src/agent/agents.rs +1029 -0
  290. package/rust/src/agent/history_processors.rs +277 -0
  291. package/rust/src/agent/hooks/mod.rs +208 -0
  292. package/rust/src/agent/mod.rs +24 -0
  293. package/rust/src/agent/models.rs +837 -0
  294. package/rust/src/agent/problem_statement.rs +355 -0
  295. package/rust/src/agent/reviewer.rs +505 -0
  296. package/rust/src/bin/sweagent.rs +784 -0
  297. package/rust/src/environment/deployment.rs +631 -0
  298. package/rust/src/environment/hooks/mod.rs +114 -0
  299. package/rust/src/environment/mod.rs +16 -0
  300. package/rust/src/environment/repo.rs +265 -0
  301. package/rust/src/environment/runtime.rs +237 -0
  302. package/rust/src/environment/swe_env.rs +248 -0
  303. package/rust/src/exceptions.rs +228 -0
  304. package/rust/src/lib.rs +68 -0
  305. package/rust/src/monitoring.rs +482 -0
  306. package/rust/src/run/hooks/mod.rs +134 -0
  307. package/rust/src/run/mod.rs +12 -0
  308. package/rust/src/run/run_batch.rs +563 -0
  309. package/rust/src/run/run_single.rs +196 -0
  310. package/rust/src/tools/bundle.rs +224 -0
  311. package/rust/src/tools/commands.rs +173 -0
  312. package/rust/src/tools/mod.rs +295 -0
  313. package/rust/src/tools/parsing.rs +354 -0
  314. package/rust/src/tools/registry.rs +143 -0
  315. package/rust/src/types.rs +554 -0
  316. package/rust/src/utils/config.rs +105 -0
  317. package/rust/src/utils/files.rs +137 -0
  318. package/rust/src/utils/github.rs +171 -0
  319. package/rust/src/utils/log.rs +65 -0
  320. package/rust/src/utils/mod.rs +17 -0
  321. package/rust/src/utils/serialization.rs +181 -0
  322. package/rust/src/utils/template.rs +173 -0
  323. package/typescript/README.md +335 -0
@@ -0,0 +1,106 @@
1
+ from pathlib import Path
2
+ from typing import Self
3
+
4
+ from sweagent.agent.agents import DefaultAgent, ShellAgentConfig
5
+ from sweagent.agent.models import HumanModel, HumanModelConfig, get_model
6
+ from sweagent.agent.problem_statement import ProblemStatement, ProblemStatementConfig
7
+ from sweagent.environment.swe_env import SWEEnv
8
+ from sweagent.tools.parsing import ActionOnlyParser
9
+ from sweagent.tools.tools import ToolHandler
10
+ from sweagent.types import AgentRunResult, StepOutput
11
+
12
+
13
class ShellAgent(DefaultAgent):
    """Agent whose model can be swapped for a human (and back) while it runs.

    During `run`, ^C hands control to a `HumanModel` and ^D hands it back to
    the model that was active before the human stepped in.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Model and parser that were active before a human stepped in. Both are
        # ``None`` whenever there is nothing to switch back to.
        self._original_model = None
        self._original_parser = None

    @classmethod
    def from_config(cls, config: ShellAgentConfig) -> Self:
        """Build an agent from ``config``.

        Args:
            config: Agent configuration; it is deep-copied before use, so the
                caller's object is not shared with the new agent.

        Returns:
            A new instance of ``cls``.
        """
        # To ensure that all models stay completely independent, we deepcopy the
        # config, because it lives on as a property in the model, tools, etc.
        config = config.model_copy(deep=True)
        model = get_model(config.model, config.tools)
        return cls(
            templates=config.templates,
            tools=ToolHandler(config.tools),
            history_processors=config.history_processors,
            model=model,
            max_requeries=config.max_requeries,
        )

    def human_step_in(self) -> None:
        """Replace the current model with a HumanModel instance.

        This allows for human intervention during agent execution. The current
        model and parse function are remembered so that `human_step_out` can
        restore them. Calling this while a human is already in control is a no-op.
        """
        if isinstance(self.model, HumanModel):
            # Stepping in twice would overwrite the remembered AI model with the
            # human model, making it impossible to switch back.
            self.logger.info("Already in human mode.")
            return

        self._original_model = self.model
        self._original_parser = self.tools.config.parse_function

        human_config = HumanModelConfig(name="human", catch_eof=False)
        self.model = get_model(human_config, self.tools.config)
        self.tools.config.parse_function = ActionOnlyParser()

        self.logger.info("Switched to human mode. Agent will now accept human input. Press ^D to switch back.")

    def human_step_out(self) -> None:
        """Switch back to the original model from human mode.

        This is called when ^D is pressed in human mode. If no model was
        remembered by `human_step_in`, the agent stays in its current mode.
        """
        if getattr(self, "_original_model", None) is None:
            self.logger.info("No previous model to switch back to. Remaining in current mode.")
            return

        self.model = self._original_model
        self.tools.config.parse_function = self._original_parser  # type: ignore
        self._original_model = None
        self._original_parser = None

        self.logger.info("Switched back to AI model mode.")

    def run(
        self,
        env: SWEEnv,
        problem_statement: ProblemStatement | ProblemStatementConfig,
        *,
        output_dir: Path = Path("."),
    ) -> AgentRunResult:
        """Run the agent on a problem instance. This method contains the
        main loop that repeatedly calls `self.step` until the problem is solved.

        While the loop runs, ^C switches to human mode (or propagates if a human
        is already in control) and ^D switches back to the previous model. When
        a non-human model finishes, control is handed to the human, who has to
        submit the solution.

        Args:
            env: The environment to run the agent on.
            problem_statement: The problem instance to work on.
            output_dir: Output directory, forwarded to `self.setup`.

        Returns:
            The info and trajectory obtained from `self.get_trajectory_data`.
        """
        self.setup(env=env, problem_statement=problem_statement, output_dir=output_dir)

        # Run action/observation loop
        self._chook.on_run_start()
        step_output = StepOutput()
        while not step_output.done:
            try:
                step_output = self.step()
                self.save_trajectory()
            except KeyboardInterrupt:
                if not isinstance(self.model, HumanModel):
                    self.human_step_in()
                    continue
                # ^C while the human is already in control: let it propagate
                raise
            except EOFError:
                # Can only happen if we have a human model, so switch back
                self.logger.info("Detected ^D - switching back to AI mode")
                self.human_step_out()
                continue
            if step_output.done and not isinstance(self.model, HumanModel):
                # Human has to submit the solution
                self.logger.info("Robot is done! Please submit the solution.")
                self.human_step_in()
                step_output.done = False
        self._chook.on_run_done(trajectory=self.trajectory, info=self.info)

        self.logger.info("Trajectory saved to %s", self.traj_path)

        # Here we want to return the "global" information (e.g., submission should
        # be the best submission instead of the last one, etc.), so we get it from the traj file
        data = self.get_trajectory_data()
        return AgentRunResult(info=data["info"], trajectory=data["trajectory"])
@@ -0,0 +1,399 @@
1
+ from __future__ import annotations
2
+
3
+ import copy
4
+ import re
5
+ from abc import abstractmethod
6
+ from typing import Annotated, Literal, Protocol
7
+
8
+ from pydantic import BaseModel, ConfigDict, Field, field_validator
9
+
10
+ from sweagent.types import History, HistoryItem
11
+
12
+
13
class AbstractHistoryProcessor(Protocol):
    """Structural interface of a history processor: a callable mapping a history to a history."""

    @abstractmethod
    def __call__(self, history: History) -> History:
        """Return the processed version of ``history``."""
        raise NotImplementedError()
17
+
18
+
19
+ # Utility functions
20
+ # -----------------
21
+
22
+
23
+ def _get_content_stats(entry: HistoryItem) -> tuple[int, int]:
24
+ if isinstance(entry["content"], str):
25
+ return len(entry["content"].splitlines()), 0
26
+ n_text_lines = sum(len(item["text"].splitlines()) for item in entry["content"] if item.get("type") == "text")
27
+ n_images = sum(1 for item in entry["content"] if item.get("type") == "image_url")
28
+ return n_text_lines, n_images
29
+
30
+
31
+ def _get_content_text(entry: HistoryItem) -> str:
32
+ if isinstance(entry["content"], str):
33
+ return entry["content"]
34
+ assert len(entry["content"]) == 1, "Expected single message in content"
35
+ return entry["content"][0]["text"]
36
+
37
+
38
+ def _set_content_text(entry: HistoryItem, text: str) -> None:
39
+ if isinstance(entry["content"], str):
40
+ entry["content"] = text
41
+ else:
42
+ assert len(entry["content"]) == 1, "Expected single message in content"
43
+ entry["content"][0]["text"] = text
44
+
45
+
46
+ def _clear_cache_control(entry: HistoryItem) -> None:
47
+ if isinstance(entry["content"], list):
48
+ for item in entry["content"]:
49
+ item.pop("cache_control", None)
50
+ entry.pop("cache_control", None)
51
+
52
+
53
+ def _set_cache_control(entry: HistoryItem) -> None:
54
+ if not isinstance(entry["content"], list):
55
+ entry["content"] = [ # type: ignore
56
+ {
57
+ "type": "text",
58
+ "text": _get_content_text(entry),
59
+ "cache_control": {"type": "ephemeral"},
60
+ }
61
+ ]
62
+ else:
63
+ entry["content"][0]["cache_control"] = {"type": "ephemeral"}
64
+ if entry["role"] == "tool":
65
+ # Workaround for weird bug
66
+ entry["content"][0].pop("cache_control", None)
67
+ entry["cache_control"] = {"type": "ephemeral"}
68
+
69
+
70
+ # History processors
71
+ # ------------------
72
+
73
+
74
class DefaultHistoryProcessor(BaseModel):
    """No-op history processor: hands the history back unchanged."""

    # pydantic config
    model_config = ConfigDict(extra="forbid")

    type: Literal["default"] = "default"
    """Do not change. Used for (de)serialization."""

    def __call__(self, history: History) -> History:
        """Return ``history`` as-is."""
        return history
83
+
84
+
85
+ class LastNObservations(BaseModel):
86
+ """Elide all but the last n observations or remove tagged observations.
87
+
88
+ This is our most classic history processor, used in the original paper
89
+ to elide but the last 5 observations.
90
+ Elided observations are replaced by "Old environment output: (n lines omitted)".
91
+
92
+ Typical configuration:
93
+
94
+ ```yaml
95
+ agent:
96
+ history_processors:
97
+ - type: last_n_observations
98
+ n: 5
99
+ ```
100
+
101
+ as for example in use in the SWE-agent 0.7 config at
102
+ https://github.com/SWE-agent/SWE-agent/blob/main/config/sweagent_0_7/07.yaml
103
+
104
+ For most use cases, you only need to set `n`.
105
+
106
+ Note that using this history processor will break prompt caching (as the
107
+ history of every query will change every time due to the elided observations).
108
+ There are some workarounds possible with the `polling` parameter.
109
+
110
+ However, most SotA models can now fit a lot of context, so generally this
111
+ history processor is not always needed anymore.
112
+ """
113
+
114
+ n: int
115
+ """Number of observations to keep."""
116
+
117
+ polling: int = 1
118
+ """How many steps to keep between updating the number of observations to keep.
119
+ This is useful for caching, as we want to remove more and more messages, but every
120
+ time we change the history, we need to cache everything again.
121
+ Effectively, we will now keep between `n` and `n+polling` observations.
122
+ """
123
+
124
+ always_remove_output_for_tags: set[str] = {"remove_output"}
125
+ """Any observation with a `tags` field containing one of these strings will be elided,
126
+ even if it is one of the last n observations.
127
+ """
128
+
129
+ always_keep_output_for_tags: set[str] = {"keep_output"}
130
+ """Any observation with a `tags` field containing one of these strings will be kept,
131
+ even if it is not one of the last n observations.
132
+ """
133
+
134
+ type: Literal["last_n_observations"] = "last_n_observations"
135
+ """Do not change. Used for (de)serialization."""
136
+
137
+ # pydantic config
138
+ model_config = ConfigDict(extra="forbid")
139
+
140
@field_validator("n")
def validate_n(cls, n: int) -> int:
    """Validate the `n` field.

    Returns `n` unchanged when it is strictly positive.

    Raises:
        ValueError: if `n` is zero or negative.
    """
    if n > 0:
        return n
    raise ValueError("n must be a positive integer")
146
+
147
+ def _get_omit_indices(self, history: History) -> list[int]:
148
+ observation_indices = [
149
+ idx
150
+ for idx, entry in enumerate(history)
151
+ if entry.get("message_type") == "observation" and not entry.get("is_demo", False)
152
+ ]
153
+ last_removed_idx = max(0, (len(observation_indices) // self.polling) * self.polling - self.n)
154
+ # Note: We never remove the first observation, as it is the instance template
155
+ return observation_indices[1:last_removed_idx]
156
+
157
def __call__(self, history: History) -> History:
    """Return a new history in which old observations are replaced by a placeholder.

    An entry is kept as-is when it is not selected by `_get_omit_indices` or
    carries a tag from `always_keep_output_for_tags`, provided it carries no tag
    from `always_remove_output_for_tags` (removal tags always win). Every other
    entry is shallow-copied and its content replaced by
    "Old environment output: (n lines omitted)", plus an image count if any.
    The input entries themselves are not modified.

    Raises:
        AssertionError: if an entry selected for elision is not an observation.
    """
    # A set gives O(1) membership tests; the list made the loop quadratic.
    omit_content_idxs = set(self._get_omit_indices(history))
    new_history = []
    for idx, entry in enumerate(history):
        tags = set(entry.get("tags", []))
        force_keep = bool(tags & self.always_keep_output_for_tags)
        force_remove = bool(tags & self.always_remove_output_for_tags)
        if (idx not in omit_content_idxs or force_keep) and not force_remove:
            new_history.append(entry)
            continue
        data = entry.copy()
        assert data.get("message_type") == "observation", (
            f"Expected observation for dropped entry, got: {data.get('message_type')}"
        )
        num_text_lines, num_images = _get_content_stats(data)
        data["content"] = f"Old environment output: ({num_text_lines} lines omitted)"
        if num_images > 0:
            data["content"] += f" ({num_images} images omitted)"
        new_history.append(data)
    return new_history
177
+
178
+
179
class TagToolCallObservations(BaseModel):
    """Adds tags to history items for specific tool calls.

    An entry is tagged when its ``message_type`` is ``"action"`` and at least one
    of its ``tool_calls`` invokes a function listed in `function_names`.
    Matching entries are modified in place and the same history object is returned.
    """

    type: Literal["tag_tool_call_observations"] = "tag_tool_call_observations"
    """Do not change. Used for (de)serialization."""

    tags: set[str] = {"keep_output"}
    """Add the following tag to all observations matching the search criteria."""

    function_names: set[str] = set()
    """Only consider observations made by tools with these names."""

    # pydantic config
    model_config = ConfigDict(extra="forbid")

    def _add_tags(self, entry: HistoryItem) -> None:
        """Merge `self.tags` into the entry's ``tags`` list (in place, no duplicates)."""
        merged = set(entry.get("tags", [])) | self.tags
        # Sorted so the stored list does not depend on per-process string hashing.
        entry["tags"] = sorted(merged)

    def _should_add_tags(self, entry: HistoryItem) -> bool:
        """Return True if `entry` is an action whose tool calls include a configured function."""
        if entry.get("message_type") != "action":
            return False
        function_calls = entry.get("tool_calls", [])
        if not function_calls:
            return False
        function_names = {call["function"]["name"] for call in function_calls}  # type: ignore
        return bool(self.function_names & function_names)

    def __call__(self, history: History) -> History:
        """Tag every matching entry of `history` in place and return `history`."""
        for entry in history:
            if self._should_add_tags(entry):
                self._add_tags(entry)
        return history
213
+
214
+
215
class ClosedWindowHistoryProcessor(BaseModel):
    """For each value in history, keep track of which windows have been shown.
    We want to mark windows that should stay open (they're the last window for a particular file)
    Then we'll replace all other windows with a simple summary of the window (i.e. number of lines)

    A "window" is a run of lines starting with ``<number>:`` inside a user message
    that also contains a ``[File: <path> (<n> lines total)]`` header. The history
    is scanned from newest to oldest; the newest window per file is kept and
    older windows for the same file are collapsed into a one-line summary.
    """

    type: Literal["closed_window"] = "closed_window"
    """Do not change. Used for (de)serialization."""

    _pattern = re.compile(r"^(\d+)\:.*?(\n|$)", re.MULTILINE)
    _file_pattern = re.compile(r"\[File:\s+(.*)\s+\(\d+\s+lines\ total\)\]")

    # pydantic config
    model_config = ConfigDict(extra="forbid")

    def __call__(self, history):
        """Return a new history list with outdated file windows summarized.

        Non-user and demo entries are passed through untouched. User entries are
        shallow-copied; only the copy's ``content`` is rewritten. No entry is ever
        removed, so the result always has the same length as `history`.
        """
        new_history = []
        windows = set()
        for entry in reversed(history):
            if entry["role"] != "user" or entry.get("is_demo", False):
                new_history.append(entry)
                continue
            data = entry.copy()
            content = entry["content"]
            # Multimodal (list) content cannot be scanned with the text patterns;
            # treat it as containing no window.
            matches = list(self._pattern.finditer(content)) if isinstance(content, str) else []
            file_match = self._file_pattern.search(content) if matches else None
            if file_match:
                file = file_match.group(1)
                if file in windows:
                    # A newer window for this file was already seen: collapse this one.
                    start = matches[0].start()
                    end = matches[-1].end()
                    data["content"] = (
                        content[:start]
                        + f"Outdated window with {len(matches)} lines omitted...\n"
                        + content[end:]
                    )
                windows.add(file)
            # Numbered lines without a file header are not a window; keep the
            # message unchanged instead of dropping it from the history.
            new_history.append(data)
        return list(reversed(new_history))
259
+
260
+
261
class CacheControlHistoryProcessor(BaseModel):
    """This history processor adds manual cache control marks to the history.
    Use this when running with anthropic claude.
    """

    type: Literal["cache_control"] = "cache_control"
    """Do not change. Used for (de)serialization."""

    last_n_messages: int = 2
    """Add cache control to the last n user messages (and clear it for anything else).
    In most cases this should be set to 2 (caching for multi-turn conversations).
    When resampling and running concurrent instances, you want to set it to 1.
    If set to <= 0, any set cache control will be removed from all messages.
    """

    last_n_messages_offset: int = 0
    """E.g., set to 1 to start cache control after the second to last user message.
    This can be useful in rare cases, when you want to modify the last message after
    we've got the completion and you want to avoid cache mismatch.
    """

    tagged_roles: list[str] = ["user", "tool"]
    """Only add cache control to messages with these roles."""

    # pydantic config
    model_config = ConfigDict(extra="forbid")

    def __call__(self, history: History) -> History:
        """Clear cache control on every entry, then mark the newest eligible ones.

        Walks the history from newest to oldest. Entries are modified in place
        through the module's cache-control helpers; the returned list is a new
        list holding the same entry objects in their original order.
        """
        budget = self.last_n_messages
        for steps_back, message in enumerate(reversed(history)):
            # Clear cache control from previous messages
            _clear_cache_control(message)
            if budget <= 0:
                continue
            if message["role"] not in self.tagged_roles:
                continue
            if steps_back < self.last_n_messages_offset:
                continue
            _set_cache_control(message)
            budget -= 1
        return list(history)
303
+
304
+
305
class RemoveRegex(BaseModel):
    """This history processor can remove arbitrary content from history items"""

    remove: list[str] = ["<diff>.*</diff>"]
    """Regex patterns to remove from history items"""

    keep_last: int = 0
    """Keep the last n history items unchanged"""

    type: Literal["remove_regex"] = "remove_regex"
    """Do not change. Used for (de)serialization."""

    # pydantic config
    model_config = ConfigDict(extra="forbid")

    def __call__(self, history: History) -> History:
        """Return deep copies of all entries with the configured patterns removed.

        The last `keep_last` entries are copied but otherwise left untouched.
        For list content only items of type ``"text"`` are rewritten; any other
        content must be a plain string.
        """

        def scrub(text: str) -> str:
            # Apply every pattern in order; DOTALL lets `.` span newlines.
            for pattern in self.remove:
                text = re.sub(pattern, "", text, flags=re.DOTALL)
            return text

        total = len(history)
        cleaned = []
        for position, original in enumerate(history):
            item = copy.deepcopy(original)
            distance_from_end = total - 1 - position
            if distance_from_end >= self.keep_last:
                content = item["content"]
                if isinstance(content, list):
                    for part in content:
                        if part["type"] == "text":
                            part["text"] = scrub(part["text"])
                else:
                    assert isinstance(content, str), "Expected string content"
                    item["content"] = scrub(content)
            cleaned.append(item)
        return cleaned
338
+
339
+
340
class ImageParsingHistoryProcessor(BaseModel):
    """Parse embedded base64 images from markdown and convert to multi-modal format."""

    type: Literal["image_parsing"] = "image_parsing"
    allowed_mime_types: set[str] = {"image/png", "image/jpeg", "image/webp"}

    _pattern = re.compile(r"(!\[([^\]]*)\]\(data:)([^;]+);base64,([^)]+)(\))")
    model_config = ConfigDict(extra="forbid")

    def __call__(self, history: History) -> History:
        """Apply `_process_entry` to every entry and return the results as a new list."""
        processed = []
        for item in history:
            processed.append(self._process_entry(item))
        return processed

    def _process_entry(self, entry: HistoryItem) -> HistoryItem:
        """Return `entry` itself for non user/tool roles, otherwise a deep copy.

        The copy's content is replaced by the parsed segment list only when at
        least one image segment was found.
        """
        role = entry.get("role")
        if role != "user" and role != "tool":
            return entry
        updated = copy.deepcopy(entry)
        parts = self._parse_images(_get_content_text(updated))
        for part in parts:
            if part["type"] == "image_url":
                updated["content"] = parts
                break
        return updated

    def _parse_images(self, content: str) -> list[dict]:
        """Split `content` into text and ``image_url`` segments.

        Markdown images with a data URL of an allowed MIME type become
        ``image_url`` segments; the surrounding markdown (``![alt](data:`` and
        ``)``) stays in the text. ``image/jpg`` is treated as ``image/jpeg``.
        Images with other MIME types remain verbatim in the text. If no image
        was extracted, a single text segment holding `content` is returned.
        """
        segments: list[dict] = []
        cursor = 0

        def add_text(text: str) -> None:
            """Add text to the last segment if it's text, otherwise create new text segment."""
            if not text:
                return
            if segments and segments[-1]["type"] == "text":
                segments[-1]["text"] += text
            else:
                segments.append({"type": "text", "text": text})

        for match in self._pattern.finditer(content):
            markdown_prefix, alt_text, mime_type, base64_data, markdown_suffix = match.groups()
            add_text(content[cursor : match.start()])
            if mime_type == "image/jpg":
                mime_type = "image/jpeg"
            if mime_type not in self.allowed_mime_types:
                add_text(match.group(0))
            else:
                add_text(markdown_prefix)
                segments.append({"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{base64_data}"}})
                add_text(markdown_suffix)
            cursor = match.end()
        add_text(content[cursor:])

        if any(segment["type"] == "image_url" for segment in segments):
            return segments
        return [{"type": "text", "text": content}]
388
+
389
+
390
# Union of all history processor models, discriminated on their `type` field
# (each processor visible in this file declares `type` as a distinct Literal;
# DefaultHistoryProcessor is defined outside this view — presumably the same).
+ HistoryProcessor = Annotated[
391
+ DefaultHistoryProcessor
392
+ | LastNObservations
393
+ | ClosedWindowHistoryProcessor
394
+ | TagToolCallObservations
395
+ | CacheControlHistoryProcessor
396
+ | RemoveRegex
397
+ | ImageParsingHistoryProcessor,
398
+ Field(discriminator="type"),
399
+ ]
File without changes
@@ -0,0 +1,139 @@
1
+ from typing import TYPE_CHECKING
2
+
3
+ from sweagent.types import AgentInfo, StepOutput, Trajectory
4
+
5
+ if TYPE_CHECKING:
6
+ # avoid circular import
7
+ from sweagent.agent.agents import DefaultAgent
8
+
9
+
10
+ class AbstractAgentHook:
11
+ def on_init(self, *, agent: "DefaultAgent"):
12
+ """Note: Depending on the internals of `Agent` should be done with care,
13
+ it's best to use this as little as possible.
14
+ """
15
+
16
+ def on_run_start(
17
+ self,
18
+ ): ...
19
+
20
+ def on_step_start(self): ...
21
+
22
+ def on_actions_generated(self, *, step: StepOutput): ...
23
+
24
+ def on_action_started(self, *, step: StepOutput): ...
25
+
26
+ def on_action_executed(self, *, step: StepOutput): ...
27
+
28
+ def on_step_done(self, *, step: StepOutput, info: AgentInfo): ...
29
+
30
+ def on_run_done(self, *, trajectory: Trajectory, info: AgentInfo): ...
31
+
32
+ def on_setup_attempt(self): ...
33
+
34
+ def on_model_query(self, *, messages: list[dict[str, str]], agent: str):
35
+ """Actually query the model with the complete history."""
36
+
37
+ def on_query_message_added(
38
+ self,
39
+ *,
40
+ agent: str,
41
+ role: str,
42
+ content: str,
43
+ message_type: str,
44
+ is_demo: bool = False,
45
+ thought: str = "",
46
+ action: str = "",
47
+ tool_calls: list[dict[str, str]] | None = None,
48
+ tool_call_ids: list[str] | None = None,
49
+ ): ...
50
+
51
+ def on_setup_done(self): ...
52
+
53
+ def on_tools_installation_started(self): ...
54
+
55
+
56
+ class CombinedAgentHook(AbstractAgentHook):
57
+ def __init__(self, hooks: list[AbstractAgentHook] | None = None):
58
+ self._hooks = hooks or []
59
+
60
+ def add_hook(self, hook: AbstractAgentHook):
61
+ self._hooks.append(hook)
62
+
63
+ @property
64
+ def hooks(self) -> list[AbstractAgentHook]:
65
+ return self._hooks
66
+
67
+ def on_init(self, *, agent: "DefaultAgent"):
68
+ for hook in self.hooks:
69
+ hook.on_init(agent=agent)
70
+
71
+ def on_run_start(self):
72
+ for hook in self.hooks:
73
+ hook.on_run_start()
74
+
75
+ def on_step_start(self):
76
+ for hook in self.hooks:
77
+ hook.on_step_start()
78
+
79
+ def on_actions_generated(self, *, step: StepOutput):
80
+ for hook in self.hooks:
81
+ hook.on_actions_generated(step=step)
82
+
83
+ def on_action_started(self, *, step: StepOutput):
84
+ for hook in self.hooks:
85
+ hook.on_action_started(step=step)
86
+
87
+ def on_action_executed(self, *, step: StepOutput):
88
+ for hook in self.hooks:
89
+ hook.on_action_executed(step=step)
90
+
91
+ def on_step_done(self, *, step: StepOutput, info: AgentInfo):
92
+ for hook in self.hooks:
93
+ hook.on_step_done(step=step, info=info)
94
+
95
+ def on_run_done(self, *, trajectory: Trajectory, info: AgentInfo):
96
+ for hook in self.hooks:
97
+ hook.on_run_done(trajectory=trajectory, info=info)
98
+
99
+ def on_setup_attempt(self):
100
+ for hook in self.hooks:
101
+ hook.on_setup_attempt()
102
+
103
+ def on_model_query(self, *, messages: list[dict[str, str]], agent: str):
104
+ for hook in self.hooks:
105
+ hook.on_model_query(messages=messages, agent=agent)
106
+
107
+ def on_query_message_added(
108
+ self,
109
+ *,
110
+ agent: str,
111
+ role: str,
112
+ content: str,
113
+ message_type: str,
114
+ is_demo: bool = False,
115
+ thought: str = "",
116
+ action: str = "",
117
+ tool_calls: list[dict[str, str]] | None = None,
118
+ tool_call_ids: list[str] | None = None,
119
+ thinking_blocks: list[dict[str, str]] | None = None,
120
+ ):
121
+ for hook in self.hooks:
122
+ hook.on_query_message_added(
123
+ agent=agent,
124
+ role=role,
125
+ content=content,
126
+ message_type=message_type,
127
+ is_demo=is_demo,
128
+ thought=thought,
129
+ action=action,
130
+ tool_calls=tool_calls,
131
+ tool_call_ids=tool_call_ids,
132
+ )
133
+
134
+ def on_setup_done(self):
135
+ return super().on_setup_done()
136
+
137
+ def on_tools_installation_started(self):
138
+ for hook in self.hooks:
139
+ hook.on_tools_installation_started()
@@ -0,0 +1,34 @@
1
+ from collections.abc import Callable
2
+
3
+ from sweagent.agent.hooks.abstract import AbstractAgentHook
4
+ from sweagent.types import AgentInfo, StepOutput
5
+
6
+
7
class SetStatusAgentHook(AbstractAgentHook):
    """Hook that reports step number and accumulated cost through a callback."""

    def __init__(self, id: str, callable: Callable[[str, str], None]):
        """
        Args:
            id: Identifier passed as first argument to `callable` on every update.
            callable: Called as ``callable(id, message)`` for each status update.
        """
        self._callable = callable
        self._id = id
        self._i_step = 0
        # Cost of the current attempt, as last reported in `on_step_done`.
        self._cost = 0.0
        self._i_attempt = 0
        # Sum of the final costs of all earlier attempts.
        self._previous_cost = 0.0

    def on_setup_attempt(self):
        """Start a new attempt: reset the step counter and bank the last attempt's cost."""
        self._i_attempt += 1
        self._i_step = 0
        # Costs will be reset for the next attempt
        self._previous_cost += self._cost
        # The banked cost must not be counted again before the new attempt
        # reports its first step cost.
        self._cost = 0.0

    def _update(self, message: str):
        self._callable(self._id, message)

    def on_step_start(self):
        """Emit "[Attempt k ]Step n ($total)"; the attempt prefix appears from attempt 2 on."""
        self._i_step += 1
        attempt_str = f"Attempt {self._i_attempt} " if self._i_attempt > 1 else ""
        self._update(f"{attempt_str}Step {self._i_step:>3} (${self._previous_cost + self._cost:.2f})")

    def on_step_done(self, *, step: StepOutput, info: AgentInfo):
        """Record the current attempt's cost from ``info["model_stats"]["instance_cost"]``."""
        self._cost = info["model_stats"]["instance_cost"]  # type: ignore

    def on_tools_installation_started(self):
        self._update("Installing tools")