@elizaos/sweagent-root 2.0.0-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (323) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +270 -0
  3. package/package.json +71 -0
  4. package/python/LICENSE +21 -0
  5. package/python/config/README.md +15 -0
  6. package/python/config/bash_only.yaml +222 -0
  7. package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
  8. package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
  9. package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
  10. package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
  11. package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
  12. package/python/config/coding_challenge.yaml +104 -0
  13. package/python/config/default.yaml +69 -0
  14. package/python/config/default_backticks.yaml +69 -0
  15. package/python/config/default_mm_no_images.yaml +82 -0
  16. package/python/config/default_mm_with_images.yaml +83 -0
  17. package/python/config/demo/default.yaml +80 -0
  18. package/python/config/demo/no_instructions.yaml +69 -0
  19. package/python/config/demo/only_bash.yaml +60 -0
  20. package/python/config/exotic/default_shell.yaml +52 -0
  21. package/python/config/exotic/windowed_replace.yaml +125 -0
  22. package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
  23. package/python/config/human/human.yaml +24 -0
  24. package/python/config/human/human_demo.yaml +52 -0
  25. package/python/config/sweagent_0_7/07.yaml +101 -0
  26. package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
  27. package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
  28. package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
  29. package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
  30. package/python/mlc_config.json +44 -0
  31. package/python/pyproject.toml +262 -0
  32. package/python/sweagent/__init__.py +114 -0
  33. package/python/sweagent/__main__.py +4 -0
  34. package/python/sweagent/agent/__init__.py +0 -0
  35. package/python/sweagent/agent/action_sampler.py +317 -0
  36. package/python/sweagent/agent/agents.py +1294 -0
  37. package/python/sweagent/agent/extra/shell_agent.py +106 -0
  38. package/python/sweagent/agent/history_processors.py +399 -0
  39. package/python/sweagent/agent/hooks/__init__.py +0 -0
  40. package/python/sweagent/agent/hooks/abstract.py +139 -0
  41. package/python/sweagent/agent/hooks/status.py +34 -0
  42. package/python/sweagent/agent/models.py +896 -0
  43. package/python/sweagent/agent/problem_statement.py +312 -0
  44. package/python/sweagent/agent/reviewer.py +664 -0
  45. package/python/sweagent/environment/__init__.py +0 -0
  46. package/python/sweagent/environment/hooks/__init__.py +0 -0
  47. package/python/sweagent/environment/hooks/abstract.py +60 -0
  48. package/python/sweagent/environment/hooks/status.py +28 -0
  49. package/python/sweagent/environment/repo.py +219 -0
  50. package/python/sweagent/environment/swe_env.py +276 -0
  51. package/python/sweagent/exceptions.py +54 -0
  52. package/python/sweagent/inspector/README.md +6 -0
  53. package/python/sweagent/inspector/__init__.py +0 -0
  54. package/python/sweagent/inspector/favicon.ico +0 -0
  55. package/python/sweagent/inspector/fileViewer.js +354 -0
  56. package/python/sweagent/inspector/icons/computer.png +0 -0
  57. package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
  58. package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
  59. package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
  60. package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
  61. package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
  62. package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
  63. package/python/sweagent/inspector/index.html +25 -0
  64. package/python/sweagent/inspector/server.py +354 -0
  65. package/python/sweagent/inspector/static.py +169 -0
  66. package/python/sweagent/inspector/style.css +454 -0
  67. package/python/sweagent/run/__init__.py +0 -0
  68. package/python/sweagent/run/_progress.py +158 -0
  69. package/python/sweagent/run/batch_instances.py +419 -0
  70. package/python/sweagent/run/common.py +387 -0
  71. package/python/sweagent/run/compare_runs.py +123 -0
  72. package/python/sweagent/run/extract_pred.py +19 -0
  73. package/python/sweagent/run/hooks/__init__.py +0 -0
  74. package/python/sweagent/run/hooks/abstract.py +67 -0
  75. package/python/sweagent/run/hooks/apply_patch.py +106 -0
  76. package/python/sweagent/run/hooks/open_pr.py +244 -0
  77. package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
  78. package/python/sweagent/run/inspector_cli.py +493 -0
  79. package/python/sweagent/run/merge_predictions.py +64 -0
  80. package/python/sweagent/run/quick_stats.py +96 -0
  81. package/python/sweagent/run/remove_unfinished.py +63 -0
  82. package/python/sweagent/run/rich_test.py +91 -0
  83. package/python/sweagent/run/run.py +147 -0
  84. package/python/sweagent/run/run_batch.py +442 -0
  85. package/python/sweagent/run/run_replay.py +219 -0
  86. package/python/sweagent/run/run_shell.py +155 -0
  87. package/python/sweagent/run/run_single.py +225 -0
  88. package/python/sweagent/run/run_traj_to_demo.py +85 -0
  89. package/python/sweagent/tools/__init__.py +0 -0
  90. package/python/sweagent/tools/bundle.py +57 -0
  91. package/python/sweagent/tools/commands.py +220 -0
  92. package/python/sweagent/tools/parsing.py +619 -0
  93. package/python/sweagent/tools/tools.py +430 -0
  94. package/python/sweagent/tools/utils.py +108 -0
  95. package/python/sweagent/types.py +102 -0
  96. package/python/sweagent/utils/__init__.py +0 -0
  97. package/python/sweagent/utils/config.py +80 -0
  98. package/python/sweagent/utils/files.py +27 -0
  99. package/python/sweagent/utils/github.py +118 -0
  100. package/python/sweagent/utils/jinja_warnings.py +14 -0
  101. package/python/sweagent/utils/log.py +175 -0
  102. package/python/sweagent/utils/patch_formatter.py +152 -0
  103. package/python/sweagent/utils/serialization.py +45 -0
  104. package/python/tests/__init__.py +0 -0
  105. package/python/tests/conftest.py +191 -0
  106. package/python/tests/test_agent.py +258 -0
  107. package/python/tests/test_batch_instance.py +43 -0
  108. package/python/tests/test_commands/_interactive_dummy.py +35 -0
  109. package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
  110. package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
  111. package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
  112. package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
  113. package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
  114. package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
  115. package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
  116. package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
  117. package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
  118. package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
  119. package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
  120. package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
  121. package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
  122. package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
  123. package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
  124. package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
  125. package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
  126. package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
  127. package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
  128. package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
  129. package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
  130. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
  131. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
  132. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
  133. package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
  134. package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
  135. package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
  136. package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
  137. package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
  138. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
  139. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
  140. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
  141. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
  142. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
  143. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
  144. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
  145. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
  146. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
  147. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
  148. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
  149. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
  150. package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
  151. package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
  152. package/python/tests/test_data/data_sources/human_eval.json +1 -0
  153. package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
  154. package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
  155. package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
  156. package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
  157. package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
  158. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
  159. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
  160. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
  161. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
  162. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
  163. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
  164. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
  165. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
  166. package/python/tests/test_env.py +66 -0
  167. package/python/tests/test_env_utils.py +129 -0
  168. package/python/tests/test_history_processors.py +40 -0
  169. package/python/tests/test_models.py +23 -0
  170. package/python/tests/test_openai_live.py +164 -0
  171. package/python/tests/test_packaging.py +7 -0
  172. package/python/tests/test_parsing.py +131 -0
  173. package/python/tests/test_problem_statement_multimodal.py +111 -0
  174. package/python/tests/test_quick_stats.py +42 -0
  175. package/python/tests/test_run.py +37 -0
  176. package/python/tests/test_run_batch.py +110 -0
  177. package/python/tests/test_run_hooks.py +114 -0
  178. package/python/tests/test_run_replay.py +33 -0
  179. package/python/tests/test_run_single.py +125 -0
  180. package/python/tests/test_tools_command_parsing.py +193 -0
  181. package/python/tests/test_utils.py +15 -0
  182. package/python/tests/tools/__init__.py +0 -0
  183. package/python/tests/tools/conftest.py +12 -0
  184. package/python/tests/tools/test_default_utils.py +153 -0
  185. package/python/tests/tools/test_edit_replace.py +0 -0
  186. package/python/tests/tools/test_split_string.py +82 -0
  187. package/python/tests/utils.py +29 -0
  188. package/python/tools/diff_state/bin/_state_diff_state +52 -0
  189. package/python/tools/diff_state/config.yaml +2 -0
  190. package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
  191. package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
  192. package/python/tools/edit_anthropic/config.yaml +56 -0
  193. package/python/tools/edit_anthropic/install.sh +3 -0
  194. package/python/tools/filemap/bin/filemap +45 -0
  195. package/python/tools/filemap/config.yaml +9 -0
  196. package/python/tools/filemap/install.sh +2 -0
  197. package/python/tools/forfeit/bin/exit_forfeit +5 -0
  198. package/python/tools/forfeit/config.yaml +5 -0
  199. package/python/tools/image_tools/bin/view_image +36 -0
  200. package/python/tools/image_tools/config.yaml +9 -0
  201. package/python/tools/multilingual_setup/bin/do_nothing +2 -0
  202. package/python/tools/multilingual_setup/config.yaml +1 -0
  203. package/python/tools/multilingual_setup/install.sh +45 -0
  204. package/python/tools/registry/bin/_read_env +10 -0
  205. package/python/tools/registry/bin/_write_env +10 -0
  206. package/python/tools/registry/config.yaml +1 -0
  207. package/python/tools/registry/install.sh +6 -0
  208. package/python/tools/registry/lib/__init__.py +0 -0
  209. package/python/tools/registry/lib/registry.py +56 -0
  210. package/python/tools/review_on_submit_m/README.md +6 -0
  211. package/python/tools/review_on_submit_m/bin/submit +54 -0
  212. package/python/tools/review_on_submit_m/config.yaml +6 -0
  213. package/python/tools/review_on_submit_m/install.sh +0 -0
  214. package/python/tools/search/bin/find_file +31 -0
  215. package/python/tools/search/bin/search_dir +39 -0
  216. package/python/tools/search/bin/search_file +55 -0
  217. package/python/tools/search/config.yaml +37 -0
  218. package/python/tools/search/install.sh +3 -0
  219. package/python/tools/submit/bin/submit +17 -0
  220. package/python/tools/submit/config.yaml +5 -0
  221. package/python/tools/web_browser/bin/click_mouse +41 -0
  222. package/python/tools/web_browser/bin/close_site +28 -0
  223. package/python/tools/web_browser/bin/double_click_mouse +37 -0
  224. package/python/tools/web_browser/bin/drag_mouse +46 -0
  225. package/python/tools/web_browser/bin/execute_script_on_page +39 -0
  226. package/python/tools/web_browser/bin/get_console_output +48 -0
  227. package/python/tools/web_browser/bin/move_mouse +35 -0
  228. package/python/tools/web_browser/bin/navigate_back +33 -0
  229. package/python/tools/web_browser/bin/navigate_forward +33 -0
  230. package/python/tools/web_browser/bin/open_site +36 -0
  231. package/python/tools/web_browser/bin/press_keys_on_page +51 -0
  232. package/python/tools/web_browser/bin/reload_page +33 -0
  233. package/python/tools/web_browser/bin/run_web_browser_server +394 -0
  234. package/python/tools/web_browser/bin/screenshot_site +38 -0
  235. package/python/tools/web_browser/bin/scroll_on_page +40 -0
  236. package/python/tools/web_browser/bin/set_browser_window_size +40 -0
  237. package/python/tools/web_browser/bin/type_text +34 -0
  238. package/python/tools/web_browser/bin/wait_time +39 -0
  239. package/python/tools/web_browser/config.yaml +155 -0
  240. package/python/tools/web_browser/install.sh +22 -0
  241. package/python/tools/web_browser/lib/browser_manager.py +404 -0
  242. package/python/tools/web_browser/lib/web_browser_config.py +33 -0
  243. package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
  244. package/python/tools/web_browser/test_console.html +1 -0
  245. package/python/tools/windowed/bin/_state +25 -0
  246. package/python/tools/windowed/bin/create +29 -0
  247. package/python/tools/windowed/bin/goto +37 -0
  248. package/python/tools/windowed/bin/open +49 -0
  249. package/python/tools/windowed/bin/scroll_down +12 -0
  250. package/python/tools/windowed/bin/scroll_up +13 -0
  251. package/python/tools/windowed/config.yaml +38 -0
  252. package/python/tools/windowed/install.sh +15 -0
  253. package/python/tools/windowed/lib/__init__.py +0 -0
  254. package/python/tools/windowed/lib/flake8_utils.py +147 -0
  255. package/python/tools/windowed/lib/windowed_file.py +312 -0
  256. package/python/tools/windowed_edit_linting/bin/edit +128 -0
  257. package/python/tools/windowed_edit_linting/config.yaml +31 -0
  258. package/python/tools/windowed_edit_linting/install.sh +5 -0
  259. package/python/tools/windowed_edit_replace/bin/edit +172 -0
  260. package/python/tools/windowed_edit_replace/bin/insert +77 -0
  261. package/python/tools/windowed_edit_replace/config.yaml +60 -0
  262. package/python/tools/windowed_edit_replace/install.sh +5 -0
  263. package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
  264. package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
  265. package/python/tools/windowed_edit_rewrite/install.sh +5 -0
  266. package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
  267. package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
  268. package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
  269. package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
  270. package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
  271. package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
  272. package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
  273. package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
  274. package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
  275. package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
  276. package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
  277. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
  278. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  279. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  280. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
  281. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
  282. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
  283. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  284. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  285. package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
  286. package/rust/Cargo.toml +100 -0
  287. package/rust/README.md +49 -0
  288. package/rust/src/agent/action_sampler.rs +130 -0
  289. package/rust/src/agent/agents.rs +1029 -0
  290. package/rust/src/agent/history_processors.rs +277 -0
  291. package/rust/src/agent/hooks/mod.rs +208 -0
  292. package/rust/src/agent/mod.rs +24 -0
  293. package/rust/src/agent/models.rs +837 -0
  294. package/rust/src/agent/problem_statement.rs +355 -0
  295. package/rust/src/agent/reviewer.rs +505 -0
  296. package/rust/src/bin/sweagent.rs +784 -0
  297. package/rust/src/environment/deployment.rs +631 -0
  298. package/rust/src/environment/hooks/mod.rs +114 -0
  299. package/rust/src/environment/mod.rs +16 -0
  300. package/rust/src/environment/repo.rs +265 -0
  301. package/rust/src/environment/runtime.rs +237 -0
  302. package/rust/src/environment/swe_env.rs +248 -0
  303. package/rust/src/exceptions.rs +228 -0
  304. package/rust/src/lib.rs +68 -0
  305. package/rust/src/monitoring.rs +482 -0
  306. package/rust/src/run/hooks/mod.rs +134 -0
  307. package/rust/src/run/mod.rs +12 -0
  308. package/rust/src/run/run_batch.rs +563 -0
  309. package/rust/src/run/run_single.rs +196 -0
  310. package/rust/src/tools/bundle.rs +224 -0
  311. package/rust/src/tools/commands.rs +173 -0
  312. package/rust/src/tools/mod.rs +295 -0
  313. package/rust/src/tools/parsing.rs +354 -0
  314. package/rust/src/tools/registry.rs +143 -0
  315. package/rust/src/types.rs +554 -0
  316. package/rust/src/utils/config.rs +105 -0
  317. package/rust/src/utils/files.rs +137 -0
  318. package/rust/src/utils/github.rs +171 -0
  319. package/rust/src/utils/log.rs +65 -0
  320. package/rust/src/utils/mod.rs +17 -0
  321. package/rust/src/utils/serialization.rs +181 -0
  322. package/rust/src/utils/template.rs +173 -0
  323. package/typescript/README.md +335 -0
@@ -0,0 +1,430 @@
1
+ """
2
+ This module contains the configuration for the tools that are made available to the agent.
3
+
4
+ The `ToolConfig` class is used to configure the tools that are available to the agent.
5
+ The `ToolHandler` class is used to handle the tools that are available to the agent.
6
+ """
7
+
8
+ import asyncio
9
+ import json
10
+ import os
11
+ import re
12
+ from functools import cached_property
13
+ from pathlib import Path
14
+ from typing import Any
15
+
16
+ from pydantic import BaseModel, Field
17
+ from swerex.runtime.abstract import Command as RexCommand
18
+ from swerex.runtime.abstract import UploadRequest
19
+ from typing_extensions import Self
20
+
21
+ from sweagent.environment.swe_env import SWEEnv
22
+ from sweagent.tools.bundle import Bundle
23
+ from sweagent.tools.commands import BASH_COMMAND, Command
24
+ from sweagent.tools.parsing import FunctionCallingParser, JsonParser, ParseFunction
25
+ from sweagent.tools.utils import _guard_multiline_input, generate_command_docs
26
+ from sweagent.utils.log import get_logger
27
+
28
+
29
class ToolFilterConfig(BaseModel):
    """Filter out commands that are blocked by the environment
    (for example interactive commands like `vim`).
    """

    blocklist_error_template: str = "Operation '{{action}}' is not supported by this environment."
    """The error template to use when a command is blocked."""

    blocklist: list[str] = [
        "vim",
        "vi",
        "emacs",
        "nano",
        "nohup",
        "gdb",
        "less",
        "tail -f",
        "python -m venv",
        "make",
    ]
    """Block any command that starts with one of these"""

    blocklist_standalone: list[str] = [
        "python",
        "python3",
        "ipython",
        "bash",
        "sh",
        "/bin/bash",
        "/bin/sh",
        "nohup",
        "vi",
        "vim",
        "emacs",
        "nano",
        "su",
    ]
    """Block any command that matches one of these exactly"""

    # Each key is a command name; the value is the pattern an invocation of that
    # command has to match in order to be allowed. The `r2` entry must therefore
    # match on `r2` itself: reusing the `radare2` pattern would make every
    # `r2 ... -c ...` invocation fail its own allow-regex.
    block_unless_regex: dict[str, str] = {
        "radare2": r"\b(?:radare2)\b.*\s+-c\s+.*",
        "r2": r"\b(?:r2)\b.*\s+-c\s+.*",
    }
    """Block any command that matches one of these names unless it also matches the regex"""
73
+
74
+
75
class ToolConfig(BaseModel):
    """Configuration for the tools that are made available to the agent."""

    filter: ToolFilterConfig = ToolFilterConfig()
    """Filter out commands that are blocked by the environment
    (for example interactive commands like `vim`).
    """

    bundles: list[Bundle] = Field(default_factory=list)
    """The tool bundles to load."""

    propagate_env_variables: list[str] = []
    """Environment variables to propagate to the environment.
    This is useful if you want to propagate API keys or similar from your own environment to the
    environment in which the tools run.
    IMPORTANT NOTE: The value of the environment variables can be read in debug log files,
    so be careful with your API keys!
    """

    env_variables: dict[str, Any] = {
        "PAGER": "cat",
        "MANPAGER": "cat",
        "LESS": "-R",
        "PIP_PROGRESS_BAR": "off",
        "TQDM_DISABLE": "1",
        "GIT_PAGER": "cat",
    }
    """Shorthand to set environment variables for the tools, effectively
    equivalent to adding `export VARNAME=value` to the `reset_commands`.
    """

    registry_variables: dict[str, Any] = {}
    """Populate the registry with these variables. Will be written out as json in the registry file."""

    submit_command: str = "submit"
    """The command/tool to use to submit the solution."""

    parse_function: ParseFunction = Field(default_factory=FunctionCallingParser)
    """The action parser that is responsible for parsing the model output into a thought and action.
    """

    enable_bash_tool: bool = True
    """Whether to enable the bash tool in addition to the other tools specified in bundles."""

    format_error_template: str = None  # type: ignore
    """Defaults to format_error_template in ParseFunction"""

    command_docs: str = None  # type: ignore
    """Automatically generated documentation generated based on
    the loaded tool bundles.
    """

    multi_line_command_endings: dict[str, str] = {}
    """Maps a command name to its `end_name` for every command that defines one.
    Overwritten in `model_post_init` from the loaded commands.
    """

    submit_command_end_name: str | None = None
    """The `end_name` of the command named by `submit_command`.
    Set in `model_post_init` when that command is among the loaded commands.
    """

    reset_commands: list[str | list[str]] = []
    """Commands to reset the environment. They will also be called when we start the environment.
    These commands are part of the environment state.
    """

    execution_timeout: int = 30
    """Timeout for executing commands in the environment"""

    install_timeout: int = 300
    """Timeout used for each of the installation commands"""

    total_execution_timeout: int = 1800
    """Timeout for executing all commands in the environment.
    Note: Does not interrupt running commands, but will stop the agent for the next step.
    """

    max_consecutive_execution_timeouts: int = 3
    """Maximum number of consecutive execution timeouts before the agent exits.
    """

    @cached_property
    def use_function_calling(self) -> bool:
        """Whether the configured parser is a `FunctionCallingParser`."""
        return isinstance(self.parse_function, FunctionCallingParser)

    @cached_property
    def state_commands(self) -> list[str]:
        """This property returns the state commands from all bundles.
        State commands are commands that are used to get the state of the environment
        (e.g., the current working directory).
        """
        return [bundle.state_command for bundle in self.bundles if bundle.state_command]

    # todo: move to ToolHandler?
    @cached_property
    def commands(self) -> list[Command]:
        """Collect the command objects of all bundles (plus bash, if enabled).

        Raises:
            ValueError: If two sources define a command with the same name.
        """
        commands = []
        tool_sources: dict[str, Path] = {}  # Track which file each tool comes from
        # Add bash command if enabled
        if self.enable_bash_tool:
            commands.append(BASH_COMMAND)
            tool_sources[BASH_COMMAND.name] = Path("<builtin>")

        # Collect commands from all bundles
        for bundle in self.bundles:
            for command in bundle.commands:
                if command.name in tool_sources:
                    existing_source = tool_sources[command.name]
                    msg = (
                        f"Tool '{command.name}' is defined multiple times:\n"
                        f"  - First definition in: {existing_source}\n"
                        f"  - Duplicate definition in: {bundle.path}"
                    )
                    raise ValueError(msg)
                commands.append(command)
                tool_sources[command.name] = bundle.path

        return commands

    @cached_property
    def tools(self) -> list[dict]:
        """Function-calling tool descriptions, one per entry of `commands`."""
        return [command.get_function_calling_tool() for command in self.commands]

    # todo: can some of these be moved to ToolHandler?
    def model_post_init(self, __context: Any) -> None:
        """Fill in the fields that are derived from the loaded commands.

        Raises:
            ValueError: If the bash tool is disabled while the parser is neither a
                `FunctionCallingParser` nor a `JsonParser`, or if `commands`
                detects a duplicate tool name.
        """
        # Access the cached properties once so that they are computed (and any
        # duplicate-tool error is raised) when the config is created.
        commands = self.commands
        self.tools

        # The bash tool may only be switched off for these two parser types.
        if not self.enable_bash_tool and not isinstance(self.parse_function, (FunctionCallingParser, JsonParser)):
            msg = f"Bash tool can only be disabled if {FunctionCallingParser.type} parser or {JsonParser.type} parser is used."
            raise ValueError(msg)

        self.multi_line_command_endings = {
            command.name: command.end_name for command in commands if command.end_name is not None
        }
        self.command_docs = generate_command_docs(
            commands,
            [],
            **self.env_variables,
        )
        if self.format_error_template is None:
            self.format_error_template = self.parse_function.format_error_template
        for command in commands:
            if command.name == self.submit_command:
                self.submit_command_end_name = command.end_name
                break
225
+
226
+
227
+ class ToolHandler:
228
def __init__(self, tools: ToolConfig):
    """Create a handler around a private copy of the given tool configuration.

    The handler covers most of the tool usage:

    - Install the tools
    - Parse commands and handle multiline commands
    - Decide if an action should be blocked
    - Get the current state of the environment
    """
    # Deep-copy the config so that handlers used from different threads never
    # share (and mutate) the same configuration object.
    self.config = tools.model_copy(deep=True)
    # Starts out empty; joined with " && " and executed by `reset`.
    # partially initialized in `install_commands`.
    self._reset_commands = []
    # NOTE: statement order is kept as in the original since
    # `_get_command_patterns` is not visible from here.
    self._command_patterns = self._get_command_patterns()
    self.logger = get_logger("swea-tools", emoji="🧰")
    # Testing hook: when set, this state is returned instead of querying the environment.
    self.mock_state: dict[str, str] | None = None
244
+
245
@classmethod
def from_config(cls, config: ToolConfig) -> Self:
    """Alternate constructor: build a handler from a `ToolConfig`."""
    handler = cls(config)
    return handler
248
+
249
+ # Installation & Reset
250
+ # --------------------
251
+
252
+ def install(self, env: SWEEnv) -> None:
253
+ self._install_commands(env)
254
+ self.reset(env)
255
+
256
+ def reset(self, env: SWEEnv) -> None:
257
+ self.logger.info("Resetting tools")
258
+ env_variables = self.config.env_variables.copy() | {
259
+ var: os.getenv(var) for var in self.config.propagate_env_variables
260
+ }
261
+ env.set_env_variables(env_variables)
262
+ env.write_file("/root/.swe-agent-env", json.dumps(self.config.registry_variables))
263
+ env.write_file("/root/state.json", "{}")
264
+ env.communicate(" && ".join(self._reset_commands), check="raise", timeout=self.config.install_timeout)
265
+
266
+ async def _upload_bundles(self, env: SWEEnv) -> None:
267
+ await asyncio.gather(
268
+ *(
269
+ env.deployment.runtime.upload(
270
+ UploadRequest(source_path=bundle.path.as_posix(), target_path=f"/root/tools/{bundle.path.name}")
271
+ )
272
+ for bundle in self.config.bundles
273
+ )
274
+ )
275
+
276
+ async def _is_command_available(self, env, command: str, env_vars: dict[str, str]) -> None:
277
+ if command == "bash":
278
+ return
279
+ try:
280
+ await env.deployment.runtime.execute(
281
+ RexCommand(command=f"which {command}", shell=True, check=True, env=env_vars)
282
+ )
283
+ except Exception:
284
+ msg = f"Tool {command} is not available in the container."
285
+ raise RuntimeError(msg) from None
286
+
287
+ async def _check_available_commands(self, env: SWEEnv, env_vars: dict[str, str]) -> None:
288
+ await asyncio.gather(
289
+ *(self._is_command_available(env, command.name, env_vars) for command in self.config.commands)
290
+ )
291
+
292
+ def _install_commands(self, env: SWEEnv) -> None:
293
+ """Make sure all commands are available in the container"""
294
+ env.set_env_variables(self.config.env_variables)
295
+ cwd = env.communicate("pwd", check="raise").strip()
296
+ asyncio.run(self._upload_bundles(env))
297
+ for bundle in self.config.bundles:
298
+ cmds = [
299
+ f"export PATH=/root/tools/{bundle.path.name}/bin:$PATH",
300
+ f"chmod +x /root/tools/{bundle.path.name}/bin/*",
301
+ ]
302
+ if (bundle.path / "install.sh").exists():
303
+ cmds.append(f"cd /root/tools/{bundle.path.name} && source install.sh")
304
+ cmds.append(f"chmod +x /root/tools/{bundle.path.name}/bin/*")
305
+ env.communicate(
306
+ " && ".join(cmds),
307
+ check="raise",
308
+ timeout=self.config.install_timeout,
309
+ )
310
+ env.communicate(f"cd {cwd}", check="raise")
311
+ path = env.communicate("echo $PATH", check="raise").strip()
312
+ asyncio.run(self._check_available_commands(env, {"PATH": path}))
313
+
314
+ # Getting state
315
+ # -------------
316
+
317
+ def _get_state(self, env: SWEEnv) -> dict[str, str]:
318
+ """Retrieve the state from the environment"""
319
+ try:
320
+ state_str = env.read_file("/root/state.json")
321
+ except FileNotFoundError:
322
+ self.logger.warning("State file not found, returning empty state")
323
+ return {}
324
+ if not state_str.strip():
325
+ self.logger.warning("State file is empty, returning empty state")
326
+ return {}
327
+ try:
328
+ state = json.loads(state_str)
329
+ except json.JSONDecodeError as e:
330
+ msg = f"State {state_str!r} is not valid json. This is an internal error, please report it."
331
+ raise ValueError(msg) from e
332
+ if not isinstance(state, dict):
333
+ msg = f"State commands must return a dictionary. Got {state!r} instead."
334
+ raise ValueError(msg)
335
+ return state
336
+
337
+ def get_state(self, env: SWEEnv) -> dict[str, str]:
338
+ """Execute state commands from all bundles and combine their results.
339
+ This can be used to extract environment variables etc. from the environment.
340
+ """
341
+ if self.mock_state is not None:
342
+ return self.mock_state
343
+
344
+ for state_command in self.config.state_commands:
345
+ env.communicate(state_command, check="warn")
346
+ combined_state = self._get_state(env)
347
+ self.logger.debug(f"Retrieved state from environment: {combined_state}")
348
+ return combined_state
349
+
350
+ # Blocking
351
+ # --------
352
+
353
+ def should_block_action(self, action: str) -> bool:
354
+ """Check if the command should be blocked."""
355
+ action = action.strip()
356
+ if not action:
357
+ return False
358
+ if any(f.startswith(action) for f in self.config.filter.blocklist):
359
+ return True
360
+ if action in self.config.filter.blocklist_standalone:
361
+ return True
362
+ name = action.split()[0]
363
+ if name in self.config.filter.block_unless_regex and not re.search(
364
+ self.config.filter.block_unless_regex[name], action
365
+ ):
366
+ return True
367
+ return False
368
+
369
+ # Parsing & multiline commands
370
+ # -----------------------------
371
+
372
+ def check_for_submission_cmd(self, output: str) -> bool:
373
+ """Function for checking submission request."""
374
+ if r"<<SWE_AGENT_SUBMISSION>>" in output:
375
+ return True
376
+ return False
377
+
378
+ def parse_actions(self, output: dict) -> tuple[str, str]:
379
+ """Parse the model output into a thought and action."""
380
+ return self.config.parse_function(output, self.config.commands)
381
+
382
+ def guard_multiline_input(self, action: str) -> str:
383
+ """Split action by multiline commands, then append the first line in each multiline command with "<< '{end_name}'".
384
+ Multiline commands (which are specified by an end_name) are commands that span multiple lines and are terminated by a specific end_name.
385
+
386
+ Their multi-line argument is sent using a heredoc, which is a way to send a multi-line string to a command in bash.
387
+ """
388
+ return _guard_multiline_input(action, self._get_first_multiline_cmd)
389
+
390
+ def _get_first_multiline_cmd(self, action: str) -> re.Match | None:
391
+ """Return the first match of a command pattern in the action string.
392
+ Where first match is defined by the start of the match.
393
+
394
+ The match object has three groups: (1) command name, (2) command arguments, (3) end name
395
+ """
396
+ patterns = {
397
+ k: v
398
+ for k, v in self._command_patterns.items()
399
+ if k in self.config.multi_line_command_endings or k == self.config.submit_command
400
+ }
401
+ matches = list()
402
+ for _, pat in patterns.items():
403
+ match = pat.search(action)
404
+ if match:
405
+ matches.append(match)
406
+ if len(matches) == 0:
407
+ return None
408
+ matches = sorted(matches, key=lambda x: x.start())
409
+ return matches[0]
410
+
411
+ def _get_command_patterns(self) -> dict[str, re.Pattern]:
412
+ """Creates regular expressions for the commands"""
413
+
414
+ _command_patterns = {}
415
+ for command in self.config.commands:
416
+ if command.end_name is not None:
417
+ pat = re.compile(
418
+ rf"^\s*({command.name})\s*(.*?)^({command.end_name})\s*$",
419
+ re.DOTALL | re.MULTILINE,
420
+ )
421
+ _command_patterns[command.name] = pat
422
+ else:
423
+ pat = re.compile(rf"^\s*({command.name})\s*(.*?)$", re.MULTILINE)
424
+ _command_patterns[command.name] = pat
425
+ submit_pat = re.compile(
426
+ rf"^\s*({self.config.submit_command})\s*(.*?)^({self.config.submit_command_end_name})\s*$",
427
+ re.DOTALL | re.MULTILINE,
428
+ )
429
+ _command_patterns[self.config.submit_command] = submit_pat
430
+ return _command_patterns
@@ -0,0 +1,108 @@
1
+ import re
2
+ from collections.abc import Callable
3
+ from typing import Any
4
+
5
+ from sweagent.tools.commands import Command
6
+
7
+
8
def _guard_multiline_input(action: str, match_fct: Callable[[str], re.Match | None]) -> str:
    """Split action by multiline commands, then append the first line in each multiline command with "<< '{end_name}'".
    Multiline commands (which are specified by an end_name) are commands that span multiple lines and are terminated by a specific end_name.

    Their multi-line argument is sent using a heredoc, which is a way to send a multi-line string to a command in bash.

    Args:
        action: The raw action text, possibly containing several commands.
        match_fct: Returns the earliest multiline-command match in a string (or None).
            The match must have three groups: (1) command name, (2) arguments, (3) end name.

    Returns:
        The pieces of the action (text before a command, guarded commands, trailing text)
        joined with newlines. Commands whose first line already ends with the heredoc
        marker are passed through unchanged.
    """
    parsed_action = []
    rem_action = action
    while rem_action.strip():
        first_match = match_fct(rem_action)
        if first_match is None:
            # No further multiline command: keep the rest verbatim.
            parsed_action.append(rem_action)
            break
        pre_action = rem_action[: first_match.start()]
        match_action = rem_action[first_match.start() : first_match.end()]
        rem_action = rem_action[first_match.end() :]
        if pre_action.strip():
            parsed_action.append(pre_action)
        if not match_action.strip():
            continue
        heredoc = f"<< '{first_match.group(3).strip()}'"
        # Offsets of the match refer to the string that was searched, so they must be
        # made relative to `match_action`. Anchoring on the command name (group 1)
        # also skips blank lines that the pattern may have consumed before the command.
        name_offset = first_match.start(1) - first_match.start()
        leading, command = match_action[:name_offset], match_action[name_offset:]
        first_line = command.split("\n")[0]
        if first_line.strip().endswith(heredoc):
            parsed_action.append(match_action)
        else:
            parsed_action.append(f"{leading}{first_line} {heredoc}{command[len(first_line):]}")
    return "\n".join(parsed_action)
37
+
38
+
39
def _should_quote(value: Any, command: Command) -> bool:
    """Tell whether ``value`` has to be quoted when passed to ``command``.

    Only string values are quoted, and never for the ``bash`` command or for
    commands that define an ``end_name``.
    """
    takes_quoted_arguments = command.name != "bash" and command.end_name is None
    return takes_quoted_arguments and isinstance(value, str)
44
+
45
+
46
def get_signature(cmd):
    """Generate a command signature from its arguments.

    Required arguments are rendered as ``<name>``, optional ones as ``[<name>]``.
    For a command with an ``end_name`` the last argument is the multi-line body:
    its name is put on its own line, followed by a line with the end name.

    Args:
        cmd: Command object to generate signature for

    Returns:
        Formatted signature string. Just the command name if the command has
        no (or an empty list of) arguments.
    """
    signature = cmd.name
    # Same lookup as before: only instance-level `arguments` count.
    arguments = vars(cmd).get("arguments")
    if not arguments:
        return signature

    if cmd.end_name is None:
        inline_arguments, body_argument = arguments, None
    else:
        *inline_arguments, body_argument = arguments

    for argument in inline_arguments:
        signature += f" <{argument.name}>" if argument.required else f" [<{argument.name}>]"

    if body_argument is not None:
        # Arguments are objects with a `name` everywhere else in this function;
        # mapping-style arguments (first key = name) are still accepted.
        if hasattr(body_argument, "name"):
            body_name = body_argument.name
        else:
            body_name = next(iter(body_argument.keys()))
        signature += f"\n{body_name}\n{cmd.end_name}"
    return signature
73
+
74
+
75
def generate_command_docs(
    commands: list[Command],
    subroutine_types,
    **kwargs,
) -> str:
    """Render the documentation text for a set of commands.

    Each entry consists of the command name, its docstring (if any), its
    signature and, when present, one line per argument.

    Args:
        commands: List of commands to document
        subroutine_types: List of subroutines to document (appended after the commands)
        **kwargs: Values substituted into the docstrings via ``str.format``

    Returns:
        The concatenated documentation of all entries, each followed by a blank line
    """
    pieces = []
    for entry in commands + subroutine_types:
        pieces.append(f"{entry.name}:\n")
        if entry.docstring is not None:
            pieces.append(f" docstring: {entry.docstring.format(**kwargs)}\n")
        # An explicit signature wins over the generated one.
        shown_signature = entry.signature if entry.signature is not None else get_signature(entry)
        pieces.append(f" signature: {shown_signature}\n")
        if entry.arguments:
            pieces.append(" arguments:\n")
            for argument in entry.arguments:
                req_string = "required" if argument.required else "optional"
                pieces.append(f" - {argument.name} ({argument.type}) [{req_string}]: {argument.description}\n")
        pieces.append("\n")
    return "".join(pieces)
@@ -0,0 +1,102 @@
1
+ """This file has types/dataclass definitions that are used in the SWE agent
2
+ for exchanging data between different modules/functions/classes.
3
+ They oftentimes cannot be defined in the same file where they are used
4
+ because of circular dependencies.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Any, Literal
10
+
11
+ from pydantic import BaseModel
12
+ from typing_extensions import TypedDict
13
+
14
+
15
class StepOutput(BaseModel):
    # Result of a single agent step; every field has a default.
    query: list[dict] = [{}]
    thought: str = ""
    action: str = ""
    output: str = ""
    observation: str = ""
    execution_time: float = 0.0
    done: bool = False
    exit_status: int | str | None = None
    submission: str | None = None
    # State of the environment at the end of the step
    state: dict[str, str] = {}
    tool_calls: list[dict[str, Any]] | None = None
    tool_call_ids: list[str] | None = None
    thinking_blocks: list[dict[str, Any]] | None = None
    extra_info: dict[str, Any] = {}

    def to_template_format_dict(self) -> dict[str, str | int | float | bool | None]:
        """Used for formatting (error) prompt templates

        Returns the dumped model without the tool-call fields and with the
        entries of ``state`` merged into the top level (state entries win on
        key clashes).
        """
        hidden_keys = ("tool_calls", "tool_call_ids", "state")
        template_values = {key: value for key, value in self.model_dump().items() if key not in hidden_keys}
        template_values.update(self.state)
        return template_values
42
+
43
+
44
class TrajectoryStep(TypedDict):
    """Dictionary form of one trajectory entry; all keys are required."""

    action: str
    observation: str
    response: str
    state: dict[str, str]
    thought: str
    execution_time: float
    query: list[dict[str, Any]]
    extra_info: dict[str, Any]
53
+
54
+
55
+ # required fields go here
56
class _HistoryItem(TypedDict):
    """Keys that every history item must have; `HistoryItem` adds the optional ones."""

    role: str
    # Either plain text or a list of dicts.
    content: str | list[dict[str, Any]]
    message_type: Literal["thought", "action", "observation"]
60
+
61
+
62
+ # see _HistoryItem for required fields
63
class HistoryItem(_HistoryItem, total=False):
    # Declared with `total=False`: the keys listed here are optional; the
    # required keys are inherited from `_HistoryItem`.
    agent: str
    is_demo: bool
    thought: str
    action: str | None
    tool_calls: list[dict[str, str]] | None
    tool_call_ids: list[str] | None
    tags: list[str]
    cache_control: dict[str, Any] | None
    thinking_blocks: list[dict[str, Any]] | None

    # NOTE(review): the string below presumably documents `tags` - confirm.
    """HistoryProcessors can add these tags to enable special processing"""
75
+
76
+
77
# Type aliases for the list forms of the two TypedDicts defined above.
History = list[HistoryItem]
Trajectory = list[TrajectoryStep]
79
+
80
+
81
+ # todo: Make this actually have the dataclasses instead of dict versions
82
class AgentInfo(TypedDict, total=False):
    # Declared with `total=False`: every key is optional.
    # same as `APIStats` from models.py
    model_stats: dict[str, float]
    exit_status: str | None
    submission: str | None
    # same as `ReviewerResult`
    review: dict[str, Any]
    edited_files30: str
    edited_files50: str
    edited_files70: str
    # only if summarizer is used
    summarizer: dict
    swe_agent_hash: str
    swe_agent_version: str
    swe_rex_version: str
    swe_rex_hash: str
98
+
99
+
100
class AgentRunResult(BaseModel):
    # Pairs the `AgentInfo` dictionary with the `Trajectory` (list of `TrajectoryStep`).
    info: AgentInfo
    trajectory: Trajectory
File without changes