@elizaos/sweagent-root 2.0.0-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (323) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +270 -0
  3. package/package.json +71 -0
  4. package/python/LICENSE +21 -0
  5. package/python/config/README.md +15 -0
  6. package/python/config/bash_only.yaml +222 -0
  7. package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
  8. package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
  9. package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
  10. package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
  11. package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
  12. package/python/config/coding_challenge.yaml +104 -0
  13. package/python/config/default.yaml +69 -0
  14. package/python/config/default_backticks.yaml +69 -0
  15. package/python/config/default_mm_no_images.yaml +82 -0
  16. package/python/config/default_mm_with_images.yaml +83 -0
  17. package/python/config/demo/default.yaml +80 -0
  18. package/python/config/demo/no_instructions.yaml +69 -0
  19. package/python/config/demo/only_bash.yaml +60 -0
  20. package/python/config/exotic/default_shell.yaml +52 -0
  21. package/python/config/exotic/windowed_replace.yaml +125 -0
  22. package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
  23. package/python/config/human/human.yaml +24 -0
  24. package/python/config/human/human_demo.yaml +52 -0
  25. package/python/config/sweagent_0_7/07.yaml +101 -0
  26. package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
  27. package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
  28. package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
  29. package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
  30. package/python/mlc_config.json +44 -0
  31. package/python/pyproject.toml +262 -0
  32. package/python/sweagent/__init__.py +114 -0
  33. package/python/sweagent/__main__.py +4 -0
  34. package/python/sweagent/agent/__init__.py +0 -0
  35. package/python/sweagent/agent/action_sampler.py +317 -0
  36. package/python/sweagent/agent/agents.py +1294 -0
  37. package/python/sweagent/agent/extra/shell_agent.py +106 -0
  38. package/python/sweagent/agent/history_processors.py +399 -0
  39. package/python/sweagent/agent/hooks/__init__.py +0 -0
  40. package/python/sweagent/agent/hooks/abstract.py +139 -0
  41. package/python/sweagent/agent/hooks/status.py +34 -0
  42. package/python/sweagent/agent/models.py +896 -0
  43. package/python/sweagent/agent/problem_statement.py +312 -0
  44. package/python/sweagent/agent/reviewer.py +664 -0
  45. package/python/sweagent/environment/__init__.py +0 -0
  46. package/python/sweagent/environment/hooks/__init__.py +0 -0
  47. package/python/sweagent/environment/hooks/abstract.py +60 -0
  48. package/python/sweagent/environment/hooks/status.py +28 -0
  49. package/python/sweagent/environment/repo.py +219 -0
  50. package/python/sweagent/environment/swe_env.py +276 -0
  51. package/python/sweagent/exceptions.py +54 -0
  52. package/python/sweagent/inspector/README.md +6 -0
  53. package/python/sweagent/inspector/__init__.py +0 -0
  54. package/python/sweagent/inspector/favicon.ico +0 -0
  55. package/python/sweagent/inspector/fileViewer.js +354 -0
  56. package/python/sweagent/inspector/icons/computer.png +0 -0
  57. package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
  58. package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
  59. package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
  60. package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
  61. package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
  62. package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
  63. package/python/sweagent/inspector/index.html +25 -0
  64. package/python/sweagent/inspector/server.py +354 -0
  65. package/python/sweagent/inspector/static.py +169 -0
  66. package/python/sweagent/inspector/style.css +454 -0
  67. package/python/sweagent/run/__init__.py +0 -0
  68. package/python/sweagent/run/_progress.py +158 -0
  69. package/python/sweagent/run/batch_instances.py +419 -0
  70. package/python/sweagent/run/common.py +387 -0
  71. package/python/sweagent/run/compare_runs.py +123 -0
  72. package/python/sweagent/run/extract_pred.py +19 -0
  73. package/python/sweagent/run/hooks/__init__.py +0 -0
  74. package/python/sweagent/run/hooks/abstract.py +67 -0
  75. package/python/sweagent/run/hooks/apply_patch.py +106 -0
  76. package/python/sweagent/run/hooks/open_pr.py +244 -0
  77. package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
  78. package/python/sweagent/run/inspector_cli.py +493 -0
  79. package/python/sweagent/run/merge_predictions.py +64 -0
  80. package/python/sweagent/run/quick_stats.py +96 -0
  81. package/python/sweagent/run/remove_unfinished.py +63 -0
  82. package/python/sweagent/run/rich_test.py +91 -0
  83. package/python/sweagent/run/run.py +147 -0
  84. package/python/sweagent/run/run_batch.py +442 -0
  85. package/python/sweagent/run/run_replay.py +219 -0
  86. package/python/sweagent/run/run_shell.py +155 -0
  87. package/python/sweagent/run/run_single.py +225 -0
  88. package/python/sweagent/run/run_traj_to_demo.py +85 -0
  89. package/python/sweagent/tools/__init__.py +0 -0
  90. package/python/sweagent/tools/bundle.py +57 -0
  91. package/python/sweagent/tools/commands.py +220 -0
  92. package/python/sweagent/tools/parsing.py +619 -0
  93. package/python/sweagent/tools/tools.py +430 -0
  94. package/python/sweagent/tools/utils.py +108 -0
  95. package/python/sweagent/types.py +102 -0
  96. package/python/sweagent/utils/__init__.py +0 -0
  97. package/python/sweagent/utils/config.py +80 -0
  98. package/python/sweagent/utils/files.py +27 -0
  99. package/python/sweagent/utils/github.py +118 -0
  100. package/python/sweagent/utils/jinja_warnings.py +14 -0
  101. package/python/sweagent/utils/log.py +175 -0
  102. package/python/sweagent/utils/patch_formatter.py +152 -0
  103. package/python/sweagent/utils/serialization.py +45 -0
  104. package/python/tests/__init__.py +0 -0
  105. package/python/tests/conftest.py +191 -0
  106. package/python/tests/test_agent.py +258 -0
  107. package/python/tests/test_batch_instance.py +43 -0
  108. package/python/tests/test_commands/_interactive_dummy.py +35 -0
  109. package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
  110. package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
  111. package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
  112. package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
  113. package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
  114. package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
  115. package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
  116. package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
  117. package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
  118. package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
  119. package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
  120. package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
  121. package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
  122. package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
  123. package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
  124. package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
  125. package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
  126. package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
  127. package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
  128. package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
  129. package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
  130. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
  131. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
  132. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
  133. package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
  134. package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
  135. package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
  136. package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
  137. package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
  138. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
  139. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
  140. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
  141. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
  142. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
  143. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
  144. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
  145. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
  146. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
  147. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
  148. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
  149. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
  150. package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
  151. package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
  152. package/python/tests/test_data/data_sources/human_eval.json +1 -0
  153. package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
  154. package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
  155. package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
  156. package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
  157. package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
  158. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
  159. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
  160. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
  161. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
  162. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
  163. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
  164. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
  165. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
  166. package/python/tests/test_env.py +66 -0
  167. package/python/tests/test_env_utils.py +129 -0
  168. package/python/tests/test_history_processors.py +40 -0
  169. package/python/tests/test_models.py +23 -0
  170. package/python/tests/test_openai_live.py +164 -0
  171. package/python/tests/test_packaging.py +7 -0
  172. package/python/tests/test_parsing.py +131 -0
  173. package/python/tests/test_problem_statement_multimodal.py +111 -0
  174. package/python/tests/test_quick_stats.py +42 -0
  175. package/python/tests/test_run.py +37 -0
  176. package/python/tests/test_run_batch.py +110 -0
  177. package/python/tests/test_run_hooks.py +114 -0
  178. package/python/tests/test_run_replay.py +33 -0
  179. package/python/tests/test_run_single.py +125 -0
  180. package/python/tests/test_tools_command_parsing.py +193 -0
  181. package/python/tests/test_utils.py +15 -0
  182. package/python/tests/tools/__init__.py +0 -0
  183. package/python/tests/tools/conftest.py +12 -0
  184. package/python/tests/tools/test_default_utils.py +153 -0
  185. package/python/tests/tools/test_edit_replace.py +0 -0
  186. package/python/tests/tools/test_split_string.py +82 -0
  187. package/python/tests/utils.py +29 -0
  188. package/python/tools/diff_state/bin/_state_diff_state +52 -0
  189. package/python/tools/diff_state/config.yaml +2 -0
  190. package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
  191. package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
  192. package/python/tools/edit_anthropic/config.yaml +56 -0
  193. package/python/tools/edit_anthropic/install.sh +3 -0
  194. package/python/tools/filemap/bin/filemap +45 -0
  195. package/python/tools/filemap/config.yaml +9 -0
  196. package/python/tools/filemap/install.sh +2 -0
  197. package/python/tools/forfeit/bin/exit_forfeit +5 -0
  198. package/python/tools/forfeit/config.yaml +5 -0
  199. package/python/tools/image_tools/bin/view_image +36 -0
  200. package/python/tools/image_tools/config.yaml +9 -0
  201. package/python/tools/multilingual_setup/bin/do_nothing +2 -0
  202. package/python/tools/multilingual_setup/config.yaml +1 -0
  203. package/python/tools/multilingual_setup/install.sh +45 -0
  204. package/python/tools/registry/bin/_read_env +10 -0
  205. package/python/tools/registry/bin/_write_env +10 -0
  206. package/python/tools/registry/config.yaml +1 -0
  207. package/python/tools/registry/install.sh +6 -0
  208. package/python/tools/registry/lib/__init__.py +0 -0
  209. package/python/tools/registry/lib/registry.py +56 -0
  210. package/python/tools/review_on_submit_m/README.md +6 -0
  211. package/python/tools/review_on_submit_m/bin/submit +54 -0
  212. package/python/tools/review_on_submit_m/config.yaml +6 -0
  213. package/python/tools/review_on_submit_m/install.sh +0 -0
  214. package/python/tools/search/bin/find_file +31 -0
  215. package/python/tools/search/bin/search_dir +39 -0
  216. package/python/tools/search/bin/search_file +55 -0
  217. package/python/tools/search/config.yaml +37 -0
  218. package/python/tools/search/install.sh +3 -0
  219. package/python/tools/submit/bin/submit +17 -0
  220. package/python/tools/submit/config.yaml +5 -0
  221. package/python/tools/web_browser/bin/click_mouse +41 -0
  222. package/python/tools/web_browser/bin/close_site +28 -0
  223. package/python/tools/web_browser/bin/double_click_mouse +37 -0
  224. package/python/tools/web_browser/bin/drag_mouse +46 -0
  225. package/python/tools/web_browser/bin/execute_script_on_page +39 -0
  226. package/python/tools/web_browser/bin/get_console_output +48 -0
  227. package/python/tools/web_browser/bin/move_mouse +35 -0
  228. package/python/tools/web_browser/bin/navigate_back +33 -0
  229. package/python/tools/web_browser/bin/navigate_forward +33 -0
  230. package/python/tools/web_browser/bin/open_site +36 -0
  231. package/python/tools/web_browser/bin/press_keys_on_page +51 -0
  232. package/python/tools/web_browser/bin/reload_page +33 -0
  233. package/python/tools/web_browser/bin/run_web_browser_server +394 -0
  234. package/python/tools/web_browser/bin/screenshot_site +38 -0
  235. package/python/tools/web_browser/bin/scroll_on_page +40 -0
  236. package/python/tools/web_browser/bin/set_browser_window_size +40 -0
  237. package/python/tools/web_browser/bin/type_text +34 -0
  238. package/python/tools/web_browser/bin/wait_time +39 -0
  239. package/python/tools/web_browser/config.yaml +155 -0
  240. package/python/tools/web_browser/install.sh +22 -0
  241. package/python/tools/web_browser/lib/browser_manager.py +404 -0
  242. package/python/tools/web_browser/lib/web_browser_config.py +33 -0
  243. package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
  244. package/python/tools/web_browser/test_console.html +1 -0
  245. package/python/tools/windowed/bin/_state +25 -0
  246. package/python/tools/windowed/bin/create +29 -0
  247. package/python/tools/windowed/bin/goto +37 -0
  248. package/python/tools/windowed/bin/open +49 -0
  249. package/python/tools/windowed/bin/scroll_down +12 -0
  250. package/python/tools/windowed/bin/scroll_up +13 -0
  251. package/python/tools/windowed/config.yaml +38 -0
  252. package/python/tools/windowed/install.sh +15 -0
  253. package/python/tools/windowed/lib/__init__.py +0 -0
  254. package/python/tools/windowed/lib/flake8_utils.py +147 -0
  255. package/python/tools/windowed/lib/windowed_file.py +312 -0
  256. package/python/tools/windowed_edit_linting/bin/edit +128 -0
  257. package/python/tools/windowed_edit_linting/config.yaml +31 -0
  258. package/python/tools/windowed_edit_linting/install.sh +5 -0
  259. package/python/tools/windowed_edit_replace/bin/edit +172 -0
  260. package/python/tools/windowed_edit_replace/bin/insert +77 -0
  261. package/python/tools/windowed_edit_replace/config.yaml +60 -0
  262. package/python/tools/windowed_edit_replace/install.sh +5 -0
  263. package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
  264. package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
  265. package/python/tools/windowed_edit_rewrite/install.sh +5 -0
  266. package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
  267. package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
  268. package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
  269. package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
  270. package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
  271. package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
  272. package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
  273. package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
  274. package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
  275. package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
  276. package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
  277. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
  278. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  279. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  280. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
  281. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
  282. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
  283. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  284. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  285. package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
  286. package/rust/Cargo.toml +100 -0
  287. package/rust/README.md +49 -0
  288. package/rust/src/agent/action_sampler.rs +130 -0
  289. package/rust/src/agent/agents.rs +1029 -0
  290. package/rust/src/agent/history_processors.rs +277 -0
  291. package/rust/src/agent/hooks/mod.rs +208 -0
  292. package/rust/src/agent/mod.rs +24 -0
  293. package/rust/src/agent/models.rs +837 -0
  294. package/rust/src/agent/problem_statement.rs +355 -0
  295. package/rust/src/agent/reviewer.rs +505 -0
  296. package/rust/src/bin/sweagent.rs +784 -0
  297. package/rust/src/environment/deployment.rs +631 -0
  298. package/rust/src/environment/hooks/mod.rs +114 -0
  299. package/rust/src/environment/mod.rs +16 -0
  300. package/rust/src/environment/repo.rs +265 -0
  301. package/rust/src/environment/runtime.rs +237 -0
  302. package/rust/src/environment/swe_env.rs +248 -0
  303. package/rust/src/exceptions.rs +228 -0
  304. package/rust/src/lib.rs +68 -0
  305. package/rust/src/monitoring.rs +482 -0
  306. package/rust/src/run/hooks/mod.rs +134 -0
  307. package/rust/src/run/mod.rs +12 -0
  308. package/rust/src/run/run_batch.rs +563 -0
  309. package/rust/src/run/run_single.rs +196 -0
  310. package/rust/src/tools/bundle.rs +224 -0
  311. package/rust/src/tools/commands.rs +173 -0
  312. package/rust/src/tools/mod.rs +295 -0
  313. package/rust/src/tools/parsing.rs +354 -0
  314. package/rust/src/tools/registry.rs +143 -0
  315. package/rust/src/types.rs +554 -0
  316. package/rust/src/utils/config.rs +105 -0
  317. package/rust/src/utils/files.rs +137 -0
  318. package/rust/src/utils/github.rs +171 -0
  319. package/rust/src/utils/log.rs +65 -0
  320. package/rust/src/utils/mod.rs +17 -0
  321. package/rust/src/utils/serialization.rs +181 -0
  322. package/rust/src/utils/template.rs +173 -0
  323. package/typescript/README.md +335 -0
@@ -0,0 +1,482 @@
1
+ //! Monitoring and alerting module for production deployments
2
+ //!
3
+ //! Provides hooks for observability, metrics collection, and alerting.
4
+
5
+ use crate::types::AgentRunResult;
6
+ use serde::{Deserialize, Serialize};
7
+ use std::collections::HashMap;
8
+ use std::sync::atomic::{AtomicU64, Ordering};
9
+ use std::sync::Arc;
10
+ use std::time::{Duration, Instant};
11
+
12
/// Running totals describing agent activity.
///
/// Every counter is an [`AtomicU64`], so the totals can be updated through a shared
/// reference (the recording methods all take `&self`).
#[derive(Default, Debug)]
pub struct AgentMetrics {
    /// Number of runs that have been started.
    pub runs_started: AtomicU64,
    /// Number of runs that finished without an error exit status.
    pub runs_completed: AtomicU64,
    /// Number of runs that finished with an error exit status.
    pub runs_failed: AtomicU64,
    /// Accumulated cost, stored as whole micro-dollars (1 dollar = 1_000_000).
    pub total_cost_micros: AtomicU64,
    /// Accumulated count of tokens sent.
    pub total_tokens_sent: AtomicU64,
    /// Accumulated count of tokens received.
    pub total_tokens_received: AtomicU64,
    /// Accumulated count of API calls.
    pub total_api_calls: AtomicU64,
    /// Accumulated execution time, in milliseconds.
    pub total_execution_time_ms: AtomicU64,
}
32
+
33
+ impl AgentMetrics {
34
+ pub fn new() -> Self {
35
+ Self::default()
36
+ }
37
+
38
+ pub fn record_run_start(&self) {
39
+ self.runs_started.fetch_add(1, Ordering::SeqCst);
40
+ }
41
+
42
+ pub fn record_run_complete(&self, result: &AgentRunResult) {
43
+ let is_error = result
44
+ .info
45
+ .exit_status
46
+ .as_ref()
47
+ .map(|s| s.contains("error"))
48
+ .unwrap_or(false);
49
+
50
+ if is_error {
51
+ self.runs_failed.fetch_add(1, Ordering::SeqCst);
52
+ } else {
53
+ self.runs_completed.fetch_add(1, Ordering::SeqCst);
54
+ }
55
+
56
+ // Record model stats if available
57
+ if let Some(ref stats) = result.info.model_stats {
58
+ self.total_cost_micros
59
+ .fetch_add((stats.instance_cost * 1_000_000.0) as u64, Ordering::SeqCst);
60
+ self.total_tokens_sent
61
+ .fetch_add(stats.tokens_sent, Ordering::SeqCst);
62
+ self.total_tokens_received
63
+ .fetch_add(stats.tokens_received, Ordering::SeqCst);
64
+ self.total_api_calls
65
+ .fetch_add(stats.api_calls, Ordering::SeqCst);
66
+ }
67
+ }
68
+
69
+ pub fn record_execution_time(&self, duration: Duration) {
70
+ self.total_execution_time_ms
71
+ .fetch_add(duration.as_millis() as u64, Ordering::SeqCst);
72
+ }
73
+
74
+ /// Get current metrics as a snapshot
75
+ pub fn snapshot(&self) -> MetricsSnapshot {
76
+ MetricsSnapshot {
77
+ runs_started: self.runs_started.load(Ordering::SeqCst),
78
+ runs_completed: self.runs_completed.load(Ordering::SeqCst),
79
+ runs_failed: self.runs_failed.load(Ordering::SeqCst),
80
+ total_cost: self.total_cost_micros.load(Ordering::SeqCst) as f64 / 1_000_000.0,
81
+ total_tokens_sent: self.total_tokens_sent.load(Ordering::SeqCst),
82
+ total_tokens_received: self.total_tokens_received.load(Ordering::SeqCst),
83
+ total_api_calls: self.total_api_calls.load(Ordering::SeqCst),
84
+ total_execution_time_ms: self.total_execution_time_ms.load(Ordering::SeqCst),
85
+ }
86
+ }
87
+ }
88
+
89
+ /// Immutable snapshot of metrics for reporting
90
+ #[derive(Debug, Clone, Serialize, Deserialize)]
91
+ pub struct MetricsSnapshot {
92
+ pub runs_started: u64,
93
+ pub runs_completed: u64,
94
+ pub runs_failed: u64,
95
+ pub total_cost: f64,
96
+ pub total_tokens_sent: u64,
97
+ pub total_tokens_received: u64,
98
+ pub total_api_calls: u64,
99
+ pub total_execution_time_ms: u64,
100
+ }
101
+
102
+ impl MetricsSnapshot {
103
+ /// Calculate success rate
104
+ pub fn success_rate(&self) -> f64 {
105
+ if self.runs_started == 0 {
106
+ return 0.0;
107
+ }
108
+ self.runs_completed as f64 / self.runs_started as f64
109
+ }
110
+
111
+ /// Calculate average cost per run
112
+ pub fn avg_cost_per_run(&self) -> f64 {
113
+ if self.runs_started == 0 {
114
+ return 0.0;
115
+ }
116
+ self.total_cost / self.runs_started as f64
117
+ }
118
+
119
+ /// Calculate average execution time
120
+ pub fn avg_execution_time_ms(&self) -> f64 {
121
+ if self.runs_started == 0 {
122
+ return 0.0;
123
+ }
124
+ self.total_execution_time_ms as f64 / self.runs_started as f64
125
+ }
126
+ }
127
+
128
+ /// Alert severity levels
129
+ #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
130
+ #[serde(rename_all = "lowercase")]
131
+ pub enum AlertSeverity {
132
+ Info,
133
+ Warning,
134
+ Error,
135
+ Critical,
136
+ }
137
+
138
+ /// Alert information
139
+ #[derive(Debug, Clone, Serialize, Deserialize)]
140
+ pub struct Alert {
141
+ pub severity: AlertSeverity,
142
+ pub message: String,
143
+ pub timestamp: chrono::DateTime<chrono::Utc>,
144
+ pub context: HashMap<String, serde_json::Value>,
145
+ }
146
+
147
+ impl Alert {
148
+ pub fn new(severity: AlertSeverity, message: impl Into<String>) -> Self {
149
+ Self {
150
+ severity,
151
+ message: message.into(),
152
+ timestamp: chrono::Utc::now(),
153
+ context: HashMap::new(),
154
+ }
155
+ }
156
+
157
+ pub fn with_context(mut self, key: impl Into<String>, value: impl Serialize) -> Self {
158
+ self.context
159
+ .insert(key.into(), serde_json::to_value(value).unwrap_or_default());
160
+ self
161
+ }
162
+ }
163
+
164
+ /// Trait for alert handlers
165
+ pub trait AlertHandler: Send + Sync {
166
+ fn handle(&self, alert: &Alert);
167
+ }
168
+
169
+ /// Log-based alert handler (default)
170
+ pub struct LogAlertHandler;
171
+
172
+ impl AlertHandler for LogAlertHandler {
173
+ fn handle(&self, alert: &Alert) {
174
+ match alert.severity {
175
+ AlertSeverity::Info => tracing::info!(
176
+ message = %alert.message,
177
+ context = ?alert.context,
178
+ "Alert"
179
+ ),
180
+ AlertSeverity::Warning => tracing::warn!(
181
+ message = %alert.message,
182
+ context = ?alert.context,
183
+ "Alert"
184
+ ),
185
+ AlertSeverity::Error => tracing::error!(
186
+ message = %alert.message,
187
+ context = ?alert.context,
188
+ "Alert"
189
+ ),
190
+ AlertSeverity::Critical => tracing::error!(
191
+ message = %alert.message,
192
+ context = ?alert.context,
193
+ severity = "CRITICAL",
194
+ "Alert"
195
+ ),
196
+ }
197
+ }
198
+ }
199
+
200
+ /// Webhook-based alert handler for external services (Slack, PagerDuty, etc.)
201
+ pub struct WebhookAlertHandler {
202
+ url: String,
203
+ client: reqwest::Client,
204
+ min_severity: AlertSeverity,
205
+ }
206
+
207
+ impl WebhookAlertHandler {
208
+ pub fn new(url: impl Into<String>, min_severity: AlertSeverity) -> Self {
209
+ Self {
210
+ url: url.into(),
211
+ client: reqwest::Client::new(),
212
+ min_severity,
213
+ }
214
+ }
215
+
216
+ fn should_send(&self, severity: AlertSeverity) -> bool {
217
+ matches!(
218
+ (self.min_severity, severity),
219
+ (AlertSeverity::Critical, AlertSeverity::Critical)
220
+ | (AlertSeverity::Error, AlertSeverity::Critical | AlertSeverity::Error)
221
+ | (AlertSeverity::Warning, AlertSeverity::Critical | AlertSeverity::Error | AlertSeverity::Warning)
222
+ | (AlertSeverity::Info, _)
223
+ )
224
+ }
225
+ }
226
+
227
+ impl AlertHandler for WebhookAlertHandler {
228
+ fn handle(&self, alert: &Alert) {
229
+ if !self.should_send(alert.severity) {
230
+ return;
231
+ }
232
+
233
+ let url = self.url.clone();
234
+ let payload = serde_json::json!({
235
+ "severity": alert.severity,
236
+ "message": alert.message,
237
+ "timestamp": alert.timestamp.to_rfc3339(),
238
+ "context": alert.context,
239
+ });
240
+
241
+ let client = self.client.clone();
242
+
243
+ // Fire and forget - don't block on webhook
244
+ tokio::spawn(async move {
245
+ if let Err(e) = client.post(&url).json(&payload).send().await {
246
+ tracing::warn!(error = %e, "Failed to send webhook alert");
247
+ }
248
+ });
249
+ }
250
+ }
251
+
252
+ /// Alert thresholds for automatic alerting
253
+ #[derive(Debug, Clone, Serialize, Deserialize)]
254
+ pub struct AlertThresholds {
255
+ /// Alert if cost exceeds this amount
256
+ pub cost_limit: f64,
257
+ /// Alert if failure rate exceeds this percentage
258
+ pub failure_rate_percent: f64,
259
+ /// Alert if average execution time exceeds this (ms)
260
+ pub execution_time_ms: u64,
261
+ /// Alert if API calls exceed this limit
262
+ pub api_calls_limit: u64,
263
+ }
264
+
265
+ impl Default for AlertThresholds {
266
+ fn default() -> Self {
267
+ Self {
268
+ cost_limit: 100.0, // $100
269
+ failure_rate_percent: 20.0, // 20%
270
+ execution_time_ms: 600_000, // 10 minutes
271
+ api_calls_limit: 10_000, // 10k calls
272
+ }
273
+ }
274
+ }
275
+
276
+ /// Monitor that checks metrics against thresholds
277
+ pub struct MetricsMonitor {
278
+ metrics: Arc<AgentMetrics>,
279
+ thresholds: AlertThresholds,
280
+ handlers: Vec<Box<dyn AlertHandler>>,
281
+ #[allow(dead_code)]
282
+ last_check: std::sync::Mutex<Instant>,
283
+ }
284
+
285
+ impl MetricsMonitor {
286
+ pub fn new(metrics: Arc<AgentMetrics>, thresholds: AlertThresholds) -> Self {
287
+ Self {
288
+ metrics,
289
+ thresholds,
290
+ handlers: vec![Box::new(LogAlertHandler)],
291
+ last_check: std::sync::Mutex::new(Instant::now()),
292
+ }
293
+ }
294
+
295
+ pub fn add_handler(&mut self, handler: Box<dyn AlertHandler>) {
296
+ self.handlers.push(handler);
297
+ }
298
+
299
+ pub fn check(&self) {
300
+ let snapshot = self.metrics.snapshot();
301
+
302
+ // Check cost limit
303
+ if snapshot.total_cost > self.thresholds.cost_limit {
304
+ self.alert(
305
+ Alert::new(
306
+ AlertSeverity::Warning,
307
+ format!(
308
+ "Cost limit exceeded: ${:.2} > ${:.2}",
309
+ snapshot.total_cost, self.thresholds.cost_limit
310
+ ),
311
+ )
312
+ .with_context("total_cost", snapshot.total_cost),
313
+ );
314
+ }
315
+
316
+ // Check failure rate
317
+ let failure_rate = if snapshot.runs_started > 0 {
318
+ 100.0 * snapshot.runs_failed as f64 / snapshot.runs_started as f64
319
+ } else {
320
+ 0.0
321
+ };
322
+
323
+ if failure_rate > self.thresholds.failure_rate_percent && snapshot.runs_started >= 10 {
324
+ self.alert(
325
+ Alert::new(
326
+ AlertSeverity::Error,
327
+ format!(
328
+ "High failure rate: {:.1}% > {:.1}%",
329
+ failure_rate, self.thresholds.failure_rate_percent
330
+ ),
331
+ )
332
+ .with_context("failure_rate", failure_rate)
333
+ .with_context("runs_failed", snapshot.runs_failed)
334
+ .with_context("runs_started", snapshot.runs_started),
335
+ );
336
+ }
337
+
338
+ // Check API calls
339
+ if snapshot.total_api_calls > self.thresholds.api_calls_limit {
340
+ self.alert(
341
+ Alert::new(
342
+ AlertSeverity::Warning,
343
+ format!(
344
+ "API call limit exceeded: {} > {}",
345
+ snapshot.total_api_calls, self.thresholds.api_calls_limit
346
+ ),
347
+ )
348
+ .with_context("api_calls", snapshot.total_api_calls),
349
+ );
350
+ }
351
+ }
352
+
353
+ fn alert(&self, alert: Alert) {
354
+ for handler in &self.handlers {
355
+ handler.handle(&alert);
356
+ }
357
+ }
358
+ }
359
+
360
+ /// Health check status
361
+ #[derive(Debug, Clone, Serialize, Deserialize)]
362
+ pub struct HealthStatus {
363
+ pub healthy: bool,
364
+ pub components: HashMap<String, ComponentHealth>,
365
+ pub timestamp: chrono::DateTime<chrono::Utc>,
366
+ }
367
+
368
+ #[derive(Debug, Clone, Serialize, Deserialize)]
369
+ pub struct ComponentHealth {
370
+ pub healthy: bool,
371
+ pub message: String,
372
+ }
373
+
374
+ impl HealthStatus {
375
+ pub fn new() -> Self {
376
+ Self {
377
+ healthy: true,
378
+ components: HashMap::new(),
379
+ timestamp: chrono::Utc::now(),
380
+ }
381
+ }
382
+
383
+ pub fn add_component(
384
+ &mut self,
385
+ name: impl Into<String>,
386
+ healthy: bool,
387
+ message: impl Into<String>,
388
+ ) {
389
+ let name = name.into();
390
+ if !healthy {
391
+ self.healthy = false;
392
+ }
393
+ self.components.insert(
394
+ name,
395
+ ComponentHealth {
396
+ healthy,
397
+ message: message.into(),
398
+ },
399
+ );
400
+ }
401
+ }
402
+
403
+ impl Default for HealthStatus {
404
+ fn default() -> Self {
405
+ Self::new()
406
+ }
407
+ }
408
+
409
+ /// Perform a health check
410
+ pub async fn health_check() -> HealthStatus {
411
+ let mut status = HealthStatus::new();
412
+
413
+ // Check Docker availability
414
+ let docker_check = tokio::process::Command::new("docker")
415
+ .arg("info")
416
+ .output()
417
+ .await;
418
+
419
+ match docker_check {
420
+ Ok(output) if output.status.success() => {
421
+ status.add_component("docker", true, "Docker daemon is running");
422
+ }
423
+ Ok(_) => {
424
+ status.add_component("docker", false, "Docker daemon not responding");
425
+ }
426
+ Err(e) => {
427
+ status.add_component("docker", false, format!("Docker not available: {}", e));
428
+ }
429
+ }
430
+
431
+ // Check environment variables
432
+ let has_api_key =
433
+ std::env::var("OPENAI_API_KEY").is_ok() || std::env::var("ANTHROPIC_API_KEY").is_ok();
434
+
435
+ status.add_component(
436
+ "api_keys",
437
+ has_api_key,
438
+ if has_api_key {
439
+ "API keys configured"
440
+ } else {
441
+ "No API keys found"
442
+ },
443
+ );
444
+
445
+ status
446
+ }
447
+
448
#[cfg(test)]
mod tests {
    use super::*;

    /// A snapshot reflects the stored counters and derives the success rate.
    #[test]
    fn test_metrics_snapshot() {
        let agent_metrics = AgentMetrics::new();
        agent_metrics.runs_started.store(10, Ordering::SeqCst);
        agent_metrics.runs_completed.store(8, Ordering::SeqCst);
        agent_metrics.runs_failed.store(2, Ordering::SeqCst);

        let snap = agent_metrics.snapshot();
        assert_eq!(snap.runs_started, 10);

        let rate_error = (snap.success_rate() - 0.8).abs();
        assert!(rate_error < 0.001);
    }

    /// Each `with_context` call adds one entry to the alert's context.
    #[test]
    fn test_alert_with_context() {
        let built = Alert::new(AlertSeverity::Warning, "Test alert")
            .with_context("count", 42)
            .with_context("name", "test");

        assert_eq!(built.severity, AlertSeverity::Warning);
        assert_eq!(built.context.len(), 2);
    }

    /// A fresh status is healthy until an unhealthy component is recorded.
    #[test]
    fn test_health_status() {
        let mut report = HealthStatus::new();
        assert!(report.healthy);

        report.add_component("test", false, "Failed");
        assert!(!report.healthy);
    }
}
@@ -0,0 +1,134 @@
1
+ //! Run hooks for monitoring and extending run behavior
2
+
3
+ use crate::types::AgentRunResult;
4
+ use async_trait::async_trait;
5
+
6
+ /// Hook for run events
7
+ #[async_trait]
8
+ pub trait RunHook: Send + Sync {
9
+ /// Called when run is initialized
10
+ fn on_init(&mut self, _run: &dyn std::any::Any) {}
11
+
12
+ /// Called when run starts
13
+ fn on_start(&mut self) {}
14
+
15
+ /// Called when run ends
16
+ fn on_end(&mut self) {}
17
+
18
+ /// Called when an instance is skipped
19
+ fn on_instance_skipped(&mut self, _reason: &str) {}
20
+
21
+ /// Called when an instance starts
22
+ fn on_instance_start(&mut self, _index: usize, _instance_id: &str) {}
23
+
24
+ /// Called when an instance completes
25
+ fn on_instance_completed(&mut self, _result: &AgentRunResult) {}
26
+ }
27
+
28
+ /// Combined hook that wraps multiple hooks
29
+ pub struct CombinedRunHook {
30
+ hooks: Vec<Box<dyn RunHook>>,
31
+ }
32
+
33
+ impl CombinedRunHook {
34
+ pub fn new() -> Self {
35
+ Self { hooks: Vec::new() }
36
+ }
37
+
38
+ pub fn add_hook(&mut self, hook: Box<dyn RunHook>) {
39
+ self.hooks.push(hook);
40
+ }
41
+ }
42
+
43
+ impl Default for CombinedRunHook {
44
+ fn default() -> Self {
45
+ Self::new()
46
+ }
47
+ }
48
+
49
+ #[async_trait]
50
+ impl RunHook for CombinedRunHook {
51
+ fn on_init(&mut self, run: &dyn std::any::Any) {
52
+ for hook in &mut self.hooks {
53
+ hook.on_init(run);
54
+ }
55
+ }
56
+
57
+ fn on_start(&mut self) {
58
+ for hook in &mut self.hooks {
59
+ hook.on_start();
60
+ }
61
+ }
62
+
63
+ fn on_end(&mut self) {
64
+ for hook in &mut self.hooks {
65
+ hook.on_end();
66
+ }
67
+ }
68
+
69
+ fn on_instance_skipped(&mut self, reason: &str) {
70
+ for hook in &mut self.hooks {
71
+ hook.on_instance_skipped(reason);
72
+ }
73
+ }
74
+
75
+ fn on_instance_start(&mut self, index: usize, instance_id: &str) {
76
+ for hook in &mut self.hooks {
77
+ hook.on_instance_start(index, instance_id);
78
+ }
79
+ }
80
+
81
+ fn on_instance_completed(&mut self, result: &AgentRunResult) {
82
+ for hook in &mut self.hooks {
83
+ hook.on_instance_completed(result);
84
+ }
85
+ }
86
+ }
87
+
88
/// Run hook intended to persist the patches produced by completed instances.
pub struct SaveApplyPatchHook {
    /// Base directory; patches go under its `patches` subdirectory.
    pub output_dir: String,
}
92
+
93
+ impl SaveApplyPatchHook {
94
+ pub fn new(output_dir: impl Into<String>) -> Self {
95
+ Self {
96
+ output_dir: output_dir.into(),
97
+ }
98
+ }
99
+ }
100
+
101
+ #[async_trait]
102
+ impl RunHook for SaveApplyPatchHook {
103
+ fn on_instance_completed(&mut self, result: &AgentRunResult) {
104
+ if let Some(ref _submission) = result.info.submission {
105
+ let patch_path = std::path::Path::new(&self.output_dir).join("patches");
106
+ let _ = std::fs::create_dir_all(&patch_path);
107
+
108
+ // Save the patch
109
+ // In a full implementation, would write to file based on instance ID
110
+ tracing::info!(path = ?patch_path, "Would save patch");
111
+ }
112
+ }
113
+ }
114
+
115
/// Run hook intended to open a pull request for completed instances.
pub struct OpenPRHook {
    /// GitHub token; when absent the hook does nothing.
    pub github_token: Option<String>,
}
119
+
120
+ impl OpenPRHook {
121
+ pub fn new(github_token: Option<String>) -> Self {
122
+ Self { github_token }
123
+ }
124
+ }
125
+
126
+ #[async_trait]
127
+ impl RunHook for OpenPRHook {
128
+ fn on_instance_completed(&mut self, result: &AgentRunResult) {
129
+ if result.info.submission.is_some() && self.github_token.is_some() {
130
+ // In a full implementation, would create a PR using GitHub API
131
+ tracing::info!("Would create PR with submission");
132
+ }
133
+ }
134
+ }
@@ -0,0 +1,12 @@
1
+ //! Run module for SWE-agent
2
+ //!
3
+ //! This module provides the execution infrastructure for running agents
4
+ //! on problem instances.
5
+
6
+ pub mod hooks;
7
+ pub mod run_batch;
8
+ pub mod run_single;
9
+
10
+ pub use hooks::*;
11
+ pub use run_batch::*;
12
+ pub use run_single::*;