@elizaos/sweagent-root 2.0.0-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (323) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +270 -0
  3. package/package.json +71 -0
  4. package/python/LICENSE +21 -0
  5. package/python/config/README.md +15 -0
  6. package/python/config/bash_only.yaml +222 -0
  7. package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
  8. package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
  9. package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
  10. package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
  11. package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
  12. package/python/config/coding_challenge.yaml +104 -0
  13. package/python/config/default.yaml +69 -0
  14. package/python/config/default_backticks.yaml +69 -0
  15. package/python/config/default_mm_no_images.yaml +82 -0
  16. package/python/config/default_mm_with_images.yaml +83 -0
  17. package/python/config/demo/default.yaml +80 -0
  18. package/python/config/demo/no_instructions.yaml +69 -0
  19. package/python/config/demo/only_bash.yaml +60 -0
  20. package/python/config/exotic/default_shell.yaml +52 -0
  21. package/python/config/exotic/windowed_replace.yaml +125 -0
  22. package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
  23. package/python/config/human/human.yaml +24 -0
  24. package/python/config/human/human_demo.yaml +52 -0
  25. package/python/config/sweagent_0_7/07.yaml +101 -0
  26. package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
  27. package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
  28. package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
  29. package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
  30. package/python/mlc_config.json +44 -0
  31. package/python/pyproject.toml +262 -0
  32. package/python/sweagent/__init__.py +114 -0
  33. package/python/sweagent/__main__.py +4 -0
  34. package/python/sweagent/agent/__init__.py +0 -0
  35. package/python/sweagent/agent/action_sampler.py +317 -0
  36. package/python/sweagent/agent/agents.py +1294 -0
  37. package/python/sweagent/agent/extra/shell_agent.py +106 -0
  38. package/python/sweagent/agent/history_processors.py +399 -0
  39. package/python/sweagent/agent/hooks/__init__.py +0 -0
  40. package/python/sweagent/agent/hooks/abstract.py +139 -0
  41. package/python/sweagent/agent/hooks/status.py +34 -0
  42. package/python/sweagent/agent/models.py +896 -0
  43. package/python/sweagent/agent/problem_statement.py +312 -0
  44. package/python/sweagent/agent/reviewer.py +664 -0
  45. package/python/sweagent/environment/__init__.py +0 -0
  46. package/python/sweagent/environment/hooks/__init__.py +0 -0
  47. package/python/sweagent/environment/hooks/abstract.py +60 -0
  48. package/python/sweagent/environment/hooks/status.py +28 -0
  49. package/python/sweagent/environment/repo.py +219 -0
  50. package/python/sweagent/environment/swe_env.py +276 -0
  51. package/python/sweagent/exceptions.py +54 -0
  52. package/python/sweagent/inspector/README.md +6 -0
  53. package/python/sweagent/inspector/__init__.py +0 -0
  54. package/python/sweagent/inspector/favicon.ico +0 -0
  55. package/python/sweagent/inspector/fileViewer.js +354 -0
  56. package/python/sweagent/inspector/icons/computer.png +0 -0
  57. package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
  58. package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
  59. package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
  60. package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
  61. package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
  62. package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
  63. package/python/sweagent/inspector/index.html +25 -0
  64. package/python/sweagent/inspector/server.py +354 -0
  65. package/python/sweagent/inspector/static.py +169 -0
  66. package/python/sweagent/inspector/style.css +454 -0
  67. package/python/sweagent/run/__init__.py +0 -0
  68. package/python/sweagent/run/_progress.py +158 -0
  69. package/python/sweagent/run/batch_instances.py +419 -0
  70. package/python/sweagent/run/common.py +387 -0
  71. package/python/sweagent/run/compare_runs.py +123 -0
  72. package/python/sweagent/run/extract_pred.py +19 -0
  73. package/python/sweagent/run/hooks/__init__.py +0 -0
  74. package/python/sweagent/run/hooks/abstract.py +67 -0
  75. package/python/sweagent/run/hooks/apply_patch.py +106 -0
  76. package/python/sweagent/run/hooks/open_pr.py +244 -0
  77. package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
  78. package/python/sweagent/run/inspector_cli.py +493 -0
  79. package/python/sweagent/run/merge_predictions.py +64 -0
  80. package/python/sweagent/run/quick_stats.py +96 -0
  81. package/python/sweagent/run/remove_unfinished.py +63 -0
  82. package/python/sweagent/run/rich_test.py +91 -0
  83. package/python/sweagent/run/run.py +147 -0
  84. package/python/sweagent/run/run_batch.py +442 -0
  85. package/python/sweagent/run/run_replay.py +219 -0
  86. package/python/sweagent/run/run_shell.py +155 -0
  87. package/python/sweagent/run/run_single.py +225 -0
  88. package/python/sweagent/run/run_traj_to_demo.py +85 -0
  89. package/python/sweagent/tools/__init__.py +0 -0
  90. package/python/sweagent/tools/bundle.py +57 -0
  91. package/python/sweagent/tools/commands.py +220 -0
  92. package/python/sweagent/tools/parsing.py +619 -0
  93. package/python/sweagent/tools/tools.py +430 -0
  94. package/python/sweagent/tools/utils.py +108 -0
  95. package/python/sweagent/types.py +102 -0
  96. package/python/sweagent/utils/__init__.py +0 -0
  97. package/python/sweagent/utils/config.py +80 -0
  98. package/python/sweagent/utils/files.py +27 -0
  99. package/python/sweagent/utils/github.py +118 -0
  100. package/python/sweagent/utils/jinja_warnings.py +14 -0
  101. package/python/sweagent/utils/log.py +175 -0
  102. package/python/sweagent/utils/patch_formatter.py +152 -0
  103. package/python/sweagent/utils/serialization.py +45 -0
  104. package/python/tests/__init__.py +0 -0
  105. package/python/tests/conftest.py +191 -0
  106. package/python/tests/test_agent.py +258 -0
  107. package/python/tests/test_batch_instance.py +43 -0
  108. package/python/tests/test_commands/_interactive_dummy.py +35 -0
  109. package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
  110. package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
  111. package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
  112. package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
  113. package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
  114. package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
  115. package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
  116. package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
  117. package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
  118. package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
  119. package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
  120. package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
  121. package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
  122. package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
  123. package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
  124. package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
  125. package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
  126. package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
  127. package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
  128. package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
  129. package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
  130. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
  131. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
  132. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
  133. package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
  134. package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
  135. package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
  136. package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
  137. package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
  138. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
  139. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
  140. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
  141. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
  142. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
  143. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
  144. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
  145. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
  146. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
  147. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
  148. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
  149. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
  150. package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
  151. package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
  152. package/python/tests/test_data/data_sources/human_eval.json +1 -0
  153. package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
  154. package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
  155. package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
  156. package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
  157. package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
  158. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
  159. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
  160. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
  161. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
  162. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
  163. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
  164. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
  165. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
  166. package/python/tests/test_env.py +66 -0
  167. package/python/tests/test_env_utils.py +129 -0
  168. package/python/tests/test_history_processors.py +40 -0
  169. package/python/tests/test_models.py +23 -0
  170. package/python/tests/test_openai_live.py +164 -0
  171. package/python/tests/test_packaging.py +7 -0
  172. package/python/tests/test_parsing.py +131 -0
  173. package/python/tests/test_problem_statement_multimodal.py +111 -0
  174. package/python/tests/test_quick_stats.py +42 -0
  175. package/python/tests/test_run.py +37 -0
  176. package/python/tests/test_run_batch.py +110 -0
  177. package/python/tests/test_run_hooks.py +114 -0
  178. package/python/tests/test_run_replay.py +33 -0
  179. package/python/tests/test_run_single.py +125 -0
  180. package/python/tests/test_tools_command_parsing.py +193 -0
  181. package/python/tests/test_utils.py +15 -0
  182. package/python/tests/tools/__init__.py +0 -0
  183. package/python/tests/tools/conftest.py +12 -0
  184. package/python/tests/tools/test_default_utils.py +153 -0
  185. package/python/tests/tools/test_edit_replace.py +0 -0
  186. package/python/tests/tools/test_split_string.py +82 -0
  187. package/python/tests/utils.py +29 -0
  188. package/python/tools/diff_state/bin/_state_diff_state +52 -0
  189. package/python/tools/diff_state/config.yaml +2 -0
  190. package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
  191. package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
  192. package/python/tools/edit_anthropic/config.yaml +56 -0
  193. package/python/tools/edit_anthropic/install.sh +3 -0
  194. package/python/tools/filemap/bin/filemap +45 -0
  195. package/python/tools/filemap/config.yaml +9 -0
  196. package/python/tools/filemap/install.sh +2 -0
  197. package/python/tools/forfeit/bin/exit_forfeit +5 -0
  198. package/python/tools/forfeit/config.yaml +5 -0
  199. package/python/tools/image_tools/bin/view_image +36 -0
  200. package/python/tools/image_tools/config.yaml +9 -0
  201. package/python/tools/multilingual_setup/bin/do_nothing +2 -0
  202. package/python/tools/multilingual_setup/config.yaml +1 -0
  203. package/python/tools/multilingual_setup/install.sh +45 -0
  204. package/python/tools/registry/bin/_read_env +10 -0
  205. package/python/tools/registry/bin/_write_env +10 -0
  206. package/python/tools/registry/config.yaml +1 -0
  207. package/python/tools/registry/install.sh +6 -0
  208. package/python/tools/registry/lib/__init__.py +0 -0
  209. package/python/tools/registry/lib/registry.py +56 -0
  210. package/python/tools/review_on_submit_m/README.md +6 -0
  211. package/python/tools/review_on_submit_m/bin/submit +54 -0
  212. package/python/tools/review_on_submit_m/config.yaml +6 -0
  213. package/python/tools/review_on_submit_m/install.sh +0 -0
  214. package/python/tools/search/bin/find_file +31 -0
  215. package/python/tools/search/bin/search_dir +39 -0
  216. package/python/tools/search/bin/search_file +55 -0
  217. package/python/tools/search/config.yaml +37 -0
  218. package/python/tools/search/install.sh +3 -0
  219. package/python/tools/submit/bin/submit +17 -0
  220. package/python/tools/submit/config.yaml +5 -0
  221. package/python/tools/web_browser/bin/click_mouse +41 -0
  222. package/python/tools/web_browser/bin/close_site +28 -0
  223. package/python/tools/web_browser/bin/double_click_mouse +37 -0
  224. package/python/tools/web_browser/bin/drag_mouse +46 -0
  225. package/python/tools/web_browser/bin/execute_script_on_page +39 -0
  226. package/python/tools/web_browser/bin/get_console_output +48 -0
  227. package/python/tools/web_browser/bin/move_mouse +35 -0
  228. package/python/tools/web_browser/bin/navigate_back +33 -0
  229. package/python/tools/web_browser/bin/navigate_forward +33 -0
  230. package/python/tools/web_browser/bin/open_site +36 -0
  231. package/python/tools/web_browser/bin/press_keys_on_page +51 -0
  232. package/python/tools/web_browser/bin/reload_page +33 -0
  233. package/python/tools/web_browser/bin/run_web_browser_server +394 -0
  234. package/python/tools/web_browser/bin/screenshot_site +38 -0
  235. package/python/tools/web_browser/bin/scroll_on_page +40 -0
  236. package/python/tools/web_browser/bin/set_browser_window_size +40 -0
  237. package/python/tools/web_browser/bin/type_text +34 -0
  238. package/python/tools/web_browser/bin/wait_time +39 -0
  239. package/python/tools/web_browser/config.yaml +155 -0
  240. package/python/tools/web_browser/install.sh +22 -0
  241. package/python/tools/web_browser/lib/browser_manager.py +404 -0
  242. package/python/tools/web_browser/lib/web_browser_config.py +33 -0
  243. package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
  244. package/python/tools/web_browser/test_console.html +1 -0
  245. package/python/tools/windowed/bin/_state +25 -0
  246. package/python/tools/windowed/bin/create +29 -0
  247. package/python/tools/windowed/bin/goto +37 -0
  248. package/python/tools/windowed/bin/open +49 -0
  249. package/python/tools/windowed/bin/scroll_down +12 -0
  250. package/python/tools/windowed/bin/scroll_up +13 -0
  251. package/python/tools/windowed/config.yaml +38 -0
  252. package/python/tools/windowed/install.sh +15 -0
  253. package/python/tools/windowed/lib/__init__.py +0 -0
  254. package/python/tools/windowed/lib/flake8_utils.py +147 -0
  255. package/python/tools/windowed/lib/windowed_file.py +312 -0
  256. package/python/tools/windowed_edit_linting/bin/edit +128 -0
  257. package/python/tools/windowed_edit_linting/config.yaml +31 -0
  258. package/python/tools/windowed_edit_linting/install.sh +5 -0
  259. package/python/tools/windowed_edit_replace/bin/edit +172 -0
  260. package/python/tools/windowed_edit_replace/bin/insert +77 -0
  261. package/python/tools/windowed_edit_replace/config.yaml +60 -0
  262. package/python/tools/windowed_edit_replace/install.sh +5 -0
  263. package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
  264. package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
  265. package/python/tools/windowed_edit_rewrite/install.sh +5 -0
  266. package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
  267. package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
  268. package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
  269. package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
  270. package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
  271. package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
  272. package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
  273. package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
  274. package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
  275. package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
  276. package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
  277. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
  278. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  279. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  280. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
  281. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
  282. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
  283. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  284. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  285. package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
  286. package/rust/Cargo.toml +100 -0
  287. package/rust/README.md +49 -0
  288. package/rust/src/agent/action_sampler.rs +130 -0
  289. package/rust/src/agent/agents.rs +1029 -0
  290. package/rust/src/agent/history_processors.rs +277 -0
  291. package/rust/src/agent/hooks/mod.rs +208 -0
  292. package/rust/src/agent/mod.rs +24 -0
  293. package/rust/src/agent/models.rs +837 -0
  294. package/rust/src/agent/problem_statement.rs +355 -0
  295. package/rust/src/agent/reviewer.rs +505 -0
  296. package/rust/src/bin/sweagent.rs +784 -0
  297. package/rust/src/environment/deployment.rs +631 -0
  298. package/rust/src/environment/hooks/mod.rs +114 -0
  299. package/rust/src/environment/mod.rs +16 -0
  300. package/rust/src/environment/repo.rs +265 -0
  301. package/rust/src/environment/runtime.rs +237 -0
  302. package/rust/src/environment/swe_env.rs +248 -0
  303. package/rust/src/exceptions.rs +228 -0
  304. package/rust/src/lib.rs +68 -0
  305. package/rust/src/monitoring.rs +482 -0
  306. package/rust/src/run/hooks/mod.rs +134 -0
  307. package/rust/src/run/mod.rs +12 -0
  308. package/rust/src/run/run_batch.rs +563 -0
  309. package/rust/src/run/run_single.rs +196 -0
  310. package/rust/src/tools/bundle.rs +224 -0
  311. package/rust/src/tools/commands.rs +173 -0
  312. package/rust/src/tools/mod.rs +295 -0
  313. package/rust/src/tools/parsing.rs +354 -0
  314. package/rust/src/tools/registry.rs +143 -0
  315. package/rust/src/types.rs +554 -0
  316. package/rust/src/utils/config.rs +105 -0
  317. package/rust/src/utils/files.rs +137 -0
  318. package/rust/src/utils/github.rs +171 -0
  319. package/rust/src/utils/log.rs +65 -0
  320. package/rust/src/utils/mod.rs +17 -0
  321. package/rust/src/utils/serialization.rs +181 -0
  322. package/rust/src/utils/template.rs +173 -0
  323. package/typescript/README.md +335 -0
@@ -0,0 +1,505 @@
1
+ //! Reviewer implementations for evaluating agent submissions
2
+ //!
3
+ //! Reviewers are used in retry loops to evaluate submissions and decide
4
+ //! whether to retry with different approaches.
5
+
6
+ use crate::exceptions::{Result, SWEAgentError};
7
+ use crate::types::{AgentInfo, Trajectory};
8
+ use async_trait::async_trait;
9
+ use serde::{Deserialize, Serialize};
10
+ use std::collections::HashMap;
11
+
12
+ /// Result from a review
13
+ #[derive(Debug, Clone, Serialize, Deserialize)]
14
+ pub struct ReviewerResult {
15
+ pub score: f64,
16
+ pub feedback: String,
17
+ pub should_retry: bool,
18
+ pub extra: HashMap<String, serde_json::Value>,
19
+ }
20
+
21
+ /// Data submitted for review
22
+ #[derive(Debug, Clone)]
23
+ pub struct ReviewSubmission {
24
+ pub trajectory: Trajectory,
25
+ pub info: AgentInfo,
26
+ pub submission: Option<String>,
27
+ }
28
+
29
+ /// Trait for submission reviewers
30
+ #[async_trait]
31
+ pub trait Reviewer: Send + Sync {
32
+ /// Review a submission
33
+ async fn review(&self, submission: &ReviewSubmission) -> Result<ReviewerResult>;
34
+ }
35
+
36
+ /// Simple reviewer that always passes - baseline implementation
37
+ pub struct PassThroughReviewer;
38
+
39
+ #[async_trait]
40
+ impl Reviewer for PassThroughReviewer {
41
+ async fn review(&self, _submission: &ReviewSubmission) -> Result<ReviewerResult> {
42
+ Ok(ReviewerResult {
43
+ score: 1.0,
44
+ feedback: "Submission accepted (pass-through reviewer)".to_string(),
45
+ should_retry: false,
46
+ extra: HashMap::new(),
47
+ })
48
+ }
49
+ }
50
+
51
+ /// Reviewer that checks if a submission was actually provided
52
+ pub struct SubmissionPresenceReviewer {
53
+ threshold: f64,
54
+ }
55
+
56
+ impl SubmissionPresenceReviewer {
57
+ pub fn new(threshold: f64) -> Self {
58
+ Self { threshold }
59
+ }
60
+ }
61
+
62
+ impl Default for SubmissionPresenceReviewer {
63
+ fn default() -> Self {
64
+ Self::new(0.5)
65
+ }
66
+ }
67
+
68
+ #[async_trait]
69
+ impl Reviewer for SubmissionPresenceReviewer {
70
+ async fn review(&self, submission: &ReviewSubmission) -> Result<ReviewerResult> {
71
+ let has_submission = submission
72
+ .submission
73
+ .as_ref()
74
+ .map(|s| !s.trim().is_empty())
75
+ .unwrap_or(false);
76
+
77
+ let score = if has_submission { 1.0 } else { 0.0 };
78
+
79
+ Ok(ReviewerResult {
80
+ score,
81
+ feedback: if has_submission {
82
+ "Submission provided".to_string()
83
+ } else {
84
+ "No submission provided".to_string()
85
+ },
86
+ should_retry: score < self.threshold,
87
+ extra: HashMap::new(),
88
+ })
89
+ }
90
+ }
91
+
92
+ /// Reviewer that checks submission is non-empty patch
93
+ pub struct PatchPresenceReviewer {
94
+ min_lines: usize,
95
+ }
96
+
97
+ impl PatchPresenceReviewer {
98
+ pub fn new(min_lines: usize) -> Self {
99
+ Self { min_lines }
100
+ }
101
+ }
102
+
103
+ impl Default for PatchPresenceReviewer {
104
+ fn default() -> Self {
105
+ Self::new(1)
106
+ }
107
+ }
108
+
109
+ #[async_trait]
110
+ impl Reviewer for PatchPresenceReviewer {
111
+ async fn review(&self, submission: &ReviewSubmission) -> Result<ReviewerResult> {
112
+ let patch = submission.submission.as_deref().unwrap_or("");
113
+ let has_diff_content = patch
114
+ .lines()
115
+ .any(|line| line.starts_with('+') || line.starts_with('-'));
116
+
117
+ let line_count = patch.lines().count();
118
+ let passes = has_diff_content && line_count >= self.min_lines;
119
+
120
+ let score = if passes { 1.0 } else { 0.0 };
121
+
122
+ Ok(ReviewerResult {
123
+ score,
124
+ feedback: if passes {
125
+ format!("Valid patch with {} lines", line_count)
126
+ } else if !has_diff_content {
127
+ "Patch contains no diff content (+/- lines)".to_string()
128
+ } else {
129
+ format!(
130
+ "Patch too short ({} lines, need {})",
131
+ line_count, self.min_lines
132
+ )
133
+ },
134
+ should_retry: !passes,
135
+ extra: {
136
+ let mut m = HashMap::new();
137
+ m.insert("line_count".to_string(), serde_json::json!(line_count));
138
+ m.insert(
139
+ "has_diff_content".to_string(),
140
+ serde_json::json!(has_diff_content),
141
+ );
142
+ m
143
+ },
144
+ })
145
+ }
146
+ }
147
+
148
/// Decision returned by a [`Chooser`] after comparing several submissions.
#[derive(Debug, Clone)]
pub struct ChooserOutput {
    /// Index of the winning entry in the slice that was passed to `choose`.
    pub best_index: usize,
    /// Scores backing the decision; `SimpleChooser` emits one per submission, in input order.
    pub scores: Vec<f64>,
    /// Human-readable justification for the selection.
    pub reasoning: String,
}
155
+
156
+ /// Trait for choosers
157
+ #[async_trait]
158
+ pub trait Chooser: Send + Sync {
159
+ /// Choose the best submission from a list
160
+ async fn choose(&self, submissions: &[ReviewSubmission]) -> Result<ChooserOutput>;
161
+ }
162
+
163
+ /// Simple chooser that selects based on submission presence and length
164
+ pub struct SimpleChooser;
165
+
166
+ #[async_trait]
167
+ impl Chooser for SimpleChooser {
168
+ async fn choose(&self, submissions: &[ReviewSubmission]) -> Result<ChooserOutput> {
169
+ if submissions.is_empty() {
170
+ return Err(SWEAgentError::ConfigurationError(
171
+ "No submissions to choose from".to_string(),
172
+ ));
173
+ }
174
+
175
+ // Score based on: has submission (0.5) + patch length normalized (0.5)
176
+ let scores: Vec<f64> = submissions
177
+ .iter()
178
+ .map(|s| {
179
+ let has_submission = s.submission.is_some();
180
+ let patch_len = s.submission.as_ref().map(|p| p.len()).unwrap_or(0);
181
+ let base_score = if has_submission { 0.5 } else { 0.0 };
182
+ let len_score = (patch_len as f64 / 10000.0).min(0.5);
183
+ base_score + len_score
184
+ })
185
+ .collect();
186
+
187
+ let best_index = scores
188
+ .iter()
189
+ .enumerate()
190
+ .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
191
+ .map(|(i, _)| i)
192
+ .unwrap_or(0);
193
+
194
+ Ok(ChooserOutput {
195
+ best_index,
196
+ scores: scores.clone(),
197
+ reasoning: format!(
198
+ "Selected submission {} with score {:.2} (scores: {:?})",
199
+ best_index, scores[best_index], scores
200
+ ),
201
+ })
202
+ }
203
+ }
204
+
205
+ /// Abstract retry loop trait
206
+ #[async_trait]
207
+ pub trait RetryLoop: Send + Sync {
208
+ /// Called when a submission is made
209
+ fn on_submit(&mut self, submission: ReviewSubmission);
210
+
211
+ /// Check if should retry
212
+ fn should_retry(&self) -> bool;
213
+
214
+ /// Get the best submission index
215
+ fn get_best(&self) -> Option<usize>;
216
+ }
217
+
218
+ /// Simple retry loop with max attempts
219
+ pub struct MaxAttemptsRetryLoop {
220
+ submissions: Vec<ReviewSubmission>,
221
+ max_attempts: usize,
222
+ }
223
+
224
+ impl MaxAttemptsRetryLoop {
225
+ pub fn new(max_attempts: usize) -> Self {
226
+ Self {
227
+ submissions: Vec::new(),
228
+ max_attempts,
229
+ }
230
+ }
231
+ }
232
+
233
+ #[async_trait]
234
+ impl RetryLoop for MaxAttemptsRetryLoop {
235
+ fn on_submit(&mut self, submission: ReviewSubmission) {
236
+ self.submissions.push(submission);
237
+ }
238
+
239
+ fn should_retry(&self) -> bool {
240
+ self.submissions.len() < self.max_attempts
241
+ }
242
+
243
+ fn get_best(&self) -> Option<usize> {
244
+ if self.submissions.is_empty() {
245
+ return None;
246
+ }
247
+
248
+ // Return the last submission with a patch, or the last one
249
+ self.submissions
250
+ .iter()
251
+ .enumerate()
252
+ .rev()
253
+ .find(|(_, s)| s.submission.is_some())
254
+ .map(|(i, _)| i)
255
+ .or(Some(self.submissions.len() - 1))
256
+ }
257
+ }
258
+
259
+ /// Retry loop with reviewer-based decisions
260
+ pub struct ReviewerRetryLoop {
261
+ submissions: Vec<(ReviewSubmission, ReviewerResult)>,
262
+ max_attempts: usize,
263
+ reviewer: Box<dyn Reviewer>,
264
+ score_threshold: f64,
265
+ }
266
+
267
+ impl ReviewerRetryLoop {
268
+ pub fn new(max_attempts: usize, reviewer: Box<dyn Reviewer>, score_threshold: f64) -> Self {
269
+ Self {
270
+ submissions: Vec::new(),
271
+ max_attempts,
272
+ reviewer,
273
+ score_threshold,
274
+ }
275
+ }
276
+
277
+ /// Async review and store - must be called separately from on_submit
278
+ pub async fn review_submission(
279
+ &mut self,
280
+ submission: ReviewSubmission,
281
+ ) -> Result<ReviewerResult> {
282
+ let result = self.reviewer.review(&submission).await?;
283
+ self.submissions.push((submission, result.clone()));
284
+ Ok(result)
285
+ }
286
+ }
287
+
288
+ #[async_trait]
289
+ impl RetryLoop for ReviewerRetryLoop {
290
+ fn on_submit(&mut self, submission: ReviewSubmission) {
291
+ // Store with a placeholder review - actual review should use review_submission
292
+ self.submissions.push((
293
+ submission,
294
+ ReviewerResult {
295
+ score: 0.0,
296
+ feedback: "Not reviewed".to_string(),
297
+ should_retry: true,
298
+ extra: HashMap::new(),
299
+ },
300
+ ));
301
+ }
302
+
303
+ fn should_retry(&self) -> bool {
304
+ if self.submissions.is_empty() {
305
+ return true;
306
+ }
307
+
308
+ // Stop if we've hit max attempts
309
+ if self.submissions.len() >= self.max_attempts {
310
+ return false;
311
+ }
312
+
313
+ // Stop if last submission passed threshold
314
+ if let Some((_, result)) = self.submissions.last() {
315
+ if result.score >= self.score_threshold {
316
+ return false;
317
+ }
318
+ }
319
+
320
+ true
321
+ }
322
+
323
+ fn get_best(&self) -> Option<usize> {
324
+ self.submissions
325
+ .iter()
326
+ .enumerate()
327
+ .max_by(|(_, (_, a)), (_, (_, b))| a.score.partial_cmp(&b.score).unwrap())
328
+ .map(|(i, _)| i)
329
+ }
330
+ }
331
+
332
/// Serde fallback for the `threshold` field of
/// `RetryLoopConfig::SubmissionPresence` when the field is omitted.
fn default_threshold() -> f64 {
    const FALLBACK_THRESHOLD: f64 = 0.5;
    FALLBACK_THRESHOLD
}
335
+
336
/// Serde fallback for the `min_lines` field of
/// `RetryLoopConfig::PatchPresence` when the field is omitted.
fn default_min_lines() -> usize {
    const FALLBACK_MIN_LINES: usize = 1;
    FALLBACK_MIN_LINES
}
339
+
340
+ /// Configuration for retry loops
341
+ #[derive(Debug, Clone, Default, Serialize, Deserialize)]
342
+ #[serde(tag = "type", rename_all = "snake_case")]
343
+ pub enum RetryLoopConfig {
344
+ /// No retry - run once
345
+ #[default]
346
+ None,
347
+ /// Retry up to max_attempts times
348
+ MaxAttempts { max_attempts: usize },
349
+ /// Retry based on submission presence
350
+ SubmissionPresence {
351
+ max_attempts: usize,
352
+ #[serde(default = "default_threshold")]
353
+ threshold: f64,
354
+ },
355
+ /// Retry based on patch presence
356
+ PatchPresence {
357
+ max_attempts: usize,
358
+ #[serde(default = "default_min_lines")]
359
+ min_lines: usize,
360
+ },
361
+ }
362
+
363
+ /// Create a retry loop from configuration
364
+ pub fn get_retry_loop_from_config(config: &RetryLoopConfig) -> Box<dyn RetryLoop> {
365
+ match config {
366
+ RetryLoopConfig::None => Box::new(MaxAttemptsRetryLoop::new(1)),
367
+ RetryLoopConfig::MaxAttempts { max_attempts } => {
368
+ Box::new(MaxAttemptsRetryLoop::new(*max_attempts))
369
+ }
370
+ RetryLoopConfig::SubmissionPresence {
371
+ max_attempts,
372
+ threshold,
373
+ } => Box::new(ReviewerRetryLoop::new(
374
+ *max_attempts,
375
+ Box::new(SubmissionPresenceReviewer::new(*threshold)),
376
+ *threshold,
377
+ )),
378
+ RetryLoopConfig::PatchPresence {
379
+ max_attempts,
380
+ min_lines,
381
+ } => Box::new(ReviewerRetryLoop::new(
382
+ *max_attempts,
383
+ Box::new(PatchPresenceReviewer::new(*min_lines)),
384
+ 0.5,
385
+ )),
386
+ }
387
+ }
388
+
389
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a submission with an empty trajectory, default agent info and
    /// the given optional patch text.
    fn make_submission(patch: Option<&str>) -> ReviewSubmission {
        ReviewSubmission {
            trajectory: vec![],
            info: AgentInfo::default(),
            submission: patch.map(str::to_string),
        }
    }

    /// The pass-through reviewer accepts a submission with a full score.
    #[tokio::test]
    async fn test_pass_through_reviewer() {
        let reviewer = PassThroughReviewer;
        let submission = make_submission(Some("test patch"));

        let result = reviewer.review(&submission).await.unwrap();
        assert_eq!(result.score, 1.0);
        assert!(!result.should_retry);
    }

    /// A present submission scores 1.0 without retry; a missing one scores
    /// 0.0 and asks for a retry.
    #[tokio::test]
    async fn test_submission_presence_reviewer() {
        let reviewer = SubmissionPresenceReviewer::new(0.5);

        // With submission
        let with_sub = make_submission(Some("patch content"));
        let result = reviewer.review(&with_sub).await.unwrap();
        assert_eq!(result.score, 1.0);
        assert!(!result.should_retry);

        // Without submission
        let no_sub = make_submission(None);
        let result = reviewer.review(&no_sub).await.unwrap();
        assert_eq!(result.score, 0.0);
        assert!(result.should_retry);
    }

    /// Diff-shaped text scores 1.0; plain text without diff content scores 0.0.
    #[tokio::test]
    async fn test_patch_presence_reviewer() {
        let reviewer = PatchPresenceReviewer::new(2);

        // Valid patch
        let valid = make_submission(Some("--- a/file.py\n+++ b/file.py\n+new line\n-old line"));
        let result = reviewer.review(&valid).await.unwrap();
        assert_eq!(result.score, 1.0);

        // Invalid patch (no diff content)
        let invalid = make_submission(Some("just some text\nno diff here"));
        let result = reviewer.review(&invalid).await.unwrap();
        assert_eq!(result.score, 0.0);
    }

    /// The simple chooser picks the entry that actually carries a submission.
    #[tokio::test]
    async fn test_simple_chooser() {
        let chooser = SimpleChooser;
        let submissions = vec![
            make_submission(None),
            make_submission(Some("a longer patch content here")),
        ];

        let result = chooser.choose(&submissions).await.unwrap();
        assert_eq!(result.best_index, 1);
    }

    /// The loop keeps retrying until the third attempt has been recorded.
    #[test]
    fn test_max_attempts_retry_loop() {
        let mut loop_runner = MaxAttemptsRetryLoop::new(3);

        assert!(loop_runner.should_retry());

        loop_runner.on_submit(make_submission(None));
        assert!(loop_runner.should_retry());

        loop_runner.on_submit(make_submission(Some("patch")));
        assert!(loop_runner.should_retry());

        loop_runner.on_submit(make_submission(None));
        assert!(!loop_runner.should_retry());

        // Best should be index 1 (the one with submission)
        assert_eq!(loop_runner.get_best(), Some(1));
    }
}