@elizaos/sweagent-root 2.0.0-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (323)
  1. package/LICENSE +21 -0
  2. package/README.md +270 -0
  3. package/package.json +71 -0
  4. package/python/LICENSE +21 -0
  5. package/python/config/README.md +15 -0
  6. package/python/config/bash_only.yaml +222 -0
  7. package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
  8. package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
  9. package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
  10. package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
  11. package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
  12. package/python/config/coding_challenge.yaml +104 -0
  13. package/python/config/default.yaml +69 -0
  14. package/python/config/default_backticks.yaml +69 -0
  15. package/python/config/default_mm_no_images.yaml +82 -0
  16. package/python/config/default_mm_with_images.yaml +83 -0
  17. package/python/config/demo/default.yaml +80 -0
  18. package/python/config/demo/no_instructions.yaml +69 -0
  19. package/python/config/demo/only_bash.yaml +60 -0
  20. package/python/config/exotic/default_shell.yaml +52 -0
  21. package/python/config/exotic/windowed_replace.yaml +125 -0
  22. package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
  23. package/python/config/human/human.yaml +24 -0
  24. package/python/config/human/human_demo.yaml +52 -0
  25. package/python/config/sweagent_0_7/07.yaml +101 -0
  26. package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
  27. package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
  28. package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
  29. package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
  30. package/python/mlc_config.json +44 -0
  31. package/python/pyproject.toml +262 -0
  32. package/python/sweagent/__init__.py +114 -0
  33. package/python/sweagent/__main__.py +4 -0
  34. package/python/sweagent/agent/__init__.py +0 -0
  35. package/python/sweagent/agent/action_sampler.py +317 -0
  36. package/python/sweagent/agent/agents.py +1294 -0
  37. package/python/sweagent/agent/extra/shell_agent.py +106 -0
  38. package/python/sweagent/agent/history_processors.py +399 -0
  39. package/python/sweagent/agent/hooks/__init__.py +0 -0
  40. package/python/sweagent/agent/hooks/abstract.py +139 -0
  41. package/python/sweagent/agent/hooks/status.py +34 -0
  42. package/python/sweagent/agent/models.py +896 -0
  43. package/python/sweagent/agent/problem_statement.py +312 -0
  44. package/python/sweagent/agent/reviewer.py +664 -0
  45. package/python/sweagent/environment/__init__.py +0 -0
  46. package/python/sweagent/environment/hooks/__init__.py +0 -0
  47. package/python/sweagent/environment/hooks/abstract.py +60 -0
  48. package/python/sweagent/environment/hooks/status.py +28 -0
  49. package/python/sweagent/environment/repo.py +219 -0
  50. package/python/sweagent/environment/swe_env.py +276 -0
  51. package/python/sweagent/exceptions.py +54 -0
  52. package/python/sweagent/inspector/README.md +6 -0
  53. package/python/sweagent/inspector/__init__.py +0 -0
  54. package/python/sweagent/inspector/favicon.ico +0 -0
  55. package/python/sweagent/inspector/fileViewer.js +354 -0
  56. package/python/sweagent/inspector/icons/computer.png +0 -0
  57. package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
  58. package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
  59. package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
  60. package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
  61. package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
  62. package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
  63. package/python/sweagent/inspector/index.html +25 -0
  64. package/python/sweagent/inspector/server.py +354 -0
  65. package/python/sweagent/inspector/static.py +169 -0
  66. package/python/sweagent/inspector/style.css +454 -0
  67. package/python/sweagent/run/__init__.py +0 -0
  68. package/python/sweagent/run/_progress.py +158 -0
  69. package/python/sweagent/run/batch_instances.py +419 -0
  70. package/python/sweagent/run/common.py +387 -0
  71. package/python/sweagent/run/compare_runs.py +123 -0
  72. package/python/sweagent/run/extract_pred.py +19 -0
  73. package/python/sweagent/run/hooks/__init__.py +0 -0
  74. package/python/sweagent/run/hooks/abstract.py +67 -0
  75. package/python/sweagent/run/hooks/apply_patch.py +106 -0
  76. package/python/sweagent/run/hooks/open_pr.py +244 -0
  77. package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
  78. package/python/sweagent/run/inspector_cli.py +493 -0
  79. package/python/sweagent/run/merge_predictions.py +64 -0
  80. package/python/sweagent/run/quick_stats.py +96 -0
  81. package/python/sweagent/run/remove_unfinished.py +63 -0
  82. package/python/sweagent/run/rich_test.py +91 -0
  83. package/python/sweagent/run/run.py +147 -0
  84. package/python/sweagent/run/run_batch.py +442 -0
  85. package/python/sweagent/run/run_replay.py +219 -0
  86. package/python/sweagent/run/run_shell.py +155 -0
  87. package/python/sweagent/run/run_single.py +225 -0
  88. package/python/sweagent/run/run_traj_to_demo.py +85 -0
  89. package/python/sweagent/tools/__init__.py +0 -0
  90. package/python/sweagent/tools/bundle.py +57 -0
  91. package/python/sweagent/tools/commands.py +220 -0
  92. package/python/sweagent/tools/parsing.py +619 -0
  93. package/python/sweagent/tools/tools.py +430 -0
  94. package/python/sweagent/tools/utils.py +108 -0
  95. package/python/sweagent/types.py +102 -0
  96. package/python/sweagent/utils/__init__.py +0 -0
  97. package/python/sweagent/utils/config.py +80 -0
  98. package/python/sweagent/utils/files.py +27 -0
  99. package/python/sweagent/utils/github.py +118 -0
  100. package/python/sweagent/utils/jinja_warnings.py +14 -0
  101. package/python/sweagent/utils/log.py +175 -0
  102. package/python/sweagent/utils/patch_formatter.py +152 -0
  103. package/python/sweagent/utils/serialization.py +45 -0
  104. package/python/tests/__init__.py +0 -0
  105. package/python/tests/conftest.py +191 -0
  106. package/python/tests/test_agent.py +258 -0
  107. package/python/tests/test_batch_instance.py +43 -0
  108. package/python/tests/test_commands/_interactive_dummy.py +35 -0
  109. package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
  110. package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
  111. package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
  112. package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
  113. package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
  114. package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
  115. package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
  116. package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
  117. package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
  118. package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
  119. package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
  120. package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
  121. package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
  122. package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
  123. package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
  124. package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
  125. package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
  126. package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
  127. package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
  128. package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
  129. package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
  130. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
  131. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
  132. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
  133. package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
  134. package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
  135. package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
  136. package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
  137. package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
  138. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
  139. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
  140. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
  141. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
  142. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
  143. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
  144. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
  145. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
  146. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
  147. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
  148. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
  149. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
  150. package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
  151. package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
  152. package/python/tests/test_data/data_sources/human_eval.json +1 -0
  153. package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
  154. package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
  155. package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
  156. package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
  157. package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
  158. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
  159. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
  160. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
  161. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
  162. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
  163. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
  164. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
  165. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
  166. package/python/tests/test_env.py +66 -0
  167. package/python/tests/test_env_utils.py +129 -0
  168. package/python/tests/test_history_processors.py +40 -0
  169. package/python/tests/test_models.py +23 -0
  170. package/python/tests/test_openai_live.py +164 -0
  171. package/python/tests/test_packaging.py +7 -0
  172. package/python/tests/test_parsing.py +131 -0
  173. package/python/tests/test_problem_statement_multimodal.py +111 -0
  174. package/python/tests/test_quick_stats.py +42 -0
  175. package/python/tests/test_run.py +37 -0
  176. package/python/tests/test_run_batch.py +110 -0
  177. package/python/tests/test_run_hooks.py +114 -0
  178. package/python/tests/test_run_replay.py +33 -0
  179. package/python/tests/test_run_single.py +125 -0
  180. package/python/tests/test_tools_command_parsing.py +193 -0
  181. package/python/tests/test_utils.py +15 -0
  182. package/python/tests/tools/__init__.py +0 -0
  183. package/python/tests/tools/conftest.py +12 -0
  184. package/python/tests/tools/test_default_utils.py +153 -0
  185. package/python/tests/tools/test_edit_replace.py +0 -0
  186. package/python/tests/tools/test_split_string.py +82 -0
  187. package/python/tests/utils.py +29 -0
  188. package/python/tools/diff_state/bin/_state_diff_state +52 -0
  189. package/python/tools/diff_state/config.yaml +2 -0
  190. package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
  191. package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
  192. package/python/tools/edit_anthropic/config.yaml +56 -0
  193. package/python/tools/edit_anthropic/install.sh +3 -0
  194. package/python/tools/filemap/bin/filemap +45 -0
  195. package/python/tools/filemap/config.yaml +9 -0
  196. package/python/tools/filemap/install.sh +2 -0
  197. package/python/tools/forfeit/bin/exit_forfeit +5 -0
  198. package/python/tools/forfeit/config.yaml +5 -0
  199. package/python/tools/image_tools/bin/view_image +36 -0
  200. package/python/tools/image_tools/config.yaml +9 -0
  201. package/python/tools/multilingual_setup/bin/do_nothing +2 -0
  202. package/python/tools/multilingual_setup/config.yaml +1 -0
  203. package/python/tools/multilingual_setup/install.sh +45 -0
  204. package/python/tools/registry/bin/_read_env +10 -0
  205. package/python/tools/registry/bin/_write_env +10 -0
  206. package/python/tools/registry/config.yaml +1 -0
  207. package/python/tools/registry/install.sh +6 -0
  208. package/python/tools/registry/lib/__init__.py +0 -0
  209. package/python/tools/registry/lib/registry.py +56 -0
  210. package/python/tools/review_on_submit_m/README.md +6 -0
  211. package/python/tools/review_on_submit_m/bin/submit +54 -0
  212. package/python/tools/review_on_submit_m/config.yaml +6 -0
  213. package/python/tools/review_on_submit_m/install.sh +0 -0
  214. package/python/tools/search/bin/find_file +31 -0
  215. package/python/tools/search/bin/search_dir +39 -0
  216. package/python/tools/search/bin/search_file +55 -0
  217. package/python/tools/search/config.yaml +37 -0
  218. package/python/tools/search/install.sh +3 -0
  219. package/python/tools/submit/bin/submit +17 -0
  220. package/python/tools/submit/config.yaml +5 -0
  221. package/python/tools/web_browser/bin/click_mouse +41 -0
  222. package/python/tools/web_browser/bin/close_site +28 -0
  223. package/python/tools/web_browser/bin/double_click_mouse +37 -0
  224. package/python/tools/web_browser/bin/drag_mouse +46 -0
  225. package/python/tools/web_browser/bin/execute_script_on_page +39 -0
  226. package/python/tools/web_browser/bin/get_console_output +48 -0
  227. package/python/tools/web_browser/bin/move_mouse +35 -0
  228. package/python/tools/web_browser/bin/navigate_back +33 -0
  229. package/python/tools/web_browser/bin/navigate_forward +33 -0
  230. package/python/tools/web_browser/bin/open_site +36 -0
  231. package/python/tools/web_browser/bin/press_keys_on_page +51 -0
  232. package/python/tools/web_browser/bin/reload_page +33 -0
  233. package/python/tools/web_browser/bin/run_web_browser_server +394 -0
  234. package/python/tools/web_browser/bin/screenshot_site +38 -0
  235. package/python/tools/web_browser/bin/scroll_on_page +40 -0
  236. package/python/tools/web_browser/bin/set_browser_window_size +40 -0
  237. package/python/tools/web_browser/bin/type_text +34 -0
  238. package/python/tools/web_browser/bin/wait_time +39 -0
  239. package/python/tools/web_browser/config.yaml +155 -0
  240. package/python/tools/web_browser/install.sh +22 -0
  241. package/python/tools/web_browser/lib/browser_manager.py +404 -0
  242. package/python/tools/web_browser/lib/web_browser_config.py +33 -0
  243. package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
  244. package/python/tools/web_browser/test_console.html +1 -0
  245. package/python/tools/windowed/bin/_state +25 -0
  246. package/python/tools/windowed/bin/create +29 -0
  247. package/python/tools/windowed/bin/goto +37 -0
  248. package/python/tools/windowed/bin/open +49 -0
  249. package/python/tools/windowed/bin/scroll_down +12 -0
  250. package/python/tools/windowed/bin/scroll_up +13 -0
  251. package/python/tools/windowed/config.yaml +38 -0
  252. package/python/tools/windowed/install.sh +15 -0
  253. package/python/tools/windowed/lib/__init__.py +0 -0
  254. package/python/tools/windowed/lib/flake8_utils.py +147 -0
  255. package/python/tools/windowed/lib/windowed_file.py +312 -0
  256. package/python/tools/windowed_edit_linting/bin/edit +128 -0
  257. package/python/tools/windowed_edit_linting/config.yaml +31 -0
  258. package/python/tools/windowed_edit_linting/install.sh +5 -0
  259. package/python/tools/windowed_edit_replace/bin/edit +172 -0
  260. package/python/tools/windowed_edit_replace/bin/insert +77 -0
  261. package/python/tools/windowed_edit_replace/config.yaml +60 -0
  262. package/python/tools/windowed_edit_replace/install.sh +5 -0
  263. package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
  264. package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
  265. package/python/tools/windowed_edit_rewrite/install.sh +5 -0
  266. package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
  267. package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
  268. package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
  269. package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
  270. package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
  271. package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
  272. package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
  273. package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
  274. package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
  275. package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
  276. package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
  277. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
  278. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  279. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  280. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
  281. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
  282. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
  283. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  284. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  285. package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
  286. package/rust/Cargo.toml +100 -0
  287. package/rust/README.md +49 -0
  288. package/rust/src/agent/action_sampler.rs +130 -0
  289. package/rust/src/agent/agents.rs +1029 -0
  290. package/rust/src/agent/history_processors.rs +277 -0
  291. package/rust/src/agent/hooks/mod.rs +208 -0
  292. package/rust/src/agent/mod.rs +24 -0
  293. package/rust/src/agent/models.rs +837 -0
  294. package/rust/src/agent/problem_statement.rs +355 -0
  295. package/rust/src/agent/reviewer.rs +505 -0
  296. package/rust/src/bin/sweagent.rs +784 -0
  297. package/rust/src/environment/deployment.rs +631 -0
  298. package/rust/src/environment/hooks/mod.rs +114 -0
  299. package/rust/src/environment/mod.rs +16 -0
  300. package/rust/src/environment/repo.rs +265 -0
  301. package/rust/src/environment/runtime.rs +237 -0
  302. package/rust/src/environment/swe_env.rs +248 -0
  303. package/rust/src/exceptions.rs +228 -0
  304. package/rust/src/lib.rs +68 -0
  305. package/rust/src/monitoring.rs +482 -0
  306. package/rust/src/run/hooks/mod.rs +134 -0
  307. package/rust/src/run/mod.rs +12 -0
  308. package/rust/src/run/run_batch.rs +563 -0
  309. package/rust/src/run/run_single.rs +196 -0
  310. package/rust/src/tools/bundle.rs +224 -0
  311. package/rust/src/tools/commands.rs +173 -0
  312. package/rust/src/tools/mod.rs +295 -0
  313. package/rust/src/tools/parsing.rs +354 -0
  314. package/rust/src/tools/registry.rs +143 -0
  315. package/rust/src/types.rs +554 -0
  316. package/rust/src/utils/config.rs +105 -0
  317. package/rust/src/utils/files.rs +137 -0
  318. package/rust/src/utils/github.rs +171 -0
  319. package/rust/src/utils/log.rs +65 -0
  320. package/rust/src/utils/mod.rs +17 -0
  321. package/rust/src/utils/serialization.rs +181 -0
  322. package/rust/src/utils/template.rs +173 -0
  323. package/typescript/README.md +335 -0
@@ -0,0 +1,837 @@
1
+ //! Model implementations for SWE-agent
2
+ //!
3
+ //! This module contains various model implementations for interacting with LLMs.
4
+
5
+ use crate::exceptions::{Result, SWEAgentError};
6
+ use crate::types::{History, ModelOutput, ModelStats, RetryConfig, Role, ToolCall};
7
+ use async_trait::async_trait;
8
+ use serde::{Deserialize, Serialize};
9
+ use std::collections::HashMap;
10
+ use std::sync::atomic::{AtomicU64, Ordering};
11
+ use std::sync::Arc;
12
+ use tokio::sync::Mutex;
13
+
14
/// Global statistics tracking across all model instances.
///
/// Both counters are atomics, so every method takes `&self` and a single value
/// can be shared between several owners.
#[derive(Debug, Default)]
pub struct GlobalStats {
    /// Accumulated cost, stored as whole micro-dollars for precision.
    pub total_cost: AtomicU64,
    /// Seconds since the Unix epoch, as recorded by the last `update_timestamp` call.
    pub last_query_timestamp: AtomicU64,
}

impl GlobalStats {
    /// Number of micro-dollar units in one dollar.
    const MICROS_PER_DOLLAR: f64 = 1_000_000.0;

    /// Adds `cost` (in dollars) to the running total.
    ///
    /// The amount is rounded to the nearest micro-dollar. Truncating instead
    /// would systematically undercount, because products such as
    /// `0.0000006 * 1e6` or values landing just below an integer would lose
    /// their final unit. Non-finite and non-positive costs are ignored so they
    /// can neither wrap nor distort the counter.
    pub fn add_cost(&self, cost: f64) {
        if !cost.is_finite() || cost <= 0.0 {
            return;
        }
        // A float-to-int `as` cast saturates, so very large finite values clamp
        // to `u64::MAX` rather than invoking undefined behaviour.
        let micro_cost = (cost * Self::MICROS_PER_DOLLAR).round() as u64;
        self.total_cost.fetch_add(micro_cost, Ordering::SeqCst);
    }

    /// Returns the accumulated cost converted back to dollars.
    pub fn get_total_cost(&self) -> f64 {
        self.total_cost.load(Ordering::SeqCst) as f64 / Self::MICROS_PER_DOLLAR
    }

    /// Records the current wall-clock time as whole seconds since the Unix epoch.
    ///
    /// Stores `0` if the system clock reports a time before the epoch.
    pub fn update_timestamp(&self) {
        let now = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map(|elapsed| elapsed.as_secs())
            .unwrap_or(0);
        self.last_query_timestamp.store(now, Ordering::SeqCst);
    }
}
39
+
40
+ /// Instance-specific statistics
41
+ #[derive(Debug, Default, Clone)]
42
+ pub struct InstanceStats {
43
+ pub instance_cost: f64,
44
+ pub tokens_sent: u64,
45
+ pub tokens_received: u64,
46
+ pub api_calls: u64,
47
+ }
48
+
49
+ impl InstanceStats {
50
+ pub fn add(&self, other: &InstanceStats) -> InstanceStats {
51
+ InstanceStats {
52
+ instance_cost: self.instance_cost + other.instance_cost,
53
+ tokens_sent: self.tokens_sent + other.tokens_sent,
54
+ tokens_received: self.tokens_received + other.tokens_received,
55
+ api_calls: self.api_calls + other.api_calls,
56
+ }
57
+ }
58
+
59
+ pub fn to_model_stats(&self) -> ModelStats {
60
+ ModelStats {
61
+ instance_cost: self.instance_cost,
62
+ tokens_sent: self.tokens_sent,
63
+ tokens_received: self.tokens_received,
64
+ api_calls: self.api_calls,
65
+ }
66
+ }
67
+ }
68
+
69
+ /// Abstract trait for all models
70
+ #[async_trait]
71
+ pub trait Model: Send + Sync {
72
+ /// Query the model with conversation history
73
+ async fn query(&self, history: &History) -> Result<ModelOutput>;
74
+
75
+ /// Query with specific temperature and number of completions
76
+ async fn query_with_params(
77
+ &self,
78
+ history: &History,
79
+ _temperature: Option<f64>,
80
+ _n: Option<usize>,
81
+ ) -> Result<Vec<ModelOutput>> {
82
+ let output = self.query(history).await?;
83
+ Ok(vec![output])
84
+ }
85
+
86
+ /// Reset instance statistics
87
+ fn reset_stats(&self);
88
+
89
+ /// Get current instance statistics
90
+ fn get_stats(&self) -> InstanceStats;
91
+
92
+ /// Get per-instance cost limit
93
+ fn instance_cost_limit(&self) -> f64;
94
+ }
95
+
96
+ /// Generic API model configuration
97
+ #[derive(Debug, Clone, Serialize, Deserialize)]
98
+ pub struct GenericApiModelConfig {
99
+ pub name: String,
100
+ #[serde(default = "default_per_instance_cost_limit")]
101
+ pub per_instance_cost_limit: f64,
102
+ #[serde(default)]
103
+ pub total_cost_limit: f64,
104
+ #[serde(default)]
105
+ pub per_instance_call_limit: u64,
106
+ #[serde(default)]
107
+ pub temperature: f64,
108
+ #[serde(default = "default_top_p")]
109
+ pub top_p: Option<f64>,
110
+ #[serde(default)]
111
+ pub api_base: Option<String>,
112
+ #[serde(default)]
113
+ pub api_key: Option<String>,
114
+ #[serde(default)]
115
+ pub stop: Vec<String>,
116
+ #[serde(default)]
117
+ pub completion_kwargs: HashMap<String, serde_json::Value>,
118
+ #[serde(default)]
119
+ pub convert_system_to_user: bool,
120
+ #[serde(default)]
121
+ pub retry: RetryConfig,
122
+ #[serde(default)]
123
+ pub delay: f64,
124
+ #[serde(default)]
125
+ pub max_input_tokens: Option<u64>,
126
+ #[serde(default)]
127
+ pub max_output_tokens: Option<u64>,
128
+ }
129
+
130
/// Serde default for `per_instance_cost_limit`.
fn default_per_instance_cost_limit() -> f64 {
    const DEFAULT_LIMIT: f64 = 3.0;
    DEFAULT_LIMIT
}
133
+
134
/// Serde default for `top_p`.
fn default_top_p() -> Option<f64> {
    let top_p: f64 = 1.0;
    Some(top_p)
}
137
+
138
+ impl Default for GenericApiModelConfig {
139
+ fn default() -> Self {
140
+ Self {
141
+ name: "gpt-4".to_string(),
142
+ per_instance_cost_limit: default_per_instance_cost_limit(),
143
+ total_cost_limit: 0.0,
144
+ per_instance_call_limit: 0,
145
+ temperature: 0.0,
146
+ top_p: default_top_p(),
147
+ api_base: None,
148
+ api_key: None,
149
+ stop: Vec::new(),
150
+ completion_kwargs: HashMap::new(),
151
+ convert_system_to_user: false,
152
+ retry: RetryConfig::default(),
153
+ delay: 0.0,
154
+ max_input_tokens: None,
155
+ max_output_tokens: None,
156
+ }
157
+ }
158
+ }
159
+
160
+ /// LiteLLM-compatible model for API-based LLMs
161
+ pub struct LiteLLMModel {
162
+ config: GenericApiModelConfig,
163
+ stats: Arc<Mutex<InstanceStats>>,
164
+ global_stats: Arc<GlobalStats>,
165
+ api_keys: Vec<String>,
166
+ current_key_index: Arc<AtomicU64>,
167
+ client: reqwest::Client,
168
+ }
169
+
170
+ impl LiteLLMModel {
171
+ pub fn new(config: GenericApiModelConfig, global_stats: Arc<GlobalStats>) -> Self {
172
+ let api_keys = Self::get_api_keys(&config);
173
+ Self {
174
+ config,
175
+ stats: Arc::new(Mutex::new(InstanceStats::default())),
176
+ global_stats,
177
+ api_keys,
178
+ current_key_index: Arc::new(AtomicU64::new(0)),
179
+ client: reqwest::Client::new(),
180
+ }
181
+ }
182
+
183
+ fn get_api_keys(config: &GenericApiModelConfig) -> Vec<String> {
184
+ if let Some(ref key) = config.api_key {
185
+ if let Some(stripped) = key.strip_prefix('$') {
186
+ // Environment variable
187
+ if let Ok(env_key) = std::env::var(stripped) {
188
+ return env_key.split(":::").map(String::from).collect();
189
+ }
190
+ } else {
191
+ return key.split(":::").map(String::from).collect();
192
+ }
193
+ }
194
+
195
+ // Try environment variable based on model name
196
+ let env_name = format!("{}_API_KEY", config.name.to_uppercase().replace('-', "_"));
197
+ if let Ok(key) = std::env::var(&env_name) {
198
+ return key.split(":::").map(String::from).collect();
199
+ }
200
+
201
+ Vec::new()
202
+ }
203
+
204
+ fn choose_api_key(&self) -> Option<String> {
205
+ if self.api_keys.is_empty() {
206
+ return None;
207
+ }
208
+
209
+ let idx = self.current_key_index.fetch_add(1, Ordering::SeqCst) as usize;
210
+ Some(self.api_keys[idx % self.api_keys.len()].clone())
211
+ }
212
+
213
+ fn history_to_messages(&self, history: &History) -> Vec<serde_json::Value> {
214
+ history
215
+ .iter()
216
+ .map(|item| {
217
+ let role = match item.role {
218
+ Role::System => {
219
+ if self.config.convert_system_to_user {
220
+ "user"
221
+ } else {
222
+ "system"
223
+ }
224
+ }
225
+ Role::User => "user",
226
+ Role::Assistant => "assistant",
227
+ Role::Tool => "tool",
228
+ };
229
+
230
+ let mut msg = serde_json::json!({
231
+ "role": role,
232
+ "content": item.content.as_str(),
233
+ });
234
+
235
+ if let Some(ref tool_calls) = item.tool_calls {
236
+ msg["tool_calls"] = serde_json::to_value(tool_calls).unwrap_or_default();
237
+ }
238
+
239
+ if let Some(ref ids) = item.tool_call_ids {
240
+ if !ids.is_empty() {
241
+ msg["tool_call_id"] = serde_json::Value::String(ids[0].clone());
242
+ }
243
+ }
244
+
245
+ msg
246
+ })
247
+ .collect()
248
+ }
249
+
250
+ fn calculate_cost(&self, input_tokens: u64, output_tokens: u64) -> f64 {
251
+ // Simplified pricing - in production, use actual model pricing
252
+ let (input_price, output_price) = match self.config.name.as_str() {
253
+ name if name.contains("gpt-4") => (0.03 / 1000.0, 0.06 / 1000.0),
254
+ name if name.contains("gpt-3.5") => (0.0005 / 1000.0, 0.0015 / 1000.0),
255
+ name if name.contains("claude-3-opus") => (0.015 / 1000.0, 0.075 / 1000.0),
256
+ name if name.contains("claude-3-sonnet") => (0.003 / 1000.0, 0.015 / 1000.0),
257
+ name if name.contains("claude-3-haiku") => (0.00025 / 1000.0, 0.00125 / 1000.0),
258
+ _ => (0.001 / 1000.0, 0.002 / 1000.0),
259
+ };
260
+
261
+ input_tokens as f64 * input_price + output_tokens as f64 * output_price
262
+ }
263
+
264
+ async fn check_cost_limits(&self) -> Result<()> {
265
+ let stats = self.stats.lock().await;
266
+
267
+ if self.config.per_instance_cost_limit > 0.0
268
+ && stats.instance_cost >= self.config.per_instance_cost_limit
269
+ {
270
+ return Err(SWEAgentError::InstanceCostLimitExceeded(format!(
271
+ "Instance cost {} exceeds limit {}",
272
+ stats.instance_cost, self.config.per_instance_cost_limit
273
+ )));
274
+ }
275
+
276
+ if self.config.total_cost_limit > 0.0
277
+ && self.global_stats.get_total_cost() >= self.config.total_cost_limit
278
+ {
279
+ return Err(SWEAgentError::TotalCostLimitExceeded(format!(
280
+ "Total cost {} exceeds limit {}",
281
+ self.global_stats.get_total_cost(),
282
+ self.config.total_cost_limit
283
+ )));
284
+ }
285
+
286
+ if self.config.per_instance_call_limit > 0
287
+ && stats.api_calls >= self.config.per_instance_call_limit
288
+ {
289
+ return Err(SWEAgentError::InstanceCallLimitExceeded(format!(
290
+ "API calls {} exceeds limit {}",
291
+ stats.api_calls, self.config.per_instance_call_limit
292
+ )));
293
+ }
294
+
295
+ Ok(())
296
+ }
297
+ }
298
+
299
+ #[async_trait]
300
+ impl Model for LiteLLMModel {
301
+ async fn query(&self, history: &History) -> Result<ModelOutput> {
302
+ self.check_cost_limits().await?;
303
+
304
+ let api_key = self.choose_api_key();
305
+ let messages = self.history_to_messages(history);
306
+
307
+ // Determine API endpoint based on model name
308
+ let is_anthropic = self.config.name.contains("claude");
309
+ let _is_openai = self.config.name.contains("gpt");
310
+
311
+ let (url, headers) = if is_anthropic {
312
+ let url = self
313
+ .config
314
+ .api_base
315
+ .clone()
316
+ .unwrap_or_else(|| "https://api.anthropic.com/v1/messages".to_string());
317
+ let mut headers = reqwest::header::HeaderMap::new();
318
+ headers.insert("Content-Type", "application/json".parse().unwrap());
319
+ headers.insert("anthropic-version", "2023-06-01".parse().unwrap());
320
+ if let Some(ref key) = api_key {
321
+ headers.insert("x-api-key", key.parse().unwrap());
322
+ }
323
+ (url, headers)
324
+ } else {
325
+ let url = self
326
+ .config
327
+ .api_base
328
+ .clone()
329
+ .unwrap_or_else(|| "https://api.openai.com/v1/chat/completions".to_string());
330
+ let mut headers = reqwest::header::HeaderMap::new();
331
+ headers.insert("Content-Type", "application/json".parse().unwrap());
332
+ if let Some(ref key) = api_key {
333
+ headers.insert("Authorization", format!("Bearer {}", key).parse().unwrap());
334
+ }
335
+ (url, headers)
336
+ };
337
+
338
+ let mut request_body = serde_json::json!({
339
+ "model": self.config.name,
340
+ "messages": messages,
341
+ "temperature": self.config.temperature,
342
+ });
343
+
344
+ if let Some(top_p) = self.config.top_p {
345
+ request_body["top_p"] = serde_json::Value::from(top_p);
346
+ }
347
+
348
+ if !self.config.stop.is_empty() {
349
+ request_body["stop"] = serde_json::to_value(&self.config.stop)?;
350
+ }
351
+
352
+ if let Some(max_tokens) = self.config.max_output_tokens {
353
+ request_body["max_tokens"] = serde_json::Value::from(max_tokens);
354
+ }
355
+
356
+ // Handle Anthropic-specific format
357
+ if is_anthropic {
358
+ let system_msg = messages.iter().find(|m| m["role"] == "system");
359
+ if let Some(sys) = system_msg {
360
+ request_body["system"] = sys["content"].clone();
361
+ let non_system: Vec<_> = messages
362
+ .iter()
363
+ .filter(|m| m["role"] != "system")
364
+ .cloned()
365
+ .collect();
366
+ request_body["messages"] = serde_json::to_value(non_system)?;
367
+ }
368
+ }
369
+
370
+ let response = self
371
+ .client
372
+ .post(&url)
373
+ .headers(headers)
374
+ .json(&request_body)
375
+ .send()
376
+ .await?;
377
+
378
+ let status = response.status();
379
+ let response_text = response.text().await.unwrap_or_default();
380
+
381
+ if !status.is_success() {
382
+ // Check for specific error types
383
+ if response_text.contains("content_policy") || response_text.contains("safety") {
384
+ return Err(SWEAgentError::ContentPolicyViolation(response_text));
385
+ }
386
+ return Err(SWEAgentError::ApiError(format!(
387
+ "API request failed with status {}: {}",
388
+ status, response_text
389
+ )));
390
+ }
391
+
392
+ // Parse response - handle both OpenAI and Anthropic formats
393
+ let json_response: serde_json::Value = serde_json::from_str(&response_text)
394
+ .map_err(|e| SWEAgentError::ApiError(format!("Failed to parse response: {}", e)))?;
395
+
396
+ let (message, tool_calls, input_tokens, output_tokens) = if is_anthropic {
397
+ // Anthropic format: { "content": [{"type": "text", "text": "..."}], "usage": {...} }
398
+ let content = json_response
399
+ .get("content")
400
+ .and_then(|c| c.as_array())
401
+ .map(|arr| {
402
+ arr.iter()
403
+ .filter_map(|item| {
404
+ if item.get("type").and_then(|t| t.as_str()) == Some("text") {
405
+ item.get("text").and_then(|t| t.as_str()).map(String::from)
406
+ } else {
407
+ None
408
+ }
409
+ })
410
+ .collect::<Vec<_>>()
411
+ .join("")
412
+ })
413
+ .unwrap_or_default();
414
+
415
+ // Extract tool use blocks from Anthropic response
416
+ let tools: Option<Vec<ToolCall>> = json_response
417
+ .get("content")
418
+ .and_then(|c| c.as_array())
419
+ .map(|arr| {
420
+ arr.iter()
421
+ .filter_map(|item| {
422
+ if item.get("type").and_then(|t| t.as_str()) == Some("tool_use") {
423
+ let id = item
424
+ .get("id")
425
+ .and_then(|i| i.as_str())
426
+ .unwrap_or("")
427
+ .to_string();
428
+ let name = item
429
+ .get("name")
430
+ .and_then(|n| n.as_str())
431
+ .unwrap_or("")
432
+ .to_string();
433
+ let args = item
434
+ .get("input")
435
+ .map(|i| serde_json::to_string(i).unwrap_or_default())
436
+ .unwrap_or_default();
437
+ Some(ToolCall {
438
+ id,
439
+ call_type: "function".to_string(),
440
+ function: crate::types::ToolCallFunction {
441
+ name,
442
+ arguments: args,
443
+ },
444
+ })
445
+ } else {
446
+ None
447
+ }
448
+ })
449
+ .collect()
450
+ })
451
+ .filter(|v: &Vec<ToolCall>| !v.is_empty());
452
+
453
+ let usage = json_response.get("usage");
454
+ let input = usage
455
+ .and_then(|u| u.get("input_tokens"))
456
+ .and_then(|t| t.as_u64())
457
+ .unwrap_or(0);
458
+ let output = usage
459
+ .and_then(|u| u.get("output_tokens"))
460
+ .and_then(|t| t.as_u64())
461
+ .unwrap_or(0);
462
+
463
+ (content, tools, input, output)
464
+ } else {
465
+ // OpenAI format: { "choices": [{"message": {"content": "..."}}], "usage": {...} }
466
+ let message_content = json_response
467
+ .get("choices")
468
+ .and_then(|c| c.as_array())
469
+ .and_then(|arr| arr.first())
470
+ .and_then(|choice| choice.get("message"))
471
+ .and_then(|msg| msg.get("content"))
472
+ .and_then(|c| c.as_str())
473
+ .unwrap_or("")
474
+ .to_string();
475
+
476
+ // Extract tool calls from OpenAI response
477
+ let tools: Option<Vec<ToolCall>> = json_response
478
+ .get("choices")
479
+ .and_then(|c| c.as_array())
480
+ .and_then(|arr| arr.first())
481
+ .and_then(|choice| choice.get("message"))
482
+ .and_then(|msg| msg.get("tool_calls"))
483
+ .and_then(|tc| tc.as_array())
484
+ .map(|arr| {
485
+ arr.iter()
486
+ .filter_map(|item| {
487
+ let id = item
488
+ .get("id")
489
+ .and_then(|i| i.as_str())
490
+ .unwrap_or("")
491
+ .to_string();
492
+ let func = item.get("function")?;
493
+ let name = func
494
+ .get("name")
495
+ .and_then(|n| n.as_str())
496
+ .unwrap_or("")
497
+ .to_string();
498
+ let args = func
499
+ .get("arguments")
500
+ .and_then(|a| a.as_str())
501
+ .unwrap_or("")
502
+ .to_string();
503
+ Some(ToolCall {
504
+ id,
505
+ call_type: "function".to_string(),
506
+ function: crate::types::ToolCallFunction {
507
+ name,
508
+ arguments: args,
509
+ },
510
+ })
511
+ })
512
+ .collect()
513
+ })
514
+ .filter(|v: &Vec<ToolCall>| !v.is_empty());
515
+
516
+ let usage = json_response.get("usage");
517
+ let input = usage
518
+ .and_then(|u| u.get("prompt_tokens"))
519
+ .and_then(|t| t.as_u64())
520
+ .unwrap_or(0);
521
+ let output = usage
522
+ .and_then(|u| u.get("completion_tokens"))
523
+ .and_then(|t| t.as_u64())
524
+ .unwrap_or(0);
525
+
526
+ (message_content, tools, input, output)
527
+ };
528
+
529
+ // Update stats
530
+ let cost = self.calculate_cost(input_tokens, output_tokens);
531
+
532
+ {
533
+ let mut stats = self.stats.lock().await;
534
+ stats.tokens_sent += input_tokens;
535
+ stats.tokens_received += output_tokens;
536
+ stats.instance_cost += cost;
537
+ stats.api_calls += 1;
538
+ }
539
+
540
+ self.global_stats.add_cost(cost);
541
+ self.global_stats.update_timestamp();
542
+
543
+ Ok(ModelOutput {
544
+ message,
545
+ tool_calls,
546
+ thinking_blocks: None,
547
+ })
548
+ }
549
+
550
+ fn reset_stats(&self) {
551
+ if let Ok(mut stats) = self.stats.try_lock() {
552
+ *stats = InstanceStats::default();
553
+ }
554
+ }
555
+
556
+ fn get_stats(&self) -> InstanceStats {
557
+ self.stats.try_lock().map(|s| s.clone()).unwrap_or_default()
558
+ }
559
+
560
+ fn instance_cost_limit(&self) -> f64 {
561
+ self.config.per_instance_cost_limit
562
+ }
563
+ }
564
+
565
+ /// Human model for interactive input
566
+ pub struct HumanModel {
567
+ stats: Arc<Mutex<InstanceStats>>,
568
+ cost_per_call: f64,
569
+ }
570
+
571
+ impl HumanModel {
572
+ pub fn new(cost_per_call: f64) -> Self {
573
+ Self {
574
+ stats: Arc::new(Mutex::new(InstanceStats::default())),
575
+ cost_per_call,
576
+ }
577
+ }
578
+ }
579
+
580
+ #[async_trait]
581
+ impl Model for HumanModel {
582
+ async fn query(&self, _history: &History) -> Result<ModelOutput> {
583
+ use std::io::{self, BufRead, Write};
584
+
585
+ print!("> ");
586
+ io::stdout().flush()?;
587
+
588
+ let stdin = io::stdin();
589
+ let line = stdin.lock().lines().next();
590
+
591
+ let input = match line {
592
+ Some(Ok(s)) => s,
593
+ Some(Err(e)) => return Err(SWEAgentError::IoError(e.to_string())),
594
+ None => return Err(SWEAgentError::EOF),
595
+ };
596
+
597
+ {
598
+ let mut stats = self.stats.lock().await;
599
+ stats.api_calls += 1;
600
+ stats.instance_cost += self.cost_per_call;
601
+ }
602
+
603
+ Ok(ModelOutput {
604
+ message: input,
605
+ tool_calls: None,
606
+ thinking_blocks: None,
607
+ })
608
+ }
609
+
610
+ fn reset_stats(&self) {
611
+ if let Ok(mut stats) = self.stats.try_lock() {
612
+ *stats = InstanceStats::default();
613
+ }
614
+ }
615
+
616
+ fn get_stats(&self) -> InstanceStats {
617
+ self.stats.try_lock().map(|s| s.clone()).unwrap_or_default()
618
+ }
619
+
620
+ fn instance_cost_limit(&self) -> f64 {
621
+ 0.0
622
+ }
623
+ }
624
+
625
+ /// Instant empty submit model for testing
626
+ pub struct InstantEmptySubmitModel {
627
+ stats: Arc<Mutex<InstanceStats>>,
628
+ action_idx: Arc<AtomicU64>,
629
+ }
630
+
631
+ impl InstantEmptySubmitModel {
632
+ pub fn new() -> Self {
633
+ Self {
634
+ stats: Arc::new(Mutex::new(InstanceStats::default())),
635
+ action_idx: Arc::new(AtomicU64::new(0)),
636
+ }
637
+ }
638
+ }
639
+
640
+ impl Default for InstantEmptySubmitModel {
641
+ fn default() -> Self {
642
+ Self::new()
643
+ }
644
+ }
645
+
646
+ #[async_trait]
647
+ impl Model for InstantEmptySubmitModel {
648
+ async fn query(&self, _history: &History) -> Result<ModelOutput> {
649
+ let idx = self.action_idx.fetch_add(1, Ordering::SeqCst);
650
+
651
+ let message = if idx == 0 {
652
+ "DISCUSSION\nLet's reproduce the bug by creating a `reproduce.py` file.\n\n```\ntouch reproduce.py\n```\n"
653
+ } else {
654
+ self.action_idx.store(0, Ordering::SeqCst);
655
+ "DISCUSSION\nThe task should be resolved, so let's submit the patch.\n\n```\nsubmit\n```\n"
656
+ };
657
+
658
+ {
659
+ let mut stats = self.stats.lock().await;
660
+ stats.api_calls += 1;
661
+ }
662
+
663
+ Ok(ModelOutput {
664
+ message: message.to_string(),
665
+ tool_calls: None,
666
+ thinking_blocks: None,
667
+ })
668
+ }
669
+
670
+ fn reset_stats(&self) {
671
+ if let Ok(mut stats) = self.stats.try_lock() {
672
+ *stats = InstanceStats::default();
673
+ }
674
+ self.action_idx.store(0, Ordering::SeqCst);
675
+ }
676
+
677
+ fn get_stats(&self) -> InstanceStats {
678
+ self.stats.try_lock().map(|s| s.clone()).unwrap_or_default()
679
+ }
680
+
681
+ fn instance_cost_limit(&self) -> f64 {
682
+ 0.0
683
+ }
684
+ }
685
+
686
+ /// Replay model for replaying trajectories
687
+ pub struct ReplayModel {
688
+ stats: Arc<Mutex<InstanceStats>>,
689
+ replays: Vec<Vec<String>>,
690
+ replay_idx: Arc<AtomicU64>,
691
+ action_idx: Arc<AtomicU64>,
692
+ submit_command: String,
693
+ }
694
+
695
+ impl ReplayModel {
696
+ pub fn new(replay_path: &str, submit_command: &str) -> Result<Self> {
697
+ let content = std::fs::read_to_string(replay_path)?;
698
+ let replays: Vec<Vec<String>> = content
699
+ .lines()
700
+ .filter(|l| !l.trim().is_empty())
701
+ .filter_map(|l| {
702
+ serde_json::from_str::<HashMap<String, Vec<String>>>(l)
703
+ .ok()
704
+ .and_then(|m| m.into_values().next())
705
+ })
706
+ .collect();
707
+
708
+ Ok(Self {
709
+ stats: Arc::new(Mutex::new(InstanceStats::default())),
710
+ replays,
711
+ replay_idx: Arc::new(AtomicU64::new(0)),
712
+ action_idx: Arc::new(AtomicU64::new(0)),
713
+ submit_command: submit_command.to_string(),
714
+ })
715
+ }
716
+ }
717
+
718
+ #[async_trait]
719
+ impl Model for ReplayModel {
720
+ async fn query(&self, _history: &History) -> Result<ModelOutput> {
721
+ let replay_idx = self.replay_idx.load(Ordering::SeqCst) as usize;
722
+ let action_idx = self.action_idx.fetch_add(1, Ordering::SeqCst) as usize;
723
+
724
+ let action = if replay_idx >= self.replays.len() {
725
+ format!("```\n{}\n```", self.submit_command)
726
+ } else if action_idx >= self.replays[replay_idx].len() {
727
+ tracing::error!("Reached end of replay trajectory without submitting");
728
+ self.replay_idx.fetch_add(1, Ordering::SeqCst);
729
+ self.action_idx.store(0, Ordering::SeqCst);
730
+ format!("```\n{}\n```", self.submit_command)
731
+ } else {
732
+ let action = &self.replays[replay_idx][action_idx];
733
+ if action == "submit" || action.contains(&self.submit_command) {
734
+ self.replay_idx.fetch_add(1, Ordering::SeqCst);
735
+ self.action_idx.store(0, Ordering::SeqCst);
736
+ }
737
+ action.clone()
738
+ };
739
+
740
+ {
741
+ let mut stats = self.stats.lock().await;
742
+ stats.api_calls += 1;
743
+ }
744
+
745
+ Ok(ModelOutput {
746
+ message: action,
747
+ tool_calls: None,
748
+ thinking_blocks: None,
749
+ })
750
+ }
751
+
752
+ fn reset_stats(&self) {
753
+ if let Ok(mut stats) = self.stats.try_lock() {
754
+ *stats = InstanceStats::default();
755
+ }
756
+ }
757
+
758
+ fn get_stats(&self) -> InstanceStats {
759
+ self.stats.try_lock().map(|s| s.clone()).unwrap_or_default()
760
+ }
761
+
762
+ fn instance_cost_limit(&self) -> f64 {
763
+ 0.0
764
+ }
765
+ }
766
+
767
+ /// Model configuration enum
768
+ #[derive(Debug, Clone, Serialize, Deserialize)]
769
+ #[serde(tag = "name")]
770
+ pub enum ModelConfig {
771
+ #[serde(rename = "human")]
772
+ Human { cost_per_call: Option<f64> },
773
+ #[serde(rename = "instant_empty_submit")]
774
+ InstantEmptySubmit,
775
+ #[serde(rename = "replay")]
776
+ Replay { replay_path: String },
777
+ #[serde(untagged)]
778
+ Generic(Box<GenericApiModelConfig>),
779
+ }
780
+
781
+ impl Default for ModelConfig {
782
+ fn default() -> Self {
783
+ ModelConfig::Generic(Box::default())
784
+ }
785
+ }
786
+
787
+ /// Create a model from configuration
788
+ pub fn get_model(config: ModelConfig, global_stats: Arc<GlobalStats>) -> Result<Box<dyn Model>> {
789
+ match config {
790
+ ModelConfig::Human { cost_per_call } => {
791
+ Ok(Box::new(HumanModel::new(cost_per_call.unwrap_or(0.0))))
792
+ }
793
+ ModelConfig::InstantEmptySubmit => Ok(Box::new(InstantEmptySubmitModel::new())),
794
+ ModelConfig::Replay { replay_path } => {
795
+ Ok(Box::new(ReplayModel::new(&replay_path, "submit")?))
796
+ }
797
+ ModelConfig::Generic(config) => Ok(Box::new(LiteLLMModel::new(*config, global_stats))),
798
+ }
799
+ }
800
+
801
#[cfg(test)]
mod tests {
    use super::*;

    /// The scripted model creates a file first, then submits, then starts over.
    #[tokio::test]
    async fn test_instant_empty_submit_model() {
        let model = InstantEmptySubmitModel::new();
        let history = vec![];

        let output1 = model.query(&history).await.unwrap();
        assert!(output1.message.contains("reproduce.py"));

        let output2 = model.query(&history).await.unwrap();
        assert!(output2.message.contains("submit"));

        // Submitting rewinds the script, so the next query restarts at step one.
        let output3 = model.query(&history).await.unwrap();
        assert!(output3.message.contains("reproduce.py"));
    }

    /// Adding two statistics records sums their fields.
    #[test]
    fn test_instance_stats_add() {
        let a = InstanceStats {
            instance_cost: 1.0,
            tokens_sent: 100,
            tokens_received: 50,
            api_calls: 1,
        };
        let b = InstanceStats {
            instance_cost: 2.0,
            tokens_sent: 200,
            tokens_received: 100,
            api_calls: 2,
        };
        let c = a.add(&b);

        assert_eq!(c.instance_cost, 3.0);
        assert_eq!(c.tokens_sent, 300);
        assert_eq!(c.api_calls, 3);
    }
}