@elizaos/sweagent-root 2.0.0-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (323) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +270 -0
  3. package/package.json +71 -0
  4. package/python/LICENSE +21 -0
  5. package/python/config/README.md +15 -0
  6. package/python/config/bash_only.yaml +222 -0
  7. package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
  8. package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
  9. package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
  10. package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
  11. package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
  12. package/python/config/coding_challenge.yaml +104 -0
  13. package/python/config/default.yaml +69 -0
  14. package/python/config/default_backticks.yaml +69 -0
  15. package/python/config/default_mm_no_images.yaml +82 -0
  16. package/python/config/default_mm_with_images.yaml +83 -0
  17. package/python/config/demo/default.yaml +80 -0
  18. package/python/config/demo/no_instructions.yaml +69 -0
  19. package/python/config/demo/only_bash.yaml +60 -0
  20. package/python/config/exotic/default_shell.yaml +52 -0
  21. package/python/config/exotic/windowed_replace.yaml +125 -0
  22. package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
  23. package/python/config/human/human.yaml +24 -0
  24. package/python/config/human/human_demo.yaml +52 -0
  25. package/python/config/sweagent_0_7/07.yaml +101 -0
  26. package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
  27. package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
  28. package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
  29. package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
  30. package/python/mlc_config.json +44 -0
  31. package/python/pyproject.toml +262 -0
  32. package/python/sweagent/__init__.py +114 -0
  33. package/python/sweagent/__main__.py +4 -0
  34. package/python/sweagent/agent/__init__.py +0 -0
  35. package/python/sweagent/agent/action_sampler.py +317 -0
  36. package/python/sweagent/agent/agents.py +1294 -0
  37. package/python/sweagent/agent/extra/shell_agent.py +106 -0
  38. package/python/sweagent/agent/history_processors.py +399 -0
  39. package/python/sweagent/agent/hooks/__init__.py +0 -0
  40. package/python/sweagent/agent/hooks/abstract.py +139 -0
  41. package/python/sweagent/agent/hooks/status.py +34 -0
  42. package/python/sweagent/agent/models.py +896 -0
  43. package/python/sweagent/agent/problem_statement.py +312 -0
  44. package/python/sweagent/agent/reviewer.py +664 -0
  45. package/python/sweagent/environment/__init__.py +0 -0
  46. package/python/sweagent/environment/hooks/__init__.py +0 -0
  47. package/python/sweagent/environment/hooks/abstract.py +60 -0
  48. package/python/sweagent/environment/hooks/status.py +28 -0
  49. package/python/sweagent/environment/repo.py +219 -0
  50. package/python/sweagent/environment/swe_env.py +276 -0
  51. package/python/sweagent/exceptions.py +54 -0
  52. package/python/sweagent/inspector/README.md +6 -0
  53. package/python/sweagent/inspector/__init__.py +0 -0
  54. package/python/sweagent/inspector/favicon.ico +0 -0
  55. package/python/sweagent/inspector/fileViewer.js +354 -0
  56. package/python/sweagent/inspector/icons/computer.png +0 -0
  57. package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
  58. package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
  59. package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
  60. package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
  61. package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
  62. package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
  63. package/python/sweagent/inspector/index.html +25 -0
  64. package/python/sweagent/inspector/server.py +354 -0
  65. package/python/sweagent/inspector/static.py +169 -0
  66. package/python/sweagent/inspector/style.css +454 -0
  67. package/python/sweagent/run/__init__.py +0 -0
  68. package/python/sweagent/run/_progress.py +158 -0
  69. package/python/sweagent/run/batch_instances.py +419 -0
  70. package/python/sweagent/run/common.py +387 -0
  71. package/python/sweagent/run/compare_runs.py +123 -0
  72. package/python/sweagent/run/extract_pred.py +19 -0
  73. package/python/sweagent/run/hooks/__init__.py +0 -0
  74. package/python/sweagent/run/hooks/abstract.py +67 -0
  75. package/python/sweagent/run/hooks/apply_patch.py +106 -0
  76. package/python/sweagent/run/hooks/open_pr.py +244 -0
  77. package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
  78. package/python/sweagent/run/inspector_cli.py +493 -0
  79. package/python/sweagent/run/merge_predictions.py +64 -0
  80. package/python/sweagent/run/quick_stats.py +96 -0
  81. package/python/sweagent/run/remove_unfinished.py +63 -0
  82. package/python/sweagent/run/rich_test.py +91 -0
  83. package/python/sweagent/run/run.py +147 -0
  84. package/python/sweagent/run/run_batch.py +442 -0
  85. package/python/sweagent/run/run_replay.py +219 -0
  86. package/python/sweagent/run/run_shell.py +155 -0
  87. package/python/sweagent/run/run_single.py +225 -0
  88. package/python/sweagent/run/run_traj_to_demo.py +85 -0
  89. package/python/sweagent/tools/__init__.py +0 -0
  90. package/python/sweagent/tools/bundle.py +57 -0
  91. package/python/sweagent/tools/commands.py +220 -0
  92. package/python/sweagent/tools/parsing.py +619 -0
  93. package/python/sweagent/tools/tools.py +430 -0
  94. package/python/sweagent/tools/utils.py +108 -0
  95. package/python/sweagent/types.py +102 -0
  96. package/python/sweagent/utils/__init__.py +0 -0
  97. package/python/sweagent/utils/config.py +80 -0
  98. package/python/sweagent/utils/files.py +27 -0
  99. package/python/sweagent/utils/github.py +118 -0
  100. package/python/sweagent/utils/jinja_warnings.py +14 -0
  101. package/python/sweagent/utils/log.py +175 -0
  102. package/python/sweagent/utils/patch_formatter.py +152 -0
  103. package/python/sweagent/utils/serialization.py +45 -0
  104. package/python/tests/__init__.py +0 -0
  105. package/python/tests/conftest.py +191 -0
  106. package/python/tests/test_agent.py +258 -0
  107. package/python/tests/test_batch_instance.py +43 -0
  108. package/python/tests/test_commands/_interactive_dummy.py +35 -0
  109. package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
  110. package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
  111. package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
  112. package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
  113. package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
  114. package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
  115. package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
  116. package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
  117. package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
  118. package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
  119. package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
  120. package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
  121. package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
  122. package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
  123. package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
  124. package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
  125. package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
  126. package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
  127. package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
  128. package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
  129. package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
  130. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
  131. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
  132. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
  133. package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
  134. package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
  135. package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
  136. package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
  137. package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
  138. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
  139. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
  140. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
  141. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
  142. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
  143. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
  144. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
  145. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
  146. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
  147. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
  148. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
  149. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
  150. package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
  151. package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
  152. package/python/tests/test_data/data_sources/human_eval.json +1 -0
  153. package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
  154. package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
  155. package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
  156. package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
  157. package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
  158. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
  159. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
  160. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
  161. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
  162. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
  163. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
  164. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
  165. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
  166. package/python/tests/test_env.py +66 -0
  167. package/python/tests/test_env_utils.py +129 -0
  168. package/python/tests/test_history_processors.py +40 -0
  169. package/python/tests/test_models.py +23 -0
  170. package/python/tests/test_openai_live.py +164 -0
  171. package/python/tests/test_packaging.py +7 -0
  172. package/python/tests/test_parsing.py +131 -0
  173. package/python/tests/test_problem_statement_multimodal.py +111 -0
  174. package/python/tests/test_quick_stats.py +42 -0
  175. package/python/tests/test_run.py +37 -0
  176. package/python/tests/test_run_batch.py +110 -0
  177. package/python/tests/test_run_hooks.py +114 -0
  178. package/python/tests/test_run_replay.py +33 -0
  179. package/python/tests/test_run_single.py +125 -0
  180. package/python/tests/test_tools_command_parsing.py +193 -0
  181. package/python/tests/test_utils.py +15 -0
  182. package/python/tests/tools/__init__.py +0 -0
  183. package/python/tests/tools/conftest.py +12 -0
  184. package/python/tests/tools/test_default_utils.py +153 -0
  185. package/python/tests/tools/test_edit_replace.py +0 -0
  186. package/python/tests/tools/test_split_string.py +82 -0
  187. package/python/tests/utils.py +29 -0
  188. package/python/tools/diff_state/bin/_state_diff_state +52 -0
  189. package/python/tools/diff_state/config.yaml +2 -0
  190. package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
  191. package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
  192. package/python/tools/edit_anthropic/config.yaml +56 -0
  193. package/python/tools/edit_anthropic/install.sh +3 -0
  194. package/python/tools/filemap/bin/filemap +45 -0
  195. package/python/tools/filemap/config.yaml +9 -0
  196. package/python/tools/filemap/install.sh +2 -0
  197. package/python/tools/forfeit/bin/exit_forfeit +5 -0
  198. package/python/tools/forfeit/config.yaml +5 -0
  199. package/python/tools/image_tools/bin/view_image +36 -0
  200. package/python/tools/image_tools/config.yaml +9 -0
  201. package/python/tools/multilingual_setup/bin/do_nothing +2 -0
  202. package/python/tools/multilingual_setup/config.yaml +1 -0
  203. package/python/tools/multilingual_setup/install.sh +45 -0
  204. package/python/tools/registry/bin/_read_env +10 -0
  205. package/python/tools/registry/bin/_write_env +10 -0
  206. package/python/tools/registry/config.yaml +1 -0
  207. package/python/tools/registry/install.sh +6 -0
  208. package/python/tools/registry/lib/__init__.py +0 -0
  209. package/python/tools/registry/lib/registry.py +56 -0
  210. package/python/tools/review_on_submit_m/README.md +6 -0
  211. package/python/tools/review_on_submit_m/bin/submit +54 -0
  212. package/python/tools/review_on_submit_m/config.yaml +6 -0
  213. package/python/tools/review_on_submit_m/install.sh +0 -0
  214. package/python/tools/search/bin/find_file +31 -0
  215. package/python/tools/search/bin/search_dir +39 -0
  216. package/python/tools/search/bin/search_file +55 -0
  217. package/python/tools/search/config.yaml +37 -0
  218. package/python/tools/search/install.sh +3 -0
  219. package/python/tools/submit/bin/submit +17 -0
  220. package/python/tools/submit/config.yaml +5 -0
  221. package/python/tools/web_browser/bin/click_mouse +41 -0
  222. package/python/tools/web_browser/bin/close_site +28 -0
  223. package/python/tools/web_browser/bin/double_click_mouse +37 -0
  224. package/python/tools/web_browser/bin/drag_mouse +46 -0
  225. package/python/tools/web_browser/bin/execute_script_on_page +39 -0
  226. package/python/tools/web_browser/bin/get_console_output +48 -0
  227. package/python/tools/web_browser/bin/move_mouse +35 -0
  228. package/python/tools/web_browser/bin/navigate_back +33 -0
  229. package/python/tools/web_browser/bin/navigate_forward +33 -0
  230. package/python/tools/web_browser/bin/open_site +36 -0
  231. package/python/tools/web_browser/bin/press_keys_on_page +51 -0
  232. package/python/tools/web_browser/bin/reload_page +33 -0
  233. package/python/tools/web_browser/bin/run_web_browser_server +394 -0
  234. package/python/tools/web_browser/bin/screenshot_site +38 -0
  235. package/python/tools/web_browser/bin/scroll_on_page +40 -0
  236. package/python/tools/web_browser/bin/set_browser_window_size +40 -0
  237. package/python/tools/web_browser/bin/type_text +34 -0
  238. package/python/tools/web_browser/bin/wait_time +39 -0
  239. package/python/tools/web_browser/config.yaml +155 -0
  240. package/python/tools/web_browser/install.sh +22 -0
  241. package/python/tools/web_browser/lib/browser_manager.py +404 -0
  242. package/python/tools/web_browser/lib/web_browser_config.py +33 -0
  243. package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
  244. package/python/tools/web_browser/test_console.html +1 -0
  245. package/python/tools/windowed/bin/_state +25 -0
  246. package/python/tools/windowed/bin/create +29 -0
  247. package/python/tools/windowed/bin/goto +37 -0
  248. package/python/tools/windowed/bin/open +49 -0
  249. package/python/tools/windowed/bin/scroll_down +12 -0
  250. package/python/tools/windowed/bin/scroll_up +13 -0
  251. package/python/tools/windowed/config.yaml +38 -0
  252. package/python/tools/windowed/install.sh +15 -0
  253. package/python/tools/windowed/lib/__init__.py +0 -0
  254. package/python/tools/windowed/lib/flake8_utils.py +147 -0
  255. package/python/tools/windowed/lib/windowed_file.py +312 -0
  256. package/python/tools/windowed_edit_linting/bin/edit +128 -0
  257. package/python/tools/windowed_edit_linting/config.yaml +31 -0
  258. package/python/tools/windowed_edit_linting/install.sh +5 -0
  259. package/python/tools/windowed_edit_replace/bin/edit +172 -0
  260. package/python/tools/windowed_edit_replace/bin/insert +77 -0
  261. package/python/tools/windowed_edit_replace/config.yaml +60 -0
  262. package/python/tools/windowed_edit_replace/install.sh +5 -0
  263. package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
  264. package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
  265. package/python/tools/windowed_edit_rewrite/install.sh +5 -0
  266. package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
  267. package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
  268. package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
  269. package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
  270. package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
  271. package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
  272. package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
  273. package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
  274. package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
  275. package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
  276. package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
  277. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
  278. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  279. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  280. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
  281. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
  282. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
  283. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  284. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  285. package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
  286. package/rust/Cargo.toml +100 -0
  287. package/rust/README.md +49 -0
  288. package/rust/src/agent/action_sampler.rs +130 -0
  289. package/rust/src/agent/agents.rs +1029 -0
  290. package/rust/src/agent/history_processors.rs +277 -0
  291. package/rust/src/agent/hooks/mod.rs +208 -0
  292. package/rust/src/agent/mod.rs +24 -0
  293. package/rust/src/agent/models.rs +837 -0
  294. package/rust/src/agent/problem_statement.rs +355 -0
  295. package/rust/src/agent/reviewer.rs +505 -0
  296. package/rust/src/bin/sweagent.rs +784 -0
  297. package/rust/src/environment/deployment.rs +631 -0
  298. package/rust/src/environment/hooks/mod.rs +114 -0
  299. package/rust/src/environment/mod.rs +16 -0
  300. package/rust/src/environment/repo.rs +265 -0
  301. package/rust/src/environment/runtime.rs +237 -0
  302. package/rust/src/environment/swe_env.rs +248 -0
  303. package/rust/src/exceptions.rs +228 -0
  304. package/rust/src/lib.rs +68 -0
  305. package/rust/src/monitoring.rs +482 -0
  306. package/rust/src/run/hooks/mod.rs +134 -0
  307. package/rust/src/run/mod.rs +12 -0
  308. package/rust/src/run/run_batch.rs +563 -0
  309. package/rust/src/run/run_single.rs +196 -0
  310. package/rust/src/tools/bundle.rs +224 -0
  311. package/rust/src/tools/commands.rs +173 -0
  312. package/rust/src/tools/mod.rs +295 -0
  313. package/rust/src/tools/parsing.rs +354 -0
  314. package/rust/src/tools/registry.rs +143 -0
  315. package/rust/src/types.rs +554 -0
  316. package/rust/src/utils/config.rs +105 -0
  317. package/rust/src/utils/files.rs +137 -0
  318. package/rust/src/utils/github.rs +171 -0
  319. package/rust/src/utils/log.rs +65 -0
  320. package/rust/src/utils/mod.rs +17 -0
  321. package/rust/src/utils/serialization.rs +181 -0
  322. package/rust/src/utils/template.rs +173 -0
  323. package/typescript/README.md +335 -0
@@ -0,0 +1,143 @@
1
+ //! Tool registry for managing available tools
2
+
3
+ use super::{create_bundle, Bundle, BundleConfig};
4
+ use crate::exceptions::Result;
5
+ use std::collections::HashMap;
6
+
7
+ /// Registry of available tools
8
+ pub struct ToolRegistry {
9
+ tools: HashMap<String, Bundle>,
10
+ }
11
+
12
+ impl ToolRegistry {
13
+ pub fn new() -> Self {
14
+ Self {
15
+ tools: HashMap::new(),
16
+ }
17
+ }
18
+
19
+ /// Register a tool bundle
20
+ pub fn register(&mut self, bundle: Bundle) {
21
+ self.tools.insert(bundle.name.clone(), bundle);
22
+ }
23
+
24
+ /// Register a tool from configuration
25
+ pub fn register_config(&mut self, config: &BundleConfig) -> Result<()> {
26
+ let bundle = create_bundle(config)?;
27
+ self.register(bundle);
28
+ Ok(())
29
+ }
30
+
31
+ /// Get a tool by name
32
+ pub fn get(&self, name: &str) -> Option<&Bundle> {
33
+ self.tools.get(name)
34
+ }
35
+
36
+ /// Check if a tool exists
37
+ pub fn has(&self, name: &str) -> bool {
38
+ self.tools.contains_key(name)
39
+ }
40
+
41
+ /// Get all registered tools
42
+ pub fn all(&self) -> Vec<&Bundle> {
43
+ self.tools.values().collect()
44
+ }
45
+
46
+ /// Get tool names
47
+ pub fn names(&self) -> Vec<&str> {
48
+ self.tools.keys().map(|s| s.as_str()).collect()
49
+ }
50
+
51
+ /// Remove a tool
52
+ pub fn remove(&mut self, name: &str) -> Option<Bundle> {
53
+ self.tools.remove(name)
54
+ }
55
+
56
+ /// Clear all tools
57
+ pub fn clear(&mut self) {
58
+ self.tools.clear();
59
+ }
60
+ }
61
+
62
+ impl Default for ToolRegistry {
63
+ fn default() -> Self {
64
+ Self::new()
65
+ }
66
+ }
67
+
68
+ /// Create a registry with default SWE-agent tools
69
+ pub fn create_default_registry() -> ToolRegistry {
70
+ let mut registry = ToolRegistry::new();
71
+
72
+ // Register common tools
73
+ registry.register(
74
+ Bundle::new("edit")
75
+ .with_end_name("ENDEDIT")
76
+ .with_description("Edit a file")
77
+ .with_signature("edit <file> <start_line> <end_line>"),
78
+ );
79
+
80
+ registry.register(
81
+ Bundle::new("view")
82
+ .with_description("View a file or directory")
83
+ .with_signature("view <path> [start_line] [end_line]"),
84
+ );
85
+
86
+ registry.register(
87
+ Bundle::new("search")
88
+ .with_description("Search for a pattern in files")
89
+ .with_signature("search <pattern> [path]"),
90
+ );
91
+
92
+ registry.register(
93
+ Bundle::new("find_file")
94
+ .with_description("Find files by name")
95
+ .with_signature("find_file <pattern> [directory]"),
96
+ );
97
+
98
+ registry.register(
99
+ Bundle::new("submit")
100
+ .with_description("Submit the solution")
101
+ .with_signature("submit"),
102
+ );
103
+
104
+ registry.register(
105
+ Bundle::new("exit")
106
+ .with_description("Exit without submitting")
107
+ .with_signature("exit"),
108
+ );
109
+
110
+ registry
111
+ }
112
+
113
#[cfg(test)]
mod tests {
    use super::*;

    /// A registered name is reported by `has`; an unknown name is not.
    #[test]
    fn test_registry_register() {
        let mut tools = ToolRegistry::new();
        let sample = Bundle::new("test");
        tools.register(sample);

        assert!(tools.has("test"));
        assert!(!tools.has("nonexistent"));
    }

    /// `get` hands back the bundle that was registered, description included.
    #[test]
    fn test_registry_get() {
        let mut tools = ToolRegistry::new();
        let described = Bundle::new("test").with_description("A test tool");
        tools.register(described);

        let found = tools.get("test").unwrap();
        let expected_description = Some("A test tool".to_string());
        assert_eq!(found.description, expected_description);
    }

    /// The default registry contains the `edit`, `view` and `submit` tools.
    #[test]
    fn test_default_registry() {
        let defaults = create_default_registry();

        for expected in ["edit", "view", "submit"].iter() {
            assert!(defaults.has(expected));
        }
    }
}
@@ -0,0 +1,554 @@
1
+ //! Core type definitions for SWE-agent
2
+ //!
3
+ //! This module contains all shared types used throughout the SWE-agent implementation.
4
+
5
+ use serde::{Deserialize, Serialize};
6
+ use std::collections::HashMap;
7
+
8
+ /// Role in a conversation
9
+ #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
10
+ #[serde(rename_all = "lowercase")]
11
+ pub enum Role {
12
+ System,
13
+ #[default]
14
+ User,
15
+ Assistant,
16
+ Tool,
17
+ }
18
+
19
+ /// Type of message in history
20
+ #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
21
+ #[serde(rename_all = "snake_case")]
22
+ pub enum MessageType {
23
+ System,
24
+ #[default]
25
+ Observation,
26
+ Action,
27
+ Thought,
28
+ Demonstration,
29
+ User,
30
+ Assistant,
31
+ }
32
+
33
+ /// A thinking block from model output (for Claude-style extended thinking)
34
+ #[derive(Debug, Clone, Serialize, Deserialize)]
35
+ pub struct ThinkingBlock {
36
+ #[serde(rename = "type")]
37
+ pub block_type: String,
38
+ pub content: String,
39
+ #[serde(skip_serializing_if = "Option::is_none")]
40
+ pub start_time: Option<f64>,
41
+ #[serde(skip_serializing_if = "Option::is_none")]
42
+ pub end_time: Option<f64>,
43
+ }
44
+
45
+ /// Tool call function definition
46
+ #[derive(Debug, Clone, Serialize, Deserialize)]
47
+ pub struct ToolCallFunction {
48
+ pub name: String,
49
+ pub arguments: String,
50
+ }
51
+
52
+ /// A tool call from model output
53
+ #[derive(Debug, Clone, Serialize, Deserialize)]
54
+ pub struct ToolCall {
55
+ pub id: String,
56
+ #[serde(rename = "type")]
57
+ pub call_type: String,
58
+ pub function: ToolCallFunction,
59
+ }
60
+
61
+ /// Content can be either a string or structured content
62
+ #[derive(Debug, Clone, Serialize, Deserialize)]
63
+ #[serde(untagged)]
64
+ pub enum Content {
65
+ Text(String),
66
+ Structured(Vec<ContentPart>),
67
+ }
68
+
69
+ impl Default for Content {
70
+ fn default() -> Self {
71
+ Self::Text(String::new())
72
+ }
73
+ }
74
+
75
+ impl Content {
76
+ pub fn as_str(&self) -> String {
77
+ match self {
78
+ Content::Text(s) => s.clone(),
79
+ Content::Structured(parts) => parts
80
+ .iter()
81
+ .filter_map(|p| match p {
82
+ ContentPart::Text { text } => Some(text.clone()),
83
+ _ => None,
84
+ })
85
+ .collect::<Vec<_>>()
86
+ .join("\n"),
87
+ }
88
+ }
89
+ }
90
+
91
+ /// Part of structured content
92
+ #[derive(Debug, Clone, Serialize, Deserialize)]
93
+ #[serde(tag = "type", rename_all = "snake_case")]
94
+ pub enum ContentPart {
95
+ Text { text: String },
96
+ Image { image_url: ImageUrl },
97
+ }
98
+
99
+ /// Image URL reference
100
+ #[derive(Debug, Clone, Serialize, Deserialize)]
101
+ pub struct ImageUrl {
102
+ pub url: String,
103
+ #[serde(skip_serializing_if = "Option::is_none")]
104
+ pub detail: Option<String>,
105
+ }
106
+
107
+ /// A single item in the conversation history
108
+ #[derive(Debug, Clone, Default, Serialize, Deserialize)]
109
+ pub struct HistoryItem {
110
+ pub role: Role,
111
+ pub content: Content,
112
+ #[serde(skip_serializing_if = "Option::is_none")]
113
+ pub agent: Option<String>,
114
+ #[serde(skip_serializing_if = "Option::is_none")]
115
+ pub message_type: Option<MessageType>,
116
+ #[serde(skip_serializing_if = "Option::is_none")]
117
+ pub is_demo: Option<bool>,
118
+ #[serde(skip_serializing_if = "Option::is_none")]
119
+ pub thought: Option<String>,
120
+ #[serde(skip_serializing_if = "Option::is_none")]
121
+ pub action: Option<String>,
122
+ #[serde(skip_serializing_if = "Option::is_none")]
123
+ pub tool_calls: Option<Vec<ToolCall>>,
124
+ #[serde(skip_serializing_if = "Option::is_none")]
125
+ pub tool_call_ids: Option<Vec<String>>,
126
+ #[serde(skip_serializing_if = "Option::is_none")]
127
+ pub thinking_blocks: Option<Vec<ThinkingBlock>>,
128
+ }
129
+
130
+ impl HistoryItem {
131
+ pub fn system(content: impl Into<String>) -> Self {
132
+ Self {
133
+ role: Role::System,
134
+ content: Content::Text(content.into()),
135
+ message_type: Some(MessageType::System),
136
+ ..Default::default()
137
+ }
138
+ }
139
+
140
+ pub fn user(content: impl Into<String>) -> Self {
141
+ Self {
142
+ role: Role::User,
143
+ content: Content::Text(content.into()),
144
+ message_type: Some(MessageType::User),
145
+ ..Default::default()
146
+ }
147
+ }
148
+
149
+ pub fn assistant(content: impl Into<String>) -> Self {
150
+ Self {
151
+ role: Role::Assistant,
152
+ content: Content::Text(content.into()),
153
+ message_type: Some(MessageType::Assistant),
154
+ ..Default::default()
155
+ }
156
+ }
157
+
158
+ pub fn observation(content: impl Into<String>) -> Self {
159
+ Self {
160
+ role: Role::User,
161
+ content: Content::Text(content.into()),
162
+ message_type: Some(MessageType::Observation),
163
+ ..Default::default()
164
+ }
165
+ }
166
+
167
+ pub fn action(thought: impl Into<String>, action: impl Into<String>) -> Self {
168
+ let thought_str = thought.into();
169
+ let action_str = action.into();
170
+ Self {
171
+ role: Role::Assistant,
172
+ content: Content::Text(format!("{}\n```\n{}\n```", thought_str, action_str)),
173
+ message_type: Some(MessageType::Action),
174
+ thought: Some(thought_str),
175
+ action: Some(action_str),
176
+ ..Default::default()
177
+ }
178
+ }
179
+ }
180
+
181
+ /// Conversation history
182
+ pub type History = Vec<HistoryItem>;
183
+
184
+ /// Environment state at a point in time. Every named field is optional and is left out of the serialized form when `None`; keys that match no named field are kept in `extra`.
185
+ #[derive(Debug, Clone, Default, Serialize, Deserialize)]
186
+ pub struct EnvironmentState {
187
+ #[serde(skip_serializing_if = "Option::is_none")]
188
+ pub working_dir: Option<String>, // presumably the environment's current directory — confirm against the producer
189
+ #[serde(skip_serializing_if = "Option::is_none")]
190
+ pub open_files: Option<Vec<String>>,
191
+ #[serde(skip_serializing_if = "Option::is_none")]
192
+ pub git_status: Option<String>,
193
+ #[serde(skip_serializing_if = "Option::is_none")]
194
+ pub diff: Option<String>,
195
+ #[serde(flatten)] // entries are read from / written to the same level as the named fields
196
+ pub extra: HashMap<String, serde_json::Value>,
197
+ }
198
+
199
+ /// Query message for tracking: a role, plain-string content, and an optional message type that is omitted from serialized output when `None`.
200
+ #[derive(Debug, Clone, Serialize, Deserialize)]
201
+ pub struct QueryMessage {
202
+ pub role: Role,
203
+ pub content: String,
204
+ #[serde(skip_serializing_if = "Option::is_none")]
205
+ pub message_type: Option<MessageType>,
206
+ }
207
+
208
+ /// Output from a single agent step. Derives `Default`, so a value can be built from a few fields plus `..Default::default()`.
209
+ #[derive(Debug, Clone, Default, Serialize, Deserialize)]
210
+ pub struct StepOutput {
211
+ pub done: bool,
212
+ pub thought: String,
213
+ pub action: String,
214
+ pub observation: String,
215
+ #[serde(skip_serializing_if = "Option::is_none")]
216
+ pub submission: Option<String>,
217
+ #[serde(skip_serializing_if = "Option::is_none")]
218
+ pub exit_status: Option<String>, // when set, also exported by `to_template_format_dict`
219
+ pub execution_time: f64, // unit is not shown here — presumably seconds; confirm
220
+ pub state: EnvironmentState,
221
+ pub query: Vec<QueryMessage>,
222
+ #[serde(default)] // a missing key deserializes to an empty map
223
+ pub extra_info: HashMap<String, serde_json::Value>,
224
+ pub output: String, // becomes `TrajectoryStep::response` in the `From<&StepOutput>` conversion
225
+ #[serde(skip_serializing_if = "Option::is_none")]
226
+ pub tool_calls: Option<Vec<ToolCall>>,
227
+ #[serde(skip_serializing_if = "Option::is_none")]
228
+ pub tool_call_ids: Option<Vec<String>>,
229
+ #[serde(skip_serializing_if = "Option::is_none")]
230
+ pub thinking_blocks: Option<Vec<ThinkingBlock>>,
231
+ }
232
+
233
+ impl StepOutput {
234
+ pub fn to_template_format_dict(&self) -> HashMap<String, String> {
235
+ let mut dict = HashMap::new();
236
+ dict.insert("thought".to_string(), self.thought.clone());
237
+ dict.insert("action".to_string(), self.action.clone());
238
+ dict.insert("observation".to_string(), self.observation.clone());
239
+ if let Some(ref status) = self.exit_status {
240
+ dict.insert("exit_status".to_string(), status.clone());
241
+ }
242
+ dict
243
+ }
244
+ }
245
+
246
+ /// A single step in a trajectory. It carries a subset of `StepOutput`'s fields; see the `From<&StepOutput>` impl for the mapping.
247
+ #[derive(Debug, Clone, Default, Serialize, Deserialize)]
248
+ pub struct TrajectoryStep {
249
+ pub action: String,
250
+ pub observation: String,
251
+ pub response: String, // filled from `StepOutput::output` by the `From<&StepOutput>` conversion
252
+ pub thought: String,
253
+ pub execution_time: f64, // unit is not shown here — presumably seconds; confirm
254
+ pub state: EnvironmentState,
255
+ pub query: Vec<QueryMessage>,
256
+ #[serde(default)] // a missing key deserializes to an empty map
257
+ pub extra_info: HashMap<String, serde_json::Value>,
258
+ }
259
+
260
+ impl From<&StepOutput> for TrajectoryStep {
261
+ fn from(step: &StepOutput) -> Self {
262
+ Self {
263
+ action: step.action.clone(),
264
+ observation: step.observation.clone(),
265
+ response: step.output.clone(),
266
+ thought: step.thought.clone(),
267
+ execution_time: step.execution_time,
268
+ state: step.state.clone(),
269
+ query: step.query.clone(),
270
+ extra_info: step.extra_info.clone(),
271
+ }
272
+ }
273
+ }
274
+
275
+ /// Full trajectory of an agent run: the sequence of `TrajectoryStep` records, stored as a plain `Vec`.
276
+ pub type Trajectory = Vec<TrajectoryStep>;
277
+
278
+ /// Model statistics for tracking costs and usage. `Default` yields all-zero counters; two values are combined with `ModelStats::add`.
279
+ #[derive(Debug, Clone, Default, Serialize, Deserialize)]
280
+ pub struct ModelStats {
281
+ pub instance_cost: f64, // currency/unit is not stated in this file
282
+ pub tokens_sent: u64,
283
+ pub tokens_received: u64,
284
+ pub api_calls: u64,
285
+ }
286
+
287
+ impl ModelStats {
288
+ pub fn add(&self, other: &ModelStats) -> ModelStats {
289
+ ModelStats {
290
+ instance_cost: self.instance_cost + other.instance_cost,
291
+ tokens_sent: self.tokens_sent + other.tokens_sent,
292
+ tokens_received: self.tokens_received + other.tokens_received,
293
+ api_calls: self.api_calls + other.api_calls,
294
+ }
295
+ }
296
+ }
297
+
298
+ /// Agent run information. Every named field is optional and is left out of the serialized form when `None`; keys that match no named field are kept in `extra`.
299
+ #[derive(Debug, Clone, Default, Serialize, Deserialize)]
300
+ pub struct AgentInfo {
301
+ #[serde(skip_serializing_if = "Option::is_none")]
302
+ pub swe_agent_version: Option<String>,
303
+ #[serde(skip_serializing_if = "Option::is_none")]
304
+ pub submission: Option<String>,
305
+ #[serde(skip_serializing_if = "Option::is_none")]
306
+ pub exit_status: Option<String>,
307
+ #[serde(skip_serializing_if = "Option::is_none")]
308
+ pub model_stats: Option<ModelStats>,
309
+ #[serde(flatten)] // entries are read from / written to the same level as the named fields
310
+ pub extra: HashMap<String, serde_json::Value>,
311
+ }
312
+
313
+ /// Result of an agent run: the run-level `AgentInfo` together with the recorded `Trajectory`. Both fields are required when deserializing.
314
+ #[derive(Debug, Clone, Serialize, Deserialize)]
315
+ pub struct AgentRunResult {
316
+ pub info: AgentInfo,
317
+ pub trajectory: Trajectory,
318
+ }
319
+
320
+ /// Output from a model query: the message text plus optional tool calls and thinking blocks, each omitted from serialized output when `None`.
321
+ #[derive(Debug, Clone, Default, Serialize, Deserialize)]
322
+ pub struct ModelOutput {
323
+ pub message: String,
324
+ #[serde(skip_serializing_if = "Option::is_none")]
325
+ pub tool_calls: Option<Vec<ToolCall>>,
326
+ #[serde(skip_serializing_if = "Option::is_none")]
327
+ pub thinking_blocks: Option<Vec<ThinkingBlock>>,
328
+ }
329
+
330
+ /// API response from LLM providers. Both parts are optional, so a payload lacking either one still deserializes.
331
+ #[derive(Debug, Clone, Serialize, Deserialize)]
332
+ pub struct ApiResponse {
333
+ #[serde(skip_serializing_if = "Option::is_none")]
334
+ pub choices: Option<Vec<ApiChoice>>,
335
+ #[serde(skip_serializing_if = "Option::is_none")]
336
+ pub usage: Option<ApiUsage>,
337
+ }
338
+
339
+ /// A single choice in API response. It may carry a structured `message`, a bare `text`, both, or neither; which providers use which form is not shown here.
340
+ #[derive(Debug, Clone, Serialize, Deserialize)]
341
+ pub struct ApiChoice {
342
+ #[serde(skip_serializing_if = "Option::is_none")]
343
+ pub message: Option<ApiMessage>,
344
+ #[serde(skip_serializing_if = "Option::is_none")]
345
+ pub text: Option<String>,
346
+ }
347
+
348
+ /// Message in API response. All fields are optional; note that `role` is a free-form string here rather than the `Role` enum.
349
+ #[derive(Debug, Clone, Serialize, Deserialize)]
350
+ pub struct ApiMessage {
351
+ #[serde(skip_serializing_if = "Option::is_none")]
352
+ pub content: Option<String>,
353
+ #[serde(skip_serializing_if = "Option::is_none")]
354
+ pub role: Option<String>,
355
+ #[serde(skip_serializing_if = "Option::is_none")]
356
+ pub tool_calls: Option<Vec<ToolCall>>,
357
+ }
358
+
359
+ /// Usage statistics in API response. Each counter falls back to 0 when its key is missing from the input.
360
+ #[derive(Debug, Clone, Default, Serialize, Deserialize)]
361
+ pub struct ApiUsage {
362
+ #[serde(default)]
363
+ pub prompt_tokens: u64,
364
+ #[serde(default)]
365
+ pub completion_tokens: u64,
366
+ #[serde(default)]
367
+ pub total_tokens: u64, // stored as received; nothing here checks it equals prompt + completion
368
+ }
369
+
370
+ /// Batch instance for running multiple problems. Only `instance_id` is required; the other named fields are optional and unrecognised keys are kept in `extra`.
371
+ #[derive(Debug, Clone, Serialize, Deserialize)]
372
+ pub struct BatchInstance {
373
+ pub instance_id: String,
374
+ #[serde(skip_serializing_if = "Option::is_none")]
375
+ pub problem_statement: Option<String>,
376
+ #[serde(skip_serializing_if = "Option::is_none")]
377
+ pub repo: Option<String>,
378
+ #[serde(skip_serializing_if = "Option::is_none")]
379
+ pub base_commit: Option<String>,
380
+ #[serde(flatten)] // entries are read from / written to the same level as the named fields
381
+ pub extra: HashMap<String, serde_json::Value>,
382
+ }
383
+
384
+ /// Simple batch instance format: a required `id` and `problem_statement`, plus two optional repository locators that are omitted from serialized output when `None`.
385
+ #[derive(Debug, Clone, Serialize, Deserialize)]
386
+ pub struct SimpleBatchInstance {
387
+ pub id: String,
388
+ pub problem_statement: String,
389
+ #[serde(skip_serializing_if = "Option::is_none")]
390
+ pub repo_path: Option<String>,
391
+ #[serde(skip_serializing_if = "Option::is_none")]
392
+ pub github_url: Option<String>,
393
+ }
394
+
395
+ /// Retry configuration for API calls. No field is required on input: each one falls back to the `default_*` function named in its attribute, and `Default` uses the same functions.
396
+ #[derive(Debug, Clone, Serialize, Deserialize)]
397
+ pub struct RetryConfig {
398
+ #[serde(default = "default_retries")]
399
+ pub retries: u32,
400
+ #[serde(default = "default_min_wait")]
401
+ pub min_wait: u64, // time unit is not stated in this file — presumably seconds; confirm
402
+ #[serde(default = "default_max_wait")]
403
+ pub max_wait: u64, // same unit as `min_wait`, presumably — confirm
404
+ }
405
+
406
/// Fallback for `RetryConfig::retries`, shared by the serde `default`
/// attribute on that field and by `RetryConfig::default()`.
fn default_retries() -> u32 {
    const RETRIES: u32 = 20;
    RETRIES
}
409
+
410
/// Fallback for `RetryConfig::min_wait`, shared by the serde `default`
/// attribute on that field and by `RetryConfig::default()`.
/// NOTE(review): the time unit is not stated in this file.
fn default_min_wait() -> u64 {
    const MIN_WAIT: u64 = 10;
    MIN_WAIT
}
413
+
414
/// Fallback for `RetryConfig::max_wait`, shared by the serde `default`
/// attribute on that field and by `RetryConfig::default()`.
/// NOTE(review): the time unit is not stated in this file.
fn default_max_wait() -> u64 {
    const MAX_WAIT: u64 = 120;
    MAX_WAIT
}
417
+
418
+ impl Default for RetryConfig {
419
+ fn default() -> Self {
420
+ Self {
421
+ retries: default_retries(),
422
+ min_wait: default_min_wait(),
423
+ max_wait: default_max_wait(),
424
+ }
425
+ }
426
+ }
427
+
428
+ /// Template configuration for agent messages. No field is required on input: each falls back to its type's `Default`, to `None`, or to the `default_*` function named in its attribute.
429
+ #[derive(Debug, Clone, Serialize, Deserialize)]
430
+ pub struct TemplateConfig {
431
+ #[serde(default)] // empty string when missing
432
+ pub system_template: String,
433
+ #[serde(default)] // empty string when missing
434
+ pub instance_template: String,
435
+ #[serde(default = "default_next_step_template")]
436
+ pub next_step_template: String,
437
+ #[serde(default = "default_next_step_truncated_template")]
438
+ pub next_step_truncated_observation_template: String,
439
+ #[serde(default = "default_max_observation_length")]
440
+ pub max_observation_length: usize, // the default truncation message counts this in characters
441
+ #[serde(skip_serializing_if = "Option::is_none")]
442
+ pub next_step_no_output_template: Option<String>,
443
+ #[serde(skip_serializing_if = "Option::is_none")]
444
+ pub strategy_template: Option<String>,
445
+ #[serde(skip_serializing_if = "Option::is_none")]
446
+ pub demonstration_template: Option<String>,
447
+ #[serde(default)] // empty list when missing
448
+ pub demonstrations: Vec<String>, // presumably paths or identifiers of demonstration files — confirm
449
+ #[serde(default)] // false when missing
450
+ pub put_demos_in_history: bool,
451
+ #[serde(default)] // false when missing
452
+ pub disable_image_processing: bool,
453
+ #[serde(default = "default_shell_check_error_template")]
454
+ pub shell_check_error_template: String,
455
+ #[serde(default = "default_command_cancelled_template")]
456
+ pub command_cancelled_timeout_template: String,
457
+ }
458
+
459
/// Built-in text for `TemplateConfig::next_step_template`: the literal
/// prefix `Observation: ` followed by the `{{observation}}` marker.
fn default_next_step_template() -> String {
    String::from("Observation: {{observation}}")
}
462
+
463
/// Built-in text for `TemplateConfig::next_step_truncated_observation_template`.
///
/// It contains the `{{observation}}`, `{{max_observation_length}}` and
/// `{{elided_chars}}` markers and tells the reader that the response was
/// clipped.
fn default_next_step_truncated_template() -> String {
    const TEMPLATE: &str = "Observation: {{observation}}<response clipped>\n<NOTE>Observations should not exceed {{max_observation_length}} characters. {{elided_chars}} characters were elided.</NOTE>";
    TEMPLATE.to_owned()
}
466
+
467
/// Fallback for `TemplateConfig::max_observation_length`: one hundred thousand.
fn default_max_observation_length() -> usize {
    const MAX_LENGTH: usize = 100_000;
    MAX_LENGTH
}
470
+
471
/// Built-in text for `TemplateConfig::shell_check_error_template`, with the
/// `{{error_message}}` and `{{hint}}` markers on separate lines.
fn default_shell_check_error_template() -> String {
    const TEMPLATE: &str = "Your command contains syntax errors. Please fix them and try again.\nError: {{error_message}}\nHint: {{hint}}";
    TEMPLATE.to_owned()
}
474
+
475
/// Built-in text for `TemplateConfig::command_cancelled_timeout_template`,
/// with the `{{timeout}}` and `{{command}}` markers.
fn default_command_cancelled_template() -> String {
    String::from("Command cancelled after {{timeout}} seconds. The command was: {{command}}")
}
478
+
479
+ impl Default for TemplateConfig {
480
+ fn default() -> Self {
481
+ Self {
482
+ system_template: String::new(),
483
+ instance_template: String::new(),
484
+ next_step_template: default_next_step_template(),
485
+ next_step_truncated_observation_template: default_next_step_truncated_template(),
486
+ max_observation_length: default_max_observation_length(),
487
+ next_step_no_output_template: None,
488
+ strategy_template: None,
489
+ demonstration_template: None,
490
+ demonstrations: Vec::new(),
491
+ put_demos_in_history: false,
492
+ disable_image_processing: false,
493
+ shell_check_error_template: default_shell_check_error_template(),
494
+ command_cancelled_timeout_template: default_command_cancelled_template(),
495
+ }
496
+ }
497
+ }
498
+
499
#[cfg(test)]
mod tests {
    use super::*;

    // `HistoryItem::system` sets the system role and keeps the text verbatim.
    #[test]
    fn test_history_item_system() {
        let prompt = "You are a helpful assistant.";
        let built = HistoryItem::system(prompt);
        assert_eq!(built.role, Role::System);
        assert_eq!(built.content.as_str(), prompt);
    }

    // `HistoryItem::action` is an assistant message that also stores the
    // thought and the action separately.
    #[test]
    fn test_history_item_action() {
        let built = HistoryItem::action("I will run a command", "ls -la");
        assert_eq!(built.role, Role::Assistant);
        assert_eq!(built.thought.as_deref(), Some("I will run a command"));
        assert_eq!(built.action.as_deref(), Some("ls -la"));
    }

    // The template dict exposes the three text fields plus the exit status.
    #[test]
    fn test_step_output_to_template_dict() {
        let step = StepOutput {
            thought: String::from("thinking"),
            action: String::from("doing"),
            observation: String::from("seeing"),
            exit_status: Some(String::from("done")),
            ..Default::default()
        };
        let rendered = step.to_template_format_dict();
        let expected = [
            ("thought", "thinking"),
            ("action", "doing"),
            ("observation", "seeing"),
            ("exit_status", "done"),
        ];
        for (key, value) in expected.iter() {
            assert_eq!(rendered.get(*key).map(String::as_str), Some(*value));
        }
    }

    // `ModelStats::add` sums every counter field by field.
    #[test]
    fn test_model_stats_add() {
        let lhs = ModelStats {
            instance_cost: 1.0,
            tokens_sent: 100,
            tokens_received: 50,
            api_calls: 1,
        };
        let rhs = ModelStats {
            instance_cost: 2.0,
            tokens_sent: 200,
            tokens_received: 100,
            api_calls: 2,
        };
        let total = lhs.add(&rhs);
        assert_eq!(total.instance_cost, 3.0);
        assert_eq!(total.tokens_sent, 300);
        assert_eq!(total.tokens_received, 150);
        assert_eq!(total.api_calls, 3);
    }
}