@elizaos/sweagent-root 2.0.0-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (323) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +270 -0
  3. package/package.json +71 -0
  4. package/python/LICENSE +21 -0
  5. package/python/config/README.md +15 -0
  6. package/python/config/bash_only.yaml +222 -0
  7. package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
  8. package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
  9. package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
  10. package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
  11. package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
  12. package/python/config/coding_challenge.yaml +104 -0
  13. package/python/config/default.yaml +69 -0
  14. package/python/config/default_backticks.yaml +69 -0
  15. package/python/config/default_mm_no_images.yaml +82 -0
  16. package/python/config/default_mm_with_images.yaml +83 -0
  17. package/python/config/demo/default.yaml +80 -0
  18. package/python/config/demo/no_instructions.yaml +69 -0
  19. package/python/config/demo/only_bash.yaml +60 -0
  20. package/python/config/exotic/default_shell.yaml +52 -0
  21. package/python/config/exotic/windowed_replace.yaml +125 -0
  22. package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
  23. package/python/config/human/human.yaml +24 -0
  24. package/python/config/human/human_demo.yaml +52 -0
  25. package/python/config/sweagent_0_7/07.yaml +101 -0
  26. package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
  27. package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
  28. package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
  29. package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
  30. package/python/mlc_config.json +44 -0
  31. package/python/pyproject.toml +262 -0
  32. package/python/sweagent/__init__.py +114 -0
  33. package/python/sweagent/__main__.py +4 -0
  34. package/python/sweagent/agent/__init__.py +0 -0
  35. package/python/sweagent/agent/action_sampler.py +317 -0
  36. package/python/sweagent/agent/agents.py +1294 -0
  37. package/python/sweagent/agent/extra/shell_agent.py +106 -0
  38. package/python/sweagent/agent/history_processors.py +399 -0
  39. package/python/sweagent/agent/hooks/__init__.py +0 -0
  40. package/python/sweagent/agent/hooks/abstract.py +139 -0
  41. package/python/sweagent/agent/hooks/status.py +34 -0
  42. package/python/sweagent/agent/models.py +896 -0
  43. package/python/sweagent/agent/problem_statement.py +312 -0
  44. package/python/sweagent/agent/reviewer.py +664 -0
  45. package/python/sweagent/environment/__init__.py +0 -0
  46. package/python/sweagent/environment/hooks/__init__.py +0 -0
  47. package/python/sweagent/environment/hooks/abstract.py +60 -0
  48. package/python/sweagent/environment/hooks/status.py +28 -0
  49. package/python/sweagent/environment/repo.py +219 -0
  50. package/python/sweagent/environment/swe_env.py +276 -0
  51. package/python/sweagent/exceptions.py +54 -0
  52. package/python/sweagent/inspector/README.md +6 -0
  53. package/python/sweagent/inspector/__init__.py +0 -0
  54. package/python/sweagent/inspector/favicon.ico +0 -0
  55. package/python/sweagent/inspector/fileViewer.js +354 -0
  56. package/python/sweagent/inspector/icons/computer.png +0 -0
  57. package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
  58. package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
  59. package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
  60. package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
  61. package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
  62. package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
  63. package/python/sweagent/inspector/index.html +25 -0
  64. package/python/sweagent/inspector/server.py +354 -0
  65. package/python/sweagent/inspector/static.py +169 -0
  66. package/python/sweagent/inspector/style.css +454 -0
  67. package/python/sweagent/run/__init__.py +0 -0
  68. package/python/sweagent/run/_progress.py +158 -0
  69. package/python/sweagent/run/batch_instances.py +419 -0
  70. package/python/sweagent/run/common.py +387 -0
  71. package/python/sweagent/run/compare_runs.py +123 -0
  72. package/python/sweagent/run/extract_pred.py +19 -0
  73. package/python/sweagent/run/hooks/__init__.py +0 -0
  74. package/python/sweagent/run/hooks/abstract.py +67 -0
  75. package/python/sweagent/run/hooks/apply_patch.py +106 -0
  76. package/python/sweagent/run/hooks/open_pr.py +244 -0
  77. package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
  78. package/python/sweagent/run/inspector_cli.py +493 -0
  79. package/python/sweagent/run/merge_predictions.py +64 -0
  80. package/python/sweagent/run/quick_stats.py +96 -0
  81. package/python/sweagent/run/remove_unfinished.py +63 -0
  82. package/python/sweagent/run/rich_test.py +91 -0
  83. package/python/sweagent/run/run.py +147 -0
  84. package/python/sweagent/run/run_batch.py +442 -0
  85. package/python/sweagent/run/run_replay.py +219 -0
  86. package/python/sweagent/run/run_shell.py +155 -0
  87. package/python/sweagent/run/run_single.py +225 -0
  88. package/python/sweagent/run/run_traj_to_demo.py +85 -0
  89. package/python/sweagent/tools/__init__.py +0 -0
  90. package/python/sweagent/tools/bundle.py +57 -0
  91. package/python/sweagent/tools/commands.py +220 -0
  92. package/python/sweagent/tools/parsing.py +619 -0
  93. package/python/sweagent/tools/tools.py +430 -0
  94. package/python/sweagent/tools/utils.py +108 -0
  95. package/python/sweagent/types.py +102 -0
  96. package/python/sweagent/utils/__init__.py +0 -0
  97. package/python/sweagent/utils/config.py +80 -0
  98. package/python/sweagent/utils/files.py +27 -0
  99. package/python/sweagent/utils/github.py +118 -0
  100. package/python/sweagent/utils/jinja_warnings.py +14 -0
  101. package/python/sweagent/utils/log.py +175 -0
  102. package/python/sweagent/utils/patch_formatter.py +152 -0
  103. package/python/sweagent/utils/serialization.py +45 -0
  104. package/python/tests/__init__.py +0 -0
  105. package/python/tests/conftest.py +191 -0
  106. package/python/tests/test_agent.py +258 -0
  107. package/python/tests/test_batch_instance.py +43 -0
  108. package/python/tests/test_commands/_interactive_dummy.py +35 -0
  109. package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
  110. package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
  111. package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
  112. package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
  113. package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
  114. package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
  115. package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
  116. package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
  117. package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
  118. package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
  119. package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
  120. package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
  121. package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
  122. package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
  123. package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
  124. package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
  125. package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
  126. package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
  127. package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
  128. package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
  129. package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
  130. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
  131. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
  132. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
  133. package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
  134. package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
  135. package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
  136. package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
  137. package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
  138. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
  139. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
  140. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
  141. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
  142. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
  143. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
  144. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
  145. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
  146. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
  147. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
  148. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
  149. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
  150. package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
  151. package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
  152. package/python/tests/test_data/data_sources/human_eval.json +1 -0
  153. package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
  154. package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
  155. package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
  156. package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
  157. package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
  158. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
  159. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
  160. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
  161. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
  162. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
  163. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
  164. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
  165. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
  166. package/python/tests/test_env.py +66 -0
  167. package/python/tests/test_env_utils.py +129 -0
  168. package/python/tests/test_history_processors.py +40 -0
  169. package/python/tests/test_models.py +23 -0
  170. package/python/tests/test_openai_live.py +164 -0
  171. package/python/tests/test_packaging.py +7 -0
  172. package/python/tests/test_parsing.py +131 -0
  173. package/python/tests/test_problem_statement_multimodal.py +111 -0
  174. package/python/tests/test_quick_stats.py +42 -0
  175. package/python/tests/test_run.py +37 -0
  176. package/python/tests/test_run_batch.py +110 -0
  177. package/python/tests/test_run_hooks.py +114 -0
  178. package/python/tests/test_run_replay.py +33 -0
  179. package/python/tests/test_run_single.py +125 -0
  180. package/python/tests/test_tools_command_parsing.py +193 -0
  181. package/python/tests/test_utils.py +15 -0
  182. package/python/tests/tools/__init__.py +0 -0
  183. package/python/tests/tools/conftest.py +12 -0
  184. package/python/tests/tools/test_default_utils.py +153 -0
  185. package/python/tests/tools/test_edit_replace.py +0 -0
  186. package/python/tests/tools/test_split_string.py +82 -0
  187. package/python/tests/utils.py +29 -0
  188. package/python/tools/diff_state/bin/_state_diff_state +52 -0
  189. package/python/tools/diff_state/config.yaml +2 -0
  190. package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
  191. package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
  192. package/python/tools/edit_anthropic/config.yaml +56 -0
  193. package/python/tools/edit_anthropic/install.sh +3 -0
  194. package/python/tools/filemap/bin/filemap +45 -0
  195. package/python/tools/filemap/config.yaml +9 -0
  196. package/python/tools/filemap/install.sh +2 -0
  197. package/python/tools/forfeit/bin/exit_forfeit +5 -0
  198. package/python/tools/forfeit/config.yaml +5 -0
  199. package/python/tools/image_tools/bin/view_image +36 -0
  200. package/python/tools/image_tools/config.yaml +9 -0
  201. package/python/tools/multilingual_setup/bin/do_nothing +2 -0
  202. package/python/tools/multilingual_setup/config.yaml +1 -0
  203. package/python/tools/multilingual_setup/install.sh +45 -0
  204. package/python/tools/registry/bin/_read_env +10 -0
  205. package/python/tools/registry/bin/_write_env +10 -0
  206. package/python/tools/registry/config.yaml +1 -0
  207. package/python/tools/registry/install.sh +6 -0
  208. package/python/tools/registry/lib/__init__.py +0 -0
  209. package/python/tools/registry/lib/registry.py +56 -0
  210. package/python/tools/review_on_submit_m/README.md +6 -0
  211. package/python/tools/review_on_submit_m/bin/submit +54 -0
  212. package/python/tools/review_on_submit_m/config.yaml +6 -0
  213. package/python/tools/review_on_submit_m/install.sh +0 -0
  214. package/python/tools/search/bin/find_file +31 -0
  215. package/python/tools/search/bin/search_dir +39 -0
  216. package/python/tools/search/bin/search_file +55 -0
  217. package/python/tools/search/config.yaml +37 -0
  218. package/python/tools/search/install.sh +3 -0
  219. package/python/tools/submit/bin/submit +17 -0
  220. package/python/tools/submit/config.yaml +5 -0
  221. package/python/tools/web_browser/bin/click_mouse +41 -0
  222. package/python/tools/web_browser/bin/close_site +28 -0
  223. package/python/tools/web_browser/bin/double_click_mouse +37 -0
  224. package/python/tools/web_browser/bin/drag_mouse +46 -0
  225. package/python/tools/web_browser/bin/execute_script_on_page +39 -0
  226. package/python/tools/web_browser/bin/get_console_output +48 -0
  227. package/python/tools/web_browser/bin/move_mouse +35 -0
  228. package/python/tools/web_browser/bin/navigate_back +33 -0
  229. package/python/tools/web_browser/bin/navigate_forward +33 -0
  230. package/python/tools/web_browser/bin/open_site +36 -0
  231. package/python/tools/web_browser/bin/press_keys_on_page +51 -0
  232. package/python/tools/web_browser/bin/reload_page +33 -0
  233. package/python/tools/web_browser/bin/run_web_browser_server +394 -0
  234. package/python/tools/web_browser/bin/screenshot_site +38 -0
  235. package/python/tools/web_browser/bin/scroll_on_page +40 -0
  236. package/python/tools/web_browser/bin/set_browser_window_size +40 -0
  237. package/python/tools/web_browser/bin/type_text +34 -0
  238. package/python/tools/web_browser/bin/wait_time +39 -0
  239. package/python/tools/web_browser/config.yaml +155 -0
  240. package/python/tools/web_browser/install.sh +22 -0
  241. package/python/tools/web_browser/lib/browser_manager.py +404 -0
  242. package/python/tools/web_browser/lib/web_browser_config.py +33 -0
  243. package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
  244. package/python/tools/web_browser/test_console.html +1 -0
  245. package/python/tools/windowed/bin/_state +25 -0
  246. package/python/tools/windowed/bin/create +29 -0
  247. package/python/tools/windowed/bin/goto +37 -0
  248. package/python/tools/windowed/bin/open +49 -0
  249. package/python/tools/windowed/bin/scroll_down +12 -0
  250. package/python/tools/windowed/bin/scroll_up +13 -0
  251. package/python/tools/windowed/config.yaml +38 -0
  252. package/python/tools/windowed/install.sh +15 -0
  253. package/python/tools/windowed/lib/__init__.py +0 -0
  254. package/python/tools/windowed/lib/flake8_utils.py +147 -0
  255. package/python/tools/windowed/lib/windowed_file.py +312 -0
  256. package/python/tools/windowed_edit_linting/bin/edit +128 -0
  257. package/python/tools/windowed_edit_linting/config.yaml +31 -0
  258. package/python/tools/windowed_edit_linting/install.sh +5 -0
  259. package/python/tools/windowed_edit_replace/bin/edit +172 -0
  260. package/python/tools/windowed_edit_replace/bin/insert +77 -0
  261. package/python/tools/windowed_edit_replace/config.yaml +60 -0
  262. package/python/tools/windowed_edit_replace/install.sh +5 -0
  263. package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
  264. package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
  265. package/python/tools/windowed_edit_rewrite/install.sh +5 -0
  266. package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
  267. package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
  268. package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
  269. package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
  270. package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
  271. package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
  272. package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
  273. package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
  274. package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
  275. package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
  276. package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
  277. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
  278. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  279. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  280. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
  281. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
  282. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
  283. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  284. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  285. package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
  286. package/rust/Cargo.toml +100 -0
  287. package/rust/README.md +49 -0
  288. package/rust/src/agent/action_sampler.rs +130 -0
  289. package/rust/src/agent/agents.rs +1029 -0
  290. package/rust/src/agent/history_processors.rs +277 -0
  291. package/rust/src/agent/hooks/mod.rs +208 -0
  292. package/rust/src/agent/mod.rs +24 -0
  293. package/rust/src/agent/models.rs +837 -0
  294. package/rust/src/agent/problem_statement.rs +355 -0
  295. package/rust/src/agent/reviewer.rs +505 -0
  296. package/rust/src/bin/sweagent.rs +784 -0
  297. package/rust/src/environment/deployment.rs +631 -0
  298. package/rust/src/environment/hooks/mod.rs +114 -0
  299. package/rust/src/environment/mod.rs +16 -0
  300. package/rust/src/environment/repo.rs +265 -0
  301. package/rust/src/environment/runtime.rs +237 -0
  302. package/rust/src/environment/swe_env.rs +248 -0
  303. package/rust/src/exceptions.rs +228 -0
  304. package/rust/src/lib.rs +68 -0
  305. package/rust/src/monitoring.rs +482 -0
  306. package/rust/src/run/hooks/mod.rs +134 -0
  307. package/rust/src/run/mod.rs +12 -0
  308. package/rust/src/run/run_batch.rs +563 -0
  309. package/rust/src/run/run_single.rs +196 -0
  310. package/rust/src/tools/bundle.rs +224 -0
  311. package/rust/src/tools/commands.rs +173 -0
  312. package/rust/src/tools/mod.rs +295 -0
  313. package/rust/src/tools/parsing.rs +354 -0
  314. package/rust/src/tools/registry.rs +143 -0
  315. package/rust/src/types.rs +554 -0
  316. package/rust/src/utils/config.rs +105 -0
  317. package/rust/src/utils/files.rs +137 -0
  318. package/rust/src/utils/github.rs +171 -0
  319. package/rust/src/utils/log.rs +65 -0
  320. package/rust/src/utils/mod.rs +17 -0
  321. package/rust/src/utils/serialization.rs +181 -0
  322. package/rust/src/utils/template.rs +173 -0
  323. package/typescript/README.md +335 -0
@@ -0,0 +1,784 @@
1
+ //! SWE-agent CLI
2
+ //!
3
+ //! Main command-line interface for the SWE-agent software engineering agent.
4
+
5
+ use clap::{Parser, Subcommand};
6
+ use elizaos_sweagent::run::{
7
+ run_batch_from_config, run_from_config, RunBatchConfig, RunSingleConfig,
8
+ };
9
+ use elizaos_sweagent::types::Trajectory;
10
+ use elizaos_sweagent::VERSION;
11
+ use serde::{Deserialize, Serialize};
12
+ use std::collections::HashMap;
13
+ use std::path::PathBuf;
14
+
15
+ #[derive(Parser)]
16
+ #[command(name = "sweagent")]
17
+ #[command(author = "elizaOS")]
18
+ #[command(version = VERSION)]
19
+ #[command(about = "SWE-agent: AI software engineering agent", long_about = None)]
20
+ struct Cli {
21
+ #[command(subcommand)]
22
+ command: Commands,
23
+ }
24
+
25
// Subcommands accepted by the CLI; the parsed value ends up in `Cli::command`.
//
// NOTE: the `///` comments inside this enum are consumed by clap's derive
// macro as help text, so maintainer notes are written as plain `//` comments.
// Many options set an explicit dotted long name (e.g. `--agent.model.name`);
// presumably these mirror keys of the run configuration - confirm against the
// config structs before renaming any of them.
+ #[derive(Subcommand)]
26
+ enum Commands {
27
+ /// Run swe-agent on a single problem statement instance
28
+ #[command(alias = "r")]
29
+ Run {
30
+ /// Path to configuration file
31
+ #[arg(long)]
32
+ config: Option<PathBuf>,
33
+
34
+ /// Model name to use
35
+ #[arg(long = "agent.model.name")]
36
+ model_name: Option<String>,
37
+
38
+ /// GitHub repository URL
39
+ #[arg(long = "env.repo.github_url")]
40
+ github_url: Option<String>,
41
+
42
+ /// Local repository path
43
+ #[arg(long = "env.repo.path")]
44
+ repo_path: Option<String>,
45
+
46
+ /// GitHub issue URL for problem statement
47
+ #[arg(long = "problem_statement.github_url")]
48
+ issue_url: Option<String>,
49
+
50
+ /// Path to problem statement file
51
+ #[arg(long = "problem_statement.path")]
52
+ problem_path: Option<String>,
53
+
54
+ /// Output directory
55
+ #[arg(long, default_value = "trajectories")]
56
+ output_dir: String,
57
+
58
+ /// Open a PR with the patch
59
+ #[arg(long = "actions.open_pr")]
60
+ open_pr: bool,
61
+
62
+ /// Apply patch to local repository
63
+ #[arg(long = "actions.apply_patch_locally")]
64
+ apply_patch: bool,
65
+
66
+ /// Enable verbose output
// `short` with no explicit value: clap derives the letter from the field name.
67
+ #[arg(long, short)]
68
+ verbose: bool,
69
+ },
70
+
71
+ /// Run swe-agent on a batch of problem statements
72
+ #[command(alias = "b")]
73
+ RunBatch {
74
+ /// Path to configuration file
75
+ #[arg(long)]
76
+ config: Option<PathBuf>,
77
+
78
+ /// Instance source type (swe_bench, file)
79
+ #[arg(long = "instances.type")]
80
+ instances_type: Option<String>,
81
+
82
+ /// SWE-bench subset
83
+ #[arg(long = "instances.subset")]
84
+ subset: Option<String>,
85
+
86
+ /// Dataset split
87
+ #[arg(long = "instances.split")]
88
+ split: Option<String>,
89
+
90
+ /// Slice specification
91
+ #[arg(long = "instances.slice")]
92
+ slice: Option<String>,
93
+
94
+ /// Shuffle instances
95
+ #[arg(long = "instances.shuffle")]
96
+ shuffle: bool,
97
+
98
+ /// Filter instances by regex
99
+ #[arg(long = "instances.filter")]
100
+ filter: Option<String>,
101
+
102
+ /// Path to instances file
103
+ #[arg(long = "instances.path")]
104
+ instances_path: Option<String>,
105
+
106
+ /// Model name
107
+ #[arg(long = "agent.model.name")]
108
+ model_name: Option<String>,
109
+
110
+ /// Cost limit per instance
111
+ #[arg(long = "agent.model.per_instance_cost_limit")]
112
+ cost_limit: Option<f64>,
113
+
114
+ /// Output directory
115
+ #[arg(long, default_value = "trajectories")]
116
+ output_dir: String,
117
+
118
+ /// Number of parallel workers
119
+ #[arg(long, default_value = "1")]
120
+ num_workers: usize,
121
+
122
+ /// Redo existing trajectories
123
+ #[arg(long)]
124
+ redo_existing: bool,
125
+ },
126
+
127
+ /// Open a trajectory file and display info
128
+ #[command(alias = "i")]
129
+ Inspect {
130
+ /// Path to trajectory file or directory
// No `long`/`short` here, so this is a positional argument; it defaults to ".".
131
+ #[arg(default_value = ".")]
132
+ trajectory_path: PathBuf,
133
+
134
+ /// Path to data file for gold patches
135
+ #[arg(long, short)]
136
+ data_path: Option<PathBuf>,
137
+
138
+ /// Show full messages (not truncated)
139
+ #[arg(long)]
140
+ full: bool,
141
+ },
142
+
143
+ /// Calculate quick statistics from trajectories
144
+ #[command(alias = "qs")]
145
+ QuickStats {
146
+ /// Directory to search for .traj files
// Positional argument, defaulting to the current directory.
147
+ #[arg(default_value = ".")]
148
+ directory: PathBuf,
149
+ },
150
+
// Unlike most other subcommands, MergePreds declares no alias.
151
+ /// Merge multiple prediction files
152
+ MergePreds {
153
+ /// Directories containing predictions
// No `#[arg]` attribute: a positional argument that may be repeated.
154
+ directories: Vec<PathBuf>,
155
+
156
+ /// Output file
157
+ #[arg(long, short)]
158
+ output: Option<PathBuf>,
159
+ },
160
+
161
+ /// Remove unfinished trajectories
162
+ #[command(alias = "ru")]
163
+ RemoveUnfinished {
164
+ /// Base directory
165
+ #[arg(long, default_value = ".")]
166
+ base_dir: PathBuf,
167
+
168
+ /// Actually remove (dry run by default)
169
+ #[arg(long)]
170
+ remove: bool,
171
+ },
172
+
173
+ /// Compare multiple run results
174
+ #[command(alias = "cr")]
175
+ CompareRuns {
176
+ /// Paths to results files or directories
// No `#[arg]` attribute: a positional argument that may be repeated.
177
+ paths: Vec<PathBuf>,
178
+
179
+ /// Show instances with same results
180
+ #[arg(long)]
181
+ show_same: bool,
182
+ },
183
+
// RunReplay declares no alias either.
184
+ /// Replay a trajectory file
185
+ RunReplay {
186
+ /// Path to trajectory file
// Neither `Option` nor defaulted, so clap treats this option as required.
187
+ #[arg(long)]
188
+ traj_path: PathBuf,
189
+
190
+ /// Override deployment type
191
+ #[arg(long)]
192
+ deployment: Option<String>,
193
+
194
+ /// Output directory
195
+ #[arg(long, default_value = "trajectories")]
196
+ output_dir: String,
197
+
198
+ /// Only execute forward passes
199
+ #[arg(long)]
200
+ forward_only: bool,
201
+
202
+ /// Number of forward passes
203
+ #[arg(long, default_value = "0")]
204
+ n_forward: usize,
205
+ },
206
+ }
207
+
208
+ /// Trajectory file format
// Top-level shape of a trajectory file as parsed by `load_trajectory`, which
// deserializes it with `serde_json`. Every field carries `#[serde(default)]`,
// so a key missing from the input falls back to that field type's `Default`.
209
+ #[derive(Debug, Serialize, Deserialize)]
210
+ struct TrajectoryFile {
// The recorded steps; `inspect_trajectory` iterates over them and prints each
// step's thought, action, observation and execution time.
211
+ #[serde(default)]
212
+ trajectory: Trajectory,
// Run metadata: instance id, exit status, submission and model statistics.
213
+ #[serde(default)]
214
+ info: TrajectoryInfo,
// Kept as untyped JSON values; nothing in the visible part of this file reads it.
215
+ #[serde(default)]
216
+ history: Vec<serde_json::Value>,
217
+ }
218
+
219
+ #[derive(Debug, Default, Serialize, Deserialize)]
220
+ struct TrajectoryInfo {
221
+ #[serde(default)]
222
+ instance_id: String,
223
+ #[serde(default)]
224
+ exit_status: String,
225
+ #[serde(default)]
226
+ submission: Option<String>,
227
+ #[serde(default)]
228
+ model_stats: Option<ModelStatsInfo>,
229
+ }
230
+
231
+ #[derive(Debug, Default, Serialize, Deserialize)]
232
+ struct ModelStatsInfo {
233
+ #[serde(default)]
234
+ instance_cost: f64,
235
+ #[serde(default)]
236
+ tokens_sent: u64,
237
+ #[serde(default)]
238
+ tokens_received: u64,
239
+ #[serde(default)]
240
+ api_calls: u64,
241
+ }
242
+
243
+ /// Statistics for a collection of trajectories
// Accumulator filled by `calculate_stats`; all counters start at zero via
// the derived `Default`.
244
+ #[derive(Debug, Default)]
245
+ struct TrajectoryStats {
// Number of trajectory paths processed (counted even when loading fails).
246
+ total: usize,
// Trajectories whose submission is present and not blank after trimming.
247
+ submitted: usize,
// Trajectories whose submission is missing or blank after trimming.
248
+ empty_submission: usize,
// Trajectories whose exit status contains "error"/"Error", plus files that
// failed to load.
249
+ errored: usize,
// Sums of the corresponding `ModelStatsInfo` fields over loaded trajectories.
250
+ total_cost: f64,
251
+ total_tokens_sent: u64,
252
+ total_tokens_received: u64,
253
+ total_api_calls: u64,
// Count per exit-status string; an empty status is recorded as "unknown".
254
+ exit_statuses: HashMap<String, usize>,
255
+ }
256
+
257
/// Collects every `.traj` file at or below `dir`.
///
/// * If `dir` is itself a regular file with the `traj` extension, the result
///   is just that path.
/// * Otherwise `dir` is walked recursively. Directories that cannot be read
///   (or a `dir` that does not exist) are skipped silently, so the search is
///   best-effort and never fails.
///
/// The returned paths are sorted, so the result does not depend on the order
/// in which the filesystem enumerates directory entries. Each directory is
/// visited at most once (keyed by its canonical path), which keeps symlink
/// cycles from causing unbounded traversal.
fn find_trajectory_files(dir: &std::path::Path) -> Vec<PathBuf> {
    /// True when `candidate` is a regular file named `*.traj`.
    fn is_trajectory_file(candidate: &std::path::Path) -> bool {
        candidate.is_file() && candidate.extension().is_some_and(|e| e == "traj")
    }

    if is_trajectory_file(dir) {
        return vec![dir.to_path_buf()];
    }

    let mut files = Vec::new();
    let mut visited = std::collections::HashSet::new();
    let mut pending = vec![dir.to_path_buf()];

    while let Some(current) = pending.pop() {
        // Canonicalize so a directory reached again through a symlink is
        // recognized; fall back to the raw path if canonicalization fails.
        let identity = std::fs::canonicalize(&current).unwrap_or_else(|_| current.clone());
        if !visited.insert(identity) {
            continue;
        }

        // Unreadable directories are skipped on purpose (best-effort search).
        let Ok(entries) = std::fs::read_dir(&current) else {
            continue;
        };

        for entry in entries.flatten() {
            let path = entry.path();
            if is_trajectory_file(&path) {
                files.push(path);
            } else if path.is_dir() {
                pending.push(path);
            }
        }
    }

    files.sort();
    files
}
278
+
279
+ fn load_trajectory(path: &PathBuf) -> anyhow::Result<TrajectoryFile> {
280
+ let content = std::fs::read_to_string(path)?;
281
+ let traj: TrajectoryFile = serde_json::from_str(&content)?;
282
+ Ok(traj)
283
+ }
284
+
285
+ fn inspect_trajectory(path: &PathBuf, full: bool) -> anyhow::Result<()> {
286
+ let traj = load_trajectory(path)?;
287
+
288
+ println!("=== Trajectory: {} ===", path.display());
289
+ println!("Instance ID: {}", traj.info.instance_id);
290
+ println!("Exit Status: {}", traj.info.exit_status);
291
+
292
+ if let Some(ref stats) = traj.info.model_stats {
293
+ println!("\n--- Model Statistics ---");
294
+ println!("Cost: ${:.4}", stats.instance_cost);
295
+ println!("Tokens sent: {}", stats.tokens_sent);
296
+ println!("Tokens received: {}", stats.tokens_received);
297
+ println!("API calls: {}", stats.api_calls);
298
+ }
299
+
300
+ println!("\n--- Trajectory ({} steps) ---", traj.trajectory.len());
301
+ for (idx, step) in traj.trajectory.iter().enumerate() {
302
+ println!("\n[Step {}]", idx + 1);
303
+
304
+ // Display thought if present
305
+ if !step.thought.is_empty() {
306
+ let thought = if full || step.thought.len() <= 200 {
307
+ step.thought.clone()
308
+ } else {
309
+ format!("{}... (truncated)", &step.thought[..200])
310
+ };
311
+ println!("Thought: {}", thought);
312
+ }
313
+
314
+ // Display action
315
+ if !step.action.is_empty() {
316
+ let action = if full || step.action.len() <= 200 {
317
+ step.action.clone()
318
+ } else {
319
+ format!("{}... (truncated)", &step.action[..200])
320
+ };
321
+ println!("Action: {}", action);
322
+ }
323
+
324
+ // Display observation (truncated by default)
325
+ if !step.observation.is_empty() {
326
+ let obs = if full || step.observation.len() <= 500 {
327
+ step.observation.clone()
328
+ } else {
329
+ format!("{}... (truncated)", &step.observation[..500])
330
+ };
331
+ println!("Observation: {}", obs);
332
+ }
333
+
334
+ // Display execution time
335
+ println!("Execution time: {:.2}s", step.execution_time);
336
+ }
337
+
338
+ if let Some(ref submission) = traj.info.submission {
339
+ println!("\n--- Submission ---");
340
+ let sub_display = if full || submission.len() <= 1000 {
341
+ submission.clone()
342
+ } else {
343
+ format!(
344
+ "{}... (truncated, {} bytes total)",
345
+ &submission[..1000],
346
+ submission.len()
347
+ )
348
+ };
349
+ println!("{}", sub_display);
350
+ }
351
+
352
+ Ok(())
353
+ }
354
+
355
+ fn calculate_stats(files: &[PathBuf]) -> TrajectoryStats {
356
+ let mut stats = TrajectoryStats::default();
357
+
358
+ for path in files {
359
+ stats.total += 1;
360
+
361
+ match load_trajectory(path) {
362
+ Ok(traj) => {
363
+ // Track exit status
364
+ let status = if traj.info.exit_status.is_empty() {
365
+ "unknown".to_string()
366
+ } else {
367
+ traj.info.exit_status.clone()
368
+ };
369
+ *stats.exit_statuses.entry(status.clone()).or_insert(0) += 1;
370
+
371
+ if status.contains("error") || status.contains("Error") {
372
+ stats.errored += 1;
373
+ }
374
+
375
+ // Track submission
376
+ if let Some(ref sub) = traj.info.submission {
377
+ if sub.trim().is_empty() {
378
+ stats.empty_submission += 1;
379
+ } else {
380
+ stats.submitted += 1;
381
+ }
382
+ } else {
383
+ stats.empty_submission += 1;
384
+ }
385
+
386
+ // Track model stats
387
+ if let Some(ref model_stats) = traj.info.model_stats {
388
+ stats.total_cost += model_stats.instance_cost;
389
+ stats.total_tokens_sent += model_stats.tokens_sent;
390
+ stats.total_tokens_received += model_stats.tokens_received;
391
+ stats.total_api_calls += model_stats.api_calls;
392
+ }
393
+ }
394
+ Err(e) => {
395
+ tracing::warn!(path = ?path, error = %e, "Failed to load trajectory");
396
+ stats.errored += 1;
397
+ }
398
+ }
399
+ }
400
+
401
+ stats
402
+ }
403
+
404
+ fn find_unfinished(dir: &PathBuf) -> Vec<PathBuf> {
405
+ let files = find_trajectory_files(dir);
406
+ let mut unfinished = Vec::new();
407
+
408
+ for path in files {
409
+ if let Ok(traj) = load_trajectory(&path) {
410
+ // A trajectory is unfinished if it has no submission and no definitive exit status
411
+ let is_finished = traj.info.submission.is_some()
412
+ || traj.info.exit_status.contains("submitted")
413
+ || traj.info.exit_status.contains("cost_limit")
414
+ || traj.info.exit_status.contains("error");
415
+
416
+ if !is_finished {
417
+ unfinished.push(path);
418
+ }
419
+ }
420
+ }
421
+
422
+ unfinished
423
+ }
424
+
425
+ fn merge_predictions(dirs: &[PathBuf], output: Option<&PathBuf>) -> anyhow::Result<()> {
426
+ let mut all_predictions: HashMap<String, serde_json::Value> = HashMap::new();
427
+
428
+ for dir in dirs {
429
+ let files = find_trajectory_files(dir);
430
+ for path in files {
431
+ if let Ok(traj) = load_trajectory(&path) {
432
+ if let Some(ref submission) = traj.info.submission {
433
+ all_predictions.insert(
434
+ traj.info.instance_id.clone(),
435
+ serde_json::json!({
436
+ "instance_id": traj.info.instance_id,
437
+ "model_patch": submission,
438
+ "model_name_or_path": path.display().to_string(),
439
+ }),
440
+ );
441
+ }
442
+ }
443
+ }
444
+ }
445
+
446
+ let predictions: Vec<_> = all_predictions.values().collect();
447
+ let json = serde_json::to_string_pretty(&predictions)?;
448
+
449
+ if let Some(output_path) = output {
450
+ std::fs::write(output_path, &json)?;
451
+ println!(
452
+ "Wrote {} predictions to {}",
453
+ predictions.len(),
454
+ output_path.display()
455
+ );
456
+ } else {
457
+ println!("{}", json);
458
+ }
459
+
460
+ Ok(())
461
+ }
462
+
463
+ fn compare_runs(paths: &[PathBuf], show_same: bool) -> anyhow::Result<()> {
464
+ // Load predictions from each run
465
+ let mut runs: Vec<(PathBuf, HashMap<String, String>)> = Vec::new();
466
+
467
+ for path in paths {
468
+ let mut predictions = HashMap::new();
469
+ let files = find_trajectory_files(path);
470
+
471
+ for file in files {
472
+ if let Ok(traj) = load_trajectory(&file) {
473
+ let status = if traj.info.submission.is_some() {
474
+ "submitted"
475
+ } else {
476
+ "no_submission"
477
+ };
478
+ predictions.insert(traj.info.instance_id, status.to_string());
479
+ }
480
+ }
481
+
482
+ runs.push((path.clone(), predictions));
483
+ }
484
+
485
+ // Find all instance IDs
486
+ let mut all_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
487
+ for (_, preds) in &runs {
488
+ all_ids.extend(preds.keys().cloned());
489
+ }
490
+
491
+ println!(
492
+ "Comparing {} runs across {} instances\n",
493
+ runs.len(),
494
+ all_ids.len()
495
+ );
496
+
497
+ // Print header
498
+ print!("{:<40}", "Instance ID");
499
+ for (path, _) in &runs {
500
+ let name = path
501
+ .file_name()
502
+ .map(|n| n.to_string_lossy().to_string())
503
+ .unwrap_or_else(|| path.display().to_string());
504
+ print!("{:<20}", &name[..name.len().min(18)]);
505
+ }
506
+ println!();
507
+ println!("{}", "-".repeat(40 + runs.len() * 20));
508
+
509
+ // Print comparison
510
+ let mut same_count = 0;
511
+ let mut diff_count = 0;
512
+
513
+ for id in all_ids {
514
+ let statuses: Vec<&str> = runs
515
+ .iter()
516
+ .map(|(_, preds)| preds.get(&id).map(|s| s.as_str()).unwrap_or("-"))
517
+ .collect();
518
+
519
+ let all_same = statuses.iter().all(|s| *s == statuses[0]);
520
+
521
+ if all_same {
522
+ same_count += 1;
523
+ if !show_same {
524
+ continue;
525
+ }
526
+ } else {
527
+ diff_count += 1;
528
+ }
529
+
530
+ print!("{:<40}", &id[..id.len().min(38)]);
531
+ for status in &statuses {
532
+ print!("{:<20}", status);
533
+ }
534
+ println!();
535
+ }
536
+
537
+ println!("\nSummary: {} same, {} different", same_count, diff_count);
538
+
539
+ Ok(())
540
+ }
541
+
542
+ #[tokio::main]
543
+ async fn main() -> anyhow::Result<()> {
544
+ // Initialize logging
545
+ tracing_subscriber::fmt()
546
+ .with_env_filter(
547
+ tracing_subscriber::EnvFilter::try_from_default_env()
548
+ .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")),
549
+ )
550
+ .init();
551
+
552
+ let cli = Cli::parse();
553
+
554
+ match cli.command {
555
+ Commands::Run {
556
+ config,
557
+ model_name: _,
558
+ github_url: _,
559
+ repo_path: _,
560
+ issue_url: _,
561
+ problem_path: _,
562
+ output_dir,
563
+ open_pr,
564
+ apply_patch,
565
+ verbose,
566
+ } => {
567
+ if verbose {
568
+ tracing::info!("Running in verbose mode");
569
+ }
570
+
571
+ // Build configuration
572
+ let mut run_config = if let Some(config_path) = config {
573
+ let content = std::fs::read_to_string(&config_path)?;
574
+ serde_yaml::from_str(&content)?
575
+ } else {
576
+ RunSingleConfig::default()
577
+ };
578
+
579
+ run_config.output_dir = output_dir;
580
+ run_config.actions.open_pr = open_pr;
581
+ run_config.actions.apply_patch_locally = apply_patch;
582
+
583
+ let result = run_from_config(run_config).await?;
584
+
585
+ if let Some(submission) = result.info.submission {
586
+ println!("Submission:\n{}", submission);
587
+ }
588
+
589
+ println!("Exit status: {:?}", result.info.exit_status);
590
+ }
591
+
592
+ Commands::RunBatch {
593
+ config,
594
+ instances_type: _,
595
+ subset: _,
596
+ split: _,
597
+ slice: _,
598
+ shuffle: _,
599
+ filter: _,
600
+ instances_path: _,
601
+ model_name: _,
602
+ cost_limit: _,
603
+ output_dir,
604
+ num_workers,
605
+ redo_existing,
606
+ } => {
607
+ let mut batch_config = if let Some(config_path) = config {
608
+ let content = std::fs::read_to_string(&config_path)?;
609
+ serde_yaml::from_str(&content)?
610
+ } else {
611
+ RunBatchConfig::default()
612
+ };
613
+
614
+ batch_config.output_dir = output_dir;
615
+ batch_config.num_workers = num_workers;
616
+ batch_config.redo_existing = redo_existing;
617
+
618
+ let result = run_batch_from_config(batch_config).await?;
619
+
620
+ println!(
621
+ "Batch complete: {} completed, {} skipped, {} failed",
622
+ result.completed, result.skipped, result.failed
623
+ );
624
+ }
625
+
626
+ Commands::Inspect {
627
+ trajectory_path,
628
+ data_path: _,
629
+ full,
630
+ } => {
631
+ let files = find_trajectory_files(&trajectory_path);
632
+
633
+ if files.is_empty() {
634
+ println!("No trajectory files found in {:?}", trajectory_path);
635
+ return Ok(());
636
+ }
637
+
638
+ for file in files {
639
+ if let Err(e) = inspect_trajectory(&file, full) {
640
+ eprintln!("Error inspecting {}: {}", file.display(), e);
641
+ }
642
+ println!();
643
+ }
644
+ }
645
+
646
+ Commands::QuickStats { directory } => {
647
+ let files = find_trajectory_files(&directory);
648
+
649
+ if files.is_empty() {
650
+ println!("No trajectory files found in {:?}", directory);
651
+ return Ok(());
652
+ }
653
+
654
+ let stats = calculate_stats(&files);
655
+
656
+ println!("=== Trajectory Statistics ===");
657
+ println!("Directory: {:?}", directory);
658
+ println!("Total trajectories: {}", stats.total);
659
+ println!(
660
+ "Submitted: {} ({:.1}%)",
661
+ stats.submitted,
662
+ 100.0 * stats.submitted as f64 / stats.total as f64
663
+ );
664
+ println!(
665
+ "Empty submission: {} ({:.1}%)",
666
+ stats.empty_submission,
667
+ 100.0 * stats.empty_submission as f64 / stats.total as f64
668
+ );
669
+ println!(
670
+ "Errored: {} ({:.1}%)",
671
+ stats.errored,
672
+ 100.0 * stats.errored as f64 / stats.total as f64
673
+ );
674
+ println!();
675
+ println!("=== Model Usage ===");
676
+ println!("Total cost: ${:.4}", stats.total_cost);
677
+ println!(
678
+ "Average cost: ${:.4}",
679
+ stats.total_cost / stats.total as f64
680
+ );
681
+ println!("Total tokens sent: {}", stats.total_tokens_sent);
682
+ println!("Total tokens received: {}", stats.total_tokens_received);
683
+ println!("Total API calls: {}", stats.total_api_calls);
684
+ println!();
685
+ println!("=== Exit Statuses ===");
686
+ let mut sorted_statuses: Vec<_> = stats.exit_statuses.into_iter().collect();
687
+ sorted_statuses.sort_by(|a, b| b.1.cmp(&a.1));
688
+ for (status, count) in sorted_statuses {
689
+ println!(
690
+ " {}: {} ({:.1}%)",
691
+ status,
692
+ count,
693
+ 100.0 * count as f64 / stats.total as f64
694
+ );
695
+ }
696
+ }
697
+
698
+ Commands::MergePreds {
699
+ directories,
700
+ output,
701
+ } => {
702
+ merge_predictions(&directories, output.as_ref())?;
703
+ }
704
+
705
+ Commands::RemoveUnfinished { base_dir, remove } => {
706
+ let unfinished = find_unfinished(&base_dir);
707
+
708
+ if unfinished.is_empty() {
709
+ println!("No unfinished trajectories found in {:?}", base_dir);
710
+ return Ok(());
711
+ }
712
+
713
+ println!("Found {} unfinished trajectories:", unfinished.len());
714
+ for path in &unfinished {
715
+ println!(" {}", path.display());
716
+ }
717
+
718
+ if remove {
719
+ println!("\nRemoving...");
720
+ for path in &unfinished {
721
+ if let Err(e) = std::fs::remove_file(path) {
722
+ eprintln!("Failed to remove {}: {}", path.display(), e);
723
+ } else {
724
+ println!("Removed: {}", path.display());
725
+ }
726
+ }
727
+ println!("Done. Removed {} files.", unfinished.len());
728
+ } else {
729
+ println!("\nDry run. Use --remove to actually delete these files.");
730
+ }
731
+ }
732
+
733
+ Commands::CompareRuns { paths, show_same } => {
734
+ compare_runs(&paths, show_same)?;
735
+ }
736
+
737
+ Commands::RunReplay {
738
+ traj_path,
739
+ deployment: _,
740
+ output_dir: _,
741
+ forward_only: _,
742
+ n_forward: _,
743
+ } => {
744
+ // Load the trajectory
745
+ let traj = load_trajectory(&traj_path)?;
746
+
747
+ println!("=== Trajectory Replay ===");
748
+ println!("File: {}", traj_path.display());
749
+ println!("Instance: {}", traj.info.instance_id);
750
+ println!("Steps: {}", traj.trajectory.len());
751
+
752
+ // Extract actions from trajectory
753
+ let actions: Vec<String> = traj
754
+ .trajectory
755
+ .iter()
756
+ .filter(|step| !step.action.is_empty())
757
+ .map(|step| step.action.clone())
758
+ .collect();
759
+
760
+ println!("\n--- Extracted Actions ({}) ---", actions.len());
761
+ for (idx, action) in actions.iter().enumerate() {
762
+ let preview: String = if action.len() > 100 {
763
+ format!("{}...", &action[..100])
764
+ } else {
765
+ action.clone()
766
+ };
767
+ println!("[{}] {}", idx + 1, preview.replace('\n', "\\n"));
768
+ }
769
+
770
+ println!("\nReplay file generated. Use 'sweagent run' with --replay to execute.");
771
+
772
+ // Save replay file
773
+ let replay_path = traj_path.with_extension("replay.json");
774
+ let replay_data = serde_json::json!({
775
+ "instance_id": traj.info.instance_id,
776
+ "actions": actions,
777
+ });
778
+ std::fs::write(&replay_path, serde_json::to_string_pretty(&replay_data)?)?;
779
+ println!("Replay data saved to: {}", replay_path.display());
780
+ }
781
+ }
782
+
783
+ Ok(())
784
+ }