@elizaos/sweagent-root 2.0.0-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (323)
  1. package/LICENSE +21 -0
  2. package/README.md +270 -0
  3. package/package.json +71 -0
  4. package/python/LICENSE +21 -0
  5. package/python/config/README.md +15 -0
  6. package/python/config/bash_only.yaml +222 -0
  7. package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
  8. package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
  9. package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
  10. package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
  11. package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
  12. package/python/config/coding_challenge.yaml +104 -0
  13. package/python/config/default.yaml +69 -0
  14. package/python/config/default_backticks.yaml +69 -0
  15. package/python/config/default_mm_no_images.yaml +82 -0
  16. package/python/config/default_mm_with_images.yaml +83 -0
  17. package/python/config/demo/default.yaml +80 -0
  18. package/python/config/demo/no_instructions.yaml +69 -0
  19. package/python/config/demo/only_bash.yaml +60 -0
  20. package/python/config/exotic/default_shell.yaml +52 -0
  21. package/python/config/exotic/windowed_replace.yaml +125 -0
  22. package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
  23. package/python/config/human/human.yaml +24 -0
  24. package/python/config/human/human_demo.yaml +52 -0
  25. package/python/config/sweagent_0_7/07.yaml +101 -0
  26. package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
  27. package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
  28. package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
  29. package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
  30. package/python/mlc_config.json +44 -0
  31. package/python/pyproject.toml +262 -0
  32. package/python/sweagent/__init__.py +114 -0
  33. package/python/sweagent/__main__.py +4 -0
  34. package/python/sweagent/agent/__init__.py +0 -0
  35. package/python/sweagent/agent/action_sampler.py +317 -0
  36. package/python/sweagent/agent/agents.py +1294 -0
  37. package/python/sweagent/agent/extra/shell_agent.py +106 -0
  38. package/python/sweagent/agent/history_processors.py +399 -0
  39. package/python/sweagent/agent/hooks/__init__.py +0 -0
  40. package/python/sweagent/agent/hooks/abstract.py +139 -0
  41. package/python/sweagent/agent/hooks/status.py +34 -0
  42. package/python/sweagent/agent/models.py +896 -0
  43. package/python/sweagent/agent/problem_statement.py +312 -0
  44. package/python/sweagent/agent/reviewer.py +664 -0
  45. package/python/sweagent/environment/__init__.py +0 -0
  46. package/python/sweagent/environment/hooks/__init__.py +0 -0
  47. package/python/sweagent/environment/hooks/abstract.py +60 -0
  48. package/python/sweagent/environment/hooks/status.py +28 -0
  49. package/python/sweagent/environment/repo.py +219 -0
  50. package/python/sweagent/environment/swe_env.py +276 -0
  51. package/python/sweagent/exceptions.py +54 -0
  52. package/python/sweagent/inspector/README.md +6 -0
  53. package/python/sweagent/inspector/__init__.py +0 -0
  54. package/python/sweagent/inspector/favicon.ico +0 -0
  55. package/python/sweagent/inspector/fileViewer.js +354 -0
  56. package/python/sweagent/inspector/icons/computer.png +0 -0
  57. package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
  58. package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
  59. package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
  60. package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
  61. package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
  62. package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
  63. package/python/sweagent/inspector/index.html +25 -0
  64. package/python/sweagent/inspector/server.py +354 -0
  65. package/python/sweagent/inspector/static.py +169 -0
  66. package/python/sweagent/inspector/style.css +454 -0
  67. package/python/sweagent/run/__init__.py +0 -0
  68. package/python/sweagent/run/_progress.py +158 -0
  69. package/python/sweagent/run/batch_instances.py +419 -0
  70. package/python/sweagent/run/common.py +387 -0
  71. package/python/sweagent/run/compare_runs.py +123 -0
  72. package/python/sweagent/run/extract_pred.py +19 -0
  73. package/python/sweagent/run/hooks/__init__.py +0 -0
  74. package/python/sweagent/run/hooks/abstract.py +67 -0
  75. package/python/sweagent/run/hooks/apply_patch.py +106 -0
  76. package/python/sweagent/run/hooks/open_pr.py +244 -0
  77. package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
  78. package/python/sweagent/run/inspector_cli.py +493 -0
  79. package/python/sweagent/run/merge_predictions.py +64 -0
  80. package/python/sweagent/run/quick_stats.py +96 -0
  81. package/python/sweagent/run/remove_unfinished.py +63 -0
  82. package/python/sweagent/run/rich_test.py +91 -0
  83. package/python/sweagent/run/run.py +147 -0
  84. package/python/sweagent/run/run_batch.py +442 -0
  85. package/python/sweagent/run/run_replay.py +219 -0
  86. package/python/sweagent/run/run_shell.py +155 -0
  87. package/python/sweagent/run/run_single.py +225 -0
  88. package/python/sweagent/run/run_traj_to_demo.py +85 -0
  89. package/python/sweagent/tools/__init__.py +0 -0
  90. package/python/sweagent/tools/bundle.py +57 -0
  91. package/python/sweagent/tools/commands.py +220 -0
  92. package/python/sweagent/tools/parsing.py +619 -0
  93. package/python/sweagent/tools/tools.py +430 -0
  94. package/python/sweagent/tools/utils.py +108 -0
  95. package/python/sweagent/types.py +102 -0
  96. package/python/sweagent/utils/__init__.py +0 -0
  97. package/python/sweagent/utils/config.py +80 -0
  98. package/python/sweagent/utils/files.py +27 -0
  99. package/python/sweagent/utils/github.py +118 -0
  100. package/python/sweagent/utils/jinja_warnings.py +14 -0
  101. package/python/sweagent/utils/log.py +175 -0
  102. package/python/sweagent/utils/patch_formatter.py +152 -0
  103. package/python/sweagent/utils/serialization.py +45 -0
  104. package/python/tests/__init__.py +0 -0
  105. package/python/tests/conftest.py +191 -0
  106. package/python/tests/test_agent.py +258 -0
  107. package/python/tests/test_batch_instance.py +43 -0
  108. package/python/tests/test_commands/_interactive_dummy.py +35 -0
  109. package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
  110. package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
  111. package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
  112. package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
  113. package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
  114. package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
  115. package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
  116. package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
  117. package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
  118. package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
  119. package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
  120. package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
  121. package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
  122. package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
  123. package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
  124. package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
  125. package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
  126. package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
  127. package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
  128. package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
  129. package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
  130. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
  131. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
  132. package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
  133. package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
  134. package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
  135. package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
  136. package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
  137. package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
  138. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
  139. package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
  140. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
  141. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
  142. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
  143. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
  144. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
  145. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
  146. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
  147. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
  148. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
  149. package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
  150. package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
  151. package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
  152. package/python/tests/test_data/data_sources/human_eval.json +1 -0
  153. package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
  154. package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
  155. package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
  156. package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
  157. package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
  158. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
  159. package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
  160. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
  161. package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
  162. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
  163. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
  164. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
  165. package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
  166. package/python/tests/test_env.py +66 -0
  167. package/python/tests/test_env_utils.py +129 -0
  168. package/python/tests/test_history_processors.py +40 -0
  169. package/python/tests/test_models.py +23 -0
  170. package/python/tests/test_openai_live.py +164 -0
  171. package/python/tests/test_packaging.py +7 -0
  172. package/python/tests/test_parsing.py +131 -0
  173. package/python/tests/test_problem_statement_multimodal.py +111 -0
  174. package/python/tests/test_quick_stats.py +42 -0
  175. package/python/tests/test_run.py +37 -0
  176. package/python/tests/test_run_batch.py +110 -0
  177. package/python/tests/test_run_hooks.py +114 -0
  178. package/python/tests/test_run_replay.py +33 -0
  179. package/python/tests/test_run_single.py +125 -0
  180. package/python/tests/test_tools_command_parsing.py +193 -0
  181. package/python/tests/test_utils.py +15 -0
  182. package/python/tests/tools/__init__.py +0 -0
  183. package/python/tests/tools/conftest.py +12 -0
  184. package/python/tests/tools/test_default_utils.py +153 -0
  185. package/python/tests/tools/test_edit_replace.py +0 -0
  186. package/python/tests/tools/test_split_string.py +82 -0
  187. package/python/tests/utils.py +29 -0
  188. package/python/tools/diff_state/bin/_state_diff_state +52 -0
  189. package/python/tools/diff_state/config.yaml +2 -0
  190. package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
  191. package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
  192. package/python/tools/edit_anthropic/config.yaml +56 -0
  193. package/python/tools/edit_anthropic/install.sh +3 -0
  194. package/python/tools/filemap/bin/filemap +45 -0
  195. package/python/tools/filemap/config.yaml +9 -0
  196. package/python/tools/filemap/install.sh +2 -0
  197. package/python/tools/forfeit/bin/exit_forfeit +5 -0
  198. package/python/tools/forfeit/config.yaml +5 -0
  199. package/python/tools/image_tools/bin/view_image +36 -0
  200. package/python/tools/image_tools/config.yaml +9 -0
  201. package/python/tools/multilingual_setup/bin/do_nothing +2 -0
  202. package/python/tools/multilingual_setup/config.yaml +1 -0
  203. package/python/tools/multilingual_setup/install.sh +45 -0
  204. package/python/tools/registry/bin/_read_env +10 -0
  205. package/python/tools/registry/bin/_write_env +10 -0
  206. package/python/tools/registry/config.yaml +1 -0
  207. package/python/tools/registry/install.sh +6 -0
  208. package/python/tools/registry/lib/__init__.py +0 -0
  209. package/python/tools/registry/lib/registry.py +56 -0
  210. package/python/tools/review_on_submit_m/README.md +6 -0
  211. package/python/tools/review_on_submit_m/bin/submit +54 -0
  212. package/python/tools/review_on_submit_m/config.yaml +6 -0
  213. package/python/tools/review_on_submit_m/install.sh +0 -0
  214. package/python/tools/search/bin/find_file +31 -0
  215. package/python/tools/search/bin/search_dir +39 -0
  216. package/python/tools/search/bin/search_file +55 -0
  217. package/python/tools/search/config.yaml +37 -0
  218. package/python/tools/search/install.sh +3 -0
  219. package/python/tools/submit/bin/submit +17 -0
  220. package/python/tools/submit/config.yaml +5 -0
  221. package/python/tools/web_browser/bin/click_mouse +41 -0
  222. package/python/tools/web_browser/bin/close_site +28 -0
  223. package/python/tools/web_browser/bin/double_click_mouse +37 -0
  224. package/python/tools/web_browser/bin/drag_mouse +46 -0
  225. package/python/tools/web_browser/bin/execute_script_on_page +39 -0
  226. package/python/tools/web_browser/bin/get_console_output +48 -0
  227. package/python/tools/web_browser/bin/move_mouse +35 -0
  228. package/python/tools/web_browser/bin/navigate_back +33 -0
  229. package/python/tools/web_browser/bin/navigate_forward +33 -0
  230. package/python/tools/web_browser/bin/open_site +36 -0
  231. package/python/tools/web_browser/bin/press_keys_on_page +51 -0
  232. package/python/tools/web_browser/bin/reload_page +33 -0
  233. package/python/tools/web_browser/bin/run_web_browser_server +394 -0
  234. package/python/tools/web_browser/bin/screenshot_site +38 -0
  235. package/python/tools/web_browser/bin/scroll_on_page +40 -0
  236. package/python/tools/web_browser/bin/set_browser_window_size +40 -0
  237. package/python/tools/web_browser/bin/type_text +34 -0
  238. package/python/tools/web_browser/bin/wait_time +39 -0
  239. package/python/tools/web_browser/config.yaml +155 -0
  240. package/python/tools/web_browser/install.sh +22 -0
  241. package/python/tools/web_browser/lib/browser_manager.py +404 -0
  242. package/python/tools/web_browser/lib/web_browser_config.py +33 -0
  243. package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
  244. package/python/tools/web_browser/test_console.html +1 -0
  245. package/python/tools/windowed/bin/_state +25 -0
  246. package/python/tools/windowed/bin/create +29 -0
  247. package/python/tools/windowed/bin/goto +37 -0
  248. package/python/tools/windowed/bin/open +49 -0
  249. package/python/tools/windowed/bin/scroll_down +12 -0
  250. package/python/tools/windowed/bin/scroll_up +13 -0
  251. package/python/tools/windowed/config.yaml +38 -0
  252. package/python/tools/windowed/install.sh +15 -0
  253. package/python/tools/windowed/lib/__init__.py +0 -0
  254. package/python/tools/windowed/lib/flake8_utils.py +147 -0
  255. package/python/tools/windowed/lib/windowed_file.py +312 -0
  256. package/python/tools/windowed_edit_linting/bin/edit +128 -0
  257. package/python/tools/windowed_edit_linting/config.yaml +31 -0
  258. package/python/tools/windowed_edit_linting/install.sh +5 -0
  259. package/python/tools/windowed_edit_replace/bin/edit +172 -0
  260. package/python/tools/windowed_edit_replace/bin/insert +77 -0
  261. package/python/tools/windowed_edit_replace/config.yaml +60 -0
  262. package/python/tools/windowed_edit_replace/install.sh +5 -0
  263. package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
  264. package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
  265. package/python/tools/windowed_edit_rewrite/install.sh +5 -0
  266. package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
  267. package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
  268. package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
  269. package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
  270. package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
  271. package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
  272. package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
  273. package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
  274. package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
  275. package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
  276. package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
  277. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
  278. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  279. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  280. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
  281. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
  282. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
  283. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
  284. package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
  285. package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
  286. package/rust/Cargo.toml +100 -0
  287. package/rust/README.md +49 -0
  288. package/rust/src/agent/action_sampler.rs +130 -0
  289. package/rust/src/agent/agents.rs +1029 -0
  290. package/rust/src/agent/history_processors.rs +277 -0
  291. package/rust/src/agent/hooks/mod.rs +208 -0
  292. package/rust/src/agent/mod.rs +24 -0
  293. package/rust/src/agent/models.rs +837 -0
  294. package/rust/src/agent/problem_statement.rs +355 -0
  295. package/rust/src/agent/reviewer.rs +505 -0
  296. package/rust/src/bin/sweagent.rs +784 -0
  297. package/rust/src/environment/deployment.rs +631 -0
  298. package/rust/src/environment/hooks/mod.rs +114 -0
  299. package/rust/src/environment/mod.rs +16 -0
  300. package/rust/src/environment/repo.rs +265 -0
  301. package/rust/src/environment/runtime.rs +237 -0
  302. package/rust/src/environment/swe_env.rs +248 -0
  303. package/rust/src/exceptions.rs +228 -0
  304. package/rust/src/lib.rs +68 -0
  305. package/rust/src/monitoring.rs +482 -0
  306. package/rust/src/run/hooks/mod.rs +134 -0
  307. package/rust/src/run/mod.rs +12 -0
  308. package/rust/src/run/run_batch.rs +563 -0
  309. package/rust/src/run/run_single.rs +196 -0
  310. package/rust/src/tools/bundle.rs +224 -0
  311. package/rust/src/tools/commands.rs +173 -0
  312. package/rust/src/tools/mod.rs +295 -0
  313. package/rust/src/tools/parsing.rs +354 -0
  314. package/rust/src/tools/registry.rs +143 -0
  315. package/rust/src/types.rs +554 -0
  316. package/rust/src/utils/config.rs +105 -0
  317. package/rust/src/utils/files.rs +137 -0
  318. package/rust/src/utils/github.rs +171 -0
  319. package/rust/src/utils/log.rs +65 -0
  320. package/rust/src/utils/mod.rs +17 -0
  321. package/rust/src/utils/serialization.rs +181 -0
  322. package/rust/src/utils/template.rs +173 -0
  323. package/typescript/README.md +335 -0
@@ -0,0 +1,354 @@
1
+ from __future__ import annotations
2
+
3
+ import http.server
4
+ import json
5
+ import os
6
+ import socketserver
7
+ from argparse import ArgumentParser
8
+ from functools import partial
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ import yaml
13
+
14
+
15
def add_problem_statement(content):
    """Prepend the problem statement to the trajectory.

    The problem statement is taken to be the content of the first history
    entry whose role is ``"user"``. If there is no such entry, or its content
    is empty, ``content`` is returned untouched. The dict is modified in place
    and also returned.
    """
    first_user_message = next(
        (entry["content"] for entry in content["history"] if entry["role"] == "user"),
        "",
    )
    if not first_user_message:
        return content

    placeholder_step = {
        "thought": "",
        "action": "",
        "response": "",
        "observation": first_user_message,
        "messages": [{"role": "system", "content": "Problem Statement placeholder"}],
    }
    content["trajectory"].insert(0, placeholder_step)
    return content
37
+
38
+
39
def append_exit(content):
    """Append a synthetic "submission" step when the run ended in a submission.

    Nothing is added when ``info.exit_status`` is missing or does not start
    with ``"submitted"``. The dict is modified in place and also returned.

    Raises:
        ValueError: the exit status starts with ``"submitted"`` but ``info``
            has no ``"submission"`` entry.
    """
    exit_status = content.get("info", {}).get("exit_status", None)
    if exit_status is None or not exit_status.startswith("submitted"):
        return content

    run_info = content["info"]
    if "submission" not in run_info:
        msg = "No submission in history or info"
        raise ValueError(msg)

    submission_step = {
        "thought": "Submitting solution",
        "action": "Model Submission",
        "response": "Submitting solution",
        "observation": run_info["submission"],
        "messages": [{"role": "system", "content": f"Submission generated - {exit_status}"}],
    }
    content["trajectory"].append(submission_step)
    return content
59
+
60
+
61
def append_patch(instance_id, content, patches, patch_type):
    """Append a step showing the ``patch_type`` patch for ``instance_id``.

    The step is only added when the run has an exit status and ``patches``
    has an entry for the instance. The dict is modified in place and also
    returned.
    """
    run_finished = content.get("info", {}).get("exit_status", None) is not None
    if run_finished and instance_id in patches:
        label = f"Showing {patch_type} patch"
        patch_step = {
            "thought": label,
            "response": label,
            "action": f"{patch_type} Patch",
            "observation": patches[instance_id],
        }
        content["trajectory"].append(patch_step)
    return content
73
+
74
+
75
def append_results(traj_path: Path, instance_id: str, content, results, results_file):
    """Add an evaluation report as both the first and the last trajectory step.

    The report combines run statistics read from the ``info`` section of the
    trajectory file with the evaluation status of the instance.

    Args:
        traj_path: Trajectory file to read ``info`` / ``model_stats`` from.
            If it does not exist, every statistic is reported as ``"N/A"``.
        instance_id: ID looked up in the ``*_ids`` lists of ``results``.
        content: Loaded trajectory dict; modified in place.
        results: Parsed evaluation results, or ``None`` if none were found.
        results_file: Location of the results file; only used in the note
            that points the reader at it.

    Returns:
        ``content``, with the same report dict inserted at index 0 and
        appended at the end of ``content["trajectory"]``.
    """
    # Default to an empty dict so a missing trajectory file yields "N/A"
    # placeholders instead of reading an unbound `info`.
    info = {}
    if traj_path.exists():
        info = json.loads(traj_path.read_text()).get("info", {})
    model_stats = info.get("model_stats", {})

    def _stat(key, spec):
        """Format ``model_stats[key]`` with ``spec``, or "N/A" if absent."""
        value = model_stats.get(key)
        return "N/A" if value is None else format(value, spec)

    stats: list[str] = [
        "**** Run Stats ****",
        f"Exit Status: {info.get('exit_status', 'N/A')}",
        f"Instance Cost: ${_stat('instance_cost', '.2f')}",
        f"Tokens Sent: {_stat('tokens_sent', ',')}",
        f"Tokens Received: {_stat('tokens_received', ',')}",
        f"API Calls: {_stat('api_calls', ',')}\n",
    ]

    status: list[str] = []
    if results is None:
        status.append("Evaluation results not found")
    elif "completed_ids" in results and "submitted_ids" in results and "resolved_ids" in results:
        is_completed = instance_id in results["completed_ids"]
        is_submitted = instance_id in results["submitted_ids"]
        is_resolved = instance_id in results["resolved_ids"]

        status.append("**** Statuses ****")
        status.append(f" {'✅' if is_completed else '❌'} Completed (The agent successfully ran)")
        status.append(f" {'✅' if is_submitted else '❌'} Submitted (The agent successfully submitted a pull request)")
        status.append(
            f" {'✅' if is_resolved else '❌'} Resolved (The pull request {'' if is_resolved else 'has not '}"
            "successfully resolved the issue during eval)"
        )
    else:
        status.append("Results format not recognized")

    # Every branch above appends at least one line, so the footer below is
    # always emitted (the former "status is empty" case could never trigger).
    status.extend(
        [
            "---------------------------",
            "Note that the evaluation results here may not be accurate or up to date, since they are computed separately from the agent run itself.",
            f"Check {results_file} for the most accurate evaluation results.",
            "",
            f"Instance ID: {instance_id}",
        ]
    )

    eval_report = {
        "thought": "Evaluation Report",
        "action": "Showing evaluation results",
        "response": "Showing evaluation results",
        "observation": "\n".join([*stats, *status]),
        "messages": [{"role": "system", "content": "Showing evaluation results and statistics"}],
    }

    if not content.get("trajectory"):
        content["trajectory"] = []
    # The same dict object is used for both positions.
    content["trajectory"].insert(0, eval_report)
    content["trajectory"].append(eval_report)
    return content
145
+
146
+
147
def get_action_summary(content):
    """Return a numbered, one-line-per-step list of the assistant's actions.

    Only history entries with role ``"assistant"`` that are not flagged
    ``is_demo`` are counted. Actions longer than 70 characters are cut to 67
    characters followed by ``"..."``.

    Raises:
        KeyError: an assistant entry has no ``"action"`` key (the entry is
            printed before the error is re-raised).
    """
    agent_turns = (
        entry
        for entry in content["history"]
        if entry["role"] == "assistant" and not entry.get("is_demo")
    )
    summary_lines = []
    for step, entry in enumerate(agent_turns, start=1):
        try:
            label = entry["action"]
        except KeyError:
            print(f"No action for step {step}")
            print(entry)
            raise
        if len(label) > 70:
            label = label[:67] + "..."
        summary_lines.append(f"Step {step}: {label}\n")
    return "".join(summary_lines)
166
+
167
+
168
def load_content(file_name, gold_patches, test_patches) -> dict[str, Any]:
    """Load a trajectory file and enrich it for display.

    In order: the problem statement is prepended, a submission step and the
    gold/test patch steps are appended, an "Action Summary" entry is put at
    the front of the history, and the evaluation report is added. The
    instance ID is the file name without its suffix; evaluation results are
    read from ``results.json`` next to the trajectory file.
    """
    with open(file_name) as infile:
        content = json.load(infile)

    traj_file = Path(file_name)
    instance_id = traj_file.stem
    results_file = traj_file.parent / "results.json"
    results = load_results(results_file)

    content = append_exit(add_problem_statement(content))
    for patches, patch_type in ((gold_patches, "Gold"), (test_patches, "Test")):
        content = append_patch(instance_id, content, patches, patch_type)

    summary_entry = {"role": "Action Summary", "content": get_action_summary(content)}
    content["history"].insert(0, summary_entry)
    return append_results(traj_file, instance_id, content, results, results_file)
186
+
187
+
188
def load_results(results_path: Path) -> dict[str, Any] | None:
    """Read evaluation results from ``results_path``.

    Returns ``None`` when the file does not exist. Otherwise returns the
    parsed JSON, with a ``"no_generation"`` key renamed to
    ``"not_generated"`` (different versions of the code used either name).
    """
    if results_path.exists():
        with open(results_path) as infile:
            results = json.load(infile)
        if "no_generation" in results:
            results["not_generated"] = results.pop("no_generation")
        return results
    return None
203
+
204
+
205
def get_status(traj_path) -> str:
    """Return a result emoji (plus exit details) for a single trajectory.

    Returns:
        ``"✅"`` if the instance ID (file name without suffix) is listed under
        ``resolved_ids`` in the sibling ``results.json``; ``"❓ ..."`` if there
        is no ``results.json``; ``"❌ ..."`` otherwise. The non-resolved forms
        are followed by the exit status and the number of API calls.
    """
    traj_file = Path(traj_path)
    results = load_results(traj_file.parent / "results.json")
    info = json.loads(traj_file.read_text()).get("info", {})
    n_steps = info.get("model_stats", {}).get("api_calls", "N/A")
    exit_status = info.get("exit_status", "N/A")
    exit_status_str = f" ({exit_status} after {n_steps} steps)"
    if results is None:
        return f"❓ {exit_status_str}"
    # A results file without "resolved_ids" is treated as "not resolved"
    # rather than raising KeyError and breaking the whole file listing.
    if traj_file.stem in results.get("resolved_ids", ()):
        return "✅"
    return f"❌ {exit_status_str}"
219
+
220
+
221
class Handler(http.server.SimpleHTTPRequestHandler):
    """Request handler for the trajectory inspector.

    Adds a few endpoints on top of the static file serving inherited from
    ``SimpleHTTPRequestHandler``:

    * ``/directory_info``: the trajectory directory being served (JSON)
    * ``/files``: every ``*.traj`` file below that directory, each followed
      by four spaces and its status string (JSON list)
    * ``/trajectory/<path>``: processed content of one trajectory file
    * ``/check_update``: 200 if the set of ``*.traj`` modification times
      changed since the previous check, 204 otherwise

    Any other path falls through to the parent class.
    """

    # Snapshot of {path: mtime} from the last /check_update request.
    # Stored on the class, so it is shared by all handler instances.
    file_mod_times = {}

    def __init__(self, *args, **kwargs):
        # These attributes must exist before the parent constructor runs,
        # because the base request handler processes the request from
        # within its __init__.
        self.gold_patches = kwargs.pop("gold_patches", {})
        self.test_patches = kwargs.pop("test_patches", {})
        # "directory" is consumed here and not forwarded to the parent class.
        self.traj_dir = kwargs.pop("directory", ".")
        super().__init__(*args, **kwargs)

    def _is_inside_traj_dir(self, file_path):
        """Return True if ``file_path`` stays within the trajectory directory."""
        root = os.path.abspath(self.traj_dir)
        target = os.path.abspath(os.path.join(root, file_path))
        try:
            return os.path.commonpath([root, target]) == root
        except ValueError:
            # commonpath raises when the paths cannot be compared
            # (e.g. different drives); treat that as "outside".
            return False

    def serve_directory_info(self):
        """Respond with ``{"directory": <trajectory directory>}`` as JSON."""
        self.send_response(200)
        self.send_header("Content-type", "application/json")
        self.end_headers()
        self.wfile.write(json.dumps({"directory": self.traj_dir}).encode())

    def serve_file_content(self, file_path):
        """Respond with the processed content of one trajectory file.

        ``file_path`` comes straight from the request URL, so it is checked
        against the trajectory directory first: paths that escape it (via
        ``..`` segments or by being absolute) get a 403. A missing file
        gets a 404.
        """
        if not self._is_inside_traj_dir(file_path):
            self.send_error(403, f"File {file_path} is outside the served directory")
            return
        try:
            content = load_content(
                Path(self.traj_dir) / file_path,
                self.gold_patches,
                self.test_patches,
            )
            self.send_response(200)
            self.send_header("Content-type", "text/plain")
            self.end_headers()
            self.wfile.write(json.dumps(content).encode())
        except FileNotFoundError:
            self.send_error(404, f"File {file_path} not found")

    def do_GET(self):
        """Dispatch GET requests to the inspector endpoints or the parent class."""
        if self.path == "/directory_info":
            self.serve_directory_info()
        elif self.path.startswith("/files"):
            self.handle_files_request()
        elif self.path.startswith("/trajectory/"):
            file_path = self.path[len("/trajectory/") :]
            self.serve_file_content(file_path)
        elif self.path.startswith("/check_update"):
            self.check_for_updates()
        else:
            super().do_GET()

    def handle_files_request(self):
        """Respond with the reverse-sorted list of trajectory files and their status."""
        traj_root = Path(self.traj_dir)
        # Build the listing before sending any header, so a failure while
        # reading a trajectory does not leave a half-written 200 response.
        files = sorted(
            (
                str(file.relative_to(traj_root)) + " " * 4 + get_status(file)
                for file in traj_root.glob("**/*.traj")
            ),
            key=lambda x: str(traj_root / x),
            reverse=True,
        )
        self.send_response(200)
        self.send_header("Content-type", "application/json")
        self.end_headers()
        self.wfile.write(json.dumps(files).encode())

    def check_for_updates(self):
        """Respond 200 if any ``*.traj`` mtime changed since the last check, else 204."""
        current_mod_times = {str(file): file.stat().st_mtime for file in Path(self.traj_dir).glob("**/*.traj")}
        if current_mod_times != Handler.file_mod_times:
            Handler.file_mod_times = current_mod_times
            self.send_response(200)  # Send response that there's an update
        else:
            self.send_response(204)  # Send no content response if no update
        self.end_headers()

    def end_headers(self):
        """Add a permissive CORS header to every response."""
        self.send_header("Access-Control-Allow-Origin", "*")
        super().end_headers()
293
+
294
+
295
def main(data_path, directory, port):
    """Load the dataset (for gold/test patches) and serve the inspector.

    Args:
        data_path: Optional path to a ``.json`` or ``.jsonl`` dataset. When
            ``None`` and ``directory`` contains an ``args.yaml``, the dataset
            path is taken from its ``environment.data_path`` entry, resolved
            relative to the parent of this file's directory.
        directory: Trajectory directory handed to the request handler.
        port: TCP port to listen on (all interfaces).

    Blocks in ``serve_forever``. If the port is already in use, an error
    message is printed and the function returns; other ``OSError``s
    propagate.
    """
    import errno  # local import: only needed for the portable port-in-use check

    data = []
    if data_path is not None:
        if data_path.endswith(".jsonl"):
            # Skip blank lines (e.g. a trailing empty line) instead of
            # failing to parse them as JSON.
            data = [json.loads(line) for line in Path(data_path).read_text().splitlines() if line.strip()]
        elif data_path.endswith(".json"):
            with open(data_path) as f:
                data = json.load(f)
    elif "args.yaml" in os.listdir(directory):
        with open(Path(directory) / "args.yaml") as file:
            # safe_load returns None for an empty document.
            args = yaml.safe_load(file) or {}
        if "environment" in args and "data_path" in args["environment"]:
            data_path = Path(__file__).parent.parent / args["environment"]["data_path"]
            # `exists` must be called; the bare bound method is always truthy.
            if data_path.exists():
                with open(data_path) as f:
                    data = json.load(f)

    gold_patches = {d["instance_id"]: d.get("patch") for d in data}
    test_patches = {d["instance_id"]: d.get("test_patch") for d in data}

    handler_with_directory = partial(
        Handler,
        directory=directory,
        gold_patches=gold_patches,
        test_patches=test_patches,
    )
    try:
        with socketserver.TCPServer(("", port), handler_with_directory) as httpd:
            print(f"Serving at http://localhost:{port}")
            httpd.serve_forever()
    except OSError as e:
        # errno.EADDRINUSE is the platform's own value (the literal 48 only
        # matches on some systems).
        if e.errno == errno.EADDRINUSE:
            print(f"ERROR: Port ({port}) is already in use. Try another port with the --port flag.")
        else:
            raise
330
+
331
+
332
def get_parser():
    """Build the command-line parser (``--data_path``, ``--directory``, ``--port``)."""
    option_specs = (
        (
            "--data_path",
            {
                "type": str,
                "help": "Path to dataset that was used for the trajectories. Necessary to display gold patches.",
            },
        ),
        ("--directory", {"type": str, "help": "Directory to serve", "default": os.getcwd(), "nargs": "?"}),
        ("--port", {"type": int, "help": "Port to serve", "default": 8000}),
    )
    cli = ArgumentParser()
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    return cli
342
+
343
+
344
def run_from_cli(args: list[str] | None = None):
    """Parse ``args`` and start the inspector server.

    The served directory is made absolute before the working directory is
    switched to the directory containing this file.
    """
    options = vars(get_parser().parse_args(args))
    # Resolve first: a relative directory would no longer be valid after chdir.
    options["directory"] = str(Path(options["directory"]).resolve().absolute())
    # Hack to make sure all the templates and all are found
    os.chdir(Path(__file__).parent)
    main(**options)
351
+
352
+
353
+ if __name__ == "__main__":
354
+ run_from_cli()
@@ -0,0 +1,169 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ import traceback
6
+ from argparse import ArgumentParser
7
+ from pathlib import Path
8
+
9
+ import yaml
10
+ from tqdm.auto import tqdm
11
+
12
+ try:
13
+ from .server import load_content
14
+ except ImportError:
15
+ from server import load_content
16
+
17
+
18
+ logger = logging.getLogger(__name__)
19
+ logging.getLogger("simple_parsing").setLevel(logging.INFO)
20
+
21
+
22
+ TEMPLATE = """
23
+ <html>
24
+ <head>
25
+ <title>Trajectory Viewer</title>
26
+ <style>
27
+ {style_sheet}
28
+ </style>
29
+ </head>
30
+ <body>
31
+ <div class="container">
32
+ {file_path_tree}
33
+ <h2>Conversation History</h2>
34
+ <pre id="fileContent">{file_content}</pre>
35
+ </div>
36
+ </body>
37
+ </html>
38
+ """
39
+
40
# Read the viewer's CSS once, at import time, from the ``style.css`` file that
# sits next to this module. A failure is logged together with its traceback
# and then re-raised, so the module cannot be imported without the style sheet.
try:
    STYLE_SHEET = (Path(__file__).parent / "style.css").read_text()
except Exception as e:
    style_file = Path(__file__).parent / "style.css"
    logger.error(f"Failed to load style sheet from {style_file}: {traceback.format_exc()}")
    raise e
47
+
48
+
49
def _load_file(file_name, gold_patches, test_patches):
    """Render the ``history`` of one trajectory file as an HTML fragment.

    Args:
        file_name: Trajectory file; passed through to ``load_content``.
        gold_patches: Passed through to ``load_content`` unchanged.
        test_patches: Passed through to ``load_content`` unchanged.

    Returns:
        The concatenation of one ``<div class="history-item ...">`` per
        history entry; ``"No history content found."`` when the loaded
        content has no list under ``"history"``; or a string starting with
        ``"Error loading content."`` followed by the (HTML-escaped) traceback
        when anything goes wrong. This function never raises.
    """
    import html  # function-scope import: the module header does not import it

    role_map = {
        "user": "Computer",
        "assistant": "SWE-Agent",
        "subroutine": "SWE-Agent subroutine",
        "default": "Default",
        "system": "System",
        "demo": "Demonstration",
    }
    try:
        content = load_content(file_name, gold_patches, test_patches)
        if "history" not in content or not isinstance(content["history"], list):
            return "No history content found."

        rendered_items = []
        for index, item in enumerate(content["history"]):
            raw_content = item.get("content", "")
            if raw_content is None:
                raw_content = ""
            elif not isinstance(raw_content, str):
                # Non-text payloads would otherwise abort the whole page;
                # show them as JSON instead.
                # NOTE(review): presumably structured message parts - confirm
                # against what load_content actually returns.
                raw_content = json.dumps(raw_content, indent=2, default=str)
            # Escape "&" as well as "<" and ">": otherwise text such as
            # "&lt;" inside a trajectory is displayed as "<".
            item_content = html.escape(raw_content, quote=False)

            role = item.get("role")
            role_text = "" if role is None else str(role)
            if item.get("agent") and item["agent"] != "primary":
                role_class = "subroutine"
            else:
                role_class = ("default" if role is None else role_text).lower().replace(" ", "-")
            # Both values come from the trajectory file, so escape them too
            # (role_class ends up inside quoted attributes).
            role_class = html.escape(role_class, quote=True)
            role_name = html.escape(role_map.get(role_text, role_text), quote=False)

            element_id = f"historyItem{index}"
            rendered_items.append(
                f"""<div class="history-item {role_class}" id="{element_id}">"""
                f"""<div class="role-bar {role_class}"><strong><span>{role_name}</span></strong></div>"""
                f"""<div class="content-container">"""
                f"""<pre>{item_content}</pre>"""
                f"""</div>"""
                f"""<div class="shadow"></div>"""
                f"""</div>"""
            )
        return "".join(rendered_items)
    except Exception:
        # Best effort by design: the error text is shown in the generated page
        # in place of the history, so escape it like any other content.
        return f"Error loading content. {html.escape(traceback.format_exc(), quote=False)}"
84
+
85
+
86
def _make_file_path_tree(file_path):
    """Return an HTML breadcrumb for the last three ``/``-separated parts of a path.

    Args:
        file_path: Path as a string using ``/`` as separator.

    Returns:
        A ``<div class="filepath">`` element containing one
        ``<div class="part">`` line per retained path component.
    """
    trailing_parts = file_path.split("/")[-3:]
    rows = "".join(f'<div class="part">{segment}</div>\n' for segment in trailing_parts)
    return f'<div class="filepath">\n{rows}</div>'
94
+
95
+
96
def save_static_viewer(file_path):
    """Write a standalone HTML viewer next to one trajectory file.

    If an ``args.yaml`` sits beside the trajectory and names an
    ``environment.data_path`` that exists (resolved relative to the parent of
    this module's directory), that JSON dataset is used to look up gold and
    test patches per ``instance_id``. The rendered page is written to the
    trajectory path with its suffix replaced by ``.html``.

    Args:
        file_path: Trajectory file, as a ``Path`` or anything ``Path`` accepts.
    """
    file_path = Path(file_path)

    dataset = []
    args_file = file_path.parent / "args.yaml"
    if args_file.is_file():
        # An empty args.yaml parses to None; treat that as "no settings"
        # instead of failing on the membership tests below.
        run_args = yaml.safe_load(args_file.read_text()) or {}
        environment = run_args.get("environment") if isinstance(run_args, dict) else None
        if isinstance(environment, dict) and "data_path" in environment:
            dataset_path = Path(__file__).parent.parent / environment["data_path"]
            if dataset_path.is_file():
                with open(dataset_path) as f:
                    dataset = json.load(f)
    # Only a non-empty list whose first record carries both patch fields is
    # accepted as a patch source; anything else is ignored.
    if not isinstance(dataset, list) or not dataset or "patch" not in dataset[0] or "test_patch" not in dataset[0]:
        dataset = []
    # .get() keeps this in line with the server's lookup: a later record that
    # lacks a patch maps to None rather than aborting the export.
    gold_patches = {entry["instance_id"]: entry.get("patch") for entry in dataset}
    test_patches = {entry["instance_id"]: entry.get("test_patch") for entry in dataset}

    content = _load_file(file_path, gold_patches, test_patches)
    file_path_tree = _make_file_path_tree(file_path.absolute().as_posix())

    # The style sheet references icons relative to this module; rewrite those
    # URLs so they resolve from wherever the HTML file is written.
    icons_path = Path(__file__).parent / "icons"
    icons_url = find_relative_path(file_path, icons_path).as_posix()
    style_sheet = STYLE_SHEET.replace("url('icons/", f"url('{icons_url}/").replace(
        'url("icons/',
        f'url("{icons_url}/',
    )

    page = TEMPLATE.format(file_content=content, style_sheet=style_sheet, file_path_tree=file_path_tree)
    output_file = file_path.with_suffix(".html")
    # Characters the platform's default encoding cannot represent are written
    # as numeric character references, which browsers render correctly,
    # instead of raising UnicodeEncodeError half-way through the file.
    with open(output_file, "w", errors="xmlcharrefreplace") as outfile:
        print(page, file=outfile)
    logger.info(f"Saved static viewer to {output_file}")
124
+
125
+
126
def find_relative_path(from_path, to_path):
    """Return the relative path that leads from one directory to another.

    Both arguments are resolved first; a path that points at a file is
    replaced by the directory containing it.

    Args:
        from_path: ``Path`` of the starting location.
        to_path: ``Path`` of the destination.

    Returns:
        A relative ``Path`` made of ``..`` steps up to the deepest shared
        ancestor followed by the steps down to the destination.

    Raises:
        ValueError: If either location is not an existing directory after
            the file-to-parent substitution.
    """
    source_dir = from_path.resolve()
    target_dir = to_path.resolve()
    # A file stands in for the directory that holds it.
    source_dir = source_dir.parent if source_dir.is_file() else source_dir
    target_dir = target_dir.parent if target_dir.is_file() else target_dir
    if not (source_dir.is_dir() and target_dir.is_dir()):
        msg = f"Both from_path and to_path must be directories, but got {source_dir} and {target_dir}"
        raise ValueError(msg)

    source_parts = source_dir.parts
    target_parts = target_dir.parts

    # Length of the common leading run of path components.
    shared = 0
    limit = min(len(source_parts), len(target_parts))
    while shared < limit and source_parts[shared] == target_parts[shared]:
        shared += 1

    # Climb from the source up to the shared ancestor, then descend.
    climb = [".."] * (len(source_parts) - shared)
    descend = target_parts[shared:]
    return Path(*climb, *descend)
153
+
154
+
155
def save_all_trajectories(directory):
    """Generate a static HTML viewer for every ``*.traj`` file under a directory.

    Args:
        directory: Root folder to search recursively, as a ``Path`` or
            anything ``Path`` accepts.
    """
    directory = Path(directory)
    all_files = list(directory.glob("**/*.traj"))
    logger.info(f"Found {len(all_files)} trajectory files in {directory}")
    for file_path in tqdm(all_files, desc="Saving static viewers"):
        save_static_viewer(file_path)
    # Report the function's own argument. The previous code read the global
    # ``args``, which only exists when this module runs as a script, so an
    # imported call failed with NameError after all the work was done.
    logger.info(f"Saved static viewers for all trajectories in {directory}")
163
+
164
+
165
# Command-line entry point: takes one positional argument, the directory to
# scan, and writes a static viewer for every trajectory found below it.
if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("directory", type=str, help="Directory containing trajectory files")
    args = parser.parse_args()
    save_all_trajectories(args.directory)