@elizaos/sweagent-root 2.0.0-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +270 -0
- package/package.json +71 -0
- package/python/LICENSE +21 -0
- package/python/config/README.md +15 -0
- package/python/config/bash_only.yaml +222 -0
- package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
- package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
- package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
- package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
- package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
- package/python/config/coding_challenge.yaml +104 -0
- package/python/config/default.yaml +69 -0
- package/python/config/default_backticks.yaml +69 -0
- package/python/config/default_mm_no_images.yaml +82 -0
- package/python/config/default_mm_with_images.yaml +83 -0
- package/python/config/demo/default.yaml +80 -0
- package/python/config/demo/no_instructions.yaml +69 -0
- package/python/config/demo/only_bash.yaml +60 -0
- package/python/config/exotic/default_shell.yaml +52 -0
- package/python/config/exotic/windowed_replace.yaml +125 -0
- package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
- package/python/config/human/human.yaml +24 -0
- package/python/config/human/human_demo.yaml +52 -0
- package/python/config/sweagent_0_7/07.yaml +101 -0
- package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
- package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
- package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
- package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
- package/python/mlc_config.json +44 -0
- package/python/pyproject.toml +262 -0
- package/python/sweagent/__init__.py +114 -0
- package/python/sweagent/__main__.py +4 -0
- package/python/sweagent/agent/__init__.py +0 -0
- package/python/sweagent/agent/action_sampler.py +317 -0
- package/python/sweagent/agent/agents.py +1294 -0
- package/python/sweagent/agent/extra/shell_agent.py +106 -0
- package/python/sweagent/agent/history_processors.py +399 -0
- package/python/sweagent/agent/hooks/__init__.py +0 -0
- package/python/sweagent/agent/hooks/abstract.py +139 -0
- package/python/sweagent/agent/hooks/status.py +34 -0
- package/python/sweagent/agent/models.py +896 -0
- package/python/sweagent/agent/problem_statement.py +312 -0
- package/python/sweagent/agent/reviewer.py +664 -0
- package/python/sweagent/environment/__init__.py +0 -0
- package/python/sweagent/environment/hooks/__init__.py +0 -0
- package/python/sweagent/environment/hooks/abstract.py +60 -0
- package/python/sweagent/environment/hooks/status.py +28 -0
- package/python/sweagent/environment/repo.py +219 -0
- package/python/sweagent/environment/swe_env.py +276 -0
- package/python/sweagent/exceptions.py +54 -0
- package/python/sweagent/inspector/README.md +6 -0
- package/python/sweagent/inspector/__init__.py +0 -0
- package/python/sweagent/inspector/favicon.ico +0 -0
- package/python/sweagent/inspector/fileViewer.js +354 -0
- package/python/sweagent/inspector/icons/computer.png +0 -0
- package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
- package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
- package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
- package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
- package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
- package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
- package/python/sweagent/inspector/index.html +25 -0
- package/python/sweagent/inspector/server.py +354 -0
- package/python/sweagent/inspector/static.py +169 -0
- package/python/sweagent/inspector/style.css +454 -0
- package/python/sweagent/run/__init__.py +0 -0
- package/python/sweagent/run/_progress.py +158 -0
- package/python/sweagent/run/batch_instances.py +419 -0
- package/python/sweagent/run/common.py +387 -0
- package/python/sweagent/run/compare_runs.py +123 -0
- package/python/sweagent/run/extract_pred.py +19 -0
- package/python/sweagent/run/hooks/__init__.py +0 -0
- package/python/sweagent/run/hooks/abstract.py +67 -0
- package/python/sweagent/run/hooks/apply_patch.py +106 -0
- package/python/sweagent/run/hooks/open_pr.py +244 -0
- package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
- package/python/sweagent/run/inspector_cli.py +493 -0
- package/python/sweagent/run/merge_predictions.py +64 -0
- package/python/sweagent/run/quick_stats.py +96 -0
- package/python/sweagent/run/remove_unfinished.py +63 -0
- package/python/sweagent/run/rich_test.py +91 -0
- package/python/sweagent/run/run.py +147 -0
- package/python/sweagent/run/run_batch.py +442 -0
- package/python/sweagent/run/run_replay.py +219 -0
- package/python/sweagent/run/run_shell.py +155 -0
- package/python/sweagent/run/run_single.py +225 -0
- package/python/sweagent/run/run_traj_to_demo.py +85 -0
- package/python/sweagent/tools/__init__.py +0 -0
- package/python/sweagent/tools/bundle.py +57 -0
- package/python/sweagent/tools/commands.py +220 -0
- package/python/sweagent/tools/parsing.py +619 -0
- package/python/sweagent/tools/tools.py +430 -0
- package/python/sweagent/tools/utils.py +108 -0
- package/python/sweagent/types.py +102 -0
- package/python/sweagent/utils/__init__.py +0 -0
- package/python/sweagent/utils/config.py +80 -0
- package/python/sweagent/utils/files.py +27 -0
- package/python/sweagent/utils/github.py +118 -0
- package/python/sweagent/utils/jinja_warnings.py +14 -0
- package/python/sweagent/utils/log.py +175 -0
- package/python/sweagent/utils/patch_formatter.py +152 -0
- package/python/sweagent/utils/serialization.py +45 -0
- package/python/tests/__init__.py +0 -0
- package/python/tests/conftest.py +191 -0
- package/python/tests/test_agent.py +258 -0
- package/python/tests/test_batch_instance.py +43 -0
- package/python/tests/test_commands/_interactive_dummy.py +35 -0
- package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
- package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
- package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
- package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
- package/python/tests/test_data/data_sources/human_eval.json +1 -0
- package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
- package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
- package/python/tests/test_env.py +66 -0
- package/python/tests/test_env_utils.py +129 -0
- package/python/tests/test_history_processors.py +40 -0
- package/python/tests/test_models.py +23 -0
- package/python/tests/test_openai_live.py +164 -0
- package/python/tests/test_packaging.py +7 -0
- package/python/tests/test_parsing.py +131 -0
- package/python/tests/test_problem_statement_multimodal.py +111 -0
- package/python/tests/test_quick_stats.py +42 -0
- package/python/tests/test_run.py +37 -0
- package/python/tests/test_run_batch.py +110 -0
- package/python/tests/test_run_hooks.py +114 -0
- package/python/tests/test_run_replay.py +33 -0
- package/python/tests/test_run_single.py +125 -0
- package/python/tests/test_tools_command_parsing.py +193 -0
- package/python/tests/test_utils.py +15 -0
- package/python/tests/tools/__init__.py +0 -0
- package/python/tests/tools/conftest.py +12 -0
- package/python/tests/tools/test_default_utils.py +153 -0
- package/python/tests/tools/test_edit_replace.py +0 -0
- package/python/tests/tools/test_split_string.py +82 -0
- package/python/tests/utils.py +29 -0
- package/python/tools/diff_state/bin/_state_diff_state +52 -0
- package/python/tools/diff_state/config.yaml +2 -0
- package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
- package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
- package/python/tools/edit_anthropic/config.yaml +56 -0
- package/python/tools/edit_anthropic/install.sh +3 -0
- package/python/tools/filemap/bin/filemap +45 -0
- package/python/tools/filemap/config.yaml +9 -0
- package/python/tools/filemap/install.sh +2 -0
- package/python/tools/forfeit/bin/exit_forfeit +5 -0
- package/python/tools/forfeit/config.yaml +5 -0
- package/python/tools/image_tools/bin/view_image +36 -0
- package/python/tools/image_tools/config.yaml +9 -0
- package/python/tools/multilingual_setup/bin/do_nothing +2 -0
- package/python/tools/multilingual_setup/config.yaml +1 -0
- package/python/tools/multilingual_setup/install.sh +45 -0
- package/python/tools/registry/bin/_read_env +10 -0
- package/python/tools/registry/bin/_write_env +10 -0
- package/python/tools/registry/config.yaml +1 -0
- package/python/tools/registry/install.sh +6 -0
- package/python/tools/registry/lib/__init__.py +0 -0
- package/python/tools/registry/lib/registry.py +56 -0
- package/python/tools/review_on_submit_m/README.md +6 -0
- package/python/tools/review_on_submit_m/bin/submit +54 -0
- package/python/tools/review_on_submit_m/config.yaml +6 -0
- package/python/tools/review_on_submit_m/install.sh +0 -0
- package/python/tools/search/bin/find_file +31 -0
- package/python/tools/search/bin/search_dir +39 -0
- package/python/tools/search/bin/search_file +55 -0
- package/python/tools/search/config.yaml +37 -0
- package/python/tools/search/install.sh +3 -0
- package/python/tools/submit/bin/submit +17 -0
- package/python/tools/submit/config.yaml +5 -0
- package/python/tools/web_browser/bin/click_mouse +41 -0
- package/python/tools/web_browser/bin/close_site +28 -0
- package/python/tools/web_browser/bin/double_click_mouse +37 -0
- package/python/tools/web_browser/bin/drag_mouse +46 -0
- package/python/tools/web_browser/bin/execute_script_on_page +39 -0
- package/python/tools/web_browser/bin/get_console_output +48 -0
- package/python/tools/web_browser/bin/move_mouse +35 -0
- package/python/tools/web_browser/bin/navigate_back +33 -0
- package/python/tools/web_browser/bin/navigate_forward +33 -0
- package/python/tools/web_browser/bin/open_site +36 -0
- package/python/tools/web_browser/bin/press_keys_on_page +51 -0
- package/python/tools/web_browser/bin/reload_page +33 -0
- package/python/tools/web_browser/bin/run_web_browser_server +394 -0
- package/python/tools/web_browser/bin/screenshot_site +38 -0
- package/python/tools/web_browser/bin/scroll_on_page +40 -0
- package/python/tools/web_browser/bin/set_browser_window_size +40 -0
- package/python/tools/web_browser/bin/type_text +34 -0
- package/python/tools/web_browser/bin/wait_time +39 -0
- package/python/tools/web_browser/config.yaml +155 -0
- package/python/tools/web_browser/install.sh +22 -0
- package/python/tools/web_browser/lib/browser_manager.py +404 -0
- package/python/tools/web_browser/lib/web_browser_config.py +33 -0
- package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
- package/python/tools/web_browser/test_console.html +1 -0
- package/python/tools/windowed/bin/_state +25 -0
- package/python/tools/windowed/bin/create +29 -0
- package/python/tools/windowed/bin/goto +37 -0
- package/python/tools/windowed/bin/open +49 -0
- package/python/tools/windowed/bin/scroll_down +12 -0
- package/python/tools/windowed/bin/scroll_up +13 -0
- package/python/tools/windowed/config.yaml +38 -0
- package/python/tools/windowed/install.sh +15 -0
- package/python/tools/windowed/lib/__init__.py +0 -0
- package/python/tools/windowed/lib/flake8_utils.py +147 -0
- package/python/tools/windowed/lib/windowed_file.py +312 -0
- package/python/tools/windowed_edit_linting/bin/edit +128 -0
- package/python/tools/windowed_edit_linting/config.yaml +31 -0
- package/python/tools/windowed_edit_linting/install.sh +5 -0
- package/python/tools/windowed_edit_replace/bin/edit +172 -0
- package/python/tools/windowed_edit_replace/bin/insert +77 -0
- package/python/tools/windowed_edit_replace/config.yaml +60 -0
- package/python/tools/windowed_edit_replace/install.sh +5 -0
- package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
- package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
- package/python/tools/windowed_edit_rewrite/install.sh +5 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
- package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
- package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
- package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
- package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
- package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
- package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
- package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
- package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
- package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
- package/rust/Cargo.toml +100 -0
- package/rust/README.md +49 -0
- package/rust/src/agent/action_sampler.rs +130 -0
- package/rust/src/agent/agents.rs +1029 -0
- package/rust/src/agent/history_processors.rs +277 -0
- package/rust/src/agent/hooks/mod.rs +208 -0
- package/rust/src/agent/mod.rs +24 -0
- package/rust/src/agent/models.rs +837 -0
- package/rust/src/agent/problem_statement.rs +355 -0
- package/rust/src/agent/reviewer.rs +505 -0
- package/rust/src/bin/sweagent.rs +784 -0
- package/rust/src/environment/deployment.rs +631 -0
- package/rust/src/environment/hooks/mod.rs +114 -0
- package/rust/src/environment/mod.rs +16 -0
- package/rust/src/environment/repo.rs +265 -0
- package/rust/src/environment/runtime.rs +237 -0
- package/rust/src/environment/swe_env.rs +248 -0
- package/rust/src/exceptions.rs +228 -0
- package/rust/src/lib.rs +68 -0
- package/rust/src/monitoring.rs +482 -0
- package/rust/src/run/hooks/mod.rs +134 -0
- package/rust/src/run/mod.rs +12 -0
- package/rust/src/run/run_batch.rs +563 -0
- package/rust/src/run/run_single.rs +196 -0
- package/rust/src/tools/bundle.rs +224 -0
- package/rust/src/tools/commands.rs +173 -0
- package/rust/src/tools/mod.rs +295 -0
- package/rust/src/tools/parsing.rs +354 -0
- package/rust/src/tools/registry.rs +143 -0
- package/rust/src/types.rs +554 -0
- package/rust/src/utils/config.rs +105 -0
- package/rust/src/utils/files.rs +137 -0
- package/rust/src/utils/github.rs +171 -0
- package/rust/src/utils/log.rs +65 -0
- package/rust/src/utils/mod.rs +17 -0
- package/rust/src/utils/serialization.rs +181 -0
- package/rust/src/utils/template.rs +173 -0
- package/typescript/README.md +335 -0
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
//! Tool registry for managing available tools
|
|
2
|
+
|
|
3
|
+
use super::{create_bundle, Bundle, BundleConfig};
|
|
4
|
+
use crate::exceptions::Result;
|
|
5
|
+
use std::collections::HashMap;
|
|
6
|
+
|
|
7
|
+
/// Registry of available tools
|
|
8
|
+
pub struct ToolRegistry {
|
|
9
|
+
tools: HashMap<String, Bundle>,
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
impl ToolRegistry {
|
|
13
|
+
pub fn new() -> Self {
|
|
14
|
+
Self {
|
|
15
|
+
tools: HashMap::new(),
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
/// Register a tool bundle
|
|
20
|
+
pub fn register(&mut self, bundle: Bundle) {
|
|
21
|
+
self.tools.insert(bundle.name.clone(), bundle);
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
/// Register a tool from configuration
|
|
25
|
+
pub fn register_config(&mut self, config: &BundleConfig) -> Result<()> {
|
|
26
|
+
let bundle = create_bundle(config)?;
|
|
27
|
+
self.register(bundle);
|
|
28
|
+
Ok(())
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
/// Get a tool by name
|
|
32
|
+
pub fn get(&self, name: &str) -> Option<&Bundle> {
|
|
33
|
+
self.tools.get(name)
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
/// Check if a tool exists
|
|
37
|
+
pub fn has(&self, name: &str) -> bool {
|
|
38
|
+
self.tools.contains_key(name)
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
/// Get all registered tools
|
|
42
|
+
pub fn all(&self) -> Vec<&Bundle> {
|
|
43
|
+
self.tools.values().collect()
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
/// Get tool names
|
|
47
|
+
pub fn names(&self) -> Vec<&str> {
|
|
48
|
+
self.tools.keys().map(|s| s.as_str()).collect()
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
/// Remove a tool
|
|
52
|
+
pub fn remove(&mut self, name: &str) -> Option<Bundle> {
|
|
53
|
+
self.tools.remove(name)
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
/// Clear all tools
|
|
57
|
+
pub fn clear(&mut self) {
|
|
58
|
+
self.tools.clear();
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
impl Default for ToolRegistry {
|
|
63
|
+
fn default() -> Self {
|
|
64
|
+
Self::new()
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
/// Create a registry with default SWE-agent tools
|
|
69
|
+
pub fn create_default_registry() -> ToolRegistry {
|
|
70
|
+
let mut registry = ToolRegistry::new();
|
|
71
|
+
|
|
72
|
+
// Register common tools
|
|
73
|
+
registry.register(
|
|
74
|
+
Bundle::new("edit")
|
|
75
|
+
.with_end_name("ENDEDIT")
|
|
76
|
+
.with_description("Edit a file")
|
|
77
|
+
.with_signature("edit <file> <start_line> <end_line>"),
|
|
78
|
+
);
|
|
79
|
+
|
|
80
|
+
registry.register(
|
|
81
|
+
Bundle::new("view")
|
|
82
|
+
.with_description("View a file or directory")
|
|
83
|
+
.with_signature("view <path> [start_line] [end_line]"),
|
|
84
|
+
);
|
|
85
|
+
|
|
86
|
+
registry.register(
|
|
87
|
+
Bundle::new("search")
|
|
88
|
+
.with_description("Search for a pattern in files")
|
|
89
|
+
.with_signature("search <pattern> [path]"),
|
|
90
|
+
);
|
|
91
|
+
|
|
92
|
+
registry.register(
|
|
93
|
+
Bundle::new("find_file")
|
|
94
|
+
.with_description("Find files by name")
|
|
95
|
+
.with_signature("find_file <pattern> [directory]"),
|
|
96
|
+
);
|
|
97
|
+
|
|
98
|
+
registry.register(
|
|
99
|
+
Bundle::new("submit")
|
|
100
|
+
.with_description("Submit the solution")
|
|
101
|
+
.with_signature("submit"),
|
|
102
|
+
);
|
|
103
|
+
|
|
104
|
+
registry.register(
|
|
105
|
+
Bundle::new("exit")
|
|
106
|
+
.with_description("Exit without submitting")
|
|
107
|
+
.with_signature("exit"),
|
|
108
|
+
);
|
|
109
|
+
|
|
110
|
+
registry
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// A registered bundle is reported by `has`; an unknown name is not.
    #[test]
    fn test_registry_register() {
        let mut registry = ToolRegistry::new();
        let bundle = Bundle::new("test");
        registry.register(bundle);

        assert!(registry.has("test"));
        assert!(!registry.has("nonexistent"));
    }

    /// `get` hands back the stored bundle with its description intact.
    #[test]
    fn test_registry_get() {
        let mut registry = ToolRegistry::new();
        let described = Bundle::new("test").with_description("A test tool");
        registry.register(described);

        let stored = registry.get("test").unwrap();
        assert_eq!(stored.description.as_deref(), Some("A test tool"));
    }

    /// The default registry exposes the core editing and submission tools.
    #[test]
    fn test_default_registry() {
        let registry = create_default_registry();

        for name in ["edit", "view", "submit"] {
            assert!(registry.has(name));
        }
    }
}
|
|
@@ -0,0 +1,554 @@
|
|
|
1
|
+
//! Core type definitions for SWE-agent
|
|
2
|
+
//!
|
|
3
|
+
//! This module contains all shared types used throughout the SWE-agent implementation.
|
|
4
|
+
|
|
5
|
+
use serde::{Deserialize, Serialize};
|
|
6
|
+
use std::collections::HashMap;
|
|
7
|
+
|
|
8
|
+
/// Role in a conversation
|
|
9
|
+
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
|
|
10
|
+
#[serde(rename_all = "lowercase")]
|
|
11
|
+
pub enum Role {
|
|
12
|
+
System,
|
|
13
|
+
#[default]
|
|
14
|
+
User,
|
|
15
|
+
Assistant,
|
|
16
|
+
Tool,
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
/// Type of message in history
|
|
20
|
+
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
|
|
21
|
+
#[serde(rename_all = "snake_case")]
|
|
22
|
+
pub enum MessageType {
|
|
23
|
+
System,
|
|
24
|
+
#[default]
|
|
25
|
+
Observation,
|
|
26
|
+
Action,
|
|
27
|
+
Thought,
|
|
28
|
+
Demonstration,
|
|
29
|
+
User,
|
|
30
|
+
Assistant,
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
/// A thinking block from model output (for Claude-style extended thinking)
|
|
34
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
35
|
+
pub struct ThinkingBlock {
|
|
36
|
+
#[serde(rename = "type")]
|
|
37
|
+
pub block_type: String,
|
|
38
|
+
pub content: String,
|
|
39
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
40
|
+
pub start_time: Option<f64>,
|
|
41
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
42
|
+
pub end_time: Option<f64>,
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
/// Tool call function definition
|
|
46
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
47
|
+
pub struct ToolCallFunction {
|
|
48
|
+
pub name: String,
|
|
49
|
+
pub arguments: String,
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
/// A tool call from model output
|
|
53
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
54
|
+
pub struct ToolCall {
|
|
55
|
+
pub id: String,
|
|
56
|
+
#[serde(rename = "type")]
|
|
57
|
+
pub call_type: String,
|
|
58
|
+
pub function: ToolCallFunction,
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
/// Content can be either a string or structured content
|
|
62
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
63
|
+
#[serde(untagged)]
|
|
64
|
+
pub enum Content {
|
|
65
|
+
Text(String),
|
|
66
|
+
Structured(Vec<ContentPart>),
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
impl Default for Content {
|
|
70
|
+
fn default() -> Self {
|
|
71
|
+
Self::Text(String::new())
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
impl Content {
|
|
76
|
+
pub fn as_str(&self) -> String {
|
|
77
|
+
match self {
|
|
78
|
+
Content::Text(s) => s.clone(),
|
|
79
|
+
Content::Structured(parts) => parts
|
|
80
|
+
.iter()
|
|
81
|
+
.filter_map(|p| match p {
|
|
82
|
+
ContentPart::Text { text } => Some(text.clone()),
|
|
83
|
+
_ => None,
|
|
84
|
+
})
|
|
85
|
+
.collect::<Vec<_>>()
|
|
86
|
+
.join("\n"),
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
/// Part of structured content
|
|
92
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
93
|
+
#[serde(tag = "type", rename_all = "snake_case")]
|
|
94
|
+
pub enum ContentPart {
|
|
95
|
+
Text { text: String },
|
|
96
|
+
Image { image_url: ImageUrl },
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
/// Image URL reference
|
|
100
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
101
|
+
pub struct ImageUrl {
|
|
102
|
+
pub url: String,
|
|
103
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
104
|
+
pub detail: Option<String>,
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
/// A single item in the conversation history
|
|
108
|
+
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
|
109
|
+
pub struct HistoryItem {
|
|
110
|
+
pub role: Role,
|
|
111
|
+
pub content: Content,
|
|
112
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
113
|
+
pub agent: Option<String>,
|
|
114
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
115
|
+
pub message_type: Option<MessageType>,
|
|
116
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
117
|
+
pub is_demo: Option<bool>,
|
|
118
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
119
|
+
pub thought: Option<String>,
|
|
120
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
121
|
+
pub action: Option<String>,
|
|
122
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
123
|
+
pub tool_calls: Option<Vec<ToolCall>>,
|
|
124
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
125
|
+
pub tool_call_ids: Option<Vec<String>>,
|
|
126
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
127
|
+
pub thinking_blocks: Option<Vec<ThinkingBlock>>,
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
impl HistoryItem {
|
|
131
|
+
pub fn system(content: impl Into<String>) -> Self {
|
|
132
|
+
Self {
|
|
133
|
+
role: Role::System,
|
|
134
|
+
content: Content::Text(content.into()),
|
|
135
|
+
message_type: Some(MessageType::System),
|
|
136
|
+
..Default::default()
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
pub fn user(content: impl Into<String>) -> Self {
|
|
141
|
+
Self {
|
|
142
|
+
role: Role::User,
|
|
143
|
+
content: Content::Text(content.into()),
|
|
144
|
+
message_type: Some(MessageType::User),
|
|
145
|
+
..Default::default()
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
pub fn assistant(content: impl Into<String>) -> Self {
|
|
150
|
+
Self {
|
|
151
|
+
role: Role::Assistant,
|
|
152
|
+
content: Content::Text(content.into()),
|
|
153
|
+
message_type: Some(MessageType::Assistant),
|
|
154
|
+
..Default::default()
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
pub fn observation(content: impl Into<String>) -> Self {
|
|
159
|
+
Self {
|
|
160
|
+
role: Role::User,
|
|
161
|
+
content: Content::Text(content.into()),
|
|
162
|
+
message_type: Some(MessageType::Observation),
|
|
163
|
+
..Default::default()
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
pub fn action(thought: impl Into<String>, action: impl Into<String>) -> Self {
|
|
168
|
+
let thought_str = thought.into();
|
|
169
|
+
let action_str = action.into();
|
|
170
|
+
Self {
|
|
171
|
+
role: Role::Assistant,
|
|
172
|
+
content: Content::Text(format!("{}\n```\n{}\n```", thought_str, action_str)),
|
|
173
|
+
message_type: Some(MessageType::Action),
|
|
174
|
+
thought: Some(thought_str),
|
|
175
|
+
action: Some(action_str),
|
|
176
|
+
..Default::default()
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
/// Conversation history
|
|
182
|
+
pub type History = Vec<HistoryItem>;
|
|
183
|
+
|
|
184
|
+
/// Environment state at a point in time
|
|
185
|
+
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
|
186
|
+
pub struct EnvironmentState {
|
|
187
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
188
|
+
pub working_dir: Option<String>,
|
|
189
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
190
|
+
pub open_files: Option<Vec<String>>,
|
|
191
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
192
|
+
pub git_status: Option<String>,
|
|
193
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
194
|
+
pub diff: Option<String>,
|
|
195
|
+
#[serde(flatten)]
|
|
196
|
+
pub extra: HashMap<String, serde_json::Value>,
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
/// Query message for tracking
|
|
200
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
201
|
+
pub struct QueryMessage {
|
|
202
|
+
pub role: Role,
|
|
203
|
+
pub content: String,
|
|
204
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
205
|
+
pub message_type: Option<MessageType>,
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
/// Output from a single agent step
|
|
209
|
+
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
|
210
|
+
pub struct StepOutput {
|
|
211
|
+
pub done: bool,
|
|
212
|
+
pub thought: String,
|
|
213
|
+
pub action: String,
|
|
214
|
+
pub observation: String,
|
|
215
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
216
|
+
pub submission: Option<String>,
|
|
217
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
218
|
+
pub exit_status: Option<String>,
|
|
219
|
+
pub execution_time: f64,
|
|
220
|
+
pub state: EnvironmentState,
|
|
221
|
+
pub query: Vec<QueryMessage>,
|
|
222
|
+
#[serde(default)]
|
|
223
|
+
pub extra_info: HashMap<String, serde_json::Value>,
|
|
224
|
+
pub output: String,
|
|
225
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
226
|
+
pub tool_calls: Option<Vec<ToolCall>>,
|
|
227
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
228
|
+
pub tool_call_ids: Option<Vec<String>>,
|
|
229
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
230
|
+
pub thinking_blocks: Option<Vec<ThinkingBlock>>,
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
impl StepOutput {
|
|
234
|
+
pub fn to_template_format_dict(&self) -> HashMap<String, String> {
|
|
235
|
+
let mut dict = HashMap::new();
|
|
236
|
+
dict.insert("thought".to_string(), self.thought.clone());
|
|
237
|
+
dict.insert("action".to_string(), self.action.clone());
|
|
238
|
+
dict.insert("observation".to_string(), self.observation.clone());
|
|
239
|
+
if let Some(ref status) = self.exit_status {
|
|
240
|
+
dict.insert("exit_status".to_string(), status.clone());
|
|
241
|
+
}
|
|
242
|
+
dict
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
/// A single step in a trajectory
|
|
247
|
+
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
|
248
|
+
pub struct TrajectoryStep {
|
|
249
|
+
pub action: String,
|
|
250
|
+
pub observation: String,
|
|
251
|
+
pub response: String,
|
|
252
|
+
pub thought: String,
|
|
253
|
+
pub execution_time: f64,
|
|
254
|
+
pub state: EnvironmentState,
|
|
255
|
+
pub query: Vec<QueryMessage>,
|
|
256
|
+
#[serde(default)]
|
|
257
|
+
pub extra_info: HashMap<String, serde_json::Value>,
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
impl From<&StepOutput> for TrajectoryStep {
|
|
261
|
+
fn from(step: &StepOutput) -> Self {
|
|
262
|
+
Self {
|
|
263
|
+
action: step.action.clone(),
|
|
264
|
+
observation: step.observation.clone(),
|
|
265
|
+
response: step.output.clone(),
|
|
266
|
+
thought: step.thought.clone(),
|
|
267
|
+
execution_time: step.execution_time,
|
|
268
|
+
state: step.state.clone(),
|
|
269
|
+
query: step.query.clone(),
|
|
270
|
+
extra_info: step.extra_info.clone(),
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
/// Full trajectory of an agent run
|
|
276
|
+
pub type Trajectory = Vec<TrajectoryStep>;
|
|
277
|
+
|
|
278
|
+
/// Model statistics for tracking costs and usage
|
|
279
|
+
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
|
280
|
+
pub struct ModelStats {
|
|
281
|
+
pub instance_cost: f64,
|
|
282
|
+
pub tokens_sent: u64,
|
|
283
|
+
pub tokens_received: u64,
|
|
284
|
+
pub api_calls: u64,
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
impl ModelStats {
|
|
288
|
+
pub fn add(&self, other: &ModelStats) -> ModelStats {
|
|
289
|
+
ModelStats {
|
|
290
|
+
instance_cost: self.instance_cost + other.instance_cost,
|
|
291
|
+
tokens_sent: self.tokens_sent + other.tokens_sent,
|
|
292
|
+
tokens_received: self.tokens_received + other.tokens_received,
|
|
293
|
+
api_calls: self.api_calls + other.api_calls,
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
/// Agent run information
|
|
299
|
+
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
|
300
|
+
pub struct AgentInfo {
|
|
301
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
302
|
+
pub swe_agent_version: Option<String>,
|
|
303
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
304
|
+
pub submission: Option<String>,
|
|
305
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
306
|
+
pub exit_status: Option<String>,
|
|
307
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
308
|
+
pub model_stats: Option<ModelStats>,
|
|
309
|
+
#[serde(flatten)]
|
|
310
|
+
pub extra: HashMap<String, serde_json::Value>,
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
/// Result of an agent run
|
|
314
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
315
|
+
pub struct AgentRunResult {
|
|
316
|
+
pub info: AgentInfo,
|
|
317
|
+
pub trajectory: Trajectory,
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
/// Output from a model query
|
|
321
|
+
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
|
322
|
+
pub struct ModelOutput {
|
|
323
|
+
pub message: String,
|
|
324
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
325
|
+
pub tool_calls: Option<Vec<ToolCall>>,
|
|
326
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
327
|
+
pub thinking_blocks: Option<Vec<ThinkingBlock>>,
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
/// API response from LLM providers
|
|
331
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
332
|
+
pub struct ApiResponse {
|
|
333
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
334
|
+
pub choices: Option<Vec<ApiChoice>>,
|
|
335
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
336
|
+
pub usage: Option<ApiUsage>,
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
/// A single choice in API response
|
|
340
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
341
|
+
pub struct ApiChoice {
|
|
342
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
343
|
+
pub message: Option<ApiMessage>,
|
|
344
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
345
|
+
pub text: Option<String>,
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
/// Message in API response
|
|
349
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
350
|
+
pub struct ApiMessage {
|
|
351
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
352
|
+
pub content: Option<String>,
|
|
353
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
354
|
+
pub role: Option<String>,
|
|
355
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
356
|
+
pub tool_calls: Option<Vec<ToolCall>>,
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
/// Usage statistics in API response
|
|
360
|
+
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
|
361
|
+
pub struct ApiUsage {
|
|
362
|
+
#[serde(default)]
|
|
363
|
+
pub prompt_tokens: u64,
|
|
364
|
+
#[serde(default)]
|
|
365
|
+
pub completion_tokens: u64,
|
|
366
|
+
#[serde(default)]
|
|
367
|
+
pub total_tokens: u64,
|
|
368
|
+
}
|
|
369
|
+
|
|
370
|
+
/// Batch instance for running multiple problems
|
|
371
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
372
|
+
pub struct BatchInstance {
|
|
373
|
+
pub instance_id: String,
|
|
374
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
375
|
+
pub problem_statement: Option<String>,
|
|
376
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
377
|
+
pub repo: Option<String>,
|
|
378
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
379
|
+
pub base_commit: Option<String>,
|
|
380
|
+
#[serde(flatten)]
|
|
381
|
+
pub extra: HashMap<String, serde_json::Value>,
|
|
382
|
+
}
|
|
383
|
+
|
|
384
|
+
/// Simple batch instance format
|
|
385
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
386
|
+
pub struct SimpleBatchInstance {
|
|
387
|
+
pub id: String,
|
|
388
|
+
pub problem_statement: String,
|
|
389
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
390
|
+
pub repo_path: Option<String>,
|
|
391
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
392
|
+
pub github_url: Option<String>,
|
|
393
|
+
}
|
|
394
|
+
|
|
395
|
+
/// Retry configuration for API calls
|
|
396
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
397
|
+
pub struct RetryConfig {
|
|
398
|
+
#[serde(default = "default_retries")]
|
|
399
|
+
pub retries: u32,
|
|
400
|
+
#[serde(default = "default_min_wait")]
|
|
401
|
+
pub min_wait: u64,
|
|
402
|
+
#[serde(default = "default_max_wait")]
|
|
403
|
+
pub max_wait: u64,
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
/// Default value for `RetryConfig::retries`.
fn default_retries() -> u32 {
    const RETRIES: u32 = 20;
    RETRIES
}
|
|
409
|
+
|
|
410
|
+
/// Default value for `RetryConfig::min_wait`.
fn default_min_wait() -> u64 {
    const MIN_WAIT: u64 = 10;
    MIN_WAIT
}
|
|
413
|
+
|
|
414
|
+
/// Default value for `RetryConfig::max_wait`.
fn default_max_wait() -> u64 {
    const MAX_WAIT: u64 = 120;
    MAX_WAIT
}
|
|
417
|
+
|
|
418
|
+
impl Default for RetryConfig {
|
|
419
|
+
fn default() -> Self {
|
|
420
|
+
Self {
|
|
421
|
+
retries: default_retries(),
|
|
422
|
+
min_wait: default_min_wait(),
|
|
423
|
+
max_wait: default_max_wait(),
|
|
424
|
+
}
|
|
425
|
+
}
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
/// Template configuration for agent messages
|
|
429
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
430
|
+
pub struct TemplateConfig {
|
|
431
|
+
#[serde(default)]
|
|
432
|
+
pub system_template: String,
|
|
433
|
+
#[serde(default)]
|
|
434
|
+
pub instance_template: String,
|
|
435
|
+
#[serde(default = "default_next_step_template")]
|
|
436
|
+
pub next_step_template: String,
|
|
437
|
+
#[serde(default = "default_next_step_truncated_template")]
|
|
438
|
+
pub next_step_truncated_observation_template: String,
|
|
439
|
+
#[serde(default = "default_max_observation_length")]
|
|
440
|
+
pub max_observation_length: usize,
|
|
441
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
442
|
+
pub next_step_no_output_template: Option<String>,
|
|
443
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
444
|
+
pub strategy_template: Option<String>,
|
|
445
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
446
|
+
pub demonstration_template: Option<String>,
|
|
447
|
+
#[serde(default)]
|
|
448
|
+
pub demonstrations: Vec<String>,
|
|
449
|
+
#[serde(default)]
|
|
450
|
+
pub put_demos_in_history: bool,
|
|
451
|
+
#[serde(default)]
|
|
452
|
+
pub disable_image_processing: bool,
|
|
453
|
+
#[serde(default = "default_shell_check_error_template")]
|
|
454
|
+
pub shell_check_error_template: String,
|
|
455
|
+
#[serde(default = "default_command_cancelled_template")]
|
|
456
|
+
pub command_cancelled_timeout_template: String,
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
/// Default value for `TemplateConfig::next_step_template`.
///
/// The doubled braces are literal characters in the returned string.
fn default_next_step_template() -> String {
    String::from("Observation: {{observation}}")
}
|
|
462
|
+
|
|
463
|
+
/// Default value for `TemplateConfig::next_step_truncated_observation_template`.
///
/// The doubled braces are literal characters in the returned string.
fn default_next_step_truncated_template() -> String {
    String::from(
        "Observation: {{observation}}<response clipped>\n<NOTE>Observations should not exceed {{max_observation_length}} characters. {{elided_chars}} characters were elided.</NOTE>",
    )
}
|
|
466
|
+
|
|
467
|
+
/// Default value for `TemplateConfig::max_observation_length`.
fn default_max_observation_length() -> usize {
    100_000
}
|
|
470
|
+
|
|
471
|
+
/// Default value for `TemplateConfig::shell_check_error_template`.
///
/// The doubled braces are literal characters in the returned string.
fn default_shell_check_error_template() -> String {
    String::from(
        "Your command contains syntax errors. Please fix them and try again.\nError: {{error_message}}\nHint: {{hint}}",
    )
}
|
|
474
|
+
|
|
475
|
+
/// Default value for `TemplateConfig::command_cancelled_timeout_template`.
///
/// The doubled braces are literal characters in the returned string.
fn default_command_cancelled_template() -> String {
    String::from("Command cancelled after {{timeout}} seconds. The command was: {{command}}")
}
|
|
478
|
+
|
|
479
|
+
impl Default for TemplateConfig {
|
|
480
|
+
fn default() -> Self {
|
|
481
|
+
Self {
|
|
482
|
+
system_template: String::new(),
|
|
483
|
+
instance_template: String::new(),
|
|
484
|
+
next_step_template: default_next_step_template(),
|
|
485
|
+
next_step_truncated_observation_template: default_next_step_truncated_template(),
|
|
486
|
+
max_observation_length: default_max_observation_length(),
|
|
487
|
+
next_step_no_output_template: None,
|
|
488
|
+
strategy_template: None,
|
|
489
|
+
demonstration_template: None,
|
|
490
|
+
demonstrations: Vec::new(),
|
|
491
|
+
put_demos_in_history: false,
|
|
492
|
+
disable_image_processing: false,
|
|
493
|
+
shell_check_error_template: default_shell_check_error_template(),
|
|
494
|
+
command_cancelled_timeout_template: default_command_cancelled_template(),
|
|
495
|
+
}
|
|
496
|
+
}
|
|
497
|
+
}
|
|
498
|
+
|
|
499
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// `HistoryItem::system` sets the system role and keeps the text as-is.
    #[test]
    fn test_history_item_system() {
        let prompt = "You are a helpful assistant.";
        let item = HistoryItem::system(prompt);
        assert_eq!(item.role, Role::System);
        assert_eq!(item.content.as_str(), prompt);
    }

    /// `HistoryItem::action` sets the assistant role and stores both the
    /// thought and the action in their own fields.
    #[test]
    fn test_history_item_action() {
        let item = HistoryItem::action("I will run a command", "ls -la");
        assert_eq!(item.role, Role::Assistant);
        assert_eq!(item.thought.as_deref(), Some("I will run a command"));
        assert_eq!(item.action.as_deref(), Some("ls -la"));
    }

    /// The template dictionary exposes the three text fields plus
    /// `exit_status` when it is set.
    #[test]
    fn test_step_output_to_template_dict() {
        let step = StepOutput {
            thought: "thinking".to_string(),
            action: "doing".to_string(),
            observation: "seeing".to_string(),
            exit_status: Some("done".to_string()),
            ..Default::default()
        };

        let dict = step.to_template_format_dict();
        let lookup = |key: &str| dict.get(key).map(String::as_str);

        assert_eq!(lookup("thought"), Some("thinking"));
        assert_eq!(lookup("action"), Some("doing"));
        assert_eq!(lookup("observation"), Some("seeing"));
        assert_eq!(lookup("exit_status"), Some("done"));
    }

    /// `ModelStats::add` sums every field.
    #[test]
    fn test_model_stats_add() {
        let first = ModelStats {
            instance_cost: 1.0,
            tokens_sent: 100,
            tokens_received: 50,
            api_calls: 1,
        };
        let second = ModelStats {
            instance_cost: 2.0,
            tokens_sent: 200,
            tokens_received: 100,
            api_calls: 2,
        };

        let total = first.add(&second);

        assert_eq!(total.instance_cost, 3.0);
        assert_eq!(total.tokens_sent, 300);
        assert_eq!(total.tokens_received, 150);
        assert_eq!(total.api_calls, 3);
    }
}
|