@elizaos/sweagent-root 2.0.0-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +270 -0
- package/package.json +71 -0
- package/python/LICENSE +21 -0
- package/python/config/README.md +15 -0
- package/python/config/bash_only.yaml +222 -0
- package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
- package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
- package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
- package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
- package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
- package/python/config/coding_challenge.yaml +104 -0
- package/python/config/default.yaml +69 -0
- package/python/config/default_backticks.yaml +69 -0
- package/python/config/default_mm_no_images.yaml +82 -0
- package/python/config/default_mm_with_images.yaml +83 -0
- package/python/config/demo/default.yaml +80 -0
- package/python/config/demo/no_instructions.yaml +69 -0
- package/python/config/demo/only_bash.yaml +60 -0
- package/python/config/exotic/default_shell.yaml +52 -0
- package/python/config/exotic/windowed_replace.yaml +125 -0
- package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
- package/python/config/human/human.yaml +24 -0
- package/python/config/human/human_demo.yaml +52 -0
- package/python/config/sweagent_0_7/07.yaml +101 -0
- package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
- package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
- package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
- package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
- package/python/mlc_config.json +44 -0
- package/python/pyproject.toml +262 -0
- package/python/sweagent/__init__.py +114 -0
- package/python/sweagent/__main__.py +4 -0
- package/python/sweagent/agent/__init__.py +0 -0
- package/python/sweagent/agent/action_sampler.py +317 -0
- package/python/sweagent/agent/agents.py +1294 -0
- package/python/sweagent/agent/extra/shell_agent.py +106 -0
- package/python/sweagent/agent/history_processors.py +399 -0
- package/python/sweagent/agent/hooks/__init__.py +0 -0
- package/python/sweagent/agent/hooks/abstract.py +139 -0
- package/python/sweagent/agent/hooks/status.py +34 -0
- package/python/sweagent/agent/models.py +896 -0
- package/python/sweagent/agent/problem_statement.py +312 -0
- package/python/sweagent/agent/reviewer.py +664 -0
- package/python/sweagent/environment/__init__.py +0 -0
- package/python/sweagent/environment/hooks/__init__.py +0 -0
- package/python/sweagent/environment/hooks/abstract.py +60 -0
- package/python/sweagent/environment/hooks/status.py +28 -0
- package/python/sweagent/environment/repo.py +219 -0
- package/python/sweagent/environment/swe_env.py +276 -0
- package/python/sweagent/exceptions.py +54 -0
- package/python/sweagent/inspector/README.md +6 -0
- package/python/sweagent/inspector/__init__.py +0 -0
- package/python/sweagent/inspector/favicon.ico +0 -0
- package/python/sweagent/inspector/fileViewer.js +354 -0
- package/python/sweagent/inspector/icons/computer.png +0 -0
- package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
- package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
- package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
- package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
- package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
- package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
- package/python/sweagent/inspector/index.html +25 -0
- package/python/sweagent/inspector/server.py +354 -0
- package/python/sweagent/inspector/static.py +169 -0
- package/python/sweagent/inspector/style.css +454 -0
- package/python/sweagent/run/__init__.py +0 -0
- package/python/sweagent/run/_progress.py +158 -0
- package/python/sweagent/run/batch_instances.py +419 -0
- package/python/sweagent/run/common.py +387 -0
- package/python/sweagent/run/compare_runs.py +123 -0
- package/python/sweagent/run/extract_pred.py +19 -0
- package/python/sweagent/run/hooks/__init__.py +0 -0
- package/python/sweagent/run/hooks/abstract.py +67 -0
- package/python/sweagent/run/hooks/apply_patch.py +106 -0
- package/python/sweagent/run/hooks/open_pr.py +244 -0
- package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
- package/python/sweagent/run/inspector_cli.py +493 -0
- package/python/sweagent/run/merge_predictions.py +64 -0
- package/python/sweagent/run/quick_stats.py +96 -0
- package/python/sweagent/run/remove_unfinished.py +63 -0
- package/python/sweagent/run/rich_test.py +91 -0
- package/python/sweagent/run/run.py +147 -0
- package/python/sweagent/run/run_batch.py +442 -0
- package/python/sweagent/run/run_replay.py +219 -0
- package/python/sweagent/run/run_shell.py +155 -0
- package/python/sweagent/run/run_single.py +225 -0
- package/python/sweagent/run/run_traj_to_demo.py +85 -0
- package/python/sweagent/tools/__init__.py +0 -0
- package/python/sweagent/tools/bundle.py +57 -0
- package/python/sweagent/tools/commands.py +220 -0
- package/python/sweagent/tools/parsing.py +619 -0
- package/python/sweagent/tools/tools.py +430 -0
- package/python/sweagent/tools/utils.py +108 -0
- package/python/sweagent/types.py +102 -0
- package/python/sweagent/utils/__init__.py +0 -0
- package/python/sweagent/utils/config.py +80 -0
- package/python/sweagent/utils/files.py +27 -0
- package/python/sweagent/utils/github.py +118 -0
- package/python/sweagent/utils/jinja_warnings.py +14 -0
- package/python/sweagent/utils/log.py +175 -0
- package/python/sweagent/utils/patch_formatter.py +152 -0
- package/python/sweagent/utils/serialization.py +45 -0
- package/python/tests/__init__.py +0 -0
- package/python/tests/conftest.py +191 -0
- package/python/tests/test_agent.py +258 -0
- package/python/tests/test_batch_instance.py +43 -0
- package/python/tests/test_commands/_interactive_dummy.py +35 -0
- package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
- package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
- package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
- package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
- package/python/tests/test_data/data_sources/human_eval.json +1 -0
- package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
- package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
- package/python/tests/test_env.py +66 -0
- package/python/tests/test_env_utils.py +129 -0
- package/python/tests/test_history_processors.py +40 -0
- package/python/tests/test_models.py +23 -0
- package/python/tests/test_openai_live.py +164 -0
- package/python/tests/test_packaging.py +7 -0
- package/python/tests/test_parsing.py +131 -0
- package/python/tests/test_problem_statement_multimodal.py +111 -0
- package/python/tests/test_quick_stats.py +42 -0
- package/python/tests/test_run.py +37 -0
- package/python/tests/test_run_batch.py +110 -0
- package/python/tests/test_run_hooks.py +114 -0
- package/python/tests/test_run_replay.py +33 -0
- package/python/tests/test_run_single.py +125 -0
- package/python/tests/test_tools_command_parsing.py +193 -0
- package/python/tests/test_utils.py +15 -0
- package/python/tests/tools/__init__.py +0 -0
- package/python/tests/tools/conftest.py +12 -0
- package/python/tests/tools/test_default_utils.py +153 -0
- package/python/tests/tools/test_edit_replace.py +0 -0
- package/python/tests/tools/test_split_string.py +82 -0
- package/python/tests/utils.py +29 -0
- package/python/tools/diff_state/bin/_state_diff_state +52 -0
- package/python/tools/diff_state/config.yaml +2 -0
- package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
- package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
- package/python/tools/edit_anthropic/config.yaml +56 -0
- package/python/tools/edit_anthropic/install.sh +3 -0
- package/python/tools/filemap/bin/filemap +45 -0
- package/python/tools/filemap/config.yaml +9 -0
- package/python/tools/filemap/install.sh +2 -0
- package/python/tools/forfeit/bin/exit_forfeit +5 -0
- package/python/tools/forfeit/config.yaml +5 -0
- package/python/tools/image_tools/bin/view_image +36 -0
- package/python/tools/image_tools/config.yaml +9 -0
- package/python/tools/multilingual_setup/bin/do_nothing +2 -0
- package/python/tools/multilingual_setup/config.yaml +1 -0
- package/python/tools/multilingual_setup/install.sh +45 -0
- package/python/tools/registry/bin/_read_env +10 -0
- package/python/tools/registry/bin/_write_env +10 -0
- package/python/tools/registry/config.yaml +1 -0
- package/python/tools/registry/install.sh +6 -0
- package/python/tools/registry/lib/__init__.py +0 -0
- package/python/tools/registry/lib/registry.py +56 -0
- package/python/tools/review_on_submit_m/README.md +6 -0
- package/python/tools/review_on_submit_m/bin/submit +54 -0
- package/python/tools/review_on_submit_m/config.yaml +6 -0
- package/python/tools/review_on_submit_m/install.sh +0 -0
- package/python/tools/search/bin/find_file +31 -0
- package/python/tools/search/bin/search_dir +39 -0
- package/python/tools/search/bin/search_file +55 -0
- package/python/tools/search/config.yaml +37 -0
- package/python/tools/search/install.sh +3 -0
- package/python/tools/submit/bin/submit +17 -0
- package/python/tools/submit/config.yaml +5 -0
- package/python/tools/web_browser/bin/click_mouse +41 -0
- package/python/tools/web_browser/bin/close_site +28 -0
- package/python/tools/web_browser/bin/double_click_mouse +37 -0
- package/python/tools/web_browser/bin/drag_mouse +46 -0
- package/python/tools/web_browser/bin/execute_script_on_page +39 -0
- package/python/tools/web_browser/bin/get_console_output +48 -0
- package/python/tools/web_browser/bin/move_mouse +35 -0
- package/python/tools/web_browser/bin/navigate_back +33 -0
- package/python/tools/web_browser/bin/navigate_forward +33 -0
- package/python/tools/web_browser/bin/open_site +36 -0
- package/python/tools/web_browser/bin/press_keys_on_page +51 -0
- package/python/tools/web_browser/bin/reload_page +33 -0
- package/python/tools/web_browser/bin/run_web_browser_server +394 -0
- package/python/tools/web_browser/bin/screenshot_site +38 -0
- package/python/tools/web_browser/bin/scroll_on_page +40 -0
- package/python/tools/web_browser/bin/set_browser_window_size +40 -0
- package/python/tools/web_browser/bin/type_text +34 -0
- package/python/tools/web_browser/bin/wait_time +39 -0
- package/python/tools/web_browser/config.yaml +155 -0
- package/python/tools/web_browser/install.sh +22 -0
- package/python/tools/web_browser/lib/browser_manager.py +404 -0
- package/python/tools/web_browser/lib/web_browser_config.py +33 -0
- package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
- package/python/tools/web_browser/test_console.html +1 -0
- package/python/tools/windowed/bin/_state +25 -0
- package/python/tools/windowed/bin/create +29 -0
- package/python/tools/windowed/bin/goto +37 -0
- package/python/tools/windowed/bin/open +49 -0
- package/python/tools/windowed/bin/scroll_down +12 -0
- package/python/tools/windowed/bin/scroll_up +13 -0
- package/python/tools/windowed/config.yaml +38 -0
- package/python/tools/windowed/install.sh +15 -0
- package/python/tools/windowed/lib/__init__.py +0 -0
- package/python/tools/windowed/lib/flake8_utils.py +147 -0
- package/python/tools/windowed/lib/windowed_file.py +312 -0
- package/python/tools/windowed_edit_linting/bin/edit +128 -0
- package/python/tools/windowed_edit_linting/config.yaml +31 -0
- package/python/tools/windowed_edit_linting/install.sh +5 -0
- package/python/tools/windowed_edit_replace/bin/edit +172 -0
- package/python/tools/windowed_edit_replace/bin/insert +77 -0
- package/python/tools/windowed_edit_replace/config.yaml +60 -0
- package/python/tools/windowed_edit_replace/install.sh +5 -0
- package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
- package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
- package/python/tools/windowed_edit_rewrite/install.sh +5 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
- package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
- package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
- package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
- package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
- package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
- package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
- package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
- package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
- package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
- package/rust/Cargo.toml +100 -0
- package/rust/README.md +49 -0
- package/rust/src/agent/action_sampler.rs +130 -0
- package/rust/src/agent/agents.rs +1029 -0
- package/rust/src/agent/history_processors.rs +277 -0
- package/rust/src/agent/hooks/mod.rs +208 -0
- package/rust/src/agent/mod.rs +24 -0
- package/rust/src/agent/models.rs +837 -0
- package/rust/src/agent/problem_statement.rs +355 -0
- package/rust/src/agent/reviewer.rs +505 -0
- package/rust/src/bin/sweagent.rs +784 -0
- package/rust/src/environment/deployment.rs +631 -0
- package/rust/src/environment/hooks/mod.rs +114 -0
- package/rust/src/environment/mod.rs +16 -0
- package/rust/src/environment/repo.rs +265 -0
- package/rust/src/environment/runtime.rs +237 -0
- package/rust/src/environment/swe_env.rs +248 -0
- package/rust/src/exceptions.rs +228 -0
- package/rust/src/lib.rs +68 -0
- package/rust/src/monitoring.rs +482 -0
- package/rust/src/run/hooks/mod.rs +134 -0
- package/rust/src/run/mod.rs +12 -0
- package/rust/src/run/run_batch.rs +563 -0
- package/rust/src/run/run_single.rs +196 -0
- package/rust/src/tools/bundle.rs +224 -0
- package/rust/src/tools/commands.rs +173 -0
- package/rust/src/tools/mod.rs +295 -0
- package/rust/src/tools/parsing.rs +354 -0
- package/rust/src/tools/registry.rs +143 -0
- package/rust/src/types.rs +554 -0
- package/rust/src/utils/config.rs +105 -0
- package/rust/src/utils/files.rs +137 -0
- package/rust/src/utils/github.rs +171 -0
- package/rust/src/utils/log.rs +65 -0
- package/rust/src/utils/mod.rs +17 -0
- package/rust/src/utils/serialization.rs +181 -0
- package/rust/src/utils/template.rs +173 -0
- package/typescript/README.md +335 -0
|
@@ -0,0 +1,837 @@
|
|
|
1
|
+
//! Model implementations for SWE-agent
|
|
2
|
+
//!
|
|
3
|
+
//! This module contains various model implementations for interacting with LLMs.
|
|
4
|
+
|
|
5
|
+
use crate::exceptions::{Result, SWEAgentError};
|
|
6
|
+
use crate::types::{History, ModelOutput, ModelStats, RetryConfig, Role, ToolCall};
|
|
7
|
+
use async_trait::async_trait;
|
|
8
|
+
use serde::{Deserialize, Serialize};
|
|
9
|
+
use std::collections::HashMap;
|
|
10
|
+
use std::sync::atomic::{AtomicU64, Ordering};
|
|
11
|
+
use std::sync::Arc;
|
|
12
|
+
use tokio::sync::Mutex;
|
|
13
|
+
|
|
14
|
+
/// Global statistics tracking across all model instances.
///
/// Both counters are atomics, so a single instance can be updated through a
/// shared reference (for example behind an `Arc`).
#[derive(Debug, Default)]
pub struct GlobalStats {
    /// Accumulated cost, stored as whole micro-dollars for precision.
    pub total_cost: AtomicU64,
    /// Time of the most recent `update_timestamp` call, in whole seconds
    /// since the Unix epoch (0 until the first call).
    pub last_query_timestamp: AtomicU64,
}

impl GlobalStats {
    /// Adds `cost` (in dollars) to the running total.
    ///
    /// The value is converted to micro-dollars and rounded to the nearest
    /// integer. Rounding (rather than truncating) keeps the accumulated total
    /// from being systematically biased low by dropped fractions and by
    /// binary floating-point representation error.
    ///
    /// Non-finite and non-positive costs are ignored: the counter is unsigned,
    /// and an infinite value would saturate the conversion and wrap the
    /// counter on addition.
    pub fn add_cost(&self, cost: f64) {
        if !cost.is_finite() || cost <= 0.0 {
            return;
        }
        let micro_cost = (cost * 1_000_000.0).round() as u64;
        self.total_cost.fetch_add(micro_cost, Ordering::SeqCst);
    }

    /// Returns the accumulated total cost in dollars.
    pub fn get_total_cost(&self) -> f64 {
        self.total_cost.load(Ordering::SeqCst) as f64 / 1_000_000.0
    }

    /// Records the current wall-clock time as the last query timestamp.
    ///
    /// Stores 0 if the system clock reports a time before the Unix epoch.
    pub fn update_timestamp(&self) {
        let now = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map(|d| d.as_secs())
            .unwrap_or(0);
        self.last_query_timestamp.store(now, Ordering::SeqCst);
    }
}
|
|
39
|
+
|
|
40
|
+
/// Instance-specific statistics
|
|
41
|
+
#[derive(Debug, Default, Clone)]
|
|
42
|
+
pub struct InstanceStats {
|
|
43
|
+
pub instance_cost: f64,
|
|
44
|
+
pub tokens_sent: u64,
|
|
45
|
+
pub tokens_received: u64,
|
|
46
|
+
pub api_calls: u64,
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
impl InstanceStats {
|
|
50
|
+
pub fn add(&self, other: &InstanceStats) -> InstanceStats {
|
|
51
|
+
InstanceStats {
|
|
52
|
+
instance_cost: self.instance_cost + other.instance_cost,
|
|
53
|
+
tokens_sent: self.tokens_sent + other.tokens_sent,
|
|
54
|
+
tokens_received: self.tokens_received + other.tokens_received,
|
|
55
|
+
api_calls: self.api_calls + other.api_calls,
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
pub fn to_model_stats(&self) -> ModelStats {
|
|
60
|
+
ModelStats {
|
|
61
|
+
instance_cost: self.instance_cost,
|
|
62
|
+
tokens_sent: self.tokens_sent,
|
|
63
|
+
tokens_received: self.tokens_received,
|
|
64
|
+
api_calls: self.api_calls,
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/// Abstract trait for all models
|
|
70
|
+
#[async_trait]
|
|
71
|
+
pub trait Model: Send + Sync {
|
|
72
|
+
/// Query the model with conversation history
|
|
73
|
+
async fn query(&self, history: &History) -> Result<ModelOutput>;
|
|
74
|
+
|
|
75
|
+
/// Query with specific temperature and number of completions
|
|
76
|
+
async fn query_with_params(
|
|
77
|
+
&self,
|
|
78
|
+
history: &History,
|
|
79
|
+
_temperature: Option<f64>,
|
|
80
|
+
_n: Option<usize>,
|
|
81
|
+
) -> Result<Vec<ModelOutput>> {
|
|
82
|
+
let output = self.query(history).await?;
|
|
83
|
+
Ok(vec![output])
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
/// Reset instance statistics
|
|
87
|
+
fn reset_stats(&self);
|
|
88
|
+
|
|
89
|
+
/// Get current instance statistics
|
|
90
|
+
fn get_stats(&self) -> InstanceStats;
|
|
91
|
+
|
|
92
|
+
/// Get per-instance cost limit
|
|
93
|
+
fn instance_cost_limit(&self) -> f64;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
/// Generic API model configuration
|
|
97
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
98
|
+
pub struct GenericApiModelConfig {
|
|
99
|
+
pub name: String,
|
|
100
|
+
#[serde(default = "default_per_instance_cost_limit")]
|
|
101
|
+
pub per_instance_cost_limit: f64,
|
|
102
|
+
#[serde(default)]
|
|
103
|
+
pub total_cost_limit: f64,
|
|
104
|
+
#[serde(default)]
|
|
105
|
+
pub per_instance_call_limit: u64,
|
|
106
|
+
#[serde(default)]
|
|
107
|
+
pub temperature: f64,
|
|
108
|
+
#[serde(default = "default_top_p")]
|
|
109
|
+
pub top_p: Option<f64>,
|
|
110
|
+
#[serde(default)]
|
|
111
|
+
pub api_base: Option<String>,
|
|
112
|
+
#[serde(default)]
|
|
113
|
+
pub api_key: Option<String>,
|
|
114
|
+
#[serde(default)]
|
|
115
|
+
pub stop: Vec<String>,
|
|
116
|
+
#[serde(default)]
|
|
117
|
+
pub completion_kwargs: HashMap<String, serde_json::Value>,
|
|
118
|
+
#[serde(default)]
|
|
119
|
+
pub convert_system_to_user: bool,
|
|
120
|
+
#[serde(default)]
|
|
121
|
+
pub retry: RetryConfig,
|
|
122
|
+
#[serde(default)]
|
|
123
|
+
pub delay: f64,
|
|
124
|
+
#[serde(default)]
|
|
125
|
+
pub max_input_tokens: Option<u64>,
|
|
126
|
+
#[serde(default)]
|
|
127
|
+
pub max_output_tokens: Option<u64>,
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
/// Default for `GenericApiModelConfig::per_instance_cost_limit`, used both by
/// serde and by the `Default` implementation.
fn default_per_instance_cost_limit() -> f64 {
    const DEFAULT_LIMIT: f64 = 3.0;
    DEFAULT_LIMIT
}
|
|
133
|
+
|
|
134
|
+
/// Default for `GenericApiModelConfig::top_p`, used both by serde and by the
/// `Default` implementation.
fn default_top_p() -> Option<f64> {
    let top_p: f64 = 1.0;
    Some(top_p)
}
|
|
137
|
+
|
|
138
|
+
impl Default for GenericApiModelConfig {
|
|
139
|
+
fn default() -> Self {
|
|
140
|
+
Self {
|
|
141
|
+
name: "gpt-4".to_string(),
|
|
142
|
+
per_instance_cost_limit: default_per_instance_cost_limit(),
|
|
143
|
+
total_cost_limit: 0.0,
|
|
144
|
+
per_instance_call_limit: 0,
|
|
145
|
+
temperature: 0.0,
|
|
146
|
+
top_p: default_top_p(),
|
|
147
|
+
api_base: None,
|
|
148
|
+
api_key: None,
|
|
149
|
+
stop: Vec::new(),
|
|
150
|
+
completion_kwargs: HashMap::new(),
|
|
151
|
+
convert_system_to_user: false,
|
|
152
|
+
retry: RetryConfig::default(),
|
|
153
|
+
delay: 0.0,
|
|
154
|
+
max_input_tokens: None,
|
|
155
|
+
max_output_tokens: None,
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
/// LiteLLM-compatible model for API-based LLMs
|
|
161
|
+
pub struct LiteLLMModel {
|
|
162
|
+
config: GenericApiModelConfig,
|
|
163
|
+
stats: Arc<Mutex<InstanceStats>>,
|
|
164
|
+
global_stats: Arc<GlobalStats>,
|
|
165
|
+
api_keys: Vec<String>,
|
|
166
|
+
current_key_index: Arc<AtomicU64>,
|
|
167
|
+
client: reqwest::Client,
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
impl LiteLLMModel {
|
|
171
|
+
pub fn new(config: GenericApiModelConfig, global_stats: Arc<GlobalStats>) -> Self {
|
|
172
|
+
let api_keys = Self::get_api_keys(&config);
|
|
173
|
+
Self {
|
|
174
|
+
config,
|
|
175
|
+
stats: Arc::new(Mutex::new(InstanceStats::default())),
|
|
176
|
+
global_stats,
|
|
177
|
+
api_keys,
|
|
178
|
+
current_key_index: Arc::new(AtomicU64::new(0)),
|
|
179
|
+
client: reqwest::Client::new(),
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
fn get_api_keys(config: &GenericApiModelConfig) -> Vec<String> {
|
|
184
|
+
if let Some(ref key) = config.api_key {
|
|
185
|
+
if let Some(stripped) = key.strip_prefix('$') {
|
|
186
|
+
// Environment variable
|
|
187
|
+
if let Ok(env_key) = std::env::var(stripped) {
|
|
188
|
+
return env_key.split(":::").map(String::from).collect();
|
|
189
|
+
}
|
|
190
|
+
} else {
|
|
191
|
+
return key.split(":::").map(String::from).collect();
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
// Try environment variable based on model name
|
|
196
|
+
let env_name = format!("{}_API_KEY", config.name.to_uppercase().replace('-', "_"));
|
|
197
|
+
if let Ok(key) = std::env::var(&env_name) {
|
|
198
|
+
return key.split(":::").map(String::from).collect();
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
Vec::new()
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
fn choose_api_key(&self) -> Option<String> {
|
|
205
|
+
if self.api_keys.is_empty() {
|
|
206
|
+
return None;
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
let idx = self.current_key_index.fetch_add(1, Ordering::SeqCst) as usize;
|
|
210
|
+
Some(self.api_keys[idx % self.api_keys.len()].clone())
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
fn history_to_messages(&self, history: &History) -> Vec<serde_json::Value> {
|
|
214
|
+
history
|
|
215
|
+
.iter()
|
|
216
|
+
.map(|item| {
|
|
217
|
+
let role = match item.role {
|
|
218
|
+
Role::System => {
|
|
219
|
+
if self.config.convert_system_to_user {
|
|
220
|
+
"user"
|
|
221
|
+
} else {
|
|
222
|
+
"system"
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
Role::User => "user",
|
|
226
|
+
Role::Assistant => "assistant",
|
|
227
|
+
Role::Tool => "tool",
|
|
228
|
+
};
|
|
229
|
+
|
|
230
|
+
let mut msg = serde_json::json!({
|
|
231
|
+
"role": role,
|
|
232
|
+
"content": item.content.as_str(),
|
|
233
|
+
});
|
|
234
|
+
|
|
235
|
+
if let Some(ref tool_calls) = item.tool_calls {
|
|
236
|
+
msg["tool_calls"] = serde_json::to_value(tool_calls).unwrap_or_default();
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
if let Some(ref ids) = item.tool_call_ids {
|
|
240
|
+
if !ids.is_empty() {
|
|
241
|
+
msg["tool_call_id"] = serde_json::Value::String(ids[0].clone());
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
msg
|
|
246
|
+
})
|
|
247
|
+
.collect()
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
fn calculate_cost(&self, input_tokens: u64, output_tokens: u64) -> f64 {
|
|
251
|
+
// Simplified pricing - in production, use actual model pricing
|
|
252
|
+
let (input_price, output_price) = match self.config.name.as_str() {
|
|
253
|
+
name if name.contains("gpt-4") => (0.03 / 1000.0, 0.06 / 1000.0),
|
|
254
|
+
name if name.contains("gpt-3.5") => (0.0005 / 1000.0, 0.0015 / 1000.0),
|
|
255
|
+
name if name.contains("claude-3-opus") => (0.015 / 1000.0, 0.075 / 1000.0),
|
|
256
|
+
name if name.contains("claude-3-sonnet") => (0.003 / 1000.0, 0.015 / 1000.0),
|
|
257
|
+
name if name.contains("claude-3-haiku") => (0.00025 / 1000.0, 0.00125 / 1000.0),
|
|
258
|
+
_ => (0.001 / 1000.0, 0.002 / 1000.0),
|
|
259
|
+
};
|
|
260
|
+
|
|
261
|
+
input_tokens as f64 * input_price + output_tokens as f64 * output_price
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
async fn check_cost_limits(&self) -> Result<()> {
|
|
265
|
+
let stats = self.stats.lock().await;
|
|
266
|
+
|
|
267
|
+
if self.config.per_instance_cost_limit > 0.0
|
|
268
|
+
&& stats.instance_cost >= self.config.per_instance_cost_limit
|
|
269
|
+
{
|
|
270
|
+
return Err(SWEAgentError::InstanceCostLimitExceeded(format!(
|
|
271
|
+
"Instance cost {} exceeds limit {}",
|
|
272
|
+
stats.instance_cost, self.config.per_instance_cost_limit
|
|
273
|
+
)));
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
if self.config.total_cost_limit > 0.0
|
|
277
|
+
&& self.global_stats.get_total_cost() >= self.config.total_cost_limit
|
|
278
|
+
{
|
|
279
|
+
return Err(SWEAgentError::TotalCostLimitExceeded(format!(
|
|
280
|
+
"Total cost {} exceeds limit {}",
|
|
281
|
+
self.global_stats.get_total_cost(),
|
|
282
|
+
self.config.total_cost_limit
|
|
283
|
+
)));
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
if self.config.per_instance_call_limit > 0
|
|
287
|
+
&& stats.api_calls >= self.config.per_instance_call_limit
|
|
288
|
+
{
|
|
289
|
+
return Err(SWEAgentError::InstanceCallLimitExceeded(format!(
|
|
290
|
+
"API calls {} exceeds limit {}",
|
|
291
|
+
stats.api_calls, self.config.per_instance_call_limit
|
|
292
|
+
)));
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
Ok(())
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
#[async_trait]
|
|
300
|
+
impl Model for LiteLLMModel {
|
|
301
|
+
async fn query(&self, history: &History) -> Result<ModelOutput> {
|
|
302
|
+
self.check_cost_limits().await?;
|
|
303
|
+
|
|
304
|
+
let api_key = self.choose_api_key();
|
|
305
|
+
let messages = self.history_to_messages(history);
|
|
306
|
+
|
|
307
|
+
// Determine API endpoint based on model name
|
|
308
|
+
let is_anthropic = self.config.name.contains("claude");
|
|
309
|
+
let _is_openai = self.config.name.contains("gpt");
|
|
310
|
+
|
|
311
|
+
let (url, headers) = if is_anthropic {
|
|
312
|
+
let url = self
|
|
313
|
+
.config
|
|
314
|
+
.api_base
|
|
315
|
+
.clone()
|
|
316
|
+
.unwrap_or_else(|| "https://api.anthropic.com/v1/messages".to_string());
|
|
317
|
+
let mut headers = reqwest::header::HeaderMap::new();
|
|
318
|
+
headers.insert("Content-Type", "application/json".parse().unwrap());
|
|
319
|
+
headers.insert("anthropic-version", "2023-06-01".parse().unwrap());
|
|
320
|
+
if let Some(ref key) = api_key {
|
|
321
|
+
headers.insert("x-api-key", key.parse().unwrap());
|
|
322
|
+
}
|
|
323
|
+
(url, headers)
|
|
324
|
+
} else {
|
|
325
|
+
let url = self
|
|
326
|
+
.config
|
|
327
|
+
.api_base
|
|
328
|
+
.clone()
|
|
329
|
+
.unwrap_or_else(|| "https://api.openai.com/v1/chat/completions".to_string());
|
|
330
|
+
let mut headers = reqwest::header::HeaderMap::new();
|
|
331
|
+
headers.insert("Content-Type", "application/json".parse().unwrap());
|
|
332
|
+
if let Some(ref key) = api_key {
|
|
333
|
+
headers.insert("Authorization", format!("Bearer {}", key).parse().unwrap());
|
|
334
|
+
}
|
|
335
|
+
(url, headers)
|
|
336
|
+
};
|
|
337
|
+
|
|
338
|
+
let mut request_body = serde_json::json!({
|
|
339
|
+
"model": self.config.name,
|
|
340
|
+
"messages": messages,
|
|
341
|
+
"temperature": self.config.temperature,
|
|
342
|
+
});
|
|
343
|
+
|
|
344
|
+
if let Some(top_p) = self.config.top_p {
|
|
345
|
+
request_body["top_p"] = serde_json::Value::from(top_p);
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
if !self.config.stop.is_empty() {
|
|
349
|
+
request_body["stop"] = serde_json::to_value(&self.config.stop)?;
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
if let Some(max_tokens) = self.config.max_output_tokens {
|
|
353
|
+
request_body["max_tokens"] = serde_json::Value::from(max_tokens);
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
// Handle Anthropic-specific format
|
|
357
|
+
if is_anthropic {
|
|
358
|
+
let system_msg = messages.iter().find(|m| m["role"] == "system");
|
|
359
|
+
if let Some(sys) = system_msg {
|
|
360
|
+
request_body["system"] = sys["content"].clone();
|
|
361
|
+
let non_system: Vec<_> = messages
|
|
362
|
+
.iter()
|
|
363
|
+
.filter(|m| m["role"] != "system")
|
|
364
|
+
.cloned()
|
|
365
|
+
.collect();
|
|
366
|
+
request_body["messages"] = serde_json::to_value(non_system)?;
|
|
367
|
+
}
|
|
368
|
+
}
|
|
369
|
+
|
|
370
|
+
let response = self
|
|
371
|
+
.client
|
|
372
|
+
.post(&url)
|
|
373
|
+
.headers(headers)
|
|
374
|
+
.json(&request_body)
|
|
375
|
+
.send()
|
|
376
|
+
.await?;
|
|
377
|
+
|
|
378
|
+
let status = response.status();
|
|
379
|
+
let response_text = response.text().await.unwrap_or_default();
|
|
380
|
+
|
|
381
|
+
if !status.is_success() {
|
|
382
|
+
// Check for specific error types
|
|
383
|
+
if response_text.contains("content_policy") || response_text.contains("safety") {
|
|
384
|
+
return Err(SWEAgentError::ContentPolicyViolation(response_text));
|
|
385
|
+
}
|
|
386
|
+
return Err(SWEAgentError::ApiError(format!(
|
|
387
|
+
"API request failed with status {}: {}",
|
|
388
|
+
status, response_text
|
|
389
|
+
)));
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
// Parse response - handle both OpenAI and Anthropic formats
|
|
393
|
+
let json_response: serde_json::Value = serde_json::from_str(&response_text)
|
|
394
|
+
.map_err(|e| SWEAgentError::ApiError(format!("Failed to parse response: {}", e)))?;
|
|
395
|
+
|
|
396
|
+
let (message, tool_calls, input_tokens, output_tokens) = if is_anthropic {
|
|
397
|
+
// Anthropic format: { "content": [{"type": "text", "text": "..."}], "usage": {...} }
|
|
398
|
+
let content = json_response
|
|
399
|
+
.get("content")
|
|
400
|
+
.and_then(|c| c.as_array())
|
|
401
|
+
.map(|arr| {
|
|
402
|
+
arr.iter()
|
|
403
|
+
.filter_map(|item| {
|
|
404
|
+
if item.get("type").and_then(|t| t.as_str()) == Some("text") {
|
|
405
|
+
item.get("text").and_then(|t| t.as_str()).map(String::from)
|
|
406
|
+
} else {
|
|
407
|
+
None
|
|
408
|
+
}
|
|
409
|
+
})
|
|
410
|
+
.collect::<Vec<_>>()
|
|
411
|
+
.join("")
|
|
412
|
+
})
|
|
413
|
+
.unwrap_or_default();
|
|
414
|
+
|
|
415
|
+
// Extract tool use blocks from Anthropic response
|
|
416
|
+
let tools: Option<Vec<ToolCall>> = json_response
|
|
417
|
+
.get("content")
|
|
418
|
+
.and_then(|c| c.as_array())
|
|
419
|
+
.map(|arr| {
|
|
420
|
+
arr.iter()
|
|
421
|
+
.filter_map(|item| {
|
|
422
|
+
if item.get("type").and_then(|t| t.as_str()) == Some("tool_use") {
|
|
423
|
+
let id = item
|
|
424
|
+
.get("id")
|
|
425
|
+
.and_then(|i| i.as_str())
|
|
426
|
+
.unwrap_or("")
|
|
427
|
+
.to_string();
|
|
428
|
+
let name = item
|
|
429
|
+
.get("name")
|
|
430
|
+
.and_then(|n| n.as_str())
|
|
431
|
+
.unwrap_or("")
|
|
432
|
+
.to_string();
|
|
433
|
+
let args = item
|
|
434
|
+
.get("input")
|
|
435
|
+
.map(|i| serde_json::to_string(i).unwrap_or_default())
|
|
436
|
+
.unwrap_or_default();
|
|
437
|
+
Some(ToolCall {
|
|
438
|
+
id,
|
|
439
|
+
call_type: "function".to_string(),
|
|
440
|
+
function: crate::types::ToolCallFunction {
|
|
441
|
+
name,
|
|
442
|
+
arguments: args,
|
|
443
|
+
},
|
|
444
|
+
})
|
|
445
|
+
} else {
|
|
446
|
+
None
|
|
447
|
+
}
|
|
448
|
+
})
|
|
449
|
+
.collect()
|
|
450
|
+
})
|
|
451
|
+
.filter(|v: &Vec<ToolCall>| !v.is_empty());
|
|
452
|
+
|
|
453
|
+
let usage = json_response.get("usage");
|
|
454
|
+
let input = usage
|
|
455
|
+
.and_then(|u| u.get("input_tokens"))
|
|
456
|
+
.and_then(|t| t.as_u64())
|
|
457
|
+
.unwrap_or(0);
|
|
458
|
+
let output = usage
|
|
459
|
+
.and_then(|u| u.get("output_tokens"))
|
|
460
|
+
.and_then(|t| t.as_u64())
|
|
461
|
+
.unwrap_or(0);
|
|
462
|
+
|
|
463
|
+
(content, tools, input, output)
|
|
464
|
+
} else {
|
|
465
|
+
// OpenAI format: { "choices": [{"message": {"content": "..."}}], "usage": {...} }
|
|
466
|
+
let message_content = json_response
|
|
467
|
+
.get("choices")
|
|
468
|
+
.and_then(|c| c.as_array())
|
|
469
|
+
.and_then(|arr| arr.first())
|
|
470
|
+
.and_then(|choice| choice.get("message"))
|
|
471
|
+
.and_then(|msg| msg.get("content"))
|
|
472
|
+
.and_then(|c| c.as_str())
|
|
473
|
+
.unwrap_or("")
|
|
474
|
+
.to_string();
|
|
475
|
+
|
|
476
|
+
// Extract tool calls from OpenAI response
|
|
477
|
+
let tools: Option<Vec<ToolCall>> = json_response
|
|
478
|
+
.get("choices")
|
|
479
|
+
.and_then(|c| c.as_array())
|
|
480
|
+
.and_then(|arr| arr.first())
|
|
481
|
+
.and_then(|choice| choice.get("message"))
|
|
482
|
+
.and_then(|msg| msg.get("tool_calls"))
|
|
483
|
+
.and_then(|tc| tc.as_array())
|
|
484
|
+
.map(|arr| {
|
|
485
|
+
arr.iter()
|
|
486
|
+
.filter_map(|item| {
|
|
487
|
+
let id = item
|
|
488
|
+
.get("id")
|
|
489
|
+
.and_then(|i| i.as_str())
|
|
490
|
+
.unwrap_or("")
|
|
491
|
+
.to_string();
|
|
492
|
+
let func = item.get("function")?;
|
|
493
|
+
let name = func
|
|
494
|
+
.get("name")
|
|
495
|
+
.and_then(|n| n.as_str())
|
|
496
|
+
.unwrap_or("")
|
|
497
|
+
.to_string();
|
|
498
|
+
let args = func
|
|
499
|
+
.get("arguments")
|
|
500
|
+
.and_then(|a| a.as_str())
|
|
501
|
+
.unwrap_or("")
|
|
502
|
+
.to_string();
|
|
503
|
+
Some(ToolCall {
|
|
504
|
+
id,
|
|
505
|
+
call_type: "function".to_string(),
|
|
506
|
+
function: crate::types::ToolCallFunction {
|
|
507
|
+
name,
|
|
508
|
+
arguments: args,
|
|
509
|
+
},
|
|
510
|
+
})
|
|
511
|
+
})
|
|
512
|
+
.collect()
|
|
513
|
+
})
|
|
514
|
+
.filter(|v: &Vec<ToolCall>| !v.is_empty());
|
|
515
|
+
|
|
516
|
+
let usage = json_response.get("usage");
|
|
517
|
+
let input = usage
|
|
518
|
+
.and_then(|u| u.get("prompt_tokens"))
|
|
519
|
+
.and_then(|t| t.as_u64())
|
|
520
|
+
.unwrap_or(0);
|
|
521
|
+
let output = usage
|
|
522
|
+
.and_then(|u| u.get("completion_tokens"))
|
|
523
|
+
.and_then(|t| t.as_u64())
|
|
524
|
+
.unwrap_or(0);
|
|
525
|
+
|
|
526
|
+
(message_content, tools, input, output)
|
|
527
|
+
};
|
|
528
|
+
|
|
529
|
+
// Update stats
|
|
530
|
+
let cost = self.calculate_cost(input_tokens, output_tokens);
|
|
531
|
+
|
|
532
|
+
{
|
|
533
|
+
let mut stats = self.stats.lock().await;
|
|
534
|
+
stats.tokens_sent += input_tokens;
|
|
535
|
+
stats.tokens_received += output_tokens;
|
|
536
|
+
stats.instance_cost += cost;
|
|
537
|
+
stats.api_calls += 1;
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
self.global_stats.add_cost(cost);
|
|
541
|
+
self.global_stats.update_timestamp();
|
|
542
|
+
|
|
543
|
+
Ok(ModelOutput {
|
|
544
|
+
message,
|
|
545
|
+
tool_calls,
|
|
546
|
+
thinking_blocks: None,
|
|
547
|
+
})
|
|
548
|
+
}
|
|
549
|
+
|
|
550
|
+
fn reset_stats(&self) {
|
|
551
|
+
if let Ok(mut stats) = self.stats.try_lock() {
|
|
552
|
+
*stats = InstanceStats::default();
|
|
553
|
+
}
|
|
554
|
+
}
|
|
555
|
+
|
|
556
|
+
fn get_stats(&self) -> InstanceStats {
|
|
557
|
+
self.stats.try_lock().map(|s| s.clone()).unwrap_or_default()
|
|
558
|
+
}
|
|
559
|
+
|
|
560
|
+
fn instance_cost_limit(&self) -> f64 {
|
|
561
|
+
self.config.per_instance_cost_limit
|
|
562
|
+
}
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
/// Human model for interactive input
|
|
566
|
+
pub struct HumanModel {
|
|
567
|
+
stats: Arc<Mutex<InstanceStats>>,
|
|
568
|
+
cost_per_call: f64,
|
|
569
|
+
}
|
|
570
|
+
|
|
571
|
+
impl HumanModel {
|
|
572
|
+
pub fn new(cost_per_call: f64) -> Self {
|
|
573
|
+
Self {
|
|
574
|
+
stats: Arc::new(Mutex::new(InstanceStats::default())),
|
|
575
|
+
cost_per_call,
|
|
576
|
+
}
|
|
577
|
+
}
|
|
578
|
+
}
|
|
579
|
+
|
|
580
|
+
#[async_trait]
|
|
581
|
+
impl Model for HumanModel {
|
|
582
|
+
async fn query(&self, _history: &History) -> Result<ModelOutput> {
|
|
583
|
+
use std::io::{self, BufRead, Write};
|
|
584
|
+
|
|
585
|
+
print!("> ");
|
|
586
|
+
io::stdout().flush()?;
|
|
587
|
+
|
|
588
|
+
let stdin = io::stdin();
|
|
589
|
+
let line = stdin.lock().lines().next();
|
|
590
|
+
|
|
591
|
+
let input = match line {
|
|
592
|
+
Some(Ok(s)) => s,
|
|
593
|
+
Some(Err(e)) => return Err(SWEAgentError::IoError(e.to_string())),
|
|
594
|
+
None => return Err(SWEAgentError::EOF),
|
|
595
|
+
};
|
|
596
|
+
|
|
597
|
+
{
|
|
598
|
+
let mut stats = self.stats.lock().await;
|
|
599
|
+
stats.api_calls += 1;
|
|
600
|
+
stats.instance_cost += self.cost_per_call;
|
|
601
|
+
}
|
|
602
|
+
|
|
603
|
+
Ok(ModelOutput {
|
|
604
|
+
message: input,
|
|
605
|
+
tool_calls: None,
|
|
606
|
+
thinking_blocks: None,
|
|
607
|
+
})
|
|
608
|
+
}
|
|
609
|
+
|
|
610
|
+
fn reset_stats(&self) {
|
|
611
|
+
if let Ok(mut stats) = self.stats.try_lock() {
|
|
612
|
+
*stats = InstanceStats::default();
|
|
613
|
+
}
|
|
614
|
+
}
|
|
615
|
+
|
|
616
|
+
fn get_stats(&self) -> InstanceStats {
|
|
617
|
+
self.stats.try_lock().map(|s| s.clone()).unwrap_or_default()
|
|
618
|
+
}
|
|
619
|
+
|
|
620
|
+
fn instance_cost_limit(&self) -> f64 {
|
|
621
|
+
0.0
|
|
622
|
+
}
|
|
623
|
+
}
|
|
624
|
+
|
|
625
|
+
/// Instant empty submit model for testing
|
|
626
|
+
pub struct InstantEmptySubmitModel {
|
|
627
|
+
stats: Arc<Mutex<InstanceStats>>,
|
|
628
|
+
action_idx: Arc<AtomicU64>,
|
|
629
|
+
}
|
|
630
|
+
|
|
631
|
+
impl InstantEmptySubmitModel {
|
|
632
|
+
pub fn new() -> Self {
|
|
633
|
+
Self {
|
|
634
|
+
stats: Arc::new(Mutex::new(InstanceStats::default())),
|
|
635
|
+
action_idx: Arc::new(AtomicU64::new(0)),
|
|
636
|
+
}
|
|
637
|
+
}
|
|
638
|
+
}
|
|
639
|
+
|
|
640
|
+
impl Default for InstantEmptySubmitModel {
|
|
641
|
+
fn default() -> Self {
|
|
642
|
+
Self::new()
|
|
643
|
+
}
|
|
644
|
+
}
|
|
645
|
+
|
|
646
|
+
#[async_trait]
|
|
647
|
+
impl Model for InstantEmptySubmitModel {
|
|
648
|
+
async fn query(&self, _history: &History) -> Result<ModelOutput> {
|
|
649
|
+
let idx = self.action_idx.fetch_add(1, Ordering::SeqCst);
|
|
650
|
+
|
|
651
|
+
let message = if idx == 0 {
|
|
652
|
+
"DISCUSSION\nLet's reproduce the bug by creating a `reproduce.py` file.\n\n```\ntouch reproduce.py\n```\n"
|
|
653
|
+
} else {
|
|
654
|
+
self.action_idx.store(0, Ordering::SeqCst);
|
|
655
|
+
"DISCUSSION\nThe task should be resolved, so let's submit the patch.\n\n```\nsubmit\n```\n"
|
|
656
|
+
};
|
|
657
|
+
|
|
658
|
+
{
|
|
659
|
+
let mut stats = self.stats.lock().await;
|
|
660
|
+
stats.api_calls += 1;
|
|
661
|
+
}
|
|
662
|
+
|
|
663
|
+
Ok(ModelOutput {
|
|
664
|
+
message: message.to_string(),
|
|
665
|
+
tool_calls: None,
|
|
666
|
+
thinking_blocks: None,
|
|
667
|
+
})
|
|
668
|
+
}
|
|
669
|
+
|
|
670
|
+
fn reset_stats(&self) {
|
|
671
|
+
if let Ok(mut stats) = self.stats.try_lock() {
|
|
672
|
+
*stats = InstanceStats::default();
|
|
673
|
+
}
|
|
674
|
+
self.action_idx.store(0, Ordering::SeqCst);
|
|
675
|
+
}
|
|
676
|
+
|
|
677
|
+
fn get_stats(&self) -> InstanceStats {
|
|
678
|
+
self.stats.try_lock().map(|s| s.clone()).unwrap_or_default()
|
|
679
|
+
}
|
|
680
|
+
|
|
681
|
+
fn instance_cost_limit(&self) -> f64 {
|
|
682
|
+
0.0
|
|
683
|
+
}
|
|
684
|
+
}
|
|
685
|
+
|
|
686
|
+
/// Replay model for replaying trajectories
|
|
687
|
+
pub struct ReplayModel {
|
|
688
|
+
stats: Arc<Mutex<InstanceStats>>,
|
|
689
|
+
replays: Vec<Vec<String>>,
|
|
690
|
+
replay_idx: Arc<AtomicU64>,
|
|
691
|
+
action_idx: Arc<AtomicU64>,
|
|
692
|
+
submit_command: String,
|
|
693
|
+
}
|
|
694
|
+
|
|
695
|
+
impl ReplayModel {
|
|
696
|
+
pub fn new(replay_path: &str, submit_command: &str) -> Result<Self> {
|
|
697
|
+
let content = std::fs::read_to_string(replay_path)?;
|
|
698
|
+
let replays: Vec<Vec<String>> = content
|
|
699
|
+
.lines()
|
|
700
|
+
.filter(|l| !l.trim().is_empty())
|
|
701
|
+
.filter_map(|l| {
|
|
702
|
+
serde_json::from_str::<HashMap<String, Vec<String>>>(l)
|
|
703
|
+
.ok()
|
|
704
|
+
.and_then(|m| m.into_values().next())
|
|
705
|
+
})
|
|
706
|
+
.collect();
|
|
707
|
+
|
|
708
|
+
Ok(Self {
|
|
709
|
+
stats: Arc::new(Mutex::new(InstanceStats::default())),
|
|
710
|
+
replays,
|
|
711
|
+
replay_idx: Arc::new(AtomicU64::new(0)),
|
|
712
|
+
action_idx: Arc::new(AtomicU64::new(0)),
|
|
713
|
+
submit_command: submit_command.to_string(),
|
|
714
|
+
})
|
|
715
|
+
}
|
|
716
|
+
}
|
|
717
|
+
|
|
718
|
+
#[async_trait]
|
|
719
|
+
impl Model for ReplayModel {
|
|
720
|
+
async fn query(&self, _history: &History) -> Result<ModelOutput> {
|
|
721
|
+
let replay_idx = self.replay_idx.load(Ordering::SeqCst) as usize;
|
|
722
|
+
let action_idx = self.action_idx.fetch_add(1, Ordering::SeqCst) as usize;
|
|
723
|
+
|
|
724
|
+
let action = if replay_idx >= self.replays.len() {
|
|
725
|
+
format!("```\n{}\n```", self.submit_command)
|
|
726
|
+
} else if action_idx >= self.replays[replay_idx].len() {
|
|
727
|
+
tracing::error!("Reached end of replay trajectory without submitting");
|
|
728
|
+
self.replay_idx.fetch_add(1, Ordering::SeqCst);
|
|
729
|
+
self.action_idx.store(0, Ordering::SeqCst);
|
|
730
|
+
format!("```\n{}\n```", self.submit_command)
|
|
731
|
+
} else {
|
|
732
|
+
let action = &self.replays[replay_idx][action_idx];
|
|
733
|
+
if action == "submit" || action.contains(&self.submit_command) {
|
|
734
|
+
self.replay_idx.fetch_add(1, Ordering::SeqCst);
|
|
735
|
+
self.action_idx.store(0, Ordering::SeqCst);
|
|
736
|
+
}
|
|
737
|
+
action.clone()
|
|
738
|
+
};
|
|
739
|
+
|
|
740
|
+
{
|
|
741
|
+
let mut stats = self.stats.lock().await;
|
|
742
|
+
stats.api_calls += 1;
|
|
743
|
+
}
|
|
744
|
+
|
|
745
|
+
Ok(ModelOutput {
|
|
746
|
+
message: action,
|
|
747
|
+
tool_calls: None,
|
|
748
|
+
thinking_blocks: None,
|
|
749
|
+
})
|
|
750
|
+
}
|
|
751
|
+
|
|
752
|
+
fn reset_stats(&self) {
|
|
753
|
+
if let Ok(mut stats) = self.stats.try_lock() {
|
|
754
|
+
*stats = InstanceStats::default();
|
|
755
|
+
}
|
|
756
|
+
}
|
|
757
|
+
|
|
758
|
+
fn get_stats(&self) -> InstanceStats {
|
|
759
|
+
self.stats.try_lock().map(|s| s.clone()).unwrap_or_default()
|
|
760
|
+
}
|
|
761
|
+
|
|
762
|
+
fn instance_cost_limit(&self) -> f64 {
|
|
763
|
+
0.0
|
|
764
|
+
}
|
|
765
|
+
}
|
|
766
|
+
|
|
767
|
+
/// Model configuration enum
|
|
768
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
769
|
+
#[serde(tag = "name")]
|
|
770
|
+
pub enum ModelConfig {
|
|
771
|
+
#[serde(rename = "human")]
|
|
772
|
+
Human { cost_per_call: Option<f64> },
|
|
773
|
+
#[serde(rename = "instant_empty_submit")]
|
|
774
|
+
InstantEmptySubmit,
|
|
775
|
+
#[serde(rename = "replay")]
|
|
776
|
+
Replay { replay_path: String },
|
|
777
|
+
#[serde(untagged)]
|
|
778
|
+
Generic(Box<GenericApiModelConfig>),
|
|
779
|
+
}
|
|
780
|
+
|
|
781
|
+
impl Default for ModelConfig {
|
|
782
|
+
fn default() -> Self {
|
|
783
|
+
ModelConfig::Generic(Box::default())
|
|
784
|
+
}
|
|
785
|
+
}
|
|
786
|
+
|
|
787
|
+
/// Create a model from configuration
|
|
788
|
+
pub fn get_model(config: ModelConfig, global_stats: Arc<GlobalStats>) -> Result<Box<dyn Model>> {
|
|
789
|
+
match config {
|
|
790
|
+
ModelConfig::Human { cost_per_call } => {
|
|
791
|
+
Ok(Box::new(HumanModel::new(cost_per_call.unwrap_or(0.0))))
|
|
792
|
+
}
|
|
793
|
+
ModelConfig::InstantEmptySubmit => Ok(Box::new(InstantEmptySubmitModel::new())),
|
|
794
|
+
ModelConfig::Replay { replay_path } => {
|
|
795
|
+
Ok(Box::new(ReplayModel::new(&replay_path, "submit")?))
|
|
796
|
+
}
|
|
797
|
+
ModelConfig::Generic(config) => Ok(Box::new(LiteLLMModel::new(*config, global_stats))),
|
|
798
|
+
}
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// The scripted model first proposes creating `reproduce.py`, then submits.
    #[tokio::test]
    async fn test_instant_empty_submit_model() {
        let model = InstantEmptySubmitModel::new();
        let history = vec![];

        let output1 = model.query(&history).await.unwrap();
        assert!(output1.message.contains("reproduce.py"));

        let output2 = model.query(&history).await.unwrap();
        assert!(output2.message.contains("submit"));
    }

    /// Adding two stats values sums every field.
    #[test]
    fn test_instance_stats_add() {
        let a = InstanceStats {
            instance_cost: 1.0,
            tokens_sent: 100,
            tokens_received: 50,
            api_calls: 1,
        };
        let b = InstanceStats {
            instance_cost: 2.0,
            tokens_sent: 200,
            tokens_received: 100,
            api_calls: 2,
        };
        let c = a.add(&b);

        assert_eq!(c.instance_cost, 3.0);
        assert_eq!(c.tokens_sent, 300);
        // Previously unchecked: the received-token counter must be summed as well.
        assert_eq!(c.tokens_received, 150);
        assert_eq!(c.api_calls, 3);
    }
}
|