@elizaos/sweagent-root 2.0.0-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +270 -0
- package/package.json +71 -0
- package/python/LICENSE +21 -0
- package/python/config/README.md +15 -0
- package/python/config/bash_only.yaml +222 -0
- package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
- package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
- package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
- package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
- package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
- package/python/config/coding_challenge.yaml +104 -0
- package/python/config/default.yaml +69 -0
- package/python/config/default_backticks.yaml +69 -0
- package/python/config/default_mm_no_images.yaml +82 -0
- package/python/config/default_mm_with_images.yaml +83 -0
- package/python/config/demo/default.yaml +80 -0
- package/python/config/demo/no_instructions.yaml +69 -0
- package/python/config/demo/only_bash.yaml +60 -0
- package/python/config/exotic/default_shell.yaml +52 -0
- package/python/config/exotic/windowed_replace.yaml +125 -0
- package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
- package/python/config/human/human.yaml +24 -0
- package/python/config/human/human_demo.yaml +52 -0
- package/python/config/sweagent_0_7/07.yaml +101 -0
- package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
- package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
- package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
- package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
- package/python/mlc_config.json +44 -0
- package/python/pyproject.toml +262 -0
- package/python/sweagent/__init__.py +114 -0
- package/python/sweagent/__main__.py +4 -0
- package/python/sweagent/agent/__init__.py +0 -0
- package/python/sweagent/agent/action_sampler.py +317 -0
- package/python/sweagent/agent/agents.py +1294 -0
- package/python/sweagent/agent/extra/shell_agent.py +106 -0
- package/python/sweagent/agent/history_processors.py +399 -0
- package/python/sweagent/agent/hooks/__init__.py +0 -0
- package/python/sweagent/agent/hooks/abstract.py +139 -0
- package/python/sweagent/agent/hooks/status.py +34 -0
- package/python/sweagent/agent/models.py +896 -0
- package/python/sweagent/agent/problem_statement.py +312 -0
- package/python/sweagent/agent/reviewer.py +664 -0
- package/python/sweagent/environment/__init__.py +0 -0
- package/python/sweagent/environment/hooks/__init__.py +0 -0
- package/python/sweagent/environment/hooks/abstract.py +60 -0
- package/python/sweagent/environment/hooks/status.py +28 -0
- package/python/sweagent/environment/repo.py +219 -0
- package/python/sweagent/environment/swe_env.py +276 -0
- package/python/sweagent/exceptions.py +54 -0
- package/python/sweagent/inspector/README.md +6 -0
- package/python/sweagent/inspector/__init__.py +0 -0
- package/python/sweagent/inspector/favicon.ico +0 -0
- package/python/sweagent/inspector/fileViewer.js +354 -0
- package/python/sweagent/inspector/icons/computer.png +0 -0
- package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
- package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
- package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
- package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
- package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
- package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
- package/python/sweagent/inspector/index.html +25 -0
- package/python/sweagent/inspector/server.py +354 -0
- package/python/sweagent/inspector/static.py +169 -0
- package/python/sweagent/inspector/style.css +454 -0
- package/python/sweagent/run/__init__.py +0 -0
- package/python/sweagent/run/_progress.py +158 -0
- package/python/sweagent/run/batch_instances.py +419 -0
- package/python/sweagent/run/common.py +387 -0
- package/python/sweagent/run/compare_runs.py +123 -0
- package/python/sweagent/run/extract_pred.py +19 -0
- package/python/sweagent/run/hooks/__init__.py +0 -0
- package/python/sweagent/run/hooks/abstract.py +67 -0
- package/python/sweagent/run/hooks/apply_patch.py +106 -0
- package/python/sweagent/run/hooks/open_pr.py +244 -0
- package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
- package/python/sweagent/run/inspector_cli.py +493 -0
- package/python/sweagent/run/merge_predictions.py +64 -0
- package/python/sweagent/run/quick_stats.py +96 -0
- package/python/sweagent/run/remove_unfinished.py +63 -0
- package/python/sweagent/run/rich_test.py +91 -0
- package/python/sweagent/run/run.py +147 -0
- package/python/sweagent/run/run_batch.py +442 -0
- package/python/sweagent/run/run_replay.py +219 -0
- package/python/sweagent/run/run_shell.py +155 -0
- package/python/sweagent/run/run_single.py +225 -0
- package/python/sweagent/run/run_traj_to_demo.py +85 -0
- package/python/sweagent/tools/__init__.py +0 -0
- package/python/sweagent/tools/bundle.py +57 -0
- package/python/sweagent/tools/commands.py +220 -0
- package/python/sweagent/tools/parsing.py +619 -0
- package/python/sweagent/tools/tools.py +430 -0
- package/python/sweagent/tools/utils.py +108 -0
- package/python/sweagent/types.py +102 -0
- package/python/sweagent/utils/__init__.py +0 -0
- package/python/sweagent/utils/config.py +80 -0
- package/python/sweagent/utils/files.py +27 -0
- package/python/sweagent/utils/github.py +118 -0
- package/python/sweagent/utils/jinja_warnings.py +14 -0
- package/python/sweagent/utils/log.py +175 -0
- package/python/sweagent/utils/patch_formatter.py +152 -0
- package/python/sweagent/utils/serialization.py +45 -0
- package/python/tests/__init__.py +0 -0
- package/python/tests/conftest.py +191 -0
- package/python/tests/test_agent.py +258 -0
- package/python/tests/test_batch_instance.py +43 -0
- package/python/tests/test_commands/_interactive_dummy.py +35 -0
- package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
- package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
- package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
- package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
- package/python/tests/test_data/data_sources/human_eval.json +1 -0
- package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
- package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
- package/python/tests/test_env.py +66 -0
- package/python/tests/test_env_utils.py +129 -0
- package/python/tests/test_history_processors.py +40 -0
- package/python/tests/test_models.py +23 -0
- package/python/tests/test_openai_live.py +164 -0
- package/python/tests/test_packaging.py +7 -0
- package/python/tests/test_parsing.py +131 -0
- package/python/tests/test_problem_statement_multimodal.py +111 -0
- package/python/tests/test_quick_stats.py +42 -0
- package/python/tests/test_run.py +37 -0
- package/python/tests/test_run_batch.py +110 -0
- package/python/tests/test_run_hooks.py +114 -0
- package/python/tests/test_run_replay.py +33 -0
- package/python/tests/test_run_single.py +125 -0
- package/python/tests/test_tools_command_parsing.py +193 -0
- package/python/tests/test_utils.py +15 -0
- package/python/tests/tools/__init__.py +0 -0
- package/python/tests/tools/conftest.py +12 -0
- package/python/tests/tools/test_default_utils.py +153 -0
- package/python/tests/tools/test_edit_replace.py +0 -0
- package/python/tests/tools/test_split_string.py +82 -0
- package/python/tests/utils.py +29 -0
- package/python/tools/diff_state/bin/_state_diff_state +52 -0
- package/python/tools/diff_state/config.yaml +2 -0
- package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
- package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
- package/python/tools/edit_anthropic/config.yaml +56 -0
- package/python/tools/edit_anthropic/install.sh +3 -0
- package/python/tools/filemap/bin/filemap +45 -0
- package/python/tools/filemap/config.yaml +9 -0
- package/python/tools/filemap/install.sh +2 -0
- package/python/tools/forfeit/bin/exit_forfeit +5 -0
- package/python/tools/forfeit/config.yaml +5 -0
- package/python/tools/image_tools/bin/view_image +36 -0
- package/python/tools/image_tools/config.yaml +9 -0
- package/python/tools/multilingual_setup/bin/do_nothing +2 -0
- package/python/tools/multilingual_setup/config.yaml +1 -0
- package/python/tools/multilingual_setup/install.sh +45 -0
- package/python/tools/registry/bin/_read_env +10 -0
- package/python/tools/registry/bin/_write_env +10 -0
- package/python/tools/registry/config.yaml +1 -0
- package/python/tools/registry/install.sh +6 -0
- package/python/tools/registry/lib/__init__.py +0 -0
- package/python/tools/registry/lib/registry.py +56 -0
- package/python/tools/review_on_submit_m/README.md +6 -0
- package/python/tools/review_on_submit_m/bin/submit +54 -0
- package/python/tools/review_on_submit_m/config.yaml +6 -0
- package/python/tools/review_on_submit_m/install.sh +0 -0
- package/python/tools/search/bin/find_file +31 -0
- package/python/tools/search/bin/search_dir +39 -0
- package/python/tools/search/bin/search_file +55 -0
- package/python/tools/search/config.yaml +37 -0
- package/python/tools/search/install.sh +3 -0
- package/python/tools/submit/bin/submit +17 -0
- package/python/tools/submit/config.yaml +5 -0
- package/python/tools/web_browser/bin/click_mouse +41 -0
- package/python/tools/web_browser/bin/close_site +28 -0
- package/python/tools/web_browser/bin/double_click_mouse +37 -0
- package/python/tools/web_browser/bin/drag_mouse +46 -0
- package/python/tools/web_browser/bin/execute_script_on_page +39 -0
- package/python/tools/web_browser/bin/get_console_output +48 -0
- package/python/tools/web_browser/bin/move_mouse +35 -0
- package/python/tools/web_browser/bin/navigate_back +33 -0
- package/python/tools/web_browser/bin/navigate_forward +33 -0
- package/python/tools/web_browser/bin/open_site +36 -0
- package/python/tools/web_browser/bin/press_keys_on_page +51 -0
- package/python/tools/web_browser/bin/reload_page +33 -0
- package/python/tools/web_browser/bin/run_web_browser_server +394 -0
- package/python/tools/web_browser/bin/screenshot_site +38 -0
- package/python/tools/web_browser/bin/scroll_on_page +40 -0
- package/python/tools/web_browser/bin/set_browser_window_size +40 -0
- package/python/tools/web_browser/bin/type_text +34 -0
- package/python/tools/web_browser/bin/wait_time +39 -0
- package/python/tools/web_browser/config.yaml +155 -0
- package/python/tools/web_browser/install.sh +22 -0
- package/python/tools/web_browser/lib/browser_manager.py +404 -0
- package/python/tools/web_browser/lib/web_browser_config.py +33 -0
- package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
- package/python/tools/web_browser/test_console.html +1 -0
- package/python/tools/windowed/bin/_state +25 -0
- package/python/tools/windowed/bin/create +29 -0
- package/python/tools/windowed/bin/goto +37 -0
- package/python/tools/windowed/bin/open +49 -0
- package/python/tools/windowed/bin/scroll_down +12 -0
- package/python/tools/windowed/bin/scroll_up +13 -0
- package/python/tools/windowed/config.yaml +38 -0
- package/python/tools/windowed/install.sh +15 -0
- package/python/tools/windowed/lib/__init__.py +0 -0
- package/python/tools/windowed/lib/flake8_utils.py +147 -0
- package/python/tools/windowed/lib/windowed_file.py +312 -0
- package/python/tools/windowed_edit_linting/bin/edit +128 -0
- package/python/tools/windowed_edit_linting/config.yaml +31 -0
- package/python/tools/windowed_edit_linting/install.sh +5 -0
- package/python/tools/windowed_edit_replace/bin/edit +172 -0
- package/python/tools/windowed_edit_replace/bin/insert +77 -0
- package/python/tools/windowed_edit_replace/config.yaml +60 -0
- package/python/tools/windowed_edit_replace/install.sh +5 -0
- package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
- package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
- package/python/tools/windowed_edit_rewrite/install.sh +5 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
- package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
- package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
- package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
- package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
- package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
- package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
- package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
- package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
- package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
- package/rust/Cargo.toml +100 -0
- package/rust/README.md +49 -0
- package/rust/src/agent/action_sampler.rs +130 -0
- package/rust/src/agent/agents.rs +1029 -0
- package/rust/src/agent/history_processors.rs +277 -0
- package/rust/src/agent/hooks/mod.rs +208 -0
- package/rust/src/agent/mod.rs +24 -0
- package/rust/src/agent/models.rs +837 -0
- package/rust/src/agent/problem_statement.rs +355 -0
- package/rust/src/agent/reviewer.rs +505 -0
- package/rust/src/bin/sweagent.rs +784 -0
- package/rust/src/environment/deployment.rs +631 -0
- package/rust/src/environment/hooks/mod.rs +114 -0
- package/rust/src/environment/mod.rs +16 -0
- package/rust/src/environment/repo.rs +265 -0
- package/rust/src/environment/runtime.rs +237 -0
- package/rust/src/environment/swe_env.rs +248 -0
- package/rust/src/exceptions.rs +228 -0
- package/rust/src/lib.rs +68 -0
- package/rust/src/monitoring.rs +482 -0
- package/rust/src/run/hooks/mod.rs +134 -0
- package/rust/src/run/mod.rs +12 -0
- package/rust/src/run/run_batch.rs +563 -0
- package/rust/src/run/run_single.rs +196 -0
- package/rust/src/tools/bundle.rs +224 -0
- package/rust/src/tools/commands.rs +173 -0
- package/rust/src/tools/mod.rs +295 -0
- package/rust/src/tools/parsing.rs +354 -0
- package/rust/src/tools/registry.rs +143 -0
- package/rust/src/types.rs +554 -0
- package/rust/src/utils/config.rs +105 -0
- package/rust/src/utils/files.rs +137 -0
- package/rust/src/utils/github.rs +171 -0
- package/rust/src/utils/log.rs +65 -0
- package/rust/src/utils/mod.rs +17 -0
- package/rust/src/utils/serialization.rs +181 -0
- package/rust/src/utils/template.rs +173 -0
- package/typescript/README.md +335 -0
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Self
|
|
3
|
+
|
|
4
|
+
from sweagent.agent.agents import DefaultAgent, ShellAgentConfig
|
|
5
|
+
from sweagent.agent.models import HumanModel, HumanModelConfig, get_model
|
|
6
|
+
from sweagent.agent.problem_statement import ProblemStatement, ProblemStatementConfig
|
|
7
|
+
from sweagent.environment.swe_env import SWEEnv
|
|
8
|
+
from sweagent.tools.parsing import ActionOnlyParser
|
|
9
|
+
from sweagent.tools.tools import ToolHandler
|
|
10
|
+
from sweagent.types import AgentRunResult, StepOutput
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ShellAgent(DefaultAgent):
    """Agent whose control can be swapped between the configured model and a human.

    While `run` is looping, ^C replaces the current model with a `HumanModel`
    (see `human_step_in`) and ^D restores the previous model
    (see `human_step_out`). When the non-human model reports that it is done,
    control is handed to the human, who has to finish the run.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to `DefaultAgent` and reset the takeover state."""
        super().__init__(*args, **kwargs)
        # Model and parser saved by `human_step_in` so that `human_step_out`
        # can restore them. `None` means no human takeover is in progress.
        self._original_model = None
        self._original_parser = None

    @classmethod
    def from_config(cls, config: ShellAgentConfig) -> Self:
        """Build an agent from `config`.

        Args:
            config: Agent configuration; it is deep-copied before use.

        Returns:
            A new instance of `cls`.
        """
        # To ensure that all models stay completely independent, we deepcopy the
        # model config, because it lives on as a property in the model, tools, etc.
        config = config.model_copy(deep=True)
        model = get_model(config.model, config.tools)
        return cls(
            templates=config.templates,
            tools=ToolHandler(config.tools),
            history_processors=config.history_processors,
            model=model,
            max_requeries=config.max_requeries,
        )

    def human_step_in(self) -> None:
        """Replace the current model with a HumanModel instance.

        This allows for human intervention during agent execution. The current
        model and parse function are remembered so that `human_step_out` can
        restore them. Calling this while a takeover is already in progress is
        a no-op.
        """
        if getattr(self, "_original_model", None) is not None:
            # Saving again would overwrite the remembered model/parser with the
            # human ones, making the original model unrecoverable.
            self.logger.info("Already in human mode. Ignoring request to switch.")
            return

        self._original_model = self.model
        self._original_parser = self.tools.config.parse_function

        # NOTE(review): catch_eof=False presumably lets ^D surface as EOFError,
        # which `run` uses as the signal to switch back -- confirm in HumanModel.
        human_config = HumanModelConfig(name="human", catch_eof=False)
        self.model = get_model(human_config, self.tools.config)
        self.tools.config.parse_function = ActionOnlyParser()

        self.logger.info("Switched to human mode. Agent will now accept human input. Press ^D to switch back.")

    def human_step_out(self) -> None:
        """Switch back to the original model from human mode.

        This is called when ^D is pressed in human mode. If no model was saved
        by `human_step_in`, nothing is changed.
        """
        if getattr(self, "_original_model", None) is None:
            self.logger.info("No previous model to switch back to. Remaining in current mode.")
            return

        self.model = self._original_model
        self.tools.config.parse_function = self._original_parser  # type: ignore
        self._original_model = None
        self._original_parser = None

        self.logger.info("Switched back to AI model mode.")

    def run(
        self,
        env: SWEEnv,
        problem_statement: ProblemStatement | ProblemStatementConfig,
        *,
        output_dir: Path = Path("."),
    ) -> AgentRunResult:
        """Run the agent on a problem instance. This method contains the
        main loop that repeatedly calls `self.step` until the problem is solved.

        Args:
            env: The environment to run the agent on.
            problem_statement: The problem to solve; passed on to `self.setup`.
            output_dir: Passed on to `self.setup` as the output directory.

        Returns:
            The `info` and `trajectory` entries of `self.get_trajectory_data()`.

        Raises:
            KeyboardInterrupt: If ^C is pressed while a `HumanModel` is active.
        """
        self.setup(env=env, problem_statement=problem_statement, output_dir=output_dir)

        # Run action/observation loop
        self._chook.on_run_start()
        step_output = StepOutput()
        while not step_output.done:
            try:
                step_output = self.step()
                self.save_trajectory()
            except KeyboardInterrupt:
                # First ^C hands control to the human; ^C while the human is
                # already in control propagates to the caller.
                if not isinstance(self.model, HumanModel):
                    self.human_step_in()
                    continue
                raise
            except EOFError:
                # Can only happen if we have a human model, so switch back
                self.logger.info("Detected ^D - switching back to AI mode")
                self.human_step_out()
                continue
            if step_output.done and not isinstance(self.model, HumanModel):
                # Human has to submit the solution
                self.logger.info("Robot is done! Please submit the solution.")
                self.human_step_in()
                step_output.done = False
        self._chook.on_run_done(trajectory=self.trajectory, info=self.info)

        self.logger.info("Trajectory saved to %s", self.traj_path)

        # Here we want to return the "global" information (e.g., submission should
        # be the best submission instead of the last one, etc.), so we get it from the traj file
        data = self.get_trajectory_data()
        return AgentRunResult(info=data["info"], trajectory=data["trajectory"])
|
|
@@ -0,0 +1,399 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import copy
|
|
4
|
+
import re
|
|
5
|
+
from abc import abstractmethod
|
|
6
|
+
from typing import Annotated, Literal, Protocol
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
|
9
|
+
|
|
10
|
+
from sweagent.types import History, HistoryItem
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class AbstractHistoryProcessor(Protocol):
    """Structural interface of a history processor: a callable from history to history."""

    @abstractmethod
    def __call__(self, history: History) -> History:
        """Return the processed version of `history`; implementations must override this."""
        raise NotImplementedError
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Utility functions
|
|
20
|
+
# -----------------
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _get_content_stats(entry: HistoryItem) -> tuple[int, int]:
|
|
24
|
+
if isinstance(entry["content"], str):
|
|
25
|
+
return len(entry["content"].splitlines()), 0
|
|
26
|
+
n_text_lines = sum(len(item["text"].splitlines()) for item in entry["content"] if item.get("type") == "text")
|
|
27
|
+
n_images = sum(1 for item in entry["content"] if item.get("type") == "image_url")
|
|
28
|
+
return n_text_lines, n_images
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _get_content_text(entry: HistoryItem) -> str:
|
|
32
|
+
if isinstance(entry["content"], str):
|
|
33
|
+
return entry["content"]
|
|
34
|
+
assert len(entry["content"]) == 1, "Expected single message in content"
|
|
35
|
+
return entry["content"][0]["text"]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _set_content_text(entry: HistoryItem, text: str) -> None:
|
|
39
|
+
if isinstance(entry["content"], str):
|
|
40
|
+
entry["content"] = text
|
|
41
|
+
else:
|
|
42
|
+
assert len(entry["content"]) == 1, "Expected single message in content"
|
|
43
|
+
entry["content"][0]["text"] = text
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _clear_cache_control(entry: HistoryItem) -> None:
|
|
47
|
+
if isinstance(entry["content"], list):
|
|
48
|
+
for item in entry["content"]:
|
|
49
|
+
item.pop("cache_control", None)
|
|
50
|
+
entry.pop("cache_control", None)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _set_cache_control(entry: HistoryItem) -> None:
|
|
54
|
+
if not isinstance(entry["content"], list):
|
|
55
|
+
entry["content"] = [ # type: ignore
|
|
56
|
+
{
|
|
57
|
+
"type": "text",
|
|
58
|
+
"text": _get_content_text(entry),
|
|
59
|
+
"cache_control": {"type": "ephemeral"},
|
|
60
|
+
}
|
|
61
|
+
]
|
|
62
|
+
else:
|
|
63
|
+
entry["content"][0]["cache_control"] = {"type": "ephemeral"}
|
|
64
|
+
if entry["role"] == "tool":
|
|
65
|
+
# Workaround for weird bug
|
|
66
|
+
entry["content"][0].pop("cache_control", None)
|
|
67
|
+
entry["cache_control"] = {"type": "ephemeral"}
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
# History processors
|
|
71
|
+
# ------------------
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class DefaultHistoryProcessor(BaseModel):
    """History processor that passes the history through untouched."""

    type: Literal["default"] = "default"
    """Do not change. Used for (de)serialization."""

    # pydantic config
    model_config = ConfigDict(extra="forbid")

    def __call__(self, history: History) -> History:
        """Return `history` itself, without copying or modifying it."""
        return history
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class LastNObservations(BaseModel):
|
|
86
|
+
"""Elide all but the last n observations or remove tagged observations.
|
|
87
|
+
|
|
88
|
+
This is our most classic history processor, used in the original paper
|
|
89
|
+
to elide but the last 5 observations.
|
|
90
|
+
Elided observations are replaced by "Old environment output: (n lines omitted)".
|
|
91
|
+
|
|
92
|
+
Typical configuration:
|
|
93
|
+
|
|
94
|
+
```yaml
|
|
95
|
+
agent:
|
|
96
|
+
history_processors:
|
|
97
|
+
- type: last_n_observations
|
|
98
|
+
n: 5
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
as for example in use in the SWE-agent 0.7 config at
|
|
102
|
+
https://github.com/SWE-agent/SWE-agent/blob/main/config/sweagent_0_7/07.yaml
|
|
103
|
+
|
|
104
|
+
For most use cases, you only need to set `n`.
|
|
105
|
+
|
|
106
|
+
Note that using this history processor will break prompt caching (as the
|
|
107
|
+
history of every query will change every time due to the elided observations).
|
|
108
|
+
There are some workarounds possible with the `polling` parameter.
|
|
109
|
+
|
|
110
|
+
However, most SotA models can now fit a lot of context, so generally this
|
|
111
|
+
history processor is not always needed anymore.
|
|
112
|
+
"""
|
|
113
|
+
|
|
114
|
+
n: int
|
|
115
|
+
"""Number of observations to keep."""
|
|
116
|
+
|
|
117
|
+
polling: int = 1
|
|
118
|
+
"""How many steps to keep between updating the number of observations to keep.
|
|
119
|
+
This is useful for caching, as we want to remove more and more messages, but every
|
|
120
|
+
time we change the history, we need to cache everything again.
|
|
121
|
+
Effectively, we will now keep between `n` and `n+polling` observations.
|
|
122
|
+
"""
|
|
123
|
+
|
|
124
|
+
always_remove_output_for_tags: set[str] = {"remove_output"}
|
|
125
|
+
"""Any observation with a `tags` field containing one of these strings will be elided,
|
|
126
|
+
even if it is one of the last n observations.
|
|
127
|
+
"""
|
|
128
|
+
|
|
129
|
+
always_keep_output_for_tags: set[str] = {"keep_output"}
|
|
130
|
+
"""Any observation with a `tags` field containing one of these strings will be kept,
|
|
131
|
+
even if it is not one of the last n observations.
|
|
132
|
+
"""
|
|
133
|
+
|
|
134
|
+
type: Literal["last_n_observations"] = "last_n_observations"
|
|
135
|
+
"""Do not change. Used for (de)serialization."""
|
|
136
|
+
|
|
137
|
+
# pydantic config
|
|
138
|
+
model_config = ConfigDict(extra="forbid")
|
|
139
|
+
|
|
140
|
+
@field_validator("n")
|
|
141
|
+
def validate_n(cls, n: int) -> int:
|
|
142
|
+
if n <= 0:
|
|
143
|
+
msg = "n must be a positive integer"
|
|
144
|
+
raise ValueError(msg)
|
|
145
|
+
return n
|
|
146
|
+
|
|
147
|
+
def _get_omit_indices(self, history: History) -> list[int]:
    """Return the history positions of observations whose content should be elided.

    Only entries whose ``message_type`` is ``"observation"`` and that are not
    flagged ``is_demo`` are considered.
    """
    candidates: list[int] = []
    for position, item in enumerate(history):
        if item.get("message_type") != "observation":
            continue
        if item.get("is_demo", False):
            continue
        candidates.append(position)

    # Round the observation count down to a multiple of `polling`, so the
    # cut-off only moves every `polling` observations.
    rounded_count = (len(candidates) // self.polling) * self.polling
    cutoff = max(0, rounded_count - self.n)
    # Note: We never remove the first observation, as it is the instance template
    return candidates[1:cutoff]
|
|
156
|
+
|
|
157
|
+
def __call__(self, history: History) -> History:
    """Return a new history in which old observations are replaced by a short summary.

    An entry is passed through untouched when it is not selected by
    ``_get_omit_indices`` or carries one of ``always_keep_output_for_tags``,
    provided it carries none of ``always_remove_output_for_tags``.
    Every other entry is shallow-copied and its ``content`` replaced by a
    one-line placeholder stating how many lines (and images) were omitted.

    Raises:
        AssertionError: If an entry selected for elision is not an observation.
    """
    # A set gives O(1) membership tests; the list returned by
    # `_get_omit_indices` would be scanned once per history entry.
    omit_content_idxs = set(self._get_omit_indices(history))
    new_history = []
    for idx, entry in enumerate(history):
        tags = set(entry.get("tags", []))
        keep = (idx not in omit_content_idxs) or (tags & self.always_keep_output_for_tags)
        # The "remove" tags win over both recency and the "keep" tags.
        if keep and not (tags & self.always_remove_output_for_tags):
            new_history.append(entry)
            continue
        # Work on a shallow copy so the caller's entry keeps its content.
        data = entry.copy()
        assert data.get("message_type") == "observation", (
            f"Expected observation for dropped entry, got: {data.get('message_type')}"
        )
        num_text_lines, num_images = _get_content_stats(data)
        data["content"] = f"Old environment output: ({num_text_lines} lines omitted)"
        if num_images > 0:
            data["content"] += f" ({num_images} images omitted)"
        new_history.append(data)
    return new_history
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
class TagToolCallObservations(BaseModel):
    """History processor that attaches tags to entries produced by selected tool calls.

    An entry qualifies when its ``message_type`` is ``"action"`` and at least one
    of its ``tool_calls`` names a function listed in ``function_names``.
    """

    type: Literal["tag_tool_call_observations"] = "tag_tool_call_observations"
    """Do not change. Used for (de)serialization."""

    tags: set[str] = {"keep_output"}
    """Add the following tag to all observations matching the search criteria."""

    function_names: set[str] = set()
    """Only consider observations made by tools with these names."""

    # pydantic config
    model_config = ConfigDict(extra="forbid")

    def _add_tags(self, entry: HistoryItem) -> None:
        """Merge ``self.tags`` into the entry's ``tags`` list, in place."""
        merged = set(entry.get("tags", []))
        merged.update(self.tags)
        entry["tags"] = list(merged)

    def _should_add_tags(self, entry: HistoryItem) -> bool:
        """Tell whether the entry is an action calling one of ``function_names``."""
        if entry.get("message_type") == "action":
            calls = entry.get("tool_calls", [])
            if calls:
                invoked = {call["function"]["name"] for call in calls}  # type: ignore
                return not self.function_names.isdisjoint(invoked)
        return False

    def __call__(self, history: History) -> History:
        """Tag every qualifying entry in place and return the same history object."""
        for item in filter(self._should_add_tags, history):
            self._add_tags(item)
        return history
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
class ClosedWindowHistoryProcessor(BaseModel):
    """For each value in history, keep track of which windows have been shown.
    We want to mark windows that should stay open (they're the last window for a particular file)
    Then we'll replace all other windows with a simple summary of the window (i.e. number of lines)
    """

    type: Literal["closed_window"] = "closed_window"
    """Do not change. Used for (de)serialization."""

    # A window line: a line starting with digits followed by a colon.
    _pattern = re.compile(r"^(\d+)\:.*?(\n|$)", re.MULTILINE)
    # The "[File: <path> (<n> lines total)]" header; group 1 is the path.
    _file_pattern = re.compile(r"\[File:\s+(.*)\s+\(\d+\s+lines\ total\)\]")

    # pydantic config
    model_config = ConfigDict(extra="forbid")

    def __call__(self, history):
        """Return a new history in which outdated file windows are collapsed.

        The history is walked from newest to oldest. The first (i.e. most
        recent) window seen for a file is kept; every older window for the same
        file has its numbered lines replaced by a one-line summary. Entries that
        are not from the ``user`` role, demo entries, entries whose content is
        not a string, and entries without a recognizable window are kept as is.
        No entry is ever removed from the history.
        """
        new_history = []
        windows = set()
        for entry in reversed(history):
            data = entry.copy()
            if data["role"] != "user" or data.get("is_demo", False):
                new_history.append(entry)
                continue
            content = entry["content"]
            if not isinstance(content, str):
                # Multimodal (list) content cannot be scanned with a regex;
                # pass it through instead of raising TypeError.
                new_history.append(entry)
                continue
            matches = list(self._pattern.finditer(content))
            file_match = self._file_pattern.search(content) if matches else None
            if file_match:
                file = file_match.group(1)
                if file in windows:
                    # A newer window for this file exists: collapse this one.
                    start = matches[0].start()
                    end = matches[-1].end()
                    data["content"] = (
                        content[:start]
                        + f"Outdated window with {len(matches)} lines omitted...\n"
                        + content[end:]
                    )
                windows.add(file)
            # Entries with numbered lines but no file header used to be dropped
            # here; they are now kept unchanged so the history stays complete.
            new_history.append(data)
        return list(reversed(new_history))
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
class CacheControlHistoryProcessor(BaseModel):
    """Places manual cache-control marks on the most recent messages of the history.

    Use this when running with anthropic claude.
    """

    type: Literal["cache_control"] = "cache_control"
    """Do not change. Used for (de)serialization."""

    last_n_messages: int = 2
    """Add cache control to the last n user messages (and clear it for anything else).
    In most cases this should be set to 2 (caching for multi-turn conversations).
    When resampling and running concurrent instances, you want to set it to 1.
    If set to <= 0, any set cache control will be removed from all messages.
    """

    last_n_messages_offset: int = 0
    """E.g., set to 1 to start cache control after the second to last user message.
    This can be useful in rare cases, when you want to modify the last message after
    we've got the completion and you want to avoid cache mismatch.
    """

    tagged_roles: list[str] = ["user", "tool"]
    """Only add cache control to messages with these roles."""

    # pydantic config
    model_config = ConfigDict(extra="forbid")

    def __call__(self, history: History) -> History:
        """Clear cache control on every entry, then mark the newest eligible ones.

        Entries are visited newest first. An entry is marked while fewer than
        ``last_n_messages`` have been marked, its role is in ``tagged_roles``,
        and it lies at least ``last_n_messages_offset`` positions from the end.
        The entries are passed to the cache-control helpers directly (no copy is
        made) and are returned in their original order.
        """
        remaining = self.last_n_messages
        processed = []
        for distance_from_end, item in enumerate(reversed(history)):
            # Clear cache control from previous messages
            _clear_cache_control(item)
            eligible = (
                remaining > 0
                and item["role"] in self.tagged_roles
                and distance_from_end >= self.last_n_messages_offset
            )
            if eligible:
                _set_cache_control(item)
                remaining -= 1
            processed.append(item)
        processed.reverse()
        return processed
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
class RemoveRegex(BaseModel):
    """History processor that strips regex matches from the content of history items."""

    remove: list[str] = ["<diff>.*</diff>"]
    """Regex patterns to remove from history items"""

    keep_last: int = 0
    """Keep the last n history items unchanged"""

    type: Literal["remove_regex"] = "remove_regex"
    """Do not change. Used for (de)serialization."""

    # pydantic config
    model_config = ConfigDict(extra="forbid")

    def __call__(self, history: History) -> History:
        """Return deep copies of all entries with the ``remove`` patterns deleted.

        The newest ``keep_last`` entries are copied but otherwise untouched.
        For list-valued content only items of type ``"text"`` are rewritten;
        any other content must be a string. Patterns are applied with
        ``re.DOTALL``.
        """
        processed = []
        for age, original in enumerate(reversed(history)):
            item = copy.deepcopy(original)
            if age >= self.keep_last:
                body = item["content"]
                if isinstance(body, list):
                    text_parts = (part for part in body if part["type"] == "text")
                    for part in text_parts:
                        for regex in self.remove:
                            part["text"] = re.sub(regex, "", part["text"], flags=re.DOTALL)
                else:
                    assert isinstance(body, str), "Expected string content"
                    for regex in self.remove:
                        body = re.sub(regex, "", body, flags=re.DOTALL)
                    item["content"] = body
            processed.append(item)
        return processed[::-1]
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
class ImageParsingHistoryProcessor(BaseModel):
    """Turns markdown-embedded base64 images into multi-modal content segments."""

    type: Literal["image_parsing"] = "image_parsing"
    allowed_mime_types: set[str] = {"image/png", "image/jpeg", "image/webp"}

    # Groups: markdown prefix up to "data:", alt text, mime type, base64 payload, closing ")".
    _pattern = re.compile(r"(!\[([^\]]*)\]\(data:)([^;]+);base64,([^)]+)(\))")
    model_config = ConfigDict(extra="forbid")

    def __call__(self, history: History) -> History:
        """Process every entry of the history and return the results as a new list."""
        return list(map(self._process_entry, history))

    def _process_entry(self, entry: HistoryItem) -> HistoryItem:
        """Return the entry with embedded images split out, if it has any.

        Entries whose role is neither ``"user"`` nor ``"tool"`` are returned as
        is. Other entries are deep-copied; the copy's ``content`` is replaced by
        a segment list only when at least one image segment was found.
        """
        if entry.get("role") not in ("user", "tool"):
            return entry
        clone = copy.deepcopy(entry)
        parts = self._parse_images(_get_content_text(clone))
        for part in parts:
            if part["type"] == "image_url":
                clone["content"] = parts
                break
        return clone

    def _parse_images(self, content: str) -> list[dict]:
        """Split ``content`` into text and ``image_url`` segments.

        Images whose mime type is not in ``allowed_mime_types`` stay in the text
        verbatim (``image/jpg`` is treated as ``image/jpeg``). When no image is
        extracted, a single text segment holding the whole content is returned.
        """
        parts: list[dict] = []
        found_image = False
        cursor = 0

        def push_text(chunk: str) -> None:
            """Append non-empty text, merging it into a trailing text segment."""
            if not chunk:
                return
            if parts and parts[-1]["type"] == "text":
                parts[-1]["text"] += chunk
            else:
                parts.append({"type": "text", "text": chunk})

        for hit in self._pattern.finditer(content):
            prefix, _alt, mime, payload, suffix = hit.groups()
            push_text(content[cursor : hit.start()])
            cursor = hit.end()
            if mime == "image/jpg":
                mime = "image/jpeg"
            if mime not in self.allowed_mime_types:
                push_text(hit.group(0))
                continue
            # The markdown wrapper around the data URL is kept as plain text.
            push_text(prefix)
            parts.append({"type": "image_url", "image_url": {"url": f"data:{mime};base64,{payload}"}})
            push_text(suffix)
            found_image = True
        push_text(content[cursor:])

        if found_image:
            return parts
        return [{"type": "text", "text": content}]
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
# Union of all history processor models. The `type` field of each model is the
# discriminator, so a serialized config selects its class through that field.
HistoryProcessor = Annotated[
    DefaultHistoryProcessor
    | LastNObservations
    | ClosedWindowHistoryProcessor
    | TagToolCallObservations
    | CacheControlHistoryProcessor
    | RemoveRegex
    | ImageParsingHistoryProcessor,
    Field(discriminator="type"),
]
|
|
File without changes
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
from typing import TYPE_CHECKING
|
|
2
|
+
|
|
3
|
+
from sweagent.types import AgentInfo, StepOutput, Trajectory
|
|
4
|
+
|
|
5
|
+
if TYPE_CHECKING:
|
|
6
|
+
# avoid circular import
|
|
7
|
+
from sweagent.agent.agents import DefaultAgent
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class AbstractAgentHook:
    """Base class for agent hooks.

    Every callback is a no-op here; subclasses override the ones they need.
    """

    def on_init(self, *, agent: "DefaultAgent"):
        """Note: Depending on the internals of `Agent` should be done with care,
        it's best to use this as little as possible.
        """

    def on_run_start(
        self,
    ):
        """Run-start callback. Does nothing by default."""

    def on_step_start(self):
        """Step-start callback. Does nothing by default."""

    def on_actions_generated(self, *, step: StepOutput):
        """Actions-generated callback for ``step``. Does nothing by default."""

    def on_action_started(self, *, step: StepOutput):
        """Action-started callback for ``step``. Does nothing by default."""

    def on_action_executed(self, *, step: StepOutput):
        """Action-executed callback for ``step``. Does nothing by default."""

    def on_step_done(self, *, step: StepOutput, info: AgentInfo):
        """Step-done callback receiving the step and agent info. Does nothing by default."""

    def on_run_done(self, *, trajectory: Trajectory, info: AgentInfo):
        """Run-done callback receiving the trajectory and agent info. Does nothing by default."""

    def on_setup_attempt(self):
        """Setup-attempt callback. Does nothing by default."""

    def on_model_query(self, *, messages: list[dict[str, str]], agent: str):
        """Model-query callback receiving the message list and the agent name.

        Does nothing by default.
        """

    def on_query_message_added(
        self,
        *,
        agent: str,
        role: str,
        content: str,
        message_type: str,
        is_demo: bool = False,
        thought: str = "",
        action: str = "",
        tool_calls: list[dict[str, str]] | None = None,
        tool_call_ids: list[str] | None = None,
    ):
        """Query-message-added callback receiving the message fields. Does nothing by default."""

    def on_setup_done(self):
        """Setup-done callback. Does nothing by default."""

    def on_tools_installation_started(self):
        """Tools-installation-started callback. Does nothing by default."""
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class CombinedAgentHook(AbstractAgentHook):
    """Hook that fans every callback out to a list of registered hooks.

    Hooks are invoked in registration order, with the same keyword arguments
    this object received.
    """

    def __init__(self, hooks: list[AbstractAgentHook] | None = None):
        """Start with ``hooks`` (used as is, not copied) or with an empty list."""
        self._hooks = hooks or []

    def add_hook(self, hook: AbstractAgentHook):
        """Register one more hook at the end of the list."""
        self._hooks.append(hook)

    @property
    def hooks(self) -> list[AbstractAgentHook]:
        """The registered hooks (the internal list itself, not a copy)."""
        return self._hooks

    def on_init(self, *, agent: "DefaultAgent"):
        for hook in self.hooks:
            hook.on_init(agent=agent)

    def on_run_start(self):
        for hook in self.hooks:
            hook.on_run_start()

    def on_step_start(self):
        for hook in self.hooks:
            hook.on_step_start()

    def on_actions_generated(self, *, step: StepOutput):
        for hook in self.hooks:
            hook.on_actions_generated(step=step)

    def on_action_started(self, *, step: StepOutput):
        for hook in self.hooks:
            hook.on_action_started(step=step)

    def on_action_executed(self, *, step: StepOutput):
        for hook in self.hooks:
            hook.on_action_executed(step=step)

    def on_step_done(self, *, step: StepOutput, info: AgentInfo):
        for hook in self.hooks:
            hook.on_step_done(step=step, info=info)

    def on_run_done(self, *, trajectory: Trajectory, info: AgentInfo):
        for hook in self.hooks:
            hook.on_run_done(trajectory=trajectory, info=info)

    def on_setup_attempt(self):
        for hook in self.hooks:
            hook.on_setup_attempt()

    def on_model_query(self, *, messages: list[dict[str, str]], agent: str):
        for hook in self.hooks:
            hook.on_model_query(messages=messages, agent=agent)

    def on_query_message_added(
        self,
        *,
        agent: str,
        role: str,
        content: str,
        message_type: str,
        is_demo: bool = False,
        thought: str = "",
        action: str = "",
        tool_calls: list[dict[str, str]] | None = None,
        tool_call_ids: list[str] | None = None,
        thinking_blocks: list[dict[str, str]] | None = None,
    ):
        # NOTE: `thinking_blocks` is accepted but deliberately not forwarded.
        # AbstractAgentHook.on_query_message_added does not declare it, so
        # passing it on would raise TypeError in hooks that keep that signature.
        for hook in self.hooks:
            hook.on_query_message_added(
                agent=agent,
                role=role,
                content=content,
                message_type=message_type,
                is_demo=is_demo,
                thought=thought,
                action=action,
                tool_calls=tool_calls,
                tool_call_ids=tool_call_ids,
            )

    def on_setup_done(self):
        # Fan out like every other callback. Delegating to the base class
        # (a no-op) meant registered hooks never received this event.
        for hook in self.hooks:
            hook.on_setup_done()

    def on_tools_installation_started(self):
        for hook in self.hooks:
            hook.on_tools_installation_started()
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from collections.abc import Callable
|
|
2
|
+
|
|
3
|
+
from sweagent.agent.hooks.abstract import AbstractAgentHook
|
|
4
|
+
from sweagent.types import AgentInfo, StepOutput
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class SetStatusAgentHook(AbstractAgentHook):
    """Hook that reports a short progress message through a callback.

    The callback is invoked as ``callable(id, message)``. Messages carry the
    current step number and the cost accumulated over all attempts.
    """

    def __init__(self, id: str, callable: Callable[[str, str], None]):
        """Store the identifier and the status callback; all counters start at zero."""
        self._callable = callable
        self._id = id
        self._i_step = 0
        # Cost of the current attempt, as last reported in `on_step_done`.
        self._cost = 0.0
        self._i_attempt = 0
        # Sum of the costs of all earlier attempts.
        self._previous_cost = 0.0

    def on_setup_attempt(self):
        """Begin a new attempt: bump the attempt counter and restart step counting."""
        self._i_attempt += 1
        self._i_step = 0
        # Costs will be reset for the next attempt
        self._previous_cost += self._cost
        # The old attempt's cost now lives in `_previous_cost`. Clear it so it
        # is not counted twice in the status line or on a repeated setup.
        self._cost = 0.0

    def _update(self, message: str):
        """Send ``message`` to the status callback under this hook's id."""
        self._callable(self._id, message)

    def on_step_start(self):
        """Advance the step counter and publish step number and total cost."""
        self._i_step += 1
        attempt_str = f"Attempt {self._i_attempt} " if self._i_attempt > 1 else ""
        self._update(f"{attempt_str}Step {self._i_step:>3} (${self._previous_cost + self._cost:.2f})")

    def on_step_done(self, *, step: StepOutput, info: AgentInfo):
        """Record the current attempt's cost from ``info["model_stats"]["instance_cost"]``."""
        self._cost = info["model_stats"]["instance_cost"]  # type: ignore

    def on_tools_installation_started(self):
        """Publish a fixed status message for the tools installation phase."""
        self._update("Installing tools")
|