@elizaos/sweagent-root 2.0.0-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +270 -0
- package/package.json +71 -0
- package/python/LICENSE +21 -0
- package/python/config/README.md +15 -0
- package/python/config/bash_only.yaml +222 -0
- package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
- package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
- package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
- package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
- package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
- package/python/config/coding_challenge.yaml +104 -0
- package/python/config/default.yaml +69 -0
- package/python/config/default_backticks.yaml +69 -0
- package/python/config/default_mm_no_images.yaml +82 -0
- package/python/config/default_mm_with_images.yaml +83 -0
- package/python/config/demo/default.yaml +80 -0
- package/python/config/demo/no_instructions.yaml +69 -0
- package/python/config/demo/only_bash.yaml +60 -0
- package/python/config/exotic/default_shell.yaml +52 -0
- package/python/config/exotic/windowed_replace.yaml +125 -0
- package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
- package/python/config/human/human.yaml +24 -0
- package/python/config/human/human_demo.yaml +52 -0
- package/python/config/sweagent_0_7/07.yaml +101 -0
- package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
- package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
- package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
- package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
- package/python/mlc_config.json +44 -0
- package/python/pyproject.toml +262 -0
- package/python/sweagent/__init__.py +114 -0
- package/python/sweagent/__main__.py +4 -0
- package/python/sweagent/agent/__init__.py +0 -0
- package/python/sweagent/agent/action_sampler.py +317 -0
- package/python/sweagent/agent/agents.py +1294 -0
- package/python/sweagent/agent/extra/shell_agent.py +106 -0
- package/python/sweagent/agent/history_processors.py +399 -0
- package/python/sweagent/agent/hooks/__init__.py +0 -0
- package/python/sweagent/agent/hooks/abstract.py +139 -0
- package/python/sweagent/agent/hooks/status.py +34 -0
- package/python/sweagent/agent/models.py +896 -0
- package/python/sweagent/agent/problem_statement.py +312 -0
- package/python/sweagent/agent/reviewer.py +664 -0
- package/python/sweagent/environment/__init__.py +0 -0
- package/python/sweagent/environment/hooks/__init__.py +0 -0
- package/python/sweagent/environment/hooks/abstract.py +60 -0
- package/python/sweagent/environment/hooks/status.py +28 -0
- package/python/sweagent/environment/repo.py +219 -0
- package/python/sweagent/environment/swe_env.py +276 -0
- package/python/sweagent/exceptions.py +54 -0
- package/python/sweagent/inspector/README.md +6 -0
- package/python/sweagent/inspector/__init__.py +0 -0
- package/python/sweagent/inspector/favicon.ico +0 -0
- package/python/sweagent/inspector/fileViewer.js +354 -0
- package/python/sweagent/inspector/icons/computer.png +0 -0
- package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
- package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
- package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
- package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
- package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
- package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
- package/python/sweagent/inspector/index.html +25 -0
- package/python/sweagent/inspector/server.py +354 -0
- package/python/sweagent/inspector/static.py +169 -0
- package/python/sweagent/inspector/style.css +454 -0
- package/python/sweagent/run/__init__.py +0 -0
- package/python/sweagent/run/_progress.py +158 -0
- package/python/sweagent/run/batch_instances.py +419 -0
- package/python/sweagent/run/common.py +387 -0
- package/python/sweagent/run/compare_runs.py +123 -0
- package/python/sweagent/run/extract_pred.py +19 -0
- package/python/sweagent/run/hooks/__init__.py +0 -0
- package/python/sweagent/run/hooks/abstract.py +67 -0
- package/python/sweagent/run/hooks/apply_patch.py +106 -0
- package/python/sweagent/run/hooks/open_pr.py +244 -0
- package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
- package/python/sweagent/run/inspector_cli.py +493 -0
- package/python/sweagent/run/merge_predictions.py +64 -0
- package/python/sweagent/run/quick_stats.py +96 -0
- package/python/sweagent/run/remove_unfinished.py +63 -0
- package/python/sweagent/run/rich_test.py +91 -0
- package/python/sweagent/run/run.py +147 -0
- package/python/sweagent/run/run_batch.py +442 -0
- package/python/sweagent/run/run_replay.py +219 -0
- package/python/sweagent/run/run_shell.py +155 -0
- package/python/sweagent/run/run_single.py +225 -0
- package/python/sweagent/run/run_traj_to_demo.py +85 -0
- package/python/sweagent/tools/__init__.py +0 -0
- package/python/sweagent/tools/bundle.py +57 -0
- package/python/sweagent/tools/commands.py +220 -0
- package/python/sweagent/tools/parsing.py +619 -0
- package/python/sweagent/tools/tools.py +430 -0
- package/python/sweagent/tools/utils.py +108 -0
- package/python/sweagent/types.py +102 -0
- package/python/sweagent/utils/__init__.py +0 -0
- package/python/sweagent/utils/config.py +80 -0
- package/python/sweagent/utils/files.py +27 -0
- package/python/sweagent/utils/github.py +118 -0
- package/python/sweagent/utils/jinja_warnings.py +14 -0
- package/python/sweagent/utils/log.py +175 -0
- package/python/sweagent/utils/patch_formatter.py +152 -0
- package/python/sweagent/utils/serialization.py +45 -0
- package/python/tests/__init__.py +0 -0
- package/python/tests/conftest.py +191 -0
- package/python/tests/test_agent.py +258 -0
- package/python/tests/test_batch_instance.py +43 -0
- package/python/tests/test_commands/_interactive_dummy.py +35 -0
- package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
- package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
- package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
- package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
- package/python/tests/test_data/data_sources/human_eval.json +1 -0
- package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
- package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
- package/python/tests/test_env.py +66 -0
- package/python/tests/test_env_utils.py +129 -0
- package/python/tests/test_history_processors.py +40 -0
- package/python/tests/test_models.py +23 -0
- package/python/tests/test_openai_live.py +164 -0
- package/python/tests/test_packaging.py +7 -0
- package/python/tests/test_parsing.py +131 -0
- package/python/tests/test_problem_statement_multimodal.py +111 -0
- package/python/tests/test_quick_stats.py +42 -0
- package/python/tests/test_run.py +37 -0
- package/python/tests/test_run_batch.py +110 -0
- package/python/tests/test_run_hooks.py +114 -0
- package/python/tests/test_run_replay.py +33 -0
- package/python/tests/test_run_single.py +125 -0
- package/python/tests/test_tools_command_parsing.py +193 -0
- package/python/tests/test_utils.py +15 -0
- package/python/tests/tools/__init__.py +0 -0
- package/python/tests/tools/conftest.py +12 -0
- package/python/tests/tools/test_default_utils.py +153 -0
- package/python/tests/tools/test_edit_replace.py +0 -0
- package/python/tests/tools/test_split_string.py +82 -0
- package/python/tests/utils.py +29 -0
- package/python/tools/diff_state/bin/_state_diff_state +52 -0
- package/python/tools/diff_state/config.yaml +2 -0
- package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
- package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
- package/python/tools/edit_anthropic/config.yaml +56 -0
- package/python/tools/edit_anthropic/install.sh +3 -0
- package/python/tools/filemap/bin/filemap +45 -0
- package/python/tools/filemap/config.yaml +9 -0
- package/python/tools/filemap/install.sh +2 -0
- package/python/tools/forfeit/bin/exit_forfeit +5 -0
- package/python/tools/forfeit/config.yaml +5 -0
- package/python/tools/image_tools/bin/view_image +36 -0
- package/python/tools/image_tools/config.yaml +9 -0
- package/python/tools/multilingual_setup/bin/do_nothing +2 -0
- package/python/tools/multilingual_setup/config.yaml +1 -0
- package/python/tools/multilingual_setup/install.sh +45 -0
- package/python/tools/registry/bin/_read_env +10 -0
- package/python/tools/registry/bin/_write_env +10 -0
- package/python/tools/registry/config.yaml +1 -0
- package/python/tools/registry/install.sh +6 -0
- package/python/tools/registry/lib/__init__.py +0 -0
- package/python/tools/registry/lib/registry.py +56 -0
- package/python/tools/review_on_submit_m/README.md +6 -0
- package/python/tools/review_on_submit_m/bin/submit +54 -0
- package/python/tools/review_on_submit_m/config.yaml +6 -0
- package/python/tools/review_on_submit_m/install.sh +0 -0
- package/python/tools/search/bin/find_file +31 -0
- package/python/tools/search/bin/search_dir +39 -0
- package/python/tools/search/bin/search_file +55 -0
- package/python/tools/search/config.yaml +37 -0
- package/python/tools/search/install.sh +3 -0
- package/python/tools/submit/bin/submit +17 -0
- package/python/tools/submit/config.yaml +5 -0
- package/python/tools/web_browser/bin/click_mouse +41 -0
- package/python/tools/web_browser/bin/close_site +28 -0
- package/python/tools/web_browser/bin/double_click_mouse +37 -0
- package/python/tools/web_browser/bin/drag_mouse +46 -0
- package/python/tools/web_browser/bin/execute_script_on_page +39 -0
- package/python/tools/web_browser/bin/get_console_output +48 -0
- package/python/tools/web_browser/bin/move_mouse +35 -0
- package/python/tools/web_browser/bin/navigate_back +33 -0
- package/python/tools/web_browser/bin/navigate_forward +33 -0
- package/python/tools/web_browser/bin/open_site +36 -0
- package/python/tools/web_browser/bin/press_keys_on_page +51 -0
- package/python/tools/web_browser/bin/reload_page +33 -0
- package/python/tools/web_browser/bin/run_web_browser_server +394 -0
- package/python/tools/web_browser/bin/screenshot_site +38 -0
- package/python/tools/web_browser/bin/scroll_on_page +40 -0
- package/python/tools/web_browser/bin/set_browser_window_size +40 -0
- package/python/tools/web_browser/bin/type_text +34 -0
- package/python/tools/web_browser/bin/wait_time +39 -0
- package/python/tools/web_browser/config.yaml +155 -0
- package/python/tools/web_browser/install.sh +22 -0
- package/python/tools/web_browser/lib/browser_manager.py +404 -0
- package/python/tools/web_browser/lib/web_browser_config.py +33 -0
- package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
- package/python/tools/web_browser/test_console.html +1 -0
- package/python/tools/windowed/bin/_state +25 -0
- package/python/tools/windowed/bin/create +29 -0
- package/python/tools/windowed/bin/goto +37 -0
- package/python/tools/windowed/bin/open +49 -0
- package/python/tools/windowed/bin/scroll_down +12 -0
- package/python/tools/windowed/bin/scroll_up +13 -0
- package/python/tools/windowed/config.yaml +38 -0
- package/python/tools/windowed/install.sh +15 -0
- package/python/tools/windowed/lib/__init__.py +0 -0
- package/python/tools/windowed/lib/flake8_utils.py +147 -0
- package/python/tools/windowed/lib/windowed_file.py +312 -0
- package/python/tools/windowed_edit_linting/bin/edit +128 -0
- package/python/tools/windowed_edit_linting/config.yaml +31 -0
- package/python/tools/windowed_edit_linting/install.sh +5 -0
- package/python/tools/windowed_edit_replace/bin/edit +172 -0
- package/python/tools/windowed_edit_replace/bin/insert +77 -0
- package/python/tools/windowed_edit_replace/config.yaml +60 -0
- package/python/tools/windowed_edit_replace/install.sh +5 -0
- package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
- package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
- package/python/tools/windowed_edit_rewrite/install.sh +5 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
- package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
- package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
- package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
- package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
- package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
- package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
- package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
- package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
- package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
- package/rust/Cargo.toml +100 -0
- package/rust/README.md +49 -0
- package/rust/src/agent/action_sampler.rs +130 -0
- package/rust/src/agent/agents.rs +1029 -0
- package/rust/src/agent/history_processors.rs +277 -0
- package/rust/src/agent/hooks/mod.rs +208 -0
- package/rust/src/agent/mod.rs +24 -0
- package/rust/src/agent/models.rs +837 -0
- package/rust/src/agent/problem_statement.rs +355 -0
- package/rust/src/agent/reviewer.rs +505 -0
- package/rust/src/bin/sweagent.rs +784 -0
- package/rust/src/environment/deployment.rs +631 -0
- package/rust/src/environment/hooks/mod.rs +114 -0
- package/rust/src/environment/mod.rs +16 -0
- package/rust/src/environment/repo.rs +265 -0
- package/rust/src/environment/runtime.rs +237 -0
- package/rust/src/environment/swe_env.rs +248 -0
- package/rust/src/exceptions.rs +228 -0
- package/rust/src/lib.rs +68 -0
- package/rust/src/monitoring.rs +482 -0
- package/rust/src/run/hooks/mod.rs +134 -0
- package/rust/src/run/mod.rs +12 -0
- package/rust/src/run/run_batch.rs +563 -0
- package/rust/src/run/run_single.rs +196 -0
- package/rust/src/tools/bundle.rs +224 -0
- package/rust/src/tools/commands.rs +173 -0
- package/rust/src/tools/mod.rs +295 -0
- package/rust/src/tools/parsing.rs +354 -0
- package/rust/src/tools/registry.rs +143 -0
- package/rust/src/types.rs +554 -0
- package/rust/src/utils/config.rs +105 -0
- package/rust/src/utils/files.rs +137 -0
- package/rust/src/utils/github.rs +171 -0
- package/rust/src/utils/log.rs +65 -0
- package/rust/src/utils/mod.rs +17 -0
- package/rust/src/utils/serialization.rs +181 -0
- package/rust/src/utils/template.rs +173 -0
- package/typescript/README.md +335 -0
|
@@ -0,0 +1,619 @@
|
|
|
1
|
+
"""Our parsers parse output from the LM into thoughts and actions.
|
|
2
|
+
|
|
3
|
+
For example, our most basic parser is the `ThoughtActionParser`.
|
|
4
|
+
It expects the model response to be a discussion followed by a command wrapped in backticks like so:
|
|
5
|
+
|
|
6
|
+
```
|
|
7
|
+
Let's look at the files in the current directory.
|
|
8
|
+
|
|
9
|
+
Action:
|
|
10
|
+
```
|
|
11
|
+
ls -l
|
|
12
|
+
```
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
For models that support function calling, we instead recommend using the `FunctionCallingParser`.
|
|
16
|
+
|
|
17
|
+
To use a specific parser, set the `parse_function` key in your tool config to the `type` field of the parser.
|
|
18
|
+
|
|
19
|
+
```yaml
|
|
20
|
+
agent:
|
|
21
|
+
tools:
|
|
22
|
+
...
|
|
23
|
+
parse_function:
|
|
24
|
+
type: "thought_action"
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Or from the command line: `--agent.tools.parse_function.type=thought_action`.
|
|
28
|
+
|
|
29
|
+
!!! note "Describing available tools"
|
|
30
|
+
If you do not use the `FunctionCallingParser`, you need to include documentation about the available tools
|
|
31
|
+
in your system prompt. You can use the `{{command_docs}}` variable to include the automatically generated
|
|
32
|
+
documentation or explicitly describe the available tools.
|
|
33
|
+
Also see [#1130](https://github.com/SWE-agent/SWE-agent/issues/1130).
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
import json
|
|
37
|
+
import re
|
|
38
|
+
import textwrap
|
|
39
|
+
from abc import ABC, abstractmethod
|
|
40
|
+
from shlex import quote
|
|
41
|
+
from textwrap import dedent
|
|
42
|
+
from typing import Any, Literal
|
|
43
|
+
|
|
44
|
+
from jinja2 import Template
|
|
45
|
+
from pydantic import BaseModel
|
|
46
|
+
|
|
47
|
+
from sweagent.exceptions import FormatError, FunctionCallingFormatError
|
|
48
|
+
from sweagent.tools.commands import Command
|
|
49
|
+
from sweagent.tools.utils import _should_quote
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class AbstractParseFunction(ABC):
    """
    Common interface for parsers that turn a model response into a pair of strings.

    Concrete parsers provide an `error_message` and implement `__call__`.
    """

    error_message: str

    @abstractmethod
    def __call__(self, model_response, commands: list[Command], strict=False) -> tuple[str, str]:
        """Parse `model_response`; every concrete parser must override this."""
        raise NotImplementedError

    @property
    def format_error_template(self):
        """Return `error_message` with its common leading whitespace removed."""
        template = self.error_message
        return textwrap.dedent(template)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# DEFINE NEW PARSING FUNCTIONS BELOW THIS LINE
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class ActionParser(AbstractParseFunction, BaseModel):
    """
    Parser for responses that consist of nothing but a single command.
    Example: "ls -l"
    """

    error_message: str = """\
    The command you provided was not recognized. Please specify one of the commands (+ any necessary arguments) from the following list in your response. Do not include any other text.

    COMMANDS:
    {command_docs}
    """

    type: Literal["action"] = "action"
    """Type for (de)serialization. Do not change."""

    def __call__(self, model_response: dict, commands: list[Command], strict=False):
        """
        Return the message as both thought and action if it starts with a known command.

        Raises `FormatError` when the message is blank or its first word is not
        the name of one of `commands`.
        """
        message = model_response["message"]
        words = message.split()
        # Short-circuit on a blank message so the command names are only collected when needed.
        if words and words[0] in {command.name for command in commands}:
            return message, message
        raise FormatError("First word in model response is not a valid command.")
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class ActionOnlyParser(AbstractParseFunction, BaseModel):
    """Parser that treats the entire model message as the command to run."""

    error_message: str = "No message found in model response."

    type: Literal["action_only"] = "action_only"
    """Type for (de)serialization. Do not change."""

    def __call__(self, model_response: dict, commands: list[Command], strict=False):
        """Return an empty thought together with the unmodified message as the action."""
        action = model_response["message"]
        return "", action
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class ThoughtActionParser(AbstractParseFunction, BaseModel):
    """
    Parser for responses made of a discussion plus a command fenced in triple backticks.
    Example:
    Let's look at the files in the current directory.
    ```
    ls -l
    ```
    """

    error_message: str = dedent("""\
    Your output was not formatted correctly. You must always include one discussion and one command as part of your response. Make sure you do not have multiple discussion/command tags.
    Please make sure your output precisely matches the following format:
    DISCUSSION
    Discuss here with yourself about what your planning and what you're going to do in this step.

    ```
    command(s) that you're going to run
    ```
    """)

    type: Literal["thought_action"] = "thought_action"
    """Type for (de)serialization. Do not change."""

    def __call__(self, model_response: dict, commands: list[Command], strict=False):
        """
        Extract the action from the last top-level fenced code block of the message.

        Fences are matched with a stack, so a block nested inside another block is
        never selected on its own. When the message holds several top-level blocks,
        e.g.
        ```
        This is a code block.
        ```
        ```
        This is another code block.
        ```
        only the last one becomes the action. The thought is the message with that
        block (fences included) cut out.

        Raises `FormatError` if no complete top-level block is found.
        """
        message = model_response["message"]
        fence_pattern = re.compile(r"^```(\S*)\s*\n|^```\s*$", re.MULTILINE)
        open_fences = []
        latest_block = None
        for fence in fence_pattern.finditer(message):
            label = fence.group(1)
            if open_fences and not label:
                # An unlabelled fence while a block is open closes that block.
                opener = open_fences.pop()
                if not open_fences:
                    # Only blocks that are not nested in another block count.
                    latest_block = (opener, fence)
                continue
            if label is not None:
                # Any other fence that is followed by a newline opens a block.
                open_fences.append(fence)
        if not latest_block:
            msg = "No action found in model response."
            raise FormatError(msg)
        opener, closer = latest_block
        before = message[: opener.start()]
        after = message[closer.end() :]
        action = message[opener.end() : closer.start()]
        return before + after, action
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class XMLThoughtActionParser(AbstractParseFunction, BaseModel):
    """
    Expects the model response to be a discussion followed by a command wrapped in XML tags.
    Example:
    Let's look at the files in the current directory.
    <command>
    ls -l
    </command>
    """

    error_message: str = dedent("""\
    Your output was not formatted correctly. You must always include one discussion and one command as part of your response. Make sure you do not have multiple discussion/command tags.
    Please make sure your output precisely matches the following format:
    """)

    type: Literal["xml_thought_action"] = "xml_thought_action"
    """Type for (de)serialization. Do not change."""

    def __call__(self, model_response: dict, commands: list[Command], strict=False) -> tuple[str, str]:
        """
        Parses the action from the output of the API call.

        The action is the text between the last `</command>` tag and the closest
        `<command>` tag that precedes it. If the response contains several
        command elements, for instance:
        <command>
        This is a command.
        </command>
        <command>
        This is another command.
        </command>

        only the second one is parsed as the action. The thought is everything
        outside of that element, including any text after the closing tag.

        Returns:
            The `(thought, action)` pair, both stripped of surrounding whitespace.

        Raises:
            FormatError: If the response holds no `<command>` tag that is followed
                by a `</command>` tag.
        """
        message = model_response["message"]
        open_tag = "<command>"
        close_tag = "</command>"

        # Anchor on the last closing tag and pair it with the nearest opening tag
        # before it. Searching for both tags independently would yield an empty
        # action and a duplicated thought when a stray `<command>` trails the
        # last complete element.
        close_idx = message.rfind(close_tag)
        open_idx = message.rfind(open_tag, 0, close_idx) if close_idx != -1 else -1
        if open_idx == -1:
            msg = "No action found in model response."
            raise FormatError(msg)

        action = message[open_idx + len(open_tag) : close_idx]
        # `thought` is everything outside the selected element.
        thought = message[:open_idx] + message[close_idx + len(close_tag) :]

        return thought.strip(), action.strip()
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
# Matches one `<function=NAME>` ... `</function>` element. Group 1 is the name
# (any characters up to the first `>`); group 2 is the non-greedy text between
# the newline that must follow the opening tag and the closing tag.
FN_REGEX_PATTERN = r"<function=([^>]+)>\n(.*?)</function>"
# Matches one `<parameter=NAME>...</parameter>` element. Group 1 is the name,
# group 2 the non-greedy text between the tags.
FN_PARAM_REGEX_PATTERN = r"<parameter=([^>]+)>(.*?)</parameter>"
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
class XMLFunctionCallingParser(AbstractParseFunction, BaseModel):
    """
    Expects the model response to be a tool calling format, where the command and parameters are specified
    in XML tags.
    Example:
    Let's look at the files in the current directory.
    <function=bash>
    <parameter=command>find /testbed -type f -name "_discovery.py"</parameter>
    </function>
    """

    # Jinja template rendered when parsing fails; branches on `error_code`.
    error_message: str = dedent("""\
    {%- if error_code == "missing" -%}
    Your last output did not use any tool calls!
    Please make sure your output includes exactly _ONE_ function call!
    If you think you have already resolved the issue, please submit your changes by running the `submit` command.
    If you think you cannot solve the problem, please run `submit`.
    Else, please continue with a new tool call!
    {%- elif error_code == "multiple" -%}
    Your last output included multiple tool calls!
    Please make sure your output includes a thought and exactly _ONE_ function call.
    {%- elif error_code == "unexpected_arg" -%}
    Your action could not be parsed properly: {{exception_message}}.
    Make sure your function call doesn't include any extra arguments that are not in the allowed arguments, and only use the allowed commands.
    {%- else -%}
    Your action could not be parsed properly: {{exception_message}}.
    {% endif %}
    """)

    type: Literal["xml_function_calling"] = "xml_function_calling"

    def __call__(self, model_response: dict, commands: list[Command], strict=False) -> tuple[str, str]:
        """Parse the first `<function=...>` call of the response into (thought, action).

        Args:
            model_response: Mapping whose "message" entry holds the raw model text.
            commands: Commands the call may refer to; looked up by `name`.
            strict: Accepted for interface compatibility; not used by this parser.

        Returns:
            A tuple of the message text with the function call removed (stripped)
            and the action string built from the command's `invoke_format`.

        Raises:
            FormatError: If no function call is present, the command is unknown,
                `view_range` is malformed, a required argument is missing or an
                unexpected argument is supplied.
        """
        message = model_response["message"]
        fn_match = re.search(FN_REGEX_PATTERN, message, re.DOTALL)
        if not fn_match:
            msg = "No function found in model response."
            raise FormatError(msg)
        fn_name = fn_match.group(1).strip()

        # Handle different names in SWE-agent vs. SWE-gym
        if fn_name == "execute_bash":
            fn_name = "bash"
        if fn_name == "finish":
            fn_name = "submit"

        fn_body = fn_match.group(2)
        # Everything outside the matched call counts as the thought.
        thought = (message[: fn_match.start()] + message[fn_match.end() :]).strip()

        commands_dict = {c.name: c for c in commands}
        command = commands_dict.get(fn_name)
        if not command:
            msg = f"Command '{fn_name}' not found in list of available commands."
            raise FormatError(msg)

        # Drop at most one leading and one trailing newline from each raw value.
        params_dict = {
            param[0]: re.sub(r"^\n|\n$", "", param[1])
            for param in re.findall(FN_PARAM_REGEX_PATTERN, fn_body, re.DOTALL)
        }

        if "view_range" in params_dict:
            # Check that value is formatted as [x, y]
            v = params_dict["view_range"]
            if isinstance(v, str):
                # fullmatch (not match) so that trailing junk such as "[1, 2]abc" is
                # rejected here with a FormatError instead of blowing up while decoding.
                range_match = re.fullmatch(r"\[(\d+),\s*(\d+)\]", v.strip())
                if not range_match:
                    msg = f"view_range must be in the format [<start>, <end>], got {v}."
                    raise FormatError(msg)
                params_dict["view_range"] = [int(range_match.group(1)), int(range_match.group(2))]

        # Check if all required arguments are there
        required_args = {arg.name for arg in command.arguments if arg.required}
        missing_args = required_args - params_dict.keys()
        if missing_args:
            # sorted() keeps the message deterministic (sets are unordered)
            msg = f"Required argument(s) missing: {', '.join(sorted(missing_args))}"
            raise FormatError(msg)

        # Check if all arguments are valid
        valid_args = {arg.name for arg in command.arguments}
        extra_args = set(params_dict.keys()) - valid_args
        if command.end_name:
            # sometimes the model will include the end_name in the arguments - just ignore it
            extra_args.discard(command.end_name)
        if extra_args:
            msg = f"Unexpected argument(s): {', '.join(sorted(extra_args))}"
            raise FormatError(msg)

        # Format arguments using their individual argument_format;
        # arguments that were not supplied render as an empty string.
        formatted_args = {}
        for arg in command.arguments:
            if arg.name not in params_dict:
                formatted_args[arg.name] = ""
                continue
            value = params_dict[arg.name]
            if _should_quote(value, command):
                value = quote(value)
            formatted_args[arg.name] = Template(arg.argument_format).render(value=value)
        return thought, command.invoke_format.format(**formatted_args).strip()
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
class EditFormat(ThoughtActionParser, BaseModel):
    """
    Expects the model response to be a discussion followed by a command wrapped in backticks.
    Example:
    We'll replace the contents of the current window with the following:
    ```
    import os
    os.listdir()
    ```
    """

    # Message returned to the model when its output does not follow the expected layout.
    error_message: str = dedent("""\
    Your output was not formatted correctly. You must wrap the replacement text in backticks (```).
    Please make sure your output precisely matches the following format:
    COMMENTS
    You can write comments here about what you're going to do if you want.

    ```
    New window contents.
    Make sure you copy the entire contents of the window here, with the required indentation.
    Make the changes to the window above directly in this window.
    Remember that all of the window's contents will be replaced with the contents of this window.
    Don't include line numbers in your response.
    ```
    """)

    type: Literal["edit_format"] = "edit_format"
    """Type for (de)serialization. Do not change."""
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
class Identity(AbstractParseFunction, BaseModel):
    """This parser does not do any parsing. It just returns the model response as both the thought and action."""

    # NOTE(review): this literal is not passed through dedent, so any leading
    # whitespace on its lines is part of the message - confirm against the original file.
    error_message: str = """\
It seems like something went wrong with your output. Please try again.
"""

    type: Literal["identity"] = "identity"
    """Type for (de)serialization. Do not change."""

    def __call__(self, model_response: dict, commands: list[Command], strict=False) -> tuple[str, str]:
        """
        Pass-through parser: the "message" entry of `model_response` is returned
        unchanged as both the thought and the action. `commands` and `strict` are ignored.
        """
        raw_message = model_response["message"]
        return raw_message, raw_message
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
class FunctionCallingParser(AbstractParseFunction, BaseModel):
    """Expects the model response to be a LiteLLM tool call."""

    # Jinja template rendered when parsing fails; branches on `error_code`.
    error_message: str = dedent("""\
    {%- if error_code == "missing" -%}
    Your last output did not use any tool calls!
    Please make sure your output includes exactly _ONE_ function call!
    You must invoke the function directly using the function call format.
    You cannot invoke commands with ```, you have to use the function call format.
    If you think you have already resolved the issue, please submit your changes by running the `submit` command.
    If you think you cannot solve the problem, please run `exit_forfeit` (if available) or `submit`.
    Else, please continue with a new tool call!
    {%- elif error_code == "multiple" -%}
    Your last output included multiple tool calls!
    Please make sure your output includes a thought and exactly _ONE_ function call.
    {%- elif error_code == "unexpected_arg" -%}
    Your action could not be parsed properly: {{exception_message}}.
    Make sure your function call doesn't include any extra arguments that are not in the allowed arguments, and only use the allowed commands.
    {%- else -%}
    Your action could not be parsed properly: {{exception_message}}.
    {% endif %}
    """)

    type: Literal["function_calling"] = "function_calling"
    """Type for (de)serialization. Do not change."""

    def _parse_tool_call(self, tool_call: dict, commands: list[Command]):
        """Turn a single tool call into the action string of the matching command.

        Args:
            tool_call: Mapping with a "function" entry holding "name" and
                "arguments"; the arguments may be a dict or a JSON-encoded string.
            commands: Commands the call may refer to; looked up by `name`.

        Returns:
            The command's `invoke_format` filled with the formatted arguments, stripped.

        Raises:
            FunctionCallingFormatError: With code "invalid_command", "invalid_json",
                "missing_arg" or "unexpected_arg" depending on what is wrong.
        """
        name = tool_call["function"]["name"]
        command = {c.name: c for c in commands}.get(name)
        if not command:
            msg = f"Command '{name}' not found in list of available commands."
            raise FunctionCallingFormatError(msg, "invalid_command")

        # Arguments that are already a dict are used as-is; previously `values`
        # was left unassigned in that case and the next lines failed.
        values = tool_call["function"]["arguments"]
        if not isinstance(values, dict):
            try:
                values = json.loads(values)
            except json.JSONDecodeError as err:
                msg = "Tool call arguments are not valid JSON."
                raise FunctionCallingFormatError(msg, "invalid_json") from err
            if not isinstance(values, dict):
                # Valid JSON that is not an object (e.g. `null` or a list) cannot
                # be mapped onto named arguments.
                msg = "Tool call arguments must be a JSON object."
                raise FunctionCallingFormatError(msg, "invalid_json")

        required_args = {arg.name for arg in command.arguments if arg.required}
        missing_args = required_args - values.keys()
        if missing_args:
            # sorted() keeps the message deterministic (sets are unordered)
            msg = f"Required argument(s) missing: {', '.join(sorted(missing_args))}"
            raise FunctionCallingFormatError(msg, "missing_arg")
        valid_args = {arg.name for arg in command.arguments}
        extra_args = set(values.keys()) - valid_args
        if command.end_name:
            # sometimes the model will include the end_name in the arguments - just ignore it
            extra_args.discard(command.end_name)
        if extra_args:
            msg = f"Unexpected argument(s): {', '.join(sorted(extra_args))}"
            raise FunctionCallingFormatError(msg, "unexpected_arg")

        def get_quoted_arg(value: Any) -> Any:
            """Quote string values when required; map None to ""; pass anything else through."""
            if isinstance(value, str):
                return quote(value) if _should_quote(value, command) else value
            # See https://github.com/SWE-agent/SWE-agent/issues/1159
            if value is None:
                return ""
            return value

        # Arguments that were not supplied render as an empty string.
        formatted_args = {
            arg.name: Template(arg.argument_format).render(value=get_quoted_arg(values[arg.name]))
            if arg.name in values
            else ""
            for arg in command.arguments
        }
        return command.invoke_format.format(**formatted_args).strip()

    def __call__(self, model_response: dict, commands: list[Command], strict=False):
        """Parse a response that must carry exactly one tool call.

        Args:
            model_response: Mapping with a "message" entry and an optional
                "tool_calls" list.
            commands: Commands the tool call may refer to.
            strict: Accepted for interface compatibility; not used by this parser.

        Returns:
            A tuple of the unmodified message and the action string.

        Raises:
            FunctionCallingFormatError: With code "missing" when there is no tool
                call, "multiple" when there is more than one, or any error raised
                while parsing the single tool call.
        """
        message = model_response["message"]
        tool_calls = model_response.get("tool_calls", None)
        if tool_calls is None or len(tool_calls) != 1:
            num_tools = len(tool_calls) if tool_calls else 0
            msg = (
                f"Expected exactly one tool call in model response - received {num_tools} "
                f"tool calls with message: {message}"
            )
            error_code = "missing" if num_tools == 0 else "multiple"
            raise FunctionCallingFormatError(msg, error_code, num_tools=num_tools)
        tool_call = tool_calls[0]
        action = self._parse_tool_call(tool_call, commands)
        return message, action
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
class JsonParser(AbstractParseFunction, BaseModel):
    """Expects the model response to be a JSON object."""

    # Message returned to the model when its output cannot be parsed.
    error_message: str = dedent("""\
    Your output could not be parsed as JSON. Please make sure your output 1) is valid JSON and
    2) Includes the "thought" and "command" fields.

    """)

    type: Literal["json"] = "json"
    """Type for (de)serialization. Do not change."""

    def __call__(self, model_response: dict, commands: list[Command], strict=False):
        """Parses the action from the output of the API call.
        We assume that model output is a JSON object with the following fields:
        {
            "thought": "discussion text here.",
            "command": {
                "arguments": {
                    "arg1": "value1",
                    "arg2": "value2",
                    ...
                },
                "name": "command_name"
            }
        }

        Args:
            model_response: Mapping whose "message" entry holds the JSON text.
            commands: Known commands; looked up by `name`.
            strict: When true, unknown commands and missing required arguments
                raise; otherwise an unknown command is rendered as its name
                followed by its argument values, and missing arguments render as "".

        Returns:
            A tuple of the "thought" value and the action string.

        Raises:
            FormatError: If the message is not valid JSON or does not have the
                structure described above.
        """
        # Only the decoding step can raise JSONDecodeError, so keep the try minimal.
        try:
            data = json.loads(model_response["message"])
        except json.JSONDecodeError as err:
            msg = "Model output is not valid JSON."
            raise FormatError(msg) from err
        if not isinstance(data, dict):
            msg = "Model output is not a JSON object."
            raise FormatError(msg)

        # Check if required keys are present
        for key in ("thought", "command"):
            if key not in data:
                msg = f"Key '{key}' is missing from model output."
                raise FormatError(msg)

        # Check structure of 'command' key
        data_command = data["command"]
        if not isinstance(data_command, dict):
            msg = "Value of 'command' key is not a JSON object."
            raise FormatError(msg)

        # Check if required keys are present in 'command' object
        for key in ("name",):
            if key not in data_command:
                msg = f"Key '{key}' is missing from 'command' object."
                raise FormatError(msg)
        name = data_command["name"]
        if not isinstance(name, str):
            # A non-string name can neither be looked up nor joined into an action.
            msg = "Value of 'name' key is not a string."
            raise FormatError(msg)

        # A missing or null "arguments" entry means "no arguments".
        arguments = data_command.get("arguments")
        if arguments is None:
            arguments = {}
        if not isinstance(arguments, dict):
            msg = "Value of 'arguments' key is not a JSON object."
            raise FormatError(msg)

        thought = data["thought"]
        commands_dict = {c.name: c for c in commands}
        command = commands_dict.get(name)

        # Handle command parsing based on strict mode
        if command is None:
            if strict:
                msg = f"Command '{name}' not found in list of available commands."
                raise FormatError(msg)
            # In non-strict mode, just join command name with argument values.
            # str() so that numeric/boolean JSON values do not break the join.
            return thought, " ".join([name, *(str(v) for v in arguments.values())])

        # Format arguments using their individual argument_format
        formatted_args = {}
        for arg in command.arguments or []:
            if arg.name in arguments:
                value = arguments[arg.name]
                if _should_quote(value, command):
                    value = quote(value)
                formatted_args[arg.name] = Template(arg.argument_format).render(value=value)
            elif strict and arg.required:
                msg = f"Required argument '{arg.name}' missing for command '{command.name}'"
                raise FormatError(msg)
            else:
                # Keep every placeholder of invoke_format resolvable; without this
                # an omitted argument made str.format raise KeyError.
                formatted_args[arg.name] = ""

        # Use the formatted arguments with invoke_format
        action = command.invoke_format.format(**formatted_args).strip()
        return thought, action
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
class BashCodeBlockParser(AbstractParseFunction, BaseModel):
    """Executes all commands in ```bash code blocks."""

    # Message returned to the model when no bash block is found. The example
    # block is closed with a fence because the parser requires a closing fence.
    error_message: str = dedent("""\
    No bash code blocks were detected in your output.
    You need to include at least one bash code block in your output.

    It must follow this format exactly to be valid:
    ```bash
    cmd arg1 arg2 ...
    ...
    ```

    Other types of code blocks (e.g. python, rust, none, etc.) won't be executed. Only bash.
    """)

    type: Literal["all_bash_code_blocks"] = "all_bash_code_blocks"

    def __call__(self, model_response: dict, commands: list[Command], strict=False):
        """Collect every ```bash fenced block of the response into one action.

        Args:
            model_response: Mapping whose "message" entry holds the raw model text.
            commands: Not used by this parser.
            strict: Not used by this parser.

        Returns:
            A tuple of the message with each bash block replaced by
            "<extracted_code_block>" and the block contents joined by newlines.

        Raises:
            FormatError: If the message contains no ```bash block.
        """
        message = model_response["message"]
        pattern = re.compile(r"```bash\n(.*?)\n```", re.DOTALL)
        matches = pattern.findall(message)
        if not matches:
            msg = "No bash code blocks were detected in your output."
            raise FormatError(msg)
        thought = pattern.sub("<extracted_code_block>", message)
        action = "\n".join(matches)
        return thought, action
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
class SingleBashCodeBlockParser(AbstractParseFunction, BaseModel):
    """Executes the command(s) of the single ```bash code block in the response.

    Responses with zero or with more than one ```bash block are rejected.
    """

    # Message returned to the model when the number of bash blocks is not exactly one.
    error_message: str = dedent("""\
    We did not detect the right number of bash code blocks in your output.
    You need to include EXACTLY ONE bash code block in your output.

    It must follow this format exactly to be valid:
    ```bash
    cmd arg1 arg2 ...
    ```
    """)

    type: Literal["single_bash_code_block"] = "single_bash_code_block"

    def __call__(self, model_response: dict, commands: list[Command], strict=False):
        """Extract the one ```bash fenced block of the response as the action.

        Args:
            model_response: Mapping whose "message" entry holds the raw model text.
            commands: Not used by this parser.
            strict: Not used by this parser.

        Returns:
            A tuple of the message with the bash block replaced by
            "<extracted_code_block>" and the content of that block.

        Raises:
            FormatError: If the message contains no ```bash block or more than one.
        """
        message = model_response["message"]
        pattern = re.compile(r"```bash\n(.*?)\n```", re.DOTALL)
        matches = pattern.findall(message)
        if not matches:
            msg = "No bash code blocks were detected in your output."
            raise FormatError(msg)
        if len(matches) > 1:
            msg = (
                "We detected multiple bash code blocks in your output. "
                "You need to include EXACTLY ONE bash code block in your output."
            )
            raise FormatError(msg)
        thought = pattern.sub("<extracted_code_block>", message)
        # Exactly one match remains at this point.
        action = matches[0]
        return thought, action
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
# Union of the parser classes accepted wherever a parse function is configured.
# Member order is kept as-is.
ParseFunction = (
    ActionParser
    | ThoughtActionParser
    | ActionOnlyParser
    | XMLThoughtActionParser
    | XMLFunctionCallingParser
    | FunctionCallingParser
    | EditFormat
    | Identity
    | JsonParser
    | BashCodeBlockParser
    | SingleBashCodeBlockParser
)
|