@elizaos/sweagent-root 2.0.0-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +270 -0
- package/package.json +71 -0
- package/python/LICENSE +21 -0
- package/python/config/README.md +15 -0
- package/python/config/bash_only.yaml +222 -0
- package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
- package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
- package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
- package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
- package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
- package/python/config/coding_challenge.yaml +104 -0
- package/python/config/default.yaml +69 -0
- package/python/config/default_backticks.yaml +69 -0
- package/python/config/default_mm_no_images.yaml +82 -0
- package/python/config/default_mm_with_images.yaml +83 -0
- package/python/config/demo/default.yaml +80 -0
- package/python/config/demo/no_instructions.yaml +69 -0
- package/python/config/demo/only_bash.yaml +60 -0
- package/python/config/exotic/default_shell.yaml +52 -0
- package/python/config/exotic/windowed_replace.yaml +125 -0
- package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
- package/python/config/human/human.yaml +24 -0
- package/python/config/human/human_demo.yaml +52 -0
- package/python/config/sweagent_0_7/07.yaml +101 -0
- package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
- package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
- package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
- package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
- package/python/mlc_config.json +44 -0
- package/python/pyproject.toml +262 -0
- package/python/sweagent/__init__.py +114 -0
- package/python/sweagent/__main__.py +4 -0
- package/python/sweagent/agent/__init__.py +0 -0
- package/python/sweagent/agent/action_sampler.py +317 -0
- package/python/sweagent/agent/agents.py +1294 -0
- package/python/sweagent/agent/extra/shell_agent.py +106 -0
- package/python/sweagent/agent/history_processors.py +399 -0
- package/python/sweagent/agent/hooks/__init__.py +0 -0
- package/python/sweagent/agent/hooks/abstract.py +139 -0
- package/python/sweagent/agent/hooks/status.py +34 -0
- package/python/sweagent/agent/models.py +896 -0
- package/python/sweagent/agent/problem_statement.py +312 -0
- package/python/sweagent/agent/reviewer.py +664 -0
- package/python/sweagent/environment/__init__.py +0 -0
- package/python/sweagent/environment/hooks/__init__.py +0 -0
- package/python/sweagent/environment/hooks/abstract.py +60 -0
- package/python/sweagent/environment/hooks/status.py +28 -0
- package/python/sweagent/environment/repo.py +219 -0
- package/python/sweagent/environment/swe_env.py +276 -0
- package/python/sweagent/exceptions.py +54 -0
- package/python/sweagent/inspector/README.md +6 -0
- package/python/sweagent/inspector/__init__.py +0 -0
- package/python/sweagent/inspector/favicon.ico +0 -0
- package/python/sweagent/inspector/fileViewer.js +354 -0
- package/python/sweagent/inspector/icons/computer.png +0 -0
- package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
- package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
- package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
- package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
- package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
- package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
- package/python/sweagent/inspector/index.html +25 -0
- package/python/sweagent/inspector/server.py +354 -0
- package/python/sweagent/inspector/static.py +169 -0
- package/python/sweagent/inspector/style.css +454 -0
- package/python/sweagent/run/__init__.py +0 -0
- package/python/sweagent/run/_progress.py +158 -0
- package/python/sweagent/run/batch_instances.py +419 -0
- package/python/sweagent/run/common.py +387 -0
- package/python/sweagent/run/compare_runs.py +123 -0
- package/python/sweagent/run/extract_pred.py +19 -0
- package/python/sweagent/run/hooks/__init__.py +0 -0
- package/python/sweagent/run/hooks/abstract.py +67 -0
- package/python/sweagent/run/hooks/apply_patch.py +106 -0
- package/python/sweagent/run/hooks/open_pr.py +244 -0
- package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
- package/python/sweagent/run/inspector_cli.py +493 -0
- package/python/sweagent/run/merge_predictions.py +64 -0
- package/python/sweagent/run/quick_stats.py +96 -0
- package/python/sweagent/run/remove_unfinished.py +63 -0
- package/python/sweagent/run/rich_test.py +91 -0
- package/python/sweagent/run/run.py +147 -0
- package/python/sweagent/run/run_batch.py +442 -0
- package/python/sweagent/run/run_replay.py +219 -0
- package/python/sweagent/run/run_shell.py +155 -0
- package/python/sweagent/run/run_single.py +225 -0
- package/python/sweagent/run/run_traj_to_demo.py +85 -0
- package/python/sweagent/tools/__init__.py +0 -0
- package/python/sweagent/tools/bundle.py +57 -0
- package/python/sweagent/tools/commands.py +220 -0
- package/python/sweagent/tools/parsing.py +619 -0
- package/python/sweagent/tools/tools.py +430 -0
- package/python/sweagent/tools/utils.py +108 -0
- package/python/sweagent/types.py +102 -0
- package/python/sweagent/utils/__init__.py +0 -0
- package/python/sweagent/utils/config.py +80 -0
- package/python/sweagent/utils/files.py +27 -0
- package/python/sweagent/utils/github.py +118 -0
- package/python/sweagent/utils/jinja_warnings.py +14 -0
- package/python/sweagent/utils/log.py +175 -0
- package/python/sweagent/utils/patch_formatter.py +152 -0
- package/python/sweagent/utils/serialization.py +45 -0
- package/python/tests/__init__.py +0 -0
- package/python/tests/conftest.py +191 -0
- package/python/tests/test_agent.py +258 -0
- package/python/tests/test_batch_instance.py +43 -0
- package/python/tests/test_commands/_interactive_dummy.py +35 -0
- package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
- package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
- package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
- package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
- package/python/tests/test_data/data_sources/human_eval.json +1 -0
- package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
- package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
- package/python/tests/test_env.py +66 -0
- package/python/tests/test_env_utils.py +129 -0
- package/python/tests/test_history_processors.py +40 -0
- package/python/tests/test_models.py +23 -0
- package/python/tests/test_openai_live.py +164 -0
- package/python/tests/test_packaging.py +7 -0
- package/python/tests/test_parsing.py +131 -0
- package/python/tests/test_problem_statement_multimodal.py +111 -0
- package/python/tests/test_quick_stats.py +42 -0
- package/python/tests/test_run.py +37 -0
- package/python/tests/test_run_batch.py +110 -0
- package/python/tests/test_run_hooks.py +114 -0
- package/python/tests/test_run_replay.py +33 -0
- package/python/tests/test_run_single.py +125 -0
- package/python/tests/test_tools_command_parsing.py +193 -0
- package/python/tests/test_utils.py +15 -0
- package/python/tests/tools/__init__.py +0 -0
- package/python/tests/tools/conftest.py +12 -0
- package/python/tests/tools/test_default_utils.py +153 -0
- package/python/tests/tools/test_edit_replace.py +0 -0
- package/python/tests/tools/test_split_string.py +82 -0
- package/python/tests/utils.py +29 -0
- package/python/tools/diff_state/bin/_state_diff_state +52 -0
- package/python/tools/diff_state/config.yaml +2 -0
- package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
- package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
- package/python/tools/edit_anthropic/config.yaml +56 -0
- package/python/tools/edit_anthropic/install.sh +3 -0
- package/python/tools/filemap/bin/filemap +45 -0
- package/python/tools/filemap/config.yaml +9 -0
- package/python/tools/filemap/install.sh +2 -0
- package/python/tools/forfeit/bin/exit_forfeit +5 -0
- package/python/tools/forfeit/config.yaml +5 -0
- package/python/tools/image_tools/bin/view_image +36 -0
- package/python/tools/image_tools/config.yaml +9 -0
- package/python/tools/multilingual_setup/bin/do_nothing +2 -0
- package/python/tools/multilingual_setup/config.yaml +1 -0
- package/python/tools/multilingual_setup/install.sh +45 -0
- package/python/tools/registry/bin/_read_env +10 -0
- package/python/tools/registry/bin/_write_env +10 -0
- package/python/tools/registry/config.yaml +1 -0
- package/python/tools/registry/install.sh +6 -0
- package/python/tools/registry/lib/__init__.py +0 -0
- package/python/tools/registry/lib/registry.py +56 -0
- package/python/tools/review_on_submit_m/README.md +6 -0
- package/python/tools/review_on_submit_m/bin/submit +54 -0
- package/python/tools/review_on_submit_m/config.yaml +6 -0
- package/python/tools/review_on_submit_m/install.sh +0 -0
- package/python/tools/search/bin/find_file +31 -0
- package/python/tools/search/bin/search_dir +39 -0
- package/python/tools/search/bin/search_file +55 -0
- package/python/tools/search/config.yaml +37 -0
- package/python/tools/search/install.sh +3 -0
- package/python/tools/submit/bin/submit +17 -0
- package/python/tools/submit/config.yaml +5 -0
- package/python/tools/web_browser/bin/click_mouse +41 -0
- package/python/tools/web_browser/bin/close_site +28 -0
- package/python/tools/web_browser/bin/double_click_mouse +37 -0
- package/python/tools/web_browser/bin/drag_mouse +46 -0
- package/python/tools/web_browser/bin/execute_script_on_page +39 -0
- package/python/tools/web_browser/bin/get_console_output +48 -0
- package/python/tools/web_browser/bin/move_mouse +35 -0
- package/python/tools/web_browser/bin/navigate_back +33 -0
- package/python/tools/web_browser/bin/navigate_forward +33 -0
- package/python/tools/web_browser/bin/open_site +36 -0
- package/python/tools/web_browser/bin/press_keys_on_page +51 -0
- package/python/tools/web_browser/bin/reload_page +33 -0
- package/python/tools/web_browser/bin/run_web_browser_server +394 -0
- package/python/tools/web_browser/bin/screenshot_site +38 -0
- package/python/tools/web_browser/bin/scroll_on_page +40 -0
- package/python/tools/web_browser/bin/set_browser_window_size +40 -0
- package/python/tools/web_browser/bin/type_text +34 -0
- package/python/tools/web_browser/bin/wait_time +39 -0
- package/python/tools/web_browser/config.yaml +155 -0
- package/python/tools/web_browser/install.sh +22 -0
- package/python/tools/web_browser/lib/browser_manager.py +404 -0
- package/python/tools/web_browser/lib/web_browser_config.py +33 -0
- package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
- package/python/tools/web_browser/test_console.html +1 -0
- package/python/tools/windowed/bin/_state +25 -0
- package/python/tools/windowed/bin/create +29 -0
- package/python/tools/windowed/bin/goto +37 -0
- package/python/tools/windowed/bin/open +49 -0
- package/python/tools/windowed/bin/scroll_down +12 -0
- package/python/tools/windowed/bin/scroll_up +13 -0
- package/python/tools/windowed/config.yaml +38 -0
- package/python/tools/windowed/install.sh +15 -0
- package/python/tools/windowed/lib/__init__.py +0 -0
- package/python/tools/windowed/lib/flake8_utils.py +147 -0
- package/python/tools/windowed/lib/windowed_file.py +312 -0
- package/python/tools/windowed_edit_linting/bin/edit +128 -0
- package/python/tools/windowed_edit_linting/config.yaml +31 -0
- package/python/tools/windowed_edit_linting/install.sh +5 -0
- package/python/tools/windowed_edit_replace/bin/edit +172 -0
- package/python/tools/windowed_edit_replace/bin/insert +77 -0
- package/python/tools/windowed_edit_replace/config.yaml +60 -0
- package/python/tools/windowed_edit_replace/install.sh +5 -0
- package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
- package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
- package/python/tools/windowed_edit_rewrite/install.sh +5 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
- package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
- package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
- package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
- package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
- package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
- package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
- package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
- package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
- package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
- package/rust/Cargo.toml +100 -0
- package/rust/README.md +49 -0
- package/rust/src/agent/action_sampler.rs +130 -0
- package/rust/src/agent/agents.rs +1029 -0
- package/rust/src/agent/history_processors.rs +277 -0
- package/rust/src/agent/hooks/mod.rs +208 -0
- package/rust/src/agent/mod.rs +24 -0
- package/rust/src/agent/models.rs +837 -0
- package/rust/src/agent/problem_statement.rs +355 -0
- package/rust/src/agent/reviewer.rs +505 -0
- package/rust/src/bin/sweagent.rs +784 -0
- package/rust/src/environment/deployment.rs +631 -0
- package/rust/src/environment/hooks/mod.rs +114 -0
- package/rust/src/environment/mod.rs +16 -0
- package/rust/src/environment/repo.rs +265 -0
- package/rust/src/environment/runtime.rs +237 -0
- package/rust/src/environment/swe_env.rs +248 -0
- package/rust/src/exceptions.rs +228 -0
- package/rust/src/lib.rs +68 -0
- package/rust/src/monitoring.rs +482 -0
- package/rust/src/run/hooks/mod.rs +134 -0
- package/rust/src/run/mod.rs +12 -0
- package/rust/src/run/run_batch.rs +563 -0
- package/rust/src/run/run_single.rs +196 -0
- package/rust/src/tools/bundle.rs +224 -0
- package/rust/src/tools/commands.rs +173 -0
- package/rust/src/tools/mod.rs +295 -0
- package/rust/src/tools/parsing.rs +354 -0
- package/rust/src/tools/registry.rs +143 -0
- package/rust/src/types.rs +554 -0
- package/rust/src/utils/config.rs +105 -0
- package/rust/src/utils/files.rs +137 -0
- package/rust/src/utils/github.rs +171 -0
- package/rust/src/utils/log.rs +65 -0
- package/rust/src/utils/mod.rs +17 -0
- package/rust/src/utils/serialization.rs +181 -0
- package/rust/src/utils/template.rs +173 -0
- package/typescript/README.md +335 -0
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import hashlib
|
|
3
|
+
import os
|
|
4
|
+
import uuid
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Literal, Protocol
|
|
7
|
+
from urllib.parse import urlparse
|
|
8
|
+
|
|
9
|
+
import requests
|
|
10
|
+
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
|
|
11
|
+
|
|
12
|
+
from sweagent.utils.github import _get_problem_statement_from_github_issue, _parse_gh_issue_url
|
|
13
|
+
from sweagent.utils.log import get_logger
|
|
14
|
+
|
|
15
|
+
logger = get_logger("swea-config", emoji="🔧")
|
|
16
|
+
|
|
17
|
+
# MIME types accepted when embedding downloaded issue images
VALID_IMAGE_MIME_TYPES = {
    "image/jpeg",
    "image/jpg",  # Some servers return jpg instead of jpeg
    "image/png",
    "image/webp",
}
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ProblemStatement(Protocol):
    """Structural interface for the problem statement of a task.

    Any class that provides an ``id`` attribute and the methods below can be
    used as a problem statement; inheriting from this protocol is optional.
    """

    id: str

    def get_problem_statement(self) -> str:
        """Return the problem statement as a string."""
        ...

    def get_problem_statement_for_env(self) -> str:
        """Return the statement used for setting environment variables in the container.

        Unless overridden, this simply forwards to ``get_problem_statement()``.
        """
        statement = self.get_problem_statement()
        return statement

    def get_extra_fields(self) -> dict[str, Any]:
        """Return any additional data attached to the instance."""
        ...
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class _BuiltinProblemStatementBase(BaseModel):
    """A base class for the builtin problem statements to avoid typing much"""

    def get_problem_statement(self) -> str:
        """Placeholder; every builtin subclass in this module overrides it."""
        ...

    def get_problem_statement_for_env(self) -> str:
        """Return the statement for the environment; same as ``get_problem_statement()``."""
        statement = self.get_problem_statement()
        return statement

    def get_extra_fields(self) -> dict[str, Any]:
        """Return extra data for the instance; the base class has none."""
        return dict()
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# Problem statement with no text; its id defaults to a freshly generated UUID4 string.
class EmptyProblemStatement(_BuiltinProblemStatementBase):
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))

    type: Literal["empty"] = "empty"
    """Discriminator for (de)serialization/CLI. Do not change."""

    model_config = ConfigDict(extra="forbid")

    def get_problem_statement(self) -> str:
        """Return the empty string: there is no problem text."""
        empty_statement = ""
        return empty_statement
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# Problem statement given directly as a string.
class TextProblemStatement(_BuiltinProblemStatementBase):
    text: str

    extra_fields: dict[str, Any] = Field(default_factory=dict)
    """Any additional data to be added to the instance.
    This data will be available when formatting prompt templates.
    """

    type: Literal["text"] = "text"
    """Discriminator for (de)serialization/CLI. Do not change."""

    id: str = None  # type: ignore

    model_config = ConfigDict(extra="forbid")

    def model_post_init(self, __context: Any) -> None:
        """Fill in ``id`` from a hash of the text when no id was supplied."""
        if self.id is not None:
            return
        logger.info("Setting problem statement id to hash of text")
        digest = hashlib.sha256(self.text.encode()).hexdigest()
        # The first six hex characters of the SHA-256 digest serve as the id.
        self.id = digest[:6]

    def get_problem_statement(self) -> str:
        """Return the stored text unchanged."""
        return self.text

    def get_extra_fields(self) -> dict[str, Any]:
        """Return the user-supplied extra fields."""
        return self.extra_fields

    def __repr__(self) -> str:
        preview = self.text[:30]
        return f"TextProblemStatement(id={self.id}, text={preview}...)"

    def __str__(self) -> str:
        preview = self.text[:30]
        return f"id={self.id}, text={preview}..."
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# Problem statement whose text is read from a file on every access.
class FileProblemStatement(_BuiltinProblemStatementBase):
    path: Path

    extra_fields: dict[str, Any] = Field(default_factory=dict)
    """Any additional data to be added to the instance.
    This data will be available when formatting prompt templates.
    """

    type: Literal["text_file"] = "text_file"
    """Discriminator for (de)serialization/CLI. Do not change."""

    id: str = None  # type: ignore

    model_config = ConfigDict(extra="forbid")

    def model_post_init(self, __context: Any) -> None:
        """Fill in ``id`` from a hash of the file contents when no id was supplied."""
        if self.id is not None:
            return
        logger.info("Setting problem statement id to hash of file contents (path: %s)", self.path)
        contents = self.get_problem_statement()
        digest = hashlib.sha256(contents.encode()).hexdigest()
        # The first six hex characters of the SHA-256 digest serve as the id.
        self.id = digest[:6]

    def get_problem_statement(self) -> str:
        """Read and return the text of the file at ``path``."""
        contents = self.path.read_text()
        return contents

    def get_extra_fields(self) -> dict[str, Any]:
        """Return the user-supplied extra fields."""
        return self.extra_fields
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# Problem statement fetched from a GitHub issue URL.
class GithubIssue(_BuiltinProblemStatementBase):
    github_url: str

    extra_fields: dict[str, Any] = Field(default_factory=dict)
    """Any additional data to be added to the instance.
    This data will be available when formatting prompt templates.
    """

    type: Literal["github"] = "github"
    """Discriminator for (de)serialization/CLI. Do not change."""

    id: str = None  # type: ignore

    model_config = ConfigDict(extra="forbid")

    def model_post_init(self, __context: Any) -> None:
        """Build ``id`` from the parsed issue URL when no id was supplied."""
        if self.id is not None:
            return
        logger.info("Setting problem statement based on github issue url")
        gh_owner, gh_repo, gh_issue = _parse_gh_issue_url(self.github_url)
        self.id = f"{gh_owner}__{gh_repo}-i{gh_issue}"

    def get_problem_statement(self) -> str:
        """Fetch the issue text, passing ``GITHUB_TOKEN`` from the environment if set."""
        gh_owner, gh_repo, gh_issue = _parse_gh_issue_url(self.github_url)
        gh_token = os.getenv("GITHUB_TOKEN")
        return _get_problem_statement_from_github_issue(gh_owner, gh_repo, gh_issue, token=gh_token)

    def get_extra_fields(self) -> dict[str, Any]:
        """Return the user-supplied extra fields."""
        return self.extra_fields
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
# Text problem statement that can additionally embed downloaded issue images
# as base64 markdown appended to the text.
class SWEBenchMultimodalProblemStatement(_BuiltinProblemStatementBase):
    text: str

    issue_images: list[str] = Field(default_factory=list)
    """List of image asset URLs.
    """

    disable_image_processing: bool = False
    """If True, skip image downloading and processing, treating this as a text-only problem statement.
    """

    extra_fields: dict[str, Any] = Field(default_factory=dict)
    """Any additional data to be added to the instance.
    This data will be available when formatting prompt templates.
    """

    type: Literal["swe_bench_multimodal"] = "swe_bench_multimodal"
    """Discriminator for (de)serialization/CLI. Do not change."""

    id: str = None  # type: ignore

    _cached_problem_statement: str | None = PrivateAttr(default=None)

    model_config = ConfigDict(extra="forbid")

    def model_post_init(self, __context: Any) -> None:
        """Fill in ``id`` from a hash of the text when no id was supplied."""
        if self.id is None:
            logger.info("Setting problem statement id to hash of text")
            self.id = hashlib.sha256(self.text.encode()).hexdigest()[:6]

    def get_problem_statement_for_env(self) -> str:
        """Return the problem statement without images.

        Images are not supported in the environment.
        """
        return self.text

    def get_problem_statement(self) -> str:
        """Return the text followed by one embedded markdown image per usable URL.

        When ``disable_image_processing`` is set, the plain text is returned and
        nothing is downloaded. Otherwise each URL in ``issue_images`` is
        downloaded once; images that cannot be processed are skipped with a
        warning. The assembled result is cached on the instance.
        """
        if self.disable_image_processing:
            logger.info("Image processing disabled, returning text-only problem statement")
            return self.text

        if self._cached_problem_statement is not None:
            return self._cached_problem_statement

        processed_text = self.text
        for link in self.issue_images:
            try:
                image_markdown = self._download_and_convert_image(link)
                if image_markdown:
                    processed_text += f"\n\n{image_markdown}"
            except Exception as e:
                # Best effort: a single bad image must not break the whole statement.
                logger.warning(f"Failed to process image from {link}: {e}")

        # cache to avoid re-processing images
        self._cached_problem_statement = processed_text
        return processed_text

    def get_extra_fields(self) -> dict[str, Any]:
        """Return the user-supplied extra fields."""
        return self.extra_fields

    def _download_and_convert_image(self, url: str) -> str | None:
        """Download an image from URL and convert it to base64 markdown format.

        Args:
            url: The URL of the image to download

        Returns:
            Markdown image whose target is a base64 ``data:`` URI if successful,
            None if the URL is invalid, the MIME type is unsupported, the image
            is empty or larger than 10MB, or any network/processing error occurs.
            Errors are logged as warnings rather than raised.
        """
        try:
            parsed_url = urlparse(url)
            if not parsed_url.scheme or not parsed_url.netloc:
                logger.warning(f"Invalid URL format: {url}")
                return None
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.5735.133 Safari/537.36"
            }
            response = requests.get(url, headers=headers, timeout=30, stream=True)
            try:
                response.raise_for_status()
                # Drop parameters such as "; charset=binary" before comparing.
                raw_content_type = response.headers.get("content-type", "")
                content_type = raw_content_type.split(";", 1)[0].strip().lower()
                if content_type == "image/jpg":
                    content_type = "image/jpeg"
                if content_type not in VALID_IMAGE_MIME_TYPES:
                    logger.warning(f"Unsupported image MIME type '{content_type}' for URL: {url}. Not encoding image.")
                    return None
                max_size = 10 * 1024 * 1024  # 10MB
                content_length = response.headers.get("content-length")
                if content_length and int(content_length) > max_size:
                    logger.warning(f"Image too large ({content_length} bytes) for URL: {url}")
                    return None
                # Content-Length may be absent or wrong, so the limit is
                # enforced again while streaming the body.
                image_data = bytearray()
                for chunk in response.iter_content(chunk_size=8192):
                    image_data += chunk
                    if len(image_data) > max_size:
                        logger.warning(f"Image too large (>{max_size} bytes) for URL: {url}")
                        return None
            finally:
                # stream=True keeps the connection open until it is closed explicitly.
                response.close()
            if not image_data:
                logger.warning(f"Empty image data for URL: {url}")
                return None
            b64_data = base64.b64encode(image_data).decode("ascii")
            markdown = f"![{url}](data:{content_type};base64,{b64_data})"
            logger.info(f"Successfully processed image from {url} ({len(image_data)} bytes, {content_type})")
            return markdown

        except requests.exceptions.Timeout:
            logger.warning(f"Timeout downloading image from {url}")
            return None
        except requests.exceptions.RequestException as e:
            logger.warning(f"Network error downloading image from {url}: {e}")
            return None
        except Exception as e:
            logger.warning(f"Unexpected error processing image from {url}: {e}")
            return None

    def __repr__(self) -> str:
        n_images = len(self.issue_images)
        return f"SWEBenchMultimodalProblemStatement(id={self.id}, text={self.text[:30]}..., images={n_images})"

    def __str__(self) -> str:
        n_images = len(self.issue_images)
        return f"id={self.id}, text={self.text[:30]}..., images={n_images}"
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
# Union of the builtin problem statement models defined in this module.
# The member order is kept as-is on purpose.
ProblemStatementConfig = (
    TextProblemStatement | SWEBenchMultimodalProblemStatement | GithubIssue
    | EmptyProblemStatement | FileProblemStatement
)
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def problem_statement_from_simplified_input(
|
|
294
|
+
*, input: str, type: Literal["text", "text_file", "github_issue", "swe_bench_multimodal"]
|
|
295
|
+
) -> ProblemStatementConfig:
|
|
296
|
+
"""Get a problem statement from an `input` string and a `type`.
|
|
297
|
+
|
|
298
|
+
Args:
|
|
299
|
+
input: Url/path/text
|
|
300
|
+
type: The type of problem statement
|
|
301
|
+
"""
|
|
302
|
+
if type == "text":
|
|
303
|
+
return TextProblemStatement(text=input)
|
|
304
|
+
elif type == "text_file":
|
|
305
|
+
return FileProblemStatement(path=Path(input))
|
|
306
|
+
elif type == "github_issue":
|
|
307
|
+
return GithubIssue(github_url=input)
|
|
308
|
+
elif type == "swe_bench_multimodal":
|
|
309
|
+
return SWEBenchMultimodalProblemStatement(text=input)
|
|
310
|
+
else:
|
|
311
|
+
msg = f"Unknown problem statement type: {type}"
|
|
312
|
+
raise ValueError(msg)
|