@elizaos/sweagent-root 2.0.0-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +270 -0
- package/package.json +71 -0
- package/python/LICENSE +21 -0
- package/python/config/README.md +15 -0
- package/python/config/bash_only.yaml +222 -0
- package/python/config/benchmarks/250212_sweagent_heavy_sbl.yaml +188 -0
- package/python/config/benchmarks/250225_anthropic_filemap_simple_review.yaml +75 -0
- package/python/config/benchmarks/250522_anthropic_filemap_simple_review.yaml +92 -0
- package/python/config/benchmarks/250526_anthropic_filemap_simple_review_sbl.yaml +93 -0
- package/python/config/benchmarks/anthropic_filemap_multilingual.yaml +66 -0
- package/python/config/coding_challenge.yaml +104 -0
- package/python/config/default.yaml +69 -0
- package/python/config/default_backticks.yaml +69 -0
- package/python/config/default_mm_no_images.yaml +82 -0
- package/python/config/default_mm_with_images.yaml +83 -0
- package/python/config/demo/default.yaml +80 -0
- package/python/config/demo/no_instructions.yaml +69 -0
- package/python/config/demo/only_bash.yaml +60 -0
- package/python/config/exotic/default_shell.yaml +52 -0
- package/python/config/exotic/windowed_replace.yaml +125 -0
- package/python/config/exotic/windowed_replace_late_repro.yaml +127 -0
- package/python/config/human/human.yaml +24 -0
- package/python/config/human/human_demo.yaml +52 -0
- package/python/config/sweagent_0_7/07.yaml +101 -0
- package/python/config/sweagent_0_7/07_fcalling.yaml +100 -0
- package/python/config/sweagent_0_7/07_from_url.yaml +114 -0
- package/python/config/sweagent_0_7/07_thought_action.yaml +102 -0
- package/python/config/sweagent_0_7/07_thought_action_xml.yaml +96 -0
- package/python/mlc_config.json +44 -0
- package/python/pyproject.toml +262 -0
- package/python/sweagent/__init__.py +114 -0
- package/python/sweagent/__main__.py +4 -0
- package/python/sweagent/agent/__init__.py +0 -0
- package/python/sweagent/agent/action_sampler.py +317 -0
- package/python/sweagent/agent/agents.py +1294 -0
- package/python/sweagent/agent/extra/shell_agent.py +106 -0
- package/python/sweagent/agent/history_processors.py +399 -0
- package/python/sweagent/agent/hooks/__init__.py +0 -0
- package/python/sweagent/agent/hooks/abstract.py +139 -0
- package/python/sweagent/agent/hooks/status.py +34 -0
- package/python/sweagent/agent/models.py +896 -0
- package/python/sweagent/agent/problem_statement.py +312 -0
- package/python/sweagent/agent/reviewer.py +664 -0
- package/python/sweagent/environment/__init__.py +0 -0
- package/python/sweagent/environment/hooks/__init__.py +0 -0
- package/python/sweagent/environment/hooks/abstract.py +60 -0
- package/python/sweagent/environment/hooks/status.py +28 -0
- package/python/sweagent/environment/repo.py +219 -0
- package/python/sweagent/environment/swe_env.py +276 -0
- package/python/sweagent/exceptions.py +54 -0
- package/python/sweagent/inspector/README.md +6 -0
- package/python/sweagent/inspector/__init__.py +0 -0
- package/python/sweagent/inspector/favicon.ico +0 -0
- package/python/sweagent/inspector/fileViewer.js +354 -0
- package/python/sweagent/inspector/icons/computer.png +0 -0
- package/python/sweagent/inspector/icons/edit_icon.svg +11 -0
- package/python/sweagent/inspector/icons/swe-agent-logo-50.png +0 -0
- package/python/sweagent/inspector/icons/swellama_blue.png +0 -0
- package/python/sweagent/inspector/icons/swellama_brown.png +0 -0
- package/python/sweagent/inspector/icons/swellama_grey.png +0 -0
- package/python/sweagent/inspector/icons/swellama_tan.png +0 -0
- package/python/sweagent/inspector/index.html +25 -0
- package/python/sweagent/inspector/server.py +354 -0
- package/python/sweagent/inspector/static.py +169 -0
- package/python/sweagent/inspector/style.css +454 -0
- package/python/sweagent/run/__init__.py +0 -0
- package/python/sweagent/run/_progress.py +158 -0
- package/python/sweagent/run/batch_instances.py +419 -0
- package/python/sweagent/run/common.py +387 -0
- package/python/sweagent/run/compare_runs.py +123 -0
- package/python/sweagent/run/extract_pred.py +19 -0
- package/python/sweagent/run/hooks/__init__.py +0 -0
- package/python/sweagent/run/hooks/abstract.py +67 -0
- package/python/sweagent/run/hooks/apply_patch.py +106 -0
- package/python/sweagent/run/hooks/open_pr.py +244 -0
- package/python/sweagent/run/hooks/swe_bench_evaluate.py +113 -0
- package/python/sweagent/run/inspector_cli.py +493 -0
- package/python/sweagent/run/merge_predictions.py +64 -0
- package/python/sweagent/run/quick_stats.py +96 -0
- package/python/sweagent/run/remove_unfinished.py +63 -0
- package/python/sweagent/run/rich_test.py +91 -0
- package/python/sweagent/run/run.py +147 -0
- package/python/sweagent/run/run_batch.py +442 -0
- package/python/sweagent/run/run_replay.py +219 -0
- package/python/sweagent/run/run_shell.py +155 -0
- package/python/sweagent/run/run_single.py +225 -0
- package/python/sweagent/run/run_traj_to_demo.py +85 -0
- package/python/sweagent/tools/__init__.py +0 -0
- package/python/sweagent/tools/bundle.py +57 -0
- package/python/sweagent/tools/commands.py +220 -0
- package/python/sweagent/tools/parsing.py +619 -0
- package/python/sweagent/tools/tools.py +430 -0
- package/python/sweagent/tools/utils.py +108 -0
- package/python/sweagent/types.py +102 -0
- package/python/sweagent/utils/__init__.py +0 -0
- package/python/sweagent/utils/config.py +80 -0
- package/python/sweagent/utils/files.py +27 -0
- package/python/sweagent/utils/github.py +118 -0
- package/python/sweagent/utils/jinja_warnings.py +14 -0
- package/python/sweagent/utils/log.py +175 -0
- package/python/sweagent/utils/patch_formatter.py +152 -0
- package/python/sweagent/utils/serialization.py +45 -0
- package/python/tests/__init__.py +0 -0
- package/python/tests/conftest.py +191 -0
- package/python/tests/test_agent.py +258 -0
- package/python/tests/test_batch_instance.py +43 -0
- package/python/tests/test_commands/_interactive_dummy.py +35 -0
- package/python/tests/test_commands/interactive_dummy_wrapper.sh +29 -0
- package/python/tests/test_data/config_files/dummy_interactive.yaml +62 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/README.md +13 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/customrandom.c +50 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/release +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/server +0 -0
- package/python/tests/test_data/data_sources/ctf/crypto/Katy/solver.py +12 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/README.md +16 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/challenge.json +9 -0
- package/python/tests/test_data/data_sources/ctf/forensics/flash/flash_c8429a430278283c0e571baebca3d139.zip +0 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/README.md +15 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/challenge.json +10 -0
- package/python/tests/test_data/data_sources/ctf/misc/networking_1/networking.pcap +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/Dockerfile +28 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/challenge.json +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/flag.txt +1 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup +0 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.c +26 -0
- package/python/tests/test_data/data_sources/ctf/pwn/warmup/warmup.py +9 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/README.md +14 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/challenge.json +8 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock +0 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/rock.cpp +167 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/solution.cpp +24 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/solution.py +6 -0
- package/python/tests/test_data/data_sources/ctf/rev/rock/test_solver/test.sh +10 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/000-default.conf +18 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/Dockerfile +20 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/file.pl +38 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/forms.pl +40 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/cgi/hello.pl +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/challenge.json +12 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/docker-compose.yml +14 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/flag +1 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/index.html +11 -0
- package/python/tests/test_data/data_sources/ctf/web/i_got_id_demo/solution.txt +1 -0
- package/python/tests/test_data/data_sources/debug_20240322.json +1 -0
- package/python/tests/test_data/data_sources/expert_instances.yaml +16 -0
- package/python/tests/test_data/data_sources/human_eval.json +1 -0
- package/python/tests/test_data/data_sources/simple_instances.yaml +3 -0
- package/python/tests/test_data/data_sources/simple_instances_long.yaml +30 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-dev-easy_first_only.json +1 -0
- package/python/tests/test_data/data_sources/swe-bench-lite-test.json +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/6e44b9__sweagenttestrepo-1c2844.traj +342 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py +15 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/args.yaml +518 -0
- package/python/tests/test_data/trajectories/gpt4__swe-agent__test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/swe-agent__test-repo-i1.traj +124 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl +1 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/args.yaml +520 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch +18 -0
- package/python/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/pydicom__pydicom-1458.traj +257 -0
- package/python/tests/test_env.py +66 -0
- package/python/tests/test_env_utils.py +129 -0
- package/python/tests/test_history_processors.py +40 -0
- package/python/tests/test_models.py +23 -0
- package/python/tests/test_openai_live.py +164 -0
- package/python/tests/test_packaging.py +7 -0
- package/python/tests/test_parsing.py +131 -0
- package/python/tests/test_problem_statement_multimodal.py +111 -0
- package/python/tests/test_quick_stats.py +42 -0
- package/python/tests/test_run.py +37 -0
- package/python/tests/test_run_batch.py +110 -0
- package/python/tests/test_run_hooks.py +114 -0
- package/python/tests/test_run_replay.py +33 -0
- package/python/tests/test_run_single.py +125 -0
- package/python/tests/test_tools_command_parsing.py +193 -0
- package/python/tests/test_utils.py +15 -0
- package/python/tests/tools/__init__.py +0 -0
- package/python/tests/tools/conftest.py +12 -0
- package/python/tests/tools/test_default_utils.py +153 -0
- package/python/tests/tools/test_edit_replace.py +0 -0
- package/python/tests/tools/test_split_string.py +82 -0
- package/python/tests/utils.py +29 -0
- package/python/tools/diff_state/bin/_state_diff_state +52 -0
- package/python/tools/diff_state/config.yaml +2 -0
- package/python/tools/edit_anthropic/bin/_state_anthropic +21 -0
- package/python/tools/edit_anthropic/bin/str_replace_editor +710 -0
- package/python/tools/edit_anthropic/config.yaml +56 -0
- package/python/tools/edit_anthropic/install.sh +3 -0
- package/python/tools/filemap/bin/filemap +45 -0
- package/python/tools/filemap/config.yaml +9 -0
- package/python/tools/filemap/install.sh +2 -0
- package/python/tools/forfeit/bin/exit_forfeit +5 -0
- package/python/tools/forfeit/config.yaml +5 -0
- package/python/tools/image_tools/bin/view_image +36 -0
- package/python/tools/image_tools/config.yaml +9 -0
- package/python/tools/multilingual_setup/bin/do_nothing +2 -0
- package/python/tools/multilingual_setup/config.yaml +1 -0
- package/python/tools/multilingual_setup/install.sh +45 -0
- package/python/tools/registry/bin/_read_env +10 -0
- package/python/tools/registry/bin/_write_env +10 -0
- package/python/tools/registry/config.yaml +1 -0
- package/python/tools/registry/install.sh +6 -0
- package/python/tools/registry/lib/__init__.py +0 -0
- package/python/tools/registry/lib/registry.py +56 -0
- package/python/tools/review_on_submit_m/README.md +6 -0
- package/python/tools/review_on_submit_m/bin/submit +54 -0
- package/python/tools/review_on_submit_m/config.yaml +6 -0
- package/python/tools/review_on_submit_m/install.sh +0 -0
- package/python/tools/search/bin/find_file +31 -0
- package/python/tools/search/bin/search_dir +39 -0
- package/python/tools/search/bin/search_file +55 -0
- package/python/tools/search/config.yaml +37 -0
- package/python/tools/search/install.sh +3 -0
- package/python/tools/submit/bin/submit +17 -0
- package/python/tools/submit/config.yaml +5 -0
- package/python/tools/web_browser/bin/click_mouse +41 -0
- package/python/tools/web_browser/bin/close_site +28 -0
- package/python/tools/web_browser/bin/double_click_mouse +37 -0
- package/python/tools/web_browser/bin/drag_mouse +46 -0
- package/python/tools/web_browser/bin/execute_script_on_page +39 -0
- package/python/tools/web_browser/bin/get_console_output +48 -0
- package/python/tools/web_browser/bin/move_mouse +35 -0
- package/python/tools/web_browser/bin/navigate_back +33 -0
- package/python/tools/web_browser/bin/navigate_forward +33 -0
- package/python/tools/web_browser/bin/open_site +36 -0
- package/python/tools/web_browser/bin/press_keys_on_page +51 -0
- package/python/tools/web_browser/bin/reload_page +33 -0
- package/python/tools/web_browser/bin/run_web_browser_server +394 -0
- package/python/tools/web_browser/bin/screenshot_site +38 -0
- package/python/tools/web_browser/bin/scroll_on_page +40 -0
- package/python/tools/web_browser/bin/set_browser_window_size +40 -0
- package/python/tools/web_browser/bin/type_text +34 -0
- package/python/tools/web_browser/bin/wait_time +39 -0
- package/python/tools/web_browser/config.yaml +155 -0
- package/python/tools/web_browser/install.sh +22 -0
- package/python/tools/web_browser/lib/browser_manager.py +404 -0
- package/python/tools/web_browser/lib/web_browser_config.py +33 -0
- package/python/tools/web_browser/lib/web_browser_utils.py +126 -0
- package/python/tools/web_browser/test_console.html +1 -0
- package/python/tools/windowed/bin/_state +25 -0
- package/python/tools/windowed/bin/create +29 -0
- package/python/tools/windowed/bin/goto +37 -0
- package/python/tools/windowed/bin/open +49 -0
- package/python/tools/windowed/bin/scroll_down +12 -0
- package/python/tools/windowed/bin/scroll_up +13 -0
- package/python/tools/windowed/config.yaml +38 -0
- package/python/tools/windowed/install.sh +15 -0
- package/python/tools/windowed/lib/__init__.py +0 -0
- package/python/tools/windowed/lib/flake8_utils.py +147 -0
- package/python/tools/windowed/lib/windowed_file.py +312 -0
- package/python/tools/windowed_edit_linting/bin/edit +128 -0
- package/python/tools/windowed_edit_linting/config.yaml +31 -0
- package/python/tools/windowed_edit_linting/install.sh +5 -0
- package/python/tools/windowed_edit_replace/bin/edit +172 -0
- package/python/tools/windowed_edit_replace/bin/insert +77 -0
- package/python/tools/windowed_edit_replace/config.yaml +60 -0
- package/python/tools/windowed_edit_replace/install.sh +5 -0
- package/python/tools/windowed_edit_rewrite/bin/edit +78 -0
- package/python/tools/windowed_edit_rewrite/config.yaml +11 -0
- package/python/tools/windowed_edit_rewrite/install.sh +5 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyEncryption.traj +318 -0
- package/python/trajectories/demonstrations/ctf/crypto/BabyTimeCapsule.traj +197 -0
- package/python/trajectories/demonstrations/ctf/crypto/eps.traj +289 -0
- package/python/trajectories/demonstrations/ctf/crypto/katy.traj +368 -0
- package/python/trajectories/demonstrations/ctf/forensics/flash.traj +102 -0
- package/python/trajectories/demonstrations/ctf/misc/networking_1.traj +102 -0
- package/python/trajectories/demonstrations/ctf/pwn/warmup.traj +159 -0
- package/python/trajectories/demonstrations/ctf/rev/rock.traj +251 -0
- package/python/trajectories/demonstrations/ctf/web/i_got_id_demo.traj +422 -0
- package/python/trajectories/demonstrations/function_calling_simple.traj +151 -0
- package/python/trajectories/demonstrations/human_thought__swe-bench-HumanEvalFix-python__lcb__t-0.00__p-0.95__c-4.00__install-0/humanevalfix-python-0.traj +129 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default__t-0.20__p-0.95__c-2.00__install-1___install_from_source/marshmallow-code__marshmallow-1867.traj +318 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling__install-1/marshmallow-code__marshmallow-1867.traj +594 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj +592 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace_from_source/marshmallow-code__marshmallow-1867.traj +3316 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_cursors_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +251 -0
- package/python/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj +399 -0
- package/python/trajectories/demonstrations/str_replace_anthropic_demo.yaml +432 -0
- package/rust/Cargo.toml +100 -0
- package/rust/README.md +49 -0
- package/rust/src/agent/action_sampler.rs +130 -0
- package/rust/src/agent/agents.rs +1029 -0
- package/rust/src/agent/history_processors.rs +277 -0
- package/rust/src/agent/hooks/mod.rs +208 -0
- package/rust/src/agent/mod.rs +24 -0
- package/rust/src/agent/models.rs +837 -0
- package/rust/src/agent/problem_statement.rs +355 -0
- package/rust/src/agent/reviewer.rs +505 -0
- package/rust/src/bin/sweagent.rs +784 -0
- package/rust/src/environment/deployment.rs +631 -0
- package/rust/src/environment/hooks/mod.rs +114 -0
- package/rust/src/environment/mod.rs +16 -0
- package/rust/src/environment/repo.rs +265 -0
- package/rust/src/environment/runtime.rs +237 -0
- package/rust/src/environment/swe_env.rs +248 -0
- package/rust/src/exceptions.rs +228 -0
- package/rust/src/lib.rs +68 -0
- package/rust/src/monitoring.rs +482 -0
- package/rust/src/run/hooks/mod.rs +134 -0
- package/rust/src/run/mod.rs +12 -0
- package/rust/src/run/run_batch.rs +563 -0
- package/rust/src/run/run_single.rs +196 -0
- package/rust/src/tools/bundle.rs +224 -0
- package/rust/src/tools/commands.rs +173 -0
- package/rust/src/tools/mod.rs +295 -0
- package/rust/src/tools/parsing.rs +354 -0
- package/rust/src/tools/registry.rs +143 -0
- package/rust/src/types.rs +554 -0
- package/rust/src/utils/config.rs +105 -0
- package/rust/src/utils/files.rs +137 -0
- package/rust/src/utils/github.rs +171 -0
- package/rust/src/utils/log.rs +65 -0
- package/rust/src/utils/mod.rs +17 -0
- package/rust/src/utils/serialization.rs +181 -0
- package/rust/src/utils/template.rs +173 -0
- package/typescript/README.md +335 -0
|
@@ -0,0 +1,493 @@
|
|
|
1
|
+
"""This is a command line tool to inspect trajectory JSON files."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import collections
|
|
5
|
+
import copy
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
import subprocess
|
|
9
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from rich.syntax import Syntax
|
|
13
|
+
from textual.app import App, ComposeResult
|
|
14
|
+
from textual.binding import Binding
|
|
15
|
+
from textual.containers import Container, Vertical, VerticalScroll
|
|
16
|
+
from textual.screen import ModalScreen
|
|
17
|
+
from textual.widgets import Footer, Header, Input, ListItem, ListView, Static
|
|
18
|
+
|
|
19
|
+
from sweagent.utils.files import load_file
|
|
20
|
+
from sweagent.utils.serialization import _yaml_serialization_with_linebreaks
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _move_items_top(d: dict, keys: list[str]) -> dict:
|
|
24
|
+
"""Reorder items in a dictionary.
|
|
25
|
+
|
|
26
|
+
The first keys will be those specified in `keys`, the rest will
|
|
27
|
+
be in the same order as in the original dictionary.
|
|
28
|
+
"""
|
|
29
|
+
new_d = {}
|
|
30
|
+
for key in keys:
|
|
31
|
+
if key in d:
|
|
32
|
+
new_d[key] = d[key]
|
|
33
|
+
for key in d.keys():
|
|
34
|
+
if key not in keys:
|
|
35
|
+
new_d[key] = d[key]
|
|
36
|
+
return new_d
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class TrajectoryViewer(Static):
    """Widget that steps through the contents of one trajectory file.

    `i_step` is the index of the step currently displayed. Any value outside
    `0 .. n_steps - 1` (it starts at `-1`) shows the trajectory's `info`
    section instead of a step.
    """

    BINDINGS = [
        Binding("right,l", "next_item", "Step++"),
        Binding("left,h", "previous_item", "Step--"),
        Binding("0", "first_item", "Step=0"),
        Binding("$", "last_item", "Step=-1"),
        Binding("v", "toggle_view", "Toggle view"),
        Binding("j,down", "scroll_down", "Scroll down"),
        Binding("k,up", "scroll_up", "Scroll up"),
    ]

    def __init__(self, path: Path, title: str, overview_stats: dict, *, gold_patch: str | None = None):
        """View a single trajectory.

        Args:
            path: Trajectory file; its text is parsed as JSON.
            title: Label used as the first part of the app sub-title.
            overview_stats: Stats for this trajectory; its `"result"` entry
                is shown on the info page.
            gold_patch: Optional patch text shown on the info page.
        """
        super().__init__()
        self.i_step = -1
        self.trajectory = json.loads(path.read_text())
        self.show_full = False
        self.title = title
        self.overview_stats = overview_stats
        self.gold_patch = gold_patch

    def load_trajectory(self, path: Path, title: str, overview_stats: dict, *, gold_patch: str | None = None):
        """Load a new trajectory and update the viewer.

        Takes the same arguments as the constructor. The view is scrolled
        back to the top and reset to the info page.
        """
        print("Loading", path)
        self.trajectory = json.loads(path.read_text())
        self.title = title
        self.gold_patch = gold_patch
        self.overview_stats = overview_stats
        self.scroll_top()
        self.i_step = -1
        self.update_content()

    def compose(self) -> ComposeResult:
        """Yield a single `#content` widget inside a vertical scroll container."""
        with VerticalScroll():
            yield Static(id="content", markup=False)

    def on_mount(self) -> None:
        """Render the initial content once the widget is mounted."""
        self.update_content()

    @property
    def n_steps(self) -> int:
        """Number of entries in the trajectory's `"trajectory"` list."""
        return len(self.trajectory["trajectory"])

    def _show_step_yaml(self, item: dict) -> None:
        """Show full yaml of trajectory item"""
        content_str = _yaml_serialization_with_linebreaks(
            _move_items_top(item, ["thought", "action", "observation", "response", "execution_time"])
        )
        syntax = Syntax(content_str, "yaml", theme="monokai", word_wrap=True)
        content = self.query_one("#content")
        content.update(syntax)  # type: ignore
        self.app.sub_title = f"{self.title} - Step {self.i_step + 1}/{self.n_steps} - Full View"

    def _show_step_simple(self, item: dict) -> None:
        """Show thought, action and observation of a trajectory item as plain text."""
        # Missing fields are rendered as empty strings.
        thought = item.get("thought", "")
        action = item.get("action", "")
        observation = item.get("observation", "")

        content_str = f"THOUGHT:\n{thought}\n\nACTION:\n{action}\n\nOBSERVATION:\n{observation}"
        content = self.query_one("#content")
        content.update(content_str)  # type: ignore

        self.app.sub_title = f"{self.title} - Step {self.i_step + 1}/{self.n_steps} - Simple View"

    def _show_info(self):
        """Show the trajectory's `info` section, extended with result and gold patch."""
        # Work on a copy so that the loaded trajectory is left untouched.
        info = copy.deepcopy(self.trajectory["info"])
        info["result"] = self.overview_stats["result"]
        info["gold_patch"] = self.gold_patch
        info = _move_items_top(info, ["result", "exit_status", "model_stats", "submission", "gold_patch"])
        syntax = Syntax(_yaml_serialization_with_linebreaks(info), "yaml", theme="monokai", word_wrap=True)
        content = self.query_one("#content")
        content.update(syntax)  # type: ignore
        # The info page is reachable from both ends of the step range.
        next_help = "Press l to see step 1" if self.i_step < 0 else f"Press h to see step {self.n_steps}"
        self.app.sub_title = f"{self.title} - Info ({next_help})"

    def update_content(self) -> None:
        """Render the info page or the current step, depending on `i_step`."""
        print(self.i_step)
        if self.i_step < 0 or self.i_step >= self.n_steps:
            return self._show_info()

        item = self.trajectory["trajectory"][self.i_step]

        if self.show_full:
            return self._show_step_yaml(item)

        return self._show_step_simple(item)

    def action_next_item(self) -> None:
        """Advance one step; stepping past the last step shows the info page."""
        if self.i_step < self.n_steps:
            self.i_step += 1
            self.scroll_top()
            self.update_content()

    def action_previous_item(self) -> None:
        """Go back one step; stepping before the first step shows the info page."""
        if self.i_step > -1:
            self.i_step -= 1
            self.scroll_top()
            self.update_content()

    def action_toggle_view(self) -> None:
        """Switch between the simple and the full (yaml) step view."""
        self.show_full = not self.show_full
        self.update_content()

    def action_first_item(self) -> None:
        """Jump to the first step."""
        self.i_step = 0
        # Reset the viewport like the other step-changing actions do, so the
        # new step is not shown at the previous step's scroll offset.
        self.scroll_top()
        self.update_content()

    def action_last_item(self) -> None:
        """Jump to the last step."""
        self.i_step = self.n_steps - 1
        # Reset the viewport like the other step-changing actions do.
        self.scroll_top()
        self.update_content()

    def scroll_top(self) -> None:
        """Resets scrolling viewport"""
        vs = self.query_one(VerticalScroll)
        vs.scroll_home(animate=False)

    def action_scroll_down(self) -> None:
        """Move the scroll target down by 15."""
        vs = self.query_one(VerticalScroll)
        vs.scroll_to(y=vs.scroll_target_y + 15)

    def action_scroll_up(self) -> None:
        """Move the scroll target up by 15."""
        vs = self.query_one(VerticalScroll)
        vs.scroll_to(y=vs.scroll_target_y - 15)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class TrajectorySelectorScreen(ModalScreen[int]):
    """Modal dialog for picking a trajectory from a filterable list.

    The screen is dismissed with the index (into `paths`) of the chosen
    trajectory, or with `None` when cancelled via escape.
    """

    BINDINGS = [
        Binding("escape", "dismiss(None)", "Cancel"),
    ]

    def __init__(self, paths: list[Path], current_index: int, overview_stats: dict):
        """Create the selector.

        Args:
            paths: Trajectory files to choose from.
            current_index: Index that is highlighted when the list is first shown.
            overview_stats: Mapping from a path's stem to a dict with the
                entries `exit_status`, `result`, `cost` and `api_calls`.
        """
        super().__init__()
        self.paths = paths
        self.current_index = current_index
        self.overview_stats = overview_stats
        self.all_items = []  # Labels of all entries, in the order of `paths`
        self.filtered_indices = []  # Indices into `all_items` currently listed

    def _get_list_item_texts(self, paths: list[Path]) -> list[str]:
        """Build one label per path: shortened path plus a stats summary.

        The common directory prefix of all paths is removed. If what remains
        has the form `<name>/<name>.<ext>`, it is shortened to `<name>`.

        Raises:
            KeyError: If `overview_stats` has no entry for a path's stem.
        """
        if not paths:
            # os.path.commonpath raises ValueError on an empty sequence.
            return []
        prefix = os.path.commonpath([str(p) for p in paths])
        labels = []
        for p in paths:
            ostat = self.overview_stats[p.stem]
            ostat_str = f"{ostat['exit_status']} {ostat['result']} ${ostat['cost']:.2f} {ostat['api_calls']} calls"
            shortened_path = str(p)[len(prefix) :].lstrip("/\\")
            if not shortened_path:
                # The prefix swallowed the whole path (e.g. only one path was
                # given); fall back to the stem so the label is not empty.
                shortened_path = p.stem
            elif Path(shortened_path).stem == Path(shortened_path).parent.name:
                # We have the instance ID twice (in the folder and the traj)
                shortened_path = Path(shortened_path).stem
            labels.append(f"{shortened_path} - {ostat_str}")

        return labels

    def compose(self) -> ComposeResult:
        """Yield the dialog: help line, filter input and trajectory list."""
        # Build the labels once; they are reused for filtering later on.
        self.all_items = self._get_list_item_texts(self.paths)
        self.filtered_indices = list(range(len(self.all_items)))
        with Vertical(id="dialog"):
            yield Static(
                "Press <TAB> to switch between search and list. Use <ARROW KEY>/<ENTER> to select.",
                id="title",
                markup=False,
            )
            yield Input(placeholder="Type to filter (auto-select if only one item remains)...", id="filter-input")
            yield ListView(
                *[ListItem(Static(label, markup=False)) for label in self.all_items],
                id="trajectory-list",
                initial_index=self.current_index,
            )

    def on_input_changed(self, event: Input.Changed) -> None:
        """Filter list items based on input"""
        filter_text = event.value.lower()

        # Keep the original indices of all labels containing the filter text
        self.filtered_indices = [i for i, item in enumerate(self.all_items) if filter_text in item.lower()]

        if len(self.filtered_indices) == 1:
            # Only one candidate left: select it right away
            self.dismiss(self.filtered_indices[0])
            return

        # Update ListView with filtered items
        list_view = self.query_one("#trajectory-list", ListView)
        list_view.clear()
        for i in self.filtered_indices:
            list_view.append(ListItem(Static(self.all_items[i], markup=False)))

    def on_list_view_selected(self, event: ListView.Selected) -> None:
        """Dismiss the screen with the original index of the selected entry."""
        index = event.list_view.index
        if index is None or not 0 <= index < len(self.filtered_indices):
            # Nothing is highlighted, or the list is out of sync with the filter
            return
        # Map the filtered index back to the original index
        original_index = self.filtered_indices[index]
        print(f"Selected index: {original_index}")
        self.dismiss(original_index)

    CSS = """
    #dialog {
        background: $surface;
        padding: 1;
        border: thick $primary;
        width: 100%;
        height: 100%;
    }

    #title {
        text-align: center;
        padding: 1;
    }

    #filter-input {
        dock: top;
        margin: 1 0;
    }

    ListView {
        height: 100%;
        border: solid $primary;
    }

    ListItem {
        padding: 0 1;
    }

    ListItem:hover {
        background: $accent;
    }
    """
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
class FileViewerScreen(ModalScreen):
    """Modal screen that shows the contents of one file in a scrollable view.

    ``.traj`` files are parsed as JSON and rendered as syntax-highlighted
    YAML; every other file is shown as plain text. Only the first 10,000
    characters are displayed; larger files can be opened in ``$EDITOR``.
    """

    BINDINGS = [
        Binding("q,escape", "dismiss", "Back"),
        Binding("j,down", "scroll_down", "Scroll down"),
        Binding("k,up", "scroll_up", "Scroll up"),
        Binding("e", "open_editor", "Open in $EDITOR"),
    ]

    def __init__(self, path: Path):
        """Create a viewer for the file at ``path`` (which may not exist)."""
        super().__init__()
        self.path = path

    def compose(self) -> ComposeResult:
        """Yield a single ``Static`` with the file content or a not-found note."""
        with VerticalScroll():
            # Check for the file *before* reading it, otherwise a missing file
            # raises instead of showing the "No file found" message.
            if self.path.exists():
                text = self.path.read_text()
                truncated = len(text) > 10_000
                if truncated:
                    self.app.notify(
                        "File is too large to display. Showing first 10k chars. Use e to open in editor.",
                        severity="warning",
                    )
                    text = text[:10_000]
                if self.path.suffix == ".traj" and not truncated:
                    # Truncated JSON cannot be parsed, so highlighting is only
                    # attempted on complete files.
                    try:
                        content_str = _yaml_serialization_with_linebreaks(json.loads(text))
                    except json.JSONDecodeError:
                        # Not valid JSON: fall back to showing the raw text.
                        yield Static(text, markup=False)
                    else:
                        syntax = Syntax(content_str, "yaml", theme="monokai", word_wrap=True)
                        yield Static(syntax, markup=False)
                else:
                    yield Static(text, markup=False)
            else:
                yield Static(f"No file found at {self.path}", markup=False)

    def action_scroll_down(self) -> None:
        """Scroll the view 15 rows down."""
        vs = self.query_one(VerticalScroll)
        vs.scroll_to(y=vs.scroll_target_y + 15)

    def action_scroll_up(self) -> None:
        """Scroll the view 15 rows up."""
        vs = self.query_one(VerticalScroll)
        vs.scroll_to(y=vs.scroll_target_y - 15)

    async def action_open_editor(self) -> None:
        """Open the file in the program named by the ``EDITOR`` variable.

        ``EDITOR`` may contain arguments (e.g. ``code -w``); it is split into
        an argument list rather than passed through a shell. Failures are
        reported through app notifications instead of being raised.
        """
        import shlex

        editor = os.environ.get("EDITOR")
        if not editor:
            self.app.notify("No editor found in $EDITOR environment variable, cannot perform action", severity="error")
            return
        # POSIX splitting would eat the backslashes of Windows paths.
        command = shlex.split(editor, posix=os.name != "nt")
        if not command:
            self.app.notify("No editor found in $EDITOR environment variable, cannot perform action", severity="error")
            return
        try:
            # Suspend the TUI app to restore terminal state before launching editor
            with self.app.suspend():
                subprocess.run([*command, str(self.path)], check=True)
        except subprocess.CalledProcessError as e:
            self.app.notify(f"Editor exited with status {e.returncode}", severity="warning")
        except OSError as e:
            # Raised e.g. when the editor executable cannot be found.
            self.app.notify(f"Could not launch editor {editor!r}: {e}", severity="error")

    CSS = """
    ScrollableContainer {
        width: 100%;
        height: 100%;
        background: $surface;
        padding: 1;
        border: thick $primary;
    }
    """
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
class TrajectoryInspectorApp(App):
    """Textual application for browsing one or many ``.traj`` files.

    Trajectories are discovered from ``input_path`` (a single file, or every
    ``*.traj`` below a directory). Per-trajectory overview data (result
    marker, exit status, API calls, cost) is collected once at start-up.
    """

    BINDINGS = [
        Binding("q", "quit", "Quit"),
        Binding("L", "next_traj", "Traj++"),
        Binding("H", "previous_traj", "Traj--"),
        Binding("t", "show_traj_selector", "Select Traj"),
        Binding("o", "show_log", "View Log"),
        Binding("r", "show_full", "Show full"),
    ]

    CSS = """
    Screen {
        layout: grid;
        grid-size: 1;
    }

    #viewer {
        width: 100%;
        height: 100%;
    }

    ScrollView {
        width: 100%;
        height: 100%;
        border: solid green;
    }
    """

    def __init__(self, input_path: str | Path, data_path: Path | None = None):
        """Discover trajectories and precompute their overview statistics.

        Args:
            input_path: A trajectory file or a directory searched recursively
                for ``*.traj`` files.
            data_path: Optional data file handed to ``load_file``; used to
                look up gold patches.

        Raises:
            FileNotFoundError: If ``input_path`` does not exist.
            ValueError: If no trajectory files are found.
        """
        super().__init__()
        self.input_path = Path(input_path)
        if not self.input_path.exists():
            msg = f"{self.input_path} doesn't exist"
            raise FileNotFoundError(msg)
        self.available_traj_paths = self._get_available_trajs()
        if not self.available_traj_paths:
            msg = "No trajectory *.traj files available"
            raise ValueError(msg)
        self.trajectory_index = 0
        # Maps instance id (trajectory file stem) -> dict of overview fields.
        self.overview_stats = collections.defaultdict(dict)
        self._build_overview_stats()
        self._data = load_file(data_path)

    def get_gold_patch(self, instance_id: str) -> str | None:
        """Return the ``patch`` entry for ``instance_id`` from the loaded data, if any."""
        if self._data is None:
            return None
        return self._data.get(instance_id, {}).get("patch", None)

    def _build_overview_stats(self) -> None:
        """Fill ``self.overview_stats`` for every available trajectory.

        The result marker comes from ``results.json`` in ``input_path``:
        "✅" if the instance id is listed under ``resolved_ids``, "❌" if it is
        not, and "❓" when the file or that key is unavailable. The remaining
        fields are read from each trajectory's ``info`` section.
        """
        results_path = self.input_path / "results.json"
        resolved_ids = None
        if results_path.exists():
            results = json.loads(results_path.read_text())
            if "resolved_ids" in results:
                # Build the set once; membership is tested per trajectory.
                resolved_ids = set(results["resolved_ids"])
        for traj in self.available_traj_paths:
            instance_id = traj.stem
            if resolved_ids is None:
                result = "❓"
            elif instance_id in resolved_ids:
                result = "✅"
            else:
                result = "❌"
            self.overview_stats[instance_id]["result"] = result

        def _get_info(traj: Path) -> tuple[str, dict]:
            traj_info = json.loads(traj.read_text()).get("info", {})
            return traj.stem, traj_info

        # Reading and parsing the files is done in worker threads.
        with ThreadPoolExecutor() as executor:
            # Map returns results in the same order as inputs
            all_infos = executor.map(_get_info, self.available_traj_paths)

        for instance_id, info in all_infos:
            model_stats = info.get("model_stats", {})
            self.overview_stats[instance_id]["info"] = info
            self.overview_stats[instance_id]["exit_status"] = info.get("exit_status", "?")
            self.overview_stats[instance_id]["api_calls"] = model_stats.get("api_calls", 0)
            self.overview_stats[instance_id]["cost"] = model_stats.get("instance_cost", 0)

    def _get_viewer_title(self, index: int) -> str:
        """Return the title for trajectory ``index``; long ids keep only their last 17 characters."""
        instance_id = self.available_traj_paths[index].stem
        if len(instance_id) > 20:
            instance_id = "..." + instance_id[-17:]
        return f"Traj {index + 1}/{len(self.available_traj_paths)} - {instance_id}"

    def _load_traj(self) -> None:
        """Show the trajectory at ``self.trajectory_index`` in the viewer widget."""
        instance_id = self.available_traj_paths[self.trajectory_index].stem
        traj_viewer = self.query_one(TrajectoryViewer)
        traj_viewer.load_trajectory(
            self.available_traj_paths[self.trajectory_index],
            self._get_viewer_title(self.trajectory_index),
            self.overview_stats[instance_id],
            gold_patch=self.get_gold_patch(instance_id),
        )

    def _get_available_trajs(self) -> list[Path]:
        """Return the trajectory paths for ``self.input_path``.

        A file is returned as-is; a directory is searched recursively for
        ``*.traj`` files, returned sorted.

        Raises:
            ValueError: If the path is neither a regular file nor a directory.
        """
        if self.input_path.is_file():
            return [self.input_path]
        elif self.input_path.is_dir():
            return sorted(self.input_path.rglob("*.traj"))
        msg = f"{self.input_path} is neither a file nor a directory"
        raise ValueError(msg)

    def compose(self) -> ComposeResult:
        """Lay out the header, the trajectory viewer and the footer."""
        yield Header()
        with Container():
            # NOTE(review): unlike `_load_traj`, no gold patch is passed for the
            # initially shown trajectory -- confirm whether that is intended.
            yield TrajectoryViewer(
                self.available_traj_paths[self.trajectory_index],
                self._get_viewer_title(self.trajectory_index),
                self.overview_stats[self.available_traj_paths[self.trajectory_index].stem],
            )
        yield Footer()

    def action_next_traj(self) -> None:
        """Advance to the next trajectory, wrapping around at the end."""
        self.trajectory_index = (self.trajectory_index + 1) % len(self.available_traj_paths)
        self._load_traj()

    def action_previous_traj(self) -> None:
        """Go back to the previous trajectory, wrapping around at the start."""
        self.trajectory_index = (self.trajectory_index - 1) % len(self.available_traj_paths)
        self._load_traj()

    async def action_show_traj_selector(self) -> None:
        """Open the trajectory selector and load whatever the user picks."""
        selector = TrajectorySelectorScreen(self.available_traj_paths, self.trajectory_index, self.overview_stats)

        def handler(index: int | None):
            if index is not None:
                self.trajectory_index = index
                self._load_traj()

        # `handler` is invoked with the dismissal result once the modal closes.
        await self.push_screen(selector, handler)

    async def action_show_log(self) -> None:
        """Show the ``.debug.log`` file that sits next to the current trajectory."""
        current_traj = self.available_traj_paths[self.trajectory_index]
        log_path = current_traj.with_suffix(".debug.log")
        log_viewer = FileViewerScreen(log_path)
        await self.push_screen(log_viewer)

    async def action_show_full(self) -> None:
        """Show full yaml of trajectory file"""
        current_traj = self.available_traj_paths[self.trajectory_index]
        viewer = FileViewerScreen(current_traj)
        await self.push_screen(viewer)
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
def main(args: list[str] | None = None):
    """Parse command-line arguments and run the trajectory inspector.

    Args:
        args: Argument list to parse; ``None`` makes argparse read
            ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description="Inspect trajectory JSON files")
    parser.add_argument(
        "trajectory_path",
        help="Path to the trajectory JSON file or directory containing trajectories",
        default=os.getcwd(),
        nargs="?",
    )
    parser.add_argument("-d", "--data_path", type=Path, help="Path to the data file to load gold patches from")
    parsed_args = parser.parse_args(args)

    # Forward --data_path; without it the option is silently ignored and no
    # gold patches are ever loaded.
    app = TrajectoryInspectorApp(parsed_args.trajectory_path, data_path=parsed_args.data_path)
    app.run()
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
if __name__ == "__main__":
|
|
493
|
+
main()
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import json
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from sweagent.utils.log import get_logger
|
|
6
|
+
|
|
7
|
+
"""Merge multiple predictions into a single file."""
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
logger = get_logger("merge", emoji="➕")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def merge_predictions(directories: list[Path], output: Path | None = None) -> None:
    """Merge predictions found in `directories` into a single JSON file.

    Every ``*.pred`` file below the given directories is read as JSON and
    stored under its ``instance_id``. Predictions without a ``model_patch``
    key are skipped with a warning; a ``None`` patch becomes an empty string.
    Nothing is written when no prediction files are found.

    Args:
        directories: Directories searched recursively for ``*.pred`` files.
        output: Output file. If not provided, the merged predictions will be
            written to ``preds.json`` in the first directory.

    Raises:
        ValueError: If two predictions share the same instance ID.
    """
    preds: list[Path] = []
    for directory in directories:
        # Sorted so that the merged file has a reproducible key order.
        new = sorted(directory.rglob("*.pred"))
        preds.extend(new)
        logger.debug("Found %d predictions in %s", len(new), directory)
    logger.info("Found %d predictions", len(preds))
    if not preds:
        # Name every searched directory; the loop variable is unbound when
        # `directories` is empty and otherwise only holds the last one.
        logger.warning("No predictions found in %s", ", ".join(str(d) for d in directories))
        return
    if output is None:
        output = directories[0] / "preds.json"
    data = {}
    for pred in preds:
        _data = json.loads(pred.read_text())
        instance_id = _data["instance_id"]
        if "model_patch" not in _data:
            logger.warning("Prediction %s does not contain a model patch. SKIPPING", pred)
            continue
        # Ensure model_patch is a string
        _data["model_patch"] = str(_data["model_patch"]) if _data["model_patch"] is not None else ""
        if instance_id in data:
            msg = f"Duplicate instance ID found: {instance_id}"
            raise ValueError(msg)
        data[instance_id] = _data
    output.parent.mkdir(parents=True, exist_ok=True)
    output.write_text(json.dumps(data, indent=4))
    logger.info("Wrote merged predictions to %s", output)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def get_cli_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the prediction merger.

    Returns:
        A parser with one or more positional ``directories`` and an optional
        ``--output`` file, both converted to ``Path``.
    """
    # The description is spelled out here: the module-level string sits below
    # the imports, so it is not a docstring and `__doc__` is None.
    parser = argparse.ArgumentParser(description="Merge multiple predictions into a single file.")
    parser.add_argument("directories", type=Path, help="Directory containing predictions", nargs="+")
    parser.add_argument("--output", type=Path, help="Output file")
    return parser
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def run_from_cli(args: list[str] | None = None) -> None:
    """Parse command-line arguments and merge the requested predictions.

    Args:
        args: Argument list to parse; ``None`` makes argparse read
            ``sys.argv``.
    """
    namespace = get_cli_parser().parse_args(args)
    merge_predictions(namespace.directories, namespace.output)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
if __name__ == "__main__":
|
|
64
|
+
run_from_cli()
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
import argparse
|
|
3
|
+
import collections
|
|
4
|
+
import json
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
from sweagent.utils.log import get_logger
|
|
10
|
+
|
|
11
|
+
"""Calculate statistics from .traj files."""
|
|
12
|
+
|
|
13
|
+
logger = get_logger("quick-stats", emoji="📊")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def quick_stats(directory: Path | str = ".") -> str:
    """Calculate statistics from .traj files.

    Logs the number of trajectories per exit status and the average number
    of API calls, and returns a markdown-style listing of the directories
    that contain trajectories for each exit status.

    Args:
        directory: Directory to search for .traj files (default: current directory)

    Returns:
        str: One ``## `status` `` section per exit status (most frequent
        first) followed by the space-separated, sorted parent directories of
        the matching files; or a short message when no .traj files or no
        api_calls data were found.
    """
    directory = Path(directory)
    # Find all .traj files; sorted so logs and output are reproducible.
    traj_files = sorted(directory.glob("**/*.traj"))

    if not traj_files:
        logger.warning("No .traj files found in %s", directory)
        return "No .traj files found."

    api_calls = []
    files_by_exit_status = collections.defaultdict(list)

    for file_path in traj_files:
        try:
            data = json.loads(file_path.read_text())
            info = data.get("info", {})
            model_stats = info.get("model_stats", {})
            if "api_calls" in model_stats:
                api_calls.append(model_stats["api_calls"])
            if "exit_status" in info:
                files_by_exit_status[info["exit_status"]].append(file_path)
        except (OSError, ValueError, TypeError, AttributeError) as e:
            # Unreadable, malformed or unexpectedly shaped files are reported
            # and skipped so that one bad file does not abort the summary.
            logger.error("Error processing %s: %s", file_path, e)

    # Order exit statuses by how many trajectories ended with them.
    files_by_exit_status = dict(sorted(files_by_exit_status.items(), key=lambda x: len(x[1]), reverse=True))

    if not api_calls:
        logger.warning("No valid api_calls data found in the .traj files")
        return "No valid api_calls data found in the .traj files."

    logger.info("Exit statuses:")
    for status, files in files_by_exit_status.items():
        logger.info("%s: %d", status, len(files))

    average_api_calls = np.mean(api_calls)
    logger.info("Avg api calls: %s", average_api_calls)

    # Build the per-status listing that is returned to the caller.
    result = []
    for status, files in files_by_exit_status.items():
        result.append(f"\n## `{status}`\n")
        # Unique parent directories instead of full paths; sorted because set
        # iteration order would otherwise vary between runs.
        subdirs = sorted({str(Path(file_path).parent) for file_path in files})
        result.append(" ".join(subdirs))

    return "\n".join(result)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def get_cli_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the quick statistics tool.

    Returns:
        A parser with one optional positional ``directory`` (a ``Path``,
        defaulting to the current directory).
    """
    # The description is spelled out here: the module-level string sits below
    # the imports, so it is not a docstring and `__doc__` is None.
    parser = argparse.ArgumentParser(description="Calculate statistics from .traj files.")
    parser.add_argument(
        "directory",
        type=Path,
        nargs="?",
        default=Path("."),
        help="Directory to search for .traj files (default: current directory)",
    )
    return parser
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def run_from_cli(args: list[str] | None = None) -> None:
    """Parse command-line arguments and print the trajectory statistics.

    Args:
        args: Argument list to parse; ``None`` makes argparse read
            ``sys.argv``.
    """
    namespace = get_cli_parser().parse_args(args)
    summary = quick_stats(namespace.directory)
    print(summary)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
if __name__ == "__main__":
|
|
96
|
+
run_from_cli()
|