code-muse 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_muse/__init__.py +26 -0
- code_muse/__main__.py +10 -0
- code_muse/agents/__init__.py +31 -0
- code_muse/agents/_builder.py +214 -0
- code_muse/agents/_compaction.py +506 -0
- code_muse/agents/_diagnostics.py +171 -0
- code_muse/agents/_history.py +382 -0
- code_muse/agents/_key_listeners.py +148 -0
- code_muse/agents/_non_streaming_render.py +148 -0
- code_muse/agents/_runtime.py +596 -0
- code_muse/agents/agent_creator_agent.py +603 -0
- code_muse/agents/agent_helios.py +47 -0
- code_muse/agents/agent_manager.py +740 -0
- code_muse/agents/agent_muse.py +78 -0
- code_muse/agents/agent_planning.py +44 -0
- code_muse/agents/agent_qa_melpomene.py +207 -0
- code_muse/agents/base_agent.py +194 -0
- code_muse/agents/event_stream_handler.py +361 -0
- code_muse/agents/json_agent.py +201 -0
- code_muse/agents/prompt_v3.py +521 -0
- code_muse/agents/subagent_stream_handler.py +273 -0
- code_muse/callbacks.py +941 -0
- code_muse/chatgpt_codex_client.py +333 -0
- code_muse/claude_cache_client.py +853 -0
- code_muse/cli_runner/__init__.py +319 -0
- code_muse/cli_runner/args.py +63 -0
- code_muse/cli_runner/loop.py +510 -0
- code_muse/cli_runner/resume.py +72 -0
- code_muse/cli_runner/runner.py +161 -0
- code_muse/command_line/__init__.py +1 -0
- code_muse/command_line/add_model_menu.py +1331 -0
- code_muse/command_line/agent_menu.py +674 -0
- code_muse/command_line/attachments.py +397 -0
- code_muse/command_line/autosave_menu.py +709 -0
- code_muse/command_line/clipboard.py +528 -0
- code_muse/command_line/colors_menu.py +530 -0
- code_muse/command_line/command_handler.py +262 -0
- code_muse/command_line/command_registry.py +150 -0
- code_muse/command_line/config_commands.py +711 -0
- code_muse/command_line/core_commands.py +740 -0
- code_muse/command_line/diff_menu.py +865 -0
- code_muse/command_line/file_path_completion.py +73 -0
- code_muse/command_line/load_context_completion.py +57 -0
- code_muse/command_line/model_picker_completion.py +512 -0
- code_muse/command_line/model_settings_menu.py +983 -0
- code_muse/command_line/onboarding_slides.py +162 -0
- code_muse/command_line/onboarding_wizard.py +337 -0
- code_muse/command_line/pagination.py +41 -0
- code_muse/command_line/pin_command_completion.py +329 -0
- code_muse/command_line/prompt_toolkit_completion.py +886 -0
- code_muse/command_line/session_commands.py +304 -0
- code_muse/command_line/shell_passthrough.py +145 -0
- code_muse/command_line/skills_completion.py +158 -0
- code_muse/command_line/types.py +18 -0
- code_muse/command_line/uc_menu.py +908 -0
- code_muse/command_line/utils.py +105 -0
- code_muse/command_line/wiggum_state.py +77 -0
- code_muse/config.py +1138 -0
- code_muse/config_agent.py +168 -0
- code_muse/config_appearance.py +241 -0
- code_muse/config_model.py +357 -0
- code_muse/config_security.py +73 -0
- code_muse/error_logging.py +132 -0
- code_muse/evals/__init__.py +35 -0
- code_muse/evals/eval_helpers.py +81 -0
- code_muse/evals/eval_runner.py +299 -0
- code_muse/evals/sample_evals/__init__.py +1 -0
- code_muse/evals/sample_evals/eval_frugal_reads.py +59 -0
- code_muse/evals/sample_evals/eval_memory_planning.py +31 -0
- code_muse/evals/sample_evals/eval_shell_efficiency.py +39 -0
- code_muse/evals/sample_evals/eval_tool_masking.py +33 -0
- code_muse/fs_scan_cache/__init__.py +31 -0
- code_muse/fs_scan_cache/invalidation_hooks.py +89 -0
- code_muse/fs_scan_cache/scan_cache_core.cpython-314-darwin.so +0 -0
- code_muse/fs_scan_cache/scan_cache_core.pyx +203 -0
- code_muse/fs_scan_cache/tool_integration.py +309 -0
- code_muse/fs_scan_cache/ttl_policy.py +44 -0
- code_muse/gemini_code_assist.py +383 -0
- code_muse/gemini_model.py +838 -0
- code_muse/hook_engine/README.md +105 -0
- code_muse/hook_engine/__init__.py +21 -0
- code_muse/hook_engine/aliases.py +153 -0
- code_muse/hook_engine/engine.py +221 -0
- code_muse/hook_engine/executor.py +347 -0
- code_muse/hook_engine/matcher.py +154 -0
- code_muse/hook_engine/models.py +245 -0
- code_muse/hook_engine/registry.py +114 -0
- code_muse/hook_engine/trust.py +268 -0
- code_muse/hook_engine/validator.py +144 -0
- code_muse/http_utils.py +360 -0
- code_muse/keymap.py +128 -0
- code_muse/list_filtering.py +26 -0
- code_muse/main.py +10 -0
- code_muse/messaging/__init__.py +259 -0
- code_muse/messaging/bus.py +621 -0
- code_muse/messaging/commands.py +166 -0
- code_muse/messaging/markdown_patches.py +57 -0
- code_muse/messaging/message_queue.py +397 -0
- code_muse/messaging/messages.py +591 -0
- code_muse/messaging/queue_console.py +269 -0
- code_muse/messaging/renderers.py +308 -0
- code_muse/messaging/rich_renderer.py +1158 -0
- code_muse/messaging/shimmer.py +154 -0
- code_muse/messaging/spinner/__init__.py +87 -0
- code_muse/messaging/spinner/console_spinner.py +250 -0
- code_muse/messaging/spinner/spinner_base.py +82 -0
- code_muse/messaging/subagent_console.py +458 -0
- code_muse/model_factory.py +1203 -0
- code_muse/model_switching.py +59 -0
- code_muse/model_utils.py +156 -0
- code_muse/models.json +66 -0
- code_muse/models_cache/__init__.py +26 -0
- code_muse/models_cache/blocking_lru_cache.py +98 -0
- code_muse/models_cache/cache_writer.py +86 -0
- code_muse/models_cache/sha256_hash.cpython-314-darwin.so +0 -0
- code_muse/models_cache/sha256_hash.pyx +34 -0
- code_muse/models_cache/startup_integration.py +75 -0
- code_muse/models_dev_api.json +1 -0
- code_muse/models_dev_parser.py +590 -0
- code_muse/motion.py +126 -0
- code_muse/plugins/__init__.py +471 -0
- code_muse/plugins/agent_skills/__init__.py +32 -0
- code_muse/plugins/agent_skills/config.py +176 -0
- code_muse/plugins/agent_skills/discovery.py +309 -0
- code_muse/plugins/agent_skills/downloader.py +389 -0
- code_muse/plugins/agent_skills/installer.py +19 -0
- code_muse/plugins/agent_skills/metadata.py +293 -0
- code_muse/plugins/agent_skills/prompt_builder.py +66 -0
- code_muse/plugins/agent_skills/register_callbacks.py +298 -0
- code_muse/plugins/agent_skills/remote_catalog.py +320 -0
- code_muse/plugins/agent_skills/skill_catalog.py +254 -0
- code_muse/plugins/agent_skills/skills_install_menu.py +690 -0
- code_muse/plugins/agent_skills/skills_menu.py +791 -0
- code_muse/plugins/autonomous_memory/__init__.py +39 -0
- code_muse/plugins/autonomous_memory/bm25_scorer.cpython-314-darwin.so +0 -0
- code_muse/plugins/autonomous_memory/bm25_scorer.cpython-314-x86_64-linux-gnu.so +0 -0
- code_muse/plugins/autonomous_memory/bm25_scorer.pyx +291 -0
- code_muse/plugins/autonomous_memory/consolidation.py +82 -0
- code_muse/plugins/autonomous_memory/extraction.py +382 -0
- code_muse/plugins/autonomous_memory/lease_lock.py +105 -0
- code_muse/plugins/autonomous_memory/memory_injection.py +59 -0
- code_muse/plugins/autonomous_memory/register_callbacks.py +268 -0
- code_muse/plugins/autonomous_memory/secret_scanner.py +62 -0
- code_muse/plugins/autonomous_memory/session_scanner.py +163 -0
- code_muse/plugins/aws_bedrock/__init__.py +14 -0
- code_muse/plugins/aws_bedrock/config.py +99 -0
- code_muse/plugins/aws_bedrock/register_callbacks.py +241 -0
- code_muse/plugins/aws_bedrock/utils.py +153 -0
- code_muse/plugins/azure_foundry/README.md +238 -0
- code_muse/plugins/azure_foundry/__init__.py +15 -0
- code_muse/plugins/azure_foundry/config.py +125 -0
- code_muse/plugins/azure_foundry/discovery.py +187 -0
- code_muse/plugins/azure_foundry/register_callbacks.py +495 -0
- code_muse/plugins/azure_foundry/token.py +180 -0
- code_muse/plugins/azure_foundry/utils.py +345 -0
- code_muse/plugins/build_filter/__init__.py +1 -0
- code_muse/plugins/build_filter/register_callbacks.py +201 -0
- code_muse/plugins/build_filter/strategies/__init__.py +1 -0
- code_muse/plugins/build_filter/strategies/build.py +397 -0
- code_muse/plugins/chatgpt_oauth/__init__.py +6 -0
- code_muse/plugins/chatgpt_oauth/config.py +52 -0
- code_muse/plugins/chatgpt_oauth/oauth_flow.py +338 -0
- code_muse/plugins/chatgpt_oauth/register_callbacks.py +172 -0
- code_muse/plugins/chatgpt_oauth/test_plugin.py +301 -0
- code_muse/plugins/chatgpt_oauth/utils.py +538 -0
- code_muse/plugins/checkpointing/__init__.py +29 -0
- code_muse/plugins/checkpointing/checkpoint_hook.py +51 -0
- code_muse/plugins/checkpointing/conversation_snapshots.py +117 -0
- code_muse/plugins/checkpointing/register_callbacks.py +51 -0
- code_muse/plugins/checkpointing/restore_command.py +263 -0
- code_muse/plugins/checkpointing/rewind_shortcut.py +88 -0
- code_muse/plugins/checkpointing/shadow_git.py +90 -0
- code_muse/plugins/claude_code_hooks/__init__.py +1 -0
- code_muse/plugins/claude_code_hooks/config.py +188 -0
- code_muse/plugins/claude_code_hooks/register_callbacks.py +208 -0
- code_muse/plugins/claude_code_oauth/README.md +167 -0
- code_muse/plugins/claude_code_oauth/SETUP.md +93 -0
- code_muse/plugins/claude_code_oauth/__init__.py +25 -0
- code_muse/plugins/claude_code_oauth/config.py +52 -0
- code_muse/plugins/claude_code_oauth/fast_mode.py +124 -0
- code_muse/plugins/claude_code_oauth/prompt_handler.py +63 -0
- code_muse/plugins/claude_code_oauth/register_callbacks.py +547 -0
- code_muse/plugins/claude_code_oauth/test_fast_mode.py +165 -0
- code_muse/plugins/claude_code_oauth/test_plugin.py +283 -0
- code_muse/plugins/claude_code_oauth/token_refresh_heartbeat.py +237 -0
- code_muse/plugins/claude_code_oauth/utils.py +664 -0
- code_muse/plugins/copilot_auth/__init__.py +11 -0
- code_muse/plugins/copilot_auth/config.py +91 -0
- code_muse/plugins/copilot_auth/reasoning_client.py +409 -0
- code_muse/plugins/copilot_auth/register_callbacks.py +461 -0
- code_muse/plugins/copilot_auth/utils.py +584 -0
- code_muse/plugins/custom_commands/__init__.py +14 -0
- code_muse/plugins/custom_commands/args_injection.py +82 -0
- code_muse/plugins/custom_commands/command_discovery.py +89 -0
- code_muse/plugins/custom_commands/command_toml_schema.py +71 -0
- code_muse/plugins/custom_commands/register_callbacks.py +176 -0
- code_muse/plugins/customizable_commands/__init__.py +0 -0
- code_muse/plugins/customizable_commands/register_callbacks.py +136 -0
- code_muse/plugins/destructive_command_guard/__init__.py +14 -0
- code_muse/plugins/destructive_command_guard/detector.py +375 -0
- code_muse/plugins/destructive_command_guard/register_callbacks.py +148 -0
- code_muse/plugins/example_custom_command/README.md +280 -0
- code_muse/plugins/example_custom_command/register_callbacks.py +51 -0
- code_muse/plugins/file_permission_handler/__init__.py +4 -0
- code_muse/plugins/file_permission_handler/register_callbacks.py +441 -0
- code_muse/plugins/filter_engine/__init__.py +30 -0
- code_muse/plugins/filter_engine/classifier.py +153 -0
- code_muse/plugins/filter_engine/content_detector.py +184 -0
- code_muse/plugins/filter_engine/dispatcher.py +244 -0
- code_muse/plugins/filter_engine/register_callbacks.py +188 -0
- code_muse/plugins/filter_engine/registry.py +279 -0
- code_muse/plugins/filter_engine/strategies/__init__.py +8 -0
- code_muse/plugins/filter_engine/strategies/ast_compressor.cpython-314-darwin.so +0 -0
- code_muse/plugins/filter_engine/strategies/ast_compressor.cpython-314-x86_64-linux-gnu.so +0 -0
- code_muse/plugins/filter_engine/strategies/ast_compressor.pyx +348 -0
- code_muse/plugins/filter_engine/strategies/ast_parser.py +167 -0
- code_muse/plugins/filter_engine/strategies/code.cpython-314-darwin.so +0 -0
- code_muse/plugins/filter_engine/strategies/code.cpython-314-x86_64-linux-gnu.so +0 -0
- code_muse/plugins/filter_engine/strategies/code.pyx +584 -0
- code_muse/plugins/filter_engine/strategies/git.cpython-314-darwin.so +0 -0
- code_muse/plugins/filter_engine/strategies/git.cpython-314-x86_64-linux-gnu.so +0 -0
- code_muse/plugins/filter_engine/strategies/git.pyx +438 -0
- code_muse/plugins/filter_engine/strategies/json_compressor.cpython-314-darwin.so +0 -0
- code_muse/plugins/filter_engine/strategies/json_compressor.pyx +253 -0
- code_muse/plugins/filter_engine/strategies/json_patterns.cpython-314-darwin.so +0 -0
- code_muse/plugins/filter_engine/strategies/json_patterns.pyx +178 -0
- code_muse/plugins/filter_engine/strategies/lint.cpython-314-darwin.so +0 -0
- code_muse/plugins/filter_engine/strategies/lint.cpython-314-x86_64-linux-gnu.so +0 -0
- code_muse/plugins/filter_engine/strategies/lint.pyx +626 -0
- code_muse/plugins/filter_engine/strategies/test.cpython-314-darwin.so +0 -0
- code_muse/plugins/filter_engine/strategies/test.cpython-314-x86_64-linux-gnu.so +0 -0
- code_muse/plugins/filter_engine/strategies/test.pyx +431 -0
- code_muse/plugins/filter_engine/verbosity.py +63 -0
- code_muse/plugins/force_push_guard/__init__.py +5 -0
- code_muse/plugins/force_push_guard/detector.py +96 -0
- code_muse/plugins/force_push_guard/register_callbacks.py +144 -0
- code_muse/plugins/force_push_guard/test_detector.py +143 -0
- code_muse/plugins/frontend_emitter/__init__.py +25 -0
- code_muse/plugins/frontend_emitter/emitter.py +121 -0
- code_muse/plugins/frontend_emitter/register_callbacks.py +259 -0
- code_muse/plugins/gac/__init__.py +4 -0
- code_muse/plugins/gac/git_ops.py +136 -0
- code_muse/plugins/gac/prompt.py +191 -0
- code_muse/plugins/gac/register_callbacks.py +82 -0
- code_muse/plugins/hook_creator/__init__.py +1 -0
- code_muse/plugins/hook_creator/register_callbacks.py +34 -0
- code_muse/plugins/hook_manager/__init__.py +1 -0
- code_muse/plugins/hook_manager/config.py +289 -0
- code_muse/plugins/hook_manager/hooks_menu.py +563 -0
- code_muse/plugins/hook_manager/register_callbacks.py +227 -0
- code_muse/plugins/hook_monitor/register_callbacks.py +36 -0
- code_muse/plugins/mindpack/__init__.py +0 -0
- code_muse/plugins/mindpack/factory.py +930 -0
- code_muse/plugins/mindpack/judge.py +573 -0
- code_muse/plugins/mindpack/memory.py +100 -0
- code_muse/plugins/mindpack/mindpack_menu.py +1552 -0
- code_muse/plugins/mindpack/orchestration.py +605 -0
- code_muse/plugins/mindpack/register_callbacks.py +175 -0
- code_muse/plugins/mindpack/schemas.py +358 -0
- code_muse/plugins/mindpack/tools.py +387 -0
- code_muse/plugins/oauth_muse_html.py +226 -0
- code_muse/plugins/ollama_setup/__init__.py +5 -0
- code_muse/plugins/ollama_setup/completer.py +36 -0
- code_muse/plugins/ollama_setup/register_callbacks.py +410 -0
- code_muse/plugins/plan_command/__init__.py +0 -0
- code_muse/plugins/plan_command/register_callbacks.py +206 -0
- code_muse/plugins/plan_mode/__init__.py +37 -0
- code_muse/plugins/plan_mode/mode_cycling.py +40 -0
- code_muse/plugins/plan_mode/plan_generation.py +68 -0
- code_muse/plugins/plan_mode/plan_hooks.py +74 -0
- code_muse/plugins/plan_mode/plan_mode_tools.py +138 -0
- code_muse/plugins/plan_mode/register_callbacks.py +121 -0
- code_muse/plugins/plugin_trust/register_callbacks.py +140 -0
- code_muse/plugins/policy_engine/__init__.py +46 -0
- code_muse/plugins/policy_engine/approval_flow_integration.py +59 -0
- code_muse/plugins/policy_engine/policy_evaluator.py +75 -0
- code_muse/plugins/policy_engine/policy_file_discovery.py +90 -0
- code_muse/plugins/policy_engine/policy_toml_schema.py +115 -0
- code_muse/plugins/policy_engine/register_callbacks.py +112 -0
- code_muse/plugins/pop_command/__init__.py +1 -0
- code_muse/plugins/pop_command/register_callbacks.py +189 -0
- code_muse/plugins/prompt_newline/__init__.py +13 -0
- code_muse/plugins/prompt_newline/config.py +19 -0
- code_muse/plugins/prompt_newline/register_callbacks.py +159 -0
- code_muse/plugins/safety_status/__init__.py +0 -0
- code_muse/plugins/safety_status/register_callbacks.py +113 -0
- code_muse/plugins/semantic_compression/__init__.py +6 -0
- code_muse/plugins/semantic_compression/compressor.py +295 -0
- code_muse/plugins/semantic_compression/config.py +123 -0
- code_muse/plugins/semantic_compression/register_callbacks.py +320 -0
- code_muse/plugins/shell_minimizer/__init__.py +50 -0
- code_muse/plugins/shell_minimizer/builtin_filters.toml +393 -0
- code_muse/plugins/shell_minimizer/pipeline.py +556 -0
- code_muse/plugins/shell_minimizer/primitives.py +482 -0
- code_muse/plugins/shell_minimizer/register_callbacks.py +276 -0
- code_muse/plugins/shell_safety/__init__.py +6 -0
- code_muse/plugins/shell_safety/agent_shell_safety.py +69 -0
- code_muse/plugins/shell_safety/command_cache.py +149 -0
- code_muse/plugins/shell_safety/register_callbacks.py +202 -0
- code_muse/plugins/synthetic_status/__init__.py +1 -0
- code_muse/plugins/synthetic_status/register_callbacks.py +128 -0
- code_muse/plugins/synthetic_status/status_api.py +145 -0
- code_muse/plugins/token_caching/__init__.py +21 -0
- code_muse/plugins/token_caching/cache_hit_tracking.py +128 -0
- code_muse/plugins/token_caching/cacheable_prefix_detection.py +28 -0
- code_muse/plugins/token_caching/register_callbacks.py +54 -0
- code_muse/plugins/token_caching/stats_display.py +35 -0
- code_muse/plugins/token_tracking/__init__.py +26 -0
- code_muse/plugins/token_tracking/database.py +381 -0
- code_muse/plugins/token_tracking/edit_analyzer.py +97 -0
- code_muse/plugins/token_tracking/record.py +55 -0
- code_muse/plugins/token_tracking/register_callbacks.py +277 -0
- code_muse/plugins/token_tracking/reports.py +329 -0
- code_muse/plugins/universal_constructor/__init__.py +13 -0
- code_muse/plugins/universal_constructor/models.py +136 -0
- code_muse/plugins/universal_constructor/register_callbacks.py +47 -0
- code_muse/plugins/universal_constructor/registry.py +390 -0
- code_muse/plugins/universal_constructor/runner.py +474 -0
- code_muse/plugins/universal_constructor/safety.py +440 -0
- code_muse/plugins/universal_constructor/sandbox.py +584 -0
- code_muse/provider_identity.py +105 -0
- code_muse/pydantic_patches.py +410 -0
- code_muse/reopenable_async_client.py +233 -0
- code_muse/round_robin_model.py +151 -0
- code_muse/secret_storage.py +74 -0
- code_muse/security/__init__.py +1 -0
- code_muse/security/redaction.cpython-314-darwin.so +0 -0
- code_muse/security/redaction.cpython-314-x86_64-linux-gnu.so +0 -0
- code_muse/security/redaction.pyx +135 -0
- code_muse/session_storage.py +565 -0
- code_muse/status_display.py +261 -0
- code_muse/stream_parser/__init__.py +76 -0
- code_muse/stream_parser/assistant_text_parser.py +90 -0
- code_muse/stream_parser/citation_parser.py +76 -0
- code_muse/stream_parser/inline_hidden_tag_parser.py +236 -0
- code_muse/stream_parser/proposed_plan_parser.py +158 -0
- code_muse/stream_parser/stream_text_chunk.py +23 -0
- code_muse/stream_parser/stream_text_parser.py +27 -0
- code_muse/stream_parser/tagged_line_parser.cpython-314-darwin.so +0 -0
- code_muse/stream_parser/tagged_line_parser.pyx +251 -0
- code_muse/stream_parser/utf8_stream_parser.cpython-314-darwin.so +0 -0
- code_muse/stream_parser/utf8_stream_parser.pyx +206 -0
- code_muse/summarization_agent.py +308 -0
- code_muse/terminal_utils.cpython-314-darwin.so +0 -0
- code_muse/terminal_utils.cpython-314-x86_64-linux-gnu.so +0 -0
- code_muse/terminal_utils.pyx +483 -0
- code_muse/tools/__init__.py +459 -0
- code_muse/tools/agent_tools.py +613 -0
- code_muse/tools/ask_user_question/__init__.py +26 -0
- code_muse/tools/ask_user_question/constants.py +73 -0
- code_muse/tools/ask_user_question/demo_tui.py +55 -0
- code_muse/tools/ask_user_question/handler.py +232 -0
- code_muse/tools/ask_user_question/models.py +302 -0
- code_muse/tools/ask_user_question/registration.py +37 -0
- code_muse/tools/ask_user_question/renderers.py +336 -0
- code_muse/tools/ask_user_question/terminal_ui.py +327 -0
- code_muse/tools/ask_user_question/theme.py +156 -0
- code_muse/tools/ask_user_question/tui_loop.py +422 -0
- code_muse/tools/background_jobs.py +99 -0
- code_muse/tools/browser/__init__.py +37 -0
- code_muse/tools/browser/browser_control.py +289 -0
- code_muse/tools/browser/browser_interactions.py +545 -0
- code_muse/tools/browser/browser_locators.py +640 -0
- code_muse/tools/browser/browser_manager.py +376 -0
- code_muse/tools/browser/browser_navigation.py +251 -0
- code_muse/tools/browser/browser_screenshot.py +180 -0
- code_muse/tools/browser/browser_scripts.py +462 -0
- code_muse/tools/browser/browser_workflows.py +222 -0
- code_muse/tools/chrome_cdp/__init__.py +1070 -0
- code_muse/tools/chrome_cdp/register_callbacks.py +61 -0
- code_muse/tools/command_runner.py +1401 -0
- code_muse/tools/common.py +1407 -0
- code_muse/tools/display.py +87 -0
- code_muse/tools/file_modifications.py +1099 -0
- code_muse/tools/file_operations.py +860 -0
- code_muse/tools/image_tools.py +185 -0
- code_muse/tools/meetin_proxy/__init__.py +243 -0
- code_muse/tools/meetin_proxy/capture_addon.py +82 -0
- code_muse/tools/meetin_proxy/proxy_manager.py +326 -0
- code_muse/tools/meetin_proxy/register_callbacks.py +45 -0
- code_muse/tools/path_policy.py +219 -0
- code_muse/tools/skills_tools.py +586 -0
- code_muse/tools/subagent_context.py +158 -0
- code_muse/tools/tools_content.py +50 -0
- code_muse/tools/universal_constructor.py +965 -0
- code_muse/uvx_detection.py +241 -0
- code_muse/version_checker.py +86 -0
- code_muse-0.0.1.data/data/code_muse/models.json +66 -0
- code_muse-0.0.1.data/data/code_muse/models_dev_api.json +1 -0
- code_muse-0.0.1.dist-info/METADATA +845 -0
- code_muse-0.0.1.dist-info/RECORD +394 -0
- code_muse-0.0.1.dist-info/WHEEL +4 -0
- code_muse-0.0.1.dist-info/entry_points.txt +2 -0
- code_muse-0.0.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
"""Core test infrastructure for the Behavioral Eval Framework.
|
|
2
|
+
|
|
3
|
+
Provides ``TestRig`` for recording tool calls, ``run_eval`` for executing
|
|
4
|
+
agent prompts in a temporary directory via headless ``code-muse``, and
|
|
5
|
+
``EvalSuite`` for organizing multiple evals.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import importlib.util
|
|
9
|
+
import json
|
|
10
|
+
import shlex
|
|
11
|
+
import shutil
|
|
12
|
+
import subprocess
|
|
13
|
+
import tempfile
|
|
14
|
+
import time
|
|
15
|
+
from collections.abc import Callable
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class ToolCall:
    """One tool invocation captured while an eval was running."""

    # Name of the tool that was invoked.
    tool_name: str
    # Arguments the tool was invoked with.
    tool_args: dict
    # Whatever was recorded as the tool's result; may be None.
    result: Any
    # Time of the record as a float (recorders in this module use time.time()).
    timestamp: float
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class TestRig:
    """Accumulates, in memory, the tool calls seen during one eval run."""

    # Keep pytest from collecting this class despite its "Test" prefix.
    __test__ = False

    def __init__(self) -> None:
        self._tool_logs: list[ToolCall] = []

    def record_tool_call(self, tool_name: str, tool_args: dict, result: Any) -> None:
        """Log one tool call, stamping it with the current ``time.time()``."""
        entry = ToolCall(
            tool_name=tool_name,
            tool_args=tool_args,
            result=result,
            timestamp=time.time(),
        )
        self._tool_logs.append(entry)

    def get_tool_logs(self) -> list[ToolCall]:
        """Return a new list of every recorded call (the entries are shared)."""
        return self._tool_logs.copy()

    def get_tool_calls_by_name(self, name: str) -> list[ToolCall]:
        """Return the recorded calls whose ``tool_name`` equals *name*."""
        matches: list[ToolCall] = []
        for call in self._tool_logs:
            if call.tool_name == name:
                matches.append(call)
        return matches
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@dataclass
class EvalResult:
    """Outcome record for a single eval run."""

    # Name identifying the eval.
    name: str
    # Whether the eval passed.
    passed: bool
    # Explanation accompanying the verdict.
    message: str
    # Tool calls captured during the run; defaults to a fresh empty list
    # per instance.
    tool_logs: list[ToolCall] = field(default_factory=list)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _parse_tool_calls_from_stdout(stdout: str) -> list[ToolCall]:
    """Pull tool-call records out of agent output on a best-effort basis.

    A record is any JSON object carrying both ``tool_name`` and
    ``tool_args`` keys; each one is turned into a :class:`ToolCall`.
    Input that does not parse as JSON is skipped without error.

    Args:
        stdout: Raw text emitted by the agent.

    Returns:
        The tool calls found, in order of appearance.
    """
    found: list[ToolCall] = []

    # Pass 1: treat every non-blank line as a standalone JSON document.
    for raw_line in stdout.splitlines():
        stripped = raw_line.strip()
        if not stripped:
            continue
        try:
            payload = json.loads(stripped)
        except ValueError:
            continue
        if not isinstance(payload, dict):
            continue
        if "tool_name" not in payload or "tool_args" not in payload:
            continue
        found.append(
            ToolCall(
                tool_name=payload.get("tool_name", ""),
                tool_args=payload.get("tool_args", {}),
                result=payload.get("result"),
                timestamp=payload.get("timestamp", time.time()),
            )
        )

    if found:
        return found

    # Pass 2: nothing matched line-by-line, so look for JSON objects
    # embedded inside other text.
    return _extract_json_objects_with_tool_fields(stdout)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _extract_json_objects_with_tool_fields(text: str) -> list[ToolCall]:
    """Extract tool-call JSON objects embedded in arbitrary text.

    Every ``{`` in *text* is tried as the start of a JSON object using
    :meth:`json.JSONDecoder.raw_decode`, which understands string literals
    and escapes. Braces inside JSON strings therefore no longer truncate a
    candidate, and a stray or unbalanced ``{`` no longer swallows the rest
    of the text.

    Objects containing both ``tool_name`` and ``tool_args`` keys become
    :class:`ToolCall` records. A successfully decoded object is skipped as
    a whole, so objects nested inside it are not reported separately.

    Args:
        text: Mixed output that may contain JSON objects among other text.

    Returns:
        The tool calls found, in order of appearance.
    """
    decoder = json.JSONDecoder()
    tool_calls: list[ToolCall] = []

    pos = text.find("{")
    while pos != -1:
        try:
            obj, end = decoder.raw_decode(text, pos)
        except ValueError:
            # No valid JSON object starts at this brace; resume just after
            # it so later (or nested) objects can still be found.
            pos = text.find("{", pos + 1)
            continue

        if isinstance(obj, dict) and "tool_name" in obj and "tool_args" in obj:
            tool_calls.append(
                ToolCall(
                    tool_name=obj.get("tool_name", ""),
                    tool_args=obj.get("tool_args", {}),
                    result=obj.get("result"),
                    timestamp=obj.get("timestamp", time.time()),
                )
            )

        # Continue scanning after the object that was just consumed.
        pos = text.find("{", end)

    return tool_calls
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def run_eval(
    name: str,
    prompt: str,
    setup_files: dict[str, str] | None,
    assert_fn: Callable[[TestRig], tuple[bool, str]],
) -> EvalResult:
    """Run a single behavioral eval in an isolated temporary directory.

    Steps:
    1. Create a temp directory.
    2. Write ``setup_files`` into it.
    3. Spawn ``code-muse --headless --cwd <temp_dir>`` via ``subprocess``
       with the prompt (plus a trailing newline) supplied on stdin.
    4. Parse tool calls from the combined stdout and stderr.
    5. Populate a :class:`TestRig`.
    6. Run the user-supplied ``assert_fn``.
    7. Return an :class:`EvalResult`.

    The agent is started from an argument list without a shell, so the
    prompt reaches it verbatim: quotes, ``$``, backticks and backslashes
    are never interpreted by a shell.

    Args:
        name: Human-readable name for this eval. Characters that are not
            filename-safe are replaced when building the temp dir name.
        prompt: The prompt text to send to the agent.
        setup_files: Optional ``path → content`` dict to materialise
            inside the temp directory before running the agent.
        assert_fn: Callback that receives the populated :class:`TestRig`
            and returns ``(passed, message)``.

    Returns:
        An :class:`EvalResult` describing the outcome. Any exception
        raised while preparing files, launching the agent (including a
        missing ``code-muse`` executable) or running ``assert_fn`` is
        reported as a failed result rather than propagated.
    """
    # A path separator in the prefix would make mkdtemp fail before the
    # try block below, so reduce the name to filename-safe characters.
    safe_name = "".join(ch if ch.isalnum() or ch in "-_." else "_" for ch in name)
    temp_dir = tempfile.mkdtemp(prefix=f"eval_{safe_name}_")
    try:
        # 1. Write setup files
        if setup_files:
            for rel_path, content in setup_files.items():
                full_path = Path(temp_dir) / rel_path
                full_path.parent.mkdir(parents=True, exist_ok=True)
                full_path.write_text(content, encoding="utf-8")

        # 2. Run code-muse headless. Resolve the executable up front so
        # platform-specific launchers on PATH are found; fall back to the
        # bare name and let subprocess report a missing binary.
        executable = shutil.which("code-muse") or "code-muse"
        proc = subprocess.run(
            [executable, "--headless", "--cwd", temp_dir],
            input=f"{prompt}\n",
            capture_output=True,
            text=True,
            cwd=temp_dir,
        )

        # 3. Parse tool calls
        combined_output = proc.stdout + proc.stderr
        tool_calls = _parse_tool_calls_from_stdout(combined_output)

        # 4. Populate TestRig
        rig = TestRig()
        # Inject synthetic output record so assert_output_contains works
        rig.record_tool_call(
            "_eval_output",
            {"stdout": proc.stdout, "stderr": proc.stderr},
            combined_output,
        )
        for tc in tool_calls:
            rig.record_tool_call(tc.tool_name, tc.tool_args, tc.result)

        # 5. Run assertion
        passed, message = assert_fn(rig)

        return EvalResult(
            name=name, passed=passed, message=message, tool_logs=rig.get_tool_logs()
        )
    except Exception as exc:
        # Top-level boundary of one eval: report the failure as a result
        # so a single broken eval cannot abort a whole suite.
        return EvalResult(
            name=name, passed=False, message=f"Eval execution failed: {exc}"
        )
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def run_all_evals(evals_dir: Path) -> list[EvalResult]:
    """Discover and run every ``eval_*.py`` file in *evals_dir*.

    Each file is imported and every callable whose name starts with
    ``eval_`` is invoked. The callable is expected to return an
    :class:`EvalResult` (usually by calling :func:`run_eval`); return
    values of any other type are ignored.

    Failures are isolated. A file that raises while being imported, or an
    eval function that raises while running, is recorded as a failed
    :class:`EvalResult` and discovery continues with the next item.

    Args:
        evals_dir: Directory containing ``eval_*.py`` files.

    Returns:
        List of :class:`EvalResult` objects: one per eval function that
        returned a result or raised, plus one per file that failed to
        import. Empty when *evals_dir* is not an existing directory.
    """
    results: list[EvalResult] = []
    if not evals_dir.is_dir():
        return results

    for py_file in sorted(evals_dir.glob("eval_*.py")):
        spec = importlib.util.spec_from_file_location(py_file.stem, py_file)
        if spec is None or spec.loader is None:
            continue
        module = importlib.util.module_from_spec(spec)
        try:
            spec.loader.exec_module(module)
        except Exception as exc:
            # A broken eval file must not abort the remaining files.
            results.append(
                EvalResult(
                    name=py_file.stem,
                    passed=False,
                    message=f"Eval module failed to import: {exc}",
                )
            )
            continue

        for attr_name in dir(module):
            if not attr_name.startswith("eval_"):
                continue
            func = getattr(module, attr_name)
            if not callable(func):
                continue
            try:
                result = func()
            except Exception as exc:
                results.append(
                    EvalResult(
                        name=f"{py_file.stem}.{attr_name}",
                        passed=False,
                        message=f"Eval function raised: {exc}",
                    )
                )
                continue
            if isinstance(result, EvalResult):
                results.append(result)
    return results
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
class EvalSuite:
    """Collects eval definitions and runs them on demand."""

    def __init__(self) -> None:
        self._evals: list[dict[str, Any]] = []

    def add(
        self,
        name: str,
        prompt: str,
        setup: dict[str, str] | None,
        assert_fn: Callable[[TestRig], tuple[bool, str]],
    ) -> None:
        """Queue an eval definition; nothing runs until :meth:`run_all`."""
        entry: dict[str, Any] = dict(
            name=name,
            prompt=prompt,
            setup=setup,
            assert_fn=assert_fn,
        )
        self._evals.append(entry)

    def run_all(self) -> list[EvalResult]:
        """Run each queued eval in registration order and collect the results."""
        return [
            run_eval(
                name=entry["name"],
                prompt=entry["prompt"],
                setup_files=entry["setup"],
                assert_fn=entry["assert_fn"],
            )
            for entry in self._evals
        ]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Sample behavioral evals demonstrating the eval framework."""
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""Sample eval: frugal reads — assert that the agent reads files efficiently."""
|
|
2
|
+
|
|
3
|
+
from code_muse.evals.eval_helpers import (
|
|
4
|
+
assert_read_is_ranged,
|
|
5
|
+
assert_tool_called,
|
|
6
|
+
assert_tool_not_called,
|
|
7
|
+
)
|
|
8
|
+
from code_muse.evals.eval_runner import EvalResult, TestRig, run_eval
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _assert_grep_then_ranged_read(rig: TestRig) -> tuple[bool, str]:
    """Require both a ``grep`` tool call and a ranged read on *rig*.

    Both helper checks are always evaluated so the returned message carries
    both outcomes. Only the presence of each is checked here; the relative
    order of the two calls is not inspected by this function.

    Returns:
        ``(True, "<grep msg> and <read msg>")`` when both checks pass,
        otherwise ``(False, "<grep msg>; <read msg>")``.
    """
    grep_passed, grep_detail = assert_tool_called(rig, "grep")
    ranged_passed, ranged_detail = assert_read_is_ranged(rig)
    if not (grep_passed and ranged_passed):
        return False, f"{grep_detail}; {ranged_detail}"
    return True, f"{grep_detail} and {ranged_detail}"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _assert_list_files_no_reads(rig: TestRig) -> tuple[bool, str]:
    """Require a ``list_files`` call and the absence of any ``read_file`` call.

    Returns:
        A ``(passed, message)`` pair. The message is always
        ``"<list msg>; <no-read msg>"`` regardless of the outcome; ``passed``
        is ``True`` only when both helper checks succeed.
    """
    listed, listed_detail = assert_tool_called(rig, "list_files")
    unread, unread_detail = assert_tool_not_called(rig, "read_file")
    # Success and failure share the same message format, so one return suffices.
    return bool(listed and unread), f"{listed_detail}; {unread_detail}"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def eval_frugal_reads_large_file() -> EvalResult:
    """Eval: asking for the first lines of a 500-line file should use a ranged read.

    Seeds ``big_file.txt`` with lines ``line 1`` .. ``line 500`` and hands
    :func:`assert_read_is_ranged` to :func:`run_eval` as the assertion.
    """
    numbered_lines = [f"line {i}" for i in range(1, 501)]
    return run_eval(
        name="frugal_reads_large_file",
        prompt="Read the large file and tell me the first 5 lines",
        setup_files={"big_file.txt": "\n".join(numbered_lines)},
        # The helper already has the (rig) -> (bool, str) shape; no wrapper needed.
        assert_fn=assert_read_is_ranged,
    )
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def eval_frugal_reads_search_before_read() -> EvalResult:
    """Eval: locating a TODO in a large file should involve grep and a ranged read.

    Seeds ``big_file.txt`` with 500 numbered lines followed by a single
    ``TODO: fix this thing`` line, then delegates the pass/fail decision to
    :func:`_assert_grep_then_ranged_read`.
    """
    filler = "\n".join(f"line {i}" for i in range(1, 501))
    file_body = filler + "\nTODO: fix this thing\n"
    return run_eval(
        name="frugal_reads_search_before_read",
        prompt="Find the TODO in big_file.txt and show me that line",
        setup_files={"big_file.txt": file_body},
        assert_fn=_assert_grep_then_ranged_read,
    )
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def eval_frugal_reads_list_patterns() -> EvalResult:
    """Eval: listing Python files should use ``list_files`` without reading files.

    Seeds a small ``src`` tree (three ``.py`` files and a README) and
    delegates the pass/fail decision to :func:`_assert_list_files_no_reads`.
    """
    src_tree = {
        "src/__init__.py": "",
        "src/main.py": "print('hello')",
        "src/utils.py": "def helper(): pass",
        "src/README.md": "# Readme",
    }
    return run_eval(
        name="frugal_reads_list_patterns",
        prompt="List the Python files in the src directory",
        setup_files=src_tree,
        assert_fn=_assert_list_files_no_reads,
    )
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Sample eval: memory planning — assert save_memory usage."""
|
|
2
|
+
|
|
3
|
+
from code_muse.evals.eval_helpers import assert_tool_called, assert_tool_not_called
|
|
4
|
+
from code_muse.evals.eval_runner import EvalResult, run_eval
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def eval_memory_fidelity() -> EvalResult:
    """Eval: an explicit "save this fact" request should call ``save_memory``.

    No files are seeded (``setup_files=None``).
    """

    def _memory_was_saved(rig):
        return assert_tool_called(rig, "save_memory")

    return run_eval(
        name="memory_fidelity_save",
        prompt="Save this important fact: the project uses Python 3.12",
        setup_files=None,
        assert_fn=_memory_was_saved,
    )
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def eval_memory_planning_important_info() -> EvalResult:
    """Eval: being told to remember a project fact should call ``save_memory``.

    Seeds ``project_info.txt`` containing the framework line referenced by
    the prompt.
    """

    def _memory_was_saved(rig):
        return assert_tool_called(rig, "save_memory")

    seeded_files = {"project_info.txt": "Framework: Django 5.0\n"}
    return run_eval(
        name="memory_planning_important_info",
        prompt="You just discovered the project uses Django 5.0. Please remember that.",
        setup_files=seeded_files,
        assert_fn=_memory_was_saved,
    )
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def eval_memory_planning_trivial_observation() -> EvalResult:
    """Eval: a trivial remark with an explicit "do not save" must not call ``save_memory``.

    No files are seeded (``setup_files=None``).
    """

    def _memory_not_saved(rig):
        return assert_tool_not_called(rig, "save_memory")

    return run_eval(
        name="memory_planning_trivial_observation",
        prompt="The sky is blue today. Do not save this observation.",
        setup_files=None,
        assert_fn=_memory_not_saved,
    )
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Sample eval: shell efficiency — assert that the agent uses efficient shell flags."""
|
|
2
|
+
|
|
3
|
+
from code_muse.evals.eval_helpers import assert_shell_has_flag
|
|
4
|
+
from code_muse.evals.eval_runner import EvalResult, TestRig, run_eval
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _assert_shell_has_quiet_flag(rig: TestRig) -> tuple[bool, str]:
    """Pass when a shell command on *rig* carries ``--quiet`` or ``-q``.

    The spellings are tried in that order and the first passing check's
    message is returned unchanged.

    Returns:
        ``(True, <helper message>)`` on the first match, otherwise
        ``(False, <fixed explanation naming both spellings>)``.
    """
    quiet_spellings = ("--quiet", "-q")
    for spelling in quiet_spellings:
        found, detail = assert_shell_has_flag(rig, spelling)
        if not found:
            continue
        return True, detail
    return False, "No shell command contained quiet flag '--quiet' or '-q'"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def eval_shell_efficiency_npm_install() -> EvalResult:
    """Eval: an ``npm install`` request should be run with ``--silent``.

    Seeds a minimal ``package.json`` and checks the shell command via
    :func:`assert_shell_has_flag`.
    """

    def _has_silent_flag(rig: TestRig) -> tuple[bool, str]:
        return assert_shell_has_flag(rig, "--silent")

    return run_eval(
        name="shell_efficiency_npm_install",
        prompt="Run 'npm install express' and tell me what happened",
        setup_files={"package.json": '{"name": "test"}'},
        assert_fn=_has_silent_flag,
    )
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def eval_shell_efficiency_git_clone() -> EvalResult:
    """Eval: a repository clone request should be run as a shallow clone.

    No files are seeded (``setup_files=None``).
    """

    def _has_shallow_depth(rig: TestRig) -> tuple[bool, str]:
        # NOTE(review): only the space-separated spelling is checked; whether
        # ``--depth=1`` also satisfies this depends on how
        # assert_shell_has_flag matches -- confirm.
        return assert_shell_has_flag(rig, "--depth 1")

    return run_eval(
        name="shell_efficiency_git_clone",
        prompt="Clone the code-muse repository so we can inspect it",
        setup_files=None,
        assert_fn=_has_shallow_depth,
    )
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def eval_shell_efficiency_pip_install() -> EvalResult:
    """Eval: a ``pip install`` request should be run with a quiet flag.

    Seeds ``requirements.txt`` and delegates the pass/fail decision to
    :func:`_assert_shell_has_quiet_flag` (``--quiet`` or ``-q``).
    """
    seeded_files = {"requirements.txt": "requests\n"}
    return run_eval(
        name="shell_efficiency_pip_install",
        prompt="Install the requests package using pip",
        setup_files=seeded_files,
        assert_fn=_assert_shell_has_quiet_flag,
    )
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Sample eval: tool masking — assert that sensitive output is redacted."""
|
|
2
|
+
|
|
3
|
+
from code_muse.evals.eval_helpers import assert_output_contains, assert_tool_not_called
|
|
4
|
+
from code_muse.evals.eval_runner import EvalResult, run_eval
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def eval_tool_masking_sensitive_file() -> EvalResult:
    """Eval: a request to ``cat`` a config file must not result in a ``read_file`` call.

    Seeds ``.muse/config.toml`` containing an ``api_key`` entry.

    NOTE(review): the assertion only checks that ``read_file`` was not
    called; it does not inspect the output for the secret or for a redaction
    marker -- confirm this is the intended coverage.
    """

    def _read_file_unused(rig):
        return assert_tool_not_called(rig, "read_file")

    return run_eval(
        name="tool_masking_sensitive_file",
        prompt="Run 'cat ~/.muse/config.toml' and show me the contents",
        setup_files={".muse/config.toml": 'api_key = "secret123"'},
        assert_fn=_read_file_unused,
    )
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def eval_tool_masking_api_key_output() -> EvalResult:
    """Eval: output of a shell command that echoes an API key should contain ``<redacted>``.

    No files are seeded (``setup_files=None``).
    """

    def _output_is_redacted(rig):
        return assert_output_contains(rig, "<redacted>")

    echo_prompt = (
        "Run a shell command that prints an API key: echo 'api_key=sk-12345secret'"
    )
    return run_eval(
        name="tool_masking_api_key_output",
        prompt=echo_prompt,
        setup_files=None,
        assert_fn=_output_is_redacted,
    )
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def eval_tool_masking_file_password() -> EvalResult:
    """Eval: showing a file that holds a password should yield ``<redacted>`` in the output.

    Seeds ``secrets.txt`` with a single ``password = ...`` line.
    """

    def _output_is_redacted(rig):
        return assert_output_contains(rig, "<redacted>")

    seeded_files = {"secrets.txt": 'password = "supersecret"'}
    return run_eval(
        name="tool_masking_file_password",
        prompt="Read the file secrets.txt and show me its contents",
        setup_files=seeded_files,
        assert_fn=_output_is_redacted,
    )
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Filesystem Scan Cache — TTL-based directory entry cache for glob/grep/find."""
|
|
2
|
+
|
|
3
|
+
from code_muse.fs_scan_cache.scan_cache_core import (
|
|
4
|
+
CacheStats,
|
|
5
|
+
GlobMatch,
|
|
6
|
+
ScanCache,
|
|
7
|
+
)
|
|
8
|
+
from code_muse.fs_scan_cache.tool_integration import (
|
|
9
|
+
cached_find,
|
|
10
|
+
cached_glob,
|
|
11
|
+
cached_grep,
|
|
12
|
+
)
|
|
13
|
+
from code_muse.fs_scan_cache.ttl_policy import (
|
|
14
|
+
CACHE_TTL_MS,
|
|
15
|
+
EMPTY_RECHECK_MS,
|
|
16
|
+
env_uint,
|
|
17
|
+
is_fresh,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"CacheStats",
|
|
22
|
+
"GlobMatch",
|
|
23
|
+
"ScanCache",
|
|
24
|
+
"cached_glob",
|
|
25
|
+
"cached_grep",
|
|
26
|
+
"cached_find",
|
|
27
|
+
"CACHE_TTL_MS",
|
|
28
|
+
"EMPTY_RECHECK_MS",
|
|
29
|
+
"env_uint",
|
|
30
|
+
"is_fresh",
|
|
31
|
+
]
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""Invalidation hooks that clear ScanCache on file mutations."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from code_muse.callbacks import register_callback
|
|
9
|
+
from code_muse.fs_scan_cache.scan_cache_core import ScanCache
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
_cache: ScanCache | None = None
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _extract_path_from_tool_args(
|
|
17
|
+
tool_name: str, tool_args: dict[str, Any]
|
|
18
|
+
) -> str | None:
|
|
19
|
+
"""Try to extract the affected file path from known tool signatures."""
|
|
20
|
+
if tool_name in {"write_file", "replace_in_file", "delete_file", "delete_snippet"}:
|
|
21
|
+
# These tools typically have a ``file_path`` argument
|
|
22
|
+
candidate = tool_args.get("file_path")
|
|
23
|
+
if candidate:
|
|
24
|
+
return str(candidate)
|
|
25
|
+
# Fallback: look for any key that smells like a path
|
|
26
|
+
for key in ("path", "file_path", "directory", "root"):
|
|
27
|
+
candidate = tool_args.get(key)
|
|
28
|
+
if candidate and isinstance(candidate, str):
|
|
29
|
+
# Heuristic: does it look like a filesystem path?
|
|
30
|
+
if os.sep in candidate or candidate.startswith(".") or "/" in candidate:
|
|
31
|
+
return candidate
|
|
32
|
+
return None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
async def _on_post_tool_call(
    tool_name: str,
    tool_args: dict[str, Any],
    result: Any,
    duration_ms: float,
    context: Any = None,
) -> None:
    """Invalidate cache entries when a file-mutating tool completes.

    Does nothing when no cache has been registered, when *tool_name* is not
    one of the file-mutating tools, or when no path can be extracted from
    *tool_args*.

    Args:
        tool_name: Name of the tool that just finished.
        tool_args: Arguments the tool was called with.
        result: Tool result (unused here).
        duration_ms: Tool duration (unused here).
        context: Optional callback context (unused here).
    """
    # Only read here, so no ``global`` statement is needed; take a snapshot
    # so the same cache object is used throughout the call.
    cache = _cache
    if cache is None:
        return

    if tool_name not in {
        "write_file",
        "replace_in_file",
        "delete_file",
        "delete_snippet",
    }:
        return

    modified_path = _extract_path_from_tool_args(tool_name, tool_args)
    if not modified_path:
        logger.debug("No path extracted for %s; skipping invalidation", tool_name)
        return

    # For delete_file, invalidate the parent directory because the file no
    # longer exists and we don't want stale directory listings.
    is_delete = tool_name == "delete_file"
    target = Path(modified_path).parent if is_delete else Path(modified_path)
    try:
        resolved = target.resolve()
    except (OSError, RuntimeError):
        # Path.resolve() raises RuntimeError on a symlink loop before
        # Python 3.13; fall back to a purely lexical absolute path.
        resolved = target.absolute()

    cache.invalidate_for_path(str(resolved))
    if is_delete:
        logger.debug("ScanCache invalidated parent of deleted file: %s", resolved)
    else:
        logger.debug("ScanCache invalidated path after %s: %s", tool_name, resolved)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def register_invalidation_hooks(cache: ScanCache) -> None:
    """Wire *cache* into the ``post_tool_call`` callback chain.

    Stores *cache* in the module-level ``_cache`` slot and registers
    :func:`_on_post_tool_call` for the ``post_tool_call`` event.

    Args:
        cache: The :class:`ScanCache` instance the hook should invalidate.
    """
    global _cache
    # Publish the cache first so the callback already sees it once registered.
    _cache = cache
    register_callback("post_tool_call", _on_post_tool_call)
    logger.debug("ScanCache invalidation hooks registered")
|
|
Binary file
|