code-muse 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (394) hide show
  1. code_muse/__init__.py +26 -0
  2. code_muse/__main__.py +10 -0
  3. code_muse/agents/__init__.py +31 -0
  4. code_muse/agents/_builder.py +214 -0
  5. code_muse/agents/_compaction.py +506 -0
  6. code_muse/agents/_diagnostics.py +171 -0
  7. code_muse/agents/_history.py +382 -0
  8. code_muse/agents/_key_listeners.py +148 -0
  9. code_muse/agents/_non_streaming_render.py +148 -0
  10. code_muse/agents/_runtime.py +596 -0
  11. code_muse/agents/agent_creator_agent.py +603 -0
  12. code_muse/agents/agent_helios.py +47 -0
  13. code_muse/agents/agent_manager.py +740 -0
  14. code_muse/agents/agent_muse.py +78 -0
  15. code_muse/agents/agent_planning.py +44 -0
  16. code_muse/agents/agent_qa_melpomene.py +207 -0
  17. code_muse/agents/base_agent.py +194 -0
  18. code_muse/agents/event_stream_handler.py +361 -0
  19. code_muse/agents/json_agent.py +201 -0
  20. code_muse/agents/prompt_v3.py +521 -0
  21. code_muse/agents/subagent_stream_handler.py +273 -0
  22. code_muse/callbacks.py +941 -0
  23. code_muse/chatgpt_codex_client.py +333 -0
  24. code_muse/claude_cache_client.py +853 -0
  25. code_muse/cli_runner/__init__.py +319 -0
  26. code_muse/cli_runner/args.py +63 -0
  27. code_muse/cli_runner/loop.py +510 -0
  28. code_muse/cli_runner/resume.py +72 -0
  29. code_muse/cli_runner/runner.py +161 -0
  30. code_muse/command_line/__init__.py +1 -0
  31. code_muse/command_line/add_model_menu.py +1331 -0
  32. code_muse/command_line/agent_menu.py +674 -0
  33. code_muse/command_line/attachments.py +397 -0
  34. code_muse/command_line/autosave_menu.py +709 -0
  35. code_muse/command_line/clipboard.py +528 -0
  36. code_muse/command_line/colors_menu.py +530 -0
  37. code_muse/command_line/command_handler.py +262 -0
  38. code_muse/command_line/command_registry.py +150 -0
  39. code_muse/command_line/config_commands.py +711 -0
  40. code_muse/command_line/core_commands.py +740 -0
  41. code_muse/command_line/diff_menu.py +865 -0
  42. code_muse/command_line/file_path_completion.py +73 -0
  43. code_muse/command_line/load_context_completion.py +57 -0
  44. code_muse/command_line/model_picker_completion.py +512 -0
  45. code_muse/command_line/model_settings_menu.py +983 -0
  46. code_muse/command_line/onboarding_slides.py +162 -0
  47. code_muse/command_line/onboarding_wizard.py +337 -0
  48. code_muse/command_line/pagination.py +41 -0
  49. code_muse/command_line/pin_command_completion.py +329 -0
  50. code_muse/command_line/prompt_toolkit_completion.py +886 -0
  51. code_muse/command_line/session_commands.py +304 -0
  52. code_muse/command_line/shell_passthrough.py +145 -0
  53. code_muse/command_line/skills_completion.py +158 -0
  54. code_muse/command_line/types.py +18 -0
  55. code_muse/command_line/uc_menu.py +908 -0
  56. code_muse/command_line/utils.py +105 -0
  57. code_muse/command_line/wiggum_state.py +77 -0
  58. code_muse/config.py +1138 -0
  59. code_muse/config_agent.py +168 -0
  60. code_muse/config_appearance.py +241 -0
  61. code_muse/config_model.py +357 -0
  62. code_muse/config_security.py +73 -0
  63. code_muse/error_logging.py +132 -0
  64. code_muse/evals/__init__.py +35 -0
  65. code_muse/evals/eval_helpers.py +81 -0
  66. code_muse/evals/eval_runner.py +299 -0
  67. code_muse/evals/sample_evals/__init__.py +1 -0
  68. code_muse/evals/sample_evals/eval_frugal_reads.py +59 -0
  69. code_muse/evals/sample_evals/eval_memory_planning.py +31 -0
  70. code_muse/evals/sample_evals/eval_shell_efficiency.py +39 -0
  71. code_muse/evals/sample_evals/eval_tool_masking.py +33 -0
  72. code_muse/fs_scan_cache/__init__.py +31 -0
  73. code_muse/fs_scan_cache/invalidation_hooks.py +89 -0
  74. code_muse/fs_scan_cache/scan_cache_core.cpython-314-darwin.so +0 -0
  75. code_muse/fs_scan_cache/scan_cache_core.pyx +203 -0
  76. code_muse/fs_scan_cache/tool_integration.py +309 -0
  77. code_muse/fs_scan_cache/ttl_policy.py +44 -0
  78. code_muse/gemini_code_assist.py +383 -0
  79. code_muse/gemini_model.py +838 -0
  80. code_muse/hook_engine/README.md +105 -0
  81. code_muse/hook_engine/__init__.py +21 -0
  82. code_muse/hook_engine/aliases.py +153 -0
  83. code_muse/hook_engine/engine.py +221 -0
  84. code_muse/hook_engine/executor.py +347 -0
  85. code_muse/hook_engine/matcher.py +154 -0
  86. code_muse/hook_engine/models.py +245 -0
  87. code_muse/hook_engine/registry.py +114 -0
  88. code_muse/hook_engine/trust.py +268 -0
  89. code_muse/hook_engine/validator.py +144 -0
  90. code_muse/http_utils.py +360 -0
  91. code_muse/keymap.py +128 -0
  92. code_muse/list_filtering.py +26 -0
  93. code_muse/main.py +10 -0
  94. code_muse/messaging/__init__.py +259 -0
  95. code_muse/messaging/bus.py +621 -0
  96. code_muse/messaging/commands.py +166 -0
  97. code_muse/messaging/markdown_patches.py +57 -0
  98. code_muse/messaging/message_queue.py +397 -0
  99. code_muse/messaging/messages.py +591 -0
  100. code_muse/messaging/queue_console.py +269 -0
  101. code_muse/messaging/renderers.py +308 -0
  102. code_muse/messaging/rich_renderer.py +1158 -0
  103. code_muse/messaging/shimmer.py +154 -0
  104. code_muse/messaging/spinner/__init__.py +87 -0
  105. code_muse/messaging/spinner/console_spinner.py +250 -0
  106. code_muse/messaging/spinner/spinner_base.py +82 -0
  107. code_muse/messaging/subagent_console.py +458 -0
  108. code_muse/model_factory.py +1203 -0
  109. code_muse/model_switching.py +59 -0
  110. code_muse/model_utils.py +156 -0
  111. code_muse/models.json +66 -0
  112. code_muse/models_cache/__init__.py +26 -0
  113. code_muse/models_cache/blocking_lru_cache.py +98 -0
  114. code_muse/models_cache/cache_writer.py +86 -0
  115. code_muse/models_cache/sha256_hash.cpython-314-darwin.so +0 -0
  116. code_muse/models_cache/sha256_hash.pyx +34 -0
  117. code_muse/models_cache/startup_integration.py +75 -0
  118. code_muse/models_dev_api.json +1 -0
  119. code_muse/models_dev_parser.py +590 -0
  120. code_muse/motion.py +126 -0
  121. code_muse/plugins/__init__.py +471 -0
  122. code_muse/plugins/agent_skills/__init__.py +32 -0
  123. code_muse/plugins/agent_skills/config.py +176 -0
  124. code_muse/plugins/agent_skills/discovery.py +309 -0
  125. code_muse/plugins/agent_skills/downloader.py +389 -0
  126. code_muse/plugins/agent_skills/installer.py +19 -0
  127. code_muse/plugins/agent_skills/metadata.py +293 -0
  128. code_muse/plugins/agent_skills/prompt_builder.py +66 -0
  129. code_muse/plugins/agent_skills/register_callbacks.py +298 -0
  130. code_muse/plugins/agent_skills/remote_catalog.py +320 -0
  131. code_muse/plugins/agent_skills/skill_catalog.py +254 -0
  132. code_muse/plugins/agent_skills/skills_install_menu.py +690 -0
  133. code_muse/plugins/agent_skills/skills_menu.py +791 -0
  134. code_muse/plugins/autonomous_memory/__init__.py +39 -0
  135. code_muse/plugins/autonomous_memory/bm25_scorer.cpython-314-darwin.so +0 -0
  136. code_muse/plugins/autonomous_memory/bm25_scorer.cpython-314-x86_64-linux-gnu.so +0 -0
  137. code_muse/plugins/autonomous_memory/bm25_scorer.pyx +291 -0
  138. code_muse/plugins/autonomous_memory/consolidation.py +82 -0
  139. code_muse/plugins/autonomous_memory/extraction.py +382 -0
  140. code_muse/plugins/autonomous_memory/lease_lock.py +105 -0
  141. code_muse/plugins/autonomous_memory/memory_injection.py +59 -0
  142. code_muse/plugins/autonomous_memory/register_callbacks.py +268 -0
  143. code_muse/plugins/autonomous_memory/secret_scanner.py +62 -0
  144. code_muse/plugins/autonomous_memory/session_scanner.py +163 -0
  145. code_muse/plugins/aws_bedrock/__init__.py +14 -0
  146. code_muse/plugins/aws_bedrock/config.py +99 -0
  147. code_muse/plugins/aws_bedrock/register_callbacks.py +241 -0
  148. code_muse/plugins/aws_bedrock/utils.py +153 -0
  149. code_muse/plugins/azure_foundry/README.md +238 -0
  150. code_muse/plugins/azure_foundry/__init__.py +15 -0
  151. code_muse/plugins/azure_foundry/config.py +125 -0
  152. code_muse/plugins/azure_foundry/discovery.py +187 -0
  153. code_muse/plugins/azure_foundry/register_callbacks.py +495 -0
  154. code_muse/plugins/azure_foundry/token.py +180 -0
  155. code_muse/plugins/azure_foundry/utils.py +345 -0
  156. code_muse/plugins/build_filter/__init__.py +1 -0
  157. code_muse/plugins/build_filter/register_callbacks.py +201 -0
  158. code_muse/plugins/build_filter/strategies/__init__.py +1 -0
  159. code_muse/plugins/build_filter/strategies/build.py +397 -0
  160. code_muse/plugins/chatgpt_oauth/__init__.py +6 -0
  161. code_muse/plugins/chatgpt_oauth/config.py +52 -0
  162. code_muse/plugins/chatgpt_oauth/oauth_flow.py +338 -0
  163. code_muse/plugins/chatgpt_oauth/register_callbacks.py +172 -0
  164. code_muse/plugins/chatgpt_oauth/test_plugin.py +301 -0
  165. code_muse/plugins/chatgpt_oauth/utils.py +538 -0
  166. code_muse/plugins/checkpointing/__init__.py +29 -0
  167. code_muse/plugins/checkpointing/checkpoint_hook.py +51 -0
  168. code_muse/plugins/checkpointing/conversation_snapshots.py +117 -0
  169. code_muse/plugins/checkpointing/register_callbacks.py +51 -0
  170. code_muse/plugins/checkpointing/restore_command.py +263 -0
  171. code_muse/plugins/checkpointing/rewind_shortcut.py +88 -0
  172. code_muse/plugins/checkpointing/shadow_git.py +90 -0
  173. code_muse/plugins/claude_code_hooks/__init__.py +1 -0
  174. code_muse/plugins/claude_code_hooks/config.py +188 -0
  175. code_muse/plugins/claude_code_hooks/register_callbacks.py +208 -0
  176. code_muse/plugins/claude_code_oauth/README.md +167 -0
  177. code_muse/plugins/claude_code_oauth/SETUP.md +93 -0
  178. code_muse/plugins/claude_code_oauth/__init__.py +25 -0
  179. code_muse/plugins/claude_code_oauth/config.py +52 -0
  180. code_muse/plugins/claude_code_oauth/fast_mode.py +124 -0
  181. code_muse/plugins/claude_code_oauth/prompt_handler.py +63 -0
  182. code_muse/plugins/claude_code_oauth/register_callbacks.py +547 -0
  183. code_muse/plugins/claude_code_oauth/test_fast_mode.py +165 -0
  184. code_muse/plugins/claude_code_oauth/test_plugin.py +283 -0
  185. code_muse/plugins/claude_code_oauth/token_refresh_heartbeat.py +237 -0
  186. code_muse/plugins/claude_code_oauth/utils.py +664 -0
  187. code_muse/plugins/copilot_auth/__init__.py +11 -0
  188. code_muse/plugins/copilot_auth/config.py +91 -0
  189. code_muse/plugins/copilot_auth/reasoning_client.py +409 -0
  190. code_muse/plugins/copilot_auth/register_callbacks.py +461 -0
  191. code_muse/plugins/copilot_auth/utils.py +584 -0
  192. code_muse/plugins/custom_commands/__init__.py +14 -0
  193. code_muse/plugins/custom_commands/args_injection.py +82 -0
  194. code_muse/plugins/custom_commands/command_discovery.py +89 -0
  195. code_muse/plugins/custom_commands/command_toml_schema.py +71 -0
  196. code_muse/plugins/custom_commands/register_callbacks.py +176 -0
  197. code_muse/plugins/customizable_commands/__init__.py +0 -0
  198. code_muse/plugins/customizable_commands/register_callbacks.py +136 -0
  199. code_muse/plugins/destructive_command_guard/__init__.py +14 -0
  200. code_muse/plugins/destructive_command_guard/detector.py +375 -0
  201. code_muse/plugins/destructive_command_guard/register_callbacks.py +148 -0
  202. code_muse/plugins/example_custom_command/README.md +280 -0
  203. code_muse/plugins/example_custom_command/register_callbacks.py +51 -0
  204. code_muse/plugins/file_permission_handler/__init__.py +4 -0
  205. code_muse/plugins/file_permission_handler/register_callbacks.py +441 -0
  206. code_muse/plugins/filter_engine/__init__.py +30 -0
  207. code_muse/plugins/filter_engine/classifier.py +153 -0
  208. code_muse/plugins/filter_engine/content_detector.py +184 -0
  209. code_muse/plugins/filter_engine/dispatcher.py +244 -0
  210. code_muse/plugins/filter_engine/register_callbacks.py +188 -0
  211. code_muse/plugins/filter_engine/registry.py +279 -0
  212. code_muse/plugins/filter_engine/strategies/__init__.py +8 -0
  213. code_muse/plugins/filter_engine/strategies/ast_compressor.cpython-314-darwin.so +0 -0
  214. code_muse/plugins/filter_engine/strategies/ast_compressor.cpython-314-x86_64-linux-gnu.so +0 -0
  215. code_muse/plugins/filter_engine/strategies/ast_compressor.pyx +348 -0
  216. code_muse/plugins/filter_engine/strategies/ast_parser.py +167 -0
  217. code_muse/plugins/filter_engine/strategies/code.cpython-314-darwin.so +0 -0
  218. code_muse/plugins/filter_engine/strategies/code.cpython-314-x86_64-linux-gnu.so +0 -0
  219. code_muse/plugins/filter_engine/strategies/code.pyx +584 -0
  220. code_muse/plugins/filter_engine/strategies/git.cpython-314-darwin.so +0 -0
  221. code_muse/plugins/filter_engine/strategies/git.cpython-314-x86_64-linux-gnu.so +0 -0
  222. code_muse/plugins/filter_engine/strategies/git.pyx +438 -0
  223. code_muse/plugins/filter_engine/strategies/json_compressor.cpython-314-darwin.so +0 -0
  224. code_muse/plugins/filter_engine/strategies/json_compressor.pyx +253 -0
  225. code_muse/plugins/filter_engine/strategies/json_patterns.cpython-314-darwin.so +0 -0
  226. code_muse/plugins/filter_engine/strategies/json_patterns.pyx +178 -0
  227. code_muse/plugins/filter_engine/strategies/lint.cpython-314-darwin.so +0 -0
  228. code_muse/plugins/filter_engine/strategies/lint.cpython-314-x86_64-linux-gnu.so +0 -0
  229. code_muse/plugins/filter_engine/strategies/lint.pyx +626 -0
  230. code_muse/plugins/filter_engine/strategies/test.cpython-314-darwin.so +0 -0
  231. code_muse/plugins/filter_engine/strategies/test.cpython-314-x86_64-linux-gnu.so +0 -0
  232. code_muse/plugins/filter_engine/strategies/test.pyx +431 -0
  233. code_muse/plugins/filter_engine/verbosity.py +63 -0
  234. code_muse/plugins/force_push_guard/__init__.py +5 -0
  235. code_muse/plugins/force_push_guard/detector.py +96 -0
  236. code_muse/plugins/force_push_guard/register_callbacks.py +144 -0
  237. code_muse/plugins/force_push_guard/test_detector.py +143 -0
  238. code_muse/plugins/frontend_emitter/__init__.py +25 -0
  239. code_muse/plugins/frontend_emitter/emitter.py +121 -0
  240. code_muse/plugins/frontend_emitter/register_callbacks.py +259 -0
  241. code_muse/plugins/gac/__init__.py +4 -0
  242. code_muse/plugins/gac/git_ops.py +136 -0
  243. code_muse/plugins/gac/prompt.py +191 -0
  244. code_muse/plugins/gac/register_callbacks.py +82 -0
  245. code_muse/plugins/hook_creator/__init__.py +1 -0
  246. code_muse/plugins/hook_creator/register_callbacks.py +34 -0
  247. code_muse/plugins/hook_manager/__init__.py +1 -0
  248. code_muse/plugins/hook_manager/config.py +289 -0
  249. code_muse/plugins/hook_manager/hooks_menu.py +563 -0
  250. code_muse/plugins/hook_manager/register_callbacks.py +227 -0
  251. code_muse/plugins/hook_monitor/register_callbacks.py +36 -0
  252. code_muse/plugins/mindpack/__init__.py +0 -0
  253. code_muse/plugins/mindpack/factory.py +930 -0
  254. code_muse/plugins/mindpack/judge.py +573 -0
  255. code_muse/plugins/mindpack/memory.py +100 -0
  256. code_muse/plugins/mindpack/mindpack_menu.py +1552 -0
  257. code_muse/plugins/mindpack/orchestration.py +605 -0
  258. code_muse/plugins/mindpack/register_callbacks.py +175 -0
  259. code_muse/plugins/mindpack/schemas.py +358 -0
  260. code_muse/plugins/mindpack/tools.py +387 -0
  261. code_muse/plugins/oauth_muse_html.py +226 -0
  262. code_muse/plugins/ollama_setup/__init__.py +5 -0
  263. code_muse/plugins/ollama_setup/completer.py +36 -0
  264. code_muse/plugins/ollama_setup/register_callbacks.py +410 -0
  265. code_muse/plugins/plan_command/__init__.py +0 -0
  266. code_muse/plugins/plan_command/register_callbacks.py +206 -0
  267. code_muse/plugins/plan_mode/__init__.py +37 -0
  268. code_muse/plugins/plan_mode/mode_cycling.py +40 -0
  269. code_muse/plugins/plan_mode/plan_generation.py +68 -0
  270. code_muse/plugins/plan_mode/plan_hooks.py +74 -0
  271. code_muse/plugins/plan_mode/plan_mode_tools.py +138 -0
  272. code_muse/plugins/plan_mode/register_callbacks.py +121 -0
  273. code_muse/plugins/plugin_trust/register_callbacks.py +140 -0
  274. code_muse/plugins/policy_engine/__init__.py +46 -0
  275. code_muse/plugins/policy_engine/approval_flow_integration.py +59 -0
  276. code_muse/plugins/policy_engine/policy_evaluator.py +75 -0
  277. code_muse/plugins/policy_engine/policy_file_discovery.py +90 -0
  278. code_muse/plugins/policy_engine/policy_toml_schema.py +115 -0
  279. code_muse/plugins/policy_engine/register_callbacks.py +112 -0
  280. code_muse/plugins/pop_command/__init__.py +1 -0
  281. code_muse/plugins/pop_command/register_callbacks.py +189 -0
  282. code_muse/plugins/prompt_newline/__init__.py +13 -0
  283. code_muse/plugins/prompt_newline/config.py +19 -0
  284. code_muse/plugins/prompt_newline/register_callbacks.py +159 -0
  285. code_muse/plugins/safety_status/__init__.py +0 -0
  286. code_muse/plugins/safety_status/register_callbacks.py +113 -0
  287. code_muse/plugins/semantic_compression/__init__.py +6 -0
  288. code_muse/plugins/semantic_compression/compressor.py +295 -0
  289. code_muse/plugins/semantic_compression/config.py +123 -0
  290. code_muse/plugins/semantic_compression/register_callbacks.py +320 -0
  291. code_muse/plugins/shell_minimizer/__init__.py +50 -0
  292. code_muse/plugins/shell_minimizer/builtin_filters.toml +393 -0
  293. code_muse/plugins/shell_minimizer/pipeline.py +556 -0
  294. code_muse/plugins/shell_minimizer/primitives.py +482 -0
  295. code_muse/plugins/shell_minimizer/register_callbacks.py +276 -0
  296. code_muse/plugins/shell_safety/__init__.py +6 -0
  297. code_muse/plugins/shell_safety/agent_shell_safety.py +69 -0
  298. code_muse/plugins/shell_safety/command_cache.py +149 -0
  299. code_muse/plugins/shell_safety/register_callbacks.py +202 -0
  300. code_muse/plugins/synthetic_status/__init__.py +1 -0
  301. code_muse/plugins/synthetic_status/register_callbacks.py +128 -0
  302. code_muse/plugins/synthetic_status/status_api.py +145 -0
  303. code_muse/plugins/token_caching/__init__.py +21 -0
  304. code_muse/plugins/token_caching/cache_hit_tracking.py +128 -0
  305. code_muse/plugins/token_caching/cacheable_prefix_detection.py +28 -0
  306. code_muse/plugins/token_caching/register_callbacks.py +54 -0
  307. code_muse/plugins/token_caching/stats_display.py +35 -0
  308. code_muse/plugins/token_tracking/__init__.py +26 -0
  309. code_muse/plugins/token_tracking/database.py +381 -0
  310. code_muse/plugins/token_tracking/edit_analyzer.py +97 -0
  311. code_muse/plugins/token_tracking/record.py +55 -0
  312. code_muse/plugins/token_tracking/register_callbacks.py +277 -0
  313. code_muse/plugins/token_tracking/reports.py +329 -0
  314. code_muse/plugins/universal_constructor/__init__.py +13 -0
  315. code_muse/plugins/universal_constructor/models.py +136 -0
  316. code_muse/plugins/universal_constructor/register_callbacks.py +47 -0
  317. code_muse/plugins/universal_constructor/registry.py +390 -0
  318. code_muse/plugins/universal_constructor/runner.py +474 -0
  319. code_muse/plugins/universal_constructor/safety.py +440 -0
  320. code_muse/plugins/universal_constructor/sandbox.py +584 -0
  321. code_muse/provider_identity.py +105 -0
  322. code_muse/pydantic_patches.py +410 -0
  323. code_muse/reopenable_async_client.py +233 -0
  324. code_muse/round_robin_model.py +151 -0
  325. code_muse/secret_storage.py +74 -0
  326. code_muse/security/__init__.py +1 -0
  327. code_muse/security/redaction.cpython-314-darwin.so +0 -0
  328. code_muse/security/redaction.cpython-314-x86_64-linux-gnu.so +0 -0
  329. code_muse/security/redaction.pyx +135 -0
  330. code_muse/session_storage.py +565 -0
  331. code_muse/status_display.py +261 -0
  332. code_muse/stream_parser/__init__.py +76 -0
  333. code_muse/stream_parser/assistant_text_parser.py +90 -0
  334. code_muse/stream_parser/citation_parser.py +76 -0
  335. code_muse/stream_parser/inline_hidden_tag_parser.py +236 -0
  336. code_muse/stream_parser/proposed_plan_parser.py +158 -0
  337. code_muse/stream_parser/stream_text_chunk.py +23 -0
  338. code_muse/stream_parser/stream_text_parser.py +27 -0
  339. code_muse/stream_parser/tagged_line_parser.cpython-314-darwin.so +0 -0
  340. code_muse/stream_parser/tagged_line_parser.pyx +251 -0
  341. code_muse/stream_parser/utf8_stream_parser.cpython-314-darwin.so +0 -0
  342. code_muse/stream_parser/utf8_stream_parser.pyx +206 -0
  343. code_muse/summarization_agent.py +308 -0
  344. code_muse/terminal_utils.cpython-314-darwin.so +0 -0
  345. code_muse/terminal_utils.cpython-314-x86_64-linux-gnu.so +0 -0
  346. code_muse/terminal_utils.pyx +483 -0
  347. code_muse/tools/__init__.py +459 -0
  348. code_muse/tools/agent_tools.py +613 -0
  349. code_muse/tools/ask_user_question/__init__.py +26 -0
  350. code_muse/tools/ask_user_question/constants.py +73 -0
  351. code_muse/tools/ask_user_question/demo_tui.py +55 -0
  352. code_muse/tools/ask_user_question/handler.py +232 -0
  353. code_muse/tools/ask_user_question/models.py +302 -0
  354. code_muse/tools/ask_user_question/registration.py +37 -0
  355. code_muse/tools/ask_user_question/renderers.py +336 -0
  356. code_muse/tools/ask_user_question/terminal_ui.py +327 -0
  357. code_muse/tools/ask_user_question/theme.py +156 -0
  358. code_muse/tools/ask_user_question/tui_loop.py +422 -0
  359. code_muse/tools/background_jobs.py +99 -0
  360. code_muse/tools/browser/__init__.py +37 -0
  361. code_muse/tools/browser/browser_control.py +289 -0
  362. code_muse/tools/browser/browser_interactions.py +545 -0
  363. code_muse/tools/browser/browser_locators.py +640 -0
  364. code_muse/tools/browser/browser_manager.py +376 -0
  365. code_muse/tools/browser/browser_navigation.py +251 -0
  366. code_muse/tools/browser/browser_screenshot.py +180 -0
  367. code_muse/tools/browser/browser_scripts.py +462 -0
  368. code_muse/tools/browser/browser_workflows.py +222 -0
  369. code_muse/tools/chrome_cdp/__init__.py +1070 -0
  370. code_muse/tools/chrome_cdp/register_callbacks.py +61 -0
  371. code_muse/tools/command_runner.py +1401 -0
  372. code_muse/tools/common.py +1407 -0
  373. code_muse/tools/display.py +87 -0
  374. code_muse/tools/file_modifications.py +1099 -0
  375. code_muse/tools/file_operations.py +860 -0
  376. code_muse/tools/image_tools.py +185 -0
  377. code_muse/tools/meetin_proxy/__init__.py +243 -0
  378. code_muse/tools/meetin_proxy/capture_addon.py +82 -0
  379. code_muse/tools/meetin_proxy/proxy_manager.py +326 -0
  380. code_muse/tools/meetin_proxy/register_callbacks.py +45 -0
  381. code_muse/tools/path_policy.py +219 -0
  382. code_muse/tools/skills_tools.py +586 -0
  383. code_muse/tools/subagent_context.py +158 -0
  384. code_muse/tools/tools_content.py +50 -0
  385. code_muse/tools/universal_constructor.py +965 -0
  386. code_muse/uvx_detection.py +241 -0
  387. code_muse/version_checker.py +86 -0
  388. code_muse-0.0.1.data/data/code_muse/models.json +66 -0
  389. code_muse-0.0.1.data/data/code_muse/models_dev_api.json +1 -0
  390. code_muse-0.0.1.dist-info/METADATA +845 -0
  391. code_muse-0.0.1.dist-info/RECORD +394 -0
  392. code_muse-0.0.1.dist-info/WHEEL +4 -0
  393. code_muse-0.0.1.dist-info/entry_points.txt +2 -0
  394. code_muse-0.0.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,357 @@
1
+ """Config: model settings."""
2
+
3
+ import configparser
4
+
5
+ import code_muse.config as _config
6
+
7
+
8
def get_summarization_model_name() -> str:
    """Resolve which model handles compaction/summarization.

    The ``summarization_model`` config key wins when it holds a non-empty
    value. Otherwise the global model name is returned, so users who never
    configured a dedicated summarizer keep the legacy behavior.

    Summarization is one-shot and large-context, so it often suits a
    cheaper, faster, or long-context model than the main agent; keeping the
    two settings separate lets users choose independently.
    """
    configured = _config.get_value("summarization_model")
    return configured if configured else _config.get_global_model_name()
24
+
25
+
26
def set_summarization_model_name(model: str) -> None:
    """Store the summarization model under the ``summarization_model`` key.

    Any falsy ``model`` (for example an empty string) is written as ``""``,
    which makes :func:`get_summarization_model_name` fall back to the
    global model again.
    """
    stored = model if model else ""
    _config.set_config_value("summarization_model", stored)
33
+
34
+
35
def get_muse_token():
    """Return the ``muse_token`` config value as reported by the config layer.

    The value is passed through untouched; presumably ``None`` when the key
    has never been set (that depends on ``get_value``).
    """
    token = _config.get_value("muse_token")
    return token
38
+
39
+
40
def set_muse_token(token: str):
    """Write ``token`` to the ``muse_token`` key via the config layer."""
    key = "muse_token"
    _config.set_config_value(key, token)
43
+
44
+
45
def get_openai_reasoning_effort() -> str:
    """Return the OpenAI reasoning effort: minimal, low, medium, high or xhigh.

    The stored value is trimmed and lower-cased before validation. A missing,
    empty, or unrecognized value yields ``"medium"``.
    """
    raw = _config.get_value("openai_reasoning_effort")
    effort = (raw if raw else "medium").strip().lower()
    if effort in ("minimal", "low", "medium", "high", "xhigh"):
        return effort
    return "medium"
54
+
55
+
56
def set_openai_reasoning_effort(value: str) -> None:
    """Validate and store the OpenAI reasoning effort.

    Args:
        value: One of minimal, low, medium, high, xhigh. Surrounding
            whitespace and letter case are ignored.

    Raises:
        ValueError: If the normalized value is not an allowed effort.
    """
    allowed = frozenset(("minimal", "low", "medium", "high", "xhigh"))
    candidate = value.strip().lower() if value else ""
    if candidate in allowed:
        _config.set_config_value("openai_reasoning_effort", candidate)
        return
    choices = ", ".join(sorted(allowed))
    raise ValueError(f"Invalid reasoning effort '{value}'. Allowed: {choices}")
65
+
66
+
67
def get_openai_reasoning_summary() -> str:
    """Return the OpenAI reasoning summary mode.

    Recognized modes:
        - auto: the provider picks the summary style
        - concise: shorter reasoning summaries
        - detailed: fuller reasoning summaries

    The stored value is trimmed and lower-cased. When nothing is stored the
    result is ``"detailed"``; when something is stored but is not one of the
    recognized modes the result is ``"auto"``.
    """
    raw = _config.get_value("openai_reasoning_summary")
    mode = (raw if raw else "detailed").strip().lower()
    if mode in ("auto", "concise", "detailed"):
        return mode
    return "auto"
82
+
83
+
84
def set_openai_reasoning_summary(value: str) -> None:
    """Validate and store the OpenAI reasoning summary mode.

    Args:
        value: One of auto, concise, detailed. Surrounding whitespace and
            letter case are ignored.

    Raises:
        ValueError: If the normalized value is not an allowed mode.
    """
    allowed = frozenset(("auto", "concise", "detailed"))
    candidate = value.strip().lower() if value else ""
    if candidate in allowed:
        _config.set_config_value("openai_reasoning_summary", candidate)
        return
    choices = ", ".join(sorted(allowed))
    raise ValueError(f"Invalid reasoning summary '{value}'. Allowed: {choices}")
93
+
94
+
95
def get_openai_verbosity() -> str:
    """Return the OpenAI verbosity level: low, medium or high.

    The level steers how terse or expansive responses are:
        - low: more concise responses
        - medium: balanced (default)
        - high: more verbose responses

    The stored value is trimmed and lower-cased; a missing, empty, or
    unrecognized value yields ``"medium"``.
    """
    raw = _config.get_value("openai_verbosity")
    level = (raw if raw else "medium").strip().lower()
    if level in ("low", "medium", "high"):
        return level
    return "medium"
108
+
109
+
110
def set_openai_verbosity(value: str) -> None:
    """Validate and store the OpenAI verbosity level.

    Args:
        value: One of low, medium, high. Surrounding whitespace and letter
            case are ignored.

    Raises:
        ValueError: If the normalized value is not an allowed level.
    """
    allowed = frozenset(("low", "medium", "high"))
    candidate = value.strip().lower() if value else ""
    if candidate in allowed:
        _config.set_config_value("openai_verbosity", candidate)
        return
    choices = ", ".join(sorted(allowed))
    raise ValueError(f"Invalid verbosity '{value}'. Allowed: {choices}")
119
+
120
+
121
def get_temperature() -> float | None:
    """Return the configured global model temperature, clamped to 0.0-2.0.

    Returns:
        A float between 0.0 and 2.0 when a usable value is stored, or
        ``None`` when the key is missing, blank, not parseable as a number,
        or NaN. Returning ``None`` lets each model keep its own default.
    """
    import math

    val = _config.get_value("temperature")
    if val is None or val.strip() == "":
        return None
    try:
        temp = float(val)
    # Parenthesized tuple: the bare ``except A, B:`` form only parses on
    # Python 3.14+, while this spelling works on every Python 3 release.
    except (ValueError, TypeError):
        return None
    # NaN compares false against everything, so the clamp below would
    # silently turn it into 2.0; treat it as "not configured" instead.
    if math.isnan(temp):
        return None
    # Clamp to valid range (most APIs accept 0-2)
    return max(0.0, min(2.0, temp))
137
+
138
+
139
def set_temperature(value: float | None) -> None:
    """Store the global model temperature in config.

    Args:
        value: Desired temperature, or ``None`` to clear the setting (an
            empty string is written). Numeric input is converted with
            ``float()`` and clamped into the 0.0-2.0 range before it is
            stored as text. Lower values are more deterministic, higher
            values more creative.

    Note: Consider using set_model_setting() for per-model temperature.
    """
    if value is not None:
        # Upper bound first, then lower bound - same order as a nested
        # max(0.0, min(2.0, ...)) expression.
        clamped = min(2.0, float(value))
        clamped = max(0.0, clamped)
        _config.set_config_value("temperature", str(clamped))
        return
    _config.set_config_value("temperature", "")
154
+
155
+
156
+ def _sanitize_model_name_for_key(model_name: str) -> str:
157
+ """Sanitize model name for use in config keys.
158
+
159
+ Replaces characters that might cause issues in config keys.
160
+ """
161
+ # Replace problematic characters with underscores
162
+ sanitized = model_name.replace(".", "_").replace("-", "_").replace("/", "_")
163
+ return sanitized.lower()
164
+
165
+
166
def get_model_setting(
    model_name: str, setting: str, default: float | None = None
) -> float | None:
    """Get a specific numeric setting for a model.

    The value is looked up under the config key
    ``model_settings_<sanitized model name>_<setting>``.

    Args:
        model_name: The model name (e.g., 'gpt-5', 'wafer.ai-glm-5.1')
        setting: The setting name (e.g., 'temperature', 'top_p', 'seed')
        default: Value returned when the setting is missing, blank, or not
            parseable as a number.

    Returns:
        The setting value as a float, or ``default`` if not set/parseable.
    """
    sanitized_name = _config._sanitize_model_name_for_key(model_name)
    key = f"model_settings_{sanitized_name}_{setting}"
    val = _config.get_value(key)

    if val is None or val.strip() == "":
        return default

    try:
        return float(val)
    # Parenthesized tuple: the bare ``except A, B:`` form only parses on
    # Python 3.14+, while this spelling works on every Python 3 release.
    except (ValueError, TypeError):
        return default
190
+
191
+
192
def set_model_setting(model_name: str, setting: str, value: float | None) -> None:
    """Store one per-model setting.

    The value is written under the config key
    ``model_settings_<sanitized model name>_<setting>``.

    Args:
        model_name: The model name (e.g., 'gpt-5', 'wafer.ai-glm-5.1')
        setting: The setting name (e.g., 'temperature', 'seed')
        value: The value to store; ``None`` clears it by writing an empty
            string.
    """
    sanitized_name = _config._sanitize_model_name_for_key(model_name)
    key = f"model_settings_{sanitized_name}_{setting}"

    if value is None:
        text = ""
    elif isinstance(value, float):
        # Two decimals keep float noise out of the file while still
        # allowing 0.05 step increments for temperature/top_p.
        text = str(round(value, 2))
    else:
        text = str(value)
    _config.set_config_value(key, text)
211
+
212
+
213
def get_all_model_settings(model_name: str) -> dict:
    """Get all settings stored for a specific model.

    Reads the config file directly and collects every non-blank key in the
    default section that starts with
    ``model_settings_<sanitized model name>_``.

    Args:
        model_name: The model name

    Returns:
        Dictionary of setting_name -> value for all configured settings.
        Values are converted as follows: ``"true"``/``"false"`` (any case)
        become ``bool``; text without a ``"."`` that parses as an integer
        becomes ``int``; text containing a ``"."`` that parses as a number
        becomes ``float``; anything else is kept as the stripped string.
    """
    sanitized_name = _config._sanitize_model_name_for_key(model_name)
    prefix = f"model_settings_{sanitized_name}_"

    config = configparser.ConfigParser()
    config.read(_config.CONFIG_FILE)

    settings = {}
    if _config.DEFAULT_SECTION in config:
        for key, val in config[_config.DEFAULT_SECTION].items():
            if key.startswith(prefix) and val.strip():
                setting_name = key[len(prefix) :]
                val_stripped = val.strip()
                # Check for boolean values first
                if val_stripped.lower() in ("true", "false"):
                    settings[setting_name] = val_stripped.lower() == "true"
                else:
                    try:
                        # Try int first for cleaner values like budget_tokens
                        if "." not in val_stripped:
                            settings[setting_name] = int(val_stripped)
                        else:
                            settings[setting_name] = float(val_stripped)
                    # Parenthesized tuple: the bare ``except A, B:`` form
                    # only parses on Python 3.14+, while this spelling
                    # works on every Python 3 release.
                    except (ValueError, TypeError):
                        # Keep as string if not a number
                        settings[setting_name] = val_stripped

    return settings
252
+
253
+
254
def clear_model_settings(model_name: str) -> None:
    """Clear all settings for a specific model.

    Removes every key in the default section that starts with
    ``model_settings_<sanitized model name>_`` and writes the config file
    back. The file is left untouched when there is nothing to remove.

    Args:
        model_name: The model name.
    """
    sanitized_name = _config._sanitize_model_name_for_key(model_name)
    prefix = f"model_settings_{sanitized_name}_"

    config = configparser.ConfigParser()
    # Read with the same encoding used for the write below so non-ASCII
    # values survive the round trip on platforms whose locale is not UTF-8.
    config.read(_config.CONFIG_FILE, encoding="utf-8")

    if _config.DEFAULT_SECTION not in config:
        return

    section = config[_config.DEFAULT_SECTION]
    keys_to_remove = [key for key in section if key.startswith(prefix)]
    if not keys_to_remove:
        # ConfigParser.write() re-serializes the whole file (comments are not
        # preserved), so skip the rewrite when nothing changed.
        return

    for key in keys_to_remove:
        del section[key]

    with open(_config.CONFIG_FILE, "w", encoding="utf-8") as f:
        config.write(f)
276
+
277
+
278
def get_effective_model_settings(model_name: str | None = None) -> dict:
    """Build the settings that actually apply to a model.

    Steps, in order:
        1. Resolve the model name (the current global model when ``None``).
        2. Load every per-model setting from config.
        3. If no per-model ``temperature`` exists, use the global temperature
           when one is configured.
        4. Drop settings the model does not support.
        5. Cast ``seed`` to ``int``; all other values pass through unchanged.

    Args:
        model_name: The model name. If None, uses the current global model.

    Returns:
        Dictionary of setting_name -> value for all applicable settings,
        ready to be unpacked into ModelSettings.
    """
    if model_name is None:
        model_name = _config.get_global_model_name()

    candidates = _config.get_all_model_settings(model_name)

    # Global temperature is only a fallback; a per-model value always wins.
    if "temperature" not in candidates:
        fallback_temp = _config.get_temperature()
        if fallback_temp is not None:
            candidates["temperature"] = fallback_temp

    supported = {}
    for name, raw in candidates.items():
        if not _config.model_supports_setting(model_name, name):
            continue
        needs_int = name == "seed" and raw is not None
        supported[name] = int(raw) if needs_int else raw

    return supported
317
+
318
+
319
def get_effective_temperature(model_name: str | None = None) -> float | None:
    """Return the temperature that applies to a model.

    Delegates to ``get_effective_model_settings``, which prefers the
    per-model value and falls back to the global temperature.

    Args:
        model_name: The model name. If None, uses the current global model.

    Returns:
        The ``"temperature"`` entry of the effective settings, or None when
        that entry is absent.
    """
    effective = _config.get_effective_model_settings(model_name)
    return effective.get("temperature")
332
+
333
+
334
def get_effective_top_p(model_name: str | None = None) -> float | None:
    """Return the top_p that applies to a model.

    Args:
        model_name: The model name. If None, uses the current global model.

    Returns:
        The ``"top_p"`` entry of the effective settings, or None when that
        entry is absent.
    """
    effective = _config.get_effective_model_settings(model_name)
    return effective.get("top_p")
345
+
346
+
347
def get_effective_seed(model_name: str | None = None) -> int | None:
    """Return the seed that applies to a model.

    Args:
        model_name: The model name. If None, uses the current global model.

    Returns:
        The ``"seed"`` entry of the effective settings, or None when that
        entry is absent.
    """
    effective = _config.get_effective_model_settings(model_name)
    return effective.get("seed")
@@ -0,0 +1,73 @@
1
+ """Config: security settings."""
2
+
3
+ import os
4
+
5
+ import code_muse.config as _config
6
+
7
+
8
def get_api_key(key_name: str) -> str:
    """Look up an API key stored in muse.cfg.

    Args:
        key_name: The name of the API key (e.g., 'OPENAI_API_KEY').

    Returns:
        The stored value, or an empty string when the lookup yields nothing
        (``None`` or an empty value).
    """
    stored = _config.get_value(key_name)
    return stored if stored else ""
18
+
19
+
20
def set_api_key(key_name: str, value: str):
    """Store an API key in muse.cfg.

    Thin wrapper that forwards both arguments to the config writer.

    Args:
        key_name: The name of the API key (e.g., 'OPENAI_API_KEY').
        value: The API key value; pass an empty string to blank it out.
    """
    _config.set_config_value(key_name, value)
28
+
29
+
30
def load_api_keys_to_environment():
    """Populate ``os.environ`` with API keys from ``.env`` and muse.cfg.

    Effective precedence, highest first:
        1. ``.env`` in the current working directory. It is loaded with
           ``override=True``, so its values replace variables already set.
        2. Variables already present (and non-empty) in the environment.
        3. muse.cfg, consulted only for names still unset or empty.

    If python-dotenv is not installed the ``.env`` step is skipped silently.
    Intended to be called at startup so API keys are available.
    """
    known_keys = (
        "OPENAI_API_KEY",
        "GEMINI_API_KEY",
        "ANTHROPIC_API_KEY",
        "CEREBRAS_API_KEY",
        "SYN_API_KEY",
        "AZURE_OPENAI_API_KEY",
        "AZURE_OPENAI_ENDPOINT",
        "OPENROUTER_API_KEY",
        "ZAI_API_KEY",
    )

    # Pass 1: .env from the current working directory.
    dotenv_path = _config.Path.cwd() / ".env"
    if dotenv_path.exists():
        try:
            from dotenv import load_dotenv

            # override=True lets .env win over pre-existing variables.
            load_dotenv(dotenv_path, override=True)
        except ImportError:
            # python-dotenv is optional; without it .env is ignored.
            pass

    # Pass 2: fill the remaining gaps from muse.cfg without clobbering
    # anything the environment (or .env) already provided.
    for name in known_keys:
        if os.environ.get(name):
            continue
        configured = get_api_key(name)
        if configured:
            os.environ[name] = configured
@@ -0,0 +1,132 @@
1
+ """Error logging utility for code_muse.
2
+
3
+ Logs unexpected errors to XDG_STATE_HOME/code_muse/logs/ for debugging purposes.
4
+ Per XDG spec, logs are "state data" (actions history), not configuration.
5
+ Because even good agents make mistakes sometimes!
6
+ """
7
+
8
+ import os
9
+ import traceback
10
+ from datetime import datetime
11
+ from pathlib import Path
12
+
13
+ from code_muse.config import STATE_DIR
14
+
15
# Error logs live under the state directory: per the XDG spec, logs are
# state data rather than configuration.
LOGS_DIR = STATE_DIR / "logs"
ERROR_LOG_FILE = LOGS_DIR / "errors.log"
# Size (5 MiB) above which the error log is rotated.
MAX_LOG_SIZE = 5 * 1024**2
19
+
20
+
21
def _rotate_log_if_needed() -> None:
    """Move the error log aside to ``<log>.1`` once it outgrows MAX_LOG_SIZE.

    Any previous ``.1`` file is replaced. Filesystem errors are ignored so
    that rotation can never break the caller.
    """
    try:
        if not ERROR_LOG_FILE.exists():
            return
        if ERROR_LOG_FILE.stat().st_size <= MAX_LOG_SIZE:
            return
        backup = str(ERROR_LOG_FILE) + ".1"
        os.replace(ERROR_LOG_FILE, backup)
    except OSError:
        pass
29
+
30
+
31
def _ensure_logs_dir() -> None:
    """Make sure the logs directory exists, requesting 0700 perms on creation."""
    logs_path = Path(LOGS_DIR)
    logs_path.mkdir(mode=0o700, parents=True, exist_ok=True)
34
+
35
+
36
def log_error(
    error: Exception,
    context: str | None = None,
    include_traceback: bool = True,
) -> None:
    """Append a formatted record of an exception to the error log.

    The record is framed by 80-character ``=`` rules and contains the
    timestamp, exception type and message, then the optional context,
    traceback and exception args. Logging is best-effort: any failure while
    preparing or writing the record is swallowed.

    Args:
        error: The exception to log.
        context: Optional context string describing where the error occurred.
        include_traceback: Whether to include the full traceback (default True).
    """
    try:
        _ensure_logs_dir()
        _rotate_log_if_needed()

        rule = "=" * 80
        timestamp = datetime.now().isoformat()
        error_type = type(error).__name__
        error_msg = str(error)

        # TODO: PEP 750 t-string — use templatelib when stable
        lines = [
            f"\n{rule}",
            f"Timestamp: {timestamp}",
            f"Error Type: {error_type}",
            f"Error Message: {error_msg}",
        ]

        if context:
            lines.append(f"Context: {context}")

        if include_traceback:
            frames = traceback.format_exception(
                type(error), error, error.__traceback__
            )
            lines.append("Traceback:\n" + "".join(frames))

        if hasattr(error, "args") and error.args:
            lines.append(f"Args: {error.args}")

        lines.append(f"{rule}\n")

        with open(ERROR_LOG_FILE, "a", encoding="utf-8") as f:
            f.write("\n".join(lines))

    except Exception:
        # Deliberately silent: a failure to log must never cause more
        # trouble than the error being logged.
        pass
85
+
86
+
87
def log_error_message(
    message: str,
    context: str | None = None,
) -> None:
    """Append a plain error message (no exception object) to the error log.

    The record is framed by 80-character ``=`` rules and contains the
    timestamp, the message and the optional context. Like ``log_error``,
    this is best-effort and swallows any failure.

    Args:
        message: The error message to log.
        context: Optional context string describing where the error occurred.
    """
    try:
        _ensure_logs_dir()
        _rotate_log_if_needed()

        rule = "=" * 80
        timestamp = datetime.now().isoformat()

        # TODO: PEP 750 t-string — use templatelib when stable
        lines = [
            f"\n{rule}",
            f"Timestamp: {timestamp}",
            f"Message: {message}",
        ]

        if context:
            lines.append(f"Context: {context}")

        lines.append(f"{rule}\n")

        with open(ERROR_LOG_FILE, "a", encoding="utf-8") as f:
            f.write("\n".join(lines))

    except Exception:
        # Deliberately silent: logging problems must not propagate.
        pass
123
+
124
+
125
def get_log_file_path() -> Path:
    """Return the path to the error log file.

    The value is the module-level ``ERROR_LOG_FILE``, which is built with the
    ``/`` operator from ``STATE_DIR`` and is therefore a path object, not a
    plain string (assuming ``STATE_DIR`` is a ``pathlib.Path``). Wrap the
    result in ``str()`` where a string is required.
    """
    return ERROR_LOG_FILE
128
+
129
+
130
def get_logs_dir() -> Path:
    """Return the path to the logs directory.

    The value is the module-level ``LOGS_DIR``, which is built with the ``/``
    operator from ``STATE_DIR`` and is therefore a path object, not a plain
    string (assuming ``STATE_DIR`` is a ``pathlib.Path``). Wrap the result in
    ``str()`` where a string is required.
    """
    return LOGS_DIR
@@ -0,0 +1,35 @@
1
+ """Behavioral Eval Framework for Muse.
2
+
3
+ Standalone test framework for evaluating agent behavior through
4
+ subprocess-based end-to-end testing with tool-call assertions.
5
+ """
6
+
7
+ from code_muse.evals.eval_helpers import (
8
+ assert_output_contains,
9
+ assert_read_is_ranged,
10
+ assert_shell_has_flag,
11
+ assert_tool_called,
12
+ assert_tool_not_called,
13
+ )
14
+ from code_muse.evals.eval_runner import (
15
+ EvalResult,
16
+ EvalSuite,
17
+ TestRig,
18
+ ToolCall,
19
+ run_all_evals,
20
+ run_eval,
21
+ )
22
+
23
# Public API of the eval package; every name is re-exported from the
# eval_runner / eval_helpers imports above.
__all__ = [
    # Runner types and entry points
    "ToolCall",
    "TestRig",
    "EvalResult",
    "run_eval",
    "run_all_evals",
    "EvalSuite",
    # Assertion helpers
    "assert_tool_called",
    "assert_tool_not_called",
    "assert_shell_has_flag",
    "assert_read_is_ranged",
    "assert_output_contains",
]
@@ -0,0 +1,81 @@
1
+ """Assertion helpers for the Behavioral Eval Framework.
2
+
3
+ Each helper inspects a :class:`TestRig` and returns ``(passed, message)``.
4
+ """
5
+
6
+ from code_muse.evals.eval_runner import TestRig
7
+
8
+
9
def assert_tool_called(
    rig: TestRig, tool_name: str, min_count: int = 1
) -> tuple[bool, str]:
    """Verify that *tool_name* appears at least *min_count* times in *rig*.

    Returns:
        ``(passed, message)`` where the message reports the observed count.
    """
    observed = len(rig.get_tool_calls_by_name(tool_name))
    if observed < min_count:
        return (
            False,
            f"Expected '{tool_name}' to be called ≥ {min_count} time(s), got {observed}",
        )
    return True, f"'{tool_name}' called {observed} time(s) (≥ {min_count})"
20
+
21
+
22
def assert_tool_not_called(rig: TestRig, tool_name: str) -> tuple[bool, str]:
    """Verify that *rig* recorded no call to *tool_name*.

    Returns:
        ``(passed, message)``; on failure the message reports the call count.
    """
    offending = rig.get_tool_calls_by_name(tool_name)
    if offending:
        return (
            False,
            f"Expected '{tool_name}' to not be called, got {len(offending)} call(s)",
        )
    return True, f"'{tool_name}' was not called"
31
+
32
+
33
def assert_shell_has_flag(rig: TestRig, flag: str) -> tuple[bool, str]:
    """Check that shell commands include a specific flag (e.g. ``--silent``).

    Inspects every ``agent_run_shell_command`` call recorded in *rig* and
    passes as soon as one call's ``command`` argument contains *flag*. The
    match is a plain containment test, not a tokenized comparison.

    Returns:
        ``(passed, message)``; on failure the message reports how many shell
        calls were inspected.
    """
    shell_calls = rig.get_tool_calls_by_name("agent_run_shell_command")
    for tc in shell_calls:
        # A call may carry ``"command": None``; treat it like a missing
        # command instead of letting ``flag in None`` raise TypeError.
        command = tc.tool_args.get("command") or ""
        if flag in command:
            return True, f"Shell command contains flag '{flag}'"
    return (
        False,
        f"No shell command contained flag '{flag}' among {len(shell_calls)} call(s)",
    )
44
+
45
+
46
def assert_read_is_ranged(rig: TestRig) -> tuple[bool, str]:
    """Verify that every ``read_file`` call requested a line range.

    A call counts as ranged when at least one of ``start_line`` or
    ``num_lines`` is present with a non-``None`` value. The check fails when
    no ``read_file`` call was recorded at all.

    Returns:
        ``(passed, message)`` with the ranged/total counts in the message.
    """
    read_calls = rig.get_tool_calls_by_name("read_file")
    if not read_calls:
        return False, "No 'read_file' calls observed"

    total = len(read_calls)
    ranged = sum(
        1
        for tc in read_calls
        if not (
            tc.tool_args.get("start_line") is None
            and tc.tool_args.get("num_lines") is None
        )
    )

    if ranged != total:
        return (
            False,
            f"Only {ranged}/{total} 'read_file' call(s) used range parameters",
        )
    return True, f"All {total} 'read_file' call(s) used range parameters"
66
+
67
+
68
def assert_output_contains(rig: TestRig, text: str) -> tuple[bool, str]:
    """Verify that the output captured for the eval contains *text*.

    The captured output is read from the ``result`` of the first synthetic
    ``_eval_output`` tool call in *rig* (expected to be injected by the eval
    framework). The check fails when no such call exists; a missing or empty
    ``result`` is treated as an empty string.

    Returns:
        ``(passed, message)``.
    """
    captured = rig.get_tool_calls_by_name("_eval_output")
    if not captured:
        return False, "No eval output captured (internal)"

    haystack = captured[0].result or ""
    if text not in haystack:
        return False, f"Output does not contain '{text}'"
    return True, f"Output contains '{text}'"