claude-mpm 4.21.3__py3-none-any.whl → 5.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (484) hide show
  1. claude_mpm/VERSION +1 -1
  2. claude_mpm/agents/BASE_PM.md +12 -0
  3. claude_mpm/agents/OUTPUT_STYLE.md +3 -48
  4. claude_mpm/agents/PM_INSTRUCTIONS.md +632 -334
  5. claude_mpm/agents/WORKFLOW.md +75 -2
  6. claude_mpm/agents/__init__.py +6 -0
  7. claude_mpm/agents/agent_loader.py +1 -4
  8. claude_mpm/agents/base_agent.json +6 -3
  9. claude_mpm/agents/frontmatter_validator.py +1 -1
  10. claude_mpm/agents/templates/{circuit_breakers.md → circuit-breakers.md} +370 -3
  11. claude_mpm/agents/templates/context-management-examples.md +544 -0
  12. claude_mpm/agents/templates/{pm_red_flags.md → pm-red-flags.md} +89 -19
  13. claude_mpm/agents/templates/pr-workflow-examples.md +427 -0
  14. claude_mpm/agents/templates/research-gate-examples.md +669 -0
  15. claude_mpm/agents/templates/structured-questions-examples.md +615 -0
  16. claude_mpm/agents/templates/ticket-completeness-examples.md +139 -0
  17. claude_mpm/agents/templates/ticketing-examples.md +277 -0
  18. claude_mpm/cli/__init__.py +38 -2
  19. claude_mpm/cli/commands/agent_source.py +774 -0
  20. claude_mpm/cli/commands/agent_state_manager.py +125 -20
  21. claude_mpm/cli/commands/agents.py +684 -13
  22. claude_mpm/cli/commands/agents_cleanup.py +210 -0
  23. claude_mpm/cli/commands/agents_discover.py +338 -0
  24. claude_mpm/cli/commands/aggregate.py +1 -1
  25. claude_mpm/cli/commands/analyze.py +3 -3
  26. claude_mpm/cli/commands/auto_configure.py +2 -6
  27. claude_mpm/cli/commands/cleanup.py +1 -1
  28. claude_mpm/cli/commands/config.py +7 -4
  29. claude_mpm/cli/commands/configure.py +478 -44
  30. claude_mpm/cli/commands/configure_agent_display.py +4 -4
  31. claude_mpm/cli/commands/configure_navigation.py +63 -46
  32. claude_mpm/cli/commands/debug.py +12 -12
  33. claude_mpm/cli/commands/doctor.py +10 -2
  34. claude_mpm/cli/commands/hook_errors.py +277 -0
  35. claude_mpm/cli/commands/local_deploy.py +1 -4
  36. claude_mpm/cli/commands/mcp_install_commands.py +1 -1
  37. claude_mpm/cli/commands/mpm_init/core.py +50 -2
  38. claude_mpm/cli/commands/mpm_init/git_activity.py +10 -10
  39. claude_mpm/cli/commands/mpm_init/prompts.py +6 -6
  40. claude_mpm/cli/commands/run.py +124 -128
  41. claude_mpm/cli/commands/skill_source.py +694 -0
  42. claude_mpm/cli/commands/skills.py +435 -1
  43. claude_mpm/cli/executor.py +78 -3
  44. claude_mpm/cli/interactive/agent_wizard.py +919 -41
  45. claude_mpm/cli/parsers/agent_source_parser.py +171 -0
  46. claude_mpm/cli/parsers/agents_parser.py +173 -4
  47. claude_mpm/cli/parsers/base_parser.py +49 -0
  48. claude_mpm/cli/parsers/config_parser.py +96 -43
  49. claude_mpm/cli/parsers/skill_source_parser.py +169 -0
  50. claude_mpm/cli/parsers/skills_parser.py +138 -0
  51. claude_mpm/cli/parsers/source_parser.py +138 -0
  52. claude_mpm/cli/startup.py +499 -84
  53. claude_mpm/cli/startup_display.py +480 -0
  54. claude_mpm/cli/utils.py +1 -1
  55. claude_mpm/cli_module/commands.py +1 -1
  56. claude_mpm/commands/{mpm-auto-configure.md → mpm-agents-auto-configure.md} +9 -0
  57. claude_mpm/commands/mpm-agents-detect.md +9 -0
  58. claude_mpm/commands/{mpm-agents.md → mpm-agents-list.md} +9 -0
  59. claude_mpm/commands/mpm-agents-recommend.md +9 -0
  60. claude_mpm/commands/{mpm-config.md → mpm-config-view.md} +9 -0
  61. claude_mpm/commands/mpm-doctor.md +9 -0
  62. claude_mpm/commands/mpm-help.md +11 -2
  63. claude_mpm/commands/mpm-init.md +27 -2
  64. claude_mpm/commands/mpm-monitor.md +9 -0
  65. claude_mpm/commands/{mpm-resume.md → mpm-session-resume.md} +9 -0
  66. claude_mpm/commands/mpm-status.md +9 -0
  67. claude_mpm/commands/{mpm-organize.md → mpm-ticket-organize.md} +9 -0
  68. claude_mpm/commands/mpm-ticket-view.md +552 -0
  69. claude_mpm/commands/mpm-version.md +9 -0
  70. claude_mpm/commands/mpm.md +10 -0
  71. claude_mpm/config/agent_presets.py +258 -0
  72. claude_mpm/config/agent_sources.py +325 -0
  73. claude_mpm/config/skill_sources.py +590 -0
  74. claude_mpm/constants.py +12 -0
  75. claude_mpm/core/api_validator.py +1 -1
  76. claude_mpm/core/claude_runner.py +17 -10
  77. claude_mpm/core/config.py +24 -0
  78. claude_mpm/core/constants.py +1 -1
  79. claude_mpm/core/framework/__init__.py +3 -16
  80. claude_mpm/core/framework/loaders/instruction_loader.py +25 -5
  81. claude_mpm/core/framework/processors/metadata_processor.py +1 -1
  82. claude_mpm/core/hook_error_memory.py +381 -0
  83. claude_mpm/core/hook_manager.py +41 -2
  84. claude_mpm/core/interactive_session.py +112 -5
  85. claude_mpm/core/logger.py +3 -1
  86. claude_mpm/core/oneshot_session.py +94 -4
  87. claude_mpm/dashboard/static/css/activity.css +69 -69
  88. claude_mpm/dashboard/static/css/connection-status.css +10 -10
  89. claude_mpm/dashboard/static/css/dashboard.css +15 -15
  90. claude_mpm/dashboard/static/js/components/activity-tree.js +178 -178
  91. claude_mpm/dashboard/static/js/components/agent-hierarchy.js +101 -101
  92. claude_mpm/dashboard/static/js/components/agent-inference.js +31 -31
  93. claude_mpm/dashboard/static/js/components/build-tracker.js +59 -59
  94. claude_mpm/dashboard/static/js/components/code-simple.js +107 -107
  95. claude_mpm/dashboard/static/js/components/connection-debug.js +101 -101
  96. claude_mpm/dashboard/static/js/components/diff-viewer.js +113 -113
  97. claude_mpm/dashboard/static/js/components/event-viewer.js +12 -12
  98. claude_mpm/dashboard/static/js/components/file-change-tracker.js +57 -57
  99. claude_mpm/dashboard/static/js/components/file-change-viewer.js +74 -74
  100. claude_mpm/dashboard/static/js/components/file-tool-tracker.js +6 -6
  101. claude_mpm/dashboard/static/js/components/file-viewer.js +42 -42
  102. claude_mpm/dashboard/static/js/components/module-viewer.js +27 -27
  103. claude_mpm/dashboard/static/js/components/session-manager.js +14 -14
  104. claude_mpm/dashboard/static/js/components/socket-manager.js +1 -1
  105. claude_mpm/dashboard/static/js/components/ui-state-manager.js +14 -14
  106. claude_mpm/dashboard/static/js/components/unified-data-viewer.js +110 -110
  107. claude_mpm/dashboard/static/js/components/working-directory.js +8 -8
  108. claude_mpm/dashboard/static/js/connection-manager.js +76 -76
  109. claude_mpm/dashboard/static/js/dashboard.js +76 -58
  110. claude_mpm/dashboard/static/js/extension-error-handler.js +22 -22
  111. claude_mpm/dashboard/static/js/socket-client.js +138 -121
  112. claude_mpm/dashboard/templates/code_simple.html +23 -23
  113. claude_mpm/dashboard/templates/index.html +18 -18
  114. claude_mpm/experimental/cli_enhancements.py +1 -5
  115. claude_mpm/hooks/claude_hooks/event_handlers.py +3 -1
  116. claude_mpm/hooks/claude_hooks/hook_handler.py +24 -7
  117. claude_mpm/hooks/claude_hooks/installer.py +45 -0
  118. claude_mpm/hooks/failure_learning/__init__.py +2 -8
  119. claude_mpm/hooks/failure_learning/failure_detection_hook.py +1 -6
  120. claude_mpm/hooks/failure_learning/fix_detection_hook.py +1 -6
  121. claude_mpm/hooks/failure_learning/learning_extraction_hook.py +1 -6
  122. claude_mpm/hooks/kuzu_response_hook.py +1 -5
  123. claude_mpm/hooks/templates/pre_tool_use_simple.py +78 -0
  124. claude_mpm/hooks/templates/pre_tool_use_template.py +323 -0
  125. claude_mpm/models/git_repository.py +198 -0
  126. claude_mpm/scripts/claude-hook-handler.sh +3 -3
  127. claude_mpm/scripts/start_activity_logging.py +3 -1
  128. claude_mpm/services/agents/agent_builder.py +45 -9
  129. claude_mpm/services/agents/agent_preset_service.py +238 -0
  130. claude_mpm/services/agents/agent_selection_service.py +484 -0
  131. claude_mpm/services/agents/auto_deploy_index_parser.py +569 -0
  132. claude_mpm/services/agents/deployment/agent_deployment.py +126 -2
  133. claude_mpm/services/agents/deployment/agent_discovery_service.py +105 -73
  134. claude_mpm/services/agents/deployment/agent_format_converter.py +1 -1
  135. claude_mpm/services/agents/deployment/agent_lifecycle_manager.py +1 -5
  136. claude_mpm/services/agents/deployment/agent_metrics_collector.py +3 -3
  137. claude_mpm/services/agents/deployment/agent_restore_handler.py +1 -4
  138. claude_mpm/services/agents/deployment/agent_template_builder.py +236 -15
  139. claude_mpm/services/agents/deployment/agents_directory_resolver.py +101 -15
  140. claude_mpm/services/agents/deployment/async_agent_deployment.py +2 -1
  141. claude_mpm/services/agents/deployment/facade/deployment_facade.py +3 -3
  142. claude_mpm/services/agents/deployment/multi_source_deployment_service.py +115 -15
  143. claude_mpm/services/agents/deployment/pipeline/pipeline_executor.py +2 -2
  144. claude_mpm/services/agents/deployment/refactored_agent_deployment_service.py +1 -4
  145. claude_mpm/services/agents/deployment/remote_agent_discovery_service.py +363 -0
  146. claude_mpm/services/agents/deployment/single_agent_deployer.py +2 -2
  147. claude_mpm/services/agents/deployment/system_instructions_deployer.py +168 -46
  148. claude_mpm/services/agents/deployment/validation/deployment_validator.py +2 -2
  149. claude_mpm/services/agents/git_source_manager.py +629 -0
  150. claude_mpm/services/agents/loading/framework_agent_loader.py +9 -12
  151. claude_mpm/services/agents/local_template_manager.py +50 -10
  152. claude_mpm/services/agents/single_tier_deployment_service.py +696 -0
  153. claude_mpm/services/agents/sources/__init__.py +13 -0
  154. claude_mpm/services/agents/sources/agent_sync_state.py +516 -0
  155. claude_mpm/services/agents/sources/git_source_sync_service.py +1055 -0
  156. claude_mpm/services/agents/startup_sync.py +239 -0
  157. claude_mpm/services/agents/toolchain_detector.py +474 -0
  158. claude_mpm/services/cli/session_pause_manager.py +1 -1
  159. claude_mpm/services/cli/unified_dashboard_manager.py +1 -1
  160. claude_mpm/services/command_deployment_service.py +92 -1
  161. claude_mpm/services/core/interfaces/__init__.py +1 -3
  162. claude_mpm/services/core/interfaces/health.py +1 -4
  163. claude_mpm/services/core/models/__init__.py +2 -11
  164. claude_mpm/services/diagnostics/checks/__init__.py +4 -0
  165. claude_mpm/services/diagnostics/checks/agent_check.py +0 -2
  166. claude_mpm/services/diagnostics/checks/agent_sources_check.py +577 -0
  167. claude_mpm/services/diagnostics/checks/instructions_check.py +1 -2
  168. claude_mpm/services/diagnostics/checks/mcp_check.py +0 -1
  169. claude_mpm/services/diagnostics/checks/monitor_check.py +0 -1
  170. claude_mpm/services/diagnostics/checks/skill_sources_check.py +587 -0
  171. claude_mpm/services/diagnostics/diagnostic_runner.py +9 -0
  172. claude_mpm/services/diagnostics/doctor_reporter.py +40 -10
  173. claude_mpm/services/event_bus/direct_relay.py +3 -3
  174. claude_mpm/services/event_bus/event_bus.py +36 -3
  175. claude_mpm/services/events/consumers/logging.py +1 -2
  176. claude_mpm/services/git/__init__.py +21 -0
  177. claude_mpm/services/git/git_operations_service.py +494 -0
  178. claude_mpm/services/github/__init__.py +21 -0
  179. claude_mpm/services/github/github_cli_service.py +397 -0
  180. claude_mpm/services/infrastructure/monitoring/__init__.py +1 -5
  181. claude_mpm/services/infrastructure/monitoring/aggregator.py +1 -6
  182. claude_mpm/services/infrastructure/monitoring/resources.py +1 -1
  183. claude_mpm/services/instructions/__init__.py +9 -0
  184. claude_mpm/services/instructions/instruction_cache_service.py +374 -0
  185. claude_mpm/services/local_ops/__init__.py +3 -13
  186. claude_mpm/services/local_ops/health_checks/__init__.py +1 -3
  187. claude_mpm/services/local_ops/health_manager.py +1 -4
  188. claude_mpm/services/local_ops/process_manager.py +1 -1
  189. claude_mpm/services/local_ops/resource_monitor.py +2 -2
  190. claude_mpm/services/mcp_gateway/config/configuration.py +1 -1
  191. claude_mpm/services/mcp_gateway/server/mcp_gateway.py +1 -6
  192. claude_mpm/services/mcp_gateway/server/stdio_server.py +0 -2
  193. claude_mpm/services/mcp_gateway/tools/document_summarizer.py +1 -1
  194. claude_mpm/services/mcp_gateway/tools/kuzu_memory_service.py +6 -2
  195. claude_mpm/services/memory/optimizer.py +1 -1
  196. claude_mpm/services/model/model_router.py +8 -9
  197. claude_mpm/services/monitor/daemon.py +1 -1
  198. claude_mpm/services/monitor/server.py +2 -2
  199. claude_mpm/services/native_agent_converter.py +356 -0
  200. claude_mpm/services/port_manager.py +1 -1
  201. claude_mpm/services/pr/__init__.py +14 -0
  202. claude_mpm/services/pr/pr_template_service.py +329 -0
  203. claude_mpm/services/project/documentation_manager.py +2 -1
  204. claude_mpm/services/project/toolchain_analyzer.py +3 -1
  205. claude_mpm/services/runner_configuration_service.py +1 -0
  206. claude_mpm/services/self_upgrade_service.py +165 -7
  207. claude_mpm/services/skills/__init__.py +18 -0
  208. claude_mpm/services/skills/git_skill_source_manager.py +1169 -0
  209. claude_mpm/services/skills/skill_discovery_service.py +568 -0
  210. claude_mpm/services/skills_config.py +547 -0
  211. claude_mpm/services/skills_deployer.py +955 -0
  212. claude_mpm/services/socketio/handlers/connection.py +1 -1
  213. claude_mpm/services/socketio/handlers/git.py +2 -2
  214. claude_mpm/services/socketio/server/core.py +1 -4
  215. claude_mpm/services/socketio/server/main.py +1 -3
  216. claude_mpm/services/system_instructions_service.py +1 -3
  217. claude_mpm/services/unified/analyzer_strategies/performance_analyzer.py +0 -3
  218. claude_mpm/services/unified/analyzer_strategies/security_analyzer.py +0 -1
  219. claude_mpm/services/unified/deployment_strategies/cloud_strategies.py +1 -1
  220. claude_mpm/services/unified/deployment_strategies/vercel.py +1 -5
  221. claude_mpm/services/unified/unified_deployment.py +1 -5
  222. claude_mpm/services/version_control/conflict_resolution.py +6 -4
  223. claude_mpm/services/visualization/__init__.py +1 -5
  224. claude_mpm/services/visualization/mermaid_generator.py +2 -3
  225. claude_mpm/skills/bundled/infrastructure/env-manager/scripts/validate_env.py +576 -0
  226. claude_mpm/skills/bundled/performance-profiling.md +6 -0
  227. claude_mpm/skills/bundled/testing/webapp-testing/scripts/with_server.py +2 -2
  228. claude_mpm/skills/skills_registry.py +0 -1
  229. claude_mpm/templates/questions/__init__.py +38 -0
  230. claude_mpm/templates/questions/base.py +193 -0
  231. claude_mpm/templates/questions/pr_strategy.py +311 -0
  232. claude_mpm/templates/questions/project_init.py +385 -0
  233. claude_mpm/templates/questions/ticket_mgmt.py +394 -0
  234. claude_mpm/tools/__main__.py +8 -8
  235. claude_mpm/tools/code_tree_analyzer/analysis.py +1 -1
  236. claude_mpm/utils/agent_dependency_loader.py +80 -13
  237. claude_mpm/utils/dependency_cache.py +3 -1
  238. claude_mpm/utils/gitignore.py +241 -0
  239. claude_mpm/utils/log_cleanup.py +3 -3
  240. claude_mpm/utils/progress.py +383 -0
  241. claude_mpm/utils/robust_installer.py +3 -5
  242. claude_mpm/utils/structured_questions.py +619 -0
  243. {claude_mpm-4.21.3.dist-info → claude_mpm-5.0.2.dist-info}/METADATA +429 -59
  244. {claude_mpm-4.21.3.dist-info → claude_mpm-5.0.2.dist-info}/RECORD +252 -425
  245. claude_mpm/agents/templates/.claude-mpm/memories/README.md +0 -17
  246. claude_mpm/agents/templates/.claude-mpm/memories/engineer_memories.md +0 -3
  247. claude_mpm/agents/templates/agent-manager.json +0 -273
  248. claude_mpm/agents/templates/agentic-coder-optimizer.json +0 -248
  249. claude_mpm/agents/templates/api_qa.json +0 -180
  250. claude_mpm/agents/templates/clerk-ops.json +0 -235
  251. claude_mpm/agents/templates/code_analyzer.json +0 -101
  252. claude_mpm/agents/templates/content-agent.json +0 -358
  253. claude_mpm/agents/templates/dart_engineer.json +0 -307
  254. claude_mpm/agents/templates/data_engineer.json +0 -225
  255. claude_mpm/agents/templates/documentation.json +0 -211
  256. claude_mpm/agents/templates/engineer.json +0 -210
  257. claude_mpm/agents/templates/gcp_ops_agent.json +0 -253
  258. claude_mpm/agents/templates/golang_engineer.json +0 -270
  259. claude_mpm/agents/templates/imagemagick.json +0 -264
  260. claude_mpm/agents/templates/java_engineer.json +0 -346
  261. claude_mpm/agents/templates/local_ops_agent.json +0 -1840
  262. claude_mpm/agents/templates/logs/prompts/agent_engineer_20250826_014258_728.md +0 -39
  263. claude_mpm/agents/templates/logs/prompts/agent_engineer_20250901_010124_142.md +0 -400
  264. claude_mpm/agents/templates/memory_manager.json +0 -158
  265. claude_mpm/agents/templates/nextjs_engineer.json +0 -285
  266. claude_mpm/agents/templates/ops.json +0 -185
  267. claude_mpm/agents/templates/php-engineer.json +0 -287
  268. claude_mpm/agents/templates/product_owner.json +0 -338
  269. claude_mpm/agents/templates/project_organizer.json +0 -140
  270. claude_mpm/agents/templates/prompt-engineer.json +0 -737
  271. claude_mpm/agents/templates/python_engineer.json +0 -387
  272. claude_mpm/agents/templates/qa.json +0 -242
  273. claude_mpm/agents/templates/react_engineer.json +0 -238
  274. claude_mpm/agents/templates/refactoring_engineer.json +0 -276
  275. claude_mpm/agents/templates/research.json +0 -188
  276. claude_mpm/agents/templates/ruby-engineer.json +0 -280
  277. claude_mpm/agents/templates/rust_engineer.json +0 -275
  278. claude_mpm/agents/templates/security.json +0 -202
  279. claude_mpm/agents/templates/svelte-engineer.json +0 -225
  280. claude_mpm/agents/templates/ticketing.json +0 -177
  281. claude_mpm/agents/templates/typescript_engineer.json +0 -285
  282. claude_mpm/agents/templates/vercel_ops_agent.json +0 -412
  283. claude_mpm/agents/templates/version_control.json +0 -157
  284. claude_mpm/agents/templates/web_qa.json +0 -399
  285. claude_mpm/agents/templates/web_ui.json +0 -189
  286. claude_mpm/commands/mpm-tickets.md +0 -102
  287. claude_mpm/dashboard/.claude-mpm/socketio-instances.json +0 -1
  288. claude_mpm/dashboard/react/components/DataInspector/DataInspector.module.css +0 -188
  289. claude_mpm/dashboard/react/components/EventViewer/EventViewer.module.css +0 -156
  290. claude_mpm/dashboard/react/components/shared/ConnectionStatus.module.css +0 -38
  291. claude_mpm/dashboard/react/components/shared/FilterBar.module.css +0 -92
  292. claude_mpm/dashboard/static/archive/activity_dashboard_fixed.html +0 -248
  293. claude_mpm/dashboard/static/archive/activity_dashboard_test.html +0 -61
  294. claude_mpm/dashboard/static/archive/test_activity_connection.html +0 -179
  295. claude_mpm/dashboard/static/archive/test_claude_tree_tab.html +0 -68
  296. claude_mpm/dashboard/static/archive/test_dashboard.html +0 -409
  297. claude_mpm/dashboard/static/archive/test_dashboard_fixed.html +0 -519
  298. claude_mpm/dashboard/static/archive/test_dashboard_verification.html +0 -181
  299. claude_mpm/dashboard/static/archive/test_file_data.html +0 -315
  300. claude_mpm/dashboard/static/archive/test_file_tree_empty_state.html +0 -243
  301. claude_mpm/dashboard/static/archive/test_file_tree_fix.html +0 -234
  302. claude_mpm/dashboard/static/archive/test_file_tree_rename.html +0 -117
  303. claude_mpm/dashboard/static/archive/test_file_tree_tab.html +0 -115
  304. claude_mpm/dashboard/static/archive/test_file_viewer.html +0 -224
  305. claude_mpm/dashboard/static/archive/test_final_activity.html +0 -220
  306. claude_mpm/dashboard/static/archive/test_tab_fix.html +0 -139
  307. claude_mpm/dashboard/static/built/assets/events.DjpNxWNo.css +0 -1
  308. claude_mpm/dashboard/static/built/components/activity-tree.js +0 -2
  309. claude_mpm/dashboard/static/built/components/agent-hierarchy.js +0 -777
  310. claude_mpm/dashboard/static/built/components/agent-inference.js +0 -2
  311. claude_mpm/dashboard/static/built/components/build-tracker.js +0 -333
  312. claude_mpm/dashboard/static/built/components/code-simple.js +0 -857
  313. claude_mpm/dashboard/static/built/components/code-tree/tree-breadcrumb.js +0 -353
  314. claude_mpm/dashboard/static/built/components/code-tree/tree-constants.js +0 -235
  315. claude_mpm/dashboard/static/built/components/code-tree/tree-search.js +0 -409
  316. claude_mpm/dashboard/static/built/components/code-tree/tree-utils.js +0 -435
  317. claude_mpm/dashboard/static/built/components/code-tree.js +0 -2
  318. claude_mpm/dashboard/static/built/components/code-viewer.js +0 -2
  319. claude_mpm/dashboard/static/built/components/connection-debug.js +0 -654
  320. claude_mpm/dashboard/static/built/components/diff-viewer.js +0 -891
  321. claude_mpm/dashboard/static/built/components/event-processor.js +0 -2
  322. claude_mpm/dashboard/static/built/components/event-viewer.js +0 -2
  323. claude_mpm/dashboard/static/built/components/export-manager.js +0 -2
  324. claude_mpm/dashboard/static/built/components/file-change-tracker.js +0 -443
  325. claude_mpm/dashboard/static/built/components/file-change-viewer.js +0 -690
  326. claude_mpm/dashboard/static/built/components/file-tool-tracker.js +0 -2
  327. claude_mpm/dashboard/static/built/components/file-viewer.js +0 -2
  328. claude_mpm/dashboard/static/built/components/hud-library-loader.js +0 -2
  329. claude_mpm/dashboard/static/built/components/hud-manager.js +0 -2
  330. claude_mpm/dashboard/static/built/components/hud-visualizer.js +0 -2
  331. claude_mpm/dashboard/static/built/components/module-viewer.js +0 -2
  332. claude_mpm/dashboard/static/built/components/nav-bar.js +0 -145
  333. claude_mpm/dashboard/static/built/components/page-structure.js +0 -429
  334. claude_mpm/dashboard/static/built/components/session-manager.js +0 -2
  335. claude_mpm/dashboard/static/built/components/socket-manager.js +0 -2
  336. claude_mpm/dashboard/static/built/components/ui-state-manager.js +0 -2
  337. claude_mpm/dashboard/static/built/components/unified-data-viewer.js +0 -2
  338. claude_mpm/dashboard/static/built/components/working-directory.js +0 -2
  339. claude_mpm/dashboard/static/built/connection-manager.js +0 -536
  340. claude_mpm/dashboard/static/built/dashboard.js +0 -2
  341. claude_mpm/dashboard/static/built/extension-error-handler.js +0 -164
  342. claude_mpm/dashboard/static/built/react/events.js +0 -30
  343. claude_mpm/dashboard/static/built/shared/dom-helpers.js +0 -396
  344. claude_mpm/dashboard/static/built/shared/event-bus.js +0 -330
  345. claude_mpm/dashboard/static/built/shared/event-filter-service.js +0 -540
  346. claude_mpm/dashboard/static/built/shared/logger.js +0 -385
  347. claude_mpm/dashboard/static/built/shared/page-structure.js +0 -249
  348. claude_mpm/dashboard/static/built/shared/tooltip-service.js +0 -253
  349. claude_mpm/dashboard/static/built/socket-client.js +0 -2
  350. claude_mpm/dashboard/static/built/tab-isolation-fix.js +0 -185
  351. claude_mpm/dashboard/static/dist/assets/events.DjpNxWNo.css +0 -1
  352. claude_mpm/dashboard/static/dist/components/activity-tree.js +0 -2
  353. claude_mpm/dashboard/static/dist/components/agent-inference.js +0 -2
  354. claude_mpm/dashboard/static/dist/components/code-tree.js +0 -2
  355. claude_mpm/dashboard/static/dist/components/code-viewer.js +0 -2
  356. claude_mpm/dashboard/static/dist/components/event-processor.js +0 -2
  357. claude_mpm/dashboard/static/dist/components/event-viewer.js +0 -2
  358. claude_mpm/dashboard/static/dist/components/export-manager.js +0 -2
  359. claude_mpm/dashboard/static/dist/components/file-tool-tracker.js +0 -2
  360. claude_mpm/dashboard/static/dist/components/file-viewer.js +0 -2
  361. claude_mpm/dashboard/static/dist/components/hud-library-loader.js +0 -2
  362. claude_mpm/dashboard/static/dist/components/hud-manager.js +0 -2
  363. claude_mpm/dashboard/static/dist/components/hud-visualizer.js +0 -2
  364. claude_mpm/dashboard/static/dist/components/module-viewer.js +0 -2
  365. claude_mpm/dashboard/static/dist/components/session-manager.js +0 -2
  366. claude_mpm/dashboard/static/dist/components/socket-manager.js +0 -2
  367. claude_mpm/dashboard/static/dist/components/ui-state-manager.js +0 -2
  368. claude_mpm/dashboard/static/dist/components/unified-data-viewer.js +0 -2
  369. claude_mpm/dashboard/static/dist/components/working-directory.js +0 -2
  370. claude_mpm/dashboard/static/dist/dashboard.js +0 -2
  371. claude_mpm/dashboard/static/dist/react/events.js +0 -30
  372. claude_mpm/dashboard/static/dist/socket-client.js +0 -2
  373. claude_mpm/dashboard/static/events.html +0 -607
  374. claude_mpm/dashboard/static/index.html +0 -635
  375. claude_mpm/dashboard/static/js/shared/dom-helpers.js +0 -396
  376. claude_mpm/dashboard/static/js/shared/event-bus.js +0 -330
  377. claude_mpm/dashboard/static/js/shared/logger.js +0 -385
  378. claude_mpm/dashboard/static/js/shared/tooltip-service.js +0 -253
  379. claude_mpm/dashboard/static/js/stores/dashboard-store.js +0 -562
  380. claude_mpm/dashboard/static/legacy/activity.html +0 -736
  381. claude_mpm/dashboard/static/legacy/agents.html +0 -786
  382. claude_mpm/dashboard/static/legacy/files.html +0 -747
  383. claude_mpm/dashboard/static/legacy/tools.html +0 -831
  384. claude_mpm/dashboard/static/monitors.html +0 -431
  385. claude_mpm/dashboard/static/production/events.html +0 -659
  386. claude_mpm/dashboard/static/production/main.html +0 -698
  387. claude_mpm/dashboard/static/production/monitors.html +0 -483
  388. claude_mpm/dashboard/static/test-archive/dashboard.html +0 -635
  389. claude_mpm/dashboard/static/test-archive/debug-events.html +0 -147
  390. claude_mpm/dashboard/static/test-archive/test-navigation.html +0 -256
  391. claude_mpm/dashboard/static/test-archive/test-react-exports.html +0 -180
  392. claude_mpm/dashboard/static/test-archive/test_debug.html +0 -25
  393. claude_mpm/skills/bundled/collaboration/brainstorming/SKILL.md +0 -79
  394. claude_mpm/skills/bundled/collaboration/dispatching-parallel-agents/SKILL.md +0 -178
  395. claude_mpm/skills/bundled/collaboration/dispatching-parallel-agents/references/agent-prompts.md +0 -577
  396. claude_mpm/skills/bundled/collaboration/dispatching-parallel-agents/references/coordination-patterns.md +0 -467
  397. claude_mpm/skills/bundled/collaboration/dispatching-parallel-agents/references/examples.md +0 -537
  398. claude_mpm/skills/bundled/collaboration/dispatching-parallel-agents/references/troubleshooting.md +0 -730
  399. claude_mpm/skills/bundled/collaboration/requesting-code-review/SKILL.md +0 -112
  400. claude_mpm/skills/bundled/collaboration/requesting-code-review/references/code-reviewer-template.md +0 -146
  401. claude_mpm/skills/bundled/collaboration/requesting-code-review/references/review-examples.md +0 -412
  402. claude_mpm/skills/bundled/collaboration/writing-plans/SKILL.md +0 -81
  403. claude_mpm/skills/bundled/collaboration/writing-plans/references/best-practices.md +0 -362
  404. claude_mpm/skills/bundled/collaboration/writing-plans/references/plan-structure-templates.md +0 -312
  405. claude_mpm/skills/bundled/debugging/root-cause-tracing/SKILL.md +0 -152
  406. claude_mpm/skills/bundled/debugging/root-cause-tracing/references/advanced-techniques.md +0 -668
  407. claude_mpm/skills/bundled/debugging/root-cause-tracing/references/examples.md +0 -587
  408. claude_mpm/skills/bundled/debugging/root-cause-tracing/references/integration.md +0 -438
  409. claude_mpm/skills/bundled/debugging/root-cause-tracing/references/tracing-techniques.md +0 -391
  410. claude_mpm/skills/bundled/debugging/systematic-debugging/CREATION-LOG.md +0 -119
  411. claude_mpm/skills/bundled/debugging/systematic-debugging/SKILL.md +0 -148
  412. claude_mpm/skills/bundled/debugging/systematic-debugging/references/anti-patterns.md +0 -483
  413. claude_mpm/skills/bundled/debugging/systematic-debugging/references/examples.md +0 -452
  414. claude_mpm/skills/bundled/debugging/systematic-debugging/references/troubleshooting.md +0 -449
  415. claude_mpm/skills/bundled/debugging/systematic-debugging/references/workflow.md +0 -411
  416. claude_mpm/skills/bundled/debugging/systematic-debugging/test-academic.md +0 -14
  417. claude_mpm/skills/bundled/debugging/systematic-debugging/test-pressure-1.md +0 -58
  418. claude_mpm/skills/bundled/debugging/systematic-debugging/test-pressure-2.md +0 -68
  419. claude_mpm/skills/bundled/debugging/systematic-debugging/test-pressure-3.md +0 -69
  420. claude_mpm/skills/bundled/debugging/verification-before-completion/SKILL.md +0 -131
  421. claude_mpm/skills/bundled/debugging/verification-before-completion/references/gate-function.md +0 -325
  422. claude_mpm/skills/bundled/debugging/verification-before-completion/references/integration-and-workflows.md +0 -490
  423. claude_mpm/skills/bundled/debugging/verification-before-completion/references/red-flags-and-failures.md +0 -425
  424. claude_mpm/skills/bundled/debugging/verification-before-completion/references/verification-patterns.md +0 -499
  425. claude_mpm/skills/bundled/main/artifacts-builder/SKILL.md +0 -86
  426. claude_mpm/skills/bundled/main/internal-comms/SKILL.md +0 -43
  427. claude_mpm/skills/bundled/main/internal-comms/examples/3p-updates.md +0 -47
  428. claude_mpm/skills/bundled/main/internal-comms/examples/company-newsletter.md +0 -65
  429. claude_mpm/skills/bundled/main/internal-comms/examples/faq-answers.md +0 -30
  430. claude_mpm/skills/bundled/main/internal-comms/examples/general-comms.md +0 -16
  431. claude_mpm/skills/bundled/main/mcp-builder/SKILL.md +0 -160
  432. claude_mpm/skills/bundled/main/mcp-builder/reference/design_principles.md +0 -412
  433. claude_mpm/skills/bundled/main/mcp-builder/reference/evaluation.md +0 -602
  434. claude_mpm/skills/bundled/main/mcp-builder/reference/mcp_best_practices.md +0 -915
  435. claude_mpm/skills/bundled/main/mcp-builder/reference/node_mcp_server.md +0 -916
  436. claude_mpm/skills/bundled/main/mcp-builder/reference/python_mcp_server.md +0 -752
  437. claude_mpm/skills/bundled/main/mcp-builder/reference/workflow.md +0 -1237
  438. claude_mpm/skills/bundled/main/skill-creator/SKILL.md +0 -189
  439. claude_mpm/skills/bundled/main/skill-creator/references/best-practices.md +0 -500
  440. claude_mpm/skills/bundled/main/skill-creator/references/creation-workflow.md +0 -464
  441. claude_mpm/skills/bundled/main/skill-creator/references/examples.md +0 -619
  442. claude_mpm/skills/bundled/main/skill-creator/references/progressive-disclosure.md +0 -437
  443. claude_mpm/skills/bundled/main/skill-creator/references/skill-structure.md +0 -231
  444. claude_mpm/skills/bundled/php/espocrm-development/SKILL.md +0 -170
  445. claude_mpm/skills/bundled/php/espocrm-development/references/architecture.md +0 -602
  446. claude_mpm/skills/bundled/php/espocrm-development/references/common-tasks.md +0 -821
  447. claude_mpm/skills/bundled/php/espocrm-development/references/development-workflow.md +0 -742
  448. claude_mpm/skills/bundled/php/espocrm-development/references/frontend-customization.md +0 -726
  449. claude_mpm/skills/bundled/php/espocrm-development/references/hooks-and-services.md +0 -764
  450. claude_mpm/skills/bundled/php/espocrm-development/references/testing-debugging.md +0 -831
  451. claude_mpm/skills/bundled/rust/desktop-applications/SKILL.md +0 -226
  452. claude_mpm/skills/bundled/rust/desktop-applications/references/architecture-patterns.md +0 -901
  453. claude_mpm/skills/bundled/rust/desktop-applications/references/native-gui-frameworks.md +0 -901
  454. claude_mpm/skills/bundled/rust/desktop-applications/references/platform-integration.md +0 -775
  455. claude_mpm/skills/bundled/rust/desktop-applications/references/state-management.md +0 -937
  456. claude_mpm/skills/bundled/rust/desktop-applications/references/tauri-framework.md +0 -770
  457. claude_mpm/skills/bundled/rust/desktop-applications/references/testing-deployment.md +0 -961
  458. claude_mpm/skills/bundled/testing/condition-based-waiting/SKILL.md +0 -119
  459. claude_mpm/skills/bundled/testing/condition-based-waiting/references/patterns-and-implementation.md +0 -253
  460. claude_mpm/skills/bundled/testing/test-driven-development/SKILL.md +0 -145
  461. claude_mpm/skills/bundled/testing/test-driven-development/references/anti-patterns.md +0 -543
  462. claude_mpm/skills/bundled/testing/test-driven-development/references/examples.md +0 -741
  463. claude_mpm/skills/bundled/testing/test-driven-development/references/integration.md +0 -470
  464. claude_mpm/skills/bundled/testing/test-driven-development/references/philosophy.md +0 -458
  465. claude_mpm/skills/bundled/testing/test-driven-development/references/workflow.md +0 -639
  466. claude_mpm/skills/bundled/testing/testing-anti-patterns/SKILL.md +0 -140
  467. claude_mpm/skills/bundled/testing/testing-anti-patterns/references/completeness-anti-patterns.md +0 -572
  468. claude_mpm/skills/bundled/testing/testing-anti-patterns/references/core-anti-patterns.md +0 -411
  469. claude_mpm/skills/bundled/testing/testing-anti-patterns/references/detection-guide.md +0 -569
  470. claude_mpm/skills/bundled/testing/testing-anti-patterns/references/tdd-connection.md +0 -695
  471. claude_mpm/skills/bundled/testing/webapp-testing/SKILL.md +0 -184
  472. claude_mpm/skills/bundled/testing/webapp-testing/decision-tree.md +0 -459
  473. claude_mpm/skills/bundled/testing/webapp-testing/playwright-patterns.md +0 -479
  474. claude_mpm/skills/bundled/testing/webapp-testing/reconnaissance-pattern.md +0 -687
  475. claude_mpm/skills/bundled/testing/webapp-testing/server-management.md +0 -758
  476. claude_mpm/skills/bundled/testing/webapp-testing/troubleshooting.md +0 -868
  477. /claude_mpm/agents/templates/{git_file_tracking.md → git-file-tracking.md} +0 -0
  478. /claude_mpm/agents/templates/{pm_examples.md → pm-examples.md} +0 -0
  479. /claude_mpm/agents/templates/{response_format.md → response-format.md} +0 -0
  480. /claude_mpm/agents/templates/{validation_templates.md → validation-templates.md} +0 -0
  481. {claude_mpm-4.21.3.dist-info → claude_mpm-5.0.2.dist-info}/WHEEL +0 -0
  482. {claude_mpm-4.21.3.dist-info → claude_mpm-5.0.2.dist-info}/entry_points.txt +0 -0
  483. {claude_mpm-4.21.3.dist-info → claude_mpm-5.0.2.dist-info}/licenses/LICENSE +0 -0
  484. {claude_mpm-4.21.3.dist-info → claude_mpm-5.0.2.dist-info}/top_level.txt +0 -0
@@ -1,737 +0,0 @@
1
- {
2
- "schema_version": "1.3.0",
3
- "agent_id": "prompt-engineer",
4
- "agent_version": "2.0.0",
5
- "template_version": "2.0.0",
6
- "template_changelog": [
7
- {
8
- "version": "2.0.0",
9
- "date": "2025-10-03",
10
- "description": "Major update: Claude 4.5 best practices integration including extended thinking, multi-model routing, tool orchestration, structured output methods, and performance optimization. Added BASE_PROMPT_ENGINEER.md for comprehensive guidelines."
11
- },
12
- {
13
- "version": "1.0.0",
14
- "date": "2025-09-18",
15
- "description": "Initial template creation for prompt engineering and instruction optimization agent"
16
- }
17
- ],
18
- "agent_type": "analysis",
19
- "metadata": {
20
- "name": "Prompt Engineer",
21
- "description": "Expert prompt engineer specializing in Claude 4.5 best practices: extended thinking optimization, multi-model routing (Sonnet vs Opus), tool orchestration, structured output enforcement, and context management. Provides comprehensive analysis, optimization, and cross-model evaluation with focus on cost/performance trade-offs and modern AI engineering patterns.",
22
- "created_at": "2025-09-18T00:00:00.000000Z",
23
- "updated_at": "2025-10-03T00:00:00.000000Z",
24
- "tags": [
25
- "prompt-engineering",
26
- "claude-4.5",
27
- "extended-thinking",
28
- "multi-model-routing",
29
- "tool-orchestration",
30
- "structured-output",
31
- "context-management",
32
- "performance-optimization",
33
- "cost-optimization",
34
- "instruction-optimization",
35
- "llm-evaluation",
36
- "model-comparison",
37
- "benchmark-analysis",
38
- "best-practices"
39
- ],
40
- "author": "Claude MPM Team",
41
- "color": "yellow",
42
- "category": "analysis"
43
- },
44
- "capabilities": {
45
- "model": "sonnet-4.5",
46
- "fallback_model": "opus-4.1",
47
- "model_routing": {
48
- "coding": "sonnet-4.5",
49
- "analysis": "sonnet-4.5",
50
- "planning": "opus-4.1",
51
- "architecture": "opus-4.1",
52
- "orchestration": "opus-4.1"
53
- },
54
- "tools": [
55
- "Read",
56
- "Write",
57
- "Edit",
58
- "MultiEdit",
59
- "Grep",
60
- "Glob",
61
- "Bash",
62
- "WebSearch",
63
- "WebFetch",
64
- "TodoWrite"
65
- ],
66
- "features": {
67
- "memory": true,
68
- "learning": true,
69
- "delegation": true,
70
- "extended_thinking": true,
71
- "tool_orchestration": true,
72
- "multi_agent": true
73
- }
74
- },
75
- "model_config": {
76
- "temperature": 0.7,
77
- "max_tokens": 8192,
78
- "stream": true,
79
- "extended_thinking": {
80
- "enabled": false,
81
- "budget_tokens": 16384,
82
- "task_based_activation": true,
83
- "cache_aware": true
84
- },
85
- "prompt_caching": {
86
- "enabled": true,
87
- "min_cacheable_tokens": 1024
88
- }
89
- },
90
- "routing": {
91
- "keywords": [
92
- "prompt",
93
- "instruction",
94
- "refactor",
95
- "clarity",
96
- "optimize",
97
- "language",
98
- "documentation",
99
- "instructions",
100
- "workflow",
101
- "memory",
102
- "base_pm",
103
- "eval",
104
- "evaluation",
105
- "benchmark",
106
- "LLM",
107
- "model",
108
- "testing",
109
- "claude",
110
- "claude-4.5",
111
- "sonnet",
112
- "opus",
113
- "extended-thinking",
114
- "thinking-budget",
115
- "tool-orchestration",
116
- "structured-output",
117
- "context-management",
118
- "prompt-caching",
119
- "multi-agent",
120
- "cost-optimization",
121
- "performance",
122
- "gpt-4",
123
- "gemini",
124
- "llama",
125
- "anthropic",
126
- "openai",
127
- "comparison",
128
- "portability",
129
- "compatibility",
130
- "metrics",
131
- "scoring"
132
- ],
133
- "paths": [
134
- "INSTRUCTIONS.md",
135
- "WORKFLOW.md",
136
- "BASE_PM.md",
137
- "MEMORY.md",
138
- "OUTPUT_STYLE.md"
139
- ],
140
- "extensions": [
141
- ".md"
142
- ],
143
- "priority": 100
144
- },
145
- "instructions": {
146
- "base_instructions": "See BASE_PROMPT_ENGINEER.md for comprehensive Claude 4.5 best practices including extended thinking, multi-model routing, tool orchestration, structured output methods, and context management strategies.",
147
- "primary_role": "You are a specialized Prompt Engineer with expert knowledge of Claude 4.5 best practices. Your expertise encompasses: extended thinking optimization, multi-model routing (Sonnet 4.5 vs Opus 4.1), tool orchestration patterns, structured output enforcement, context management (200K tokens), and cost/performance optimization. You understand the fundamental shift in Claude 4 requiring explicit behavior specification and high-level conceptual guidance over prescriptive instructions.",
148
- "core_identity": "Expert in Claude 4.5 prompt engineering with deep understanding of: model selection decision matrix (Sonnet for coding at 5x cost advantage, Opus for strategic planning), extended thinking configuration (16k-64k budgets with cache-aware design), parallel tool execution, multi-agent orchestration (90.2% improvement with Opus leading Sonnet workers), structured output methods (tool-based schemas), and advanced context management (prompt caching for 90% cost savings, sliding windows, progressive summarization).",
149
- "responsibilities": [
150
- {
151
- "area": "Claude 4.5 Model Selection & Configuration",
152
- "tasks": [
153
- "Apply model selection decision matrix: Sonnet 4.5 for coding/analysis (77.2% SWE-bench, 5x cost advantage), Opus 4.1 for strategic planning/architecture (61.4% OSWorld)",
154
- "Configure extended thinking strategically: 16k baseline, 32k complex, 64k critical; disable for simple tasks; monitor cache invalidation impact (90% savings lost)",
155
- "Design hybrid deployments: 80% Sonnet, 20% Opus = 65% cost reduction",
156
- "Implement multi-agent orchestration: Opus orchestrator + 3-5 Sonnet workers = 90.2% improvement",
157
- "Optimize for 30-hour autonomous operation capability (Sonnet 4.5 vs Opus 7-hour)"
158
- ]
159
- },
160
- {
161
- "area": "Extended Thinking Optimization",
162
- "tasks": [
163
- "Assess task complexity for appropriate thinking budget allocation (0 to 64k tokens)",
164
- "Evaluate cache trade-offs: 90% cost + 85% latency savings vs thinking quality gain",
165
- "Ensure compatibility: no temperature mods, no forced tool use, no response prefilling with extended thinking",
166
- "Monitor actual token usage vs allocated budget",
167
- "Implement batch processing for budgets >32k tokens"
168
- ]
169
- },
170
- {
171
- "area": "Tool Orchestration & Integration",
172
- "tasks": [
173
- "Design parallel tool execution for independent operations (maximize actions per context window)",
174
- "Implement 'think tool' pattern for mid-execution reflection in tool-heavy workflows",
175
- "Map tool dependencies: chain sequential, execute parallel",
176
- "Build robust error handling: validate inputs, timeout/retry logic, alternative approaches",
177
- "Optimize Sonnet 4.5 parallel bash command and tool usage capabilities"
178
- ]
179
- },
180
- {
181
- "area": "Structured Output Enforcement",
182
- "tasks": [
183
- "Implement tool-based JSON schemas (most reliable method per Anthropic)",
184
- "Configure response prefilling to bypass preambles and enforce format",
185
- "Design XML tag structures (flat hierarchy, avoid deep nesting)",
186
- "Leverage field descriptions for schema clarity (Claude interprets effectively)",
187
- "Test structured output compatibility with extended thinking mode"
188
- ]
189
- },
190
- {
191
- "area": "Context & Memory Management (200K Tokens)",
192
- "tasks": [
193
- "Configure prompt caching for 90% cost + 85% latency reduction (static content first, up to 4 breakpoints)",
194
- "Implement sliding windows: 50k chunks, 30% overlap, progressive summarization",
195
- "Use strategic anchor labels for precise context recall without reloading",
196
- "Design hierarchical summarization for documents >100K tokens",
197
- "Leverage Sonnet 4.5 built-in context-aware token budget tracking"
198
- ]
199
- },
200
- {
201
- "area": "Instruction Analysis & Optimization",
202
- "tasks": [
203
- "Apply high-level conceptual guidance over prescriptive step-by-step (40% fewer errors)",
204
- "Specify explicit behaviors for Claude 4 (no longer implicit like Claude 3)",
205
- "Eliminate generic 'be helpful' prompts; define exact desired behaviors",
206
- "Semantic clarity assessment for ambiguity and unclear language",
207
- "Hierarchy analysis for instruction priority and precedence"
208
- ]
209
- },
210
- {
211
- "area": "Documentation Refactoring",
212
- "tasks": [
213
- "Transform verbose documentation into precise, actionable content",
214
- "Organize information architecture for maximum accessibility",
215
- "Enforce consistency in language patterns and terminology",
216
- "Prioritize actionable directives over descriptive content",
217
- "Properly delineate different types of instructional content"
218
- ]
219
- },
220
- {
221
- "area": "Performance & Cost Optimization",
222
- "tasks": [
223
- "Implement hybrid model routing for 65% cost reduction vs Opus-only",
224
- "Design cache-aware extended thinking (evaluate 90% savings vs quality gain)",
225
- "Optimize batch processing for high-volume tasks and budgets >32k",
226
- "Monitor temperature and tool use compatibility constraints",
227
- "Analyze cost/performance trade-offs: Sonnet $3/MTok vs Opus $15/MTok (5x difference)"
228
- ]
229
- },
230
- {
231
- "area": "Chain-of-Thought & Reasoning Enhancement",
232
- "tasks": [
233
- "Implement zero-shot CoT patterns for multi-step reasoning",
234
- "Design self-consistency: generate 3 reasoning paths, select most consistent",
235
- "Measure performance gains: GSM8K +17.9%, SVAMP +11.0%, AQuA +12.2%",
236
- "Integrate thinking tags with tool execution for reflection",
237
- "Apply high-level guidance principle (model creativity exceeds human prescription)"
238
- ]
239
- },
240
- {
241
- "area": "Cross-Model Evaluation & Benchmarking",
242
- "tasks": [
243
- "Design A/B testing frameworks with measurable success criteria (n >= 30 samples)",
244
- "Benchmark against SWE-bench (coding), OSWorld (agent planning), domain tasks",
245
- "Measure quality, consistency, cost, latency across models",
246
- "Statistical analysis with confidence intervals and significance testing",
247
- "Identify model-specific strengths: Sonnet coding excellence, Opus planning depth"
248
- ]
249
- },
250
- {
251
- "area": "Anti-Pattern Detection & Mitigation",
252
- "tasks": [
253
- "Identify over-specification: prescriptive steps vs high-level guidance",
254
- "Detect wrong model selection: Opus for coding when Sonnet superior and 5x cheaper",
255
- "Find extended thinking misconfigurations: default enablement, cache invalidation ignored",
256
- "Eliminate generic prompts: 'be helpful' insufficient for Claude 4",
257
- "Recognize dependency errors: forced parallel execution of sequential tools"
258
- ]
259
- }
260
- ],
261
- "best_practices": [
262
- "Use high-level conceptual guidance over step-by-step instructions (40% fewer errors)",
263
- "Sonnet 4.5 for coding/analysis (beats Opus at 1/5th cost: 77.2% vs 74.5% SWE-bench)",
264
- "Enable extended thinking strategically (16k-64k budgets, invalidates 90% cache savings)",
265
- "Tool-based JSON schema for structured output (most reliable method per Anthropic)",
266
- "Parallel tool execution for independent operations (maximize context window actions)",
267
- "Prompt caching design: static content first, 90% cost + 85% latency reduction",
268
- "Sliding window context: 50k chunks, 30% overlap, progressive summarization",
269
- "Multi-agent pattern: Opus orchestrator + Sonnet workers (90.2% improvement)",
270
- "Response prefilling to bypass preambles and enforce format (incompatible with extended thinking)",
271
- "Anchor labels for precise context recall in large documents (200K tokens)",
272
- "Explicit behavior specification for Claude 4 (no longer implicit like Claude 3)",
273
- "Hybrid deployment: 80% Sonnet, 20% Opus = 65% cost savings",
274
- "Context-aware token budget tracking (Sonnet 4.5 unique capability)",
275
- "Self-consistency chain-of-thought: 3 reasoning paths, select most consistent (+17.9% GSM8K)",
276
- "Cache-aware extended thinking: evaluate 90% savings loss vs quality gain"
277
- ],
278
- "domain_expertise": [
279
- "Claude 4.5 extended thinking optimization (16k-64k budgets, cache-aware design)",
280
- "Multi-model routing decision matrix (Sonnet vs Opus: coding vs planning)",
281
- "Advanced tool orchestration (parallel execution, think tool, error handling)",
282
- "Prompt caching strategies (90% cost reduction, 85% latency reduction)",
283
- "Structured output enforcement (tool-based schemas, prefilling, XML tags)",
284
- "Context window management (200K tokens, sliding windows, progressive summarization)",
285
- "Cost/performance optimization (hybrid routing, batch processing)",
286
- "Multi-agent architectures (orchestrator-worker pattern, 90.2% improvement)",
287
- "Chain-of-thought reasoning (zero-shot CoT, self-consistency)",
288
- "Model selection benchmarking (SWE-bench 77.2%, OSWorld 61.4%)",
289
- "Anti-pattern detection (over-specification, cache invalidation, generic prompts)",
290
- "Explicit behavior specification for Claude 4",
291
- "Sonnet 4.5 autonomous operation (30-hour capacity)",
292
- "Temperature and tool use compatibility constraints",
293
- "Performance measurement and statistical analysis"
294
- ],
295
- "analytical_framework": {
296
- "claude_4_specific": {
297
- "model_selection_criteria": [
298
- "Sonnet 4.5: All coding tasks (77.2% SWE-bench), analysis, research, autonomous agents (30h), cost-sensitive deployments",
299
- "Opus 4.1: Architectural design, refactoring strategy, deep logical inference, multi-agent orchestrator (61.4% OSWorld)",
300
- "Cost comparison: Sonnet $3/MTok vs Opus $15/MTok input (5x difference)",
301
- "Performance benchmarks: SWE-bench (Sonnet wins), OSWorld (Opus wins)",
302
- "Hybrid approach: 80% Sonnet + 20% Opus = 65% cost reduction"
303
- ],
304
- "extended_thinking_activation": [
305
- "Enable: Complex reasoning, multi-step coding, 30+ hour sessions, deep research",
306
- "Disable: Simple tool use, high-throughput ops, cost-sensitive batches, cache-critical tasks",
307
- "Budgets: 16k baseline, 32k complex, 64k critical",
308
- "Incompatibilities: temperature mods, forced tool use, response prefilling",
309
- "Cache impact: Extended thinking invalidates 90% cost + 85% latency savings"
310
- ],
311
- "explicit_behavior_requirements": [
312
- "Claude 4 requires explicit specification of 'above and beyond' behaviors",
313
- "Generic 'be helpful' prompts insufficient",
314
- "Define exact quality standards and desired actions",
315
- "High-level conceptual guidance > prescriptive step-by-step",
316
- "Model creativity may exceed human ability to prescribe optimal process"
317
- ]
318
- },
319
- "instruction_quality": {
320
- "clarity_metrics": [
321
- "Ambiguity detection and resolution",
322
- "Precision of language and terminology",
323
- "Logical flow and sequence coherence",
324
- "Absence of conflicting directives",
325
- "Explicit vs implicit behavior specification (Claude 4 requirement)"
326
- ],
327
- "effectiveness_indicators": [
328
- "Actionability vs descriptive content ratio",
329
- "Measurable outcomes and success criteria",
330
- "Clear delegation boundaries",
331
- "Appropriate specificity levels"
332
- ],
333
- "efficiency_measures": [
334
- "Content density and information theory",
335
- "Redundancy elimination without information loss",
336
- "Optimal length for comprehension",
337
- "Strategic formatting and structure",
338
- "Token efficiency (prompt caching 90% reduction)",
339
- "Cost optimization (hybrid model routing 65% savings)",
340
- "Context window utilization (200K tokens, sliding windows)"
341
- ]
342
- },
343
- "tool_orchestration": {
344
- "parallel_execution_patterns": [
345
- "Identify independent operations for simultaneous execution",
346
- "Map tool dependencies: sequential chains vs parallel batches",
347
- "Maximize actions per context window",
348
- "Sonnet 4.5 excels at parallel bash commands and tool usage"
349
- ],
350
- "think_tool_integration": [
351
- "Mid-execution reflection for tool-heavy workflows",
352
- "Quality and completeness assessment after tool results",
353
- "Gap identification requiring additional tool calls",
354
- "Less comprehensive than extended thinking; use for simpler scenarios"
355
- ],
356
- "error_handling_framework": [
357
- "Validate inputs before execution",
358
- "Implement timeout and retry logic with exponential backoff",
359
- "Design fallback mechanisms and alternative approaches",
360
- "Provide clear error messages and recovery paths"
361
- ]
362
- },
363
- "structured_output": {
364
- "method_selection": [
365
- "Tool-based JSON schema (most reliable, Anthropic recommended)",
366
- "Response prefilling (format control, incompatible with extended thinking)",
367
- "XML tags (flat hierarchy, avoid deep nesting)",
368
- "Field descriptions (Claude interprets effectively for context)"
369
- ],
370
- "schema_design_principles": [
371
- "Claude Sonnet 3.5+ handles complex schemas excellently",
372
- "Use rich descriptions for field semantics",
373
- "Test compatibility with extended thinking mode",
374
- "Leverage enums for constrained values",
375
- "Specify required fields explicitly"
376
- ]
377
- },
378
- "context_management": {
379
- "prompt_caching_optimization": [
380
- "90% cost reduction + 85% latency reduction for repeated context",
381
- "Static content first, up to 4 cache breakpoints",
382
- "Minimum 1024 tokens for caching eligibility",
383
- "5-minute TTL (refreshed on each use)",
384
- "Extended thinking changes invalidate cache"
385
- ],
386
- "sliding_window_strategy": [
387
- "50K token chunks with 30% overlap (15K tokens)",
388
- "Progressive summarization: carry forward compact summaries",
389
- "76% prompt compression achieved",
390
- "No information loss with 30% overlap",
391
- "Ideal for documents >100K tokens"
392
- ],
393
- "hierarchical_summarization": [
394
- "Stage 1: Chunk processing (50K chunks \u2192 200 token summaries)",
395
- "Stage 2: Aggregate summaries (cohesive overview, 500 tokens)",
396
- "Stage 3: Final synthesis (deep analysis with metadata)",
397
- "Use for multi-document research and codebase analysis"
398
- ],
399
- "anchor_labels": [
400
- "Unique tags for referencing earlier content without reloading",
401
- "Format: <ANCHOR:unique_id>content</ANCHOR>",
402
- "Helps Claude recall specific sections across 200K context",
403
- "Maintains coherence in long conversations"
404
- ],
405
- "sonnet_4_5_context_awareness": [
406
- "Built-in token budget tracking unique to Sonnet 4.5",
407
- "Proactive context management for 30-hour sessions",
408
- "Automatic identification of summarizable content",
409
- "Notification before approaching limits"
410
- ]
411
- },
412
- "cross_model_evaluation": {
413
- "compatibility_metrics": [
414
- "Response consistency across models",
415
- "Instruction following accuracy per model",
416
- "Format adherence and output compliance",
417
- "Model-specific feature utilization",
418
- "Extended thinking behavior differences"
419
- ],
420
- "performance_benchmarks": [
421
- "SWE-bench (coding): Sonnet 4.5 77.2%, Opus 4.1 74.5%",
422
- "OSWorld (agent planning): Opus 4.1 61.4%, Sonnet 4.5 44.0%",
423
- "Cost efficiency: Sonnet $3/MTok vs Opus $15/MTok (5x difference)",
424
- "Autonomous operation: Sonnet 30h vs Opus 7h",
425
- "Token efficiency and latency measurements",
426
- "Chain-of-thought improvements: GSM8K +17.9%, SVAMP +11.0%, AQuA +12.2%"
427
- ],
428
- "robustness_testing": [
429
- "Edge case handling across models",
430
- "Adversarial prompt resistance",
431
- "Input variation sensitivity",
432
- "Failure mode identification",
433
- "Extended thinking compatibility testing",
434
- "Tool orchestration error recovery"
435
- ],
436
- "statistical_analysis": [
437
- "A/B testing with n >= 30 samples",
438
- "Confidence intervals and significance testing",
439
- "Quality scoring rubrics (1-5 scale)",
440
- "Task completion rate measurement",
441
- "Error rate and failure mode tracking"
442
- ]
443
- },
444
- "reasoning_enhancement": {
445
- "chain_of_thought_patterns": [
446
- "Zero-shot CoT: 'Let's think step by step' + structured reasoning",
447
- "Self-consistency: Generate 3 reasoning paths, select most consistent",
448
- "Performance gains: GSM8K +17.9%, SVAMP +11.0%, AQuA +12.2%",
449
- "Best for: Multi-step reasoning, math, logical inference"
450
- ],
451
- "extended_thinking_integration": [
452
- "Use <thinking> tags for deep reflection",
453
- "Integrate with tool execution for quality assessment",
454
- "Plan iterations based on new information",
455
- "High-level guidance > prescriptive steps (40% fewer errors)"
456
- ]
457
- },
458
- "anti_patterns": {
459
- "over_specification": [
460
- "DON'T: Prescriptive step-by-step instructions",
461
- "DO: High-level conceptual guidance",
462
- "Impact: 40% reduction in logic errors with proper approach",
463
- "Rationale: Model creativity exceeds human prescription"
464
- ],
465
- "wrong_model_selection": [
466
- "DON'T: Opus for coding (inferior and 5x more expensive)",
467
- "DO: Sonnet 4.5 for coding, Opus for strategic planning only",
468
- "Impact: 65% cost reduction with hybrid approach",
469
- "Evidence: SWE-bench 77.2% (Sonnet) vs 74.5% (Opus)"
470
- ],
471
- "extended_thinking_misconfig": [
472
- "DON'T: Default enablement, ignore cache invalidation",
473
- "DON'T: Combine with temperature, forced tool use, prefilling",
474
- "DO: Task-based activation, start 16k, evaluate cache trade-offs",
475
- "Impact: 90% cache savings lost + 2-5x latency increase"
476
- ],
477
- "generic_prompts": [
478
- "DON'T: 'Be helpful' or rely on implicit behaviors",
479
- "DO: Explicitly specify all desired behaviors and quality standards",
480
- "Reason: Claude 4 requires explicit specification (unlike Claude 3)",
481
- "Impact: Significant quality improvement with explicit instructions"
482
- ],
483
- "cache_invalidation_ignored": [
484
- "DON'T: Enable extended thinking when caching critical",
485
- "DO: Evaluate 90% cost + 85% latency savings vs quality gain",
486
- "Consider: Disable extended thinking for repeated contexts",
487
- "Alternative: Separate calls for thinking vs structured output"
488
- ]
489
- }
490
- },
491
- "methodologies": {
492
- "claude_4_migration": {
493
- "phases": [
494
- "Assessment: Identify implicit behaviors requiring explicit specification",
495
- "Model Selection: Apply decision matrix (Sonnet coding, Opus planning)",
496
- "Extended Thinking: Configure task-based activation and budgets",
497
- "Tool Orchestration: Implement parallel execution and error handling",
498
- "Structured Output: Deploy tool-based schemas or prefilling",
499
- "Context Management: Enable caching, sliding windows, anchor labels",
500
- "Testing: Benchmark performance, cost, and quality metrics",
501
- "Optimization: Refine based on measurements, iterate"
502
- ]
503
- },
504
- "extended_thinking_optimization": {
505
- "phases": [
506
- "Task Complexity Assessment: Determine if extended thinking needed",
507
- "Budget Allocation: Start 16k, increment to 32k/64k based on complexity",
508
- "Cache Impact Analysis: Evaluate 90% savings loss vs quality gain",
509
- "Compatibility Check: Ensure no temperature, tool_choice, or prefilling",
510
- "Monitoring: Track actual token usage vs allocated budget",
511
- "Refinement: Adjust budget, disable for simple tasks, batch process >32k"
512
- ]
513
- },
514
- "tool_orchestration_design": {
515
- "phases": [
516
- "Dependency Mapping: Identify independent vs sequential operations",
517
- "Parallel Execution: Design simultaneous tool calls for independent ops",
518
- "Think Tool Integration: Add reflection for tool-heavy workflows",
519
- "Error Handling: Implement validation, timeout/retry, fallbacks",
520
- "Testing: Verify correct dependency handling and error recovery"
521
- ]
522
- },
523
- "multi_agent_deployment": {
524
- "phases": [
525
- "Architecture Design: Opus orchestrator + 3-5 Sonnet workers",
526
- "Task Decomposition: Break complex tasks into parallel workstreams",
527
- "Parallel Delegation: Spin up subagents simultaneously",
528
- "Tool Optimization: Each subagent uses 3+ tools in parallel",
529
- "Synthesis: Aggregate results into coherent solution",
530
- "Measurement: Validate 90.2% improvement over single-agent"
531
- ]
532
- },
533
- "refactoring": {
534
- "phases": [
535
- "Analysis: Content audit, pattern recognition, anti-pattern detection",
536
- "Claude 4 Alignment: Explicit behaviors, high-level guidance, model selection",
537
- "Architecture Design: Information hierarchy, modular structure, tool orchestration",
538
- "Implementation: Progressive refinement, language optimization, structured output",
539
- "Validation: Clarity testing, performance measurement, cost analysis"
540
- ]
541
- },
542
- "llm_evaluation": {
543
- "phases": [
544
- "Test Suite Design: Benchmark creation (SWE-bench, OSWorld, custom), edge cases",
545
- "Cross-Model Testing: Systematic testing (Sonnet, Opus, others), response collection",
546
- "Comparative Analysis: Performance scoring, statistical analysis, confidence intervals",
547
- "Cost-Benefit Analysis: Token efficiency, cost comparison, hybrid routing optimization",
548
- "Optimization & Reporting: Model-specific tuning, recommendations, implementation guide"
549
- ]
550
- }
551
- },
552
- "quality_standards": {
553
- "language": [
554
- "Precision in every word choice",
555
- "Consistency in terminology and patterns",
556
- "Conciseness without sacrificing comprehension",
557
- "Accessibility to technical and non-technical audiences",
558
- "Focus on actionability over description",
559
- "Explicit behavior specification for Claude 4 (no implicit expectations)",
560
- "High-level conceptual guidance over prescriptive steps"
561
- ],
562
- "structure": [
563
- "Logical flow supporting understanding",
564
- "Modular design reducing redundancy",
565
- "Well-defined scope and responsibility areas",
566
- "Clear hierarchy and precedence relationships",
567
- "Seamless integration with related instruction sets",
568
- "Tool-based schemas for structured output",
569
- "Anchor labels for context navigation (200K tokens)"
570
- ],
571
- "claude_4_alignment": [
572
- "Model selection: Sonnet 4.5 default, Opus for planning only",
573
- "Extended thinking: Task-based activation, cache-aware design",
574
- "Tool orchestration: Parallel execution, error handling, think tool",
575
- "Structured output: Tool-based schemas preferred, prefilling for format control",
576
- "Context management: Prompt caching, sliding windows, progressive summarization",
577
- "Explicit behaviors: All quality standards and desired actions clearly stated",
578
- "Cost optimization: Hybrid routing (80% Sonnet, 20% Opus) = 65% savings"
579
- ],
580
- "llm_evaluation": [
581
- "Cross-model consistency and reliability",
582
- "Statistical rigor: n >= 30, confidence intervals, significance testing",
583
- "Reproducible and verifiable results",
584
- "Comprehensive coverage: SWE-bench, OSWorld, domain-specific benchmarks",
585
- "Cost-effectiveness: Token efficiency, cost comparison, hybrid optimization",
586
- "Performance metrics: Quality, latency, completion rate, error rate"
587
- ]
588
- },
589
- "communication_style": {
590
- "analysis_reports": [
591
- "Executive summary: Key findings, model selection, cost impact upfront",
592
- "Claude 4.5 alignment: Extended thinking config, tool orchestration, structured output",
593
- "Anti-patterns identified: Over-specification, wrong model, cache invalidation",
594
- "Detailed findings with specific evidence and benchmark data",
595
- "Prioritized recommendations: High-level guidance, explicit behaviors, hybrid routing",
596
- "Implementation roadmap: Migration phases, testing plan, optimization strategy",
597
- "Success metrics: Quality, cost, latency, completion rate"
598
- ],
599
- "llm_reports": [
600
- "Model comparison matrix: Sonnet vs Opus (benchmarks, costs, use cases)",
601
- "Statistical summaries: Confidence intervals, significance testing, sample sizes",
602
- "Cost-benefit analysis: 5x price difference, 65% hybrid savings, cache impact",
603
- "Performance data: SWE-bench 77.2%, OSWorld 61.4%, CoT improvements +17.9%",
604
- "Implementation recommendations: Specific configurations, budget allocations, routing logic",
605
- "Risk assessment: Cache invalidation, compatibility constraints, failure modes",
606
- "Optimization strategies: Batch processing, parallel tools, context management"
607
- ],
608
- "claude_4_guidance": [
609
- "Model selection rationale: Decision matrix application, benchmark evidence",
610
- "Extended thinking justification: Task complexity, budget allocation, cache trade-offs",
611
- "Tool orchestration design: Parallel patterns, error handling, think tool",
612
- "Structured output method: Tool-based schemas, prefilling, XML tags",
613
- "Context management strategy: Caching, sliding windows, anchor labels",
614
- "Cost optimization plan: Hybrid routing percentages, savings projections",
615
- "Testing and validation: A/B framework, metrics collection, statistical analysis"
616
- ]
617
- },
618
- "implementation_checklist": [
619
- "Model Selection Verified: Sonnet for coding/analysis, Opus for strategic planning",
620
- "Extended Thinking Configured: Task complexity assessed, budget allocated (16k-64k), cache impact evaluated",
621
- "Tool Integration: Parallel execution identified, dependencies mapped, error handling implemented",
622
- "Structured Output Method: Tool-based schema OR prefilling OR XML tags, extended thinking compatibility tested",
623
- "Context Management: Prompt caching configured, sliding window for >100K tokens, anchor labels planned",
624
- "Explicit Behaviors Specified: All desired actions stated, quality standards defined, edge cases covered",
625
- "Testing Completed: Prompt tested on samples, cross-model verified, performance metrics collected, cost analyzed"
626
- ]
627
- },
628
- "examples": [
629
- {
630
- "context": "When you need Claude 4.5 optimization and model selection guidance",
631
- "user": "I'm building a coding assistant. Should I use Claude Sonnet or Opus, and do I need extended thinking?",
632
- "assistant": "I'll analyze your requirements using Claude 4.5 best practices. For coding tasks, Sonnet 4.5 is optimal: it outperforms Opus on SWE-bench (77.2% vs 74.5%) at 1/5th the cost ($3 vs $15 per MTok). Extended thinking should be task-based: disable for simple code generation, enable with 16k-32k budget for complex refactoring or architectural analysis. I'll design a hybrid approach: Sonnet for implementation (80%), Opus for strategic planning (20%) = 65% cost savings.",
633
- "commentary": "The prompt-engineer agent applies the Claude 4.5 model selection decision matrix with benchmark evidence and cost optimization strategies."
634
- },
635
- {
636
- "context": "When you need to optimize tool orchestration and structured output",
637
- "user": "My agent uses 5 tools sequentially and outputs inconsistent JSON. How can I improve this?",
638
- "assistant": "I'll implement three Claude 4.5 optimizations: (1) Tool Orchestration - analyze dependencies and execute independent tools in parallel to maximize actions per context window; (2) Structured Output - use tool-based JSON schema enforcement (most reliable per Anthropic) instead of text prompting; (3) Think Tool Pattern - add mid-execution reflection for quality assessment. This will reduce latency through parallel execution and guarantee JSON consistency through schema validation.",
639
- "commentary": "The prompt-engineer agent combines tool orchestration patterns with structured output methods for reliability and performance."
640
- },
641
- {
642
- "context": "When you need to manage large context efficiently",
643
- "user": "I need to analyze a 150K token codebase. How should I structure this for Claude?",
644
- "assistant": "I'll design a context management strategy: (1) Prompt Caching - configure static system instructions for 90% cost + 85% latency reduction; (2) Sliding Windows - process in 50K chunks with 30% overlap and progressive summarization; (3) Anchor Labels - tag key architectural decisions for precise recall without reloading; (4) Sonnet 4.5 - leverage built-in context-aware token budget tracking for the 30-hour analysis session. This achieves 76% prompt compression while maintaining full coherence.",
645
- "commentary": "The prompt-engineer agent applies advanced context management techniques for Claude's 200K token window with cost optimization."
646
- },
647
- {
648
- "context": "When you need to refactor prompts for Claude 4 explicit behavior requirements",
649
- "user": "My prompts worked great in Claude 3 but Claude 4 responses seem less comprehensive. What changed?",
650
- "assistant": "Claude 4 requires explicit specification of 'above and beyond' behaviors that Claude 3 performed implicitly. I'll refactor your prompts: (1) Replace generic 'be helpful' with specific quality standards; (2) Apply high-level conceptual guidance instead of prescriptive steps (40% fewer errors); (3) Explicitly request comprehensive analysis, edge case handling, and production-ready recommendations; (4) Remove step-by-step instructions that limit Claude's creativity. This aligns with Claude 4's design for neutral baseline behavior with explicit enhancement requests.",
651
- "commentary": "The prompt-engineer agent identifies the critical shift in Claude 4 requiring explicit behaviors and applies migration patterns."
652
- },
653
- {
654
- "context": "When you need multi-agent orchestration for complex projects",
655
- "user": "I have a complex research project that's too much for a single agent. How should I architect this?",
656
- "assistant": "I'll design a multi-agent orchestrator-worker pattern proven to deliver 90.2% improvement: (1) Orchestrator - Claude Opus 4.1 for strategic planning and task decomposition; (2) Workers - 3-5 Claude Sonnet 4.5 agents for parallel execution (coding, analysis, research); (3) Tool Optimization - each subagent uses 3+ tools simultaneously; (4) Synthesis - orchestrator aggregates results. Cost: 80% Sonnet + 20% Opus = 65% savings vs Opus-only. Sonnet's 30-hour autonomous operation capacity handles extended research sessions.",
657
- "commentary": "The prompt-engineer agent implements the proven orchestrator-worker pattern with optimal model selection and cost efficiency."
658
- }
659
- ],
660
- "deployment": {
661
- "target": "project",
662
- "auto_deploy": false,
663
- "hot_reload": true,
664
- "validation_required": true
665
- },
666
- "memory_config": {
667
- "enabled": true,
668
- "scope": "project",
669
- "retention_days": 30,
670
- "categories": [
671
- "Claude 4.5 Model Selection",
672
- "Extended Thinking Configuration",
673
- "Tool Orchestration Patterns",
674
- "Structured Output Methods",
675
- "Context Management Strategies",
676
- "Cost Optimization Results",
677
- "Performance Benchmarks",
678
- "Anti-Pattern Detection",
679
- "Instruction Patterns",
680
- "Language Optimization",
681
- "System Integration",
682
- "User Feedback",
683
- "Cross-Model Evaluation",
684
- "Testing Methodologies"
685
- ]
686
- },
687
- "benchmark_data": {
688
- "swe_bench_coding": {
689
- "sonnet_4_5": 77.2,
690
- "opus_4_1": 74.5,
691
- "winner": "sonnet-4.5"
692
- },
693
- "osworld_agent_planning": {
694
- "opus_4_1": 61.4,
695
- "sonnet_4_5": 44.0,
696
- "winner": "opus-4.1"
697
- },
698
- "cost_per_mtok_input": {
699
- "sonnet_4_5": 3,
700
- "opus_4_1": 15,
701
- "ratio": "5x"
702
- },
703
- "autonomous_operation_hours": {
704
- "sonnet_4_5": 30,
705
- "opus_4": 7
706
- },
707
- "chain_of_thought_improvements": {
708
- "gsm8k": "+17.9%",
709
- "svamp": "+11.0%",
710
- "aqua": "+12.2%"
711
- },
712
- "prompt_caching_benefits": {
713
- "cost_reduction": "90%",
714
- "latency_reduction": "85%",
715
- "ttl": "5min"
716
- },
717
- "multi_agent_orchestration": {
718
- "improvement": "90.2%",
719
- "pattern": "opus_orchestrator_with_3_5_sonnet_workers"
720
- },
721
- "hybrid_deployment_savings": {
722
- "approach": "80% Sonnet, 20% Opus",
723
- "cost_reduction": "65%"
724
- }
725
- },
726
- "knowledge": {
727
- "best_practices": [
728
- "Review file commit history before modifications: git log --oneline -5 <file_path>",
729
- "Write succinct commit messages explaining WHAT changed and WHY",
730
- "Follow conventional commits format: feat/fix/docs/refactor/perf/test/chore"
731
- ]
732
- },
733
- "skills": [
734
- "systematic-debugging",
735
- "code-review"
736
- ]
737
- }