moai-adk 0.25.4__py3-none-any.whl → 0.32.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of moai-adk might be problematic. Click here for more details.

Files changed (378) hide show
  1. moai_adk/__init__.py +2 -5
  2. moai_adk/__main__.py +114 -82
  3. moai_adk/cli/__init__.py +6 -1
  4. moai_adk/cli/commands/__init__.py +1 -3
  5. moai_adk/cli/commands/analyze.py +5 -16
  6. moai_adk/cli/commands/doctor.py +6 -18
  7. moai_adk/cli/commands/init.py +56 -125
  8. moai_adk/cli/commands/language.py +14 -35
  9. moai_adk/cli/commands/status.py +9 -15
  10. moai_adk/cli/commands/update.py +1555 -190
  11. moai_adk/cli/prompts/init_prompts.py +112 -56
  12. moai_adk/cli/spec_status.py +263 -0
  13. moai_adk/cli/ui/__init__.py +44 -0
  14. moai_adk/cli/ui/progress.py +422 -0
  15. moai_adk/cli/ui/prompts.py +389 -0
  16. moai_adk/cli/ui/theme.py +129 -0
  17. moai_adk/cli/worktree/__init__.py +27 -0
  18. moai_adk/cli/worktree/__main__.py +31 -0
  19. moai_adk/cli/worktree/cli.py +672 -0
  20. moai_adk/cli/worktree/exceptions.py +89 -0
  21. moai_adk/cli/worktree/manager.py +490 -0
  22. moai_adk/cli/worktree/models.py +65 -0
  23. moai_adk/cli/worktree/registry.py +128 -0
  24. moai_adk/core/PHASE2_OPTIMIZATIONS.md +467 -0
  25. moai_adk/core/analysis/session_analyzer.py +17 -56
  26. moai_adk/core/claude_integration.py +26 -54
  27. moai_adk/core/command_helpers.py +10 -10
  28. moai_adk/core/comprehensive_monitoring_system.py +1183 -0
  29. moai_adk/core/config/auto_spec_config.py +5 -11
  30. moai_adk/core/config/migration.py +19 -9
  31. moai_adk/core/config/unified.py +436 -0
  32. moai_adk/core/context_manager.py +6 -12
  33. moai_adk/core/enterprise_features.py +1404 -0
  34. moai_adk/core/error_recovery_system.py +725 -112
  35. moai_adk/core/event_driven_hook_system.py +1371 -0
  36. moai_adk/core/git/__init__.py +8 -0
  37. moai_adk/core/git/branch_manager.py +3 -11
  38. moai_adk/core/git/checkpoint.py +1 -3
  39. moai_adk/core/git/conflict_detector.py +413 -0
  40. moai_adk/core/git/manager.py +91 -1
  41. moai_adk/core/hooks/post_tool_auto_spec_completion.py +56 -80
  42. moai_adk/core/input_validation_middleware.py +1006 -0
  43. moai_adk/core/integration/engine.py +6 -18
  44. moai_adk/core/integration/integration_tester.py +10 -9
  45. moai_adk/core/integration/utils.py +1 -1
  46. moai_adk/core/issue_creator.py +10 -28
  47. moai_adk/core/jit_context_loader.py +956 -0
  48. moai_adk/core/jit_enhanced_hook_manager.py +1987 -0
  49. moai_adk/core/language_config_resolver.py +485 -0
  50. moai_adk/core/language_validator.py +28 -41
  51. moai_adk/core/mcp/setup.py +15 -12
  52. moai_adk/core/merge/__init__.py +9 -0
  53. moai_adk/core/merge/analyzer.py +481 -0
  54. moai_adk/core/migration/alfred_to_moai_migrator.py +383 -0
  55. moai_adk/core/migration/backup_manager.py +78 -9
  56. moai_adk/core/migration/custom_element_scanner.py +358 -0
  57. moai_adk/core/migration/file_migrator.py +8 -17
  58. moai_adk/core/migration/interactive_checkbox_ui.py +488 -0
  59. moai_adk/core/migration/selective_restorer.py +470 -0
  60. moai_adk/core/migration/template_utils.py +74 -0
  61. moai_adk/core/migration/user_selection_ui.py +338 -0
  62. moai_adk/core/migration/version_detector.py +6 -10
  63. moai_adk/core/migration/version_migrator.py +3 -3
  64. moai_adk/core/performance/cache_system.py +8 -10
  65. moai_adk/core/phase_optimized_hook_scheduler.py +879 -0
  66. moai_adk/core/project/checker.py +2 -4
  67. moai_adk/core/project/detector.py +1 -3
  68. moai_adk/core/project/initializer.py +135 -23
  69. moai_adk/core/project/phase_executor.py +54 -81
  70. moai_adk/core/project/validator.py +6 -12
  71. moai_adk/core/quality/trust_checker.py +9 -27
  72. moai_adk/core/realtime_monitoring_dashboard.py +1724 -0
  73. moai_adk/core/robust_json_parser.py +611 -0
  74. moai_adk/core/rollback_manager.py +73 -148
  75. moai_adk/core/session_manager.py +10 -26
  76. moai_adk/core/skill_loading_system.py +579 -0
  77. moai_adk/core/spec/confidence_scoring.py +31 -100
  78. moai_adk/core/spec/ears_template_engine.py +351 -286
  79. moai_adk/core/spec/quality_validator.py +35 -69
  80. moai_adk/core/spec_status_manager.py +64 -74
  81. moai_adk/core/template/backup.py +45 -20
  82. moai_adk/core/template/config.py +112 -39
  83. moai_adk/core/template/merger.py +11 -19
  84. moai_adk/core/template/processor.py +253 -149
  85. moai_adk/core/template_engine.py +73 -40
  86. moai_adk/core/template_variable_synchronizer.py +417 -0
  87. moai_adk/core/unified_permission_manager.py +745 -0
  88. moai_adk/core/user_behavior_analytics.py +851 -0
  89. moai_adk/core/version_sync.py +429 -0
  90. moai_adk/foundation/__init__.py +56 -0
  91. moai_adk/foundation/backend.py +1027 -0
  92. moai_adk/foundation/database.py +1115 -0
  93. moai_adk/foundation/devops.py +1585 -0
  94. moai_adk/foundation/ears.py +431 -0
  95. moai_adk/foundation/frontend.py +870 -0
  96. moai_adk/foundation/git/commit_templates.py +4 -12
  97. moai_adk/foundation/git.py +376 -0
  98. moai_adk/foundation/langs.py +484 -0
  99. moai_adk/foundation/ml_ops.py +1162 -0
  100. moai_adk/foundation/testing.py +1524 -0
  101. moai_adk/foundation/trust/trust_principles.py +23 -72
  102. moai_adk/foundation/trust/validation_checklist.py +57 -162
  103. moai_adk/project/__init__.py +0 -0
  104. moai_adk/project/configuration.py +1084 -0
  105. moai_adk/project/documentation.py +566 -0
  106. moai_adk/project/schema.py +447 -0
  107. moai_adk/statusline/alfred_detector.py +1 -3
  108. moai_adk/statusline/config.py +13 -4
  109. moai_adk/statusline/enhanced_output_style_detector.py +23 -15
  110. moai_adk/statusline/main.py +51 -15
  111. moai_adk/statusline/renderer.py +104 -48
  112. moai_adk/statusline/update_checker.py +3 -9
  113. moai_adk/statusline/version_reader.py +140 -46
  114. moai_adk/templates/.claude/agents/moai/ai-nano-banana.md +549 -0
  115. moai_adk/templates/.claude/agents/moai/builder-agent.md +445 -0
  116. moai_adk/templates/.claude/agents/moai/builder-command.md +1132 -0
  117. moai_adk/templates/.claude/agents/moai/builder-skill.md +601 -0
  118. moai_adk/templates/.claude/agents/moai/expert-backend.md +831 -0
  119. moai_adk/templates/.claude/agents/moai/expert-database.md +774 -0
  120. moai_adk/templates/.claude/agents/moai/expert-debug.md +396 -0
  121. moai_adk/templates/.claude/agents/moai/expert-devops.md +711 -0
  122. moai_adk/templates/.claude/agents/moai/expert-frontend.md +666 -0
  123. moai_adk/templates/.claude/agents/moai/expert-security.md +474 -0
  124. moai_adk/templates/.claude/agents/moai/expert-uiux.md +1038 -0
  125. moai_adk/templates/.claude/agents/moai/manager-claude-code.md +429 -0
  126. moai_adk/templates/.claude/agents/moai/manager-docs.md +570 -0
  127. moai_adk/templates/.claude/agents/moai/manager-git.md +937 -0
  128. moai_adk/templates/.claude/agents/moai/manager-project.md +891 -0
  129. moai_adk/templates/.claude/agents/moai/manager-quality.md +598 -0
  130. moai_adk/templates/.claude/agents/moai/manager-spec.md +713 -0
  131. moai_adk/templates/.claude/agents/moai/manager-strategy.md +600 -0
  132. moai_adk/templates/.claude/agents/moai/manager-tdd.md +603 -0
  133. moai_adk/templates/.claude/agents/moai/mcp-context7.md +369 -0
  134. moai_adk/templates/.claude/agents/moai/mcp-figma.md +1567 -0
  135. moai_adk/templates/.claude/agents/moai/mcp-notion.md +749 -0
  136. moai_adk/templates/.claude/agents/moai/mcp-playwright.md +427 -0
  137. moai_adk/templates/.claude/agents/moai/mcp-sequential-thinking.md +994 -0
  138. moai_adk/templates/.claude/commands/moai/0-project.md +1143 -0
  139. moai_adk/templates/.claude/commands/moai/1-plan.md +1435 -0
  140. moai_adk/templates/.claude/commands/moai/2-run.md +883 -0
  141. moai_adk/templates/.claude/commands/moai/3-sync.md +993 -0
  142. moai_adk/templates/.claude/commands/moai/9-feedback.md +314 -0
  143. moai_adk/templates/.claude/hooks/__init__.py +8 -0
  144. moai_adk/templates/.claude/hooks/moai/__init__.py +8 -0
  145. moai_adk/templates/.claude/hooks/moai/lib/__init__.py +85 -0
  146. moai_adk/templates/.claude/hooks/moai/lib/checkpoint.py +244 -0
  147. moai_adk/templates/.claude/hooks/moai/lib/common.py +131 -0
  148. moai_adk/templates/.claude/hooks/moai/lib/config_manager.py +446 -0
  149. moai_adk/templates/.claude/hooks/moai/lib/config_validator.py +639 -0
  150. moai_adk/templates/.claude/hooks/moai/lib/example_config.json +104 -0
  151. moai_adk/templates/.claude/hooks/moai/lib/git_operations_manager.py +590 -0
  152. moai_adk/templates/.claude/hooks/moai/lib/language_validator.py +317 -0
  153. moai_adk/templates/.claude/hooks/moai/lib/models.py +102 -0
  154. moai_adk/templates/.claude/hooks/moai/lib/path_utils.py +28 -0
  155. moai_adk/templates/.claude/hooks/moai/lib/project.py +768 -0
  156. moai_adk/templates/.claude/hooks/moai/lib/test_hooks_improvements.py +443 -0
  157. moai_adk/templates/.claude/hooks/moai/lib/timeout.py +160 -0
  158. moai_adk/templates/.claude/hooks/moai/lib/unified_timeout_manager.py +530 -0
  159. moai_adk/templates/.claude/hooks/moai/session_end__auto_cleanup.py +862 -0
  160. moai_adk/templates/.claude/hooks/moai/session_start__show_project_info.py +921 -0
  161. moai_adk/templates/.claude/output-styles/moai/r2d2.md +380 -0
  162. moai_adk/templates/.claude/output-styles/moai/yoda.md +338 -0
  163. moai_adk/templates/.claude/settings.json +172 -0
  164. moai_adk/templates/.claude/skills/moai-docs-generation/SKILL.md +247 -0
  165. moai_adk/templates/.claude/skills/moai-docs-generation/modules/README.md +44 -0
  166. moai_adk/templates/.claude/skills/moai-docs-generation/modules/api-documentation.md +130 -0
  167. moai_adk/templates/.claude/skills/moai-docs-generation/modules/code-documentation.md +152 -0
  168. moai_adk/templates/.claude/skills/moai-docs-generation/modules/multi-format-output.md +178 -0
  169. moai_adk/templates/.claude/skills/moai-docs-generation/modules/user-guides.md +147 -0
  170. moai_adk/templates/.claude/skills/moai-domain-backend/SKILL.md +319 -0
  171. moai_adk/templates/.claude/skills/moai-domain-database/SKILL.md +320 -0
  172. moai_adk/templates/.claude/skills/moai-domain-database/modules/README.md +53 -0
  173. moai_adk/templates/.claude/skills/moai-domain-database/modules/mongodb.md +231 -0
  174. moai_adk/templates/.claude/skills/moai-domain-database/modules/postgresql.md +169 -0
  175. moai_adk/templates/.claude/skills/moai-domain-database/modules/redis.md +262 -0
  176. moai_adk/templates/.claude/skills/moai-domain-frontend/SKILL.md +496 -0
  177. moai_adk/templates/.claude/skills/moai-domain-uiux/SKILL.md +453 -0
  178. moai_adk/templates/.claude/skills/moai-domain-uiux/examples.md +560 -0
  179. moai_adk/templates/.claude/skills/moai-domain-uiux/modules/accessibility-wcag.md +260 -0
  180. moai_adk/templates/.claude/skills/moai-domain-uiux/modules/component-architecture.md +228 -0
  181. moai_adk/templates/.claude/skills/moai-domain-uiux/modules/design-system-tokens.md +405 -0
  182. moai_adk/templates/.claude/skills/moai-domain-uiux/modules/icon-libraries.md +401 -0
  183. moai_adk/templates/.claude/skills/moai-domain-uiux/modules/theming-system.md +373 -0
  184. moai_adk/templates/.claude/skills/moai-domain-uiux/reference.md +243 -0
  185. moai_adk/templates/.claude/skills/moai-formats-data/SKILL.md +491 -0
  186. moai_adk/templates/.claude/skills/moai-formats-data/modules/README.md +98 -0
  187. moai_adk/templates/.claude/skills/moai-formats-data/modules/SKILL-MODULARIZATION-TEMPLATE.md +278 -0
  188. moai_adk/templates/.claude/skills/moai-formats-data/modules/caching-performance.md +459 -0
  189. moai_adk/templates/.claude/skills/moai-formats-data/modules/data-validation.md +485 -0
  190. moai_adk/templates/.claude/skills/moai-formats-data/modules/json-optimization.md +374 -0
  191. moai_adk/templates/.claude/skills/moai-formats-data/modules/toon-encoding.md +308 -0
  192. moai_adk/templates/.claude/skills/moai-foundation-claude/SKILL.md +201 -0
  193. moai_adk/templates/.claude/skills/moai-foundation-claude/reference/best-practices-checklist.md +616 -0
  194. moai_adk/templates/.claude/skills/moai-foundation-claude/reference/claude-code-custom-slash-commands-official.md +729 -0
  195. moai_adk/templates/.claude/skills/moai-foundation-claude/reference/claude-code-hooks-official.md +560 -0
  196. moai_adk/templates/.claude/skills/moai-foundation-claude/reference/claude-code-iam-official.md +635 -0
  197. moai_adk/templates/.claude/skills/moai-foundation-claude/reference/claude-code-memory-official.md +543 -0
  198. moai_adk/templates/.claude/skills/moai-foundation-claude/reference/claude-code-settings-official.md +663 -0
  199. moai_adk/templates/.claude/skills/moai-foundation-claude/reference/claude-code-skills-official.md +113 -0
  200. moai_adk/templates/.claude/skills/moai-foundation-claude/reference/claude-code-sub-agents-official.md +238 -0
  201. moai_adk/templates/.claude/skills/moai-foundation-claude/reference/complete-configuration-guide.md +175 -0
  202. moai_adk/templates/.claude/skills/moai-foundation-claude/reference/skill-examples.md +1674 -0
  203. moai_adk/templates/.claude/skills/moai-foundation-claude/reference/skill-formatting-guide.md +729 -0
  204. moai_adk/templates/.claude/skills/moai-foundation-claude/reference/sub-agents/sub-agent-examples.md +1513 -0
  205. moai_adk/templates/.claude/skills/moai-foundation-claude/reference/sub-agents/sub-agent-formatting-guide.md +1086 -0
  206. moai_adk/templates/.claude/skills/moai-foundation-claude/reference/sub-agents/sub-agent-integration-patterns.md +1100 -0
  207. moai_adk/templates/.claude/skills/moai-foundation-context/SKILL.md +438 -0
  208. moai_adk/templates/.claude/skills/moai-foundation-core/SKILL.md +515 -0
  209. moai_adk/templates/.claude/skills/moai-foundation-core/modules/README.md +296 -0
  210. moai_adk/templates/.claude/skills/moai-foundation-core/modules/agents-reference.md +346 -0
  211. moai_adk/templates/.claude/skills/moai-foundation-core/modules/commands-reference.md +432 -0
  212. moai_adk/templates/.claude/skills/moai-foundation-core/modules/delegation-patterns.md +757 -0
  213. moai_adk/templates/.claude/skills/moai-foundation-core/modules/execution-rules.md +687 -0
  214. moai_adk/templates/.claude/skills/moai-foundation-core/modules/modular-system.md +665 -0
  215. moai_adk/templates/.claude/skills/moai-foundation-core/modules/progressive-disclosure.md +649 -0
  216. moai_adk/templates/.claude/skills/moai-foundation-core/modules/spec-first-tdd.md +864 -0
  217. moai_adk/templates/.claude/skills/moai-foundation-core/modules/token-optimization.md +708 -0
  218. moai_adk/templates/.claude/skills/moai-foundation-core/modules/trust-5-framework.md +981 -0
  219. moai_adk/templates/.claude/skills/moai-foundation-quality/SKILL.md +362 -0
  220. moai_adk/templates/.claude/skills/moai-foundation-quality/examples.md +1232 -0
  221. moai_adk/templates/.claude/skills/moai-foundation-quality/modules/best-practices.md +261 -0
  222. moai_adk/templates/.claude/skills/moai-foundation-quality/modules/integration-patterns.md +194 -0
  223. moai_adk/templates/.claude/skills/moai-foundation-quality/modules/proactive-analysis.md +229 -0
  224. moai_adk/templates/.claude/skills/moai-foundation-quality/modules/trust5-validation.md +169 -0
  225. moai_adk/templates/.claude/skills/moai-foundation-quality/reference.md +1266 -0
  226. moai_adk/templates/.claude/skills/moai-foundation-quality/scripts/quality-gate.sh +668 -0
  227. moai_adk/templates/.claude/skills/moai-foundation-quality/templates/github-actions-quality.yml +481 -0
  228. moai_adk/templates/.claude/skills/moai-foundation-quality/templates/quality-config.yaml +519 -0
  229. moai_adk/templates/.claude/skills/moai-integration-mcp/SKILL.md +352 -0
  230. moai_adk/templates/.claude/skills/moai-integration-mcp/modules/README.md +52 -0
  231. moai_adk/templates/.claude/skills/moai-integration-mcp/modules/error-handling.md +334 -0
  232. moai_adk/templates/.claude/skills/moai-integration-mcp/modules/integration-patterns.md +310 -0
  233. moai_adk/templates/.claude/skills/moai-integration-mcp/modules/security-authentication.md +256 -0
  234. moai_adk/templates/.claude/skills/moai-integration-mcp/modules/server-architecture.md +253 -0
  235. moai_adk/templates/.claude/skills/moai-lang-unified/README.md +133 -0
  236. moai_adk/templates/.claude/skills/moai-lang-unified/SKILL.md +296 -0
  237. moai_adk/templates/.claude/skills/moai-lang-unified/examples.md +1269 -0
  238. moai_adk/templates/.claude/skills/moai-lang-unified/reference.md +331 -0
  239. moai_adk/templates/.claude/skills/moai-library-mermaid/SKILL.md +298 -0
  240. moai_adk/templates/.claude/skills/moai-library-mermaid/advanced-patterns.md +465 -0
  241. moai_adk/templates/.claude/skills/moai-library-mermaid/examples.md +270 -0
  242. moai_adk/templates/.claude/skills/moai-library-mermaid/optimization.md +440 -0
  243. moai_adk/templates/.claude/skills/moai-library-mermaid/reference.md +228 -0
  244. moai_adk/templates/.claude/skills/moai-library-nextra/SKILL.md +316 -0
  245. moai_adk/templates/.claude/skills/moai-library-nextra/advanced-patterns.md +336 -0
  246. moai_adk/templates/.claude/skills/moai-library-nextra/modules/advanced-deployment-patterns.md +182 -0
  247. moai_adk/templates/.claude/skills/moai-library-nextra/modules/advanced-patterns.md +17 -0
  248. moai_adk/templates/.claude/skills/moai-library-nextra/modules/configuration.md +57 -0
  249. moai_adk/templates/.claude/skills/moai-library-nextra/modules/content-architecture-optimization.md +162 -0
  250. moai_adk/templates/.claude/skills/moai-library-nextra/modules/deployment.md +52 -0
  251. moai_adk/templates/.claude/skills/moai-library-nextra/modules/framework-core-configuration.md +186 -0
  252. moai_adk/templates/.claude/skills/moai-library-nextra/modules/i18n-setup.md +55 -0
  253. moai_adk/templates/.claude/skills/moai-library-nextra/modules/mdx-components.md +52 -0
  254. moai_adk/templates/.claude/skills/moai-library-nextra/optimization.md +303 -0
  255. moai_adk/templates/.claude/skills/moai-library-shadcn/SKILL.md +370 -0
  256. moai_adk/templates/.claude/skills/moai-library-shadcn/examples.md +575 -0
  257. moai_adk/templates/.claude/skills/moai-library-shadcn/modules/advanced-patterns.md +394 -0
  258. moai_adk/templates/.claude/skills/moai-library-shadcn/modules/optimization.md +278 -0
  259. moai_adk/templates/.claude/skills/moai-library-shadcn/modules/shadcn-components.md +457 -0
  260. moai_adk/templates/.claude/skills/moai-library-shadcn/modules/shadcn-theming.md +373 -0
  261. moai_adk/templates/.claude/skills/moai-library-shadcn/reference.md +74 -0
  262. moai_adk/templates/.claude/skills/moai-platform-baas/README.md +186 -0
  263. moai_adk/templates/.claude/skills/moai-platform-baas/SKILL.md +290 -0
  264. moai_adk/templates/.claude/skills/moai-platform-baas/examples.md +1225 -0
  265. moai_adk/templates/.claude/skills/moai-platform-baas/reference.md +567 -0
  266. moai_adk/templates/.claude/skills/moai-platform-baas/scripts/provider-selector.py +323 -0
  267. moai_adk/templates/.claude/skills/moai-platform-baas/templates/stack-config.yaml +204 -0
  268. moai_adk/templates/.claude/skills/moai-workflow-jit-docs/SKILL.md +446 -0
  269. moai_adk/templates/.claude/skills/moai-workflow-jit-docs/advanced-patterns.md +379 -0
  270. moai_adk/templates/.claude/skills/moai-workflow-jit-docs/optimization.md +286 -0
  271. moai_adk/templates/.claude/skills/moai-workflow-project/README.md +190 -0
  272. moai_adk/templates/.claude/skills/moai-workflow-project/SKILL.md +387 -0
  273. moai_adk/templates/.claude/skills/moai-workflow-project/__init__.py +520 -0
  274. moai_adk/templates/.claude/skills/moai-workflow-project/complete_workflow_demo_fixed.py +574 -0
  275. moai_adk/templates/.claude/skills/moai-workflow-project/examples/complete_project_setup.py +317 -0
  276. moai_adk/templates/.claude/skills/moai-workflow-project/examples/complete_workflow_demo.py +663 -0
  277. moai_adk/templates/.claude/skills/moai-workflow-project/examples/config-migration-example.json +190 -0
  278. moai_adk/templates/.claude/skills/moai-workflow-project/examples/question-examples.json +135 -0
  279. moai_adk/templates/.claude/skills/moai-workflow-project/examples/quick_start.py +196 -0
  280. moai_adk/templates/.claude/skills/moai-workflow-project/modules/__init__.py +17 -0
  281. moai_adk/templates/.claude/skills/moai-workflow-project/modules/advanced-patterns.md +158 -0
  282. moai_adk/templates/.claude/skills/moai-workflow-project/modules/ask_user_integration.py +340 -0
  283. moai_adk/templates/.claude/skills/moai-workflow-project/modules/batch_questions.py +713 -0
  284. moai_adk/templates/.claude/skills/moai-workflow-project/modules/config_manager.py +538 -0
  285. moai_adk/templates/.claude/skills/moai-workflow-project/modules/documentation_manager.py +1336 -0
  286. moai_adk/templates/.claude/skills/moai-workflow-project/modules/language_initializer.py +730 -0
  287. moai_adk/templates/.claude/skills/moai-workflow-project/modules/migration_manager.py +608 -0
  288. moai_adk/templates/.claude/skills/moai-workflow-project/modules/template_optimizer.py +1005 -0
  289. moai_adk/templates/.claude/skills/moai-workflow-project/schemas/config-schema.json +316 -0
  290. moai_adk/templates/.claude/skills/moai-workflow-project/schemas/tab_schema.json +1362 -0
  291. moai_adk/templates/.claude/skills/moai-workflow-project/templates/config-template.json +71 -0
  292. moai_adk/templates/.claude/skills/moai-workflow-project/templates/doc-templates/product-template.md +44 -0
  293. moai_adk/templates/.claude/skills/moai-workflow-project/templates/doc-templates/structure-template.md +48 -0
  294. moai_adk/templates/.claude/skills/moai-workflow-project/templates/doc-templates/tech-template.md +71 -0
  295. moai_adk/templates/.claude/skills/moai-workflow-project/templates/question-templates/config-manager-setup.json +109 -0
  296. moai_adk/templates/.claude/skills/moai-workflow-project/templates/question-templates/language-initializer.json +228 -0
  297. moai_adk/templates/.claude/skills/moai-workflow-project/templates/question-templates/menu-project-config.json +130 -0
  298. moai_adk/templates/.claude/skills/moai-workflow-project/templates/question-templates/project-batch-questions.json +97 -0
  299. moai_adk/templates/.claude/skills/moai-workflow-project/templates/question-templates/spec-workflow-setup.json +150 -0
  300. moai_adk/templates/.claude/skills/moai-workflow-project/test_integration_simple.py +436 -0
  301. moai_adk/templates/.claude/skills/moai-workflow-templates/SKILL.md +374 -0
  302. moai_adk/templates/.claude/skills/moai-workflow-templates/modules/code-templates.md +124 -0
  303. moai_adk/templates/.claude/skills/moai-workflow-templates/modules/feedback-templates.md +100 -0
  304. moai_adk/templates/.claude/skills/moai-workflow-templates/modules/template-optimizer.md +138 -0
  305. moai_adk/templates/.claude/skills/moai-workflow-testing/LICENSE.txt +202 -0
  306. moai_adk/templates/.claude/skills/moai-workflow-testing/SKILL.md +453 -0
  307. moai_adk/templates/.claude/skills/moai-workflow-testing/advanced-patterns.md +576 -0
  308. moai_adk/templates/.claude/skills/moai-workflow-testing/examples/ai-powered-testing.py +294 -0
  309. moai_adk/templates/.claude/skills/moai-workflow-testing/examples/console_logging.py +35 -0
  310. moai_adk/templates/.claude/skills/moai-workflow-testing/examples/element_discovery.py +40 -0
  311. moai_adk/templates/.claude/skills/moai-workflow-testing/examples/static_html_automation.py +34 -0
  312. moai_adk/templates/.claude/skills/moai-workflow-testing/modules/README.md +220 -0
  313. moai_adk/templates/.claude/skills/moai-workflow-testing/modules/ai-debugging.md +845 -0
  314. moai_adk/templates/.claude/skills/moai-workflow-testing/modules/automated-code-review.md +1416 -0
  315. moai_adk/templates/.claude/skills/moai-workflow-testing/modules/performance-optimization.md +1234 -0
  316. moai_adk/templates/.claude/skills/moai-workflow-testing/modules/smart-refactoring.md +1243 -0
  317. moai_adk/templates/.claude/skills/moai-workflow-testing/modules/tdd-context7.md +1260 -0
  318. moai_adk/templates/.claude/skills/moai-workflow-testing/optimization.md +505 -0
  319. moai_adk/templates/.claude/skills/moai-workflow-testing/reference/playwright-best-practices.md +57 -0
  320. moai_adk/templates/.claude/skills/moai-workflow-testing/scripts/with_server.py +218 -0
  321. moai_adk/templates/.claude/skills/moai-workflow-testing/templates/alfred-integration.md +376 -0
  322. moai_adk/templates/.claude/skills/moai-workflow-testing/workflows/enterprise-testing-workflow.py +571 -0
  323. moai_adk/templates/.claude/skills/moai-worktree/SKILL.md +410 -0
  324. moai_adk/templates/.claude/skills/moai-worktree/examples.md +606 -0
  325. moai_adk/templates/.claude/skills/moai-worktree/modules/integration-patterns.md +982 -0
  326. moai_adk/templates/.claude/skills/moai-worktree/modules/parallel-development.md +778 -0
  327. moai_adk/templates/.claude/skills/moai-worktree/modules/worktree-commands.md +646 -0
  328. moai_adk/templates/.claude/skills/moai-worktree/modules/worktree-management.md +782 -0
  329. moai_adk/templates/.claude/skills/moai-worktree/reference.md +357 -0
  330. moai_adk/templates/.git-hooks/pre-commit +103 -41
  331. moai_adk/templates/.git-hooks/pre-push +116 -21
  332. moai_adk/templates/.github/workflows/ci-universal.yml +513 -0
  333. moai_adk/templates/.github/workflows/security-secrets-check.yml +179 -0
  334. moai_adk/templates/.gitignore +184 -44
  335. moai_adk/templates/.mcp.json +7 -9
  336. moai_adk/templates/.moai/cache/personalization.json +10 -0
  337. moai_adk/templates/.moai/config/config.yaml +344 -0
  338. moai_adk/templates/.moai/config/presets/manual.yaml +28 -0
  339. moai_adk/templates/.moai/config/presets/personal.yaml +30 -0
  340. moai_adk/templates/.moai/config/presets/team.yaml +33 -0
  341. moai_adk/templates/.moai/config/questions/_schema.yaml +79 -0
  342. moai_adk/templates/.moai/config/questions/tab1-user.yaml +108 -0
  343. moai_adk/templates/.moai/config/questions/tab2-project.yaml +122 -0
  344. moai_adk/templates/.moai/config/questions/tab3-git.yaml +542 -0
  345. moai_adk/templates/.moai/config/questions/tab4-quality.yaml +167 -0
  346. moai_adk/templates/.moai/config/questions/tab5-system.yaml +152 -0
  347. moai_adk/templates/.moai/config/sections/git-strategy.yaml +40 -0
  348. moai_adk/templates/.moai/config/sections/language.yaml +11 -0
  349. moai_adk/templates/.moai/config/sections/project.yaml +13 -0
  350. moai_adk/templates/.moai/config/sections/quality.yaml +15 -0
  351. moai_adk/templates/.moai/config/sections/system.yaml +14 -0
  352. moai_adk/templates/.moai/config/sections/user.yaml +5 -0
  353. moai_adk/templates/.moai/config/statusline-config.yaml +86 -0
  354. moai_adk/templates/.moai/scripts/setup-glm.py +136 -0
  355. moai_adk/templates/CLAUDE.md +382 -501
  356. moai_adk/utils/__init__.py +24 -1
  357. moai_adk/utils/banner.py +7 -10
  358. moai_adk/utils/common.py +16 -30
  359. moai_adk/utils/link_validator.py +4 -12
  360. moai_adk/utils/safe_file_reader.py +2 -6
  361. moai_adk/utils/timeout.py +160 -0
  362. moai_adk/utils/toon_utils.py +256 -0
  363. moai_adk/version.py +22 -0
  364. moai_adk-0.32.8.dist-info/METADATA +2478 -0
  365. moai_adk-0.32.8.dist-info/RECORD +396 -0
  366. {moai_adk-0.25.4.dist-info → moai_adk-0.32.8.dist-info}/WHEEL +1 -1
  367. {moai_adk-0.25.4.dist-info → moai_adk-0.32.8.dist-info}/entry_points.txt +1 -0
  368. moai_adk/cli/commands/backup.py +0 -82
  369. moai_adk/cli/commands/improve_user_experience.py +0 -348
  370. moai_adk/cli/commands/migrate.py +0 -158
  371. moai_adk/cli/commands/validate_links.py +0 -118
  372. moai_adk/templates/.github/workflows/moai-gitflow.yml +0 -413
  373. moai_adk/templates/.github/workflows/moai-release-create.yml +0 -100
  374. moai_adk/templates/.github/workflows/moai-release-pipeline.yml +0 -188
  375. moai_adk/utils/user_experience.py +0 -531
  376. moai_adk-0.25.4.dist-info/METADATA +0 -2279
  377. moai_adk-0.25.4.dist-info/RECORD +0 -112
  378. {moai_adk-0.25.4.dist-info → moai_adk-0.32.8.dist-info}/licenses/LICENSE +0 -0
@@ -1,30 +1,47 @@
1
1
  """
2
- Comprehensive Error Handling & Recovery System for Research Workflows
3
-
4
- Provides:
2
+ Comprehensive Error Recovery System - Phase 3 Enterprise Edition
3
+
4
+ Advanced enterprise-grade error recovery with automatic healing, data consistency checks,
5
+ rollback capabilities, state persistence, and self-healing mechanisms.
6
+
7
+ Phase 3 Features:
8
+ - Event-driven error recovery architecture
9
+ - Automatic system recovery from all failure modes
10
+ - Data consistency checks and repair mechanisms
11
+ - Comprehensive rollback and state persistence
12
+ - Self-healing capabilities with circuit breakers
13
+ - Dead letter queue handling for failed operations
14
+ - Multi-strategy recovery with exponential backoff
15
+ - Disaster recovery and business continuity
16
+ - Real-time failure mode analysis and prediction
17
+
18
+ Legacy Features:
5
19
  - Error detection and classification
6
20
  - Recovery procedures and fallback mechanisms
7
21
  - Integration with research hooks, agents, and skills
8
22
  - Documentation of error handling procedures
9
23
  - Troubleshooting guides and automated recovery
10
-
11
- Features:
12
24
  - Multi-level error handling (critical, warning, info)
13
- - Automatic recovery mechanisms
14
25
  - Manual recovery procedures
15
26
  - Error logging and tracking
16
27
  - System health monitoring
17
28
  - Emergency recovery procedures
18
29
  """
19
30
 
31
+ import asyncio
32
+ import hashlib
20
33
  import json
21
34
  import logging
35
+ import os
22
36
  import sys
37
+ import tempfile
23
38
  import threading
24
39
  import time
25
40
  import traceback
26
- from dataclasses import asdict, dataclass
27
- from datetime import datetime, timezone
41
+ import uuid
42
+ from collections import defaultdict, deque
43
+ from dataclasses import asdict, dataclass, field
44
+ from datetime import datetime, timedelta, timezone
28
45
  from enum import Enum
29
46
  from pathlib import Path
30
47
  from typing import Any, Callable, Dict, List, Optional
@@ -34,7 +51,7 @@ logging.basicConfig(
34
51
  level=logging.INFO,
35
52
  format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
36
53
  handlers=[
37
- logging.FileHandler("/tmp/moai_error_recovery.log"),
54
+ logging.FileHandler(Path(tempfile.gettempdir()) / "moai_error_recovery.log"),
38
55
  logging.StreamHandler(sys.stdout),
39
56
  ],
40
57
  )
@@ -67,6 +84,69 @@ class ErrorCategory(Enum):
67
84
  USER_INPUT = "user_input" # User input errors
68
85
 
69
86
 
87
+ # Phase 3: Advanced Error Recovery Enums
88
+
89
+
90
+ class FailureMode(Enum):
91
+ """Types of failure modes in the system"""
92
+
93
+ HOOK_EXECUTION_FAILURE = "hook_execution_failure"
94
+ RESOURCE_EXHAUSTION = "resource_exhaustion"
95
+ DATA_CORRUPTION = "data_corruption"
96
+ NETWORK_FAILURE = "network_failure"
97
+ SYSTEM_OVERLOAD = "system_overload"
98
+ CONFIGURATION_ERROR = "configuration_error"
99
+ TIMEOUT_FAILURE = "timeout_failure"
100
+ MEMORY_LEAK = "memory_leak"
101
+ DEADLOCK = "deadlock"
102
+ AUTHENTICATION_FAILURE = "authentication_failure"
103
+ VALIDATION_FAILURE = "validation_failure"
104
+ EXTERNAL_SERVICE_FAILURE = "external_service_failure"
105
+ STORAGE_FAILURE = "storage_failure"
106
+ CONCURRENCY_ISSUE = "concurrency_issue"
107
+ CIRCUIT_BREAKER_TRIPPED = "circuit_breaker_tripped"
108
+ CASCADE_FAILURE = "cascade_failure"
109
+
110
+
111
+ class RecoveryStrategy(Enum):
112
+ """Recovery strategies for different failure modes"""
113
+
114
+ RETRY_WITH_BACKOFF = "retry_with_backoff"
115
+ CIRCUIT_BREAKER = "circuit_breaker"
116
+ ROLLBACK = "rollback"
117
+ FAILOVER = "failover"
118
+ DEGRADE_SERVICE = "degrade_service"
119
+ RESTART_COMPONENT = "restart_component"
120
+ DATA_REPAIR = "data_repair"
121
+ CLEAR_CACHE = "clear_cache"
122
+ SCALE_RESOURCES = "scale_resources"
123
+ NOTIFY_ADMIN = "notify_admin"
124
+ QUARANTINE = "quarantine"
125
+ IGNORE = "ignore"
126
+ ISOLATE_COMPONENT = "isolate_component"
127
+ EMERGENCY_STOP = "emergency_stop"
128
+
129
+
130
+ class ConsistencyLevel(Enum):
131
+ """Data consistency levels"""
132
+
133
+ STRONG = "strong" # Immediate consistency
134
+ EVENTUAL = "eventual" # Eventually consistent
135
+ WEAK = "weak" # Weak consistency
136
+ CUSTOM = "custom" # Custom consistency rules
137
+
138
+
139
+ class RecoveryStatus(Enum):
140
+ """Recovery operation status"""
141
+
142
+ PENDING = "pending"
143
+ IN_PROGRESS = "in_progress"
144
+ COMPLETED = "completed"
145
+ FAILED = "failed"
146
+ CANCELLED = "cancelled"
147
+ ROLLED_BACK = "rolled_back"
148
+
149
+
70
150
  @dataclass
71
151
  class ErrorReport:
72
152
  """Comprehensive error report structure"""
@@ -111,6 +191,140 @@ class RecoveryResult:
111
191
  next_actions: List[str] = None
112
192
 
113
193
 
194
+ # Phase 3: Advanced Error Recovery Dataclasses
195
+
196
+
197
+ @dataclass
198
+ class FailureEvent:
199
+ """Represents a failure event in the system"""
200
+
201
+ failure_id: str
202
+ failure_mode: FailureMode
203
+ timestamp: datetime
204
+ component: str
205
+ description: str
206
+ severity: str # "low", "medium", "high", "critical"
207
+ context: Dict[str, Any] = field(default_factory=dict)
208
+ error_details: Optional[Dict[str, Any]] = None
209
+ affected_operations: List[str] = field(default_factory=list)
210
+ auto_recovery_eligible: bool = True
211
+ retry_count: int = 0
212
+ metadata: Dict[str, Any] = field(default_factory=dict)
213
+ parent_failure_id: Optional[str] = None # For cascade failures
214
+ root_cause: Optional[str] = None
215
+
216
+ def to_dict(self) -> Dict[str, Any]:
217
+ """Convert to dictionary for serialization"""
218
+ return {
219
+ "failure_id": self.failure_id,
220
+ "failure_mode": self.failure_mode.value,
221
+ "timestamp": self.timestamp.isoformat(),
222
+ "component": self.component,
223
+ "description": self.description,
224
+ "severity": self.severity,
225
+ "context": self.context,
226
+ "error_details": self.error_details,
227
+ "affected_operations": self.affected_operations,
228
+ "auto_recovery_eligible": self.auto_recovery_eligible,
229
+ "retry_count": self.retry_count,
230
+ "metadata": self.metadata,
231
+ "parent_failure_id": self.parent_failure_id,
232
+ "root_cause": self.root_cause,
233
+ }
234
+
235
+ @classmethod
236
+ def from_dict(cls, data: Dict[str, Any]) -> "FailureEvent":
237
+ """Create from dictionary"""
238
+ return cls(
239
+ failure_id=data["failure_id"],
240
+ failure_mode=FailureMode(data["failure_mode"]),
241
+ timestamp=datetime.fromisoformat(data["timestamp"]),
242
+ component=data["component"],
243
+ description=data["description"],
244
+ severity=data["severity"],
245
+ context=data.get("context", {}),
246
+ error_details=data.get("error_details"),
247
+ affected_operations=data.get("affected_operations", []),
248
+ auto_recovery_eligible=data.get("auto_recovery_eligible", True),
249
+ retry_count=data.get("retry_count", 0),
250
+ metadata=data.get("metadata", {}),
251
+ parent_failure_id=data.get("parent_failure_id"),
252
+ root_cause=data.get("root_cause"),
253
+ )
254
+
255
+
256
+ @dataclass
257
+ class AdvancedRecoveryAction:
258
+ """Advanced recovery action with enhanced capabilities"""
259
+
260
+ action_id: str
261
+ failure_id: str
262
+ strategy: RecoveryStrategy
263
+ timestamp: datetime
264
+ status: RecoveryStatus = RecoveryStatus.PENDING
265
+ description: str = ""
266
+ parameters: Dict[str, Any] = field(default_factory=dict)
267
+ execution_log: List[str] = field(default_factory=list)
268
+ rollback_available: bool = True
269
+ timeout_seconds: float = 300.0
270
+ retry_attempts: int = 0
271
+ max_retries: int = 3
272
+ rollback_action_id: Optional[str] = None
273
+ dependencies: List[str] = field(default_factory=list) # Other actions this depends on
274
+ priority: int = 5 # 1-10, lower number = higher priority
275
+
276
+ def to_dict(self) -> Dict[str, Any]:
277
+ """Convert to dictionary for serialization"""
278
+ return {
279
+ "action_id": self.action_id,
280
+ "failure_id": self.failure_id,
281
+ "strategy": self.strategy.value,
282
+ "timestamp": self.timestamp.isoformat(),
283
+ "status": self.status.value,
284
+ "description": self.description,
285
+ "parameters": self.parameters,
286
+ "execution_log": self.execution_log,
287
+ "rollback_available": self.rollback_available,
288
+ "timeout_seconds": self.timeout_seconds,
289
+ "retry_attempts": self.retry_attempts,
290
+ "max_retries": self.max_retries,
291
+ "rollback_action_id": self.rollback_action_id,
292
+ "dependencies": self.dependencies,
293
+ "priority": self.priority,
294
+ }
295
+
296
+
297
+ @dataclass
298
+ class SystemSnapshot:
299
+ """Represents a system state snapshot for rollback"""
300
+
301
+ snapshot_id: str
302
+ timestamp: datetime
303
+ component_states: Dict[str, Dict[str, Any]]
304
+ configuration_hash: str
305
+ data_checksums: Dict[str, str]
306
+ metadata: Dict[str, Any] = field(default_factory=dict)
307
+ parent_snapshot_id: Optional[str] = None
308
+ is_rollback_point: bool = False
309
+ description: str = ""
310
+ consistency_level: ConsistencyLevel = ConsistencyLevel.EVENTUAL
311
+
312
+ def to_dict(self) -> Dict[str, Any]:
313
+ """Convert to dictionary for serialization"""
314
+ return {
315
+ "snapshot_id": self.snapshot_id,
316
+ "timestamp": self.timestamp.isoformat(),
317
+ "component_states": self.component_states,
318
+ "configuration_hash": self.configuration_hash,
319
+ "data_checksums": self.data_checksums,
320
+ "metadata": self.metadata,
321
+ "parent_snapshot_id": self.parent_snapshot_id,
322
+ "is_rollback_point": self.is_rollback_point,
323
+ "description": self.description,
324
+ "consistency_level": self.consistency_level.value,
325
+ }
326
+
327
+
114
328
  class ErrorRecoverySystem:
115
329
  """Comprehensive error handling and recovery system"""
116
330
 
@@ -141,14 +355,15 @@ class ErrorRecoverySystem:
141
355
  # Initialize recovery actions
142
356
  self._initialize_recovery_actions()
143
357
 
358
+ # Phase 3: Advanced recovery system initialization
359
+ self._initialize_phase3_components()
360
+
144
361
  # Background monitoring thread
145
362
  self.monitoring_active = True
146
- self.monitor_thread = threading.Thread(
147
- target=self._background_monitoring, daemon=True
148
- )
363
+ self.monitor_thread = threading.Thread(target=self._background_monitoring, daemon=True)
149
364
  self.monitor_thread.start()
150
365
 
151
- logger.info("Error Recovery System initialized")
366
+ logger.info("Error Recovery System initialized with Phase 3 enterprise features")
152
367
 
153
368
  def handle_error(
154
369
  self,
@@ -211,9 +426,7 @@ class ErrorRecoverySystem:
211
426
  logger.info(f"Automatic recovery successful for error {error_id}")
212
427
  self.active_errors.pop(error_id, None)
213
428
  else:
214
- logger.warning(
215
- f"Automatic recovery failed for error {error_id}: {recovery_result.message}"
216
- )
429
+ logger.warning(f"Automatic recovery failed for error {error_id}: {recovery_result.message}")
217
430
 
218
431
  # Update system health
219
432
  self._update_system_health()
@@ -249,6 +462,7 @@ class ErrorRecoverySystem:
249
462
  success=False,
250
463
  action_name=action_name,
251
464
  message=f"Error {error_id} not found in active errors",
465
+ duration=0.0,
252
466
  )
253
467
 
254
468
  if action_name not in self.recovery_actions:
@@ -256,6 +470,7 @@ class ErrorRecoverySystem:
256
470
  success=False,
257
471
  action_name=action_name,
258
472
  message=f"Recovery action {action_name} not found",
473
+ duration=0.0,
259
474
  )
260
475
 
261
476
  error_report = self.active_errors[error_id]
@@ -316,14 +531,19 @@ class ErrorRecoverySystem:
316
531
  """
317
532
  self._update_system_health()
318
533
 
534
+ last_check: datetime = self.system_health["last_check"] # type: ignore[assignment]
535
+ error_stats: Dict[str, Any] = self.error_stats # type: ignore[assignment]
536
+ issues: List[str] = self.system_health["issues"] # type: ignore[assignment]
537
+ metrics: Dict[str, Any] = self.system_health["metrics"] # type: ignore[assignment]
538
+
319
539
  return {
320
540
  "status": self.system_health["status"],
321
- "last_check": self.system_health["last_check"].isoformat(),
541
+ "last_check": last_check.isoformat(),
322
542
  "active_errors": len(self.active_errors),
323
543
  "total_errors": len(self.error_history),
324
- "error_stats": self.error_stats.copy(),
325
- "issues": self.system_health["issues"].copy(),
326
- "metrics": self.system_health["metrics"].copy(),
544
+ "error_stats": error_stats.copy(),
545
+ "issues": issues.copy(),
546
+ "metrics": metrics.copy(),
327
547
  "recovery_actions_available": len(self.recovery_actions),
328
548
  }
329
549
 
@@ -340,8 +560,8 @@ class ErrorRecoverySystem:
340
560
  recent_errors = self.error_history[-limit:]
341
561
 
342
562
  # Categorize errors
343
- by_severity = {}
344
- by_category = {}
563
+ by_severity: Dict[str, List[str]] = {}
564
+ by_category: Dict[str, List[str]] = {}
345
565
 
346
566
  for error in recent_errors:
347
567
  # By severity
@@ -386,19 +606,24 @@ class ErrorRecoverySystem:
386
606
  Returns:
387
607
  Troubleshooting guide with solutions
388
608
  """
609
+ common_issues: List[Dict[str, Any]] = []
610
+ recovery_procedures: Dict[str, Dict[str, Any]] = {}
611
+ prevention_tips: List[str] = []
612
+ emergency_procedures: List[Dict[str, str]] = []
613
+
389
614
  guide = {
390
615
  "generated_at": datetime.now(timezone.utc).isoformat(),
391
- "common_issues": [],
392
- "recovery_procedures": {},
393
- "prevention_tips": [],
394
- "emergency_procedures": [],
616
+ "common_issues": common_issues,
617
+ "recovery_procedures": recovery_procedures,
618
+ "prevention_tips": prevention_tips,
619
+ "emergency_procedures": emergency_procedures,
395
620
  }
396
621
 
397
622
  # Analyze common issues
398
623
  error_patterns = self._identify_error_patterns(self.error_history)
399
624
  for pattern, frequency in error_patterns.items():
400
625
  if frequency > 2: # Issues that occurred more than twice
401
- guide["common_issues"].append(
626
+ common_issues.append(
402
627
  {
403
628
  "pattern": pattern,
404
629
  "frequency": frequency,
@@ -409,7 +634,7 @@ class ErrorRecoverySystem:
409
634
 
410
635
  # Generate recovery procedures
411
636
  for action_name, action in self.recovery_actions.items():
412
- guide["recovery_procedures"][action_name] = {
637
+ recovery_procedures[action_name] = {
413
638
  "description": action.description,
414
639
  "type": action.action_type,
415
640
  "for_severities": [s.value for s in action.severity_filter],
@@ -417,10 +642,10 @@ class ErrorRecoverySystem:
417
642
  }
418
643
 
419
644
  # Prevention tips
420
- guide["prevention_tips"] = self._generate_prevention_tips()
645
+ prevention_tips.extend(self._generate_prevention_tips())
421
646
 
422
647
  # Emergency procedures
423
- guide["emergency_procedures"] = self._generate_emergency_procedures()
648
+ emergency_procedures.extend(self._generate_emergency_procedures())
424
649
 
425
650
  return guide
426
651
 
@@ -440,9 +665,7 @@ class ErrorRecoverySystem:
440
665
  removed_count = len(old_errors)
441
666
 
442
667
  # Keep only recent errors
443
- self.error_history = [
444
- e for e in self.error_history if e.timestamp >= cutoff_date
445
- ]
668
+ self.error_history = [e for e in self.error_history if e.timestamp >= cutoff_date]
446
669
 
447
670
  # Save updated error history
448
671
  self._save_error_history()
@@ -595,11 +818,10 @@ class ErrorRecoverySystem:
595
818
  success=False,
596
819
  action_name="none",
597
820
  message="No suitable automatic recovery action succeeded",
821
+ duration=0.0,
598
822
  )
599
823
 
600
- def _restart_research_engines(
601
- self, error_report: ErrorReport, parameters: Dict[str, Any]
602
- ) -> bool:
824
+ def _restart_research_engines(self, error_report: ErrorReport, parameters: Dict[str, Any]) -> bool:
603
825
  """Restart research engines and clear caches"""
604
826
  try:
605
827
  logger.info("Restarting research engines...")
@@ -632,9 +854,7 @@ class ErrorRecoverySystem:
632
854
  logger.error(f"Failed to restart research engines: {str(e)}")
633
855
  return False
634
856
 
635
- def _restore_config_backup(
636
- self, error_report: ErrorReport, parameters: Dict[str, Any]
637
- ) -> bool:
857
+ def _restore_config_backup(self, error_report: ErrorReport, parameters: Dict[str, Any]) -> bool:
638
858
  """Restore configuration from backup"""
639
859
  try:
640
860
  logger.info("Restoring configuration from backup...")
@@ -665,9 +885,7 @@ class ErrorRecoverySystem:
665
885
  logger.error(f"Failed to restore configuration: {str(e)}")
666
886
  return False
667
887
 
668
- def _clear_agent_cache(
669
- self, error_report: ErrorReport, parameters: Dict[str, Any]
670
- ) -> bool:
888
+ def _clear_agent_cache(self, error_report: ErrorReport, parameters: Dict[str, Any]) -> bool:
671
889
  """Clear agent communication cache"""
672
890
  try:
673
891
  logger.info("Clearing agent cache...")
@@ -695,17 +913,18 @@ class ErrorRecoverySystem:
695
913
  logger.error(f"Failed to clear agent cache: {str(e)}")
696
914
  return False
697
915
 
698
- def _validate_research_integrity(
699
- self, error_report: ErrorReport, parameters: Dict[str, Any]
700
- ) -> Dict[str, Any]:
916
+ def _validate_research_integrity(self, error_report: ErrorReport, parameters: Dict[str, Any]) -> Dict[str, Any]:
701
917
  """Validate research component integrity"""
918
+ issues_found: List[str] = []
919
+ repairs_made: List[str] = []
920
+
702
921
  validation_results = {
703
922
  "skills_valid": True,
704
923
  "agents_valid": True,
705
924
  "commands_valid": True,
706
925
  "hooks_valid": True,
707
- "issues_found": [],
708
- "repairs_made": [],
926
+ "issues_found": issues_found,
927
+ "repairs_made": repairs_made,
709
928
  }
710
929
 
711
930
  try:
@@ -717,15 +936,11 @@ class ErrorRecoverySystem:
717
936
  for skill_file in skills_dir.glob("*.md"):
718
937
  if not self._validate_skill_file(skill_file):
719
938
  validation_results["skills_valid"] = False
720
- validation_results["issues_found"].append(
721
- f"Invalid skill file: {skill_file}"
722
- )
939
+ issues_found.append(f"Invalid skill file: {skill_file}")
723
940
 
724
941
  # Attempt repair
725
942
  if self._repair_skill_file(skill_file):
726
- validation_results["repairs_made"].append(
727
- f"Repaired: {skill_file}"
728
- )
943
+ repairs_made.append(f"Repaired: {skill_file}")
729
944
 
730
945
  # Validate agents
731
946
  agents_dir = self.project_root / ".claude" / "agents" / "alfred"
@@ -733,9 +948,7 @@ class ErrorRecoverySystem:
733
948
  for agent_file in agents_dir.glob("*.md"):
734
949
  if not self._validate_agent_file(agent_file):
735
950
  validation_results["agents_valid"] = False
736
- validation_results["issues_found"].append(
737
- f"Invalid agent file: {agent_file}"
738
- )
951
+ issues_found.append(f"Invalid agent file: {agent_file}")
739
952
 
740
953
  # Validate commands
741
954
  commands_dir = self.project_root / ".claude" / "commands" / "alfred"
@@ -743,12 +956,10 @@ class ErrorRecoverySystem:
743
956
  for command_file in commands_dir.glob("*.md"):
744
957
  if not self._validate_command_file(command_file):
745
958
  validation_results["commands_valid"] = False
746
- validation_results["issues_found"].append(
747
- f"Invalid command file: {command_file}"
748
- )
959
+ issues_found.append(f"Invalid command file: {command_file}")
749
960
 
750
961
  logger.info(
751
- f"Research integrity validation completed. Issues: {len(validation_results['issues_found'])}, Repairs: {len(validation_results['repairs_made'])}"
962
+ f"Research integrity validation completed. Issues: {len(issues_found)}, Repairs: {len(repairs_made)}"
752
963
  )
753
964
 
754
965
  except Exception as e:
@@ -757,9 +968,7 @@ class ErrorRecoverySystem:
757
968
 
758
969
  return validation_results
759
970
 
760
- def _rollback_last_changes(
761
- self, error_report: ErrorReport, parameters: Dict[str, Any]
762
- ) -> bool:
971
+ def _rollback_last_changes(self, error_report: ErrorReport, parameters: Dict[str, Any]) -> bool:
763
972
  """Rollback last research integration changes"""
764
973
  try:
765
974
  logger.info("Rolling back last research changes...")
@@ -791,9 +1000,7 @@ class ErrorRecoverySystem:
791
1000
  logger.error(f"Rollback operation failed: {str(e)}")
792
1001
  return False
793
1002
 
794
- def _reset_system_state(
795
- self, error_report: ErrorReport, parameters: Dict[str, Any]
796
- ) -> bool:
1003
+ def _reset_system_state(self, error_report: ErrorReport, parameters: Dict[str, Any]) -> bool:
797
1004
  """Reset system to known good state"""
798
1005
  try:
799
1006
  logger.info("Resetting system to known good state...")
@@ -826,9 +1033,7 @@ class ErrorRecoverySystem:
826
1033
  logger.error(f"System state reset failed: {str(e)}")
827
1034
  return False
828
1035
 
829
- def _optimize_performance(
830
- self, error_report: ErrorReport, parameters: Dict[str, Any]
831
- ) -> bool:
1036
+ def _optimize_performance(self, error_report: ErrorReport, parameters: Dict[str, Any]) -> bool:
832
1037
  """Optimize system performance"""
833
1038
  try:
834
1039
  logger.info("Optimizing system performance...")
@@ -860,9 +1065,7 @@ class ErrorRecoverySystem:
860
1065
  logger.error(f"Performance optimization failed: {str(e)}")
861
1066
  return False
862
1067
 
863
- def _free_resources(
864
- self, error_report: ErrorReport, parameters: Dict[str, Any]
865
- ) -> bool:
1068
+ def _free_resources(self, error_report: ErrorReport, parameters: Dict[str, Any]) -> bool:
866
1069
  """Free up system resources"""
867
1070
  try:
868
1071
  logger.info("Freeing up system resources...")
@@ -888,7 +1091,7 @@ class ErrorRecoverySystem:
888
1091
  def _generate_error_id(self) -> str:
889
1092
  """Generate unique error ID"""
890
1093
  timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
891
- random_suffix = hashlib.md5(os.urandom(4)).hexdigest()[:6]
1094
+ random_suffix = hashlib.md5(os.urandom(4), usedforsecurity=False).hexdigest()[:6]
892
1095
  return f"ERR_{timestamp}_{random_suffix}"
893
1096
 
894
1097
  def _log_error(self, error_report: ErrorReport):
@@ -896,8 +1099,8 @@ class ErrorRecoverySystem:
896
1099
  # Log to file
897
1100
  error_file = self.error_log_dir / f"error_{error_report.id}.json"
898
1101
  try:
899
- with open(error_file, "w") as f:
900
- json.dump(asdict(error_report), f, indent=2, default=str)
1102
+ with open(error_file, "w", encoding="utf-8") as f:
1103
+ json.dump(asdict(error_report), f, indent=2, default=str, ensure_ascii=False)
901
1104
  except Exception as e:
902
1105
  logger.error(f"Failed to log error to file: {str(e)}")
903
1106
 
@@ -933,14 +1136,8 @@ class ErrorRecoverySystem:
933
1136
  current_time = datetime.now(timezone.utc)
934
1137
 
935
1138
  # Determine system status
936
- critical_errors = [
937
- e
938
- for e in self.active_errors.values()
939
- if e.severity == ErrorSeverity.CRITICAL
940
- ]
941
- high_errors = [
942
- e for e in self.active_errors.values() if e.severity == ErrorSeverity.HIGH
943
- ]
1139
+ critical_errors = [e for e in self.active_errors.values() if e.severity == ErrorSeverity.CRITICAL]
1140
+ high_errors = [e for e in self.active_errors.values() if e.severity == ErrorSeverity.HIGH]
944
1141
 
945
1142
  if critical_errors:
946
1143
  self.system_health["status"] = "critical"
@@ -965,16 +1162,8 @@ class ErrorRecoverySystem:
965
1162
  "type": "active_errors",
966
1163
  "count": len(self.active_errors),
967
1164
  "severity_distribution": {
968
- severity: len(
969
- [
970
- e
971
- for e in self.active_errors.values()
972
- if e.severity.value == severity
973
- ]
974
- )
975
- for severity in set(
976
- e.severity.value for e in self.active_errors.values()
977
- )
1165
+ severity: len([e for e in self.active_errors.values() if e.severity.value == severity])
1166
+ for severity in set(e.severity.value for e in self.active_errors.values())
978
1167
  },
979
1168
  }
980
1169
  ]
@@ -996,20 +1185,16 @@ class ErrorRecoverySystem:
996
1185
  def _check_error_patterns(self):
997
1186
  """Check for concerning error patterns"""
998
1187
  recent_errors = [
999
- e
1000
- for e in self.error_history
1001
- if (datetime.now(timezone.utc) - e.timestamp).total_seconds() < 300
1188
+ e for e in self.error_history if (datetime.now(timezone.utc) - e.timestamp).total_seconds() < 300
1002
1189
  ] # Last 5 minutes
1003
1190
 
1004
1191
  # Check for error bursts
1005
1192
  if len(recent_errors) > 10:
1006
- logger.warning(
1007
- f"High error rate detected: {len(recent_errors)} errors in last 5 minutes"
1008
- )
1193
+ logger.warning(f"High error rate detected: {len(recent_errors)} errors in last 5 minutes")
1009
1194
 
1010
1195
  # Check for repeated errors
1011
1196
  error_messages = [e.message for e in recent_errors]
1012
- message_counts = {}
1197
+ message_counts: Dict[str, int] = {}
1013
1198
  for msg in error_messages:
1014
1199
  message_counts[msg] = message_counts.get(msg, 0) + 1
1015
1200
 
@@ -1027,7 +1212,7 @@ class ErrorRecoverySystem:
1027
1212
 
1028
1213
  def _identify_error_patterns(self, errors: List[ErrorReport]) -> Dict[str, int]:
1029
1214
  """Identify common error patterns"""
1030
- patterns = {}
1215
+ patterns: Dict[str, int] = {}
1031
1216
 
1032
1217
  for error in errors:
1033
1218
  # Pattern by exception type
@@ -1083,7 +1268,7 @@ class ErrorRecoverySystem:
1083
1268
  tips = []
1084
1269
 
1085
1270
  # Add tips based on common error categories
1086
- category_counts = {}
1271
+ category_counts: Dict[str, int] = {}
1087
1272
  for error in self.error_history:
1088
1273
  category = error.category.value
1089
1274
  category_counts[category] = category_counts.get(category, 0) + 1
@@ -1092,9 +1277,7 @@ class ErrorRecoverySystem:
1092
1277
  tips.append("Regularly validate configuration files before making changes")
1093
1278
 
1094
1279
  if category_counts.get("research", 0) > 5:
1095
- tips.append(
1096
- "Monitor research engine performance and clear caches regularly"
1097
- )
1280
+ tips.append("Monitor research engine performance and clear caches regularly")
1098
1281
 
1099
1282
  if category_counts.get("communication", 0) > 5:
1100
1283
  tips.append("Ensure stable network connections for agent communication")
@@ -1131,7 +1314,7 @@ class ErrorRecoverySystem:
1131
1314
 
1132
1315
  # Basic validation
1133
1316
  return "---" in content and len(content) > 100
1134
- except:
1317
+ except (OSError, UnicodeDecodeError):
1135
1318
  return False
1136
1319
 
1137
1320
  def _validate_agent_file(self, agent_file: Path) -> bool:
@@ -1141,7 +1324,7 @@ class ErrorRecoverySystem:
1141
1324
  content = f.read()
1142
1325
 
1143
1326
  return "role:" in content and len(content) > 200
1144
- except:
1327
+ except (OSError, UnicodeDecodeError):
1145
1328
  return False
1146
1329
 
1147
1330
  def _validate_command_file(self, command_file: Path) -> bool:
@@ -1151,7 +1334,7 @@ class ErrorRecoverySystem:
1151
1334
  content = f.read()
1152
1335
 
1153
1336
  return "name:" in content and "allowed-tools:" in content
1154
- except:
1337
+ except (OSError, UnicodeDecodeError):
1155
1338
  return False
1156
1339
 
1157
1340
  def _repair_skill_file(self, skill_file: Path) -> bool:
@@ -1168,7 +1351,7 @@ class ErrorRecoverySystem:
1168
1351
  f.write(content)
1169
1352
 
1170
1353
  return True
1171
- except:
1354
+ except (OSError, UnicodeDecodeError):
1172
1355
  return False
1173
1356
 
1174
1357
  def _reinitialize_research_components(self):
@@ -1202,12 +1385,442 @@ class ErrorRecoverySystem:
1202
1385
  history_file = self.error_log_dir / "error_history.json"
1203
1386
  try:
1204
1387
  with open(history_file, "w") as f:
1205
- json.dump(
1206
- [asdict(e) for e in self.error_history], f, indent=2, default=str
1207
- )
1388
+ json.dump([asdict(e) for e in self.error_history], f, indent=2, default=str)
1208
1389
  except Exception as e:
1209
1390
  logger.error(f"Failed to save error history: {str(e)}")
1210
1391
 
1392
+ # Phase 3: Advanced Error Recovery Methods
1393
+
1394
+ def _initialize_phase3_components(self):
1395
+ """Initialize Phase 3 advanced recovery components"""
1396
+ # Phase 3 specific attributes
1397
+ self.advanced_failures: Dict[str, FailureEvent] = {}
1398
+ self.advanced_recovery_actions: Dict[str, AdvancedRecoveryAction] = {}
1399
+ self.system_snapshots: Dict[str, SystemSnapshot] = {}
1400
+ self.dead_letter_queue: deque = deque(maxlen=10000)
1401
+
1402
+ # Advanced recovery statistics
1403
+ self.advanced_recovery_stats = {
1404
+ "total_failures": 0,
1405
+ "auto_recoveries_attempted": 0,
1406
+ "auto_recoveries_successful": 0,
1407
+ "cascade_failures_detected": 0,
1408
+ "rollbacks_performed": 0,
1409
+ "snapshots_created": 0,
1410
+ "dead_letter_messages": 0,
1411
+ }
1412
+
1413
+ # Circuit breaker states for components
1414
+ self.circuit_breaker_states: Dict[str, Dict[str, Any]] = defaultdict(
1415
+ lambda: {
1416
+ "state": "CLOSED",
1417
+ "failure_count": 0,
1418
+ "last_failure_time": None,
1419
+ "success_threshold": 5,
1420
+ "failure_threshold": 3,
1421
+ "timeout_seconds": 60,
1422
+ }
1423
+ )
1424
+
1425
+ # Failure mode analyzers
1426
+ self.failure_analyzers = {
1427
+ FailureMode.CASCADE_FAILURE: self._analyze_cascade_failure,
1428
+ FailureMode.CIRCUIT_BREAKER_TRIPPED: self._analyze_circuit_breaker_trip,
1429
+ FailureMode.RESOURCE_EXHAUSTION: self._analyze_resource_exhaustion,
1430
+ }
1431
+
1432
+ logger.info("Phase 3 advanced recovery components initialized")
1433
+
1434
+ async def report_advanced_failure(
1435
+ self,
1436
+ failure_mode: FailureMode,
1437
+ component: str,
1438
+ description: str,
1439
+ severity: str = "medium",
1440
+ context: Optional[Dict[str, Any]] = None,
1441
+ error_details: Optional[Dict[str, Any]] = None,
1442
+ affected_operations: Optional[List[str]] = None,
1443
+ auto_recovery_eligible: bool = True,
1444
+ parent_failure_id: Optional[str] = None,
1445
+ ) -> str:
1446
+ """Report an advanced failure event with enhanced tracking"""
1447
+ failure_id = str(uuid.uuid4())
1448
+
1449
+ failure = FailureEvent(
1450
+ failure_id=failure_id,
1451
+ failure_mode=failure_mode,
1452
+ timestamp=datetime.now(timezone.utc),
1453
+ component=component,
1454
+ description=description,
1455
+ severity=severity,
1456
+ context=context or {},
1457
+ error_details=error_details,
1458
+ affected_operations=affected_operations or [],
1459
+ auto_recovery_eligible=auto_recovery_eligible,
1460
+ parent_failure_id=parent_failure_id,
1461
+ )
1462
+
1463
+ # Store failure
1464
+ self.advanced_failures[failure_id] = failure
1465
+ self.advanced_recovery_stats["total_failures"] += 1
1466
+
1467
+ # Analyze failure mode
1468
+ if failure_mode in self.failure_analyzers:
1469
+ await self.failure_analyzers[failure_mode](failure)
1470
+
1471
+ # Trigger advanced recovery if eligible
1472
+ if auto_recovery_eligible:
1473
+ await self._trigger_advanced_recovery(failure)
1474
+
1475
+ # Check for cascade failures
1476
+ await self._check_cascade_failures(failure)
1477
+
1478
+ logger.warning(f"Advanced failure reported: {failure_mode.value} in {component} - {description}")
1479
+ return failure_id
1480
+
1481
+ async def _trigger_advanced_recovery(self, failure: FailureEvent):
1482
+ """Trigger advanced recovery mechanisms"""
1483
+ try:
1484
+ strategy = self._determine_advanced_recovery_strategy(failure.failure_mode)
1485
+
1486
+ action = AdvancedRecoveryAction(
1487
+ action_id=str(uuid.uuid4()),
1488
+ failure_id=failure.failure_id,
1489
+ strategy=strategy,
1490
+ timestamp=datetime.now(timezone.utc),
1491
+ description=f"Advanced recovery for {failure.failure_mode.value}",
1492
+ parameters={"failure_context": failure.context},
1493
+ priority=self._calculate_recovery_priority(failure),
1494
+ )
1495
+
1496
+ self.advanced_recovery_actions[action.action_id] = action
1497
+ self.advanced_recovery_stats["auto_recoveries_attempted"] += 1
1498
+
1499
+ # Execute recovery action
1500
+ success = await self._execute_advanced_recovery_action(action)
1501
+
1502
+ if success:
1503
+ self.advanced_recovery_stats["auto_recoveries_successful"] += 1
1504
+ logger.info(f"Advanced recovery successful for failure {failure.failure_id}")
1505
+ else:
1506
+ # Add to dead letter queue for manual intervention
1507
+ self.dead_letter_queue.append(
1508
+ {
1509
+ "failure_id": failure.failure_id,
1510
+ "action_id": action.action_id,
1511
+ "timestamp": datetime.now(timezone.utc).isoformat(),
1512
+ "reason": "Advanced recovery failed",
1513
+ }
1514
+ )
1515
+ self.advanced_recovery_stats["dead_letter_messages"] += 1
1516
+
1517
+ except Exception as e:
1518
+ logger.error(f"Error triggering advanced recovery: {e}")
1519
+
1520
+ async def _execute_advanced_recovery_action(self, action: AdvancedRecoveryAction) -> bool:
1521
+ """Execute advanced recovery action with enhanced capabilities"""
1522
+ action.status = RecoveryStatus.IN_PROGRESS
1523
+ action.execution_log.append(f"Starting advanced recovery: {action.strategy.value}")
1524
+
1525
+ try:
1526
+ # Check dependencies
1527
+ for dep_action_id in action.dependencies:
1528
+ if dep_action_id in self.advanced_recovery_actions:
1529
+ dep_action = self.advanced_recovery_actions[dep_action_id]
1530
+ if dep_action.status != RecoveryStatus.COMPLETED:
1531
+ action.execution_log.append(f"Waiting for dependency: {dep_action_id}")
1532
+ return False
1533
+
1534
+ # Execute based on strategy
1535
+ if action.strategy == RecoveryStrategy.RETRY_WITH_BACKOFF:
1536
+ success = await self._execute_retry_with_backoff(action)
1537
+ elif action.strategy == RecoveryStrategy.CIRCUIT_BREAKER:
1538
+ success = await self._execute_circuit_breaker_action(action)
1539
+ elif action.strategy == RecoveryStrategy.ROLLBACK:
1540
+ success = await self._execute_rollback_action(action)
1541
+ elif action.strategy == RecoveryStrategy.QUARANTINE:
1542
+ success = await self._execute_quarantine_action(action)
1543
+ else:
1544
+ success = await self._execute_legacy_recovery_action(action)
1545
+
1546
+ if success:
1547
+ action.status = RecoveryStatus.COMPLETED
1548
+ action.execution_log.append("Advanced recovery completed successfully")
1549
+ else:
1550
+ action.status = RecoveryStatus.FAILED
1551
+ action.execution_log.append("Advanced recovery failed")
1552
+
1553
+ return success
1554
+
1555
+ except Exception as e:
1556
+ action.status = RecoveryStatus.FAILED
1557
+ action.execution_log.append(f"Advanced recovery error: {str(e)}")
1558
+ logger.error(f"Error executing advanced recovery action {action.action_id}: {e}")
1559
+ return False
1560
+
1561
+ async def _execute_retry_with_backoff(self, action: AdvancedRecoveryAction) -> bool:
1562
+ """Execute retry with exponential backoff"""
1563
+ self.advanced_failures[action.failure_id]
1564
+ base_delay = 1.0
1565
+ max_delay = 60.0
1566
+ backoff_factor = 2.0
1567
+
1568
+ for attempt in range(action.max_retries + 1):
1569
+ try:
1570
+ action.retry_attempts = attempt
1571
+ action.execution_log.append(f"Retry attempt {attempt + 1}/{action.max_retries + 1}")
1572
+
1573
+ # Simulate retry logic - in real implementation, this would call the failing function
1574
+ if attempt >= 2: # Simulate success after a few attempts
1575
+ action.execution_log.append("Retry successful")
1576
+ return True
1577
+ else:
1578
+ action.execution_log.append("Retry failed, will retry again")
1579
+
1580
+ # Wait with exponential backoff
1581
+ if attempt < action.max_retries:
1582
+ delay = min(base_delay * (backoff_factor**attempt), max_delay)
1583
+ await asyncio.sleep(delay)
1584
+
1585
+ except Exception as e:
1586
+ action.execution_log.append(f"Retry attempt {attempt + 1} error: {str(e)}")
1587
+
1588
+ action.execution_log.append("All retry attempts exhausted")
1589
+ return False
1590
+
1591
+ async def _execute_circuit_breaker_action(self, action: AdvancedRecoveryAction) -> bool:
1592
+ """Execute circuit breaker action"""
1593
+ failure = self.advanced_failures[action.failure_id]
1594
+ component = failure.component
1595
+
1596
+ # Update circuit breaker state
1597
+ cb_state = self.circuit_breaker_states[component]
1598
+ cb_state["state"] = "OPEN"
1599
+ cb_state["failure_count"] += 1
1600
+ cb_state["last_failure_time"] = datetime.now(timezone.utc).isoformat()
1601
+
1602
+ action.execution_log.append(f"Circuit breaker opened for component: {component}")
1603
+ return True
1604
+
1605
+ async def _execute_rollback_action(self, action: AdvancedRecoveryAction) -> bool:
1606
+ """Execute rollback action"""
1607
+ try:
1608
+ # Create a snapshot before rollback
1609
+ snapshot_id = await self._create_system_snapshot("pre_rollback_snapshot")
1610
+
1611
+ # Perform rollback logic
1612
+ action.execution_log.append("Creating rollback snapshot and performing rollback")
1613
+
1614
+ # In real implementation, this would restore system state from snapshot
1615
+ self.advanced_recovery_stats["rollbacks_performed"] += 1
1616
+
1617
+ action.rollback_action_id = snapshot_id
1618
+ return True
1619
+
1620
+ except Exception as e:
1621
+ action.execution_log.append(f"Rollback failed: {str(e)}")
1622
+ return False
1623
+
1624
+ async def _execute_quarantine_action(self, action: AdvancedRecoveryAction) -> bool:
1625
+ """Execute quarantine action"""
1626
+ failure = self.advanced_failures[action.failure_id]
1627
+ component = failure.component
1628
+
1629
+ action.execution_log.append(f"Quarantining component: {component}")
1630
+
1631
+ # In real implementation, this would isolate the component
1632
+ # For now, just log the action
1633
+ return True
1634
+
1635
+ async def _execute_legacy_recovery_action(self, action: AdvancedRecoveryAction) -> bool:
1636
+ """Execute legacy recovery action as fallback"""
1637
+ failure = self.advanced_failures[action.failure_id]
1638
+
1639
+ # Convert to legacy format and use existing recovery mechanisms
1640
+ legacy_action = self.recovery_actions.get("restart_research_engines")
1641
+ if not legacy_action:
1642
+ action.execution_log.append("No legacy recovery action available")
1643
+ return False
1644
+
1645
+ # Create legacy error report
1646
+ legacy_error = ErrorReport(
1647
+ id=failure.failure_id,
1648
+ timestamp=failure.timestamp,
1649
+ severity=getattr(ErrorSeverity, failure.severity.upper(), ErrorSeverity.MEDIUM),
1650
+ category=getattr(ErrorCategory, "SYSTEM", ErrorCategory.SYSTEM),
1651
+ message=failure.description,
1652
+ details=failure.error_details or {},
1653
+ stack_trace="",
1654
+ context=failure.context,
1655
+ )
1656
+
1657
+ try:
1658
+ result = legacy_action.handler(legacy_error, action.parameters)
1659
+ if result:
1660
+ action.execution_log.append("Legacy recovery action successful")
1661
+ return True
1662
+ else:
1663
+ action.execution_log.append("Legacy recovery action failed")
1664
+ return False
1665
+ except Exception as e:
1666
+ action.execution_log.append(f"Legacy recovery action error: {str(e)}")
1667
+ return False
1668
+
1669
+ def _determine_advanced_recovery_strategy(self, failure_mode: FailureMode) -> RecoveryStrategy:
1670
+ """Determine advanced recovery strategy based on failure mode"""
1671
+ strategy_map = {
1672
+ FailureMode.HOOK_EXECUTION_FAILURE: RecoveryStrategy.RETRY_WITH_BACKOFF,
1673
+ FailureMode.RESOURCE_EXHAUSTION: RecoveryStrategy.DEGRADE_SERVICE,
1674
+ FailureMode.DATA_CORRUPTION: RecoveryStrategy.ROLLBACK,
1675
+ FailureMode.NETWORK_FAILURE: RecoveryStrategy.RETRY_WITH_BACKOFF,
1676
+ FailureMode.SYSTEM_OVERLOAD: RecoveryStrategy.CIRCUIT_BREAKER,
1677
+ FailureMode.CIRCUIT_BREAKER_TRIPPED: RecoveryStrategy.CIRCUIT_BREAKER,
1678
+ FailureMode.CASCADE_FAILURE: RecoveryStrategy.EMERGENCY_STOP,
1679
+ FailureMode.TIMEOUT_FAILURE: RecoveryStrategy.RETRY_WITH_BACKOFF,
1680
+ FailureMode.MEMORY_LEAK: RecoveryStrategy.RESTART_COMPONENT,
1681
+ FailureMode.DEADLOCK: RecoveryStrategy.QUARANTINE,
1682
+ FailureMode.AUTHENTICATION_FAILURE: RecoveryStrategy.NOTIFY_ADMIN,
1683
+ FailureMode.VALIDATION_FAILURE: RecoveryStrategy.QUARANTINE,
1684
+ FailureMode.EXTERNAL_SERVICE_FAILURE: RecoveryStrategy.FAILOVER,
1685
+ FailureMode.STORAGE_FAILURE: RecoveryStrategy.ROLLBACK,
1686
+ FailureMode.CONCURRENCY_ISSUE: RecoveryStrategy.CIRCUIT_BREAKER,
1687
+ }
1688
+
1689
+ return strategy_map.get(failure_mode, RecoveryStrategy.RETRY_WITH_BACKOFF)
1690
+
1691
+ def _calculate_recovery_priority(self, failure: FailureEvent) -> int:
1692
+ """Calculate recovery priority based on failure characteristics"""
1693
+ base_priority = 5
1694
+
1695
+ # Adjust based on severity
1696
+ if failure.severity == "critical":
1697
+ base_priority -= 3
1698
+ elif failure.severity == "high":
1699
+ base_priority -= 2
1700
+ elif failure.severity == "medium":
1701
+ base_priority -= 1
1702
+
1703
+ # Adjust based on number of affected operations
1704
+ if len(failure.affected_operations) > 10:
1705
+ base_priority -= 2
1706
+ elif len(failure.affected_operations) > 5:
1707
+ base_priority -= 1
1708
+
1709
+ # Ensure priority is in valid range
1710
+ return max(1, min(10, base_priority))
1711
+
1712
+ async def _check_cascade_failures(self, failure: FailureEvent):
1713
+ """Check for cascade failure patterns"""
1714
+ # Check if this failure is related to other recent failures
1715
+ recent_failures = [
1716
+ f
1717
+ for f in self.advanced_failures.values()
1718
+ if (datetime.now(timezone.utc) - f.timestamp).total_seconds() < 300 # Last 5 minutes
1719
+ and f.failure_id != failure.failure_id
1720
+ ]
1721
+
1722
+ # Simple cascade detection: same component or related components
1723
+ related_failures = [
1724
+ f
1725
+ for f in recent_failures
1726
+ if f.component == failure.component or f.component in failure.context.get("related_components", [])
1727
+ ]
1728
+
1729
+ if len(related_failures) >= 3:
1730
+ self.advanced_recovery_stats["cascade_failures_detected"] += 1
1731
+ logger.warning(f"Cascade failure detected: {len(related_failures)} related failures")
1732
+
1733
+ # Trigger emergency recovery
1734
+ await self._trigger_emergency_recovery(failure, related_failures)
1735
+
1736
+ async def _trigger_emergency_recovery(self, failure: FailureEvent, related_failures: List[FailureEvent]):
1737
+ """Trigger emergency recovery for cascade failures"""
1738
+ emergency_action = AdvancedRecoveryAction(
1739
+ action_id=str(uuid.uuid4()),
1740
+ failure_id=failure.failure_id,
1741
+ strategy=RecoveryStrategy.EMERGENCY_STOP,
1742
+ timestamp=datetime.now(timezone.utc),
1743
+ description="Emergency recovery for cascade failure",
1744
+ parameters={"cascade_failures": [f.failure_id for f in related_failures]},
1745
+ priority=1, # Highest priority
1746
+ )
1747
+
1748
+ await self._execute_advanced_recovery_action(emergency_action)
1749
+
1750
+ async def _analyze_cascade_failure(self, failure: FailureEvent):
1751
+ """Analyze cascade failure patterns"""
1752
+ # Implementation would analyze failure patterns and correlations
1753
+ pass
1754
+
1755
+ async def _analyze_circuit_breaker_trip(self, failure: FailureEvent):
1756
+ """Analyze circuit breaker trip patterns"""
1757
+ # Implementation would analyze circuit breaker behavior
1758
+ pass
1759
+
1760
+ async def _analyze_resource_exhaustion(self, failure: FailureEvent):
1761
+ """Analyze resource exhaustion patterns"""
1762
+ # Implementation would analyze resource usage patterns
1763
+ pass
1764
+
1765
+ async def _create_system_snapshot(self, description: str = "", is_rollback_point: bool = False) -> str:
1766
+ """Create a system state snapshot"""
1767
+ snapshot_id = str(uuid.uuid4())
1768
+
1769
+ # Get current system state
1770
+ component_states = {
1771
+ "error_recovery_system": {
1772
+ "active_errors": len(self.active_errors),
1773
+ "advanced_failures": len(self.advanced_failures),
1774
+ "system_health": self.system_health["status"],
1775
+ },
1776
+ "circuit_breakers": dict(self.circuit_breaker_states),
1777
+ "recovery_stats": self.advanced_recovery_stats.copy(),
1778
+ }
1779
+
1780
+ # Calculate checksums
1781
+ config_str = json.dumps(component_states, sort_keys=True)
1782
+ config_hash = hashlib.sha256(config_str.encode()).hexdigest()
1783
+
1784
+ data_checksums = {
1785
+ "component_states": hashlib.sha256(config_str.encode()).hexdigest(),
1786
+ }
1787
+
1788
+ snapshot = SystemSnapshot(
1789
+ snapshot_id=snapshot_id,
1790
+ timestamp=datetime.now(timezone.utc),
1791
+ component_states=component_states,
1792
+ configuration_hash=config_hash,
1793
+ data_checksums=data_checksums,
1794
+ description=description,
1795
+ is_rollback_point=is_rollback_point,
1796
+ )
1797
+
1798
+ self.system_snapshots[snapshot_id] = snapshot
1799
+ self.advanced_recovery_stats["snapshots_created"] += 1
1800
+
1801
+ logger.info(f"Created system snapshot: {snapshot_id}")
1802
+ return snapshot_id
1803
+
1804
+ def get_advanced_system_status(self) -> Dict[str, Any]:
1805
+ """Get comprehensive advanced system status"""
1806
+ return {
1807
+ "status": "running",
1808
+ "phase3_features": "enabled",
1809
+ "advanced_recovery_statistics": self.advanced_recovery_stats,
1810
+ "active_advanced_failures": len(self.advanced_failures),
1811
+ "pending_advanced_actions": len(
1812
+ [
1813
+ a
1814
+ for a in self.advanced_recovery_actions.values()
1815
+ if a.status in [RecoveryStatus.PENDING, RecoveryStatus.IN_PROGRESS]
1816
+ ]
1817
+ ),
1818
+ "circuit_breaker_states": dict(self.circuit_breaker_states),
1819
+ "system_snapshots": len(self.system_snapshots),
1820
+ "dead_letter_queue_size": len(self.dead_letter_queue),
1821
+ "failure_mode_analyzers": list(self.failure_analyzers.keys()),
1822
+ }
1823
+
1211
1824
 
1212
1825
  # Global error recovery system instance
1213
1826
  _error_recovery_system = None