cancan-microstack 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (440)
  1. cancan_microstack/__init__.py +14 -0
  2. cancan_microstack/__version__.py +10 -0
  3. cancan_microstack/assets/__init__.py +6 -0
  4. cancan_microstack/assets/builds/caddy/Caddyfile +187 -0
  5. cancan_microstack/assets/builds/caddy/DEPLOYMENT.md +303 -0
  6. cancan_microstack/assets/builds/caddy/Dockerfile +46 -0
  7. cancan_microstack/assets/builds/caddy/README.md +343 -0
  8. cancan_microstack/assets/builds/caddy/geoip/README.md +5 -0
  9. cancan_microstack/assets/builds/caddy/start.sh +78 -0
  10. cancan_microstack/assets/builds/caddy/waf/coraza.conf +179 -0
  11. cancan_microstack/assets/builds/service/Dockerfile +59 -0
  12. cancan_microstack/assets/builds/service/README.md +13 -0
  13. cancan_microstack/assets/ddl/create_db.sql +22 -0
  14. cancan_microstack/assets/ddl/infra/execution_log_tbl.sql +46 -0
  15. cancan_microstack/assets/ddl/infra/node_instance_tbl.sql +56 -0
  16. cancan_microstack/assets/ddl/infra/service_action_log_tbl.sql +36 -0
  17. cancan_microstack/assets/ddl/infra/service_config_tbl.sql +26 -0
  18. cancan_microstack/assets/ddl/infra/service_info_tbl.sql +45 -0
  19. cancan_microstack/assets/ddl/infra/service_instance_tbl.sql +54 -0
  20. cancan_microstack/assets/ddl/infra/service_operation_tbl.sql +47 -0
  21. cancan_microstack/assets/ddl/infra/workflow_definition_tbl.sql +60 -0
  22. cancan_microstack/assets/ddl/infra/workflow_definition_version_tbl.sql +35 -0
  23. cancan_microstack/assets/ddl/infra/workflow_engine_alert_tbl.sql +34 -0
  24. cancan_microstack/assets/ddl/infra/workflow_run_tbl.sql +52 -0
  25. cancan_microstack/assets/ddl/ops/admin_user_tbl.sql +34 -0
  26. cancan_microstack/assets/ddl/ops/caddy_access_log_tbl.sql +91 -0
  27. cancan_microstack/assets/ddl/ops/caddy_certificate_tbl.sql +59 -0
  28. cancan_microstack/assets/ddl/ops/caddy_rate_limit_tbl.sql +64 -0
  29. cancan_microstack/assets/ddl/ops/caddy_route_tbl.sql +63 -0
  30. cancan_microstack/assets/ddl/ops/caddy_stats_tbl.sql +77 -0
  31. cancan_microstack/assets/ddl/trigger.sql +21 -0
  32. cancan_microstack/assets/docker/docker-compose.infra.yml +401 -0
  33. cancan_microstack/assets/scripts/README.md +195 -0
  34. cancan_microstack/assets/scripts/docker/build_images.sh +44 -0
  35. cancan_microstack/assets/scripts/docker/force_rebuild_images.sh +38 -0
  36. cancan_microstack/assets/scripts/docker/rebuild_all.sh +34 -0
  37. cancan_microstack/assets/scripts/docker/rebuild_compose.sh +61 -0
  38. cancan_microstack/assets/scripts/docker/restart.sh +35 -0
  39. cancan_microstack/assets/scripts/docker/restart_compose.sh +35 -0
  40. cancan_microstack/assets/scripts/docker/start.sh +78 -0
  41. cancan_microstack/assets/scripts/docker/start_all.sh +46 -0
  42. cancan_microstack/assets/scripts/docker/start_compose.sh +66 -0
  43. cancan_microstack/assets/scripts/docker/stop.sh +67 -0
  44. cancan_microstack/assets/scripts/docker/stop_all.sh +38 -0
  45. cancan_microstack/assets/scripts/docker/stop_compose.sh +38 -0
  46. cancan_microstack/assets/scripts/podman/build_images_podman.sh +59 -0
  47. cancan_microstack/assets/scripts/podman/cleanup_podman.sh +25 -0
  48. cancan_microstack/assets/scripts/podman/force_rebuild_images_podman.sh +56 -0
  49. cancan_microstack/assets/scripts/podman/rebuild_all_podman.sh +37 -0
  50. cancan_microstack/assets/scripts/podman/rebuild_compose_podman.sh +60 -0
  51. cancan_microstack/assets/scripts/podman/restart_compose_podman.sh +73 -0
  52. cancan_microstack/assets/scripts/podman/start_all_podman.sh +66 -0
  53. cancan_microstack/assets/scripts/podman/start_compose_podman.sh +80 -0
  54. cancan_microstack/assets/scripts/podman/start_podman.sh +91 -0
  55. cancan_microstack/assets/scripts/podman/stop.sh +73 -0
  56. cancan_microstack/assets/scripts/podman/stop_all_podman.sh +34 -0
  57. cancan_microstack/assets/scripts/podman/stop_compose_podman.sh +58 -0
  58. cancan_microstack/assets/scripts/start_controllersrv.sh +9 -0
  59. cancan_microstack/assets/scripts/utils/check_all_db_tables.sh +104 -0
  60. cancan_microstack/assets/scripts/utils/check_env.sh +177 -0
  61. cancan_microstack/assets/scripts/utils/check_service_management_deployment.sh +225 -0
  62. cancan_microstack/assets/scripts/utils/deploy_service_management.sh +176 -0
  63. cancan_microstack/assets/scripts/utils/force_reload_infrasrv.sh +52 -0
  64. cancan_microstack/assets/scripts/utils/monitor_service_management.sh +187 -0
  65. cancan_microstack/assets/scripts/utils/reset_postgres_volume.sh +68 -0
  66. cancan_microstack/assets/scripts/utils/test_async_operations.sh +141 -0
  67. cancan_microstack/assets/scripts/utils/verify_real_operations.sh +76 -0
  68. cancan_microstack/assets/service/Dockerfile +65 -0
  69. cancan_microstack/assets/www/adminops/assets/AppEmpty.vue_vue_type_script_setup_true_lang-BOKUurnM.js +1 -0
  70. cancan_microstack/assets/www/adminops/assets/ConfigManage-DKV5YOUz.js +1 -0
  71. cancan_microstack/assets/www/adminops/assets/ConfigManage-Y5bhy7wG.css +1 -0
  72. cancan_microstack/assets/www/adminops/assets/ConsoleManage-8ljYvCW2.js +1 -0
  73. cancan_microstack/assets/www/adminops/assets/ConsoleManage-BWpyqbuQ.css +1 -0
  74. cancan_microstack/assets/www/adminops/assets/DashboardNew-B9Nf1OPl.js +1 -0
  75. cancan_microstack/assets/www/adminops/assets/DashboardNew-DYWZKQ1V.css +1 -0
  76. cancan_microstack/assets/www/adminops/assets/LogSearch-CA0Jhe78.js +1 -0
  77. cancan_microstack/assets/www/adminops/assets/LogSearch-CCZfTNPF.css +1 -0
  78. cancan_microstack/assets/www/adminops/assets/LoginView-BId3kP3M.css +1 -0
  79. cancan_microstack/assets/www/adminops/assets/LoginView-BQZTV_Qy.js +1 -0
  80. cancan_microstack/assets/www/adminops/assets/OperationProgressDialog-BdEYwqFq.js +1 -0
  81. cancan_microstack/assets/www/adminops/assets/OperationProgressDialog-D-pASR8G.css +1 -0
  82. cancan_microstack/assets/www/adminops/assets/PageContainer-Byss-yUC.js +1 -0
  83. cancan_microstack/assets/www/adminops/assets/PageContainer-C3nSZwM7.css +1 -0
  84. cancan_microstack/assets/www/adminops/assets/RateLimitManage-BDI8jLpC.css +1 -0
  85. cancan_microstack/assets/www/adminops/assets/RateLimitManage-DJY4NiF-.js +1 -0
  86. cancan_microstack/assets/www/adminops/assets/RouteManage-DaUQ4QLw.css +1 -0
  87. cancan_microstack/assets/www/adminops/assets/RouteManage-w9XCU0UA.js +1 -0
  88. cancan_microstack/assets/www/adminops/assets/ServiceCard-BFzHe6Tw.css +1 -0
  89. cancan_microstack/assets/www/adminops/assets/ServiceCard-BJUhWnA-.js +1 -0
  90. cancan_microstack/assets/www/adminops/assets/ServiceDetail-Cw24WuKp.js +1 -0
  91. cancan_microstack/assets/www/adminops/assets/ServiceDetail-Yum47zdB.css +1 -0
  92. cancan_microstack/assets/www/adminops/assets/ServiceList-C7ryvbhE.js +1 -0
  93. cancan_microstack/assets/www/adminops/assets/ServiceList-Cgd01fUx.css +1 -0
  94. cancan_microstack/assets/www/adminops/assets/ServiceLogs-COpG9H0h.js +1 -0
  95. cancan_microstack/assets/www/adminops/assets/ServiceLogs-H_Alq0cf.css +1 -0
  96. cancan_microstack/assets/www/adminops/assets/StatsOverview-D0TwMQkA.js +39 -0
  97. cancan_microstack/assets/www/adminops/assets/StatsOverview-lqAN6pqM.css +1 -0
  98. cancan_microstack/assets/www/adminops/assets/TotpBindView-CWlAmzFt.js +1 -0
  99. cancan_microstack/assets/www/adminops/assets/TotpBindView-HoQC1lhx.css +1 -0
  100. cancan_microstack/assets/www/adminops/assets/TotpVerifyView-BHN1VtX1.css +1 -0
  101. cancan_microstack/assets/www/adminops/assets/TotpVerifyView-D3w_lZk8.js +1 -0
  102. cancan_microstack/assets/www/adminops/assets/WorkflowCenter-DU_mpIA0.css +1 -0
  103. cancan_microstack/assets/www/adminops/assets/WorkflowCenter-i50rZyxN.js +1 -0
  104. cancan_microstack/assets/www/adminops/assets/WorkflowDesigner-CnHokPL9.js +1 -0
  105. cancan_microstack/assets/www/adminops/assets/WorkflowDesigner-DaZaZpLd.css +1 -0
  106. cancan_microstack/assets/www/adminops/assets/WorkflowRuns-B09hK48c.js +1 -0
  107. cancan_microstack/assets/www/adminops/assets/WorkflowRuns-wGutKIIU.css +1 -0
  108. cancan_microstack/assets/www/adminops/assets/caddy-nnCKf8fG.js +1 -0
  109. cancan_microstack/assets/www/adminops/assets/format-Cuzxgna9.js +1 -0
  110. cancan_microstack/assets/www/adminops/assets/index-CiFlm8oc.js +64 -0
  111. cancan_microstack/assets/www/adminops/assets/index-UW0T1Dkc.css +1 -0
  112. cancan_microstack/assets/www/adminops/assets/service-BYlgGPs_.js +1 -0
  113. cancan_microstack/assets/www/adminops/assets/service-operation-6GzLw2Z1.js +1 -0
  114. cancan_microstack/assets/www/adminops/assets/style-CcIXnQ5y.css +1 -0
  115. cancan_microstack/assets/www/adminops/assets/style-lRnStdGu.js +39 -0
  116. cancan_microstack/assets/www/adminops/assets/useDebounce-BRlqfXqf.js +1 -0
  117. cancan_microstack/assets/www/adminops/assets/workflow-CUXs39Ac.js +1 -0
  118. cancan_microstack/assets/www/adminops/index.html +16 -0
  119. cancan_microstack/assets/www/adminops/vite.svg +1 -0
  120. cancan_microstack/cli/__init__.py +14 -0
  121. cancan_microstack/cli/__main__.py +9 -0
  122. cancan_microstack/cli/main.py +552 -0
  123. cancan_microstack/cmd/__init__.py +54 -0
  124. cancan_microstack/cmd/cancan/__init__.py +12 -0
  125. cancan_microstack/cmd/cancan/run.py +395 -0
  126. cancan_microstack/cmd/controllersrv/__init__.py +0 -0
  127. cancan_microstack/cmd/controllersrv/run.py +131 -0
  128. cancan_microstack/cmd/infrasrv/__init__.py +5 -0
  129. cancan_microstack/cmd/infrasrv/run.py +100 -0
  130. cancan_microstack/cmd/opsbffsrv/__init__.py +5 -0
  131. cancan_microstack/cmd/opsbffsrv/run.py +96 -0
  132. cancan_microstack/core/__init__.py +5 -0
  133. cancan_microstack/core/assets.py +123 -0
  134. cancan_microstack/core/compose_builder.py +102 -0
  135. cancan_microstack/core/doctor.py +152 -0
  136. cancan_microstack/core/microstack.py +71 -0
  137. cancan_microstack/core/runner.py +56 -0
  138. cancan_microstack/core/stack_manager.py +186 -0
  139. cancan_microstack/public/__init__.py +7 -0
  140. cancan_microstack/public/api/__init__.py +1 -0
  141. cancan_microstack/public/api/controllersrv_client.py +277 -0
  142. cancan_microstack/public/api/infrasrv_client.py +404 -0
  143. cancan_microstack/public/const/__init__.py +1 -0
  144. cancan_microstack/public/const/action_consts.py +18 -0
  145. cancan_microstack/public/const/app_consts.py +42 -0
  146. cancan_microstack/public/const/caddy_consts.py +22 -0
  147. cancan_microstack/public/const/controllersrv_consts.py +163 -0
  148. cancan_microstack/public/const/docker_consts.py +15 -0
  149. cancan_microstack/public/const/error.py +56 -0
  150. cancan_microstack/public/const/health_consts.py +52 -0
  151. cancan_microstack/public/const/hook_enums.py +56 -0
  152. cancan_microstack/public/const/logging_enums.py +13 -0
  153. cancan_microstack/public/const/metrics_enums.py +36 -0
  154. cancan_microstack/public/const/monitor_enums.py +26 -0
  155. cancan_microstack/public/const/operation_consts.py +53 -0
  156. cancan_microstack/public/const/opsbffsrv_error.py +92 -0
  157. cancan_microstack/public/const/overrides_consts.py +13 -0
  158. cancan_microstack/public/const/redis.py +17 -0
  159. cancan_microstack/public/const/service_consts.py +15 -0
  160. cancan_microstack/public/const/workflow_consts.py +65 -0
  161. cancan_microstack/public/error.py +41 -0
  162. cancan_microstack/public/logging/__init__.py +0 -0
  163. cancan_microstack/public/logging/initializer.py +109 -0
  164. cancan_microstack/public/logging/mq_handler.py +279 -0
  165. cancan_microstack/public/schemas/__init__.py +1 -0
  166. cancan_microstack/public/schemas/caddy/__init__.py +381 -0
  167. cancan_microstack/public/schemas/caddy/analysis.py +90 -0
  168. cancan_microstack/public/schemas/caddy/route.py +18 -0
  169. cancan_microstack/public/schemas/common.py +79 -0
  170. cancan_microstack/public/schemas/controllersrv/__init__.py +3 -0
  171. cancan_microstack/public/schemas/controllersrv/async_requests.py +30 -0
  172. cancan_microstack/public/schemas/controllersrv/compose_models.py +47 -0
  173. cancan_microstack/public/schemas/controllersrv/const.py +24 -0
  174. cancan_microstack/public/schemas/controllersrv/docker_models.py +45 -0
  175. cancan_microstack/public/schemas/controllersrv/docker_responses.py +104 -0
  176. cancan_microstack/public/schemas/controllersrv/requests.py +54 -0
  177. cancan_microstack/public/schemas/controllersrv/responses.py +124 -0
  178. cancan_microstack/public/schemas/controllersrv/task_models.py +102 -0
  179. cancan_microstack/public/schemas/controllersrv/validation.py +23 -0
  180. cancan_microstack/public/schemas/hook_metrics.py +124 -0
  181. cancan_microstack/public/schemas/hooks.py +39 -0
  182. cancan_microstack/public/schemas/infra/__init__.py +0 -0
  183. cancan_microstack/public/schemas/infra/cleanup.py +25 -0
  184. cancan_microstack/public/schemas/infra/container.py +74 -0
  185. cancan_microstack/public/schemas/infra/enums.py +135 -0
  186. cancan_microstack/public/schemas/infra/health_check.py +42 -0
  187. cancan_microstack/public/schemas/infra/hook_log.py +42 -0
  188. cancan_microstack/public/schemas/infra/operation.py +90 -0
  189. cancan_microstack/public/schemas/infra/overview.py +25 -0
  190. cancan_microstack/public/schemas/infra/push.py +33 -0
  191. cancan_microstack/public/schemas/infra/service_action_log.py +47 -0
  192. cancan_microstack/public/schemas/infra/service_config.py +10 -0
  193. cancan_microstack/public/schemas/infra/service_info.py +69 -0
  194. cancan_microstack/public/schemas/infra/service_instance.py +93 -0
  195. cancan_microstack/public/schemas/infra/service_management.py +152 -0
  196. cancan_microstack/public/schemas/infra/service_operation.py +79 -0
  197. cancan_microstack/public/schemas/infra/service_registry.py +158 -0
  198. cancan_microstack/public/schemas/infra/status_types.py +19 -0
  199. cancan_microstack/public/schemas/infra/workflow.py +566 -0
  200. cancan_microstack/public/schemas/logging/__init__.py +1 -0
  201. cancan_microstack/public/schemas/logging/log_event.py +121 -0
  202. cancan_microstack/public/schemas/opsbffsrv/__init__.py +1 -0
  203. cancan_microstack/public/schemas/opsbffsrv/async_ops.py +17 -0
  204. cancan_microstack/public/schemas/opsbffsrv/db_admin.py +147 -0
  205. cancan_microstack/public/schemas/opsbffsrv/db_init.py +48 -0
  206. cancan_microstack/public/schemas/opsbffsrv/service_config.py +89 -0
  207. cancan_microstack/public/schemas/opsbffsrv/service_logs.py +54 -0
  208. cancan_microstack/public/schemas/service_operation.py +24 -0
  209. cancan_microstack/public/schemas/service_registry.py +40 -0
  210. cancan_microstack/public/types/__init__.py +7 -0
  211. cancan_microstack/public/web/__init__.py +0 -0
  212. cancan_microstack/public/web/config_value.py +105 -0
  213. cancan_microstack/public/web/server.py +385 -0
  214. cancan_microstack/py.typed +0 -0
  215. cancan_microstack/runtime/__init__.py +0 -0
  216. cancan_microstack/runtime/compose_cmd.py +228 -0
  217. cancan_microstack/runtime/host_daemon.py +318 -0
  218. cancan_microstack/runtime/overrides.py +103 -0
  219. cancan_microstack/runtime/resources.py +25 -0
  220. cancan_microstack/runtime/workspace.py +94 -0
  221. cancan_microstack/services/__init__.py +0 -0
  222. cancan_microstack/services/controllersrv/__init__.py +8 -0
  223. cancan_microstack/services/controllersrv/application/__init__.py +0 -0
  224. cancan_microstack/services/controllersrv/application/docker_compose_app.py +427 -0
  225. cancan_microstack/services/controllersrv/conf/__init__.py +0 -0
  226. cancan_microstack/services/controllersrv/conf/config.py +76 -0
  227. cancan_microstack/services/controllersrv/conf/settings.py +54 -0
  228. cancan_microstack/services/controllersrv/domain/__init__.py +0 -0
  229. cancan_microstack/services/controllersrv/domain/docker_compose/__init__.py +0 -0
  230. cancan_microstack/services/controllersrv/domain/docker_compose/docker_compose_domain.py +278 -0
  231. cancan_microstack/services/controllersrv/domain/service_validator.py +327 -0
  232. cancan_microstack/services/controllersrv/domain/task/__init__.py +17 -0
  233. cancan_microstack/services/controllersrv/domain/task/task_queue.py +286 -0
  234. cancan_microstack/services/controllersrv/domain/task/task_worker.py +495 -0
  235. cancan_microstack/services/controllersrv/infrastructure/__init__.py +0 -0
  236. cancan_microstack/services/controllersrv/interface/__init__.py +0 -0
  237. cancan_microstack/services/controllersrv/interface/api/__init__.py +0 -0
  238. cancan_microstack/services/controllersrv/interface/api/docker_control_api.py +470 -0
  239. cancan_microstack/services/controllersrv/router.py +132 -0
  240. cancan_microstack/services/infrasrv/__init__.py +4 -0
  241. cancan_microstack/services/infrasrv/application/__init__.py +0 -0
  242. cancan_microstack/services/infrasrv/application/health_check_app.py +24 -0
  243. cancan_microstack/services/infrasrv/application/logging/__init__.py +1 -0
  244. cancan_microstack/services/infrasrv/application/logging/log_ingestion_service.py +183 -0
  245. cancan_microstack/services/infrasrv/application/service_config.py +22 -0
  246. cancan_microstack/services/infrasrv/application/service_logs_app.py +53 -0
  247. cancan_microstack/services/infrasrv/application/service_management_app.py +689 -0
  248. cancan_microstack/services/infrasrv/application/service_operation_tracker.py +251 -0
  249. cancan_microstack/services/infrasrv/application/service_registry.py +53 -0
  250. cancan_microstack/services/infrasrv/application/workflow/__init__.py +0 -0
  251. cancan_microstack/services/infrasrv/application/workflow/workflow_app.py +991 -0
  252. cancan_microstack/services/infrasrv/application/workflow/workflow_queue.py +302 -0
  253. cancan_microstack/services/infrasrv/application/workflow/workflow_tasks.py +46 -0
  254. cancan_microstack/services/infrasrv/application/workflow/workflow_worker_runtime.py +122 -0
  255. cancan_microstack/services/infrasrv/conf/__init__.py +0 -0
  256. cancan_microstack/services/infrasrv/conf/config.py +98 -0
  257. cancan_microstack/services/infrasrv/domain/__init__.py +0 -0
  258. cancan_microstack/services/infrasrv/domain/health_check/__init__.py +3 -0
  259. cancan_microstack/services/infrasrv/domain/health_check/health_check_domain.py +576 -0
  260. cancan_microstack/services/infrasrv/domain/hooks/__init__.py +19 -0
  261. cancan_microstack/services/infrasrv/domain/hooks/builtin_hooks.py +308 -0
  262. cancan_microstack/services/infrasrv/domain/hooks/hook_registry.py +43 -0
  263. cancan_microstack/services/infrasrv/domain/hooks/hooks_log_utils.py +275 -0
  264. cancan_microstack/services/infrasrv/domain/hooks/init.py +17 -0
  265. cancan_microstack/services/infrasrv/domain/hooks/metrics.py +205 -0
  266. cancan_microstack/services/infrasrv/domain/hooks/pre_registration_hooks.py +490 -0
  267. cancan_microstack/services/infrasrv/domain/registry/__init__.py +0 -0
  268. cancan_microstack/services/infrasrv/domain/registry/service_registry.py +509 -0
  269. cancan_microstack/services/infrasrv/domain/service_config/__init__.py +0 -0
  270. cancan_microstack/services/infrasrv/domain/service_config/service_config.py +50 -0
  271. cancan_microstack/services/infrasrv/domain/service_logs/__init__.py +0 -0
  272. cancan_microstack/services/infrasrv/domain/service_logs/service_logs_domain.py +51 -0
  273. cancan_microstack/services/infrasrv/domain/workflow/__init__.py +4 -0
  274. cancan_microstack/services/infrasrv/domain/workflow/engine.py +159 -0
  275. cancan_microstack/services/infrasrv/domain/workflow/node_handlers.py +509 -0
  276. cancan_microstack/services/infrasrv/domain/workflow/workflow_domain.py +164 -0
  277. cancan_microstack/services/infrasrv/infrastructure/__init__.py +0 -0
  278. cancan_microstack/services/infrasrv/infrastructure/api/__init__.py +0 -0
  279. cancan_microstack/services/infrasrv/infrastructure/api/controllersrv_api.py +165 -0
  280. cancan_microstack/services/infrasrv/infrastructure/cache/__init__.py +0 -0
  281. cancan_microstack/services/infrasrv/infrastructure/cache/service_registry_cache.py +174 -0
  282. cancan_microstack/services/infrasrv/infrastructure/db/__init__.py +0 -0
  283. cancan_microstack/services/infrasrv/infrastructure/db/model/__init__.py +0 -0
  284. cancan_microstack/services/infrasrv/infrastructure/db/model/execution_log_tbl.py +53 -0
  285. cancan_microstack/services/infrasrv/infrastructure/db/model/node_instance_tbl.py +55 -0
  286. cancan_microstack/services/infrasrv/infrastructure/db/model/service_action_log_tbl.py +44 -0
  287. cancan_microstack/services/infrasrv/infrastructure/db/model/service_config_tbl.py +30 -0
  288. cancan_microstack/services/infrasrv/infrastructure/db/model/service_info_tbl.py +59 -0
  289. cancan_microstack/services/infrasrv/infrastructure/db/model/service_instance_tbl.py +88 -0
  290. cancan_microstack/services/infrasrv/infrastructure/db/model/service_operation_tbl.py +73 -0
  291. cancan_microstack/services/infrasrv/infrastructure/db/model/workflow_definition_tbl.py +55 -0
  292. cancan_microstack/services/infrasrv/infrastructure/db/model/workflow_definition_version_tbl.py +43 -0
  293. cancan_microstack/services/infrasrv/infrastructure/db/model/workflow_engine_alert_tbl.py +57 -0
  294. cancan_microstack/services/infrasrv/infrastructure/db/model/workflow_run_tbl.py +56 -0
  295. cancan_microstack/services/infrasrv/infrastructure/db/operate/__init__.py +0 -0
  296. cancan_microstack/services/infrasrv/infrastructure/db/operate/service_action_log_op.py +239 -0
  297. cancan_microstack/services/infrasrv/infrastructure/db/operate/service_config.py +80 -0
  298. cancan_microstack/services/infrasrv/infrastructure/db/operate/service_config_manager.py +198 -0
  299. cancan_microstack/services/infrasrv/infrastructure/db/operate/service_info_op.py +297 -0
  300. cancan_microstack/services/infrasrv/infrastructure/db/operate/service_instance_op.py +688 -0
  301. cancan_microstack/services/infrasrv/infrastructure/db/operate/service_operation_op.py +387 -0
  302. cancan_microstack/services/infrasrv/infrastructure/db/operate/service_registry.py +124 -0
  303. cancan_microstack/services/infrasrv/infrastructure/db/operate/workflow_op.py +804 -0
  304. cancan_microstack/services/infrasrv/infrastructure/ddl_manager.py +31 -0
  305. cancan_microstack/services/infrasrv/infrastructure/mongo/__init__.py +1 -0
  306. cancan_microstack/services/infrasrv/infrastructure/mongo/log_repository.py +129 -0
  307. cancan_microstack/services/infrasrv/interface/__init__.py +0 -0
  308. cancan_microstack/services/infrasrv/interface/api/__init__.py +0 -0
  309. cancan_microstack/services/infrasrv/interface/api/health_check_api.py +29 -0
  310. cancan_microstack/services/infrasrv/interface/api/hooks.py +284 -0
  311. cancan_microstack/services/infrasrv/interface/api/internal.py +49 -0
  312. cancan_microstack/services/infrasrv/interface/api/internal_instance_api.py +265 -0
  313. cancan_microstack/services/infrasrv/interface/api/internal_operation_api.py +206 -0
  314. cancan_microstack/services/infrasrv/interface/api/service_config.py +50 -0
  315. cancan_microstack/services/infrasrv/interface/api/service_logs_api.py +49 -0
  316. cancan_microstack/services/infrasrv/interface/api/service_management_api.py +113 -0
  317. cancan_microstack/services/infrasrv/interface/api/service_registry.py +117 -0
  318. cancan_microstack/services/infrasrv/interface/api/workflow_api.py +303 -0
  319. cancan_microstack/services/infrasrv/interface/schedule/__init__.py +0 -0
  320. cancan_microstack/services/infrasrv/interface/schedule/cleanup.py +13 -0
  321. cancan_microstack/services/infrasrv/interface/schedule/health_check.py +27 -0
  322. cancan_microstack/services/infrasrv/interface/schedule/log_cleanup.py +26 -0
  323. cancan_microstack/services/infrasrv/interface/schedule/operation_tracker.py +25 -0
  324. cancan_microstack/services/infrasrv/interface/schedule/scheduler.py +39 -0
  325. cancan_microstack/services/infrasrv/interface/schedule/workflow_scheduler.py +115 -0
  326. cancan_microstack/services/infrasrv/router.py +341 -0
  327. cancan_microstack/services/opsbffsrv/__init__.py +4 -0
  328. cancan_microstack/services/opsbffsrv/application/__init__.py +0 -0
  329. cancan_microstack/services/opsbffsrv/application/async_operation_app.py +150 -0
  330. cancan_microstack/services/opsbffsrv/application/auth_app.py +285 -0
  331. cancan_microstack/services/opsbffsrv/application/caddy/__init__.py +0 -0
  332. cancan_microstack/services/opsbffsrv/application/caddy/access_log_analysis_app.py +344 -0
  333. cancan_microstack/services/opsbffsrv/application/caddy/access_log_ingestion_service.py +169 -0
  334. cancan_microstack/services/opsbffsrv/application/caddy/certificate_management_app.py +355 -0
  335. cancan_microstack/services/opsbffsrv/application/caddy/rate_limit_management_app.py +496 -0
  336. cancan_microstack/services/opsbffsrv/application/caddy/route_management_app.py +401 -0
  337. cancan_microstack/services/opsbffsrv/application/caddy/stats_aggregation_app.py +364 -0
  338. cancan_microstack/services/opsbffsrv/application/db_admin_app.py +103 -0
  339. cancan_microstack/services/opsbffsrv/application/db_init_app.py +283 -0
  340. cancan_microstack/services/opsbffsrv/application/logging/__init__.py +1 -0
  341. cancan_microstack/services/opsbffsrv/application/logging/log_query_app.py +28 -0
  342. cancan_microstack/services/opsbffsrv/application/service_config.py +158 -0
  343. cancan_microstack/services/opsbffsrv/application/service_logs_app.py +74 -0
  344. cancan_microstack/services/opsbffsrv/application/service_registry.py +36 -0
  345. cancan_microstack/services/opsbffsrv/application/workflow_ops_app.py +730 -0
  346. cancan_microstack/services/opsbffsrv/conf/__init__.py +0 -0
  347. cancan_microstack/services/opsbffsrv/conf/config.py +224 -0
  348. cancan_microstack/services/opsbffsrv/domain/__init__.py +0 -0
  349. cancan_microstack/services/opsbffsrv/domain/auth/__init__.py +0 -0
  350. cancan_microstack/services/opsbffsrv/domain/auth/admin_init.py +38 -0
  351. cancan_microstack/services/opsbffsrv/domain/auth/auth_domain.py +108 -0
  352. cancan_microstack/services/opsbffsrv/domain/caddy/__init__.py +0 -0
  353. cancan_microstack/services/opsbffsrv/domain/caddy/access_log_analysis.py +358 -0
  354. cancan_microstack/services/opsbffsrv/domain/caddy/certificate_management.py +325 -0
  355. cancan_microstack/services/opsbffsrv/domain/caddy/default_routes.py +53 -0
  356. cancan_microstack/services/opsbffsrv/domain/caddy/rate_limit_management.py +308 -0
  357. cancan_microstack/services/opsbffsrv/domain/caddy/route_management.py +279 -0
  358. cancan_microstack/services/opsbffsrv/domain/caddy/stats_aggregation.py +654 -0
  359. cancan_microstack/services/opsbffsrv/domain/db_admin/__init__.py +0 -0
  360. cancan_microstack/services/opsbffsrv/domain/db_admin/db_admin_domain.py +118 -0
  361. cancan_microstack/services/opsbffsrv/domain/db_init/__init__.py +3 -0
  362. cancan_microstack/services/opsbffsrv/domain/db_init/db_init_domain.py +358 -0
  363. cancan_microstack/services/opsbffsrv/domain/logging/__init__.py +1 -0
  364. cancan_microstack/services/opsbffsrv/domain/logging/log_query_domain.py +99 -0
  365. cancan_microstack/services/opsbffsrv/domain/service_config/__init__.py +0 -0
  366. cancan_microstack/services/opsbffsrv/domain/service_config/service_config.py +81 -0
  367. cancan_microstack/services/opsbffsrv/domain/service_registry/__init__.py +0 -0
  368. cancan_microstack/services/opsbffsrv/domain/service_registry/service_registry.py +292 -0
  369. cancan_microstack/services/opsbffsrv/infrastructure/__init__.py +0 -0
  370. cancan_microstack/services/opsbffsrv/infrastructure/api/__init__.py +0 -0
  371. cancan_microstack/services/opsbffsrv/infrastructure/api/infrasrv_api.py +242 -0
  372. cancan_microstack/services/opsbffsrv/infrastructure/auth/__init__.py +0 -0
  373. cancan_microstack/services/opsbffsrv/infrastructure/auth/captcha_service.py +67 -0
  374. cancan_microstack/services/opsbffsrv/infrastructure/auth/password_service.py +12 -0
  375. cancan_microstack/services/opsbffsrv/infrastructure/auth/redis_store.py +131 -0
  376. cancan_microstack/services/opsbffsrv/infrastructure/auth/totp_service.py +59 -0
  377. cancan_microstack/services/opsbffsrv/infrastructure/caddy/__init__.py +0 -0
  378. cancan_microstack/services/opsbffsrv/infrastructure/caddy/access_log_parser.py +307 -0
  379. cancan_microstack/services/opsbffsrv/infrastructure/caddy/admin_api_client.py +678 -0
  380. cancan_microstack/services/opsbffsrv/infrastructure/caddy/ip_geo_locator.py +176 -0
  381. cancan_microstack/services/opsbffsrv/infrastructure/db/__init__.py +0 -0
  382. cancan_microstack/services/opsbffsrv/infrastructure/db/model/__init__.py +0 -0
  383. cancan_microstack/services/opsbffsrv/infrastructure/db/model/admin_user_tbl.py +33 -0
  384. cancan_microstack/services/opsbffsrv/infrastructure/db/model/caddy_access_log_tbl.py +90 -0
  385. cancan_microstack/services/opsbffsrv/infrastructure/db/model/caddy_certificate_tbl.py +65 -0
  386. cancan_microstack/services/opsbffsrv/infrastructure/db/model/caddy_rate_limit_tbl.py +69 -0
  387. cancan_microstack/services/opsbffsrv/infrastructure/db/model/caddy_route_tbl.py +66 -0
  388. cancan_microstack/services/opsbffsrv/infrastructure/db/model/caddy_stats_tbl.py +78 -0
  389. cancan_microstack/services/opsbffsrv/infrastructure/db/model/service_action_log_tbl.py +44 -0
  390. cancan_microstack/services/opsbffsrv/infrastructure/db/model/service_config_tbl.py +30 -0
  391. cancan_microstack/services/opsbffsrv/infrastructure/db/model/service_info_tbl.py +51 -0
  392. cancan_microstack/services/opsbffsrv/infrastructure/db/model/service_instance_tbl.py +68 -0
  393. cancan_microstack/services/opsbffsrv/infrastructure/db/operate/__init__.py +0 -0
  394. cancan_microstack/services/opsbffsrv/infrastructure/db/operate/admin_user_operate.py +59 -0
  395. cancan_microstack/services/opsbffsrv/infrastructure/db/operate/caddy_access_log.py +531 -0
  396. cancan_microstack/services/opsbffsrv/infrastructure/db/operate/caddy_certificate.py +451 -0
  397. cancan_microstack/services/opsbffsrv/infrastructure/db/operate/caddy_rate_limit.py +360 -0
  398. cancan_microstack/services/opsbffsrv/infrastructure/db/operate/caddy_route.py +271 -0
  399. cancan_microstack/services/opsbffsrv/infrastructure/db/operate/caddy_stats.py +343 -0
  400. cancan_microstack/services/opsbffsrv/infrastructure/db/operate/service_action_log_op.py +57 -0
  401. cancan_microstack/services/opsbffsrv/infrastructure/db/operate/service_config.py +86 -0
  402. cancan_microstack/services/opsbffsrv/infrastructure/db/operate/service_info_op.py +79 -0
  403. cancan_microstack/services/opsbffsrv/infrastructure/db/operate/service_instance.py +58 -0
  404. cancan_microstack/services/opsbffsrv/infrastructure/db/operate/service_registry.py +138 -0
  405. cancan_microstack/services/opsbffsrv/infrastructure/ddl_manager.py +31 -0
  406. cancan_microstack/services/opsbffsrv/infrastructure/mongo/__init__.py +1 -0
  407. cancan_microstack/services/opsbffsrv/infrastructure/mongo/log_query_repository.py +87 -0
  408. cancan_microstack/services/opsbffsrv/interface/__init__.py +0 -0
  409. cancan_microstack/services/opsbffsrv/interface/api/__init__.py +0 -0
  410. cancan_microstack/services/opsbffsrv/interface/api/async_operation_api.py +137 -0
  411. cancan_microstack/services/opsbffsrv/interface/api/auth_api.py +113 -0
  412. cancan_microstack/services/opsbffsrv/interface/api/caddy/__init__.py +3 -0
  413. cancan_microstack/services/opsbffsrv/interface/api/caddy/access_log_api.py +174 -0
  414. cancan_microstack/services/opsbffsrv/interface/api/caddy/certificate_api.py +235 -0
  415. cancan_microstack/services/opsbffsrv/interface/api/caddy/rate_limit_api.py +302 -0
  416. cancan_microstack/services/opsbffsrv/interface/api/caddy/route_api.py +250 -0
  417. cancan_microstack/services/opsbffsrv/interface/api/caddy/stats_api.py +243 -0
  418. cancan_microstack/services/opsbffsrv/interface/api/db_admin_api.py +62 -0
  419. cancan_microstack/services/opsbffsrv/interface/api/db_init_api.py +109 -0
  420. cancan_microstack/services/opsbffsrv/interface/api/instance_management_api.py +165 -0
  421. cancan_microstack/services/opsbffsrv/interface/api/log_query_api.py +41 -0
  422. cancan_microstack/services/opsbffsrv/interface/api/mongo_express_proxy_api.py +181 -0
  423. cancan_microstack/services/opsbffsrv/interface/api/pgweb_proxy_api.py +154 -0
  424. cancan_microstack/services/opsbffsrv/interface/api/rabbitmq_mgmt_proxy_api.py +518 -0
  425. cancan_microstack/services/opsbffsrv/interface/api/redis_commander_proxy_api.py +133 -0
  426. cancan_microstack/services/opsbffsrv/interface/api/service_config.py +146 -0
  427. cancan_microstack/services/opsbffsrv/interface/api/service_logs_api.py +81 -0
  428. cancan_microstack/services/opsbffsrv/interface/api/service_registry.py +66 -0
  429. cancan_microstack/services/opsbffsrv/interface/api/workflow_ops_api.py +413 -0
  430. cancan_microstack/services/opsbffsrv/interface/middleware/__init__.py +0 -0
  431. cancan_microstack/services/opsbffsrv/interface/middleware/auth_middleware.py +52 -0
  432. cancan_microstack/services/opsbffsrv/router.py +901 -0
  433. cancan_microstack/utils/__init__.py +1 -0
  434. cancan_microstack/utils/container_env.py +218 -0
  435. cancan_microstack-0.0.1.dist-info/METADATA +155 -0
  436. cancan_microstack-0.0.1.dist-info/RECORD +440 -0
  437. cancan_microstack-0.0.1.dist-info/WHEEL +5 -0
  438. cancan_microstack-0.0.1.dist-info/entry_points.txt +2 -0
  439. cancan_microstack-0.0.1.dist-info/licenses/LICENSE +21 -0
  440. cancan_microstack-0.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,576 @@
1
+ """
2
+ 健康检查领域层(升级版)
3
+
4
+ 支持:
5
+ 1. 多实例健康检查
6
+ 2. 操作窗口期智能豁免
7
+ 3. 区分正常/异常关闭
8
+ 4. 失败处理策略(连续失败计数)
9
+ 5. 自动调用 controllersrv 重启服务
10
+ """
11
+ import asyncio
12
+ from typing import (
13
+ Optional,
14
+ Tuple,
15
+ )
16
+ from datetime import (
17
+ datetime,
18
+ timezone,
19
+ timedelta,
20
+ )
21
+
22
+ from linglong_web.utils import logger
23
+ from linglong_web import (
24
+ HTTPClientConfig,
25
+ http_client,
26
+ )
27
+ from linglong_web import LinglongConfig
28
+ from cancan_microstack.public.const.health_consts import InstanceHealthStatus
29
+ from cancan_microstack.public.schemas.infra.status_types import InstanceStatus
30
+ from cancan_microstack.public.const.operation_consts import (
31
+ OperationStatus,
32
+ InitiatedBy,
33
+ InitiatedFrom,
34
+ )
35
+ from cancan_microstack.public.const.action_consts import HealthCheckAction
36
+ from cancan_microstack.public.schemas.infra.health_check import (
37
+ InstanceHealthDetail,
38
+ HealthCheckSummary,
39
+ )
40
+ from cancan_microstack.public.schemas.infra.service_instance import ServiceInstance
41
+ from cancan_microstack.public.schemas.infra.service_info import ServiceInfo
42
+ from cancan_microstack.public.schemas.infra.service_operation import ServiceOperation
43
+ from cancan_microstack.public.schemas.controllersrv.async_requests import (
44
+ AsyncServiceOperationPayload,
45
+ AsyncOperationParams,
46
+ )
47
+ from cancan_microstack.services.infrasrv.infrastructure.db.operate.service_instance_op import (
48
+ get_instances_by_status,
49
+ update_instance_health_status,
50
+ increment_instance_consecutive_failures,
51
+ reset_instance_consecutive_failures,
52
+ soft_delete_instance,
53
+ )
54
+ from cancan_microstack.services.infrasrv.infrastructure.db.operate.service_info_op import (
55
+ get_service_info_by_name,
56
+ )
57
+ from cancan_microstack.services.infrasrv.infrastructure.db.operate.service_operation_op import (
58
+ get_recent_operations_by_service,
59
+ )
60
+
61
+
62
class HealthCheckDomain:
    """
    Health-check domain layer.

    What this class does:
    - Checks every instance returned by ``get_instances_by_status(InstanceStatus.UP)``.
    - Exempts a service while it has a RUNNING operation, or a SUCCESS operation
      that completed inside the operation window (queries ``get_recent_operations_by_service``).
    - Schedules an automatic stop when the service's ``expected_status`` is
      ``InstanceStatus.DOWN`` but the instance is still registered as UP.
    - Counts consecutive failures: below the threshold the instance is DEGRADED,
      at or above it the instance is UNHEALTHY and an automatic restart is
      requested from controllersrv.
    """

    # Health-check probing
    HEALTH_CHECK_TIMEOUT = 10.0  # timeout of a single HTTP probe, in seconds
    HEALTH_CHECK_RETRY_COUNT = 2  # extra attempts after the first failed probe
    HEALTH_CHECK_RETRY_DELAY = 1.0  # pause between attempts, in seconds

    # Failure threshold
    CONSECUTIVE_FAILURE_THRESHOLD = 3  # mark unhealthy after this many consecutive failures

    # Heartbeat timeout
    HEARTBEAT_TIMEOUT_MINUTES = 5  # an older heartbeat is logged as timed out

    # Operation window (health check is exempted inside it)
    OPERATION_WINDOW_MINUTES = 5

    def __init__(self):
        """Resolve the controllersrv base URL and prepare background-task bookkeeping."""
        # Fall back to the local default when the config does not define the host.
        self.controllersrv_host = getattr(
            LinglongConfig, "CONTROLLERSRV_HOST", "http://localhost:22100"
        )
        # Hold strong references to fire-and-forget tasks so they are not
        # garbage-collected while still running.
        self._bg: set = set()
        logger.info("HealthCheckDomain initialized")

    # ------------------------------------------------------------------
    # Small private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _as_utc(moment: datetime) -> datetime:
        """Return ``moment`` unchanged when timezone-aware, otherwise tagged as UTC."""
        if moment.tzinfo is None:
            return moment.replace(tzinfo=timezone.utc)
        return moment

    def _spawn_background(self, coro) -> None:
        """Start ``coro`` as a task and keep a reference to it until it finishes."""
        task = asyncio.create_task(coro)
        self._bg.add(task)
        task.add_done_callback(self._bg.discard)

    @staticmethod
    def _build_detail(
        instance: ServiceInstance,
        health_status: InstanceHealthStatus,
        consecutive_failures: int,
        last_heartbeat: Optional[str],
        *,
        exempted: bool = False,
        exemption_reason: Optional[str] = None,
        expected_stopped: bool = False,
        action_taken: Optional[HealthCheckAction] = None,
    ) -> InstanceHealthDetail:
        """Assemble the per-instance result from the instance record plus check outcome."""
        return InstanceHealthDetail(
            instance_id=instance.instance_id,
            service_name=instance.service_name,
            host=instance.host,
            port=instance.port,
            status=instance.status,
            health_status=health_status,
            consecutive_failures=consecutive_failures,
            exempted=exempted,
            exemption_reason=exemption_reason,
            expected_stopped=expected_stopped,
            action_taken=action_taken,
            last_heartbeat=last_heartbeat,
        )

    # ------------------------------------------------------------------
    # Public entry point
    # ------------------------------------------------------------------

    async def health_check_all_instances(self) -> HealthCheckSummary:
        """
        Run a health check over every instance currently in status UP.

        Instances are checked one after another; each result is tallied into
        exactly one summary counter and appended to ``details``.

        Returns:
            The aggregated health-check summary.
        """
        logger.info("Starting health check for all instances...")

        all_instances = await get_instances_by_status(InstanceStatus.UP)
        check_results = HealthCheckSummary(total=len(all_instances))

        for instance in all_instances:
            detail = await self._check_single_instance(instance)

            # Exemption and expected-stop take precedence over the raw status.
            if detail.exempted:
                check_results.exempted += 1
            elif detail.expected_stopped:
                check_results.expected_stopped += 1
            elif detail.health_status == InstanceHealthStatus.HEALTHY:
                check_results.healthy += 1
            elif detail.health_status == InstanceHealthStatus.DEGRADED:
                check_results.degraded += 1
            else:
                check_results.unhealthy += 1

            check_results.details.append(detail)

        logger.info(
            f"Health check completed: {check_results.healthy} healthy, "
            f"{check_results.degraded} degraded, {check_results.unhealthy} unhealthy, "
            f"{check_results.exempted} exempted, {check_results.expected_stopped} expected_stopped"
        )

        return check_results

    # ------------------------------------------------------------------
    # Per-instance check
    # ------------------------------------------------------------------

    async def _check_single_instance(self, instance: ServiceInstance) -> InstanceHealthDetail:
        """
        Check one instance and persist the outcome.

        Steps, in order:
        1. If the service is inside an operation window, return EXEMPTED.
        2. If the service is expected to be DOWN, schedule an auto stop and
           return EXPECTED_STOPPED.
        3. Otherwise probe the instance; update health status, failure counter
           and (on success) the heartbeat; schedule an auto restart once the
           failure threshold is reached.

        Args:
            instance: The registered service instance to check.

        Returns:
            The health detail describing the outcome for this instance.
        """
        instance_id = instance.instance_id
        service_name = instance.service_name

        # The stored counter may be None; normalise once and use the
        # normalised value everywhere below.
        consecutive_failures = instance.consecutive_failures or 0
        last_heartbeat = instance.last_heartbeat.isoformat() if instance.last_heartbeat else None

        # 1. Operation window: skip the check entirely.
        in_operation_window, operation_type = await self._is_in_operation_window(service_name)
        if in_operation_window:
            logger.debug(f"Instance {instance_id} exempted: {operation_type} operation in progress")
            return self._build_detail(
                instance,
                InstanceHealthStatus.EXEMPTED,
                consecutive_failures,
                last_heartbeat,
                exempted=True,
                exemption_reason=f"In operation window ({operation_type})",
            )

        # 2. Expected status: the service should be stopped but this instance is still UP.
        service_info: Optional[ServiceInfo] = await get_service_info_by_name(service_name)
        if service_info and service_info.expected_status == InstanceStatus.DOWN:
            logger.info(f"Instance {instance_id} expected to be stopped (expected_status=DOWN)")
            self._spawn_background(self._auto_stop_instance(service_name, instance_id))
            return self._build_detail(
                instance,
                InstanceHealthStatus.EXPECTED_STOPPED,
                consecutive_failures,
                last_heartbeat,
                expected_stopped=True,
                action_taken=HealthCheckAction.AUTO_STOP_SCHEDULED,
            )

        # 3. Actual probe.
        action_taken: Optional[HealthCheckAction] = None
        is_healthy = await self._perform_health_check_with_retry(instance)

        if is_healthy:
            health_status = InstanceHealthStatus.HEALTHY
            now_ts = datetime.now(timezone.utc)

            if consecutive_failures > 0:
                await reset_instance_consecutive_failures(instance_id)
                logger.info(f"Instance {instance_id} recovered (previous failures: {consecutive_failures})")
                # Report the post-reset value, mirroring how the failure
                # branch reports the freshly incremented counter.
                consecutive_failures = 0

            await update_instance_health_status(
                instance_id=instance_id,
                health_status=InstanceHealthStatus.HEALTHY,
                last_health_check=now_ts,
                last_health_error=None,
                last_heartbeat=now_ts,
            )
            last_heartbeat = now_ts.isoformat()

        else:
            new_failure_count = await increment_instance_consecutive_failures(instance_id)

            # None means the row was not found (the instance may have been cleaned up).
            if new_failure_count is None:
                logger.warning(f"Instance {instance_id} not found when incrementing failure count")
                return self._build_detail(
                    instance,
                    InstanceHealthStatus.UNKNOWN,
                    consecutive_failures,
                    last_heartbeat,
                    exemption_reason="Instance Not Found",
                )

            consecutive_failures = new_failure_count

            if new_failure_count >= self.CONSECUTIVE_FAILURE_THRESHOLD:
                health_status = InstanceHealthStatus.UNHEALTHY

                await update_instance_health_status(
                    instance_id=instance_id,
                    health_status=InstanceHealthStatus.UNHEALTHY,
                    last_health_check=datetime.now(timezone.utc),
                    last_health_error=f"Failed {new_failure_count} consecutive health checks"
                )

                logger.error(
                    f"Instance {instance_id} marked as unhealthy "
                    f"after {new_failure_count} consecutive failures"
                )

                # Ask controllersrv to restart the instance without blocking the sweep.
                self._spawn_background(self._auto_restart_instance(service_name, instance_id))
                action_taken = HealthCheckAction.AUTO_RESTART_SCHEDULED

            else:
                health_status = InstanceHealthStatus.DEGRADED

                await update_instance_health_status(
                    instance_id=instance_id,
                    health_status=InstanceHealthStatus.DEGRADED,
                    last_health_check=datetime.now(timezone.utc),
                    last_health_error=f"Failed {new_failure_count} health checks"
                )

                logger.warning(
                    f"Instance {instance_id} degraded "
                    f"({new_failure_count}/{self.CONSECUTIVE_FAILURE_THRESHOLD} failures)"
                )

        return self._build_detail(
            instance,
            health_status,
            consecutive_failures,
            last_heartbeat,
            action_taken=action_taken,
        )

    async def _is_in_operation_window(
        self,
        service_name: str,
        window_minutes: Optional[int] = None
    ) -> Tuple[bool, Optional[str]]:
        """
        Tell whether the service is inside an operation window.

        A service is inside the window when it has a RUNNING operation within
        the window, or when its first returned SUCCESS operation completed
        less than ``window_minutes`` ago.

        Args:
            service_name: Name of the service.
            window_minutes: Window length in minutes; defaults to
                ``OPERATION_WINDOW_MINUTES``.

        Returns:
            ``(in_window, operation_type)``; ``operation_type`` carries a
            ``" (completed)"`` suffix for the post-operation case and is
            ``None`` when not in a window.
        """
        if window_minutes is None:
            window_minutes = self.OPERATION_WINDOW_MINUTES

        recent_operations: list[ServiceOperation] = await get_recent_operations_by_service(
            service_name=service_name,
            time_window_seconds=window_minutes * 60,
            status=OperationStatus.RUNNING  # only operations still in progress
        )

        if recent_operations:
            operation = recent_operations[0]
            operation_type = operation.operation_type
            logger.debug(
                f"Service {service_name} in operation window: {operation_type} "
                f"(started at {operation.created_time})"
            )
            return True, operation_type

        # Also honour operations that finished recently.
        completed_operations: list[ServiceOperation] = await get_recent_operations_by_service(
            service_name=service_name,
            time_window_seconds=window_minutes * 60,
            status=OperationStatus.SUCCESS
        )

        if completed_operations:
            operation = completed_operations[0]
            if operation.completed_at:
                now = datetime.now(timezone.utc)
                # Subtracting a naive datetime from an aware one raises
                # TypeError. NOTE(review): naive values are assumed to be UTC,
                # matching how last_heartbeat is treated in this class - confirm.
                time_since_completion = now - self._as_utc(operation.completed_at)

                if time_since_completion < timedelta(minutes=window_minutes):
                    operation_type = operation.operation_type
                    logger.debug(
                        f"Service {service_name} in post-operation window: {operation_type} "
                        f"(completed {time_since_completion.total_seconds():.0f}s ago)"
                    )
                    return True, f"{operation_type} (completed)"

        return False, None

    async def _perform_health_check_with_retry(self, instance: ServiceInstance) -> bool:
        """
        Probe the instance over HTTP, retrying on failure.

        A stale or missing heartbeat is only logged; the HTTP probe decides
        the result.

        Args:
            instance: The service instance to probe.

        Returns:
            True as soon as one attempt succeeds, False when all attempts fail.
        """
        # 1. Heartbeat freshness (informational only).
        if instance.last_heartbeat:
            time_diff = datetime.now(timezone.utc) - self._as_utc(instance.last_heartbeat)
            if time_diff > timedelta(minutes=self.HEARTBEAT_TIMEOUT_MINUTES):
                logger.warning(
                    f"Instance {instance.instance_id} heartbeat timeout: "
                    f"{time_diff.total_seconds():.0f}s (threshold: {self.HEARTBEAT_TIMEOUT_MINUTES * 60}s)"
                )
        else:
            logger.warning(
                f"Instance {instance.instance_id} has no heartbeat timestamp recorded; proceeding with HTTP check"
            )

        # 2. HTTP probe: one initial attempt plus HEALTH_CHECK_RETRY_COUNT retries.
        for attempt in range(self.HEALTH_CHECK_RETRY_COUNT + 1):
            try:
                is_healthy = await self._perform_http_health_check(instance)

                if is_healthy:
                    if attempt > 0:
                        logger.info(
                            f"Instance {instance.instance_id} health check succeeded on retry {attempt}"
                        )
                    return True

                # Wait before the next attempt, unless this was the last one.
                if attempt < self.HEALTH_CHECK_RETRY_COUNT:
                    logger.debug(
                        f"Health check failed for {instance.instance_id}, "
                        f"retrying in {self.HEALTH_CHECK_RETRY_DELAY}s "
                        f"(attempt {attempt + 1}/{self.HEALTH_CHECK_RETRY_COUNT + 1})"
                    )
                    await asyncio.sleep(self.HEALTH_CHECK_RETRY_DELAY)

            except Exception as e:
                logger.error(
                    f"Health check error for {instance.instance_id} (attempt {attempt + 1}): {e}"
                )
                if attempt < self.HEALTH_CHECK_RETRY_COUNT:
                    await asyncio.sleep(self.HEALTH_CHECK_RETRY_DELAY)

        return False

    async def _handle_instance_id_mismatch(self, instance: ServiceInstance, actual_instance_id: str) -> None:
        """Soft-delete the stale registry record when the endpoint reports a different instance ID."""
        logger.warning(
            "Instance mismatch detected for service=%s host=%s:%s (record=%s, actual=%s). Marking stale record.",
            instance.service_name,
            instance.host,
            instance.port,
            instance.instance_id,
            actual_instance_id,
        )
        await soft_delete_instance(instance.service_name, instance.instance_id)

    async def _perform_http_health_check(self, instance: ServiceInstance) -> bool:
        """
        Perform a single HTTP probe against ``/internal/health``.

        The probe succeeds on HTTP 200, unless the response body names a
        different ``instance_id`` than the registry record; in that case the
        stale record is soft-deleted and the probe counts as failed.

        Args:
            instance: The service instance to probe.

        Returns:
            True when the instance answered as healthy, False otherwise
            (including timeouts and any other exception).
        """
        try:
            health_url = f"http://{instance.host}:{instance.port}/internal/health"

            resp = await http_client.get(
                health_url,
                timeout=self.HEALTH_CHECK_TIMEOUT
            )

            if not resp or resp.status != 200:
                logger.debug(
                    f"Health check failed for {instance.instance_id}: "
                    f"HTTP {resp.status if resp else 'None'}"
                )
                return False

            # An unreadable body is tolerated: the 200 alone counts as healthy.
            actual_instance_id = None
            try:
                body = await resp.json()
                actual_instance_id = body.get("instance_id") if isinstance(body, dict) else None
            except Exception as parse_exc:  # noqa: BLE001
                logger.warning(
                    "Failed to decode health response for %s: %s", instance.instance_id, parse_exc
                )

            if actual_instance_id and actual_instance_id != instance.instance_id:
                await self._handle_instance_id_mismatch(instance, actual_instance_id)
                return False

            return True

        except asyncio.TimeoutError:
            logger.warning(f"Health check timeout for {instance.instance_id}")
            return False
        except Exception as e:
            logger.error(f"Health check exception for {instance.instance_id}: {e}")
            return False

    # ------------------------------------------------------------------
    # controllersrv calls
    # ------------------------------------------------------------------

    async def _schedule_controller_operation(
        self,
        *,
        action: str,
        reason: str,
        service_name: str,
        instance_id: str,
    ) -> None:
        """
        POST an asynchronous ``action`` ("restart" or "stop") request to controllersrv.

        Never raises: every failure is logged, because callers run this as a
        fire-and-forget background task.
        """
        label = f"auto-{action}"
        try:
            url = f"{self.controllersrv_host}/v1/controllersrv/async/service/{action}"

            payload = AsyncServiceOperationPayload(
                service_name=f"{service_name}.service",
                operation_params=AsyncOperationParams(
                    instance_id=instance_id,
                    reason=reason
                ),
                initiated_by=InitiatedBy.INFRASRV_HEALTH_CHECK,
                initiated_from=InitiatedFrom.HEALTH_CHECK_DOMAIN
            )

            resp = await http_client.post(
                url,
                json=payload.model_dump(),
                timeout=HTTPClientConfig.INTERNAL_SERVICE_TIMEOUT
            )

            if resp and resp.status == 200:
                data = await resp.json()
                # Tolerate a missing or null "data" envelope: the request was
                # accepted, so it must not be reported as a scheduling error.
                envelope = data.get("data") if isinstance(data, dict) else None
                operation_id = envelope.get("operation_id") if isinstance(envelope, dict) else None
                logger.info(f"{label.capitalize()} scheduled for {instance_id}, operation_id: {operation_id}")
            else:
                logger.error(
                    f"Failed to schedule {label} for {instance_id}: "
                    f"HTTP {resp.status if resp else 'None'}"
                )

        except Exception as e:
            logger.error(f"Error scheduling {label} for {instance_id}: {e}", exc_info=True)

    async def _auto_restart_instance(self, service_name: str, instance_id: str):
        """
        Ask controllersrv to restart an unhealthy instance.

        Args:
            service_name: Name of the service.
            instance_id: ID of the instance to restart.
        """
        logger.warning(f"Auto-restarting unhealthy instance: {instance_id}")
        await self._schedule_controller_operation(
            action="restart",
            reason="auto_restart_unhealthy",
            service_name=service_name,
            instance_id=instance_id,
        )

    async def _auto_stop_instance(self, service_name: str, instance_id: str):
        """
        Ask controllersrv to stop an instance whose service is expected to be stopped.

        Args:
            service_name: Name of the service.
            instance_id: ID of the instance to stop.
        """
        logger.info(f"Auto-stopping instance (expected_status=stopped): {instance_id}")
        await self._schedule_controller_operation(
            action="stop",
            reason="auto_stop_expected_stopped",
            service_name=service_name,
            instance_id=instance_id,
        )
@@ -0,0 +1,19 @@
1
+ """Hooks package public API.
2
+
3
+ 钩子模块公共入口 / Hook module public entry.
4
+
5
+ 该包对外暴露全局 HookManager 的访问入口,避免调用方直接依赖具体实现文件。
6
+ Expose global HookManager accessors to avoid callers depending on internal modules.
7
+ """
8
+
9
+ from .hook_registry import (
10
+ get_hook_manager,
11
+ reset_hook_manager,
12
+ )
13
+ from .pre_registration_hooks import HookManager
14
+
15
+ __all__ = [
16
+ "HookManager",
17
+ "get_hook_manager",
18
+ "reset_hook_manager",
19
+ ]