blockmine 1.21.0 → 1.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (492)
  1. package/.claude/agents/README.md +469 -0
  2. package/.claude/agents/auth-route-debugger.md +118 -0
  3. package/.claude/agents/auth-route-tester.md +93 -0
  4. package/.claude/agents/auto-error-resolver.md +97 -0
  5. package/.claude/agents/build-optimizer.md +236 -0
  6. package/.claude/agents/code-architecture-reviewer.md +83 -0
  7. package/.claude/agents/code-refactor-master.md +94 -0
  8. package/.claude/agents/cost-optimizer.md +134 -0
  9. package/.claude/agents/deployment-orchestrator.md +113 -0
  10. package/.claude/agents/documentation-architect.md +82 -0
  11. package/.claude/agents/frontend-error-fixer.md +77 -0
  12. package/.claude/agents/iac-code-generator.md +71 -0
  13. package/.claude/agents/incident-responder.md +346 -0
  14. package/.claude/agents/infrastructure-architect.md +31 -0
  15. package/.claude/agents/kubernetes-specialist.md +56 -0
  16. package/.claude/agents/migration-planner.md +181 -0
  17. package/.claude/agents/network-architect.md +196 -0
  18. package/.claude/agents/plan-reviewer.md +52 -0
  19. package/.claude/agents/refactor-planner.md +63 -0
  20. package/.claude/agents/security-scanner.md +102 -0
  21. package/.claude/agents/web-research-specialist.md +78 -0
  22. package/.claude/commands/cost-analysis.md +315 -0
  23. package/.claude/commands/dev-docs-update.md +55 -0
  24. package/.claude/commands/dev-docs.md +51 -0
  25. package/.claude/commands/incident-debug.md +247 -0
  26. package/.claude/commands/infra-plan.md +81 -0
  27. package/.claude/commands/migration-plan.md +478 -0
  28. package/.claude/commands/route-research-for-testing.md +37 -0
  29. package/.claude/commands/security-review.md +66 -0
  30. package/.claude/hooks/CONFIG.md +448 -0
  31. package/.claude/hooks/README.md +163 -0
  32. package/.claude/hooks/SKILL_ACTIVATION_COMPLETE.md +226 -0
  33. package/.claude/hooks/WINDOWS_HOOKS_README.md +151 -0
  34. package/.claude/hooks/add-skill-activation-banners.ts +132 -0
  35. package/.claude/hooks/comprehensive-skill-test.ts +1315 -0
  36. package/.claude/hooks/error-handling-reminder.sh +12 -0
  37. package/.claude/hooks/error-handling-reminder.ts +222 -0
  38. package/.claude/hooks/k8s-manifest-validator.sh +56 -0
  39. package/.claude/hooks/package-lock.json +556 -0
  40. package/.claude/hooks/package.json +16 -0
  41. package/.claude/hooks/post-tool-use-tracker.ps1 +174 -0
  42. package/.claude/hooks/post-tool-use-tracker.sh +183 -0
  43. package/.claude/hooks/security-policy-check.sh +247 -0
  44. package/.claude/hooks/skill-activation-prompt.ps1 +10 -0
  45. package/.claude/hooks/skill-activation-prompt.sh +10 -0
  46. package/.claude/hooks/skill-activation-prompt.ts +141 -0
  47. package/.claude/hooks/stop-build-check-enhanced.sh +130 -0
  48. package/.claude/hooks/terraform-validator.sh +53 -0
  49. package/.claude/hooks/test-input.json +7 -0
  50. package/.claude/hooks/test-skill-activation.ts +427 -0
  51. package/.claude/hooks/trigger-build-resolver.sh +79 -0
  52. package/.claude/hooks/tsc-check.sh +173 -0
  53. package/.claude/hooks/tsconfig.json +19 -0
  54. package/.claude/settings.json +59 -0
  55. package/.claude/settings.local.json +36 -14
  56. package/.claude/skills/README.md +507 -0
  57. package/.claude/skills/api-engineering/SKILL.md +63 -0
  58. package/.claude/skills/api-engineering/resources/api-versioning.md +88 -0
  59. package/.claude/skills/api-engineering/resources/graphql-patterns.md +106 -0
  60. package/.claude/skills/api-engineering/resources/rate-limiting.md +118 -0
  61. package/.claude/skills/api-engineering/resources/rest-api-design.md +105 -0
  62. package/.claude/skills/backend-dev-guidelines/SKILL.md +306 -0
  63. package/.claude/skills/backend-dev-guidelines/resources/architecture-overview.md +451 -0
  64. package/.claude/skills/backend-dev-guidelines/resources/async-and-errors.md +307 -0
  65. package/.claude/skills/backend-dev-guidelines/resources/complete-examples.md +638 -0
  66. package/.claude/skills/backend-dev-guidelines/resources/configuration.md +275 -0
  67. package/.claude/skills/backend-dev-guidelines/resources/database-patterns.md +224 -0
  68. package/.claude/skills/backend-dev-guidelines/resources/middleware-guide.md +213 -0
  69. package/.claude/skills/backend-dev-guidelines/resources/routing-and-controllers.md +756 -0
  70. package/.claude/skills/backend-dev-guidelines/resources/sentry-and-monitoring.md +336 -0
  71. package/.claude/skills/backend-dev-guidelines/resources/services-and-repositories.md +789 -0
  72. package/.claude/skills/backend-dev-guidelines/resources/testing-guide.md +235 -0
  73. package/.claude/skills/backend-dev-guidelines/resources/validation-patterns.md +754 -0
  74. package/.claude/skills/budget-and-cost-management/SKILL.md +850 -0
  75. package/.claude/skills/build-engineering/SKILL.md +431 -0
  76. package/.claude/skills/build-engineering/resources/artifact-repositories.md +72 -0
  77. package/.claude/skills/build-engineering/resources/build-caching.md +96 -0
  78. package/.claude/skills/build-engineering/resources/build-pipelines.md +105 -0
  79. package/.claude/skills/build-engineering/resources/build-security.md +95 -0
  80. package/.claude/skills/build-engineering/resources/build-systems.md +389 -0
  81. package/.claude/skills/build-engineering/resources/compilation-optimization.md +201 -0
  82. package/.claude/skills/build-engineering/resources/dependency-management.md +73 -0
  83. package/.claude/skills/build-engineering/resources/monorepo-builds.md +110 -0
  84. package/.claude/skills/build-engineering/resources/performance-optimization.md +113 -0
  85. package/.claude/skills/build-engineering/resources/reproducible-builds.md +82 -0
  86. package/.claude/skills/cloud-engineering/SKILL.md +675 -0
  87. package/.claude/skills/cloud-engineering/resources/aws-patterns.md +742 -0
  88. package/.claude/skills/cloud-engineering/resources/azure-patterns.md +714 -0
  89. package/.claude/skills/cloud-engineering/resources/cleared-cloud-environments.md +987 -0
  90. package/.claude/skills/cloud-engineering/resources/cloud-cost-optimization.md +757 -0
  91. package/.claude/skills/cloud-engineering/resources/cloud-networking.md +1058 -0
  92. package/.claude/skills/cloud-engineering/resources/cloud-security-tools.md +1530 -0
  93. package/.claude/skills/cloud-engineering/resources/cloud-security.md +990 -0
  94. package/.claude/skills/cloud-engineering/resources/gcp-patterns.md +758 -0
  95. package/.claude/skills/cloud-engineering/resources/migration-strategies.md +820 -0
  96. package/.claude/skills/cloud-engineering/resources/multi-cloud-strategies.md +670 -0
  97. package/.claude/skills/cloud-engineering/resources/oci-patterns.md +1198 -0
  98. package/.claude/skills/cloud-engineering/resources/serverless-patterns.md +795 -0
  99. package/.claude/skills/cloud-engineering/resources/well-architected-frameworks.md +966 -0
  100. package/.claude/skills/cybersecurity/SKILL.md +409 -0
  101. package/.claude/skills/cybersecurity/resources/security-architecture.md +266 -0
  102. package/.claude/skills/database-engineering/SKILL.md +61 -0
  103. package/.claude/skills/database-engineering/resources/backup-and-recovery.md +72 -0
  104. package/.claude/skills/database-engineering/resources/database-replication.md +63 -0
  105. package/.claude/skills/database-engineering/resources/postgresql-fundamentals.md +70 -0
  106. package/.claude/skills/database-engineering/resources/query-optimization.md +68 -0
  107. package/.claude/skills/devsecops/SKILL.md +374 -0
  108. package/.claude/skills/devsecops/resources/ci-cd-security.md +204 -0
  109. package/.claude/skills/devsecops/resources/compliance-automation.md +530 -0
  110. package/.claude/skills/devsecops/resources/compliance-frameworks.md +2322 -0
  111. package/.claude/skills/devsecops/resources/container-security.md +915 -0
  112. package/.claude/skills/devsecops/resources/cspm-integration.md +1440 -0
  113. package/.claude/skills/devsecops/resources/policy-enforcement.md +619 -0
  114. package/.claude/skills/devsecops/resources/secrets-management.md +755 -0
  115. package/.claude/skills/devsecops/resources/security-monitoring.md +146 -0
  116. package/.claude/skills/devsecops/resources/security-scanning.md +887 -0
  117. package/.claude/skills/devsecops/resources/security-testing.md +203 -0
  118. package/.claude/skills/devsecops/resources/supply-chain-security.md +518 -0
  119. package/.claude/skills/devsecops/resources/vulnerability-management.md +481 -0
  120. package/.claude/skills/devsecops/resources/zero-trust-architecture.md +177 -0
  121. package/.claude/skills/documentation-as-code/SKILL.md +323 -0
  122. package/.claude/skills/documentation-as-code/resources/api-documentation.md +90 -0
  123. package/.claude/skills/documentation-as-code/resources/changelog-management.md +79 -0
  124. package/.claude/skills/documentation-as-code/resources/diagram-generation.md +44 -0
  125. package/.claude/skills/documentation-as-code/resources/docs-as-code-workflow.md +99 -0
  126. package/.claude/skills/documentation-as-code/resources/documentation-automation.md +68 -0
  127. package/.claude/skills/documentation-as-code/resources/documentation-sites.md +79 -0
  128. package/.claude/skills/documentation-as-code/resources/markdown-best-practices.md +162 -0
  129. package/.claude/skills/documentation-as-code/resources/openapi-specification.md +77 -0
  130. package/.claude/skills/documentation-as-code/resources/readme-engineering.md +60 -0
  131. package/.claude/skills/documentation-as-code/resources/technical-writing-guide.md +202 -0
  132. package/.claude/skills/engineering-management/SKILL.md +356 -0
  133. package/.claude/skills/engineering-management/resources/career-ladders.md +609 -0
  134. package/.claude/skills/engineering-management/resources/hiring-and-assessment.md +555 -0
  135. package/.claude/skills/engineering-management/resources/one-on-one-guides.md +609 -0
  136. package/.claude/skills/engineering-management/resources/resource-planning.md +557 -0
  137. package/.claude/skills/engineering-management/resources/team-organization-patterns.md +491 -0
  138. package/.claude/skills/engineering-management/resources/technical-interviews.md +474 -0
  139. package/.claude/skills/engineering-operations-management/SKILL.md +817 -0
  140. package/.claude/skills/error-tracking/SKILL.md +379 -0
  141. package/.claude/skills/frontend-dev-guidelines/SKILL.md +403 -0
  142. package/.claude/skills/frontend-dev-guidelines/resources/common-patterns.md +331 -0
  143. package/.claude/skills/frontend-dev-guidelines/resources/complete-examples.md +872 -0
  144. package/.claude/skills/frontend-dev-guidelines/resources/component-patterns.md +502 -0
  145. package/.claude/skills/frontend-dev-guidelines/resources/data-fetching.md +767 -0
  146. package/.claude/skills/frontend-dev-guidelines/resources/file-organization.md +502 -0
  147. package/.claude/skills/frontend-dev-guidelines/resources/loading-and-error-states.md +501 -0
  148. package/.claude/skills/frontend-dev-guidelines/resources/performance.md +406 -0
  149. package/.claude/skills/frontend-dev-guidelines/resources/routing-guide.md +364 -0
  150. package/.claude/skills/frontend-dev-guidelines/resources/styling-guide.md +428 -0
  151. package/.claude/skills/frontend-dev-guidelines/resources/typescript-standards.md +418 -0
  152. package/.claude/skills/general-it-engineering/SKILL.md +393 -0
  153. package/.claude/skills/general-it-engineering/resources/asset-management.md +712 -0
  154. package/.claude/skills/general-it-engineering/resources/automation-orchestration.md +817 -0
  155. package/.claude/skills/general-it-engineering/resources/business-continuity.md +786 -0
  156. package/.claude/skills/general-it-engineering/resources/change-management.md +715 -0
  157. package/.claude/skills/general-it-engineering/resources/enterprise-monitoring.md +729 -0
  158. package/.claude/skills/general-it-engineering/resources/help-desk-operations.md +738 -0
  159. package/.claude/skills/general-it-engineering/resources/incident-service-management.md +834 -0
  160. package/.claude/skills/general-it-engineering/resources/it-governance.md +753 -0
  161. package/.claude/skills/general-it-engineering/resources/itil-framework.md +503 -0
  162. package/.claude/skills/general-it-engineering/resources/service-management.md +669 -0
  163. package/.claude/skills/infrastructure-architecture/SKILL.md +328 -0
  164. package/.claude/skills/infrastructure-architecture/resources/architecture-decision-records.md +505 -0
  165. package/.claude/skills/infrastructure-architecture/resources/architecture-patterns.md +528 -0
  166. package/.claude/skills/infrastructure-architecture/resources/capacity-planning.md +453 -0
  167. package/.claude/skills/infrastructure-architecture/resources/cleared-environment-architecture.md +773 -0
  168. package/.claude/skills/infrastructure-architecture/resources/cost-architecture.md +499 -0
  169. package/.claude/skills/infrastructure-architecture/resources/data-architecture.md +501 -0
  170. package/.claude/skills/infrastructure-architecture/resources/disaster-recovery.md +535 -0
  171. package/.claude/skills/infrastructure-architecture/resources/migration-architecture.md +512 -0
  172. package/.claude/skills/infrastructure-architecture/resources/multi-region-design.md +608 -0
  173. package/.claude/skills/infrastructure-architecture/resources/reference-architectures.md +562 -0
  174. package/.claude/skills/infrastructure-architecture/resources/security-architecture.md +538 -0
  175. package/.claude/skills/infrastructure-architecture/resources/system-design-principles.md +489 -0
  176. package/.claude/skills/infrastructure-architecture/resources/workload-classification.md +1000 -0
  177. package/.claude/skills/infrastructure-strategy/SKILL.md +924 -0
  178. package/.claude/skills/network-engineering/SKILL.md +385 -0
  179. package/.claude/skills/network-engineering/resources/dns-management.md +738 -0
  180. package/.claude/skills/network-engineering/resources/load-balancing.md +820 -0
  181. package/.claude/skills/network-engineering/resources/network-architecture.md +546 -0
  182. package/.claude/skills/network-engineering/resources/network-security.md +921 -0
  183. package/.claude/skills/network-engineering/resources/network-troubleshooting.md +749 -0
  184. package/.claude/skills/network-engineering/resources/routing-switching.md +373 -0
  185. package/.claude/skills/network-engineering/resources/sdn-networking.md +695 -0
  186. package/.claude/skills/network-engineering/resources/service-mesh-networking.md +777 -0
  187. package/.claude/skills/network-engineering/resources/tcp-ip-protocols.md +444 -0
  188. package/.claude/skills/network-engineering/resources/vpn-connectivity.md +672 -0
  189. package/.claude/skills/observability-engineering/SKILL.md +101 -0
  190. package/.claude/skills/observability-engineering/resources/apm-tools.md +97 -0
  191. package/.claude/skills/observability-engineering/resources/correlation-strategies.md +87 -0
  192. package/.claude/skills/observability-engineering/resources/distributed-tracing.md +98 -0
  193. package/.claude/skills/observability-engineering/resources/logs-aggregation.md +118 -0
  194. package/.claude/skills/observability-engineering/resources/observability-cost-optimization.md +141 -0
  195. package/.claude/skills/observability-engineering/resources/opentelemetry.md +110 -0
  196. package/.claude/skills/platform-engineering/SKILL.md +555 -0
  197. package/.claude/skills/platform-engineering/resources/architecture-overview.md +600 -0
  198. package/.claude/skills/platform-engineering/resources/container-orchestration.md +916 -0
  199. package/.claude/skills/platform-engineering/resources/cost-optimization.md +634 -0
  200. package/.claude/skills/platform-engineering/resources/developer-platforms.md +670 -0
  201. package/.claude/skills/platform-engineering/resources/gitops-automation.md +650 -0
  202. package/.claude/skills/platform-engineering/resources/infrastructure-as-code.md +778 -0
  203. package/.claude/skills/platform-engineering/resources/infrastructure-standards.md +708 -0
  204. package/.claude/skills/platform-engineering/resources/multi-tenancy.md +602 -0
  205. package/.claude/skills/platform-engineering/resources/platform-security.md +711 -0
  206. package/.claude/skills/platform-engineering/resources/resource-management.md +592 -0
  207. package/.claude/skills/platform-engineering/resources/service-mesh.md +628 -0
  208. package/.claude/skills/release-engineering/SKILL.md +393 -0
  209. package/.claude/skills/release-engineering/resources/artifact-management.md +108 -0
  210. package/.claude/skills/release-engineering/resources/build-optimization.md +84 -0
  211. package/.claude/skills/release-engineering/resources/ci-cd-pipelines.md +411 -0
  212. package/.claude/skills/release-engineering/resources/deployment-strategies.md +197 -0
  213. package/.claude/skills/release-engineering/resources/pipeline-security.md +62 -0
  214. package/.claude/skills/release-engineering/resources/progressive-delivery.md +83 -0
  215. package/.claude/skills/release-engineering/resources/release-automation.md +68 -0
  216. package/.claude/skills/release-engineering/resources/release-orchestration.md +77 -0
  217. package/.claude/skills/release-engineering/resources/rollback-strategies.md +66 -0
  218. package/.claude/skills/release-engineering/resources/versioning-strategies.md +59 -0
  219. package/.claude/skills/route-tester/SKILL.md +392 -0
  220. package/.claude/skills/skill-developer/ADVANCED.md +197 -0
  221. package/.claude/skills/skill-developer/HOOK_MECHANISMS.md +306 -0
  222. package/.claude/skills/skill-developer/PATTERNS_LIBRARY.md +152 -0
  223. package/.claude/skills/skill-developer/SKILL.md +430 -0
  224. package/.claude/skills/skill-developer/SKILL_RULES_REFERENCE.md +315 -0
  225. package/.claude/skills/skill-developer/TRIGGER_TYPES.md +305 -0
  226. package/.claude/skills/skill-developer/TROUBLESHOOTING.md +514 -0
  227. package/.claude/skills/skill-rules.json +2940 -0
  228. package/.claude/skills/sre/SKILL.md +464 -0
  229. package/.claude/skills/sre/resources/alerting-best-practices.md +282 -0
  230. package/.claude/skills/sre/resources/capacity-planning.md +226 -0
  231. package/.claude/skills/sre/resources/chaos-engineering.md +193 -0
  232. package/.claude/skills/sre/resources/disaster-recovery.md +232 -0
  233. package/.claude/skills/sre/resources/incident-management.md +436 -0
  234. package/.claude/skills/sre/resources/observability-stack.md +240 -0
  235. package/.claude/skills/sre/resources/on-call-runbooks.md +167 -0
  236. package/.claude/skills/sre/resources/performance-optimization.md +108 -0
  237. package/.claude/skills/sre/resources/reliability-patterns.md +183 -0
  238. package/.claude/skills/sre/resources/slo-sli-sla.md +464 -0
  239. package/.claude/skills/sre/resources/toil-reduction.md +145 -0
  240. package/.claude/skills/systems-engineering/SKILL.md +648 -0
  241. package/.claude/skills/systems-engineering/resources/automation-patterns.md +771 -0
  242. package/.claude/skills/systems-engineering/resources/configuration-management.md +998 -0
  243. package/.claude/skills/systems-engineering/resources/linux-administration.md +672 -0
  244. package/.claude/skills/systems-engineering/resources/networking-fundamentals.md +982 -0
  245. package/.claude/skills/systems-engineering/resources/performance-tuning.md +871 -0
  246. package/.claude/skills/systems-engineering/resources/powershell-scripting.md +482 -0
  247. package/.claude/skills/systems-engineering/resources/security-hardening.md +739 -0
  248. package/.claude/skills/systems-engineering/resources/shell-scripting.md +915 -0
  249. package/.claude/skills/systems-engineering/resources/storage-management.md +628 -0
  250. package/.claude/skills/systems-engineering/resources/system-monitoring.md +787 -0
  251. package/.claude/skills/systems-engineering/resources/troubleshooting-guide.md +753 -0
  252. package/.claude/skills/systems-engineering/resources/windows-administration.md +738 -0
  253. package/.claude/skills/technical-leadership/SKILL.md +728 -0
  254. package/CHANGELOG.md +102 -42
  255. package/CLAUDE.md +284 -0
  256. package/README.md +315 -71
  257. package/backend/docs/SECRETS_DOCUMENTATION.md +327 -0
  258. package/backend/jest.config.js +59 -0
  259. package/backend/package-lock.json +6801 -0
  260. package/backend/package.json +24 -4
  261. package/backend/prisma/migrations/20251026104609_add_websocket_api/migration.sql +33 -0
  262. package/backend/prisma/migrations/20251116111851_add_execution_trace/migration.sql +22 -0
  263. package/backend/prisma/migrations/20251120154914_add_panel_api_keys/migration.sql +21 -0
  264. package/backend/prisma/migrations/20251121110241_add_proxy_table/migration.sql +45 -0
  265. package/backend/prisma/migrations/migration_lock.toml +2 -2
  266. package/backend/prisma/schema.prisma +103 -1
  267. package/backend/src/__tests__/core/DependencyService.test.js +336 -0
  268. package/backend/src/__tests__/core/UserService.test.js +875 -0
  269. package/backend/src/__tests__/repositories/BaseRepository.test.js +146 -0
  270. package/backend/src/__tests__/repositories/BotRepository.test.js +118 -0
  271. package/backend/src/__tests__/repositories/CommandRepository.test.js +132 -0
  272. package/backend/src/__tests__/repositories/EventGraphRepository.test.js +93 -0
  273. package/backend/src/__tests__/repositories/GroupRepository.test.js +155 -0
  274. package/backend/src/__tests__/repositories/PermissionRepository.test.js +130 -0
  275. package/backend/src/__tests__/repositories/PluginRepository.test.js +107 -0
  276. package/backend/src/__tests__/repositories/ServerRepository.test.js +80 -0
  277. package/backend/src/__tests__/repositories/UserRepository.test.js +128 -0
  278. package/backend/src/__tests__/secretsFilter.test.js +425 -0
  279. package/backend/src/__tests__/services/BotLifecycleService.test.js +416 -0
  280. package/backend/src/__tests__/services/BotProcessManager.test.js +285 -0
  281. package/backend/src/__tests__/services/CacheManager.test.js +125 -0
  282. package/backend/src/__tests__/services/CommandExecutionService.test.js +460 -0
  283. package/backend/src/__tests__/services/ResourceMonitorService.test.js +207 -0
  284. package/backend/src/__tests__/services/TelemetryService.test.js +291 -0
  285. package/backend/src/__tests__/setup.js +25 -0
  286. package/backend/src/ai/plugin-assistant-system-prompt.md +788 -0
  287. package/backend/src/api/middleware/auth.js +27 -0
  288. package/backend/src/api/middleware/botAccess.js +7 -3
  289. package/backend/src/api/middleware/panelApiAuth.js +135 -0
  290. package/backend/src/api/routes/aiAssistant.js +995 -0
  291. package/backend/src/api/routes/apiKeys.js +181 -0
  292. package/backend/src/api/routes/auth.js +669 -633
  293. package/backend/src/api/routes/botCommands.js +107 -0
  294. package/backend/src/api/routes/botGroups.js +165 -0
  295. package/backend/src/api/routes/botHistory.js +108 -0
  296. package/backend/src/api/routes/botPermissions.js +99 -0
  297. package/backend/src/api/routes/botStatus.js +36 -0
  298. package/backend/src/api/routes/botUsers.js +162 -0
  299. package/backend/src/api/routes/bots.js +2451 -2360
  300. package/backend/src/api/routes/eventGraphs.js +4 -1
  301. package/backend/src/api/routes/logs.js +13 -3
  302. package/backend/src/api/routes/panel.js +66 -66
  303. package/backend/src/api/routes/panelApiKeys.js +179 -0
  304. package/backend/src/api/routes/pluginIde.js +1715 -135
  305. package/backend/src/api/routes/plugins.js +376 -218
  306. package/backend/src/api/routes/proxies.js +130 -0
  307. package/backend/src/api/routes/search.js +4 -0
  308. package/backend/src/api/routes/servers.js +20 -3
  309. package/backend/src/api/routes/settings.js +5 -0
  310. package/backend/src/api/routes/system.js +174 -0
  311. package/backend/src/api/routes/traces.js +131 -0
  312. package/backend/src/config/debug.config.js +36 -0
  313. package/backend/src/container.js +82 -0
  314. package/backend/src/core/BotHistoryStore.js +180 -0
  315. package/backend/src/core/BotManager.js +149 -868
  316. package/backend/src/core/BotManager.old.js +1093 -0
  317. package/backend/src/core/BotProcess.js +850 -191
  318. package/backend/src/core/EventGraphManager.js +194 -198
  319. package/backend/src/core/GraphExecutionEngine.js +709 -57
  320. package/backend/src/core/MessageQueue.js +39 -12
  321. package/backend/src/core/NodeRegistry.js +37 -1134
  322. package/backend/src/core/PluginLoader.js +99 -5
  323. package/backend/src/core/PluginManager.js +126 -15
  324. package/backend/src/core/PrismaService.js +32 -0
  325. package/backend/src/core/TaskScheduler.js +1 -1
  326. package/backend/src/core/UserService.js +3 -3
  327. package/backend/src/core/__tests__/PrismaService.test.js +24 -0
  328. package/backend/src/core/commands/README.md +305 -0
  329. package/backend/src/core/commands/dev.js +13 -7
  330. package/backend/src/core/commands/ping.js +10 -4
  331. package/backend/src/core/commands/whois.js +63 -0
  332. package/backend/src/core/config/validation.js +27 -0
  333. package/backend/src/core/constants/graphTypes.js +21 -0
  334. package/backend/src/core/node-registries/actions.js +202 -0
  335. package/backend/src/core/node-registries/arrays.js +155 -0
  336. package/backend/src/core/node-registries/bot.js +23 -0
  337. package/backend/src/core/node-registries/data.js +290 -0
  338. package/backend/src/core/node-registries/debug.js +26 -0
  339. package/backend/src/core/node-registries/events.js +201 -0
  340. package/backend/src/core/node-registries/flow.js +139 -0
  341. package/backend/src/core/node-registries/logic.js +62 -0
  342. package/backend/src/core/node-registries/math.js +42 -0
  343. package/backend/src/core/node-registries/objects.js +98 -0
  344. package/backend/src/core/node-registries/strings.js +187 -0
  345. package/backend/src/core/node-registries/time.js +113 -0
  346. package/backend/src/core/node-registries/type.js +25 -0
  347. package/backend/src/core/node-registries/users.js +79 -0
  348. package/backend/src/core/nodes/{action_bot_look_at.js → actions/bot_look_at.js} +36 -36
  349. package/backend/src/core/nodes/{action_bot_set_variable.js → actions/bot_set_variable.js} +32 -32
  350. package/backend/src/core/nodes/actions/create_command.js +189 -0
  351. package/backend/src/core/nodes/actions/delete_command.js +92 -0
  352. package/backend/src/core/nodes/{action_send_log.js → actions/send_log.js} +28 -23
  353. package/backend/src/core/nodes/{action_send_message.js → actions/send_message.js} +32 -32
  354. package/backend/src/core/nodes/actions/send_websocket_response.js +33 -0
  355. package/backend/src/core/nodes/actions/update_command.js +133 -0
  356. package/backend/src/core/nodes/arrays/get_next.js +35 -0
  357. package/backend/src/core/nodes/arrays/join.js +28 -0
  358. package/backend/src/core/nodes/{data_cast.js → data/cast.js} +10 -1
  359. package/backend/src/core/nodes/data/datetime_literal.js +27 -0
  360. package/backend/src/core/nodes/data/entity_info.js +69 -0
  361. package/backend/src/core/nodes/data/get_nearby_entities.js +32 -0
  362. package/backend/src/core/nodes/data/get_nearby_players.js +64 -0
  363. package/backend/src/core/nodes/{data_get_user_field.js → data/get_user_field.js} +1 -1
  364. package/backend/src/core/nodes/data/type_check.js +53 -0
  365. package/backend/src/core/nodes/{debug_log.js → debug/log.js} +16 -16
  366. package/backend/src/core/nodes/{flow_branch.js → flow/branch.js} +15 -15
  367. package/backend/src/core/nodes/{flow_break.js → flow/break.js} +14 -14
  368. package/backend/src/core/nodes/flow/delay.js +43 -0
  369. package/backend/src/core/nodes/{flow_for_each.js → flow/for_each.js} +39 -39
  370. package/backend/src/core/nodes/{flow_sequence.js → flow/sequence.js} +16 -16
  371. package/backend/src/core/nodes/{flow_switch.js → flow/switch.js} +47 -47
  372. package/backend/src/core/nodes/{flow_while.js → flow/while.js} +1 -1
  373. package/backend/src/core/nodes/logic/__tests__/compare.test.js +83 -0
  374. package/backend/src/core/nodes/logic/not.js +22 -0
  375. package/backend/src/core/nodes/math/__tests__/operation.test.js +65 -0
  376. package/backend/src/core/nodes/strings/__tests__/concat.test.js +89 -0
  377. package/backend/src/core/nodes/{string_starts_with.js → strings/starts_with.js} +1 -1
  378. package/backend/src/core/nodes/strings/to_lower.js +22 -0
  379. package/backend/src/core/nodes/strings/to_upper.js +22 -0
  380. package/backend/src/core/nodes/time/__tests__/now.test.js +24 -0
  381. package/backend/src/core/nodes/time/add.js +33 -0
  382. package/backend/src/core/nodes/time/compare.js +35 -0
  383. package/backend/src/core/nodes/time/diff.js +29 -0
  384. package/backend/src/core/nodes/time/format.js +32 -0
  385. package/backend/src/core/nodes/time/now.js +18 -0
  386. package/backend/src/core/nodes/type/to_string.js +32 -0
  387. package/backend/src/core/nodes/{user_check_blacklist.js → users/check_blacklist.js} +37 -37
  388. package/backend/src/core/nodes/{user_get_groups.js → users/get_groups.js} +36 -36
  389. package/backend/src/core/nodes/{user_get_permissions.js → users/get_permissions.js} +36 -36
  390. package/backend/src/core/nodes/{user_set_blacklist.js → users/set_blacklist.js} +37 -37
  391. package/backend/src/core/services/BotLifecycleService.js +835 -0
  392. package/backend/src/core/services/BotProcessManager.js +163 -0
  393. package/backend/src/core/services/CacheManager.js +111 -0
  394. package/backend/src/core/services/CommandExecutionService.js +430 -0
  395. package/backend/src/core/services/DebugSessionManager.js +347 -0
  396. package/backend/src/core/services/GraphCollaborationManager.js +501 -0
  397. package/backend/src/core/services/MinecraftBotManager.js +259 -0
  398. package/backend/src/core/services/MinecraftViewerService.js +216 -0
  399. package/backend/src/core/services/ResourceMonitorService.js +90 -0
  400. package/backend/src/core/services/TelemetryService.js +124 -0
  401. package/backend/src/core/services/TraceCollectorService.js +545 -0
  402. package/backend/src/core/services/ValidationService.js +132 -0
  403. package/backend/src/core/services/__tests__/ValidationService.test.js +148 -0
  404. package/backend/src/core/services.js +20 -5
  405. package/backend/src/core/system/CommandContext.js +84 -0
  406. package/backend/src/core/system/RuntimeCommandRegistry.js +116 -0
  407. package/backend/src/core/system/Transport.js +74 -0
  408. package/backend/src/core/utils/__tests__/jsonParser.test.js +44 -0
  409. package/backend/src/core/utils/jsonParser.js +18 -0
  410. package/backend/src/core/utils/secretsFilter.js +262 -0
  411. package/backend/src/core/utils/variableParser.js +89 -0
  412. package/backend/src/core/validation/__tests__/nodeSchemas.test.js +175 -0
  413. package/backend/src/core/validation/nodeSchemas.js +112 -0
  414. package/backend/src/lib/prisma.js +2 -4
  415. package/backend/src/real-time/botApi/handlers/commandHandlers.js +28 -0
  416. package/backend/src/real-time/botApi/handlers/graphHandlers.js +99 -0
  417. package/backend/src/real-time/botApi/handlers/graphWebSocketHandlers.js +147 -0
  418. package/backend/src/real-time/botApi/handlers/index.js +43 -0
  419. package/backend/src/real-time/botApi/handlers/messageHandlers.js +66 -0
  420. package/backend/src/real-time/botApi/handlers/statusHandlers.js +17 -0
  421. package/backend/src/real-time/botApi/handlers/userHandlers.js +141 -0
  422. package/backend/src/real-time/botApi/index.js +40 -0
  423. package/backend/src/real-time/botApi/middleware.js +79 -0
  424. package/backend/src/real-time/botApi/utils.js +65 -0
  425. package/backend/src/real-time/panelNamespace.js +387 -0
  426. package/backend/src/real-time/presence.js +7 -2
  427. package/backend/src/real-time/socketHandler.js +400 -5
  428. package/backend/src/repositories/BaseRepository.js +43 -0
  429. package/backend/src/repositories/BotRepository.js +42 -0
  430. package/backend/src/repositories/CommandRepository.js +53 -0
  431. package/backend/src/repositories/EventGraphRepository.js +40 -0
  432. package/backend/src/repositories/GroupRepository.js +69 -0
  433. package/backend/src/repositories/PermissionRepository.js +48 -0
  434. package/backend/src/repositories/PluginRepository.js +42 -0
  435. package/backend/src/repositories/ServerRepository.js +27 -0
  436. package/backend/src/repositories/UserRepository.js +48 -0
  437. package/backend/src/server.js +21 -0
  438. package/backend/src/test-refactor.js +85 -0
  439. package/frontend/dist/assets/index-B1serztM.js +11210 -0
  440. package/frontend/dist/assets/index-t6K1u4OV.css +32 -0
  441. package/frontend/dist/index.html +2 -2
  442. package/frontend/package-lock.json +9437 -0
  443. package/frontend/package.json +8 -5
  444. package/package.json +3 -2
  445. package/screen/console.png +0 -0
  446. package/screen/dashboard.png +0 -0
  447. package/screen/graph_collabe.png +0 -0
  448. package/screen/graph_live_debug.png +0 -0
  449. package/screen/management_command.png +0 -0
  450. package/screen/node_debug_trace.png +0 -0
  451. package/screen/plugin_/320/276/320/261/320/267/320/276/321/200.png +0 -0
  452. package/screen/websocket.png +0 -0
  453. package/screen//320/275/320/260/321/201/321/202/321/200/320/276/320/271/320/272/320/270_/320/276/321/202/320/264/320/265/320/273/321/214/320/275/321/213/321/205_/320/272/320/276/320/274/320/260/320/275/320/264_/320/272/320/260/320/266/320/264/321/203_/320/272/320/276/320/274/320/260/320/275/320/273/320/264/321/203_/320/274/320/276/320/266/320/275/320/276_/320/275/320/260/321/201/321/202/321/200/320/260/320/270/320/262/320/260/321/202/321/214.png +0 -0
  454. package/screen//320/277/320/273/320/260/320/275/320/270/321/200/320/276/320/262/321/211/320/270/320/272_/320/274/320/276/320/266/320/275/320/276_/320/267/320/260/320/264/320/260/320/262/320/260/321/202/321/214_/320/264/320/265/320/271/321/201/321/202/320/262/320/270/321/217_/320/277/320/276_/320/262/321/200/320/265/320/274/320/265/320/275/320/270.png +0 -0
  455. package/frontend/dist/assets/index-B9GedHEa.js +0 -8352
  456. package/frontend/dist/assets/index-zLiy9MDx.css +0 -1
  457. package/nul +0 -0
  458. /package/backend/src/core/nodes/{action_http_request.js → actions/http_request.js} +0 -0
  459. /package/backend/src/core/nodes/{array_add_element.js → arrays/add_element.js} +0 -0
  460. /package/backend/src/core/nodes/{array_contains.js → arrays/contains.js} +0 -0
  461. /package/backend/src/core/nodes/{array_find_index.js → arrays/find_index.js} +0 -0
  462. /package/backend/src/core/nodes/{array_get_by_index.js → arrays/get_by_index.js} +0 -0
  463. /package/backend/src/core/nodes/{array_get_random_element.js → arrays/get_random_element.js} +0 -0
  464. /package/backend/src/core/nodes/{array_remove_by_index.js → arrays/remove_by_index.js} +0 -0
  465. /package/backend/src/core/nodes/{bot_get_position.js → bot/get_position.js} +0 -0
  466. /package/backend/src/core/nodes/{data_array_literal.js → data/array_literal.js} +0 -0
  467. /package/backend/src/core/nodes/{data_boolean_literal.js → data/boolean_literal.js} +0 -0
  468. /package/backend/src/core/nodes/{data_get_argument.js → data/get_argument.js} +0 -0
  469. /package/backend/src/core/nodes/{data_get_bot_look.js → data/get_bot_look.js} +0 -0
  470. /package/backend/src/core/nodes/{data_get_entity_field.js → data/get_entity_field.js} +0 -0
  471. /package/backend/src/core/nodes/{data_get_server_players.js → data/get_server_players.js} +0 -0
  472. /package/backend/src/core/nodes/{data_get_variable.js → data/get_variable.js} +0 -0
  473. /package/backend/src/core/nodes/{data_length.js → data/length.js} +0 -0
  474. /package/backend/src/core/nodes/{data_make_object.js → data/make_object.js} +0 -0
  475. /package/backend/src/core/nodes/{data_number_literal.js → data/number_literal.js} +0 -0
  476. /package/backend/src/core/nodes/{data_string_literal.js → data/string_literal.js} +0 -0
  477. /package/backend/src/core/nodes/{logic_compare.js → logic/compare.js} +0 -0
  478. /package/backend/src/core/nodes/{logic_operation.js → logic/operation.js} +0 -0
  479. /package/backend/src/core/nodes/{math_operation.js → math/operation.js} +0 -0
  480. /package/backend/src/core/nodes/{math_random_number.js → math/random_number.js} +0 -0
  481. /package/backend/src/core/nodes/{object_create.js → objects/create.js} +0 -0
  482. /package/backend/src/core/nodes/{object_delete.js → objects/delete.js} +0 -0
  483. /package/backend/src/core/nodes/{object_get.js → objects/get.js} +0 -0
  484. /package/backend/src/core/nodes/{object_has_key.js → objects/has_key.js} +0 -0
  485. /package/backend/src/core/nodes/{object_set.js → objects/set.js} +0 -0
  486. /package/backend/src/core/nodes/{string_concat.js → strings/concat.js} +0 -0
  487. /package/backend/src/core/nodes/{string_contains.js → strings/contains.js} +0 -0
  488. /package/backend/src/core/nodes/{string_ends_with.js → strings/ends_with.js} +0 -0
  489. /package/backend/src/core/nodes/{string_equals.js → strings/equals.js} +0 -0
  490. /package/backend/src/core/nodes/{string_length.js → strings/length.js} +0 -0
  491. /package/backend/src/core/nodes/{string_matches.js → strings/matches.js} +0 -0
  492. /package/backend/src/core/nodes/{string_split.js → strings/split.js} +0 -0
@@ -0,0 +1,464 @@
1
+ # SRE - Site Reliability Engineering
2
+
3
+ Comprehensive guide for implementing SRE practices: SLI/SLO/SLA definitions, error budgets, incident management, on-call best practices, chaos engineering, observability, and reliability patterns for production systems.
4
+
5
+ ## Purpose
6
+
7
+ Enable teams to build and operate reliable, scalable systems by applying software engineering principles to infrastructure and operations challenges.
8
+
9
+ ## When to Use This Skill
10
+
11
+ Automatically activates when working on:
12
+ - Defining SLIs, SLOs, and SLAs
13
+ - Managing error budgets
14
+ - Incident response and management
15
+ - On-call rotation and procedures
16
+ - Observability and monitoring
17
+ - Chaos engineering and resilience testing
18
+ - Capacity planning and performance
19
+ - Disaster recovery planning
20
+
21
+ ## Quick Start Checklist
22
+
23
+ When implementing SRE practices:
24
+
25
+ - [ ] Define Service Level Indicators (SLIs)
26
+ - [ ] Set Service Level Objectives (SLOs) with stakeholders
27
+ - [ ] Implement error budget tracking
28
+ - [ ] Set up comprehensive monitoring and alerting
29
+ - [ ] Create incident response runbooks
30
+ - [ ] Establish on-call rotation
31
+ - [ ] Implement automated remediation
32
+ - [ ] Conduct post-incident reviews
33
+ - [ ] Perform chaos engineering experiments
34
+ - [ ] Document capacity planning process
35
+ - [ ] Set up disaster recovery procedures
36
+
37
+ ## Core Concepts
38
+
39
+ ### SLI / SLO / SLA Hierarchy
40
+
41
+ ```
42
+ SLA (Service Level Agreement)
43
+ ↓ Commits to customers
44
+
45
+ SLO (Service Level Objective)
46
+ ↓ Internal target (stricter than SLA)
47
+
48
+ SLI (Service Level Indicator)
49
+ ↓ Measurement that shows if you're meeting SLO
50
+
51
+ Metrics (actual measurements)
52
+ ```
53
+
54
+ **Example:**
55
+ ```
56
+ SLA: 99.9% uptime guaranteed (to customers)
57
+ SLO: 99.95% uptime target (internal)
58
+ SLI: Percentage of successful HTTP requests
59
+ (2xx, 3xx responses / total requests)
60
+ ```
61
+
62
+ ### Error Budgets
63
+
64
+ ```
65
+ Error Budget = 1 - SLO
66
+
67
+ Example:
68
+ SLO: 99.9% → Error Budget: 0.1%
69
+
70
+ Monthly calculations (30 days):
71
+ - Total time: 43,200 minutes
72
+ - Allowed downtime: 43.2 minutes
73
+
74
+ If error budget is:
75
+ - Available → Can take risks, deploy features
76
+ - Depleted → Focus on reliability, freeze features
77
+ ```
78
+
79
+ ### The Four Golden Signals
80
+
81
+ ```yaml
82
+ 1. Latency: How long requests take
83
+ - Good: p50, p95, p99 latencies
84
+ - Bad: Average latency (hides outliers)
85
+
86
+ 2. Traffic: Demand on your system
87
+ - Requests per second
88
+ - Transactions per second
89
+ - Bandwidth usage
90
+
91
+ 3. Errors: Rate of failed requests
92
+ - HTTP 5xx errors
93
+ - Failed operations
94
+ - Wrong results
95
+
96
+ 4. Saturation: How "full" your service is
97
+ - CPU usage
98
+ - Memory usage
99
+ - Disk I/O
100
+ - Queue depth
101
+ ```
102
+
103
+ ## Common Patterns
104
+
105
+ ### Pattern 1: SLO Definition
106
+
107
+ ```yaml
108
+ # slos/api-service.yaml
109
+ apiVersion: v1
110
+ kind: SLO
111
+ metadata:
112
+ name: api-service-availability
113
+ service: api-service
114
+ spec:
115
+ description: "API service availability for customer requests"
116
+
117
+ # SLI definition
118
+ sli:
119
+ name: availability
120
+ description: "Percentage of successful requests"
121
+ query: |
122
+ sum(rate(http_requests_total{job="api-service",code=~"2..|3.."}[5m]))
123
+ /
124
+ sum(rate(http_requests_total{job="api-service"}[5m]))
125
+
126
+ # SLO target
127
+ objectives:
128
+ - target: 0.999 # 99.9%
129
+ window: 30d # Rolling 30 days
130
+
131
+ # Alert thresholds
132
+ alerting:
133
+ - burn_rate: 14.4 # Alert if burning budget 14.4x faster
134
+ short_window: 5m
134
+ long_window: 1h
136
+ severity: critical
137
+
138
+ - burn_rate: 6
139
+ short_window: 30m
139
+ long_window: 6h
141
+ severity: warning
142
+ ```
143
+
144
+ **Latency SLO:**
145
+ ```yaml
146
+ apiVersion: v1
147
+ kind: SLO
148
+ metadata:
149
+ name: api-service-latency
150
+ spec:
151
+ sli:
152
+ name: latency
153
+ description: "95th percentile latency under 200ms"
154
+ query: |
155
+ histogram_quantile(0.95,
156
+ sum(rate(http_request_duration_seconds_bucket{job="api-service"}[5m])) by (le)
157
+ )
158
+
159
+ objectives:
160
+ - target: 0.200 # 200ms
161
+ window: 7d
162
+ ```
163
+
164
+ ### Pattern 2: Incident Response
165
+
166
+ **Incident Severity Levels:**
167
+ ```yaml
168
+ Severity 1 (Critical):
169
+ - Complete service outage
170
+ - Security breach
171
+ - Data loss
172
+ Response: Immediate, all hands
173
+ Escalation: 15 minutes
174
+
175
+ Severity 2 (High):
176
+ - Partial outage
177
+ - Degraded performance
178
+ - Feature broken
179
+ Response: Within 30 minutes
180
+ Escalation: 1 hour
181
+
182
+ Severity 3 (Medium):
183
+ - Minor issues
184
+ - Non-critical features affected
185
+ Response: Within 4 hours
186
+ Escalation: Next business day
187
+
188
+ Severity 4 (Low):
189
+ - Cosmetic issues
190
+ - Questions
191
+ Response: Best effort
192
+ ```
193
+
194
+ **Incident Runbook Template:**
195
+ ```markdown
196
+ # Incident: API Service High Latency
197
+
198
+ ## Symptoms
199
+ - API p95 latency > 500ms
200
+ - Users reporting slow responses
201
+ - Alert: "api-service-latency-high" firing
202
+
203
+ ## Impact
204
+ - Degraded user experience
205
+ - Potential timeout errors
206
+
207
+ ## Investigation Steps
208
+
209
+ 1. Check service status:
210
+ ~~~bash
211
+ kubectl get pods -n production -l app=api-service
212
+ kubectl top pods -n production -l app=api-service
213
+ ~~~
214
+
215
+ 2. Check recent deployments:
216
+ ~~~bash
217
+ kubectl rollout history deployment/api-service -n production
218
+ ~~~
219
+
220
+ 3. Check database performance:
221
+ - Query: `rate(pg_stat_database_tup_fetched[5m])`
222
+ - Check slow query log
223
+
224
+ 4. Check downstream dependencies:
225
+ - External API status
226
+ - Cache hit rate
227
+
228
+ ## Mitigation
229
+
230
+ ### Quick Fixes
231
+ - Scale up: `kubectl scale deployment api-service -n production --replicas=10`
232
+ - Rollback: `kubectl rollout undo deployment/api-service -n production`
233
+
234
+ ### Root Cause Fixes
235
+ - Optimize slow queries
236
+ - Add caching layer
237
+ - Increase resource limits
238
+
239
+ ## Communication
240
+
241
+ - Slack: #incidents channel
242
+ - Status page: Update customer-facing status
243
+ - Stakeholders: Notify via PagerDuty
244
+
245
+ ## Post-Incident
246
+
247
+ - Create post-incident review
248
+ - Update runbook with learnings
249
+ - File tickets for improvements
250
+ ```
251
+
252
+ ### Pattern 3: Chaos Engineering
253
+
254
+ ```yaml
255
+ # chaos-experiments/pod-failure.yaml
256
+ apiVersion: chaos-mesh.org/v1alpha1
257
+ kind: PodChaos
258
+ metadata:
259
+ name: api-service-pod-failure
260
+ namespace: chaos-testing
261
+ spec:
262
+ action: pod-failure
263
+ mode: one
264
+ selector:
265
+ namespaces:
266
+ - production
267
+ labelSelectors:
268
+ app: api-service
269
+ duration: "30s"
270
+ scheduler:
271
+ cron: "@every 2h" # Run every 2 hours
272
+
273
+ ---
274
+
275
+
276
+
277
+
278
+ # Network chaos
279
+ apiVersion: chaos-mesh.org/v1alpha1
280
+ kind: NetworkChaos
281
+ metadata:
282
+ name: api-network-delay
283
+ spec:
284
+ action: delay
285
+ mode: one
286
+ selector:
287
+ namespaces:
288
+ - production
289
+ labelSelectors:
290
+ app: api-service
291
+ delay:
292
+ latency: "100ms"
293
+ correlation: "100"
294
+ jitter: "0ms"
295
+ duration: "5m"
296
+ ```
297
+
298
+ ## Resource Files
299
+
300
+ For detailed guidance on specific topics, see:
301
+
302
+ ### Core SRE Practices
303
+ - **[slo-sli-sla.md](resources/slo-sli-sla.md)** - Service level definitions, SLO implementation, error budgets, SLA management
304
+ - **[incident-management.md](resources/incident-management.md)** - Incident response procedures, runbooks, post-mortems, severity levels
305
+ - **[toil-reduction.md](resources/toil-reduction.md)** - Identifying toil, automation strategies, measuring toil reduction
306
+
307
+ ### Observability
308
+ - **[observability-stack.md](resources/observability-stack.md)** - Metrics, logs, traces, Prometheus, Grafana, Jaeger, distributed tracing
309
+ - **[alerting-best-practices.md](resources/alerting-best-practices.md)** - Alert design, notification strategies, on-call alerting, alert fatigue
310
+ - **[performance-optimization.md](resources/performance-optimization.md)** - Profiling, performance tuning, optimization techniques, bottleneck analysis
311
+
312
+ ### Operations
313
+ - **[on-call-runbooks.md](resources/on-call-runbooks.md)** - On-call rotations, escalation procedures, runbook templates, burnout prevention
314
+ - **[capacity-planning.md](resources/capacity-planning.md)** - Load testing, capacity modeling, growth forecasting, resource planning
315
+ - **[disaster-recovery.md](resources/disaster-recovery.md)** - Backup strategies, RPO/RTO, DR testing, failover procedures, business continuity planning
316
+
317
+ ### Reliability Patterns
318
+ - **[reliability-patterns.md](resources/reliability-patterns.md)** - Circuit breakers, retries, timeouts, bulkheads, rate limiting, graceful degradation
319
+ - **[chaos-engineering.md](resources/chaos-engineering.md)** - Fault injection, resilience testing, game days, chaos experiments
320
+
321
+ ## Best Practices
322
+
323
+ ### 1. Start with User-Focused SLIs
324
+
325
+ Measure what matters to users, not infrastructure metrics.
326
+
327
+ ```yaml
328
+ # Good SLIs
329
+ - Request success rate
330
+ - Request latency
331
+ - Data freshness
332
+
333
+ # Poor SLIs
334
+ - CPU usage
335
+ - Memory usage
336
+ - Pod count
337
+ ```
338
+
339
+ ### 2. Set Realistic SLOs
340
+
341
+ ```
342
+ Don't aim for 100% - it's:
343
+ - Impossible to achieve
344
+ - Extremely expensive
345
+ - Prevents risk-taking
346
+
347
+ Typical SLOs:
348
+ - User-facing: 99.9% - 99.99%
349
+ - Internal services: 99% - 99.9%
350
+ - Batch jobs: 95% - 99%
351
+ ```
352
+
353
+ ### 3. Embrace Error Budgets
354
+
355
+ ```
356
+ Error budget available:
357
+ ✅ Deploy new features
358
+ ✅ Experiment
359
+ ✅ Take calculated risks
360
+
361
+ Error budget depleted:
362
+ ❌ No new feature launches (feature freeze)
363
+ ✅ Focus on reliability
364
+ ✅ Fix technical debt
365
+ ```
366
+
367
+ ### 4. Automate Toil
368
+
369
+ ```
370
+ Toil = Manual, repetitive, automatable work
371
+
372
+ Track toil:
373
+ - Measure time spent on toil
374
+ - Target: <50% of SRE time on toil
375
+ - Automate high-frequency tasks
376
+ ```
377
+
378
+ ### 5. Blameless Post-Mortems
379
+
380
+ ```markdown
381
+ # Post-Incident Review Template
382
+
383
+ ## Incident Summary
384
+ - Date/Time
385
+ - Duration
386
+ - Severity
387
+ - Services affected
388
+
389
+ ## Impact
390
+ - Users affected
391
+ - Revenue impact
392
+ - SLO impact
393
+
394
+ ## Root Cause
395
+ Technical explanation (not who)
396
+
397
+ ## Timeline
398
+ Detailed chronology
399
+
400
+ ## Resolution
401
+ What fixed it
402
+
403
+ ## Action Items
404
+ - [ ] Short-term fixes
405
+ - [ ] Long-term improvements
406
+ - [ ] Process changes
407
+
408
+ ## Lessons Learned
409
+ What went well
410
+ What could be improved
411
+ ```
412
+
413
+ ### 6. Gradual Rollouts
414
+
415
+ ```
416
+ Deployment stages:
417
+ 1. Canary (1% traffic) - 15 mins
418
+ 2. Small (10% traffic) - 30 mins
419
+ 3. Medium (50% traffic) - 1 hour
420
+ 4. Full (100% traffic)
421
+
422
+ Automated rollback on:
423
+ - Error rate increase >1%
424
+ - Latency increase >20%
425
+ - Failed health checks
426
+ ```
427
+
428
+ ## Anti-Patterns to Avoid
429
+
430
+ ❌ 100% availability target (unrealistic, expensive)
431
+ ❌ Ignoring error budgets (defeats the purpose)
432
+ ❌ Alert fatigue (too many noisy alerts)
433
+ ❌ Manual toil (should be automated)
434
+ ❌ Blame culture (prevents learning)
435
+ ❌ No runbooks (tribal knowledge)
436
+ ❌ Reactive only (should be proactive)
437
+ ❌ Monitoring without alerting (data without action)
438
+ ❌ No capacity planning (surprise outages)
439
+ ❌ Skipping post-mortems (missing learning opportunities)
440
+
441
+ ## Integration Points
442
+
443
+ This skill integrates with:
444
+ - **platform-engineering**: Infrastructure reliability, Kubernetes operations
445
+ - **devsecops**: Security incident response, vulnerability management
446
+ - **release-engineering**: Safe deployments, rollback strategies
447
+ - **cloud-engineering**: Cloud monitoring, disaster recovery
448
+ - **systems-engineering**: Performance tuning, system monitoring
449
+
450
+ ## Triggers and Activation
451
+
452
+ This skill activates when you:
453
+ - Define or modify SLOs/SLIs
454
+ - Respond to incidents
455
+ - Set up monitoring and alerting
456
+ - Plan capacity or performance optimization
457
+ - Conduct post-incident reviews
458
+ - Implement chaos engineering
459
+
460
+ ---
461
+
462
+ **Focus:** Reliability through engineering discipline
463
+ **Key Principle:** Measure everything, improve continuously
464
+ **Maintained by:** SRE team, based on Google SRE practices and industry standards