blockmine 1.21.0 → 1.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (433) hide show
  1. package/.claude/agents/README.md +469 -0
  2. package/.claude/agents/auth-route-debugger.md +118 -0
  3. package/.claude/agents/auth-route-tester.md +93 -0
  4. package/.claude/agents/auto-error-resolver.md +97 -0
  5. package/.claude/agents/build-optimizer.md +236 -0
  6. package/.claude/agents/code-architecture-reviewer.md +83 -0
  7. package/.claude/agents/code-refactor-master.md +94 -0
  8. package/.claude/agents/cost-optimizer.md +134 -0
  9. package/.claude/agents/deployment-orchestrator.md +113 -0
  10. package/.claude/agents/documentation-architect.md +82 -0
  11. package/.claude/agents/frontend-error-fixer.md +77 -0
  12. package/.claude/agents/iac-code-generator.md +71 -0
  13. package/.claude/agents/incident-responder.md +346 -0
  14. package/.claude/agents/infrastructure-architect.md +31 -0
  15. package/.claude/agents/kubernetes-specialist.md +56 -0
  16. package/.claude/agents/migration-planner.md +181 -0
  17. package/.claude/agents/network-architect.md +196 -0
  18. package/.claude/agents/plan-reviewer.md +52 -0
  19. package/.claude/agents/refactor-planner.md +63 -0
  20. package/.claude/agents/security-scanner.md +102 -0
  21. package/.claude/agents/web-research-specialist.md +78 -0
  22. package/.claude/commands/cost-analysis.md +315 -0
  23. package/.claude/commands/dev-docs-update.md +55 -0
  24. package/.claude/commands/dev-docs.md +51 -0
  25. package/.claude/commands/incident-debug.md +247 -0
  26. package/.claude/commands/infra-plan.md +81 -0
  27. package/.claude/commands/migration-plan.md +478 -0
  28. package/.claude/commands/route-research-for-testing.md +37 -0
  29. package/.claude/commands/security-review.md +66 -0
  30. package/.claude/hooks/CONFIG.md +448 -0
  31. package/.claude/hooks/README.md +163 -0
  32. package/.claude/hooks/SKILL_ACTIVATION_COMPLETE.md +226 -0
  33. package/.claude/hooks/WINDOWS_HOOKS_README.md +151 -0
  34. package/.claude/hooks/add-skill-activation-banners.ts +132 -0
  35. package/.claude/hooks/comprehensive-skill-test.ts +1315 -0
  36. package/.claude/hooks/error-handling-reminder.sh +12 -0
  37. package/.claude/hooks/error-handling-reminder.ts +222 -0
  38. package/.claude/hooks/k8s-manifest-validator.sh +56 -0
  39. package/.claude/hooks/package-lock.json +556 -0
  40. package/.claude/hooks/package.json +16 -0
  41. package/.claude/hooks/post-tool-use-tracker.ps1 +174 -0
  42. package/.claude/hooks/post-tool-use-tracker.sh +183 -0
  43. package/.claude/hooks/security-policy-check.sh +247 -0
  44. package/.claude/hooks/skill-activation-prompt.ps1 +10 -0
  45. package/.claude/hooks/skill-activation-prompt.sh +10 -0
  46. package/.claude/hooks/skill-activation-prompt.ts +141 -0
  47. package/.claude/hooks/stop-build-check-enhanced.sh +130 -0
  48. package/.claude/hooks/terraform-validator.sh +53 -0
  49. package/.claude/hooks/test-input.json +7 -0
  50. package/.claude/hooks/test-skill-activation.ts +427 -0
  51. package/.claude/hooks/trigger-build-resolver.sh +79 -0
  52. package/.claude/hooks/tsc-check.sh +173 -0
  53. package/.claude/hooks/tsconfig.json +19 -0
  54. package/.claude/settings.json +55 -0
  55. package/.claude/settings.local.json +27 -14
  56. package/.claude/skills/README.md +507 -0
  57. package/.claude/skills/api-engineering/SKILL.md +63 -0
  58. package/.claude/skills/api-engineering/resources/api-versioning.md +88 -0
  59. package/.claude/skills/api-engineering/resources/graphql-patterns.md +106 -0
  60. package/.claude/skills/api-engineering/resources/rate-limiting.md +118 -0
  61. package/.claude/skills/api-engineering/resources/rest-api-design.md +105 -0
  62. package/.claude/skills/backend-dev-guidelines/SKILL.md +306 -0
  63. package/.claude/skills/backend-dev-guidelines/resources/architecture-overview.md +451 -0
  64. package/.claude/skills/backend-dev-guidelines/resources/async-and-errors.md +307 -0
  65. package/.claude/skills/backend-dev-guidelines/resources/complete-examples.md +638 -0
  66. package/.claude/skills/backend-dev-guidelines/resources/configuration.md +275 -0
  67. package/.claude/skills/backend-dev-guidelines/resources/database-patterns.md +224 -0
  68. package/.claude/skills/backend-dev-guidelines/resources/middleware-guide.md +213 -0
  69. package/.claude/skills/backend-dev-guidelines/resources/routing-and-controllers.md +756 -0
  70. package/.claude/skills/backend-dev-guidelines/resources/sentry-and-monitoring.md +336 -0
  71. package/.claude/skills/backend-dev-guidelines/resources/services-and-repositories.md +789 -0
  72. package/.claude/skills/backend-dev-guidelines/resources/testing-guide.md +235 -0
  73. package/.claude/skills/backend-dev-guidelines/resources/validation-patterns.md +754 -0
  74. package/.claude/skills/budget-and-cost-management/SKILL.md +850 -0
  75. package/.claude/skills/build-engineering/SKILL.md +431 -0
  76. package/.claude/skills/build-engineering/resources/artifact-repositories.md +72 -0
  77. package/.claude/skills/build-engineering/resources/build-caching.md +96 -0
  78. package/.claude/skills/build-engineering/resources/build-pipelines.md +105 -0
  79. package/.claude/skills/build-engineering/resources/build-security.md +95 -0
  80. package/.claude/skills/build-engineering/resources/build-systems.md +389 -0
  81. package/.claude/skills/build-engineering/resources/compilation-optimization.md +201 -0
  82. package/.claude/skills/build-engineering/resources/dependency-management.md +73 -0
  83. package/.claude/skills/build-engineering/resources/monorepo-builds.md +110 -0
  84. package/.claude/skills/build-engineering/resources/performance-optimization.md +113 -0
  85. package/.claude/skills/build-engineering/resources/reproducible-builds.md +82 -0
  86. package/.claude/skills/cloud-engineering/SKILL.md +675 -0
  87. package/.claude/skills/cloud-engineering/resources/aws-patterns.md +742 -0
  88. package/.claude/skills/cloud-engineering/resources/azure-patterns.md +714 -0
  89. package/.claude/skills/cloud-engineering/resources/cleared-cloud-environments.md +987 -0
  90. package/.claude/skills/cloud-engineering/resources/cloud-cost-optimization.md +757 -0
  91. package/.claude/skills/cloud-engineering/resources/cloud-networking.md +1058 -0
  92. package/.claude/skills/cloud-engineering/resources/cloud-security-tools.md +1530 -0
  93. package/.claude/skills/cloud-engineering/resources/cloud-security.md +990 -0
  94. package/.claude/skills/cloud-engineering/resources/gcp-patterns.md +758 -0
  95. package/.claude/skills/cloud-engineering/resources/migration-strategies.md +820 -0
  96. package/.claude/skills/cloud-engineering/resources/multi-cloud-strategies.md +670 -0
  97. package/.claude/skills/cloud-engineering/resources/oci-patterns.md +1198 -0
  98. package/.claude/skills/cloud-engineering/resources/serverless-patterns.md +795 -0
  99. package/.claude/skills/cloud-engineering/resources/well-architected-frameworks.md +966 -0
  100. package/.claude/skills/cybersecurity/SKILL.md +409 -0
  101. package/.claude/skills/cybersecurity/resources/security-architecture.md +266 -0
  102. package/.claude/skills/database-engineering/SKILL.md +61 -0
  103. package/.claude/skills/database-engineering/resources/backup-and-recovery.md +72 -0
  104. package/.claude/skills/database-engineering/resources/database-replication.md +63 -0
  105. package/.claude/skills/database-engineering/resources/postgresql-fundamentals.md +70 -0
  106. package/.claude/skills/database-engineering/resources/query-optimization.md +68 -0
  107. package/.claude/skills/devsecops/SKILL.md +374 -0
  108. package/.claude/skills/devsecops/resources/ci-cd-security.md +204 -0
  109. package/.claude/skills/devsecops/resources/compliance-automation.md +530 -0
  110. package/.claude/skills/devsecops/resources/compliance-frameworks.md +2322 -0
  111. package/.claude/skills/devsecops/resources/container-security.md +915 -0
  112. package/.claude/skills/devsecops/resources/cspm-integration.md +1440 -0
  113. package/.claude/skills/devsecops/resources/policy-enforcement.md +619 -0
  114. package/.claude/skills/devsecops/resources/secrets-management.md +755 -0
  115. package/.claude/skills/devsecops/resources/security-monitoring.md +146 -0
  116. package/.claude/skills/devsecops/resources/security-scanning.md +887 -0
  117. package/.claude/skills/devsecops/resources/security-testing.md +203 -0
  118. package/.claude/skills/devsecops/resources/supply-chain-security.md +518 -0
  119. package/.claude/skills/devsecops/resources/vulnerability-management.md +481 -0
  120. package/.claude/skills/devsecops/resources/zero-trust-architecture.md +177 -0
  121. package/.claude/skills/documentation-as-code/SKILL.md +323 -0
  122. package/.claude/skills/documentation-as-code/resources/api-documentation.md +90 -0
  123. package/.claude/skills/documentation-as-code/resources/changelog-management.md +79 -0
  124. package/.claude/skills/documentation-as-code/resources/diagram-generation.md +44 -0
  125. package/.claude/skills/documentation-as-code/resources/docs-as-code-workflow.md +99 -0
  126. package/.claude/skills/documentation-as-code/resources/documentation-automation.md +68 -0
  127. package/.claude/skills/documentation-as-code/resources/documentation-sites.md +79 -0
  128. package/.claude/skills/documentation-as-code/resources/markdown-best-practices.md +162 -0
  129. package/.claude/skills/documentation-as-code/resources/openapi-specification.md +77 -0
  130. package/.claude/skills/documentation-as-code/resources/readme-engineering.md +60 -0
  131. package/.claude/skills/documentation-as-code/resources/technical-writing-guide.md +202 -0
  132. package/.claude/skills/engineering-management/SKILL.md +356 -0
  133. package/.claude/skills/engineering-management/resources/career-ladders.md +609 -0
  134. package/.claude/skills/engineering-management/resources/hiring-and-assessment.md +555 -0
  135. package/.claude/skills/engineering-management/resources/one-on-one-guides.md +609 -0
  136. package/.claude/skills/engineering-management/resources/resource-planning.md +557 -0
  137. package/.claude/skills/engineering-management/resources/team-organization-patterns.md +491 -0
  138. package/.claude/skills/engineering-management/resources/technical-interviews.md +474 -0
  139. package/.claude/skills/engineering-operations-management/SKILL.md +817 -0
  140. package/.claude/skills/error-tracking/SKILL.md +379 -0
  141. package/.claude/skills/frontend-dev-guidelines/SKILL.md +403 -0
  142. package/.claude/skills/frontend-dev-guidelines/resources/common-patterns.md +331 -0
  143. package/.claude/skills/frontend-dev-guidelines/resources/complete-examples.md +872 -0
  144. package/.claude/skills/frontend-dev-guidelines/resources/component-patterns.md +502 -0
  145. package/.claude/skills/frontend-dev-guidelines/resources/data-fetching.md +767 -0
  146. package/.claude/skills/frontend-dev-guidelines/resources/file-organization.md +502 -0
  147. package/.claude/skills/frontend-dev-guidelines/resources/loading-and-error-states.md +501 -0
  148. package/.claude/skills/frontend-dev-guidelines/resources/performance.md +406 -0
  149. package/.claude/skills/frontend-dev-guidelines/resources/routing-guide.md +364 -0
  150. package/.claude/skills/frontend-dev-guidelines/resources/styling-guide.md +428 -0
  151. package/.claude/skills/frontend-dev-guidelines/resources/typescript-standards.md +418 -0
  152. package/.claude/skills/general-it-engineering/SKILL.md +393 -0
  153. package/.claude/skills/general-it-engineering/resources/asset-management.md +712 -0
  154. package/.claude/skills/general-it-engineering/resources/automation-orchestration.md +817 -0
  155. package/.claude/skills/general-it-engineering/resources/business-continuity.md +786 -0
  156. package/.claude/skills/general-it-engineering/resources/change-management.md +715 -0
  157. package/.claude/skills/general-it-engineering/resources/enterprise-monitoring.md +729 -0
  158. package/.claude/skills/general-it-engineering/resources/help-desk-operations.md +738 -0
  159. package/.claude/skills/general-it-engineering/resources/incident-service-management.md +834 -0
  160. package/.claude/skills/general-it-engineering/resources/it-governance.md +753 -0
  161. package/.claude/skills/general-it-engineering/resources/itil-framework.md +503 -0
  162. package/.claude/skills/general-it-engineering/resources/service-management.md +669 -0
  163. package/.claude/skills/infrastructure-architecture/SKILL.md +328 -0
  164. package/.claude/skills/infrastructure-architecture/resources/architecture-decision-records.md +505 -0
  165. package/.claude/skills/infrastructure-architecture/resources/architecture-patterns.md +528 -0
  166. package/.claude/skills/infrastructure-architecture/resources/capacity-planning.md +453 -0
  167. package/.claude/skills/infrastructure-architecture/resources/cleared-environment-architecture.md +773 -0
  168. package/.claude/skills/infrastructure-architecture/resources/cost-architecture.md +499 -0
  169. package/.claude/skills/infrastructure-architecture/resources/data-architecture.md +501 -0
  170. package/.claude/skills/infrastructure-architecture/resources/disaster-recovery.md +535 -0
  171. package/.claude/skills/infrastructure-architecture/resources/migration-architecture.md +512 -0
  172. package/.claude/skills/infrastructure-architecture/resources/multi-region-design.md +608 -0
  173. package/.claude/skills/infrastructure-architecture/resources/reference-architectures.md +562 -0
  174. package/.claude/skills/infrastructure-architecture/resources/security-architecture.md +538 -0
  175. package/.claude/skills/infrastructure-architecture/resources/system-design-principles.md +489 -0
  176. package/.claude/skills/infrastructure-architecture/resources/workload-classification.md +1000 -0
  177. package/.claude/skills/infrastructure-strategy/SKILL.md +924 -0
  178. package/.claude/skills/network-engineering/SKILL.md +385 -0
  179. package/.claude/skills/network-engineering/resources/dns-management.md +738 -0
  180. package/.claude/skills/network-engineering/resources/load-balancing.md +820 -0
  181. package/.claude/skills/network-engineering/resources/network-architecture.md +546 -0
  182. package/.claude/skills/network-engineering/resources/network-security.md +921 -0
  183. package/.claude/skills/network-engineering/resources/network-troubleshooting.md +749 -0
  184. package/.claude/skills/network-engineering/resources/routing-switching.md +373 -0
  185. package/.claude/skills/network-engineering/resources/sdn-networking.md +695 -0
  186. package/.claude/skills/network-engineering/resources/service-mesh-networking.md +777 -0
  187. package/.claude/skills/network-engineering/resources/tcp-ip-protocols.md +444 -0
  188. package/.claude/skills/network-engineering/resources/vpn-connectivity.md +672 -0
  189. package/.claude/skills/observability-engineering/SKILL.md +101 -0
  190. package/.claude/skills/observability-engineering/resources/apm-tools.md +97 -0
  191. package/.claude/skills/observability-engineering/resources/correlation-strategies.md +87 -0
  192. package/.claude/skills/observability-engineering/resources/distributed-tracing.md +98 -0
  193. package/.claude/skills/observability-engineering/resources/logs-aggregation.md +118 -0
  194. package/.claude/skills/observability-engineering/resources/observability-cost-optimization.md +141 -0
  195. package/.claude/skills/observability-engineering/resources/opentelemetry.md +110 -0
  196. package/.claude/skills/platform-engineering/SKILL.md +555 -0
  197. package/.claude/skills/platform-engineering/resources/architecture-overview.md +600 -0
  198. package/.claude/skills/platform-engineering/resources/container-orchestration.md +916 -0
  199. package/.claude/skills/platform-engineering/resources/cost-optimization.md +634 -0
  200. package/.claude/skills/platform-engineering/resources/developer-platforms.md +670 -0
  201. package/.claude/skills/platform-engineering/resources/gitops-automation.md +650 -0
  202. package/.claude/skills/platform-engineering/resources/infrastructure-as-code.md +778 -0
  203. package/.claude/skills/platform-engineering/resources/infrastructure-standards.md +708 -0
  204. package/.claude/skills/platform-engineering/resources/multi-tenancy.md +602 -0
  205. package/.claude/skills/platform-engineering/resources/platform-security.md +711 -0
  206. package/.claude/skills/platform-engineering/resources/resource-management.md +592 -0
  207. package/.claude/skills/platform-engineering/resources/service-mesh.md +628 -0
  208. package/.claude/skills/release-engineering/SKILL.md +393 -0
  209. package/.claude/skills/release-engineering/resources/artifact-management.md +108 -0
  210. package/.claude/skills/release-engineering/resources/build-optimization.md +84 -0
  211. package/.claude/skills/release-engineering/resources/ci-cd-pipelines.md +411 -0
  212. package/.claude/skills/release-engineering/resources/deployment-strategies.md +197 -0
  213. package/.claude/skills/release-engineering/resources/pipeline-security.md +62 -0
  214. package/.claude/skills/release-engineering/resources/progressive-delivery.md +83 -0
  215. package/.claude/skills/release-engineering/resources/release-automation.md +68 -0
  216. package/.claude/skills/release-engineering/resources/release-orchestration.md +77 -0
  217. package/.claude/skills/release-engineering/resources/rollback-strategies.md +66 -0
  218. package/.claude/skills/release-engineering/resources/versioning-strategies.md +59 -0
  219. package/.claude/skills/route-tester/SKILL.md +392 -0
  220. package/.claude/skills/skill-developer/ADVANCED.md +197 -0
  221. package/.claude/skills/skill-developer/HOOK_MECHANISMS.md +306 -0
  222. package/.claude/skills/skill-developer/PATTERNS_LIBRARY.md +152 -0
  223. package/.claude/skills/skill-developer/SKILL.md +430 -0
  224. package/.claude/skills/skill-developer/SKILL_RULES_REFERENCE.md +315 -0
  225. package/.claude/skills/skill-developer/TRIGGER_TYPES.md +305 -0
  226. package/.claude/skills/skill-developer/TROUBLESHOOTING.md +514 -0
  227. package/.claude/skills/skill-rules.json +2940 -0
  228. package/.claude/skills/sre/SKILL.md +464 -0
  229. package/.claude/skills/sre/resources/alerting-best-practices.md +282 -0
  230. package/.claude/skills/sre/resources/capacity-planning.md +226 -0
  231. package/.claude/skills/sre/resources/chaos-engineering.md +193 -0
  232. package/.claude/skills/sre/resources/disaster-recovery.md +232 -0
  233. package/.claude/skills/sre/resources/incident-management.md +436 -0
  234. package/.claude/skills/sre/resources/observability-stack.md +240 -0
  235. package/.claude/skills/sre/resources/on-call-runbooks.md +167 -0
  236. package/.claude/skills/sre/resources/performance-optimization.md +108 -0
  237. package/.claude/skills/sre/resources/reliability-patterns.md +183 -0
  238. package/.claude/skills/sre/resources/slo-sli-sla.md +464 -0
  239. package/.claude/skills/sre/resources/toil-reduction.md +145 -0
  240. package/.claude/skills/systems-engineering/SKILL.md +648 -0
  241. package/.claude/skills/systems-engineering/resources/automation-patterns.md +771 -0
  242. package/.claude/skills/systems-engineering/resources/configuration-management.md +998 -0
  243. package/.claude/skills/systems-engineering/resources/linux-administration.md +672 -0
  244. package/.claude/skills/systems-engineering/resources/networking-fundamentals.md +982 -0
  245. package/.claude/skills/systems-engineering/resources/performance-tuning.md +871 -0
  246. package/.claude/skills/systems-engineering/resources/powershell-scripting.md +482 -0
  247. package/.claude/skills/systems-engineering/resources/security-hardening.md +739 -0
  248. package/.claude/skills/systems-engineering/resources/shell-scripting.md +915 -0
  249. package/.claude/skills/systems-engineering/resources/storage-management.md +628 -0
  250. package/.claude/skills/systems-engineering/resources/system-monitoring.md +787 -0
  251. package/.claude/skills/systems-engineering/resources/troubleshooting-guide.md +753 -0
  252. package/.claude/skills/systems-engineering/resources/windows-administration.md +738 -0
  253. package/.claude/skills/technical-leadership/SKILL.md +728 -0
  254. package/CHANGELOG.md +90 -54
  255. package/README.md +94 -0
  256. package/backend/docs/SECRETS_DOCUMENTATION.md +327 -0
  257. package/backend/jest.config.js +59 -0
  258. package/backend/package-lock.json +6129 -0
  259. package/backend/package.json +16 -4
  260. package/backend/prisma/migrations/20251026104609_add_websocket_api/migration.sql +33 -0
  261. package/backend/prisma/schema.prisma +33 -0
  262. package/backend/src/__tests__/core/DependencyService.test.js +336 -0
  263. package/backend/src/__tests__/core/UserService.test.js +875 -0
  264. package/backend/src/__tests__/repositories/BaseRepository.test.js +146 -0
  265. package/backend/src/__tests__/repositories/BotRepository.test.js +118 -0
  266. package/backend/src/__tests__/repositories/CommandRepository.test.js +132 -0
  267. package/backend/src/__tests__/repositories/EventGraphRepository.test.js +93 -0
  268. package/backend/src/__tests__/repositories/GroupRepository.test.js +155 -0
  269. package/backend/src/__tests__/repositories/PermissionRepository.test.js +130 -0
  270. package/backend/src/__tests__/repositories/PluginRepository.test.js +107 -0
  271. package/backend/src/__tests__/repositories/ServerRepository.test.js +80 -0
  272. package/backend/src/__tests__/repositories/UserRepository.test.js +128 -0
  273. package/backend/src/__tests__/secretsFilter.test.js +425 -0
  274. package/backend/src/__tests__/services/BotLifecycleService.test.js +411 -0
  275. package/backend/src/__tests__/services/BotProcessManager.test.js +285 -0
  276. package/backend/src/__tests__/services/CacheManager.test.js +125 -0
  277. package/backend/src/__tests__/services/CommandExecutionService.test.js +460 -0
  278. package/backend/src/__tests__/services/ResourceMonitorService.test.js +207 -0
  279. package/backend/src/__tests__/services/TelemetryService.test.js +291 -0
  280. package/backend/src/__tests__/setup.js +25 -0
  281. package/backend/src/api/routes/apiKeys.js +181 -0
  282. package/backend/src/api/routes/bots.js +49 -7
  283. package/backend/src/api/routes/plugins.js +2 -1
  284. package/backend/src/api/routes/system.js +174 -0
  285. package/backend/src/container.js +82 -0
  286. package/backend/src/core/BotManager.js +142 -871
  287. package/backend/src/core/BotManager.old.js +1093 -0
  288. package/backend/src/core/BotProcess.js +1092 -858
  289. package/backend/src/core/EventGraphManager.js +280 -198
  290. package/backend/src/core/GraphExecutionEngine.js +321 -325
  291. package/backend/src/core/MessageQueue.js +27 -6
  292. package/backend/src/core/NodeRegistry.js +37 -1134
  293. package/backend/src/core/PluginManager.js +62 -12
  294. package/backend/src/core/PrismaService.js +32 -0
  295. package/backend/src/core/UserService.js +3 -3
  296. package/backend/src/core/__tests__/PrismaService.test.js +24 -0
  297. package/backend/src/core/commands/README.md +305 -0
  298. package/backend/src/core/commands/dev.js +13 -7
  299. package/backend/src/core/commands/ping.js +10 -4
  300. package/backend/src/core/commands/whois.js +63 -0
  301. package/backend/src/core/config/validation.js +27 -0
  302. package/backend/src/core/constants/graphTypes.js +21 -0
  303. package/backend/src/core/node-registries/actions.js +132 -0
  304. package/backend/src/core/node-registries/arrays.js +137 -0
  305. package/backend/src/core/node-registries/bot.js +23 -0
  306. package/backend/src/core/node-registries/data.js +290 -0
  307. package/backend/src/core/node-registries/debug.js +26 -0
  308. package/backend/src/core/node-registries/events.js +187 -0
  309. package/backend/src/core/node-registries/flow.js +139 -0
  310. package/backend/src/core/node-registries/logic.js +45 -0
  311. package/backend/src/core/node-registries/math.js +42 -0
  312. package/backend/src/core/node-registries/objects.js +98 -0
  313. package/backend/src/core/node-registries/strings.js +153 -0
  314. package/backend/src/core/node-registries/time.js +113 -0
  315. package/backend/src/core/node-registries/users.js +79 -0
  316. package/backend/src/core/nodes/{action_bot_look_at.js → actions/bot_look_at.js} +36 -36
  317. package/backend/src/core/nodes/{action_bot_set_variable.js → actions/bot_set_variable.js} +32 -32
  318. package/backend/src/core/nodes/{action_send_log.js → actions/send_log.js} +28 -23
  319. package/backend/src/core/nodes/{action_send_message.js → actions/send_message.js} +32 -32
  320. package/backend/src/core/nodes/actions/send_websocket_response.js +33 -0
  321. package/backend/src/core/nodes/arrays/get_next.js +35 -0
  322. package/backend/src/core/nodes/{data_cast.js → data/cast.js} +8 -0
  323. package/backend/src/core/nodes/data/datetime_literal.js +27 -0
  324. package/backend/src/core/nodes/data/entity_info.js +69 -0
  325. package/backend/src/core/nodes/data/get_nearby_entities.js +32 -0
  326. package/backend/src/core/nodes/data/get_nearby_players.js +64 -0
  327. package/backend/src/core/nodes/{data_get_user_field.js → data/get_user_field.js} +1 -1
  328. package/backend/src/core/nodes/data/type_check.js +53 -0
  329. package/backend/src/core/nodes/{debug_log.js → debug/log.js} +16 -16
  330. package/backend/src/core/nodes/{flow_branch.js → flow/branch.js} +15 -15
  331. package/backend/src/core/nodes/{flow_break.js → flow/break.js} +14 -14
  332. package/backend/src/core/nodes/flow/delay.js +43 -0
  333. package/backend/src/core/nodes/{flow_for_each.js → flow/for_each.js} +39 -39
  334. package/backend/src/core/nodes/{flow_sequence.js → flow/sequence.js} +16 -16
  335. package/backend/src/core/nodes/{flow_switch.js → flow/switch.js} +47 -47
  336. package/backend/src/core/nodes/{flow_while.js → flow/while.js} +1 -1
  337. package/backend/src/core/nodes/logic/__tests__/compare.test.js +83 -0
  338. package/backend/src/core/nodes/math/__tests__/operation.test.js +65 -0
  339. package/backend/src/core/nodes/strings/__tests__/concat.test.js +89 -0
  340. package/backend/src/core/nodes/time/__tests__/now.test.js +24 -0
  341. package/backend/src/core/nodes/time/add.js +33 -0
  342. package/backend/src/core/nodes/time/compare.js +35 -0
  343. package/backend/src/core/nodes/time/diff.js +29 -0
  344. package/backend/src/core/nodes/time/format.js +32 -0
  345. package/backend/src/core/nodes/time/now.js +18 -0
  346. package/backend/src/core/nodes/{user_check_blacklist.js → users/check_blacklist.js} +37 -37
  347. package/backend/src/core/nodes/{user_get_groups.js → users/get_groups.js} +36 -36
  348. package/backend/src/core/nodes/{user_get_permissions.js → users/get_permissions.js} +36 -36
  349. package/backend/src/core/nodes/{user_set_blacklist.js → users/set_blacklist.js} +37 -37
  350. package/backend/src/core/services/BotLifecycleService.js +596 -0
  351. package/backend/src/core/services/BotProcessManager.js +163 -0
  352. package/backend/src/core/services/CacheManager.js +111 -0
  353. package/backend/src/core/services/CommandExecutionService.js +351 -0
  354. package/backend/src/core/services/ResourceMonitorService.js +90 -0
  355. package/backend/src/core/services/TelemetryService.js +124 -0
  356. package/backend/src/core/services/ValidationService.js +132 -0
  357. package/backend/src/core/services/__tests__/ValidationService.test.js +148 -0
  358. package/backend/src/core/services.js +20 -5
  359. package/backend/src/core/system/CommandContext.js +84 -0
  360. package/backend/src/core/system/Transport.js +78 -0
  361. package/backend/src/core/utils/__tests__/jsonParser.test.js +44 -0
  362. package/backend/src/core/utils/jsonParser.js +18 -0
  363. package/backend/src/core/utils/secretsFilter.js +262 -0
  364. package/backend/src/core/utils/variableParser.js +89 -0
  365. package/backend/src/core/validation/__tests__/nodeSchemas.test.js +175 -0
  366. package/backend/src/core/validation/nodeSchemas.js +112 -0
  367. package/backend/src/lib/prisma.js +2 -4
  368. package/backend/src/real-time/botApi/handlers/commandHandlers.js +28 -0
  369. package/backend/src/real-time/botApi/handlers/graphHandlers.js +99 -0
  370. package/backend/src/real-time/botApi/handlers/graphWebSocketHandlers.js +147 -0
  371. package/backend/src/real-time/botApi/handlers/index.js +43 -0
  372. package/backend/src/real-time/botApi/handlers/messageHandlers.js +66 -0
  373. package/backend/src/real-time/botApi/handlers/statusHandlers.js +17 -0
  374. package/backend/src/real-time/botApi/handlers/userHandlers.js +141 -0
  375. package/backend/src/real-time/botApi/index.js +40 -0
  376. package/backend/src/real-time/botApi/middleware.js +79 -0
  377. package/backend/src/real-time/botApi/utils.js +54 -0
  378. package/backend/src/real-time/socketHandler.js +6 -2
  379. package/backend/src/repositories/BaseRepository.js +43 -0
  380. package/backend/src/repositories/BotRepository.js +42 -0
  381. package/backend/src/repositories/CommandRepository.js +53 -0
  382. package/backend/src/repositories/EventGraphRepository.js +40 -0
  383. package/backend/src/repositories/GroupRepository.js +69 -0
  384. package/backend/src/repositories/PermissionRepository.js +48 -0
  385. package/backend/src/repositories/PluginRepository.js +42 -0
  386. package/backend/src/repositories/ServerRepository.js +27 -0
  387. package/backend/src/repositories/UserRepository.js +48 -0
  388. package/backend/src/server.js +3 -0
  389. package/backend/src/test-refactor.js +85 -0
  390. package/frontend/dist/assets/index-CfTo92bP.css +1 -0
  391. package/frontend/dist/assets/index-CiFD5X9Z.js +8344 -0
  392. package/frontend/dist/index.html +2 -2
  393. package/frontend/package.json +0 -5
  394. package/package.json +2 -1
  395. package/frontend/dist/assets/index-B9GedHEa.js +0 -8352
  396. package/frontend/dist/assets/index-zLiy9MDx.css +0 -1
  397. package/nul +0 -0
  398. /package/backend/src/core/nodes/{action_http_request.js → actions/http_request.js} +0 -0
  399. /package/backend/src/core/nodes/{array_add_element.js → arrays/add_element.js} +0 -0
  400. /package/backend/src/core/nodes/{array_contains.js → arrays/contains.js} +0 -0
  401. /package/backend/src/core/nodes/{array_find_index.js → arrays/find_index.js} +0 -0
  402. /package/backend/src/core/nodes/{array_get_by_index.js → arrays/get_by_index.js} +0 -0
  403. /package/backend/src/core/nodes/{array_get_random_element.js → arrays/get_random_element.js} +0 -0
  404. /package/backend/src/core/nodes/{array_remove_by_index.js → arrays/remove_by_index.js} +0 -0
  405. /package/backend/src/core/nodes/{bot_get_position.js → bot/get_position.js} +0 -0
  406. /package/backend/src/core/nodes/{data_array_literal.js → data/array_literal.js} +0 -0
  407. /package/backend/src/core/nodes/{data_boolean_literal.js → data/boolean_literal.js} +0 -0
  408. /package/backend/src/core/nodes/{data_get_argument.js → data/get_argument.js} +0 -0
  409. /package/backend/src/core/nodes/{data_get_bot_look.js → data/get_bot_look.js} +0 -0
  410. /package/backend/src/core/nodes/{data_get_entity_field.js → data/get_entity_field.js} +0 -0
  411. /package/backend/src/core/nodes/{data_get_server_players.js → data/get_server_players.js} +0 -0
  412. /package/backend/src/core/nodes/{data_get_variable.js → data/get_variable.js} +0 -0
  413. /package/backend/src/core/nodes/{data_length.js → data/length.js} +0 -0
  414. /package/backend/src/core/nodes/{data_make_object.js → data/make_object.js} +0 -0
  415. /package/backend/src/core/nodes/{data_number_literal.js → data/number_literal.js} +0 -0
  416. /package/backend/src/core/nodes/{data_string_literal.js → data/string_literal.js} +0 -0
  417. /package/backend/src/core/nodes/{logic_compare.js → logic/compare.js} +0 -0
  418. /package/backend/src/core/nodes/{logic_operation.js → logic/operation.js} +0 -0
  419. /package/backend/src/core/nodes/{math_operation.js → math/operation.js} +0 -0
  420. /package/backend/src/core/nodes/{math_random_number.js → math/random_number.js} +0 -0
  421. /package/backend/src/core/nodes/{object_create.js → objects/create.js} +0 -0
  422. /package/backend/src/core/nodes/{object_delete.js → objects/delete.js} +0 -0
  423. /package/backend/src/core/nodes/{object_get.js → objects/get.js} +0 -0
  424. /package/backend/src/core/nodes/{object_has_key.js → objects/has_key.js} +0 -0
  425. /package/backend/src/core/nodes/{object_set.js → objects/set.js} +0 -0
  426. /package/backend/src/core/nodes/{string_concat.js → strings/concat.js} +0 -0
  427. /package/backend/src/core/nodes/{string_contains.js → strings/contains.js} +0 -0
  428. /package/backend/src/core/nodes/{string_ends_with.js → strings/ends_with.js} +0 -0
  429. /package/backend/src/core/nodes/{string_equals.js → strings/equals.js} +0 -0
  430. /package/backend/src/core/nodes/{string_length.js → strings/length.js} +0 -0
  431. /package/backend/src/core/nodes/{string_matches.js → strings/matches.js} +0 -0
  432. /package/backend/src/core/nodes/{string_split.js → strings/split.js} +0 -0
  433. /package/backend/src/core/nodes/{string_starts_with.js → strings/starts_with.js} +0 -0
@@ -0,0 +1,464 @@
1
+ # SLO, SLI, and SLA - Service Level Objectives, Indicators, and Agreements
2
+
3
+ Defining SLIs/SLOs/SLAs, error budgets, measuring reliability, and example calculations for site reliability engineering.
4
+
5
+ ## Table of Contents
6
+
7
+ - [Definitions](#definitions)
8
+ - [SLI - Service Level Indicators](#sli---service-level-indicators)
9
+ - [SLO - Service Level Objectives](#slo---service-level-objectives)
10
+ - [SLA - Service Level Agreements](#sla---service-level-agreements)
11
+ - [Error Budgets](#error-budgets)
12
+ - [Implementation](#implementation)
13
+ - [Monitoring and Measurement](#monitoring-and-measurement)
14
+ - [Best Practices](#best-practices)
15
+
16
+ ## Definitions
17
+
18
+ **SLI (Service Level Indicator):** Quantitative measure of service quality
19
+ **SLO (Service Level Objective):** Target value for an SLI
20
+ **SLA (Service Level Agreement):** Business agreement with consequences
21
+
22
+ ```
23
+ SLI: What we measure
24
+
25
+ SLO: What we promise internally
26
+
27
+ SLA: What we promise customers (with penalties)
28
+ ```
29
+
30
+ ## SLI - Service Level Indicators
31
+
32
+ ### Common SLIs
33
+
34
+ **Availability:**
35
+ ```
36
+ Availability = (Successful Requests / Total Requests) × 100%
37
+
38
+ Example:
39
+ 999,000 successful / 1,000,000 total = 99.9% availability
40
+ ```
41
+
42
+ **Latency:**
43
+ ```
44
+ Latency SLI = % of requests faster than threshold
45
+
46
+ Example:
47
+ 95% of requests complete within 200ms
48
+ 99% of requests complete within 500ms
49
+ ```
50
+
51
+ **Error Rate:**
52
+ ```
53
+ Error Rate = (Failed Requests / Total Requests) × 100%
54
+
55
+ Example:
56
+ 100 errors / 100,000 requests = 0.1% error rate
57
+ ```
58
+
59
+ **Throughput:**
60
+ ```
61
+ Throughput = Requests per second (RPS)
62
+
63
+ Example:
64
+ 1,000 requests per second sustained
65
+ ```
66
+
67
+ ### Prometheus Queries for SLIs
68
+
69
+ **Availability SLI:**
70
+ ```promql
71
+ # Success rate over 30 days
72
+ sum(rate(http_requests_total{status=~"2.."}[30d]))
73
+ /
74
+ sum(rate(http_requests_total[30d]))
75
+ ```
76
+
77
+ **Latency SLI (p95):**
78
+ ```promql
79
+ # 95th percentile latency
80
+ histogram_quantile(0.95,
81
+ sum(rate(http_request_duration_seconds_bucket[5m])) by (le)
82
+ )
83
+ ```
84
+
85
+ **Error Rate SLI:**
86
+ ```promql
87
+ # Error rate over 30 days
88
+ sum(rate(http_requests_total{status=~"5.."}[30d]))
89
+ /
90
+ sum(rate(http_requests_total[30d]))
91
+ ```
92
+
93
+ ## SLO - Service Level Objectives
94
+
95
+ ### Defining SLOs
96
+
97
+ **Four Golden Signals:**
98
+ 1. **Latency:** Request duration
99
+ 2. **Traffic:** Request rate
100
+ 3. **Errors:** Failed requests
101
+ 4. **Saturation:** Resource utilization
102
+
103
+ **Example SLOs:**
104
+ ```yaml
105
+ slos:
106
+ availability:
107
+ target: 99.9%
108
+ window: 30d
109
+ description: "Service is available and responding to requests"
110
+
111
+ latency:
112
+ target: 95%
113
+ threshold: 200ms
114
+ window: 30d
115
+ description: "95% of requests complete within 200ms"
116
+
117
+ error_rate:
118
+ target: 99.9%
119
+ window: 30d
120
+ description: "99.9% of requests succeed (0.1% error budget)"
121
+ ```
122
+
123
+ ### Availability Tiers
124
+
125
+ ```
126
+ 99.9% (three nines) = 43.2 minutes downtime/month
127
+ 99.95% (three and a half nines) = 21.6 minutes downtime/month
128
+ 99.99% (four nines) = 4.32 minutes downtime/month
129
+ 99.999% (five nines) = 26 seconds downtime/month
130
+ ```
131
+
132
+ ### SLO Document Example
133
+
134
+ ```yaml
135
+ # api-service-slo.yaml
136
+ service: api-service
137
+ owner: platform-team
138
+ reviewed: 2024-01-15
139
+
140
+ slos:
141
+ - name: availability
142
+ description: API endpoint availability
143
+ sli:
144
+ query: |
145
+ sum(rate(http_requests_total{job="api",status=~"2.."}[30d]))
146
+ /
147
+ sum(rate(http_requests_total{job="api"}[30d]))
148
+ target: 0.999 # 99.9%
149
+ window: 30d
150
+
151
+ - name: latency-p95
152
+ description: 95th percentile latency under 200ms
153
+ sli:
154
+ query: |
155
+ histogram_quantile(0.95,
156
+ sum(rate(http_request_duration_seconds_bucket{job="api"}[5m])) by (le)
157
+ )
158
+ target: 0.2 # 200ms
159
+ window: 30d
160
+
161
+ - name: error-rate
162
+ description: Error rate below 0.1%
163
+ sli:
164
+ query: |
165
+ sum(rate(http_requests_total{job="api",status=~"5.."}[30d]))
166
+ /
167
+ sum(rate(http_requests_total{job="api"}[30d]))
168
+ target: 0.001 # 0.1% errors = 99.9% success
169
+ window: 30d
170
+
171
+ dependencies:
172
+ - database-service (99.95% SLO)
173
+ - cache-service (99.9% SLO)
174
+
175
+ alerting:
176
+ burn_rate_fast: 14.4 # 2% error budget in 1 hour
177
+ burn_rate_slow: 6 # 5% error budget in 6 hours
178
+ ```
179
+
180
+ ## SLA - Service Level Agreements
181
+
182
+ ### SLA vs SLO
183
+
184
+ **SLO (Internal):**
185
+ - Target: 99.9%
186
+ - No financial penalty
187
+ - Triggers internal response
188
+
189
+ **SLA (Customer-Facing):**
190
+ - Commitment: 99.5% (buffer below SLO)
191
+ - Financial penalty if missed
192
+ - Legal agreement
193
+
194
+ ### SLA Example
195
+
196
+ ```yaml
197
+ # customer-sla.yaml
198
+ service: api-platform
199
+ effective_date: 2024-01-01
200
+
201
+ commitments:
202
+ availability:
203
+ guarantee: 99.5%
204
+ measurement_period: monthly
205
+ exclusions:
206
+ - Scheduled maintenance (with 48hr notice)
207
+ - Customer-caused issues
208
+ - Force majeure
209
+
210
+ credits:
211
+ 99.0% - 99.5%: 10% monthly fee credit
212
+ 98.0% - 99.0%: 25% monthly fee credit
213
+ < 98.0%: 50% monthly fee credit
214
+
215
+ support:
216
+ severity_1: 1 hour response time
217
+ severity_2: 4 hours response time
218
+ severity_3: 24 hours response time
219
+
220
+ data_durability:
221
+ guarantee: 99.999999999% (11 nines)
222
+ ```
223
+
224
+ ## Error Budgets
225
+
226
+ ### Concept
227
+
228
+ ```
229
+ Error Budget = 1 - SLO
230
+
231
+ 99.9% SLO = 0.1% error budget
232
+ = 43.2 minutes/month
233
+ = 1,000 failed requests per million
234
+ ```
235
+
236
+ ### Error Budget Policy
237
+
238
+ ```yaml
239
+ # error-budget-policy.yaml
240
+ error_budget_policy:
241
+ # When error budget > 0: Normal operations
242
+ when_budget_available:
243
+ - Deploy during business hours
244
+ - Accept reasonable risk
245
+ - Focus on feature velocity
246
+ - Continue experimentation
247
+
248
+ # When error budget exhausted: Freeze changes
249
+ when_budget_exhausted:
250
+ - Halt all feature deployments
251
+ - Focus on reliability improvements
252
+ - Root cause analysis required
253
+ - Only critical bug fixes allowed
254
+ - Emergency change approval needed
255
+
256
+ # When error budget critically low
257
+ when_budget_critical: # < 25% remaining
258
+ - Heightened change review
259
+ - Increased monitoring
260
+ - Reduce deployment frequency
261
+ - Prepare contingency plans
262
+ ```
263
+
264
+ ### Error Budget Calculation
265
+
266
+ ```python
267
+ def calculate_error_budget(slo_target, total_requests, failed_requests):
268
+ """
269
+ Calculate error budget consumption
270
+
271
+ Args:
272
+ slo_target: Target SLO (e.g., 0.999 for 99.9%)
273
+ total_requests: Total requests in period
274
+ failed_requests: Failed requests in period
275
+
276
+ Returns:
277
+ dict with error budget metrics
278
+ """
279
+ allowed_failures = total_requests * (1 - slo_target)
280
+ error_budget_consumed = failed_requests / allowed_failures
281
+
282
+ return {
283
+ 'allowed_failures': allowed_failures,
284
+ 'actual_failures': failed_requests,
285
+ 'budget_consumed_pct': error_budget_consumed * 100,
286
+ 'budget_remaining_pct': (1 - error_budget_consumed) * 100,
287
+ 'is_exhausted': error_budget_consumed >= 1.0
288
+ }
289
+
290
+ # Example
291
+ result = calculate_error_budget(
292
+ slo_target=0.999,
293
+ total_requests=10_000_000,
294
+ failed_requests=5_000
295
+ )
296
+
297
+ print(f"Allowed failures: {result['allowed_failures']}") # 10,000
298
+ print(f"Actual failures: {result['actual_failures']}") # 5,000
299
+ print(f"Budget consumed: {result['budget_consumed_pct']:.1f}%") # 50%
300
+ print(f"Budget remaining: {result['budget_remaining_pct']:.1f}%") # 50%
301
+ ```
302
+
303
+ ## Implementation
304
+
305
+ ### Prometheus Recording Rules
306
+
307
+ ```yaml
308
+ # prometheus-slo-rules.yaml
309
+ groups:
310
+ - name: slo_recording_rules
311
+ interval: 30s
312
+ rules:
313
+ # Availability SLI
314
+ - record: slo:availability:ratio_rate30d
315
+ expr: |
316
+ sum(rate(http_requests_total{job="api",status=~"2.."}[30d]))
317
+ /
318
+ sum(rate(http_requests_total{job="api"}[30d]))
319
+
320
+ # Error budget remaining
321
+ - record: slo:error_budget:ratio
322
+ expr: |
323
+ 1 - (
324
+ (1 - slo:availability:ratio_rate30d)
325
+ /
326
+ (1 - 0.999)
327
+ )
328
+
329
+ # Latency SLI
330
+ - record: slo:latency:p95_30d
331
+ expr: |
332
+ histogram_quantile(0.95,
333
+ sum(rate(http_request_duration_seconds_bucket{job="api"}[30d])) by (le)
334
+ )
335
+ ```
336
+
337
+ ### Alerting Rules
338
+
339
+ ```yaml
340
+ # prometheus-slo-alerts.yaml
341
+ groups:
342
+ - name: slo_alerts
343
+ rules:
344
+ # Fast burn: 2% budget in 1 hour
345
+ - alert: ErrorBudgetBurnRateFast
346
+ expr: |
347
+ (
348
+ sum(rate(http_requests_total{job="api",status=~"5.."}[1h]))
349
+ /
350
+ sum(rate(http_requests_total{job="api"}[1h]))
351
+ ) > (14.4 * (1 - 0.999))
352
+ labels:
353
+ severity: critical
354
+ annotations:
355
+ summary: "Error budget burning too fast"
356
+ description: "2% of monthly error budget consumed in 1 hour"
357
+
358
+ # Slow burn: 5% budget in 6 hours
359
+ - alert: ErrorBudgetBurnRateSlow
360
+ expr: |
361
+ (
362
+ sum(rate(http_requests_total{job="api",status=~"5.."}[6h]))
363
+ /
364
+ sum(rate(http_requests_total{job="api"}[6h]))
365
+ ) > (6 * (1 - 0.999))
366
+ labels:
367
+ severity: warning
368
+ annotations:
369
+ summary: "Error budget burning at elevated rate"
370
+
371
+ # Budget exhausted
372
+ - alert: ErrorBudgetExhausted
373
+ expr: slo:error_budget:ratio <= 0
374
+ labels:
375
+ severity: critical
376
+ annotations:
377
+ summary: "Error budget fully consumed"
378
+ description: "Halt feature deployments, focus on reliability"
379
+ ```
380
+
381
+ ## Monitoring and Measurement
382
+
383
+ ### Grafana Dashboard
384
+
385
+ ```json
386
+ {
387
+ "dashboard": {
388
+ "title": "SLO Dashboard",
389
+ "panels": [
390
+ {
391
+ "title": "Error Budget Remaining",
392
+ "type": "gauge",
393
+ "targets": [{
394
+ "expr": "slo:error_budget:ratio * 100"
395
+ }],
396
+ "thresholds": [
397
+ { "value": 0, "color": "red" },
398
+ { "value": 25, "color": "yellow" },
399
+ { "value": 50, "color": "green" }
400
+ ]
401
+ },
402
+ {
403
+ "title": "Availability (30d)",
404
+ "type": "stat",
405
+ "targets": [{
406
+ "expr": "slo:availability:ratio_rate30d * 100"
407
+ }],
408
+ "format": "percent"
409
+ }
410
+ ]
411
+ }
412
+ }
413
+ ```
414
+
415
+ ## Best Practices
416
+
417
+ ### 1. Start Simple
418
+
419
+ ```yaml
420
+ # Begin with basic availability SLO
421
+ initial_slo:
422
+ availability: 99.9%
423
+ measurement: request_success_rate
424
+ ```
425
+
426
+ ### 2. User-Centric SLIs
427
+
428
+ ```
429
+ ✅ Good: "95% of page loads complete in < 2s"
430
+ ❌ Bad: "CPU usage < 80%"
431
+ ```
432
+
433
+ ### 3. Realistic Targets
434
+
435
+ ```
436
+ Don't aim for 100% - impossible and expensive
437
+ 99.9% is often appropriate for most services
438
+ 99.99% only if business truly requires it
439
+ ```
440
+
441
+ ### 4. Define Measurement Windows
442
+
443
+ ```
444
+ Use 30-day rolling windows
445
+ Shorter windows (1d, 7d) for faster feedback
446
+ ```
447
+
448
+ ### 5. Document Everything
449
+
450
+ ```yaml
451
+ # Include in SLO document:
452
+ - What is measured
453
+ - Why it matters
454
+ - How it's calculated
455
+ - Who owns it
456
+ - Review frequency
457
+ ```
458
+
459
+ ---
460
+
461
+ **Related Resources:**
462
+ - [incident-management.md](incident-management.md) - Responding to SLO violations
463
+ - [alerting-best-practices.md](alerting-best-practices.md) - SLO-based alerting
464
+ - [observability-stack.md](observability-stack.md) - Monitoring implementation
@@ -0,0 +1,145 @@
1
+ # Toil Reduction
2
+
3
+ Identifying toil, automation opportunities, self-healing systems, eliminating manual work, and improving operational efficiency.
4
+
5
+ ## What is Toil?
6
+
7
+ **Toil Characteristics:**
8
+ ```
9
+ Manual - Requires human intervention
10
+ Repetitive - Same task over and over
11
+ Automatable - Could be automated
12
+ Tactical - Interrupt-driven, reactive
13
+ No enduring value - Doesn't improve system
14
+ Scales linearly - More growth = more toil
15
+ ```
16
+
17
+ ## Identifying Toil
18
+
19
+ **Toil Audit:**
20
+ ```yaml
21
+ # Track on-call time spent
22
+ weekly_activities:
23
+ - task: Restart failed pods
24
+ time_spent: 2 hours
25
+ frequency: 15 times
26
+ toil_score: HIGH
27
+ automation_potential: HIGH
28
+
29
+ - task: Manual deployment
30
+ time_spent: 3 hours
31
+ frequency: 10 times
32
+ toil_score: CRITICAL
33
+ automation_potential: HIGH
34
+
35
+ - task: Update DNS records
36
+ time_spent: 30 minutes
37
+ frequency: 5 times
38
+ toil_score: MEDIUM
39
+ automation_potential: MEDIUM
40
+ ```
41
+
42
+ ## Automation Examples
43
+
44
+ **Auto-Remediation:**
45
+ ```yaml
46
+ # Kubernetes CronJob for cleanup
47
+ apiVersion: batch/v1
48
+ kind: CronJob
49
+ metadata:
50
+ name: cleanup-failed-pods
51
+ spec:
52
+ schedule: "*/30 * * * *"
53
+ jobTemplate:
54
+ spec:
55
+ template:
56
+ spec:
57
+ containers:
58
+ - name: cleanup
59
+ image: bitnami/kubectl
60
+ command:
61
+ - /bin/sh
62
+ - -c
63
+ - kubectl delete pods --field-selector status.phase=Failed
64
+ ```
65
+
66
+ **Self-Healing with Horizontal Pod Autoscaler:**
67
+ ```yaml
68
+ apiVersion: autoscaling/v2
69
+ kind: HorizontalPodAutoscaler
70
+ metadata:
71
+ name: api-hpa
72
+ spec:
73
+ scaleTargetRef:
74
+ apiVersion: apps/v1
75
+ kind: Deployment
76
+ name: api
77
+ minReplicas: 3
78
+ maxReplicas: 50
79
+ metrics:
80
+ - type: Resource
81
+ resource:
82
+ name: cpu
83
+ target:
84
+ type: Utilization
85
+ averageUtilization: 70
86
+ ```
87
+
88
+ **Automated Deployment:**
89
+ ```yaml
90
+ # ArgoCD for GitOps
91
+ apiVersion: argoproj.io/v1alpha1
92
+ kind: Application
93
+ metadata:
94
+ name: api-service
95
+ spec:
96
+ destination:
97
+ namespace: production
98
+ server: https://kubernetes.default.svc
99
+ source:
100
+ path: k8s/production
101
+ repoURL: https://github.com/example/repo
102
+ targetRevision: main
103
+ syncPolicy:
104
+ automated:
105
+ prune: true
106
+ selfHeal: true
107
+ ```
108
+
109
+ ## Toil Reduction Strategies
110
+
111
+ ### 1. Eliminate Manual Steps
112
+
113
+ ```
114
+ Before: SSH to server → restart service → check logs → update ticket
115
+ After: kubectl rollout restart → auto-verification → auto-notification
116
+ ```
117
+
118
+ ### 2. Self-Service Platforms
119
+
120
+ ```yaml
121
+ # Developer self-service
122
+ backstage_template:
123
+ - Create new service
124
+ - Provision infrastructure
125
+ - Setup CI/CD
126
+ - Configure monitoring
127
+ - All automated, no ops team needed
128
+ ```
129
+
130
+ ### 3. Intelligent Automation
131
+
132
+ ```python
133
+ # Auto-scale based on patterns
134
+ def intelligent_scaling(metrics):
135
+ if is_business_hours() and metrics['traffic'] > threshold:
136
+ scale_up()
137
+ elif is_weekend() and metrics['traffic'] < threshold:
138
+ scale_down()
139
+ ```
140
+
141
+ ---
142
+
143
+ **Related Resources:**
144
+ - [chaos-engineering.md](chaos-engineering.md)
145
+ - [reliability-patterns.md](reliability-patterns.md)