blockmine 1.24.0 → 1.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/README.en.md +427 -0
- package/README.md +40 -0
- package/backend/cli.js +1 -1
- package/backend/src/ai/plugin-assistant-system-prompt.md +664 -5
- package/backend/src/api/routes/bots.js +13 -0
- package/backend/src/api/routes/servers.js +14 -2
- package/backend/src/core/BotProcess.js +98 -2
- package/backend/src/core/PluginLoader.js +83 -3
- package/backend/src/core/PluginManager.js +75 -5
- package/backend/src/core/services/BotLifecycleService.js +186 -2
- package/backend/src/server.js +11 -1
- package/frontend/dist/assets/browser-ponyfill-DN7pwmHT.js +2 -0
- package/frontend/dist/assets/index-LSy71uwm.js +11261 -0
- package/frontend/dist/assets/index-SfhKxI4-.css +32 -0
- package/frontend/dist/flags/en.svg +32 -0
- package/frontend/dist/flags/ru.svg +5 -0
- package/frontend/dist/index.html +2 -2
- package/frontend/dist/locales/en/admin.json +100 -0
- package/frontend/dist/locales/en/api-keys.json +58 -0
- package/frontend/dist/locales/en/bots.json +110 -0
- package/frontend/dist/locales/en/common.json +47 -0
- package/frontend/dist/locales/en/configuration.json +22 -0
- package/frontend/dist/locales/en/console.json +10 -0
- package/frontend/dist/locales/en/dashboard.json +85 -0
- package/frontend/dist/locales/en/dialogs.json +70 -0
- package/frontend/dist/locales/en/event-graphs.json +50 -0
- package/frontend/dist/locales/en/graph-store.json +70 -0
- package/frontend/dist/locales/en/login.json +34 -0
- package/frontend/dist/locales/en/management.json +114 -0
- package/frontend/dist/locales/en/minecraft-viewer.json +27 -0
- package/frontend/dist/locales/en/nodes.json +1077 -0
- package/frontend/dist/locales/en/permissions.json +50 -0
- package/frontend/dist/locales/en/plugin-detail.json +49 -0
- package/frontend/dist/locales/en/plugins.json +110 -0
- package/frontend/dist/locales/en/proxies.json +81 -0
- package/frontend/dist/locales/en/servers.json +39 -0
- package/frontend/dist/locales/en/setup.json +17 -0
- package/frontend/dist/locales/en/sidebar.json +27 -0
- package/frontend/dist/locales/en/tasks.json +62 -0
- package/frontend/dist/locales/en/visual-editor.json +219 -0
- package/frontend/dist/locales/en/websocket.json +86 -0
- package/frontend/dist/locales/ru/admin.json +100 -0
- package/frontend/dist/locales/ru/api-keys.json +58 -0
- package/frontend/dist/locales/ru/bots.json +110 -0
- package/frontend/dist/locales/ru/common.json +49 -0
- package/frontend/dist/locales/ru/configuration.json +22 -0
- package/frontend/dist/locales/ru/console.json +10 -0
- package/frontend/dist/locales/ru/dashboard.json +85 -0
- package/frontend/dist/locales/ru/dialogs.json +70 -0
- package/frontend/dist/locales/ru/event-graphs.json +50 -0
- package/frontend/dist/locales/ru/graph-store.json +70 -0
- package/frontend/dist/locales/ru/login.json +34 -0
- package/frontend/dist/locales/ru/management.json +114 -0
- package/frontend/dist/locales/ru/minecraft-viewer.json +27 -0
- package/frontend/dist/locales/ru/nodes.json +1077 -0
- package/frontend/dist/locales/ru/permissions.json +50 -0
- package/frontend/dist/locales/ru/plugin-detail.json +49 -0
- package/frontend/dist/locales/ru/plugins.json +110 -0
- package/frontend/dist/locales/ru/proxies.json +81 -0
- package/frontend/dist/locales/ru/servers.json +39 -0
- package/frontend/dist/locales/ru/setup.json +17 -0
- package/frontend/dist/locales/ru/sidebar.json +27 -0
- package/frontend/dist/locales/ru/tasks.json +62 -0
- package/frontend/dist/locales/ru/visual-editor.json +221 -0
- package/frontend/dist/locales/ru/websocket.json +86 -0
- package/frontend/dist/monacoeditorwork/css.worker.bundle.js +7 -7
- package/frontend/dist/monacoeditorwork/html.worker.bundle.js +7 -7
- package/frontend/dist/monacoeditorwork/json.worker.bundle.js +7 -7
- package/frontend/dist/monacoeditorwork/ts.worker.bundle.js +3 -3
- package/frontend/package.json +4 -0
- package/package.json +1 -1
- package/screen/3dviewer.png +0 -0
- package/screen/console.png +0 -0
- package/screen/dashboard.png +0 -0
- package/screen/graph_collabe.png +0 -0
- package/screen/graph_live_debug.png +0 -0
- package/screen/language_selector.png +0 -0
- package/screen/management_command.png +0 -0
- package/screen/node_debug_trace.png +0 -0
- package/screen/plugin_обзор.png +0 -0
- package/screen/websocket.png +0 -0
- package/screen/настройки_отдельных_команд_кажду_команлду_можно_настраивать.png +0 -0
- package/screen/планировщик_можно_задавать_действия_по_времени.png +0 -0
- package/.claude/agents/README.md +0 -469
- package/.claude/agents/auth-route-debugger.md +0 -118
- package/.claude/agents/auth-route-tester.md +0 -93
- package/.claude/agents/auto-error-resolver.md +0 -97
- package/.claude/agents/build-optimizer.md +0 -236
- package/.claude/agents/code-architect.md +0 -34
- package/.claude/agents/code-architecture-reviewer.md +0 -83
- package/.claude/agents/code-explorer.md +0 -51
- package/.claude/agents/code-refactor-master.md +0 -94
- package/.claude/agents/code-reviewer.md +0 -46
- package/.claude/agents/cost-optimizer.md +0 -134
- package/.claude/agents/deployment-orchestrator.md +0 -113
- package/.claude/agents/documentation-architect.md +0 -82
- package/.claude/agents/frontend-error-fixer.md +0 -77
- package/.claude/agents/iac-code-generator.md +0 -71
- package/.claude/agents/incident-responder.md +0 -346
- package/.claude/agents/infrastructure-architect.md +0 -31
- package/.claude/agents/kubernetes-specialist.md +0 -56
- package/.claude/agents/migration-planner.md +0 -181
- package/.claude/agents/network-architect.md +0 -196
- package/.claude/agents/plan-reviewer.md +0 -52
- package/.claude/agents/refactor-planner.md +0 -63
- package/.claude/agents/security-scanner.md +0 -102
- package/.claude/agents/web-research-specialist.md +0 -78
- package/.claude/commands/cost-analysis.md +0 -315
- package/.claude/commands/dev-docs-update.md +0 -55
- package/.claude/commands/dev-docs.md +0 -51
- package/.claude/commands/feature-dev.md +0 -125
- package/.claude/commands/incident-debug.md +0 -247
- package/.claude/commands/infra-plan.md +0 -81
- package/.claude/commands/migration-plan.md +0 -478
- package/.claude/commands/route-research-for-testing.md +0 -37
- package/.claude/commands/security-review.md +0 -66
- package/.claude/hooks/CONFIG.md +0 -448
- package/.claude/hooks/README.md +0 -163
- package/.claude/hooks/SKILL_ACTIVATION_COMPLETE.md +0 -226
- package/.claude/hooks/WINDOWS_HOOKS_README.md +0 -151
- package/.claude/hooks/add-skill-activation-banners.ts +0 -132
- package/.claude/hooks/comprehensive-skill-test.ts +0 -1315
- package/.claude/hooks/error-handling-reminder.sh +0 -12
- package/.claude/hooks/error-handling-reminder.ts +0 -222
- package/.claude/hooks/k8s-manifest-validator.sh +0 -56
- package/.claude/hooks/package-lock.json +0 -556
- package/.claude/hooks/package.json +0 -16
- package/.claude/hooks/post-tool-use-tracker.ps1 +0 -174
- package/.claude/hooks/post-tool-use-tracker.sh +0 -183
- package/.claude/hooks/security-policy-check.sh +0 -247
- package/.claude/hooks/skill-activation-prompt.ps1 +0 -10
- package/.claude/hooks/skill-activation-prompt.sh +0 -10
- package/.claude/hooks/skill-activation-prompt.ts +0 -141
- package/.claude/hooks/stop-build-check-enhanced.sh +0 -130
- package/.claude/hooks/terraform-validator.sh +0 -53
- package/.claude/hooks/test-input.json +0 -7
- package/.claude/hooks/test-skill-activation.ts +0 -427
- package/.claude/hooks/trigger-build-resolver.sh +0 -79
- package/.claude/hooks/tsc-check.sh +0 -173
- package/.claude/hooks/tsconfig.json +0 -19
- package/.claude/settings.json +0 -59
- package/.claude/settings.local.json +0 -67
- package/.claude/skills/README.md +0 -507
- package/.claude/skills/api-engineering/SKILL.md +0 -63
- package/.claude/skills/api-engineering/resources/api-versioning.md +0 -88
- package/.claude/skills/api-engineering/resources/graphql-patterns.md +0 -106
- package/.claude/skills/api-engineering/resources/rate-limiting.md +0 -118
- package/.claude/skills/api-engineering/resources/rest-api-design.md +0 -105
- package/.claude/skills/backend-dev-guidelines/SKILL.md +0 -306
- package/.claude/skills/backend-dev-guidelines/resources/architecture-overview.md +0 -451
- package/.claude/skills/backend-dev-guidelines/resources/async-and-errors.md +0 -307
- package/.claude/skills/backend-dev-guidelines/resources/complete-examples.md +0 -638
- package/.claude/skills/backend-dev-guidelines/resources/configuration.md +0 -275
- package/.claude/skills/backend-dev-guidelines/resources/database-patterns.md +0 -224
- package/.claude/skills/backend-dev-guidelines/resources/middleware-guide.md +0 -213
- package/.claude/skills/backend-dev-guidelines/resources/routing-and-controllers.md +0 -756
- package/.claude/skills/backend-dev-guidelines/resources/sentry-and-monitoring.md +0 -336
- package/.claude/skills/backend-dev-guidelines/resources/services-and-repositories.md +0 -789
- package/.claude/skills/backend-dev-guidelines/resources/testing-guide.md +0 -235
- package/.claude/skills/backend-dev-guidelines/resources/validation-patterns.md +0 -754
- package/.claude/skills/budget-and-cost-management/SKILL.md +0 -850
- package/.claude/skills/build-engineering/SKILL.md +0 -431
- package/.claude/skills/build-engineering/resources/artifact-repositories.md +0 -72
- package/.claude/skills/build-engineering/resources/build-caching.md +0 -96
- package/.claude/skills/build-engineering/resources/build-pipelines.md +0 -105
- package/.claude/skills/build-engineering/resources/build-security.md +0 -95
- package/.claude/skills/build-engineering/resources/build-systems.md +0 -389
- package/.claude/skills/build-engineering/resources/compilation-optimization.md +0 -201
- package/.claude/skills/build-engineering/resources/dependency-management.md +0 -73
- package/.claude/skills/build-engineering/resources/monorepo-builds.md +0 -110
- package/.claude/skills/build-engineering/resources/performance-optimization.md +0 -113
- package/.claude/skills/build-engineering/resources/reproducible-builds.md +0 -82
- package/.claude/skills/cloud-engineering/SKILL.md +0 -675
- package/.claude/skills/cloud-engineering/resources/aws-patterns.md +0 -742
- package/.claude/skills/cloud-engineering/resources/azure-patterns.md +0 -714
- package/.claude/skills/cloud-engineering/resources/cleared-cloud-environments.md +0 -987
- package/.claude/skills/cloud-engineering/resources/cloud-cost-optimization.md +0 -757
- package/.claude/skills/cloud-engineering/resources/cloud-networking.md +0 -1058
- package/.claude/skills/cloud-engineering/resources/cloud-security-tools.md +0 -1530
- package/.claude/skills/cloud-engineering/resources/cloud-security.md +0 -990
- package/.claude/skills/cloud-engineering/resources/gcp-patterns.md +0 -758
- package/.claude/skills/cloud-engineering/resources/migration-strategies.md +0 -820
- package/.claude/skills/cloud-engineering/resources/multi-cloud-strategies.md +0 -670
- package/.claude/skills/cloud-engineering/resources/oci-patterns.md +0 -1198
- package/.claude/skills/cloud-engineering/resources/serverless-patterns.md +0 -795
- package/.claude/skills/cloud-engineering/resources/well-architected-frameworks.md +0 -966
- package/.claude/skills/cybersecurity/SKILL.md +0 -409
- package/.claude/skills/cybersecurity/resources/security-architecture.md +0 -266
- package/.claude/skills/database-engineering/SKILL.md +0 -61
- package/.claude/skills/database-engineering/resources/backup-and-recovery.md +0 -72
- package/.claude/skills/database-engineering/resources/database-replication.md +0 -63
- package/.claude/skills/database-engineering/resources/postgresql-fundamentals.md +0 -70
- package/.claude/skills/database-engineering/resources/query-optimization.md +0 -68
- package/.claude/skills/devsecops/SKILL.md +0 -374
- package/.claude/skills/devsecops/resources/ci-cd-security.md +0 -204
- package/.claude/skills/devsecops/resources/compliance-automation.md +0 -530
- package/.claude/skills/devsecops/resources/compliance-frameworks.md +0 -2322
- package/.claude/skills/devsecops/resources/container-security.md +0 -915
- package/.claude/skills/devsecops/resources/cspm-integration.md +0 -1440
- package/.claude/skills/devsecops/resources/policy-enforcement.md +0 -619
- package/.claude/skills/devsecops/resources/secrets-management.md +0 -755
- package/.claude/skills/devsecops/resources/security-monitoring.md +0 -146
- package/.claude/skills/devsecops/resources/security-scanning.md +0 -887
- package/.claude/skills/devsecops/resources/security-testing.md +0 -203
- package/.claude/skills/devsecops/resources/supply-chain-security.md +0 -518
- package/.claude/skills/devsecops/resources/vulnerability-management.md +0 -481
- package/.claude/skills/devsecops/resources/zero-trust-architecture.md +0 -177
- package/.claude/skills/documentation-as-code/SKILL.md +0 -323
- package/.claude/skills/documentation-as-code/resources/api-documentation.md +0 -90
- package/.claude/skills/documentation-as-code/resources/changelog-management.md +0 -79
- package/.claude/skills/documentation-as-code/resources/diagram-generation.md +0 -44
- package/.claude/skills/documentation-as-code/resources/docs-as-code-workflow.md +0 -99
- package/.claude/skills/documentation-as-code/resources/documentation-automation.md +0 -68
- package/.claude/skills/documentation-as-code/resources/documentation-sites.md +0 -79
- package/.claude/skills/documentation-as-code/resources/markdown-best-practices.md +0 -162
- package/.claude/skills/documentation-as-code/resources/openapi-specification.md +0 -77
- package/.claude/skills/documentation-as-code/resources/readme-engineering.md +0 -60
- package/.claude/skills/documentation-as-code/resources/technical-writing-guide.md +0 -202
- package/.claude/skills/engineering-management/SKILL.md +0 -356
- package/.claude/skills/engineering-management/resources/career-ladders.md +0 -609
- package/.claude/skills/engineering-management/resources/hiring-and-assessment.md +0 -555
- package/.claude/skills/engineering-management/resources/one-on-one-guides.md +0 -609
- package/.claude/skills/engineering-management/resources/resource-planning.md +0 -557
- package/.claude/skills/engineering-management/resources/team-organization-patterns.md +0 -491
- package/.claude/skills/engineering-management/resources/technical-interviews.md +0 -474
- package/.claude/skills/engineering-operations-management/SKILL.md +0 -817
- package/.claude/skills/error-tracking/SKILL.md +0 -379
- package/.claude/skills/frontend-design/SKILL.md +0 -42
- package/.claude/skills/frontend-dev-guidelines/SKILL.md +0 -403
- package/.claude/skills/frontend-dev-guidelines/resources/common-patterns.md +0 -331
- package/.claude/skills/frontend-dev-guidelines/resources/complete-examples.md +0 -872
- package/.claude/skills/frontend-dev-guidelines/resources/component-patterns.md +0 -502
- package/.claude/skills/frontend-dev-guidelines/resources/data-fetching.md +0 -767
- package/.claude/skills/frontend-dev-guidelines/resources/file-organization.md +0 -502
- package/.claude/skills/frontend-dev-guidelines/resources/loading-and-error-states.md +0 -501
- package/.claude/skills/frontend-dev-guidelines/resources/performance.md +0 -406
- package/.claude/skills/frontend-dev-guidelines/resources/routing-guide.md +0 -364
- package/.claude/skills/frontend-dev-guidelines/resources/styling-guide.md +0 -428
- package/.claude/skills/frontend-dev-guidelines/resources/typescript-standards.md +0 -418
- package/.claude/skills/general-it-engineering/SKILL.md +0 -393
- package/.claude/skills/general-it-engineering/resources/asset-management.md +0 -712
- package/.claude/skills/general-it-engineering/resources/automation-orchestration.md +0 -817
- package/.claude/skills/general-it-engineering/resources/business-continuity.md +0 -786
- package/.claude/skills/general-it-engineering/resources/change-management.md +0 -715
- package/.claude/skills/general-it-engineering/resources/enterprise-monitoring.md +0 -729
- package/.claude/skills/general-it-engineering/resources/help-desk-operations.md +0 -738
- package/.claude/skills/general-it-engineering/resources/incident-service-management.md +0 -834
- package/.claude/skills/general-it-engineering/resources/it-governance.md +0 -753
- package/.claude/skills/general-it-engineering/resources/itil-framework.md +0 -503
- package/.claude/skills/general-it-engineering/resources/service-management.md +0 -669
- package/.claude/skills/infrastructure-architecture/SKILL.md +0 -328
- package/.claude/skills/infrastructure-architecture/resources/architecture-decision-records.md +0 -505
- package/.claude/skills/infrastructure-architecture/resources/architecture-patterns.md +0 -528
- package/.claude/skills/infrastructure-architecture/resources/capacity-planning.md +0 -453
- package/.claude/skills/infrastructure-architecture/resources/cleared-environment-architecture.md +0 -773
- package/.claude/skills/infrastructure-architecture/resources/cost-architecture.md +0 -499
- package/.claude/skills/infrastructure-architecture/resources/data-architecture.md +0 -501
- package/.claude/skills/infrastructure-architecture/resources/disaster-recovery.md +0 -535
- package/.claude/skills/infrastructure-architecture/resources/migration-architecture.md +0 -512
- package/.claude/skills/infrastructure-architecture/resources/multi-region-design.md +0 -608
- package/.claude/skills/infrastructure-architecture/resources/reference-architectures.md +0 -562
- package/.claude/skills/infrastructure-architecture/resources/security-architecture.md +0 -538
- package/.claude/skills/infrastructure-architecture/resources/system-design-principles.md +0 -489
- package/.claude/skills/infrastructure-architecture/resources/workload-classification.md +0 -1000
- package/.claude/skills/infrastructure-strategy/SKILL.md +0 -924
- package/.claude/skills/network-engineering/SKILL.md +0 -385
- package/.claude/skills/network-engineering/resources/dns-management.md +0 -738
- package/.claude/skills/network-engineering/resources/load-balancing.md +0 -820
- package/.claude/skills/network-engineering/resources/network-architecture.md +0 -546
- package/.claude/skills/network-engineering/resources/network-security.md +0 -921
- package/.claude/skills/network-engineering/resources/network-troubleshooting.md +0 -749
- package/.claude/skills/network-engineering/resources/routing-switching.md +0 -373
- package/.claude/skills/network-engineering/resources/sdn-networking.md +0 -695
- package/.claude/skills/network-engineering/resources/service-mesh-networking.md +0 -777
- package/.claude/skills/network-engineering/resources/tcp-ip-protocols.md +0 -444
- package/.claude/skills/network-engineering/resources/vpn-connectivity.md +0 -672
- package/.claude/skills/node-development/SKILL.md +0 -317
- package/.claude/skills/observability-engineering/SKILL.md +0 -101
- package/.claude/skills/observability-engineering/resources/apm-tools.md +0 -97
- package/.claude/skills/observability-engineering/resources/correlation-strategies.md +0 -87
- package/.claude/skills/observability-engineering/resources/distributed-tracing.md +0 -98
- package/.claude/skills/observability-engineering/resources/logs-aggregation.md +0 -118
- package/.claude/skills/observability-engineering/resources/observability-cost-optimization.md +0 -141
- package/.claude/skills/observability-engineering/resources/opentelemetry.md +0 -110
- package/.claude/skills/platform-engineering/SKILL.md +0 -555
- package/.claude/skills/platform-engineering/resources/architecture-overview.md +0 -600
- package/.claude/skills/platform-engineering/resources/container-orchestration.md +0 -916
- package/.claude/skills/platform-engineering/resources/cost-optimization.md +0 -634
- package/.claude/skills/platform-engineering/resources/developer-platforms.md +0 -670
- package/.claude/skills/platform-engineering/resources/gitops-automation.md +0 -650
- package/.claude/skills/platform-engineering/resources/infrastructure-as-code.md +0 -778
- package/.claude/skills/platform-engineering/resources/infrastructure-standards.md +0 -708
- package/.claude/skills/platform-engineering/resources/multi-tenancy.md +0 -602
- package/.claude/skills/platform-engineering/resources/platform-security.md +0 -711
- package/.claude/skills/platform-engineering/resources/resource-management.md +0 -592
- package/.claude/skills/platform-engineering/resources/service-mesh.md +0 -628
- package/.claude/skills/release-engineering/SKILL.md +0 -393
- package/.claude/skills/release-engineering/resources/artifact-management.md +0 -108
- package/.claude/skills/release-engineering/resources/build-optimization.md +0 -84
- package/.claude/skills/release-engineering/resources/ci-cd-pipelines.md +0 -411
- package/.claude/skills/release-engineering/resources/deployment-strategies.md +0 -197
- package/.claude/skills/release-engineering/resources/pipeline-security.md +0 -62
- package/.claude/skills/release-engineering/resources/progressive-delivery.md +0 -83
- package/.claude/skills/release-engineering/resources/release-automation.md +0 -68
- package/.claude/skills/release-engineering/resources/release-orchestration.md +0 -77
- package/.claude/skills/release-engineering/resources/rollback-strategies.md +0 -66
- package/.claude/skills/release-engineering/resources/versioning-strategies.md +0 -59
- package/.claude/skills/route-tester/SKILL.md +0 -392
- package/.claude/skills/skill-developer/ADVANCED.md +0 -197
- package/.claude/skills/skill-developer/HOOK_MECHANISMS.md +0 -306
- package/.claude/skills/skill-developer/PATTERNS_LIBRARY.md +0 -152
- package/.claude/skills/skill-developer/SKILL.md +0 -430
- package/.claude/skills/skill-developer/SKILL_RULES_REFERENCE.md +0 -315
- package/.claude/skills/skill-developer/TRIGGER_TYPES.md +0 -305
- package/.claude/skills/skill-developer/TROUBLESHOOTING.md +0 -514
- package/.claude/skills/skill-rules.json +0 -2989
- package/.claude/skills/sre/SKILL.md +0 -464
- package/.claude/skills/sre/resources/alerting-best-practices.md +0 -282
- package/.claude/skills/sre/resources/capacity-planning.md +0 -226
- package/.claude/skills/sre/resources/chaos-engineering.md +0 -193
- package/.claude/skills/sre/resources/disaster-recovery.md +0 -232
- package/.claude/skills/sre/resources/incident-management.md +0 -436
- package/.claude/skills/sre/resources/observability-stack.md +0 -240
- package/.claude/skills/sre/resources/on-call-runbooks.md +0 -167
- package/.claude/skills/sre/resources/performance-optimization.md +0 -108
- package/.claude/skills/sre/resources/reliability-patterns.md +0 -183
- package/.claude/skills/sre/resources/slo-sli-sla.md +0 -464
- package/.claude/skills/sre/resources/toil-reduction.md +0 -145
- package/.claude/skills/systems-engineering/SKILL.md +0 -648
- package/.claude/skills/systems-engineering/resources/automation-patterns.md +0 -771
- package/.claude/skills/systems-engineering/resources/configuration-management.md +0 -998
- package/.claude/skills/systems-engineering/resources/linux-administration.md +0 -672
- package/.claude/skills/systems-engineering/resources/networking-fundamentals.md +0 -982
- package/.claude/skills/systems-engineering/resources/performance-tuning.md +0 -871
- package/.claude/skills/systems-engineering/resources/powershell-scripting.md +0 -482
- package/.claude/skills/systems-engineering/resources/security-hardening.md +0 -739
- package/.claude/skills/systems-engineering/resources/shell-scripting.md +0 -915
- package/.claude/skills/systems-engineering/resources/storage-management.md +0 -628
- package/.claude/skills/systems-engineering/resources/system-monitoring.md +0 -787
- package/.claude/skills/systems-engineering/resources/troubleshooting-guide.md +0 -753
- package/.claude/skills/systems-engineering/resources/windows-administration.md +0 -738
- package/.claude/skills/technical-leadership/SKILL.md +0 -728
- package/backend/docs/SECRETS_DOCUMENTATION.md +0 -327
- package/frontend/dist/assets/index-BC-NbKXi.css +0 -32
- package/frontend/dist/assets/index-DqJXZMHY.js +0 -11266
|
@@ -1,240 +0,0 @@
|
|
|
1
|
-
# Observability Stack
|
|
2
|
-
|
|
3
|
-
Prometheus + Grafana setup, Loki for logs, Jaeger/Tempo for distributed tracing, and comprehensive observability implementation.
|
|
4
|
-
|
|
5
|
-
## Table of Contents
|
|
6
|
-
|
|
7
|
-
- [Overview](#overview)
|
|
8
|
-
- [Metrics - Prometheus + Grafana](#metrics---prometheus--grafana)
|
|
9
|
-
- [Logs - Loki](#logs---loki)
|
|
10
|
-
- [Traces - Jaeger/Tempo](#traces---jaegertempo)
|
|
11
|
-
- [Integration](#integration)
|
|
12
|
-
|
|
13
|
-
## Overview
|
|
14
|
-
|
|
15
|
-
**Three Pillars of Observability:**
|
|
16
|
-
```
|
|
17
|
-
Metrics → What is happening (aggregated numbers)
|
|
18
|
-
Logs → Detailed event records
|
|
19
|
-
Traces → Request flow through system
|
|
20
|
-
```
|
|
21
|
-
|
|
22
|
-
## Metrics - Prometheus + Grafana
|
|
23
|
-
|
|
24
|
-
**Prometheus Setup:**
|
|
25
|
-
```yaml
|
|
26
|
-
# prometheus-values.yaml
|
|
27
|
-
prometheus:
|
|
28
|
-
prometheusSpec:
|
|
29
|
-
retention: 15d
|
|
30
|
-
retentionSize: "50GB"
|
|
31
|
-
storageSpec:
|
|
32
|
-
volumeClaimTemplate:
|
|
33
|
-
spec:
|
|
34
|
-
resources:
|
|
35
|
-
requests:
|
|
36
|
-
storage: 100Gi
|
|
37
|
-
|
|
38
|
-
serviceMonitorSelectorNilUsesHelmValues: false
|
|
39
|
-
podMonitorSelectorNilUsesHelmValues: false
|
|
40
|
-
|
|
41
|
-
additionalScrapeConfigs:
|
|
42
|
-
- job_name: 'kubernetes-pods'
|
|
43
|
-
kubernetes_sd_configs:
|
|
44
|
-
- role: pod
|
|
45
|
-
relabel_configs:
|
|
46
|
-
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
|
|
47
|
-
action: keep
|
|
48
|
-
regex: true
|
|
49
|
-
```
|
|
50
|
-
|
|
51
|
-
**ServiceMonitor:**
|
|
52
|
-
```yaml
|
|
53
|
-
apiVersion: monitoring.coreos.com/v1
|
|
54
|
-
kind: ServiceMonitor
|
|
55
|
-
metadata:
|
|
56
|
-
name: api-service
|
|
57
|
-
spec:
|
|
58
|
-
selector:
|
|
59
|
-
matchLabels:
|
|
60
|
-
app: api
|
|
61
|
-
endpoints:
|
|
62
|
-
- port: metrics
|
|
63
|
-
interval: 30s
|
|
64
|
-
path: /metrics
|
|
65
|
-
```
|
|
66
|
-
|
|
67
|
-
**Grafana Dashboard:**
|
|
68
|
-
```json
|
|
69
|
-
{
|
|
70
|
-
"dashboard": {
|
|
71
|
-
"title": "Service Overview",
|
|
72
|
-
"panels": [
|
|
73
|
-
{
|
|
74
|
-
"title": "Request Rate",
|
|
75
|
-
"targets": [{
|
|
76
|
-
"expr": "rate(http_requests_total[5m])"
|
|
77
|
-
}]
|
|
78
|
-
},
|
|
79
|
-
{
|
|
80
|
-
"title": "Error Rate",
|
|
81
|
-
"targets": [{
|
|
82
|
-
"expr": "rate(http_requests_total{status=~\"5..\"}[5m]) / rate(http_requests_total[5m])"
|
|
83
|
-
}]
|
|
84
|
-
},
|
|
85
|
-
{
|
|
86
|
-
"title": "Latency p95",
|
|
87
|
-
"targets": [{
|
|
88
|
-
"expr": "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m]))"
|
|
89
|
-
}]
|
|
90
|
-
}
|
|
91
|
-
]
|
|
92
|
-
}
|
|
93
|
-
}
|
|
94
|
-
```
|
|
95
|
-
|
|
96
|
-
## Logs - Loki
|
|
97
|
-
|
|
98
|
-
**Loki Setup:**
|
|
99
|
-
```yaml
|
|
100
|
-
# loki-values.yaml
|
|
101
|
-
loki:
|
|
102
|
-
auth_enabled: false
|
|
103
|
-
|
|
104
|
-
ingester:
|
|
105
|
-
chunk_idle_period: 3m
|
|
106
|
-
chunk_retain_period: 1m
|
|
107
|
-
max_chunk_age: 1h
|
|
108
|
-
|
|
109
|
-
limits_config:
|
|
110
|
-
enforce_metric_name: false
|
|
111
|
-
reject_old_samples: true
|
|
112
|
-
reject_old_samples_max_age: 168h
|
|
113
|
-
ingestion_rate_mb: 10
|
|
114
|
-
ingestion_burst_size_mb: 20
|
|
115
|
-
|
|
116
|
-
schema_config:
|
|
117
|
-
configs:
|
|
118
|
-
- from: 2024-01-01
|
|
119
|
-
store: boltdb-shipper
|
|
120
|
-
object_store: s3
|
|
121
|
-
schema: v11
|
|
122
|
-
index:
|
|
123
|
-
prefix: loki_index_
|
|
124
|
-
period: 24h
|
|
125
|
-
|
|
126
|
-
promtail:
|
|
127
|
-
config:
|
|
128
|
-
clients:
|
|
129
|
-
- url: http://loki:3100/loki/api/v1/push
|
|
130
|
-
```
|
|
131
|
-
|
|
132
|
-
**Promtail Config:**
|
|
133
|
-
```yaml
|
|
134
|
-
scrape_configs:
|
|
135
|
-
- job_name: kubernetes-pods
|
|
136
|
-
kubernetes_sd_configs:
|
|
137
|
-
- role: pod
|
|
138
|
-
pipeline_stages:
|
|
139
|
-
- docker: {}
|
|
140
|
-
- json:
|
|
141
|
-
expressions:
|
|
142
|
-
level: level
|
|
143
|
-
timestamp: timestamp
|
|
144
|
-
message: message
|
|
145
|
-
- labels:
|
|
146
|
-
level:
|
|
147
|
-
app:
|
|
148
|
-
```
|
|
149
|
-
|
|
150
|
-
**LogQL Queries:**
|
|
151
|
-
```
|
|
152
|
-
# Recent errors
|
|
153
|
-
{app="api"} |= "error" | json
|
|
154
|
-
|
|
155
|
-
# Rate of errors
|
|
156
|
-
rate({app="api"} |= "error" [5m])
|
|
157
|
-
|
|
158
|
-
# Latency > 1s
|
|
159
|
-
{app="api"} | json | duration > 1s
|
|
160
|
-
```
|
|
161
|
-
|
|
162
|
-
## Traces - Jaeger/Tempo
|
|
163
|
-
|
|
164
|
-
**Tempo Setup:**
|
|
165
|
-
```yaml
|
|
166
|
-
tempo:
|
|
167
|
-
storage:
|
|
168
|
-
trace:
|
|
169
|
-
backend: s3
|
|
170
|
-
s3:
|
|
171
|
-
bucket: traces
|
|
172
|
-
endpoint: s3.amazonaws.com
|
|
173
|
-
|
|
174
|
-
receivers:
|
|
175
|
-
jaeger:
|
|
176
|
-
protocols:
|
|
177
|
-
grpc:
|
|
178
|
-
thrift_http:
|
|
179
|
-
otlp:
|
|
180
|
-
protocols:
|
|
181
|
-
grpc:
|
|
182
|
-
http:
|
|
183
|
-
```
|
|
184
|
-
|
|
185
|
-
**Application Instrumentation (OpenTelemetry):**
|
|
186
|
-
```typescript
|
|
187
|
-
import { NodeTracerProvider } from '@opentelemetry/sdk-trace-node';
|
|
188
|
-
import { registerInstrumentations } from '@opentelemetry/instrumentation';
|
|
189
|
-
import { HttpInstrumentation } from '@opentelemetry/instrumentation-http';
|
|
190
|
-
import { JaegerExporter } from '@opentelemetry/exporter-jaeger';
|
|
191
|
-
|
|
192
|
-
const provider = new NodeTracerProvider();
|
|
193
|
-
provider.addSpanProcessor(
|
|
194
|
-
new BatchSpanProcessor(
|
|
195
|
-
new JaegerExporter({
|
|
196
|
-
endpoint: 'http://tempo:14268/api/traces'
|
|
197
|
-
})
|
|
198
|
-
)
|
|
199
|
-
);
|
|
200
|
-
|
|
201
|
-
provider.register();
|
|
202
|
-
|
|
203
|
-
registerInstrumentations({
|
|
204
|
-
instrumentations: [
|
|
205
|
-
new HttpInstrumentation(),
|
|
206
|
-
new ExpressInstrumentation()
|
|
207
|
-
]
|
|
208
|
-
});
|
|
209
|
-
```
|
|
210
|
-
|
|
211
|
-
## Integration
|
|
212
|
-
|
|
213
|
-
**Grafana Unified View:**
|
|
214
|
-
```json
|
|
215
|
-
{
|
|
216
|
-
"panels": [
|
|
217
|
-
{
|
|
218
|
-
"title": "Metrics",
|
|
219
|
-
"datasource": "Prometheus",
|
|
220
|
-
"targets": [{"expr": "rate(http_requests_total[5m])"}]
|
|
221
|
-
},
|
|
222
|
-
{
|
|
223
|
-
"title": "Logs",
|
|
224
|
-
"datasource": "Loki",
|
|
225
|
-
"targets": [{"expr": "{app=\"api\"}"}]
|
|
226
|
-
},
|
|
227
|
-
{
|
|
228
|
-
"title": "Traces",
|
|
229
|
-
"datasource": "Tempo",
|
|
230
|
-
"targets": [{"query": "service.name=\"api\""}]
|
|
231
|
-
}
|
|
232
|
-
]
|
|
233
|
-
}
|
|
234
|
-
```
|
|
235
|
-
|
|
236
|
-
---
|
|
237
|
-
|
|
238
|
-
**Related Resources:**
|
|
239
|
-
- [alerting-best-practices.md](alerting-best-practices.md)
|
|
240
|
-
- [slo-sli-sla.md](slo-sli-sla.md)
|
|
@@ -1,167 +0,0 @@
|
|
|
1
|
-
# On-Call Runbooks
|
|
2
|
-
|
|
3
|
-
Runbook structure, common scenarios, debugging procedures, escalation paths, and incident response playbooks.
|
|
4
|
-
|
|
5
|
-
## Runbook Template
|
|
6
|
-
|
|
7
|
-
```markdown
|
|
8
|
-
# Runbook: [Service Name] - [Issue Type]
|
|
9
|
-
|
|
10
|
-
## Overview
|
|
11
|
-
**Service:** api-service
|
|
12
|
-
**On-Call:** platform-team
|
|
13
|
-
**Severity:** SEV2
|
|
14
|
-
**Last Updated:** 2024-01-15
|
|
15
|
-
|
|
16
|
-
## Symptoms
|
|
17
|
-
- High API error rate (> 1%)
|
|
18
|
-
- Increased p95 latency (> 1s)
|
|
19
|
-
- Customer reports of failures
|
|
20
|
-
|
|
21
|
-
## Impact
|
|
22
|
-
- API requests failing
|
|
23
|
-
- Users unable to complete actions
|
|
24
|
-
- Revenue impact: $X/hour
|
|
25
|
-
|
|
26
|
-
## Diagnosis
|
|
27
|
-
|
|
28
|
-
### 1. Check Service Health
|
|
29
|
-
\`\`\`bash
|
|
30
|
-
# Check pod status
|
|
31
|
-
kubectl get pods -n production -l app=api-service
|
|
32
|
-
|
|
33
|
-
# Check recent logs
|
|
34
|
-
kubectl logs -n production -l app=api-service --tail=100
|
|
35
|
-
|
|
36
|
-
# Check metrics
|
|
37
|
-
curl https://grafana.example.com/d/api-health
|
|
38
|
-
\`\`\`
|
|
39
|
-
|
|
40
|
-
### 2. Check Dependencies
|
|
41
|
-
\`\`\`bash
|
|
42
|
-
# Database connectivity
|
|
43
|
-
psql -h db.example.com -U api -c "SELECT 1"
|
|
44
|
-
|
|
45
|
-
# Redis connectivity
|
|
46
|
-
redis-cli -h cache.example.com ping
|
|
47
|
-
|
|
48
|
-
# External API
|
|
49
|
-
curl https://external-api.example.com/health
|
|
50
|
-
\`\`\`
|
|
51
|
-
|
|
52
|
-
### 3. Check Recent Changes
|
|
53
|
-
\`\`\`bash
|
|
54
|
-
# Recent deployments
|
|
55
|
-
kubectl rollout history deployment/api-service -n production
|
|
56
|
-
|
|
57
|
-
# Recent config changes
|
|
58
|
-
git log --since="2 hours ago" -- config/
|
|
59
|
-
\`\`\`
|
|
60
|
-
|
|
61
|
-
## Common Causes
|
|
62
|
-
|
|
63
|
-
### Database Connection Pool Exhausted
|
|
64
|
-
**Symptoms:** Connection timeout errors
|
|
65
|
-
**Fix:**
|
|
66
|
-
\`\`\`bash
|
|
67
|
-
# Scale connection pool
|
|
68
|
-
kubectl set env deployment/api-service DB_POOL_SIZE=100
|
|
69
|
-
|
|
70
|
-
# Or restart pods to reset connections
|
|
71
|
-
kubectl rollout restart deployment/api-service
|
|
72
|
-
\`\`\`
|
|
73
|
-
|
|
74
|
-
### High Traffic Spike
|
|
75
|
-
**Symptoms:** All resources at capacity
|
|
76
|
-
**Fix:**
|
|
77
|
-
\`\`\`bash
|
|
78
|
-
# Scale up replicas
|
|
79
|
-
kubectl scale deployment/api-service --replicas=20
|
|
80
|
-
|
|
81
|
-
# Enable rate limiting
|
|
82
|
-
kubectl apply -f rate-limit-config.yaml
|
|
83
|
-
\`\`\`
|
|
84
|
-
|
|
85
|
-
### Downstream Service Failure
|
|
86
|
-
**Symptoms:** Timeouts to specific service
|
|
87
|
-
**Fix:**
|
|
88
|
-
\`\`\`bash
|
|
89
|
-
# Enable circuit breaker
|
|
90
|
-
kubectl apply -f circuit-breaker.yaml
|
|
91
|
-
|
|
92
|
-
# Or disable affected feature
|
|
93
|
-
kubectl set env deployment/api-service FEATURE_X_ENABLED=false
|
|
94
|
-
\`\`\`
|
|
95
|
-
|
|
96
|
-
## Mitigation Steps
|
|
97
|
-
|
|
98
|
-
1. **Immediate (< 5 minutes)**
|
|
99
|
-
- Roll back recent deployment if applicable
|
|
100
|
-
- Scale resources if needed
|
|
101
|
-
- Enable circuit breakers
|
|
102
|
-
|
|
103
|
-
2. **Short-term (< 30 minutes)**
|
|
104
|
-
- Identify root cause
|
|
105
|
-
- Apply targeted fix
|
|
106
|
-
- Monitor recovery
|
|
107
|
-
|
|
108
|
-
3. **Long-term (follow-up)**
|
|
109
|
-
- Schedule postmortem
|
|
110
|
-
- Implement preventive measures
|
|
111
|
-
- Update runbook
|
|
112
|
-
|
|
113
|
-
## Escalation
|
|
114
|
-
|
|
115
|
-
- **L1 (Primary):** platform-team on-call
|
|
116
|
-
- **L2 (Secondary):** platform-team-lead
|
|
117
|
-
- **L3 (Manager):** Engineering Manager
|
|
118
|
-
- **External:** DBA team (for database issues)
|
|
119
|
-
|
|
120
|
-
## Related Runbooks
|
|
121
|
-
- [Database Connection Issues](runbook-db-connections.md)
|
|
122
|
-
- [High Traffic Handling](runbook-traffic-spike.md)
|
|
123
|
-
- [Deployment Rollback](runbook-rollback.md)
|
|
124
|
-
|
|
125
|
-
## Validation
|
|
126
|
-
After mitigation:
|
|
127
|
-
- [ ] Error rate < 0.1%
|
|
128
|
-
- [ ] Latency back to normal (< 200ms p95)
|
|
129
|
-
- [ ] All pods healthy
|
|
130
|
-
- [ ] No active alerts
|
|
131
|
-
- [ ] Customer impact resolved
|
|
132
|
-
\`\`\`
|
|
133
|
-
|
|
134
|
-
## Common Runbooks
|
|
135
|
-
|
|
136
|
-
**High Memory Usage:**
|
|
137
|
-
```bash
|
|
138
|
-
# Diagnosis
|
|
139
|
-
kubectl top pods -n production
|
|
140
|
-
kubectl describe pod <pod-name>
|
|
141
|
-
|
|
142
|
-
# Check for memory leaks
|
|
143
|
-
curl http://pod-ip:9090/debug/pprof/heap
|
|
144
|
-
|
|
145
|
-
# Mitigation
|
|
146
|
-
kubectl set resources deployment/api-service --limits=memory=2Gi
|
|
147
|
-
kubectl rollout restart deployment/api-service
|
|
148
|
-
```
|
|
149
|
-
|
|
150
|
-
**Disk Space Full:**
|
|
151
|
-
```bash
|
|
152
|
-
# Check disk usage
|
|
153
|
-
df -h
|
|
154
|
-
|
|
155
|
-
# Find large files
|
|
156
|
-
du -sh /* | sort -hr | head -10
|
|
157
|
-
|
|
158
|
-
# Clean up
|
|
159
|
-
docker system prune -a --volumes -f
|
|
160
|
-
kubectl delete pods --field-selector status.phase=Failed
|
|
161
|
-
```
|
|
162
|
-
|
|
163
|
-
---
|
|
164
|
-
|
|
165
|
-
**Related Resources:**
|
|
166
|
-
- [incident-management.md](incident-management.md)
|
|
167
|
-
- [observability-stack.md](observability-stack.md)
|
|
@@ -1,108 +0,0 @@
|
|
|
1
|
-
# Performance Optimization
|
|
2
|
-
|
|
3
|
-
Profiling techniques, bottleneck identification, database optimization, caching strategies, and performance tuning.
|
|
4
|
-
|
|
5
|
-
## Profiling
|
|
6
|
-
|
|
7
|
-
**Application Profiling:**
|
|
8
|
-
```bash
|
|
9
|
-
# Node.js profiling
|
|
10
|
-
node --prof app.js
|
|
11
|
-
node --prof-process isolate-*-v8.log > processed.txt
|
|
12
|
-
|
|
13
|
-
# Python profiling
|
|
14
|
-
python -m cProfile -o profile.stats app.py
|
|
15
|
-
python -m pstats profile.stats
|
|
16
|
-
|
|
17
|
-
# Go profiling
|
|
18
|
-
go tool pprof http://localhost:6060/debug/pprof/profile
|
|
19
|
-
```
|
|
20
|
-
|
|
21
|
-
**Database Query Profiling:**
|
|
22
|
-
```sql
|
|
23
|
-
-- PostgreSQL
|
|
24
|
-
EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'user@example.com';
|
|
25
|
-
|
|
26
|
-
-- Add index
|
|
27
|
-
CREATE INDEX idx_users_email ON users(email);
|
|
28
|
-
|
|
29
|
-
-- MySQL
|
|
30
|
-
EXPLAIN SELECT * FROM users WHERE email = 'user@example.com';
|
|
31
|
-
```
|
|
32
|
-
|
|
33
|
-
## Caching Strategies
|
|
34
|
-
|
|
35
|
-
**Multi-Layer Caching:**
|
|
36
|
-
```
|
|
37
|
-
Application Cache (in-memory)
|
|
38
|
-
↓ miss
|
|
39
|
-
CDN Cache (edge)
|
|
40
|
-
↓ miss
|
|
41
|
-
Redis Cache (distributed)
|
|
42
|
-
↓ miss
|
|
43
|
-
Database
|
|
44
|
-
```
|
|
45
|
-
|
|
46
|
-
**Redis Caching:**
|
|
47
|
-
```typescript
|
|
48
|
-
import Redis from 'ioredis';
|
|
49
|
-
const redis = new Redis();
|
|
50
|
-
|
|
51
|
-
async function getCachedData(key: string) {
|
|
52
|
-
// Try cache first
|
|
53
|
-
const cached = await redis.get(key);
|
|
54
|
-
if (cached) {
|
|
55
|
-
return JSON.parse(cached);
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
// Fetch from database
|
|
59
|
-
const data = await database.query(key);
|
|
60
|
-
|
|
61
|
-
// Cache for 1 hour
|
|
62
|
-
await redis.setex(key, 3600, JSON.stringify(data));
|
|
63
|
-
|
|
64
|
-
return data;
|
|
65
|
-
}
|
|
66
|
-
```
|
|
67
|
-
|
|
68
|
-
## Database Optimization
|
|
69
|
-
|
|
70
|
-
**Indexing Strategy:**
|
|
71
|
-
```sql
|
|
72
|
-
-- Add index for frequent queries
|
|
73
|
-
CREATE INDEX idx_orders_user_created ON orders(user_id, created_at);
|
|
74
|
-
|
|
75
|
-
-- Partial index for active records
|
|
76
|
-
CREATE INDEX idx_active_users ON users(id) WHERE status = 'active';
|
|
77
|
-
|
|
78
|
-
-- Covering index
|
|
79
|
-
CREATE INDEX idx_users_lookup ON users(email) INCLUDE (name, created_at);
|
|
80
|
-
```
|
|
81
|
-
|
|
82
|
-
**Connection Pooling:**
|
|
83
|
-
```typescript
|
|
84
|
-
import { Pool } from 'pg';
|
|
85
|
-
|
|
86
|
-
const pool = new Pool({
|
|
87
|
-
max: 20, // Maximum connections
|
|
88
|
-
idleTimeoutMillis: 30000,
|
|
89
|
-
connectionTimeoutMillis: 2000,
|
|
90
|
-
});
|
|
91
|
-
```
|
|
92
|
-
|
|
93
|
-
## Performance Best Practices
|
|
94
|
-
|
|
95
|
-
1. **Use CDN for static assets**
|
|
96
|
-
2. **Implement proper caching**
|
|
97
|
-
3. **Optimize database queries**
|
|
98
|
-
4. **Use connection pooling**
|
|
99
|
-
5. **Enable compression**
|
|
100
|
-
6. **Lazy load resources**
|
|
101
|
-
7. **Minimize payload size**
|
|
102
|
-
8. **Use async/parallel processing**
|
|
103
|
-
|
|
104
|
-
---
|
|
105
|
-
|
|
106
|
-
**Related Resources:**
|
|
107
|
-
- [capacity-planning.md](capacity-planning.md)
|
|
108
|
-
- [observability-stack.md](observability-stack.md)
|
|
@@ -1,183 +0,0 @@
|
|
|
1
|
-
# Reliability Patterns
|
|
2
|
-
|
|
3
|
-
Circuit breakers, retries, timeouts, bulkheads, rate limiting, graceful degradation, and resilience design patterns.
|
|
4
|
-
|
|
5
|
-
## Circuit Breaker Pattern
|
|
6
|
-
|
|
7
|
-
**Concept:**
|
|
8
|
-
```
|
|
9
|
-
Closed → Normal operation
|
|
10
|
-
↓ (failures exceed threshold)
|
|
11
|
-
Open → Fail fast, don't call service
|
|
12
|
-
↓ (after timeout)
|
|
13
|
-
Half-Open → Test if service recovered
|
|
14
|
-
↓ (success)
|
|
15
|
-
Closed → Resume normal operation
|
|
16
|
-
```
|
|
17
|
-
|
|
18
|
-
**Implementation (Resilience4j):**
|
|
19
|
-
```java
|
|
20
|
-
CircuitBreakerConfig config = CircuitBreakerConfig.custom()
|
|
21
|
-
.failureRateThreshold(50)
|
|
22
|
-
.waitDurationInOpenState(Duration.ofMillis(30000))
|
|
23
|
-
.slidingWindowSize(10)
|
|
24
|
-
.build();
|
|
25
|
-
|
|
26
|
-
CircuitBreaker circuitBreaker = CircuitBreaker.of("api", config);
|
|
27
|
-
|
|
28
|
-
Supplier<String> decoratedSupplier = CircuitBreaker
|
|
29
|
-
.decorateSupplier(circuitBreaker, () -> callExternalService());
|
|
30
|
-
|
|
31
|
-
String result = Try.ofSupplier(decoratedSupplier)
|
|
32
|
-
.recover(throwable -> "fallback value")
|
|
33
|
-
.get();
|
|
34
|
-
```
|
|
35
|
-
|
|
36
|
-
## Retry Pattern
|
|
37
|
-
|
|
38
|
-
**Exponential Backoff:**
|
|
39
|
-
```typescript
|
|
40
|
-
async function retryWithBackoff<T>(
|
|
41
|
-
fn: () => Promise<T>,
|
|
42
|
-
maxRetries = 3
|
|
43
|
-
): Promise<T> {
|
|
44
|
-
for (let i = 0; i < maxRetries; i++) {
|
|
45
|
-
try {
|
|
46
|
-
return await fn();
|
|
47
|
-
} catch (error) {
|
|
48
|
-
if (i === maxRetries - 1) throw error;
|
|
49
|
-
|
|
50
|
-
const delay = Math.pow(2, i) * 1000; // 1s, 2s, 4s, ... (doubles per retry; no wait after the final attempt)
|
|
51
|
-
await new Promise(resolve => setTimeout(resolve, delay));
|
|
52
|
-
}
|
|
53
|
-
}
|
|
54
|
-
throw new Error('Max retries exceeded');
|
|
55
|
-
}
|
|
56
|
-
```
|
|
57
|
-
|
|
58
|
-
## Timeout Pattern
|
|
59
|
-
|
|
60
|
-
**Service Timeouts:**
|
|
61
|
-
```yaml
|
|
62
|
-
timeouts:
|
|
63
|
-
connection_timeout: 5s
|
|
64
|
-
read_timeout: 30s
|
|
65
|
-
write_timeout: 30s
|
|
66
|
-
idle_timeout: 120s
|
|
67
|
-
```
|
|
68
|
-
|
|
69
|
-
**Implementation:**
|
|
70
|
-
```typescript
|
|
71
|
-
async function fetchWithTimeout(url: string, timeout = 5000) {
|
|
72
|
-
const controller = new AbortController();
|
|
73
|
-
const timeoutId = setTimeout(() => controller.abort(), timeout);
|
|
74
|
-
|
|
75
|
-
try {
|
|
76
|
-
const response = await fetch(url, { signal: controller.signal });
|
|
77
|
-
return response;
|
|
78
|
-
} finally {
|
|
79
|
-
clearTimeout(timeoutId);
|
|
80
|
-
}
|
|
81
|
-
}
|
|
82
|
-
```
|
|
83
|
-
|
|
84
|
-
## Bulkhead Pattern
|
|
85
|
-
|
|
86
|
-
**Resource Isolation:**
|
|
87
|
-
```
|
|
88
|
-
Service A → Connection Pool A (20 connections)
|
|
89
|
-
Service B → Connection Pool B (20 connections)
|
|
90
|
-
Service C → Connection Pool C (10 connections)
|
|
91
|
-
|
|
92
|
-
Failure in Service A doesn't affect B or C
|
|
93
|
-
```
|
|
94
|
-
|
|
95
|
-
**Thread Pool Isolation:**
|
|
96
|
-
```java
|
|
97
|
-
ThreadPoolExecutor serviceAPool = new ThreadPoolExecutor(
|
|
98
|
-
10, // core size
|
|
99
|
-
20, // max size
|
|
100
|
-
60L, TimeUnit.SECONDS,
|
|
101
|
-
new LinkedBlockingQueue<>(100)
|
|
102
|
-
);
|
|
103
|
-
```
|
|
104
|
-
|
|
105
|
-
## Rate Limiting
|
|
106
|
-
|
|
107
|
-
**Token Bucket:**
|
|
108
|
-
```typescript
|
|
109
|
-
class RateLimiter {
|
|
110
|
-
private tokens: number;
|
|
111
|
-
private capacity: number;
|
|
112
|
-
private refillRate: number;
|
|
113
|
-
|
|
114
|
-
async acquire(): Promise<boolean> {
|
|
115
|
-
if (this.tokens >= 1) {
|
|
116
|
-
this.tokens--;
|
|
117
|
-
return true;
|
|
118
|
-
}
|
|
119
|
-
return false;
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
refill() {
|
|
123
|
-
this.tokens = Math.min(
|
|
124
|
-
this.capacity,
|
|
125
|
-
this.tokens + this.refillRate
|
|
126
|
-
);
|
|
127
|
-
}
|
|
128
|
-
}
|
|
129
|
-
```
|
|
130
|
-
|
|
131
|
-
**Nginx Rate Limiting:**
|
|
132
|
-
```nginx
|
|
133
|
-
limit_req_zone $binary_remote_addr zone=api:10m rate=10r/s;
|
|
134
|
-
|
|
135
|
-
server {
|
|
136
|
-
location /api/ {
|
|
137
|
-
limit_req zone=api burst=20 nodelay;
|
|
138
|
-
proxy_pass http://backend;
|
|
139
|
-
}
|
|
140
|
-
}
|
|
141
|
-
```
|
|
142
|
-
|
|
143
|
-
## Graceful Degradation
|
|
144
|
-
|
|
145
|
-
**Feature Flags:**
|
|
146
|
-
```typescript
|
|
147
|
-
async function getRecommendations(userId: string) {
|
|
148
|
-
if (!featureFlags.isEnabled('ml_recommendations')) {
|
|
149
|
-
// Fallback to simple algorithm
|
|
150
|
-
return getPopularItems();
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
try {
|
|
154
|
-
return await mlService.getRecommendations(userId);
|
|
155
|
-
} catch (error) {
|
|
156
|
-
// Degrade gracefully
|
|
157
|
-
return getPopularItems();
|
|
158
|
-
}
|
|
159
|
-
}
|
|
160
|
-
```
|
|
161
|
-
|
|
162
|
-
**Cache Fallback:**
|
|
163
|
-
```typescript
|
|
164
|
-
async function getData(key: string) {
|
|
165
|
-
try {
|
|
166
|
-
return await database.get(key);
|
|
167
|
-
} catch (error) {
|
|
168
|
-
// Serve stale data from cache
|
|
169
|
-
const stale = await cache.get(key);
|
|
170
|
-
if (stale) {
|
|
171
|
-
logger.warn('Serving stale data due to DB error');
|
|
172
|
-
return stale;
|
|
173
|
-
}
|
|
174
|
-
throw error;
|
|
175
|
-
}
|
|
176
|
-
}
|
|
177
|
-
```
|
|
178
|
-
|
|
179
|
-
---
|
|
180
|
-
|
|
181
|
-
**Related Resources:**
|
|
182
|
-
- [chaos-engineering.md](chaos-engineering.md)
|
|
183
|
-
- [incident-management.md](incident-management.md)
|