claude-flow-novice 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/CLAUDE.md +669 -51
- package/.claude/agents/agent-principles/CODER_AGENT_GUIDELINES.md +1245 -0
- package/.claude/agents/agent-principles/agent-type-guidelines.md +137 -0
- package/.claude/agents/agent-principles/format-selection.md +20 -0
- package/.claude/agents/agent-principles/prompt-engineering.md +165 -35
- package/.claude/agents/agent-principles/quality-metrics.md +83 -2
- package/.claude/agents/analysis/code-analyzer.md +722 -0
- package/.claude/agents/analysis/code-review/analyze-code-quality.md +33 -3
- package/.claude/agents/analysis/perf-analyzer.md +812 -0
- package/.claude/agents/architecture/system-architect.md +25 -11
- package/.claude/agents/cfn-loop/product-owner.md +458 -2
- package/.claude/agents/code-booster.md +13 -2
- package/.claude/agents/consensus/byzantine-coordinator.md +259 -6
- package/.claude/agents/consensus/consensus-builder.md +135 -2
- package/.claude/agents/consensus/crdt-synchronizer.md +307 -771
- package/.claude/agents/consensus/gossip-coordinator.md +227 -2
- package/.claude/agents/consensus/performance-benchmarker.md +385 -704
- package/.claude/agents/consensus/quorum-manager.md +241 -749
- package/.claude/agents/consensus/raft-manager.md +195 -2
- package/.claude/agents/consensus/security-manager.md +461 -518
- package/.claude/agents/core-agents/analyst.md +560 -0
- package/.claude/agents/core-agents/architect.md +578 -0
- package/.claude/agents/core-agents/base-template-generator.md +137 -0
- package/.claude/agents/core-agents/coder.md +409 -0
- package/.claude/agents/core-agents/coordinator.md +1429 -0
- package/.claude/agents/core-agents/planner.md +343 -0
- package/.claude/agents/core-agents/researcher.md +414 -0
- package/.claude/agents/core-agents/reviewer.md +652 -0
- package/.claude/agents/core-agents/task-coordinator.md +400 -0
- package/.claude/agents/core-agents/tester.md +912 -0
- package/.claude/agents/development/backend/dev-backend-api.md +418 -23
- package/.claude/agents/devops/devops-engineer.md +240 -433
- package/.claude/agents/documentation/api-docs/docs-api-openapi.md +350 -11
- package/.claude/agents/examples/blocking-coordinator-example.md +388 -0
- package/.claude/agents/frontend/interaction-tester.md +334 -17
- package/.claude/agents/frontend/react-frontend-engineer.md +255 -2
- package/.claude/agents/frontend/state-architect.md +235 -9
- package/.claude/agents/frontend/ui-designer.md +261 -132
- package/.claude/agents/goal/goal-planner.md +803 -52
- package/.claude/agents/planning-team/api-designer-persona.md +736 -0
- package/.claude/agents/planning-team/security-architect-persona.md +643 -0
- package/.claude/agents/planning-team/system-architect-persona.md +585 -0
- package/.claude/agents/product-owner-team/accessibility-advocate-persona.md +796 -0
- package/.claude/agents/product-owner-team/cto-agent.md +473 -0
- package/.claude/agents/product-owner-team/power-user-persona.md +590 -0
- package/.claude/agents/product-owner-team/product-owner-agent.md +806 -0
- package/.claude/agents/security/security-specialist.md +515 -13
- package/.claude/agents/sparc/architecture.md +237 -1
- package/.claude/agents/sparc/pseudocode.md +237 -1
- package/.claude/agents/sparc/refinement.md +244 -1
- package/.claude/agents/sparc/specification.md +282 -21
- package/.claude/agents/specialized/code-booster.md +826 -0
- package/.claude/agents/specialized/mobile/mobile-dev.md +560 -0
- package/.claude/agents/specialized/mobile/spec-mobile-react-native.md +33 -1
- package/.claude/agents/swarm/adaptive-coordinator-enhanced.md +485 -746
- package/.claude/agents/swarm/adaptive-coordinator.md +269 -37
- package/.claude/agents/swarm/blocking-coordinator-example.md +456 -0
- package/.claude/agents/swarm/hierarchical-coordinator.md +324 -60
- package/.claude/agents/swarm/mesh-coordinator.md +774 -324
- package/.claude/agents/swarm/test-coordinator.md +123 -74
- package/.claude/agents/testing/e2e/playwright-agent.md +32 -0
- package/.claude/agents/testing/interaction-tester.md +525 -0
- package/.claude/agents/testing/playwright-tester.md +405 -0
- package/.claude/agents/testing/production-validator.md +644 -0
- package/.claude/agents/testing/tdd-london-swarm.md +659 -0
- package/.claude/agents/testing/unit/tdd-london-swarm.md +27 -0
- package/.claude/agents/testing/validation/production-validator.md +390 -1
- package/.claude/agents-ignore/mesh-coordinator-backup.md +435 -0
- package/.claude/commands/cfn-loop-document.md +441 -0
- package/.claude/commands/github-commit.md +289 -0
- package/.claude-flow-novice/.claude/agents/CLAUDE.md +669 -51
- package/.claude-flow-novice/.claude/agents/agent-principles/agent-type-guidelines.md +137 -0
- package/.claude-flow-novice/.claude/agents/agent-principles/format-selection.md +20 -0
- package/.claude-flow-novice/.claude/agents/agent-principles/prompt-engineering.md +165 -35
- package/.claude-flow-novice/.claude/agents/agent-principles/quality-metrics.md +83 -2
- package/.claude-flow-novice/.claude/agents/analysis/code-analyzer.md +722 -192
- package/.claude-flow-novice/.claude/agents/analysis/code-review/analyze-code-quality.md +33 -3
- package/.claude-flow-novice/.claude/agents/analysis/perf-analyzer.md +812 -0
- package/.claude-flow-novice/.claude/agents/architecture/system-architect.md +25 -11
- package/.claude-flow-novice/.claude/agents/cfn-loop/product-owner.md +458 -2
- package/.claude-flow-novice/.claude/agents/code-booster.md +13 -2
- package/.claude-flow-novice/.claude/agents/consensus/byzantine-coordinator.md +259 -6
- package/.claude-flow-novice/.claude/agents/consensus/consensus-builder.md +135 -2
- package/.claude-flow-novice/.claude/agents/consensus/crdt-synchronizer.md +307 -771
- package/.claude-flow-novice/.claude/agents/consensus/gossip-coordinator.md +227 -2
- package/.claude-flow-novice/.claude/agents/consensus/performance-benchmarker.md +385 -704
- package/.claude-flow-novice/.claude/agents/consensus/quorum-manager.md +241 -749
- package/.claude-flow-novice/.claude/agents/consensus/raft-manager.md +195 -2
- package/.claude-flow-novice/.claude/agents/consensus/security-manager.md +461 -518
- package/.claude-flow-novice/.claude/agents/core-agents/analyst.md +560 -0
- package/.claude-flow-novice/.claude/agents/core-agents/architect.md +578 -0
- package/.claude-flow-novice/.claude/agents/core-agents/base-template-generator.md +137 -0
- package/.claude-flow-novice/.claude/agents/core-agents/coder.md +409 -0
- package/.claude-flow-novice/.claude/agents/core-agents/coordinator.md +1429 -0
- package/.claude-flow-novice/.claude/agents/core-agents/planner.md +343 -0
- package/.claude-flow-novice/.claude/agents/core-agents/researcher.md +414 -0
- package/.claude-flow-novice/.claude/agents/core-agents/reviewer.md +652 -0
- package/.claude-flow-novice/.claude/agents/core-agents/task-coordinator.md +400 -0
- package/.claude-flow-novice/.claude/agents/core-agents/tester.md +912 -0
- package/.claude-flow-novice/.claude/agents/development/backend/dev-backend-api.md +418 -23
- package/.claude-flow-novice/.claude/agents/devops/devops-engineer.md +240 -433
- package/.claude-flow-novice/.claude/agents/documentation/api-docs/docs-api-openapi.md +350 -11
- package/.claude-flow-novice/.claude/agents/examples/blocking-coordinator-example.md +388 -0
- package/.claude-flow-novice/.claude/agents/frontend/interaction-tester.md +334 -17
- package/.claude-flow-novice/.claude/agents/frontend/react-frontend-engineer.md +255 -2
- package/.claude-flow-novice/.claude/agents/frontend/state-architect.md +235 -9
- package/.claude-flow-novice/.claude/agents/frontend/ui-designer.md +261 -132
- package/.claude-flow-novice/.claude/agents/goal/goal-planner.md +803 -52
- package/.claude-flow-novice/.claude/agents/planning-team/api-designer-persona.md +736 -0
- package/.claude-flow-novice/.claude/agents/planning-team/security-architect-persona.md +643 -0
- package/.claude-flow-novice/.claude/agents/planning-team/system-architect-persona.md +585 -0
- package/.claude-flow-novice/.claude/agents/predesign-negotiation/accessibility-advocate-persona.md +796 -0
- package/.claude-flow-novice/.claude/agents/predesign-negotiation/cto-agent.md +473 -0
- package/.claude-flow-novice/.claude/agents/predesign-negotiation/power-user-persona.md +590 -0
- package/.claude-flow-novice/.claude/agents/predesign-negotiation/product-owner-agent.md +806 -0
- package/.claude-flow-novice/.claude/agents/product-owner-team/accessibility-advocate-persona.md +796 -0
- package/.claude-flow-novice/.claude/agents/product-owner-team/cto-agent.md +473 -0
- package/.claude-flow-novice/.claude/agents/product-owner-team/power-user-persona.md +590 -0
- package/.claude-flow-novice/.claude/agents/product-owner-team/product-owner-agent.md +806 -0
- package/.claude-flow-novice/.claude/agents/security/security-specialist.md +515 -13
- package/.claude-flow-novice/.claude/agents/sparc/architecture.md +237 -1
- package/.claude-flow-novice/.claude/agents/sparc/pseudocode.md +237 -1
- package/.claude-flow-novice/.claude/agents/sparc/refinement.md +244 -1
- package/.claude-flow-novice/.claude/agents/sparc/specification.md +282 -21
- package/.claude-flow-novice/.claude/agents/specialized/code-booster.md +826 -0
- package/.claude-flow-novice/.claude/agents/specialized/mobile/mobile-dev.md +560 -0
- package/.claude-flow-novice/.claude/agents/specialized/mobile/spec-mobile-react-native.md +33 -1
- package/.claude-flow-novice/.claude/agents/swarm/adaptive-coordinator-enhanced.md +485 -746
- package/.claude-flow-novice/.claude/agents/swarm/adaptive-coordinator.md +269 -37
- package/.claude-flow-novice/.claude/agents/swarm/blocking-coordinator-example.md +456 -0
- package/.claude-flow-novice/.claude/agents/swarm/hierarchical-coordinator.md +324 -60
- package/.claude-flow-novice/.claude/agents/swarm/mesh-coordinator.md +774 -324
- package/.claude-flow-novice/.claude/agents/swarm/test-coordinator.md +123 -74
- package/.claude-flow-novice/.claude/agents/testing/e2e/playwright-agent.md +32 -0
- package/.claude-flow-novice/.claude/agents/testing/interaction-tester.md +525 -0
- package/.claude-flow-novice/.claude/agents/testing/playwright-tester.md +405 -0
- package/.claude-flow-novice/.claude/agents/testing/production-validator.md +644 -0
- package/.claude-flow-novice/.claude/agents/testing/tdd-london-swarm.md +659 -0
- package/.claude-flow-novice/.claude/agents/testing/unit/tdd-london-swarm.md +27 -0
- package/.claude-flow-novice/.claude/agents/testing/validation/production-validator.md +390 -1
- package/.claude-flow-novice/config/typescript/tsconfig.tsbuildinfo +1 -1
- package/.claude-flow-novice/dist/__tests__/redis/RedisHealthMonitor.test.d.ts +14 -0
- package/.claude-flow-novice/dist/agents/heartbeat-manager.d.ts +73 -0
- package/.claude-flow-novice/dist/agents/lifecycle-cleanup-enhanced.d.ts +190 -0
- package/.claude-flow-novice/dist/cfn-loop/__tests__/agent-lifecycle-sqlite.test.d.ts +17 -0
- package/.claude-flow-novice/dist/cfn-loop/__tests__/blocking-coordination-audit.test.d.ts +16 -0
- package/.claude-flow-novice/dist/cfn-loop/__tests__/blocking-coordination-signals.test.d.ts +14 -0
- package/.claude-flow-novice/dist/cfn-loop/__tests__/byzantine-consensus-adapter.test.d.ts +14 -0
- package/.claude-flow-novice/dist/cfn-loop/__tests__/byzantine-performance.test.d.ts +17 -0
- package/.claude-flow-novice/dist/cfn-loop/__tests__/cfn-loop-byzantine-integration.test.d.ts +15 -0
- package/.claude-flow-novice/dist/cfn-loop/__tests__/cfn-loop-e2e.test.d.ts +15 -0
- package/.claude-flow-novice/dist/cfn-loop/__tests__/cfn-loop-memory-manager.test.d.ts +9 -0
- package/.claude-flow-novice/dist/cfn-loop/__tests__/cleanup-integration.test.d.ts +21 -0
- package/.claude-flow-novice/dist/cfn-loop/__tests__/cleanup-performance-validation.test.d.ts +13 -0
- package/.claude-flow-novice/dist/cfn-loop/__tests__/coordinator-timeout-handler.test.d.ts +14 -0
- package/.claude-flow-novice/dist/cfn-loop/__tests__/dead-coordinator-detection.test.d.ts +15 -0
- package/.claude-flow-novice/dist/cfn-loop/__tests__/doc-code-examples-validator.d.ts +35 -0
- package/.claude-flow-novice/dist/cfn-loop/__tests__/doc-executable-examples.test.d.ts +10 -0
- package/.claude-flow-novice/dist/cfn-loop/__tests__/extended-timeout-testing.test.d.ts +24 -0
- package/.claude-flow-novice/dist/cfn-loop/__tests__/heartbeat-warning-system.test.d.ts +21 -0
- package/.claude-flow-novice/dist/cfn-loop/__tests__/redis-health-monitor.test.d.ts +22 -0
- package/.claude-flow-novice/dist/cfn-loop/__tests__/signal-ack-protocol.test.d.ts +21 -0
- package/.claude-flow-novice/dist/cfn-loop/__tests__/sqlite-memory-manager.test.d.ts +19 -0
- package/.claude-flow-novice/dist/cfn-loop/__tests__/test-utilities.d.ts +133 -0
- package/.claude-flow-novice/dist/cfn-loop/agent-lifecycle-sqlite.d.ts +143 -0
- package/.claude-flow-novice/dist/cfn-loop/blocking-coordination-signals.d.ts +178 -0
- package/.claude-flow-novice/dist/cfn-loop/blocking-coordination.d.ts +268 -0
- package/.claude-flow-novice/dist/cfn-loop/byzantine-consensus-adapter.d.ts +193 -0
- package/.claude-flow-novice/dist/cfn-loop/cfn-loop-memory-manager.d.ts +221 -0
- package/.claude-flow-novice/dist/cfn-loop/cfn-loop-orchestrator.d.ts +193 -1
- package/.claude-flow-novice/dist/cfn-loop/checkpoint-serializer.d.ts +113 -0
- package/.claude-flow-novice/dist/cfn-loop/circuit-breaker.d.ts +8 -2
- package/.claude-flow-novice/dist/cfn-loop/conflict-resolver.d.ts +221 -0
- package/.claude-flow-novice/dist/cfn-loop/consensus/enterprise-planning-consensus.d.ts +61 -0
- package/.claude-flow-novice/dist/cfn-loop/consensus/mvp-consensus.d.ts +33 -0
- package/.claude-flow-novice/dist/cfn-loop/coordination-validator.d.ts +121 -0
- package/.claude-flow-novice/dist/cfn-loop/coordinator-timeout-handler.d.ts +195 -0
- package/.claude-flow-novice/dist/cfn-loop/crash-detector.d.ts +138 -0
- package/.claude-flow-novice/dist/cfn-loop/epic-report-generator.d.ts +136 -0
- package/.claude-flow-novice/dist/cfn-loop/git-checkpoint-integration.example.d.ts +13 -0
- package/.claude-flow-novice/dist/cfn-loop/git-checkpoint-manager.d.ts +165 -0
- package/.claude-flow-novice/dist/cfn-loop/heartbeat-integration-example.d.ts +16 -0
- package/.claude-flow-novice/dist/cfn-loop/heartbeat-warning-system.d.ts +202 -0
- package/.claude-flow-novice/dist/cfn-loop/meta-coordinator.d.ts +208 -0
- package/.claude-flow-novice/dist/cfn-loop/modes/__tests__/mode-selection.test.d.ts +9 -0
- package/.claude-flow-novice/dist/cfn-loop/modes/enterprise-mode.d.ts +37 -0
- package/.claude-flow-novice/dist/cfn-loop/modes/index.d.ts +111 -0
- package/.claude-flow-novice/dist/cfn-loop/modes/mvp-mode.d.ts +31 -0
- package/.claude-flow-novice/dist/cfn-loop/modes/standard-mode.d.ts +31 -0
- package/.claude-flow-novice/dist/cfn-loop/modes/types.d.ts +135 -0
- package/.claude-flow-novice/dist/cfn-loop/product-owner/enterprise-owner-team.d.ts +50 -0
- package/.claude-flow-novice/dist/cfn-loop/product-owner/mvp-owner.d.ts +31 -0
- package/.claude-flow-novice/dist/cfn-loop/recovery-engine.d.ts +183 -0
- package/.claude-flow-novice/dist/cfn-loop/redis-health-integration-example.d.ts +13 -0
- package/.claude-flow-novice/dist/cfn-loop/redis-health-monitor.d.ts +164 -0
- package/.claude-flow-novice/dist/cfn-loop/redis-pubsub-helpers.d.ts +230 -0
- package/.claude-flow-novice/dist/cfn-loop/sprint-coordinator-enhanced.d.ts +199 -0
- package/.claude-flow-novice/dist/cfn-loop/state-checkpoint-manager.d.ts +198 -0
- package/.claude-flow-novice/dist/cfn-loop/test-aggregator.d.ts +205 -0
- package/.claude-flow-novice/dist/cfn-loop/test-lock-coordinator.d.ts +176 -0
- package/.claude-flow-novice/dist/cfn-loop/test-product-owner-decision.d.ts +19 -0
- package/.claude-flow-novice/dist/cfn-loop/types.d.ts +174 -0
- package/.claude-flow-novice/dist/cfn-loop/validator-methods-replacement.d.ts +68 -0
- package/.claude-flow-novice/dist/cli/cleanup-orphans.d.ts +54 -0
- package/.claude-flow-novice/dist/cli/commands/agent-lifecycle.d.ts +226 -0
- package/.claude-flow-novice/dist/cli/commands/cfn-loop-parallel.d.ts +21 -0
- package/.claude-flow-novice/dist/cli/commands/recovery-resume.d.ts +33 -0
- package/.claude-flow-novice/dist/cli/commands/recovery-status.d.ts +57 -0
- package/.claude-flow-novice/dist/cli/commands/recovery.d.ts +88 -0
- package/.claude-flow-novice/dist/cli/commands/validate-coordination.d.ts +14 -0
- package/.claude-flow-novice/dist/cli/node-compat.d.ts +1 -1
- package/.claude-flow-novice/dist/cli/simple-commands/hive-mind/queen.d.ts +3 -3
- package/.claude-flow-novice/dist/cli/utils/interactive-detector.d.ts +1 -1
- package/.claude-flow-novice/dist/cli/utils/redis-client.d.ts +1 -5
- package/.claude-flow-novice/dist/consensus/byzantine-coordinator.d.ts +314 -0
- package/.claude-flow-novice/dist/constants/agent-types.d.ts +2 -2
- package/.claude-flow-novice/dist/coordination/hive-orchestrator.d.ts +1 -1
- package/.claude-flow-novice/dist/coordination/validation-schemas.d.ts +12 -12
- package/.claude-flow-novice/dist/hooks/index.d.ts +1 -1
- package/.claude-flow-novice/dist/hooks/useSwarmRealtimeData.d.ts +11 -11
- package/.claude-flow-novice/dist/memory/advanced-memory-manager.d.ts +1 -0
- package/.claude-flow-novice/dist/memory/backends/sqlite.d.ts +1 -0
- package/.claude-flow-novice/dist/memory/distributed-memory.d.ts +1 -0
- package/.claude-flow-novice/dist/memory/secret-detector.d.ts +131 -0
- package/.claude-flow-novice/dist/memory/sqlite-enhanced-backend.d.ts +1 -0
- package/.claude-flow-novice/dist/monitoring/memory-leak-dashboard-widget.d.ts +194 -0
- package/.claude-flow-novice/dist/providers/api-key-rotation-example.d.ts +54 -0
- package/.claude-flow-novice/dist/providers/api-key-rotator.d.ts +166 -0
- package/.claude-flow-novice/dist/providers/rate-limit-detector.d.ts +60 -0
- package/.claude-flow-novice/dist/redis/RedisHealthMonitor.d.ts +162 -0
- package/.claude-flow-novice/dist/redis/health-integration-example.d.ts +86 -0
- package/.claude-flow-novice/dist/services/swarm-memory-manager.d.ts +1 -0
- package/.claude-flow-novice/dist/src/agents/heartbeat-manager.js +144 -0
- package/.claude-flow-novice/dist/src/agents/lifecycle-cleanup-enhanced.js +514 -0
- package/.claude-flow-novice/dist/src/automation/test-pipeline/PipelineValidator.js +1 -1
- package/.claude-flow-novice/dist/src/automation/test-pipeline/SwarmTestCoordinator.js +1 -1
- package/.claude-flow-novice/dist/src/cfn-loop/agent-lifecycle-sqlite.js +385 -0
- package/.claude-flow-novice/dist/src/cfn-loop/blocking-coordination-signals.js +470 -0
- package/.claude-flow-novice/dist/src/cfn-loop/blocking-coordination.js +768 -0
- package/.claude-flow-novice/dist/src/cfn-loop/byzantine-consensus-adapter.js +548 -0
- package/.claude-flow-novice/dist/src/cfn-loop/cfn-loop-memory-manager.js +589 -0
- package/.claude-flow-novice/dist/src/cfn-loop/cfn-loop-orchestrator.js +1059 -21
- package/.claude-flow-novice/dist/src/cfn-loop/checkpoint-serializer.js +308 -0
- package/.claude-flow-novice/dist/src/cfn-loop/circuit-breaker.js +34 -9
- package/.claude-flow-novice/dist/src/cfn-loop/conflict-resolver.js +525 -0
- package/.claude-flow-novice/dist/src/cfn-loop/consensus/enterprise-planning-consensus.js +403 -0
- package/.claude-flow-novice/dist/src/cfn-loop/consensus/mvp-consensus.js +235 -0
- package/.claude-flow-novice/dist/src/cfn-loop/coordination-validator.js +304 -0
- package/.claude-flow-novice/dist/src/cfn-loop/coordinator-timeout-handler.js +600 -0
- package/.claude-flow-novice/dist/src/cfn-loop/crash-detector.js +362 -0
- package/.claude-flow-novice/dist/src/cfn-loop/epic-report-generator.js +283 -0
- package/.claude-flow-novice/dist/src/cfn-loop/git-checkpoint-integration.example.js +161 -0
- package/.claude-flow-novice/dist/src/cfn-loop/git-checkpoint-manager.js +486 -0
- package/.claude-flow-novice/dist/src/cfn-loop/heartbeat-integration-example.js +187 -0
- package/.claude-flow-novice/dist/src/cfn-loop/heartbeat-warning-system.js +492 -0
- package/.claude-flow-novice/dist/src/cfn-loop/meta-coordinator.js +538 -0
- package/.claude-flow-novice/dist/src/cfn-loop/modes/enterprise-mode.js +132 -0
- package/.claude-flow-novice/dist/src/cfn-loop/modes/index.js +191 -0
- package/.claude-flow-novice/dist/src/cfn-loop/modes/mvp-mode.js +79 -0
- package/.claude-flow-novice/dist/src/cfn-loop/modes/standard-mode.js +81 -0
- package/.claude-flow-novice/dist/src/cfn-loop/modes/types.js +41 -0
- package/.claude-flow-novice/dist/src/cfn-loop/product-owner/enterprise-owner-team.js +380 -0
- package/.claude-flow-novice/dist/src/cfn-loop/product-owner/mvp-owner.js +170 -0
- package/.claude-flow-novice/dist/src/cfn-loop/recovery-engine.js +546 -0
- package/.claude-flow-novice/dist/src/cfn-loop/redis-health-integration-example.js +215 -0
- package/.claude-flow-novice/dist/src/cfn-loop/redis-health-monitor.js +414 -0
- package/.claude-flow-novice/dist/src/cfn-loop/redis-pubsub-helpers.js +463 -0
- package/.claude-flow-novice/dist/src/cfn-loop/sprint-coordinator-enhanced.js +466 -0
- package/.claude-flow-novice/dist/src/cfn-loop/state-checkpoint-manager.js +402 -0
- package/.claude-flow-novice/dist/src/cfn-loop/test-aggregator.js +476 -0
- package/.claude-flow-novice/dist/src/cfn-loop/test-lock-coordinator.js +446 -0
- package/.claude-flow-novice/dist/src/cfn-loop/test-product-owner-decision.js +69 -0
- package/.claude-flow-novice/dist/src/cfn-loop/types.js +30 -0
- package/.claude-flow-novice/dist/src/cfn-loop/validator-methods-replacement.js +362 -0
- package/.claude-flow-novice/dist/src/cli/cleanup-orphans.js +246 -0
- package/.claude-flow-novice/dist/src/cli/commands/agent-lifecycle.js +1058 -0
- package/.claude-flow-novice/dist/src/cli/commands/cfn-loop-parallel.js +436 -0
- package/.claude-flow-novice/dist/src/cli/commands/index.js +86 -0
- package/.claude-flow-novice/dist/src/cli/commands/parse-epic.js +64 -2
- package/.claude-flow-novice/dist/src/cli/commands/recovery-resume.js +369 -0
- package/.claude-flow-novice/dist/src/cli/commands/recovery-status.js +265 -0
- package/.claude-flow-novice/dist/src/cli/commands/recovery.js +546 -0
- package/.claude-flow-novice/dist/src/cli/commands/validate-coordination.js +211 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/CLAUDE-backup-pre-enterprise-loop.md +735 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/CLAUDE.md +176 -326
- package/.claude-flow-novice/dist/src/coordination/shared/transparency/transparency-system.js +1 -1
- package/.claude-flow-novice/dist/src/memory/advanced-memory-manager.js +17 -2
- package/.claude-flow-novice/dist/src/memory/backends/sqlite.js +23 -1
- package/.claude-flow-novice/dist/src/memory/distributed-memory.js +18 -3
- package/.claude-flow-novice/dist/src/memory/secret-detector.js +253 -0
- package/.claude-flow-novice/dist/src/memory/sqlite-enhanced-backend.js +20 -1
- package/.claude-flow-novice/dist/src/monitoring/memory-leak-dashboard-widget.js +421 -0
- package/.claude-flow-novice/dist/src/observability/prometheus-metrics.d.js +8 -0
- package/.claude-flow-novice/dist/src/providers/api-key-rotation-example.js +165 -0
- package/.claude-flow-novice/dist/src/providers/api-key-rotator.js +412 -0
- package/.claude-flow-novice/dist/src/providers/rate-limit-detector.js +193 -0
- package/.claude-flow-novice/dist/src/redis/RedisHealthMonitor.js +429 -0
- package/.claude-flow-novice/dist/src/redis/health-integration-example.js +353 -0
- package/.claude-flow-novice/dist/src/services/swarm-memory-manager.js +72 -42
- package/.claude-flow-novice/dist/src/sqlite/ACLEnforcer.cjs +928 -0
- package/.claude-flow-novice/dist/src/sqlite/AgentRegistry.cjs +702 -0
- package/.claude-flow-novice/dist/src/sqlite/AgentRegistry.js +702 -0
- package/.claude-flow-novice/dist/src/sqlite/EncryptionKeyManager.cjs +754 -0
- package/.claude-flow-novice/dist/src/sqlite/EncryptionKeyManager.js +754 -0
- package/.claude-flow-novice/dist/src/sqlite/MemoryStoreAdapter.cjs +571 -0
- package/.claude-flow-novice/dist/src/sqlite/MemoryStoreAdapter.js +571 -0
- package/.claude-flow-novice/dist/src/sqlite/MultiLayerCache.cjs +640 -0
- package/.claude-flow-novice/dist/src/sqlite/MultiLayerCache.js +640 -0
- package/.claude-flow-novice/dist/src/sqlite/RedisCoordinator.cjs +636 -0
- package/.claude-flow-novice/dist/src/sqlite/RedisCoordinator.js +636 -0
- package/.claude-flow-novice/dist/src/sqlite/SwarmMemoryManager.cjs +750 -0
- package/.claude-flow-novice/dist/src/sqlite/SwarmMemoryManager.js +750 -0
- package/.claude-flow-novice/dist/src/sqlite/index.cjs +620 -0
- package/.claude-flow-novice/dist/src/sqlite/index.js +620 -0
- package/.claude-flow-novice/dist/src/sqlite/performance-benchmarks.cjs +839 -0
- package/.claude-flow-novice/dist/src/sqlite/performance-benchmarks.js +839 -0
- package/.claude-flow-novice/dist/src/testing/performance/PerformanceTestRunner.js +1 -1
- package/.claude-flow-novice/dist/src/wasm-regex-engine/pkg/wasm_regex_engine.d.js +11 -0
- package/.claude-flow-novice/dist/src/wasm-regex-engine/pkg/wasm_regex_engine_bg.wasm.d.js +28 -0
- package/.claude-flow-novice/dist/web/api/routes/parallel-status.d.ts +105 -0
- package/.claude-flow-novice/dist/web/dashboard/hooks/useWebSocket.d.ts +4 -4
- package/.claude-flow-novice/tsconfig.tsbuildinfo +1 -1
- package/AUTO_SETUP.md +271 -0
- package/CLAUDE.md +176 -326
- package/README.md +127 -30
- package/config/.env.example +17 -0
- package/config/cfn-loop/enterprise-criteria.json +207 -0
- package/config/cfn-loop/instructions/enterprise-instructions.md +506 -0
- package/config/cfn-loop/instructions/mvp-instructions.md +420 -0
- package/config/cfn-loop/instructions/standard-instructions.md +497 -0
- package/config/cfn-loop/mvp-criteria.json +133 -0
- package/config/docker/DEPLOYMENT_VALIDATION_RESULTS.md +1 -1
- package/config/docker/QUICK_START.txt +7 -5
- package/config/docker/STABILITY_TEST_README.md +10 -10
- package/config/hooks/AGENT_TEMPLATE_VALIDATOR_COMPLETION.md +440 -0
- package/config/hooks/BLOCKING_COORDINATION_VALIDATOR_IMPLEMENTATION_REPORT.md +559 -0
- package/config/hooks/BLOCKING_COORDINATION_VALIDATOR_README.md +467 -0
- package/config/hooks/CFN_LOOP_MEMORY_VALIDATOR_IMPLEMENTATION.md +343 -0
- package/config/hooks/COVERAGE_VALIDATOR_QUICK_START.md +218 -0
- package/config/hooks/POST_TEST_COVERAGE_README.md +657 -0
- package/config/hooks/README-AGENT-TEMPLATE-VALIDATOR.md +464 -0
- package/config/hooks/README-CFN-LOOP-MEMORY-VALIDATOR.md +442 -0
- package/config/hooks/TEST_COVERAGE_VALIDATOR_COMPLETION.md +497 -0
- package/config/hooks/WASM_REGEX_ENGINE.md +210 -0
- package/config/hooks/coverage.config.json +40 -0
- package/config/hooks/hook-manager.cjs +47 -0
- package/config/hooks/markdown-validator.js +202 -0
- package/config/hooks/post-edit-agent-template.js +607 -0
- package/config/hooks/post-edit-blocking-coordination.js +748 -0
- package/config/hooks/post-edit-cfn-loop-memory.cjs +503 -0
- package/config/hooks/post-edit-pipeline.js +290 -145
- package/config/hooks/post-test-coverage.js +981 -0
- package/config/hooks/pre-commit-db-scan +119 -0
- package/config/hooks/pre-edit-security.js +33 -6
- package/config/hooks/pre-tool-validation.js +60 -1
- package/config/hooks/safety-validator.js +236 -21
- package/config/hooks/safety-validator.js.backup +1323 -0
- package/config/hooks/validators/CWEValidator.js +152 -0
- package/config/hooks/validators/ComplianceValidator.js +187 -0
- package/config/hooks/validators/DependencyScanner.js +162 -0
- package/config/hooks/validators/InputSanitizer.js +134 -0
- package/config/hooks/validators/OWASPValidator.js +197 -0
- package/config/hooks/validators/SecurityPatternScanner.js +318 -0
- package/config/jest/jest.config.js +12 -1
- package/docs/PRE_COMMIT_HOOK.md +294 -0
- package/docs/README.md +130 -153
- package/docs/TEST_INFRASTRUCTURE.md +381 -0
- package/docs/agent-lifecycle-hooks.md +860 -0
- package/docs/api/FUNCTION_CATALOG.md +584 -0
- package/docs/api/ROUTING_QUICK_REFERENCE.md +117 -0
- package/docs/api/VALIDATION_QUICK_REFERENCE.md +172 -0
- package/docs/api/blocking-coordination-api.md +1451 -0
- package/docs/architecture/MULTI_SWARM_COORDINATION_README.md +620 -0
- package/docs/architecture/README_REALTIME_COMMUNICATION.md +463 -0
- package/docs/architecture/REALTIME_COMMUNICATION_ANALYSIS.md +321 -0
- package/docs/architecture/WASM_ARCHITECTURE_SUMMARY.md +429 -0
- package/docs/architecture/WASM_INTEGRATION_ARCHITECTURE.md +1330 -0
- package/docs/archive/2025-10-10-architecture/deprecated-implementations/BLOCKING_COORDINATION_VALIDATION_FINAL.md +334 -0
- package/docs/archive/2025-10-10-architecture/deprecated-implementations/blocking-coordination-pattern.md +484 -0
- package/docs/archive/2025-10-10-architecture/deprecated-implementations/production-blocking-coordination-plan.md +764 -0
- package/docs/archive/2025-10-10-architecture/deprecated-implementations/revised-production-blocking-plan.md +614 -0
- package/docs/archive/2025-10-10-architecture/implementation-guides/WASM_IMPLEMENTATION_GUIDE.md +1011 -0
- package/docs/archive/2025-10-10-architecture/implementation-guides/WASM_ROLLOUT_PLAN.md +701 -0
- package/docs/archive/2025-10-10-architecture/implementation-guides/agent-lifecycle-implementation-plan.md +1428 -0
- package/docs/archive/2025-10-10-architecture/other-designs/CORRECTED-task-tool-constraints.md +366 -0
- package/docs/archive/2025-10-10-architecture/other-designs/claude-code-task-tool-constraints.md +401 -0
- package/docs/archive/2025-10-10-architecture/other-designs/cleanup-architecture-explanation.md +423 -0
- package/docs/archive/2025-10-10-guides/setup-guides/CONTRIBUTING.md +136 -0
- package/docs/archive/2025-10-10-guides/setup-guides/DEVELOPMENT_SETUP.md +486 -0
- package/docs/archive/2025-10-10-guides/setup-guides/EXAMPLES.md +793 -0
- package/docs/archive/2025-10-10-guides/setup-guides/INSTALLATION.md +608 -0
- package/docs/archive/2025-10-10-guides/setup-guides/QUICK_START_INSTALLATION.md +521 -0
- package/docs/archive/2025-10-10-guides/setup-guides/README.md +162 -0
- package/docs/archive/2025-10-10-guides/setup-guides/TROUBLESHOOTING.md +1388 -0
- package/docs/archive/2025-10-10-operations/ARCHIVE_MIGRATION_PLAN.md +214 -0
- package/docs/archive/2025-10-10-performance/wasm-deliverables/WASM_DELIVERABLES.md +421 -0
- package/docs/archive/ARCHIVAL_EXECUTION_REPORT_2025-10-10.md +219 -0
- package/docs/archive/HTTP_POLLING_FALLBACK.md +283 -0
- package/docs/archive/reference-historical/BACKUP_MANIFEST.md +32 -0
- package/docs/archive/reference-historical/README-PHASE4.md +355 -0
- package/docs/archive/reference-historical/READMEv2.md +524 -0
- package/docs/deployment/blocking-coordination-secrets.md +1445 -0
- package/docs/implementation/SQLITE_INTEGRATION_IMPLEMENTATION.md +663 -0
- package/docs/integration/cfn-loop-examples.md +1107 -0
- package/docs/observability/prometheus-setup.md +455 -0
- package/docs/operations/OPERATIONS_FOLDER_REVIEW_REPORT.json +135 -0
- package/docs/operations/failure-recovery-playbook.md +877 -0
- package/docs/operations/monitoring-runbook.md +880 -0
- package/docs/patterns/blocking-coordination-pattern.md +642 -0
- package/docs/reference/CHANGELOG-POST-EDIT-PIPELINE.md +370 -0
- package/docs/reference/MANUAL_NPM_PUBLICATION_GUIDE.md +248 -0
- package/docs/security/SEC-002-race-condition-fix.md +300 -0
- package/docs/security/SEC-003-JSON-VALIDATION.md +215 -0
- package/docs/testing/chaos-engineering.md +524 -0
- package/docs/training/best-practices.md +1241 -0
- package/docs/training/faq.md +1483 -0
- package/docs/training/interactive-tutorial.md +966 -0
- package/docs/training/troubleshooting-guide.md +1279 -0
- package/docs/training/video-walkthrough-script.md +675 -0
- package/examples/demonstrations/phase5-demonstration.cjs +227 -0
- package/examples/rest-api-simple/sparc-implementation-roadmap.md +1 -1
- package/examples/rest-api-simple/sparc-implementation-roadmap.md.backup-1760135091708 +190 -0
- package/examples/templates/basic-swarm/CLAUDE.md +464 -0
- package/examples/templates/custom-agent/CLAUDE.md +299 -0
- package/examples/templates/custom-agent/package.json +26 -0
- package/examples/templates/event-bus/package.json +28 -0
- package/examples/templates/fleet-manager/CLAUDE.md +134 -0
- package/examples/templates/fleet-manager/package.json +28 -0
- package/package.json +60 -18
- package/readme/additional-commands.md +365 -2
- package/readme/cfn-loop-modes.md +527 -0
- package/readme/logs-cli-redis.md +82 -14
- package/readme/logs-documentation-index.md +8 -0
- package/readme/logs-features.md +188 -24
- package/readme/logs-slash-commands.md +35 -11
- package/scripts/CLEANUP_OPTIMIZATION_REPORT.json +312 -0
- package/scripts/CLEANUP_PERFORMANCE_OPTIMIZATION.md +387 -0
- package/scripts/CLEANUP_QUICK_START.md +268 -0
- package/scripts/CLEANUP_TEST_RESULTS.md +205 -0
- package/scripts/auto-setup.js +332 -0
- package/scripts/cleanup-blocking-coordination.sh +420 -0
- package/scripts/collect-build-metrics.js +65 -0
- package/scripts/demo/README.md +79 -0
- package/scripts/demo/autoscaling-demo-simplified.js +963 -0
- package/scripts/demo/comprehensive-dashboard-test.js +693 -0
- package/scripts/demo/confidence-log.js +87 -0
- package/scripts/demo/confidence-report.js +82 -0
- package/scripts/demo/demo-multi-swarm-coordination.js +325 -0
- package/scripts/demo/demo-production-deployment.js +399 -0
- package/scripts/demo/demo-visualization-system.js +149 -0
- package/scripts/demo/performance-analysis.cjs +71 -0
- package/scripts/demo/performance-analysis.js +71 -0
- package/scripts/demo/test-autoscaling-demo.js +314 -0
- package/scripts/dev/demo-phase3-compliance.js +2 -2
- package/scripts/ecosystem.config.cjs +90 -0
- package/scripts/hook-wrapper.sh +54 -0
- package/scripts/install-pre-commit-hook.sh +127 -0
- package/scripts/legacy/performance-test-runner.js +7 -7
- package/scripts/migration/QUICK-START.md +189 -0
- package/scripts/migration/QUICK-START.md.backup-1760135091363 +189 -0
- package/scripts/migration/README.md +30 -0
- package/scripts/migration/TASK-1.3.2-COMPLETION-REPORT.md +500 -0
- package/scripts/migration/TASK-1.3.2-COMPLETION-REPORT.md.backup-1760135091348 +500 -0
- package/scripts/migration/UPDATE-PATHS-README.md +464 -0
- package/scripts/migration/UPDATE-PATHS-README.md.backup-1760135091337 +464 -0
- package/scripts/migration/example-patterns.json +19 -0
- package/scripts/migration/reorganize-workspace.js +504 -0
- package/scripts/migration/test-update-paths.js +359 -0
- package/scripts/migration/update-paths.js +664 -0
- package/scripts/migration/validate-migration.js +647 -0
- package/scripts/monitoring/README.md +6 -6
- package/scripts/monitoring/analyze-resources.sh +1 -1
- package/scripts/monitoring/dynamic-monitor.sh +4 -4
- package/scripts/monitoring/test-monitor-quick.sh +1 -1
- package/scripts/performance-test-runner.js +7 -7
- package/scripts/redis-lua/cleanup-blocking-coordination.lua +198 -0
- package/scripts/sync-agents.js +290 -0
- package/scripts/test/NEW_STABILITY_TEST_GUIDE.md +13 -8
- package/scripts/test/quick-multilingual-demo.js +2 -2
- package/scripts/test-cleanup-performance.sh +416 -0
- package/scripts/test-runner.cjs +154 -0
- package/scripts/validate-agent-hooks.js +506 -0
- package/scripts/validation/README.md +33 -0
- package/scripts/validation/acl-security-validation.cjs +214 -0
- package/scripts/validation/acl-security-validation.js +402 -0
- package/scripts/validation/byzantine-verification.js +407 -0
- package/scripts/validation/final-phase-2-consensus.cjs +219 -0
- package/scripts/validation/final-security-validation.js +791 -0
- package/scripts/validation/final-wasm-validation.cjs +840 -0
- package/scripts/validation/integration-test-analysis.js +105 -0
- package/scripts/validation/phase-0-comprehensive-validation.js +474 -0
- package/scripts/validation/phase-0-consensus-report.js +139 -0
- package/scripts/validation/phase-0-final-report.js +112 -0
- package/scripts/validation/phase-0-redis-consensus-report.js +129 -0
- package/scripts/validation/phase-0-validation-improved.js +490 -0
- package/scripts/validation/phase-0-validation-test.js +65 -0
- package/scripts/validation/phase-1-consensus-report.cjs +342 -0
- package/scripts/validation/phase-1-consensus-validation.cjs +551 -0
- package/scripts/validation/phase-1-consensus-validation.js +551 -0
- package/scripts/validation/phase-2-consensus-report.cjs +186 -0
- package/scripts/validation/phase-2-validation.cjs +171 -0
- package/scripts/validation/phase-2-validation.js +171 -0
- package/scripts/validation/phase-4-consensus-report.js +181 -0
- package/scripts/validation/phase-4-final-validation.js +351 -0
- package/scripts/validation/phase-5-consensus-report.cjs +113 -0
- package/scripts/validation/phase-5-consensus-report.js +113 -0
- package/scripts/validation/security-analysis.js +49 -0
- package/scripts/validation/security-validation.js +492 -0
- package/scripts/validation/simple-security-validation.js +464 -0
- package/scripts/verify-installation.js +44 -14
- package/src/cli/simple-commands/init/templates/CLAUDE-backup-pre-enterprise-loop.md +735 -0
- package/src/cli/simple-commands/init/templates/CLAUDE.md +176 -326
- package/src/observability/blocking-coordination-metrics.js +161 -0
- package/src/observability/prometheus-metrics.d.ts +21 -0
- package/src/observability/prometheus-metrics.js +280 -0
- package/wiki/tutorials/beginner/04-quality-testing.md +3 -3
- package/.claude/agents/analyst.md +0 -300
- package/.claude/agents/architect.md +0 -558
- package/.claude/agents/base-template-generator.md +0 -65
- package/.claude/agents/coder.md +0 -181
- package/.claude/agents/planner.md +0 -135
- package/.claude/agents/researcher.md +0 -185
- package/.claude/agents/reviewer.md +0 -293
- package/.claude/agents/task-coordinator.md +0 -126
- package/.claude/agents/tester.md +0 -664
- package/MCP_DEPRECATION_COMPLETE.md +0 -375
- package/V2.0.0_READY_FOR_PUBLICATION.md +0 -417
- package/V2_RELEASE_SUMMARY.md +0 -568
- package/docs/DEPLOYMENT.md +0 -523
- package/docs/TROUBLESHOOTING.md +0 -1388
- package/docs/agent-token-analysis-results.json +0 -1329
- package/docs/architecture/agent-lifecycle-implementation-plan.md +0 -1428
- package/templates/custom-agent/package.json +0 -26
- package/templates/event-bus/package.json +0 -28
- package/templates/fleet-manager/package.json +0 -28
- /package/.claude/{agents → agents-ignore}/benchmarking-tests/test-agent-code-heavy.md +0 -0
- /package/.claude/{agents → agents-ignore}/benchmarking-tests/test-agent-metadata.md +0 -0
- /package/.claude/{agents → agents-ignore}/benchmarking-tests/test-agent-minimal.md +0 -0
- /package/.claude/{agents/coordinator.md → agents-ignore/coordinator-backup.md} +0 -0
- /package/.claude/{agents → agents-ignore}/data/ml/data-ml-model.md +0 -0
- /package/.claude/{agents → agents-ignore}/github/code-review-swarm.md +0 -0
- /package/.claude/{agents → agents-ignore}/github/github-modes.md +0 -0
- /package/.claude/{agents/templates → agents-ignore/github}/github-pr-manager.md +0 -0
- /package/.claude/{agents → agents-ignore}/github/github-specialist.md +0 -0
- /package/.claude/{agents → agents-ignore}/github/issue-tracker.md +0 -0
- /package/.claude/{agents → agents-ignore}/github/multi-repo-swarm.md +0 -0
- /package/.claude/{agents/devops/ci-cd → agents-ignore/github}/ops-cicd-github.md +0 -0
- /package/.claude/{agents → agents-ignore}/github/pr-manager.md +0 -0
- /package/.claude/{agents → agents-ignore}/github/project-board-sync.md +0 -0
- /package/.claude/{agents → agents-ignore}/github/release-manager.md +0 -0
- /package/.claude/{agents → agents-ignore}/github/release-swarm.md +0 -0
- /package/.claude/{agents → agents-ignore}/github/repo-architect.md +0 -0
- /package/.claude/{agents → agents-ignore}/github/swarm-issue.md +0 -0
- /package/.claude/{agents → agents-ignore}/github/swarm-pr.md +0 -0
- /package/.claude/{agents → agents-ignore}/github/sync-coordinator.md +0 -0
- /package/.claude/{agents → agents-ignore}/github/workflow-automation.md +0 -0
- /package/.claude/{agents → agents-ignore}/neural/neural-pattern-agent.md +0 -0
- /package/.claude/{agents → agents-ignore}/neural/safla-neural.md +0 -0
- /package/.claude/{agents → agents-ignore}/optimization/benchmark-suite.md +0 -0
- /package/.claude/{agents → agents-ignore}/optimization/load-balancer.md +0 -0
- /package/.claude/{agents → agents-ignore}/optimization/perf-analyzer.md +0 -0
- /package/.claude/{agents → agents-ignore}/optimization/performance-monitor.md +0 -0
- /package/.claude/{agents → agents-ignore}/optimization/resource-allocator.md +0 -0
- /package/.claude/{agents → agents-ignore}/optimization/topology-optimizer.md +0 -0
- /package/.claude/{agents → agents-ignore}/sublinear/consciousness-evolution-agent.md +0 -0
- /package/.claude/{agents → agents-ignore}/sublinear/matrix-solver-agent.md +0 -0
- /package/.claude/{agents → agents-ignore}/sublinear/nanosecond-scheduler-agent.md +0 -0
- /package/.claude/{agents → agents-ignore}/sublinear/pagerank-agent.md +0 -0
- /package/.claude/{agents → agents-ignore}/sublinear/phi-calculator-agent.md +0 -0
- /package/.claude/{agents → agents-ignore}/sublinear/psycho-symbolic-agent.md +0 -0
- /package/.claude/{agents → agents-ignore}/sublinear/sublinear.md +0 -0
- /package/.claude/{agents → agents-ignore}/sublinear/temporal-advantage-agent.md +0 -0
- /package/.claude/{agents/architecture → agents-ignore}/system-design/arch-system-design.md +0 -0
- /package/.claude/{agents → agents-ignore}/templates/automation-smart-agent.md +0 -0
- /package/.claude/{agents → agents-ignore}/templates/coordinator-swarm-init.md +0 -0
- /package/.claude/{agents → agents-ignore}/templates/implementer-sparc-coder.md +0 -0
- /package/.claude/{agents → agents-ignore}/templates/memory-coordinator.md +0 -0
- /package/.claude/{agents → agents-ignore}/templates/migration-plan.md +0 -0
- /package/.claude/{agents → agents-ignore}/templates/orchestrator-task.md +0 -0
- /package/.claude/{agents → agents-ignore}/templates/performance-analyzer.md +0 -0
- /package/.claude/{agents → agents-ignore}/templates/sparc-coordinator.md +0 -0
- /package/{.claude/agents/specialized → .claude-flow-novice/.claude/agents/agent-principles}/CODER_AGENT_GUIDELINES.md +0 -0
- /package/docs/{API.md → api/API.md} +0 -0
- /package/docs/{CONFIGURATION.md → api/CONFIGURATION.md} +0 -0
- /package/docs/{PROVIDER_ROUTING_CONFIGURATION.md → api/PROVIDER_ROUTING_CONFIGURATION.md} +0 -0
- /package/docs/{PROVIDER_ROUTING_VERIFICATION.md → api/PROVIDER_ROUTING_VERIFICATION.md} +0 -0
- /package/docs/{ROUTING_FLOW_DIAGRAM.md → api/ROUTING_FLOW_DIAGRAM.md} +0 -0
- /package/{AGENT_PERFORMANCE_GUIDELINES.md → docs/architecture/AGENT_PERFORMANCE_GUIDELINES.md} +0 -0
- /package/docs/{EVENTEMITTER_CLEANUP_PATTERN.md → architecture/EVENTEMITTER_CLEANUP_PATTERN.md} +0 -0
- /package/docs/{REDIS_COORDINATION_SYSTEM.md → architecture/REDIS_COORDINATION_SYSTEM.md} +0 -0
- /package/docs/{SYSTEM_ARCHITECTURE.md → architecture/SYSTEM_ARCHITECTURE.md} +0 -0
- /package/docs/{consensus → architecture/consensus}/QUORUM_VERIFICATION_GUIDE.md +0 -0
- /package/docs/{consensus → architecture/consensus}/README.md +0 -0
- /package/docs/{consensus → architecture/consensus}/consensus-verification-1758747665635.json +0 -0
- /package/docs/{agents → archive/2025-10-10-architecture/agent-subdirectory}/MIGRATION_SUMMARY.md +0 -0
- /package/docs/{agents → archive/2025-10-10-architecture/agent-subdirectory}/README.md +0 -0
- /package/docs/{agent-booster-architecture.md → archive/2025-10-10-architecture/agent-subdirectory/agent-booster-architecture.md} +0 -0
- /package/docs/{agent-prompt-guidelines.md → archive/2025-10-10-architecture/agent-subdirectory/agent-prompt-guidelines.md} +0 -0
- /package/docs/{agent-token-usage-analysis-report.md → archive/2025-10-10-architecture/agent-subdirectory/agent-token-usage-analysis-report.md} +0 -0
- /package/docs/{agents → archive/2025-10-10-architecture/agent-subdirectory}/consensus-README.md +0 -0
- /package/docs/{agents → archive/2025-10-10-architecture/agent-subdirectory}/dependency-tracking-examples.md +0 -0
- /package/docs/{agents → archive/2025-10-10-architecture/agent-subdirectory}/optimization-README.md +0 -0
- /package/docs/{agents → archive/2025-10-10-architecture/agent-subdirectory}/swarm-README.md +0 -0
- /package/docs/{consensus → archive/2025-10-10-architecture/consensus-rounds}/CONSENSUS-COMPARISON.md +0 -0
- /package/docs/{consensus → archive/2025-10-10-architecture/consensus-rounds}/ROUND-5-EXECUTIVE-SUMMARY.md +0 -0
- /package/docs/{consensus → archive/2025-10-10-architecture/consensus-rounds}/consolidated-consensus-report.md +0 -0
- /package/docs/{consensus → archive/2025-10-10-architecture/consensus-rounds}/fullstack-consensus-round-2.md +0 -0
- /package/docs/{consensus → archive/2025-10-10-architecture/consensus-rounds}/fullstack-round-3-consensus-summary.md +0 -0
- /package/docs/{consensus → archive/2025-10-10-architecture/consensus-rounds}/fullstack-round-3-validator-1.md +0 -0
- /package/docs/{consensus → archive/2025-10-10-architecture/consensus-rounds}/fullstack-round-3-validator-2.md +0 -0
- /package/docs/{consensus → archive/2025-10-10-architecture/consensus-rounds}/fullstack-round-3-validator-3.md +0 -0
- /package/docs/{consensus → archive/2025-10-10-architecture/consensus-rounds}/fullstack-round-3-validator-4.md +0 -0
- /package/docs/{consensus → archive/2025-10-10-architecture/consensus-rounds}/fullstack-round-4-consensus-summary.md +0 -0
- /package/docs/{consensus → archive/2025-10-10-architecture/consensus-rounds}/fullstack-round-4-validator-1.md +0 -0
- /package/docs/{consensus → archive/2025-10-10-architecture/consensus-rounds}/fullstack-round-4-validator-2.md +0 -0
- /package/docs/{consensus → archive/2025-10-10-architecture/consensus-rounds}/fullstack-round-4-validator-3.md +0 -0
- /package/docs/{consensus → archive/2025-10-10-architecture/consensus-rounds}/fullstack-round-4-validator-4.md +0 -0
- /package/docs/{consensus → archive/2025-10-10-architecture/consensus-rounds}/fullstack-round-5-final-consensus.md +0 -0
- /package/docs/{consensus → archive/2025-10-10-architecture/consensus-rounds}/fullstack-swarm-consensus-report.md +0 -0
- /package/docs/{consensus → archive/2025-10-10-architecture/consensus-rounds}/post-edit-consensus-round-2.md +0 -0
- /package/docs/{consensus → archive/2025-10-10-architecture/consensus-rounds}/raft-implementation-summary.md +0 -0
- /package/docs/{consensus → archive/2025-10-10-architecture/consensus-rounds}/verification-summary.md +0 -0
- /package/docs/{comprehensive-mcp-solution-architecture.md → archive/2025-10-10-architecture/deprecated-implementations/comprehensive-mcp-solution-architecture.md} +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture}/experimental/ExperimentalFeaturesArchitecture.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/frontend-specific}/frontend-agent-ecosystem-integration.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/frontend-specific}/frontend-agent-technical-decisions.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/frontend-specific}/frontend-backend-coordination-interfaces.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/frontend-specific}/react-frontend-agent-specification.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/github-specific}/github-agent-consolidation-architecture.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/github-specific}/github-architecture-diagrams.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/implementation-guides}/agent-lifecycle-implementation-guide.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/implementation-guides}/implementation-guide.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/implementation-guides}/implementation-specifications.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/implementation-guides}/integration-guide.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/implementation-guides}/performance-optimization-guide.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/old-summaries}/architecture-summary-report.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/old-summaries}/fullstack-swarm-implementation-summary.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/old-summaries}/ultra-fast-communication-summary.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/other-designs}/agent-discovery-registration-system.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/other-designs}/agent-lifecycle-management-architecture.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/other-designs}/architectural-decisions.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/other-designs}/architecture-decision-records.md +0 -0
- /package/docs/{claude-soul-implementation.md → archive/2025-10-10-architecture/other-designs/claude-soul-implementation.md} +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/other-designs}/file-based-cross-team-communication.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/other-designs}/full-stack-swarm-team-specification.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/other-designs}/fullstack-communication-integration.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/other-designs}/stage3-unified-system-architecture.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/other-designs}/ultra-fast-communication-bus-design.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/other-designs}/zero-latency-communication-architecture.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/specific-feature-specs}/dynamic-agent-spawning-architecture.md +0 -0
- /package/docs/{fleet-manager-design.md → archive/2025-10-10-architecture/specific-feature-specs/fleet-manager-design.md} +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/specific-feature-specs}/fleet-manager-npm-architecture.md +0 -0
- /package/docs/{help-coordinator-implementation.md → archive/2025-10-10-architecture/specific-feature-specs/help-coordinator-implementation.md} +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/specific-feature-specs}/high-performance-memory-store.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/specific-feature-specs}/intelligent-configuration-system.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/specific-feature-specs}/message-serialization-compression-strategy.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/specific-feature-specs}/priority-queue-dead-letter-design.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/specific-feature-specs}/swarm-message-router-extension-design.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/specific-feature-specs}/swarm-router-implementation-spec.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/specific-feature-specs}/user-preference-storage-design.md +0 -0
- /package/docs/{architecture → archive/2025-10-10-architecture/specific-feature-specs}/websocket-connection-scaling-design.md +0 -0
- /package/docs/{swarm-coordination-test-results.md → archive/2025-10-10-architecture/test-results/swarm-coordination-test-results.md} +0 -0
- /package/docs/{development → archive/2025-10-10-development}/COMPREHENSIVE_WORKFLOW_SYSTEM.md +0 -0
- /package/docs/{development → archive/2025-10-10-development}/DEVELOPMENT_WORKFLOW.md +0 -0
- /package/docs/{EXAMPLES.md → archive/2025-10-10-development/EXAMPLES.md} +0 -0
- /package/docs/{development → archive/2025-10-10-development}/SPARC.md +0 -0
- /package/docs/{development → archive/2025-10-10-development}/agent-scope-creep-prevention-guide.md +0 -0
- /package/docs/{development → archive/2025-10-10-development}/cargo-build-validator-summary.md +0 -0
- /package/docs/{development → archive/2025-10-10-development/cli-consolidation}/command-consolidation-technical-spec.md +0 -0
- /package/docs/{development → archive/2025-10-10-development/cli-consolidation}/consolidated-cli-implementation.md +0 -0
- /package/docs/{development → archive/2025-10-10-development/cli-consolidation}/consolidated-command-design.md +0 -0
- /package/docs/{development → archive/2025-10-10-development}/experimental-features-improvement-plan.md +0 -0
- /package/docs/{development → archive/2025-10-10-development}/feature-simplification-strategy.md +0 -0
- /package/docs/{fixes → archive/2025-10-10-development/fixes}/fullstack-swarm-fixes-round-1.md +0 -0
- /package/docs/{fixes → archive/2025-10-10-development/fixes}/fullstack-swarm-fixes-round-3.md +0 -0
- /package/docs/{fixes → archive/2025-10-10-development/fixes}/fullstack-swarm-fixes-round-4.md +0 -0
- /package/docs/{fixes → archive/2025-10-10-development/fixes}/fullstack-swarm-fixes-round-5.md +0 -0
- /package/docs/{fixes → archive/2025-10-10-development/fixes}/round-5-quick-reference.md +0 -0
- /package/docs/{fixes → archive/2025-10-10-development/fixes}/round-5-summary.md +0 -0
- /package/docs/{fixes → archive/2025-10-10-development/fixes}/round-5-visual-summary.md +0 -0
- /package/docs/{implementation → archive/2025-10-10-development/implementation}/configuration-system-specs.md +0 -0
- /package/docs/{development → archive/2025-10-10-development}/npm-packaging-solution.md +0 -0
- /package/docs/{development → archive/2025-10-10-development}/pair-optimization.md +0 -0
- /package/docs/{phase11-cli-integration-complete.md → archive/2025-10-10-development/phase-summaries/phase11-cli-integration-complete.md} +0 -0
- /package/docs/{phase4-deployment-summary.md → archive/2025-10-10-development/phase-summaries/phase4-deployment-summary.md} +0 -0
- /package/docs/{development → archive/2025-10-10-development}/rust-framework-detection.md +0 -0
- /package/docs/{SDK-INTEGRATION-TEST-SUMMARY.md → archive/2025-10-10-development/sdk-integration/SDK-INTEGRATION-TEST-SUMMARY.md} +0 -0
- /package/docs/{SDK-TESTING.md → archive/2025-10-10-development/sdk-integration/SDK-TESTING.md} +0 -0
- /package/docs/{claude-agent-sdk-integration-strategy.md → archive/2025-10-10-development/sdk-integration/claude-agent-sdk-integration-strategy.md} +0 -0
- /package/docs/{sdk-integration-phase1.md → archive/2025-10-10-development/sdk-integration/sdk-integration-phase1.md} +0 -0
- /package/docs/{sdk-migration-guide.md → archive/2025-10-10-development/sdk-integration/sdk-migration-guide.md} +0 -0
- /package/docs/{sdk-phase1-summary.md → archive/2025-10-10-development/sdk-integration/sdk-phase1-summary.md} +0 -0
- /package/docs/{swarm-fullstack → archive/2025-10-10-development/swarm-fullstack}/IMPLEMENTATION-SUMMARY.md +0 -0
- /package/docs/{swarm-fullstack → archive/2025-10-10-development/swarm-fullstack}/frontend-testing-system.md +0 -0
- /package/docs/{development → archive/2025-10-10-development}/technical-implementation-guide.md +0 -0
- /package/docs/{development → archive/2025-10-10-development}/token-tracking-guide.md +0 -0
- /package/docs/{development → archive/2025-10-10-development}/token-tracking-status.md +0 -0
- /package/docs/{development → archive/2025-10-10-development}/troubleshooting.md +0 -0
- /package/docs/{development → archive/2025-10-10-development}/typescript-distribution-solution.md +0 -0
- /package/docs/{personalization → archive/2025-10-10-guides/personalization}/cli-integration-guide.md +0 -0
- /package/docs/{phase4-ux → archive/2025-10-10-guides/phase4-ux}/error-handling-ux-guide.md +0 -0
- /package/docs/{phase4-ux → archive/2025-10-10-guides/phase4-ux}/rollout-monitoring-dashboard.md +0 -0
- /package/docs/{phase4-ux → archive/2025-10-10-guides/phase4-ux}/user-experience-validation-framework.md +0 -0
- /package/docs/{phase4-ux → archive/2025-10-10-guides/phase4-ux}/user-onboarding-experience.md +0 -0
- /package/docs/{NOVICE_USER_GUIDE.md → archive/2025-10-10-guides/setup-guides/NOVICE_USER_GUIDE.md} +0 -0
- /package/docs/{QUICK_START.md → archive/2025-10-10-guides/setup-guides/QUICK_START.md} +0 -0
- /package/docs/{SETUP_WIZARD.md → archive/2025-10-10-guides/setup-guides/SETUP_WIZARD.md} +0 -0
- /package/docs/{ZAIR_SETUP_CHECKLIST.md → archive/2025-10-10-guides/setup-guides/ZAIR_SETUP_CHECKLIST.md} +0 -0
- /package/docs/{user → archive/2025-10-10-guides/user-guides}/PREFERENCE_SYSTEM_GUIDE.md +0 -0
- /package/docs/{user → archive/2025-10-10-guides/user-guides}/USER_GUIDE.md +0 -0
- /package/docs/{user → archive/2025-10-10-guides/user-guides}/enterprise-stakeholder-guide.md +0 -0
- /package/docs/{user → archive/2025-10-10-guides/user-guides}/novice-user-guide.md +0 -0
- /package/docs/{user → archive/2025-10-10-guides/user-guides}/tutorial.md +0 -0
- /package/docs/{user → archive/2025-10-10-guides/user-guides}/ux-assessment-pain-points.md +0 -0
- /package/docs/{ux-design/mockups → archive/2025-10-10-guides/ux-design}/configuration-ui-mockups.md +0 -0
- /package/docs/{ux-design/wizards → archive/2025-10-10-guides/ux-design}/configuration-wizard-flows.md +0 -0
- /package/docs/{ux-design/ui-patterns → archive/2025-10-10-guides/ux-design}/progressive-disclosure-patterns.md +0 -0
- /package/docs/{ux-design → archive/2025-10-10-guides/ux-design}/usability-testing-plan.md +0 -0
- /package/docs/{ux-design/user-journeys → archive/2025-10-10-guides/ux-design}/user-personas-analysis.md +0 -0
- /package/docs/{ux-design/accessibility → archive/2025-10-10-guides/ux-design}/wcag-compliance-guidelines.md +0 -0
- /package/docs/{HOOK-COMPARISON.md → archive/2025-10-10-integration/HOOK-COMPARISON.md} +0 -0
- /package/docs/{POST-EDIT-PIPELINE-AGENT-INFO.md → archive/2025-10-10-integration/POST-EDIT-PIPELINE-AGENT-INFO.md} +0 -0
- /package/docs/{POST-EDIT-PIPELINE-MERGED.md → archive/2025-10-10-integration/POST-EDIT-PIPELINE-MERGED.md} +0 -0
- /package/docs/{POST-EDIT-PIPELINE-UNIFIED.md → archive/2025-10-10-integration/POST-EDIT-PIPELINE-UNIFIED.md} +0 -0
- /package/docs/{automation → archive/2025-10-10-integration/automation}/swarm-test-pipeline-strategy.md +0 -0
- /package/docs/{integration → archive/2025-10-10-integration/mcp-compatibility}/issue-772-implementation-plan.md +0 -0
- /package/docs/{mcp-backwards-compatibility.md → archive/2025-10-10-integration/mcp-compatibility/mcp-backwards-compatibility.md} +0 -0
- /package/docs/{mcp-novice-simplification.md → archive/2025-10-10-integration/mcp-compatibility/mcp-novice-simplification.md} +0 -0
- /package/docs/{slash-commands → archive/2025-10-10-integration/slash-commands}/cfn-claude-sync-usage.md +0 -0
- /package/docs/{slash-commands → archive/2025-10-10-integration/slash-commands}/cfn-loop-quick-reference.md +0 -0
- /package/docs/{slash-commands → archive/2025-10-10-integration/slash-commands}/cfn-loop-usage.md +0 -0
- /package/docs/{final-slash-commands-setup.md → archive/2025-10-10-integration/slash-commands/final-slash-commands-setup.md} +0 -0
- /package/docs/{commands → archive/2025-10-10-integration/slash-commands}/fullstack.md +0 -0
- /package/docs/{slash-commands-complete-status.md → archive/2025-10-10-integration/slash-commands/slash-commands-complete-status.md} +0 -0
- /package/docs/{slash-commands-status-report.md → archive/2025-10-10-integration/slash-commands/slash-commands-status-report.md} +0 -0
- /package/docs/{workflows → archive/2025-10-10-integration/workflows}/IMPLEMENTATION_SUMMARY.md +0 -0
- /package/docs/{workflows → archive/2025-10-10-integration/workflows}/README.md +0 -0
- /package/docs/{workflows → archive/2025-10-10-integration/workflows}/iterative-build-test-workflow.md +0 -0
- /package/docs/{DOCUMENTATION_AUTO_UPDATER_CHANGELOG.md → archive/2025-10-10-migration/deprecation-notices/DOCUMENTATION_AUTO_UPDATER_CHANGELOG.md} +0 -0
- /package/{MCP_DEPRECATION_NOTICE.md → docs/archive/2025-10-10-migration/deprecation-notices/MCP_DEPRECATION_NOTICE.md} +0 -0
- /package/docs/{migration → archive/2025-10-10-migration/deprecation-notices}/README.md +0 -0
- /package/docs/{deprecation-report.md → archive/2025-10-10-migration/deprecation-notices/deprecation-report.md} +0 -0
- /package/docs/{migration → archive/2025-10-10-migration/v2-migration}/COMPREHENSIVE_MIGRATION_GUIDE.md +0 -0
- /package/docs/{V1_TO_V2_MIGRATION.md → archive/2025-10-10-migration/v2-migration/V1_TO_V2_MIGRATION.md} +0 -0
- /package/{V2_MIGRATION_GUIDE.md → docs/archive/2025-10-10-migration/v2-migration/V2_MIGRATION_GUIDE.md} +0 -0
- /package/docs/{migration → archive/2025-10-10-migration/v2-migration}/migration-assessment-toolkit.md +0 -0
- /package/docs/{npm-package-updates.md → archive/2025-10-10-migration/v2-migration/npm-package-updates.md} +0 -0
- /package/docs/{migration → archive/2025-10-10-migration/v2-migration}/proven-migration-case-studies.md +0 -0
- /package/docs/{APM_INTEGRATION_GUIDE.md → archive/2025-10-10-operations/APM_INTEGRATION_GUIDE.md} +0 -0
- /package/docs/{operations → archive/2025-10-10-operations}/DEPLOYMENT.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations}/ENABLE_AUTHENTICATION.md +0 -0
- /package/docs/{HOW_METRICS_WORK.md → archive/2025-10-10-operations/HOW_METRICS_WORK.md} +0 -0
- /package/docs/{METRICS_PLACEMENT_STRATEGY.md → archive/2025-10-10-operations/METRICS_PLACEMENT_STRATEGY.md} +0 -0
- /package/docs/{PRODUCTION_OPERATIONS.md → archive/2025-10-10-operations/PRODUCTION_OPERATIONS.md} +0 -0
- /package/docs/{operations → archive/2025-10-10-operations}/RESOURCE_MANAGEMENT_IMPLEMENTATION_PLAN.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations}/RESOURCE_MANAGEMENT_TECHNICAL_SPECS.md +0 -0
- /package/docs/{SESSION_CLEANUP_SYSTEM.md → archive/2025-10-10-operations/SESSION_CLEANUP_SYSTEM.md} +0 -0
- /package/docs/{V2_TRANSPARENCY_SYSTEM.md → archive/2025-10-10-operations/V2_TRANSPARENCY_SYSTEM.md} +0 -0
- /package/docs/{operations → archive/2025-10-10-operations}/analytics-system.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations/benchmarks}/benchmark-claude-flow-conflict-analysis.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations/benchmarks}/benchmark-cleanup-analysis.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations/benchmarks}/build-artifacts-analysis.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations/byzantine-consensus}/FINAL_BYZANTINE_CONSENSUS_VERIFICATION_REPORT.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations/byzantine-consensus}/byzantine-consensus-verification-report-phase2.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations/byzantine-consensus}/byzantine-consensus-verification-report-phase4.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations}/chrome-mcp-research-report.md +0 -0
- /package/docs/{ci-cd → archive/2025-10-10-operations/ci-cd}/README.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations}/cli-command-consolidation-analysis.md +0 -0
- /package/docs/{deployment → archive/2025-10-10-operations/deployment}/DEPLOYMENT_GUIDE.md +0 -0
- /package/docs/{deployment → archive/2025-10-10-operations/deployment}/DEPLOYMENT_STRATEGIES.md +0 -0
- /package/docs/{deployment → archive/2025-10-10-operations/deployment}/DISASTER_RECOVERY.md +0 -0
- /package/docs/{deployment → archive/2025-10-10-operations/deployment}/DOCKER_SECURITY.md +0 -0
- /package/docs/{deployment → archive/2025-10-10-operations/deployment}/HELM_CHARTS.md +0 -0
- /package/docs/{deployment → archive/2025-10-10-operations/deployment}/INFRASTRUCTURE_AS_CODE.md +0 -0
- /package/docs/{deployment → archive/2025-10-10-operations/deployment}/MONITORING_OBSERVABILITY.md +0 -0
- /package/docs/{deployment → archive/2025-10-10-operations/deployment}/PERFORMANCE_OPTIMIZATION.md +0 -0
- /package/docs/{deployment → archive/2025-10-10-operations/deployment}/README.md +0 -0
- /package/docs/{deployment → archive/2025-10-10-operations/deployment}/pm2-setup.md +0 -0
- /package/docs/{deployment → archive/2025-10-10-operations/deployment}/production-deployment-guide.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations}/deployment-checklist.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations}/deployment-report.md +0 -0
- /package/docs/{metrics-counter-usage.md → archive/2025-10-10-operations/metrics-counter-usage.md} +0 -0
- /package/docs/{operations → archive/2025-10-10-operations}/migration-strategy.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations/performance-analysis}/agent-analysis-report.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations/performance-analysis}/agent-persistence-performance-analysis.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations/performance-analysis}/performance-analysis-report.md +0 -0
- /package/docs/{runbooks → archive/2025-10-10-operations/runbooks}/DATABASE_PERFORMANCE_RUNBOOK.md +0 -0
- /package/docs/{runbooks → archive/2025-10-10-operations/runbooks}/EMERGENCY_RESPONSE_PROCEDURES.md +0 -0
- /package/docs/{runbooks → archive/2025-10-10-operations/runbooks}/SERVICE_OUTAGE_RUNBOOK.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations}/shadcn-mcp-swarm-research-report.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations}/training-pipeline-demo.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations}/training-pipeline-real-only.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations/validation-reports}/COMPREHENSIVE_QA_VALIDATION_REPORT.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations/validation-reports}/PRODUCTION_VALIDATION_REPORT.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations/validation-reports}/WIKI_VALIDATION_REPORT.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations/validation-reports}/checkpoint-1-3-validation-report.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations/validation-reports}/checkpoint-1-4-validation-summary.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations/validation-reports}/cli-validation-report.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations/validation-reports}/command-consolidation-usability-validation.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations/validation-reports}/configuration-system-validation-report.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations/validation-reports}/experimental-features-validation-report.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations/validation-reports}/final-validation-summary.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations/validation-reports}/unified-config-validation-report.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations/validation-reports}/validation-executive-summary.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations/validation-reports}/validator-scope-overreach-analysis.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations/validation-reports}/verification-integration.md +0 -0
- /package/docs/{operations → archive/2025-10-10-operations/validation-reports}/verification-validation.md +0 -0
- /package/docs/{performance → archive/2025-10-10-performance}/COMPREHENSIVE_SQLITE_ANALYSIS.md +0 -0
- /package/docs/{LRU_GARBAGE_COLLECTION.md → archive/2025-10-10-performance/LRU_GARBAGE_COLLECTION.md} +0 -0
- /package/docs/{OPTIMIZATION_SAFETY_REPORT.md → archive/2025-10-10-performance/OPTIMIZATION_SAFETY_REPORT.md} +0 -0
- /package/docs/{performance → archive/2025-10-10-performance}/Phase3-Remediation-Report.md +0 -0
- /package/docs/{benchmark-realistic-code-generation.md → archive/2025-10-10-performance/benchmarks/benchmark-realistic-code-generation.md} +0 -0
- /package/docs/{benchmark-rust-known-issues.md → archive/2025-10-10-performance/benchmarks/benchmark-rust-known-issues.md} +0 -0
- /package/docs/{benchmark-rust-support-summary.md → archive/2025-10-10-performance/benchmarks/benchmark-rust-support-summary.md} +0 -0
- /package/docs/{optimization → archive/2025-10-10-performance/optimization}/README.md +0 -0
- /package/docs/{optimization → archive/2025-10-10-performance/optimization}/communication-improvements.md +0 -0
- /package/docs/{performance → archive/2025-10-10-performance/optimization}/sqlite-performance-analysis.md +0 -0
- /package/docs/{security → archive/2025-10-10-security}/DEPLOYMENT_CHECKLIST.md +0 -0
- /package/docs/{security → archive/2025-10-10-security}/GIT_SECRETS_SETUP.md +0 -0
- /package/docs/{operations/SECURITY_AUDIT_REPORT.md → archive/2025-10-10-security/PACKAGE_SECURITY_AUDIT.md} +0 -0
- /package/docs/{security → archive/2025-10-10-security}/SECRET-DETECTION.md +0 -0
- /package/docs/{SECURITY_AUDIT_REPORT.md → archive/2025-10-10-security/SECRET_DETECTION_AUDIT.md} +0 -0
- /package/docs/{security → archive/2025-10-10-security/authentication}/JWT_AUTHENTICATION.md +0 -0
- /package/docs/{security → archive/2025-10-10-security/authentication}/MIGRATION_BASE64_TO_JWT.md +0 -0
- /package/docs/{security → archive/2025-10-10-security/authentication}/REDIS_AUTHENTICATION.md +0 -0
- /package/docs/{SECURITY_AUTH.md → archive/2025-10-10-security/authentication/SECURITY_AUTH.md} +0 -0
- /package/docs/{certification → archive/2025-10-10-security/certification}/FINAL-PRODUCTION-CERTIFICATION.md +0 -0
- /package/docs/{certification → archive/2025-10-10-security/certification}/README.md +0 -0
- /package/docs/{certification → archive/2025-10-10-security/certification}/fullstack-swarm-production-cert.md +0 -0
- /package/docs/{certification → archive/2025-10-10-security/certification}/post-edit-pipeline-production-cert.md +0 -0
- /package/docs/{security → archive/2025-10-10-security}/phase5-security-implementation-summary.md +0 -0
- /package/docs/{security → archive/2025-10-10-security}/sec-024-lamport-clock-implementation.md +0 -0
- /package/docs/{security → archive/2025-10-10-security/vulnerabilities}/CRYPTO_CIPHER_FIX_REPORT.md +0 -0
- /package/docs/{security → archive/2025-10-10-security/vulnerabilities}/CRYPTO_VULNERABILITY_SUMMARY.md +0 -0
- /package/docs/{security → archive/2025-10-10-security/vulnerabilities}/cve-2025-005-006-implementation.md +0 -0
- /package/docs/{security → archive/2025-10-10-security/vulnerabilities}/rbac-test-bypass-fix.md +0 -0
- /package/docs/{testing → archive/2025-10-10-testing}/README.md +0 -0
- /package/docs/{testing → archive/2025-10-10-testing}/consensus-decision-matrix.md +0 -0
- /package/docs/{testing → archive/2025-10-10-testing}/playwright-mcp-integration-guide.md +0 -0
- /package/docs/{CROSS_PLATFORM_TEST_RESULTS.md → archive/2025-10-10-testing/test-results/CROSS_PLATFORM_TEST_RESULTS.md} +0 -0
- /package/docs/{V2_MULTI_LEVEL_TEST_RESULTS.md → archive/2025-10-10-testing/test-results/V2_MULTI_LEVEL_TEST_RESULTS.md} +0 -0
- /package/docs/{backend-testing-system.md → archive/2025-10-10-testing/test-results/backend-testing-system.md} +0 -0
- /package/docs/{benchmark-test-report.md → archive/2025-10-10-testing/test-results/benchmark-test-report.md} +0 -0
- /package/docs/{testing → archive/2025-10-10-testing/test-results}/comprehensive-test-results.md +0 -0
- /package/docs/{validation → archive/2025-10-10-testing/validation}/PRODUCTION-CERTIFICATION-SUMMARY.md +0 -0
- /package/docs/{validation → archive/2025-10-10-testing/validation}/byzantine-consensus-coordination-report.md +0 -0
- /package/docs/{validation → archive/2025-10-10-testing/validation}/byzantine-consensus-summary.md +0 -0
- /package/docs/{validation → archive/2025-10-10-testing/validation}/completion-validation-verification-report.md +0 -0
- /package/docs/{validation → archive/2025-10-10-testing/validation}/fullstack-integration-report.md +0 -0
- /package/docs/{validation → archive/2025-10-10-testing/validation}/phase2-byzantine-consensus-verification-report.md +0 -0
- /package/docs/{validation → archive/2025-10-10-testing/validation}/phase2-completion-consensus-report.md +0 -0
- /package/docs/{validation → archive/2025-10-10-testing/validation}/stage5-consensus-report.md +0 -0
- /package/docs/{validation → archive/2025-10-10-testing/validation}/stage6-final-certification.md +0 -0
- /package/docs/{validation → archive/2025-10-10-testing/validation}/stage7-production-certification.md +0 -0
- /package/docs/{ERROR_HANDLING_IMPLEMENTATION_SUMMARY.md → archive/ERROR_HANDLING_IMPLEMENTATION_SUMMARY.md} +0 -0
- /package/docs/{ERROR_MESSAGES_GUIDE.md → archive/ERROR_MESSAGES_GUIDE.md} +0 -0
- /package/docs/{HTTP_POLLING_FALLBACK.md → archive/HTTP_POLLING_FALLBACK.md.backup-1760135090706} +0 -0
- /package/docs/{phase2-implementation-summary.md → archive/cfn-loop/completed-phases/phase2-implementation-summary.md} +0 -0
- /package/docs/{CFN_LOOP.md → archive/cfn-loop/deprecated-3-loop/CFN_LOOP.md} +0 -0
- /package/docs/{validation-loop-pattern.md → archive/cfn-loop/early-patterns/validation-loop-pattern.md} +0 -0
- /package/docs/{MCP_ENDPOINTS_REFERENCE.md → archive/deprecated-mcp/MCP_ENDPOINTS_REFERENCE.md} +0 -0
- /package/docs/{api → archive/deprecated-mcp}/MCP_TOOLS.md +0 -0
- /package/docs/{api → archive/deprecated-mcp}/mcp-swarm-integration-api.md +0 -0
- /package/docs/{API_AUTH.md → archive/phase3-auth-unimplemented/API_AUTH.md} +0 -0
- /package/docs/{AUTHENTICATION.md → archive/phase3-auth-unimplemented/AUTHENTICATION.md} +0 -0
- /package/docs/{AUTH_DOCUMENTATION_SUMMARY.md → archive/phase3-auth-unimplemented/AUTH_DOCUMENTATION_SUMMARY.md} +0 -0
- /package/docs/{AUTH_MIGRATION.md → archive/phase3-auth-unimplemented/AUTH_MIGRATION.md} +0 -0
- /package/docs/{phase5-booster-integration-summary.md → archive/phase5-booster-integration-summary.md} +0 -0
- /package/{CHANGELOG_V2.md → docs/archive/reference-historical/CHANGELOG_V2.md} +0 -0
- /package/docs/{INDEX.md → archive/reference-historical/INDEX.md} +0 -0
- /package/docs/{CFN_LOOP_PHASE_ORCHESTRATION.md → cfn-loop/CFN_LOOP_PHASE_ORCHESTRATION.md} +0 -0
- /package/docs/{CFN_LOOP_SCOPE_CONTROL.md → cfn-loop/CFN_LOOP_SCOPE_CONTROL.md} +0 -0
- /package/docs/{CFN_LOOP_SELF_LOOPING_ADDITIONS.md → cfn-loop/CFN_LOOP_SELF_LOOPING_ADDITIONS.md} +0 -0
- /package/docs/{SPRINT_ORCHESTRATION.md → cfn-loop/SPRINT_ORCHESTRATION.md} +0 -0
- /package/docs/{epic-iteration-limits-implementation.md → cfn-loop/epic-iteration-limits-implementation.md} +0 -0
- /package/docs/{phase-5-sprint-5.2-multi-level-control.md → cfn-loop/phase-5-sprint-5.2-multi-level-control.md} +0 -0
- /package/docs/{phase-orchestrator-sprint-enhancement-summary.md → cfn-loop/phase-orchestrator-sprint-enhancement-summary.md} +0 -0
- /package/docs/{phases → cfn-loop/phases}/PHASE_06_ARCHITECTURE_SUMMARY.md +0 -0
- /package/docs/{phases → cfn-loop/phases}/PHASE_06_COMPONENT_INTERFACES.md +0 -0
- /package/docs/{phases → cfn-loop/phases}/PHASE_06_INTEGRATION_STRATEGY.md +0 -0
- /package/docs/{phases → cfn-loop/phases}/PHASE_06_MESH_COORDINATION_ARCHITECTURE.md +0 -0
- /package/docs/{phases → cfn-loop/phases}/PHASE_06_README.md +0 -0
- /package/docs/{phases → cfn-loop/phases}/PHASE_07_HELP_SYSTEM_ARCHITECTURE.md +0 -0
- /package/docs/{phases → cfn-loop/phases}/PHASE_0_SDK_FOUNDATION.md +0 -0
- /package/docs/{phases → cfn-loop/phases}/phase-05-architecture.md +0 -0
- /package/docs/{self-validating-loops-implementation.md → cfn-loop/self-validating-loops-implementation.md} +0 -0
- /package/{CHANGELOG.md → docs/reference/CHANGELOG.md} +0 -0
- /package/{NPM_PACKAGE_CONTENTS.md → docs/reference/NPM_PACKAGE_CONTENTS.md} +0 -0
- /package/{README-NPM.md → docs/reference/README-NPM.md} +0 -0
- /package/docs/{SITE_MAP.md → reference/SITE_MAP.md} +0 -0
- /package/docs/{research → reference/research}/AGENT_ACCESSIBILITY_GUIDE.md +0 -0
- /package/docs/{research → reference/research}/AGENT_PERMISSION_SYSTEM_ANALYSIS.md +0 -0
- /package/docs/{research → reference/research}/CLAUDE_AGENT_SDK_COMPREHENSIVE_ANALYSIS.md +0 -0
- /package/docs/{research → reference/research}/CLAUDE_AGENT_SDK_EXECUTIVE_SUMMARY.md +0 -0
- /package/docs/{research → reference/research}/CLEANUP_CRITERIA_QUICK_REFERENCE.md +0 -0
- /package/docs/{research → reference/research}/claude-session-cpu-behavior-analysis.md +0 -0
- /package/docs/{research → reference/research}/completion-validation-research.md +0 -0
- /package/docs/{templates → reference/templates}/PHASE_DOCUMENT_TEMPLATE.md +0 -0
- /package/docs/{templates → reference/templates}/PHASE_TEMPLATE_USAGE_GUIDE.md +0 -0
- /package/docs/{TEMPLATE_CUSTOMIZATION_GUIDE.md → reference/templates/TEMPLATE_CUSTOMIZATION_GUIDE.md} +0 -0
- /package/docs/{TEMPLATE_EXAMPLES_AND_BEST_PRACTICES.md → reference/templates/TEMPLATE_EXAMPLES_AND_BEST_PRACTICES.md} +0 -0
- /package/docs/{TEMPLATE_SYSTEM_DOCUMENTATION.md → reference/templates/TEMPLATE_SYSTEM_DOCUMENTATION.md} +0 -0
- /package/docs/{wiki → reference/wiki}/background-commands.md +0 -0
- /package/docs/{wiki → reference/wiki}/efficiency-patterns-and-anti-patterns.md +0 -0
- /package/docs/{wiki → reference/wiki}/monitoring-and-metrics-guide.md +0 -0
- /package/docs/{wiki → reference/wiki}/performance-benchmarking-tools.md +0 -0
- /package/docs/{wiki → reference/wiki}/performance-optimization-strategies.md +0 -0
- /package/docs/{wiki → reference/wiki}/performance-testing-framework.md +0 -0
- /package/docs/{wiki → reference/wiki}/resource-optimization-techniques.md +0 -0
- /package/docs/{wiki → reference/wiki}/scalability-guidelines.md +0 -0
- /package/docs/{wiki → reference/wiki}/security/README.md +0 -0
- /package/docs/{wiki → reference/wiki}/security/authentication-authorization-strategies.md +0 -0
- /package/docs/{wiki → reference/wiki}/security/compliance-automation-workflows.md +0 -0
- /package/docs/{wiki → reference/wiki}/security/compliance-frameworks-integration.md +0 -0
- /package/docs/{wiki → reference/wiki}/security/enterprise-security-patterns.md +0 -0
- /package/docs/{wiki → reference/wiki}/security/incident-response-guide.md +0 -0
- /package/docs/{wiki → reference/wiki}/security/secrets-management-guide.md +0 -0
- /package/docs/{wiki → reference/wiki}/security/secure-coding-patterns.md +0 -0
- /package/docs/{wiki → reference/wiki}/security/security-best-practices.md +0 -0
- /package/docs/{wiki → reference/wiki}/security/security-first-development-workflows.md +0 -0
- /package/docs/{wiki → reference/wiki}/security/security-testing-framework.md +0 -0
- /package/docs/{wiki → reference/wiki}/session-persistence.md +0 -0
- /package/docs/{wiki → reference/wiki}/stream-chain-command.md +0 -0
- /package/docs/{wiki → reference/wiki}/troubleshooting/README.md +0 -0
- /package/docs/{wiki → reference/wiki}/troubleshooting/cli-troubleshooting.md +0 -0
- /package/docs/{wiki → reference/wiki}/troubleshooting/configuration-troubleshooting.md +0 -0
- /package/docs/{wiki → reference/wiki}/troubleshooting/debug-mode.md +0 -0
- /package/docs/{wiki → reference/wiki}/troubleshooting/error-analysis.md +0 -0
- /package/docs/{wiki → reference/wiki}/troubleshooting/linux-troubleshooting.md +0 -0
- /package/docs/{wiki → reference/wiki}/troubleshooting/log-analysis.md +0 -0
- /package/docs/{wiki → reference/wiki}/troubleshooting/macos-troubleshooting.md +0 -0
- /package/docs/{wiki → reference/wiki}/troubleshooting/mcp-troubleshooting.md +0 -0
- /package/docs/{wiki → reference/wiki}/troubleshooting/performance-troubleshooting.md +0 -0
- /package/docs/{wiki → reference/wiki}/troubleshooting/quick-reference.md +0 -0
- /package/docs/{wiki → reference/wiki}/troubleshooting/windows-troubleshooting.md +0 -0
- /package/docs/{wiki → reference/wiki}/troubleshooting-slow-workflows.md +0 -0
- /package/docs/{CROSS_PLATFORM_VALIDATION_CONFIDENCE.json → testing/CROSS_PLATFORM_VALIDATION_CONFIDENCE.json} +0 -0
- /package/docs/{validation → testing/validation}/byzantine-validation-report.json +0 -0
- /package/{templates → examples/templates}/README.md +0 -0
- /package/{templates → examples/templates}/basic-swarm/.claude/settings.json +0 -0
- /package/{templates/basic-swarm/CLAUDE.md → examples/templates/basic-swarm/CLAUDE.md.backup-1760135091193} +0 -0
- /package/{templates → examples/templates}/basic-swarm/coordination.md +0 -0
- /package/{templates → examples/templates}/basic-swarm/memory-bank.md +0 -0
- /package/{templates → examples/templates}/basic-swarm/package.json +0 -0
- /package/{templates → examples/templates}/custom-agent/.claude/settings.json +0 -0
- /package/{templates/custom-agent/CLAUDE.md → examples/templates/custom-agent/CLAUDE.md.backup-1760135091180} +0 -0
- /package/{templates → examples/templates}/event-bus/.claude/settings.json +0 -0
- /package/{templates → examples/templates}/event-bus/CLAUDE.md +0 -0
- /package/{templates → examples/templates}/fleet-manager/.claude/settings.json +0 -0
- /package/{templates/fleet-manager/CLAUDE.md → examples/templates/fleet-manager/CLAUDE.md.backup-1760135091167} +0 -0
- /package/{docs → scripts}/agent-token-analysis.js +0 -0
|
@@ -0,0 +1,1279 @@
|
|
|
1
|
+
# Blocking Coordination Troubleshooting Guide
|
|
2
|
+
|
|
3
|
+
**Version:** 1.0
|
|
4
|
+
**Last Updated:** 2025-10-10
|
|
5
|
+
**Maintainer:** DevOps Team
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Quick Diagnostics
|
|
10
|
+
|
|
11
|
+
### Health Check Commands
|
|
12
|
+
```bash
|
|
13
|
+
# Check Redis connectivity
|
|
14
|
+
redis-cli ping # Should return "PONG"
|
|
15
|
+
|
|
16
|
+
# List active coordinators
|
|
17
|
+
redis-cli KEYS "blocking:heartbeat:*"
|
|
18
|
+
|
|
19
|
+
# Check coordinator heartbeat age
|
|
20
|
+
echo $(( $(date +%s) - $(redis-cli GET "blocking:heartbeat:<coordinator-id>" | jq '.timestamp') / 1000 ))
|
|
21
|
+
|
|
22
|
+
# View pending signals
|
|
23
|
+
redis-cli KEYS "blocking:signal:*"
|
|
24
|
+
|
|
25
|
+
# Check circuit breaker status
|
|
26
|
+
redis-cli GET "circuit:breaker:status"
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
### Log Locations
|
|
30
|
+
- **Coordinator logs:** `/var/log/cfn-loop/coordinator-<id>.log`
|
|
31
|
+
- **Timeout handler logs:** `/var/log/cfn-loop/timeout-handler.log`
|
|
32
|
+
- **Cleanup script logs:** `/var/log/cfn-loop/cleanup.log`
|
|
33
|
+
- **Redis logs:** `/var/log/redis/redis-server.log`
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## Issue 1: Coordinator Stuck in Blocking State
|
|
38
|
+
|
|
39
|
+
### Symptoms
|
|
40
|
+
- Coordinator process running but not progressing
|
|
41
|
+
- Log shows: `"Waiting for signal..."` with no resolution
|
|
42
|
+
- Process has been blocked >30 minutes
|
|
43
|
+
- No timeout error after expected timeout period
|
|
44
|
+
|
|
45
|
+
### Diagnosis Steps
|
|
46
|
+
|
|
47
|
+
**Step 1: Verify Signal Sent**
|
|
48
|
+
```bash
|
|
49
|
+
# Check if signal exists in Redis
|
|
50
|
+
COORDINATOR_ID="coord-123"
|
|
51
|
+
redis-cli GET "blocking:signal:$COORDINATOR_ID"
|
|
52
|
+
|
|
53
|
+
# If empty → signal was never sent
|
|
54
|
+
# If present → check signature verification
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
**Step 2: Check Redis Connectivity**
|
|
58
|
+
```bash
|
|
59
|
+
# Test coordinator can reach Redis
|
|
60
|
+
redis-cli -h <redis-host> ping # Run this from the coordinator host
|
|
61
|
+
|
|
62
|
+
# Check network latency
|
|
63
|
+
redis-cli --latency-history
|
|
64
|
+
|
|
65
|
+
# Expected: <10ms P99 latency
|
|
66
|
+
# If >100ms → network issue
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
**Step 3: Verify Signal ACK**
|
|
70
|
+
```bash
|
|
71
|
+
# Check if ACK was sent back
|
|
72
|
+
SENDER_ID="coord-456"
|
|
73
|
+
redis-cli GET "blocking:ack:$SENDER_ID"
|
|
74
|
+
|
|
75
|
+
# If empty → ACK not sent (receiver issue)
|
|
76
|
+
# If present but blocking continues → receiver didn't process ACK
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
**Step 4: Inspect Coordinator Logs**
|
|
80
|
+
```bash
|
|
81
|
+
# Look for verification errors
|
|
82
|
+
grep "signature" /var/log/cfn-loop/coordinator-$COORDINATOR_ID.log
|
|
83
|
+
|
|
84
|
+
# Common errors:
|
|
85
|
+
# "Invalid signal signature" → HMAC secret mismatch
|
|
86
|
+
# "Signal timestamp too old" → Clock skew
|
|
87
|
+
# "Unknown signal type" → Protocol version mismatch
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Solutions
|
|
91
|
+
|
|
92
|
+
**Solution 1: HMAC Secret Mismatch**
|
|
93
|
+
```bash
|
|
94
|
+
# Verify all coordinators use same secret
|
|
95
|
+
for host in coord-1 coord-2 coord-3; do
|
|
96
|
+
ssh $host "echo \$BLOCKING_COORDINATION_SECRET | md5sum"
|
|
97
|
+
done
|
|
98
|
+
|
|
99
|
+
# All MD5 hashes should match
|
|
100
|
+
|
|
101
|
+
# If mismatch, update secret on affected coordinator
|
|
102
|
+
ssh coord-1 "sed -i 's/^BLOCKING_COORDINATION_SECRET=.*/BLOCKING_COORDINATION_SECRET=<correct-secret>/' /etc/coordinator/env && systemctl restart coordinator"
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
**Solution 2: Clock Skew**
|
|
106
|
+
```bash
|
|
107
|
+
# Check time synchronization
|
|
108
|
+
timedatectl status
|
|
109
|
+
|
|
110
|
+
# If "System clock synchronized: no"
|
|
111
|
+
sudo systemctl restart systemd-timesyncd
|
|
112
|
+
sudo timedatectl set-ntp true
|
|
113
|
+
|
|
114
|
+
# Verify NTP sync
|
|
115
|
+
ntpq -p
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
**Solution 3: Manual Signal Resend**
|
|
119
|
+
```bash
|
|
120
|
+
# Manually send a signal to unblock a stuck coordinator
|
|
121
|
+
TIMESTAMP=$(date +%s)
|
|
122
|
+
SIGNAL=$(cat <<EOF
|
|
123
|
+
{
|
|
124
|
+
"senderId": "manual-override",
|
|
125
|
+
"receiverId": "$COORDINATOR_ID",
|
|
126
|
+
"type": "wake",
|
|
127
|
+
"timestamp": $TIMESTAMP
|
|
128
|
+
}
|
|
129
|
+
EOF
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
redis-cli SETEX "blocking:signal:$COORDINATOR_ID" 86400 "$SIGNAL"
|
|
133
|
+
|
|
134
|
+
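# Check coordinator logs for "Signal received" (an unsigned signal is rejected when signature verification is enforced; add a "signature" field computed as in Issue 2, Step 3)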
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
**Solution 4: Force Timeout**
|
|
138
|
+
```bash
|
|
139
|
+
# If coordinator should have timed out but hasn't
|
|
140
|
+
# Check timeout handler is running
|
|
141
|
+
pgrep -af coordinator-timeout-handler
|
|
142
|
+
|
|
143
|
+
# If not running, start it
|
|
144
|
+
node src/cfn-loop/coordinator-timeout-handler.js &
|
|
145
|
+
|
|
146
|
+
# Check logs for timeout processing
|
|
147
|
+
tail -f /var/log/cfn-loop/timeout-handler.log
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### Prevention
|
|
151
|
+
- Set up HMAC secret validation in CI/CD (all instances must have same secret)
|
|
152
|
+
- Enable NTP on all coordinator hosts
|
|
153
|
+
- Monitor blocking duration with Prometheus alert (P95 >5min)
|
|
154
|
+
- Add health check endpoint to coordinators (`/health` → returns blocking state)
|
|
155
|
+
|
|
156
|
+
---
|
|
157
|
+
|
|
158
|
+
## Issue 2: Signal ACK Verification Fails
|
|
159
|
+
|
|
160
|
+
### Symptoms
|
|
161
|
+
- Log shows: `"ACK verification failed for signal from <sender-id>"`
|
|
162
|
+
- Coordinator sends signal but never receives valid ACK
|
|
163
|
+
- Retry attempts all fail with same error
|
|
164
|
+
- No network or Redis errors
|
|
165
|
+
|
|
166
|
+
### Diagnosis Steps
|
|
167
|
+
|
|
168
|
+
**Step 1: Verify HMAC Secret**
|
|
169
|
+
```bash
|
|
170
|
+
# Check sender secret
|
|
171
|
+
ssh sender-host "echo \$BLOCKING_COORDINATION_SECRET"
|
|
172
|
+
|
|
173
|
+
# Check receiver secret
|
|
174
|
+
ssh receiver-host "echo \$BLOCKING_COORDINATION_SECRET"
|
|
175
|
+
|
|
176
|
+
# Secrets must match exactly (case-sensitive)
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
**Step 2: Check Timestamp Drift**
|
|
180
|
+
```bash
|
|
181
|
+
# Get Redis server time
|
|
182
|
+
REDIS_TIME=$(redis-cli TIME | head -1)
|
|
183
|
+
|
|
184
|
+
# Get coordinator time
|
|
185
|
+
COORDINATOR_TIME=$(date +%s)
|
|
186
|
+
|
|
187
|
+
# Calculate drift
|
|
188
|
+
DRIFT=$(( COORDINATOR_TIME - REDIS_TIME ))
|
|
189
|
+
echo "Drift: ${DRIFT}s"
|
|
190
|
+
|
|
191
|
+
# If |drift| >60s → clock skew issue
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
**Step 3: Inspect Signature Calculation**
|
|
195
|
+
```bash
|
|
196
|
+
# Manually calculate expected signature
|
|
197
|
+
SENDER_ID="coord-sender"
|
|
198
|
+
RECEIVER_ID="coord-receiver"
|
|
199
|
+
TYPE="wake"
|
|
200
|
+
TIMESTAMP=$(date +%s)
|
|
201
|
+
SECRET="<your-secret>"
|
|
202
|
+
|
|
203
|
+
PAYLOAD="${SENDER_ID}:${RECEIVER_ID}:${TYPE}:${TIMESTAMP}"
|
|
204
|
+
SIGNATURE=$(echo -n "$PAYLOAD" | openssl dgst -sha256 -hmac "$SECRET" -hex | awk '{print $2}')
|
|
205
|
+
|
|
206
|
+
echo "Expected signature: $SIGNATURE"
|
|
207
|
+
|
|
208
|
+
# Compare with actual signature in Redis
|
|
209
|
+
redis-cli GET "blocking:signal:$RECEIVER_ID" | jq -r '.signature'
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
**Step 4: Check for Timing Attacks**
|
|
213
|
+
```bash
|
|
214
|
+
# Enable debug logging for signature verification
|
|
215
|
+
echo 'DEBUG_SIGNATURE_VERIFICATION=true' >> /etc/coordinator/env
|
|
216
|
+
|
|
217
|
+
# Restart coordinator with debug logging
|
|
218
|
+
systemctl restart coordinator
|
|
219
|
+
|
|
220
|
+
# Look for timing-safe comparison failures
|
|
221
|
+
grep "timingSafeEqual" /var/log/cfn-loop/coordinator-*.log
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
### Solutions
|
|
225
|
+
|
|
226
|
+
**Solution 1: Synchronize HMAC Secrets**
|
|
227
|
+
```bash
|
|
228
|
+
# Generate new secret
|
|
229
|
+
NEW_SECRET=$(openssl rand -hex 32)
|
|
230
|
+
|
|
231
|
+
# Distribute to all coordinators using dual-secret strategy
|
|
232
|
+
# Step 1: Add new secret alongside old (zero-downtime)
|
|
233
|
+
for host in coord-1 coord-2 coord-3; do
|
|
234
|
+
ssh $host "echo 'BLOCKING_COORDINATION_SECRET_NEW=$NEW_SECRET' >> /etc/coordinator/env"
|
|
235
|
+
ssh $host "systemctl reload coordinator" # Reload without restart
|
|
236
|
+
done
|
|
237
|
+
|
|
238
|
+
# Step 2: Wait 24h for all in-flight signals to clear
|
|
239
|
+
|
|
240
|
+
# Step 3: Promote new secret to primary
|
|
241
|
+
for host in coord-1 coord-2 coord-3; do
|
|
242
|
+
ssh $host "sed -i 's/BLOCKING_COORDINATION_SECRET=.*/BLOCKING_COORDINATION_SECRET=$NEW_SECRET/' /etc/coordinator/env"
|
|
243
|
+
ssh $host "sed -i '/BLOCKING_COORDINATION_SECRET_NEW/d' /etc/coordinator/env"
|
|
244
|
+
ssh $host "systemctl restart coordinator"
|
|
245
|
+
done
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
**Solution 2: Fix Clock Skew**
|
|
249
|
+
```bash
|
|
250
|
+
# Force time sync across all coordinators
|
|
251
|
+
ansible all -m shell -a "sudo systemctl restart systemd-timesyncd && sudo timedatectl set-ntp true"
|
|
252
|
+
|
|
253
|
+
# Verify sync status
|
|
254
|
+
ansible all -m shell -a "timedatectl status | grep synchronized"
|
|
255
|
+
|
|
256
|
+
# All should show "System clock synchronized: yes"
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
**Solution 3: Update Signature Algorithm**
|
|
260
|
+
```bash
|
|
261
|
+
# If using old signature format, migrate to current format
|
|
262
|
+
# Old format (insecure): senderId + receiverId
|
|
263
|
+
# New format (secure): senderId:receiverId:type:timestamp
|
|
264
|
+
|
|
265
|
+
# Find coordinators using old format
|
|
266
|
+
grep "signalSignature.*Buffer.from" src/cfn-loop/*.ts
|
|
267
|
+
|
|
268
|
+
# Update to new format in all coordinators
|
|
269
|
+
# Ensure payload includes all fields with colons as separators
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
**Solution 4: Verify timingSafeEqual Usage**
|
|
273
|
+
```typescript
|
|
274
|
+
// Correct implementation (timing-attack resistant)
|
|
275
|
+
private verifySignalSignature(signal: Signal): boolean {
|
|
276
|
+
if (!signal.signature) return false;
|
|
277
|
+
|
|
278
|
+
const expectedSignature = this.signSignal(signal);
|
|
279
|
+
|
|
280
|
+
// Convert to buffers for timing-safe comparison
|
|
281
|
+
const receivedBuffer = Buffer.from(signal.signature, 'hex');
|
|
282
|
+
const expectedBuffer = Buffer.from(expectedSignature, 'hex');
|
|
283
|
+
|
|
284
|
+
// Lengths must match for timingSafeEqual
|
|
285
|
+
if (receivedBuffer.length !== expectedBuffer.length) {
|
|
286
|
+
return false;
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
return crypto.timingSafeEqual(receivedBuffer, expectedBuffer);
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
// Incorrect implementation (vulnerable to timing attacks)
|
|
293
|
+
private verifySignalSignature(signal: Signal): boolean {
|
|
294
|
+
const expectedSignature = this.signSignal(signal);
|
|
295
|
+
return signal.signature === expectedSignature; // ❌ Timing attack!
|
|
296
|
+
}
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
### Prevention
|
|
300
|
+
- Store HMAC secret in HashiCorp Vault or AWS Secrets Manager (never git)
|
|
301
|
+
- Rotate secrets every 90 days (30 days in staging)
|
|
302
|
+
- Add signature verification integration test to CI/CD
|
|
303
|
+
- Monitor signature verification failures with alert threshold (>0.1/s)
|
|
304
|
+
|
|
305
|
+
---
|
|
306
|
+
|
|
307
|
+
## Issue 3: Dead Coordinator Not Detected
|
|
308
|
+
|
|
309
|
+
### Symptoms
|
|
310
|
+
- Coordinator process has crashed or been killed
|
|
311
|
+
- Heartbeat key expired in Redis
|
|
312
|
+
- Timeout handler logs show no detection
|
|
313
|
+
- Work assigned to dead coordinator is not transferred
|
|
314
|
+
|
|
315
|
+
### Diagnosis Steps
|
|
316
|
+
|
|
317
|
+
**Step 1: Verify Timeout Handler Running**
|
|
318
|
+
```bash
|
|
319
|
+
# Check process
|
|
320
|
+
pgrep -af coordinator-timeout-handler
|
|
321
|
+
|
|
322
|
+
# If not running → timeout handler crashed or not started
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
**Step 2: Check Timeout Handler Logs**
|
|
326
|
+
```bash
|
|
327
|
+
# Look for errors
|
|
328
|
+
tail -50 /var/log/cfn-loop/timeout-handler.log
|
|
329
|
+
|
|
330
|
+
# Common errors:
|
|
331
|
+
# "Redis connection failed" → Can't reach Redis
|
|
332
|
+
# "No coordinators found" → No heartbeat keys in Redis
|
|
333
|
+
# "Heartbeat parsing error" → Corrupt heartbeat data
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
**Step 3: Verify Heartbeat Expiration**
|
|
337
|
+
```bash
|
|
338
|
+
# Check if heartbeat key exists
|
|
339
|
+
COORDINATOR_ID="coord-dead"
|
|
340
|
+
redis-cli GET "blocking:heartbeat:$COORDINATOR_ID"
|
|
341
|
+
|
|
342
|
+
# If key exists → not yet expired (coordinator may still be alive)
|
|
343
|
+
# If null → key expired (should have been detected)
|
|
344
|
+
|
|
345
|
+
# Check TTL
|
|
346
|
+
redis-cli TTL "blocking:heartbeat:$COORDINATOR_ID"
|
|
347
|
+
# -2 = key doesn't exist (expired)
|
|
348
|
+
# -1 = key exists but no TTL set (bug!)
|
|
349
|
+
# >0 = seconds until expiration
|
|
350
|
+
```
|
|
351
|
+
|
|
352
|
+
**Step 4: Check Warning Escalation**
|
|
353
|
+
```bash
|
|
354
|
+
# Check warning count for coordinator
|
|
355
|
+
redis-cli GET "blocking:warning:$COORDINATOR_ID"
|
|
356
|
+
|
|
357
|
+
# If <3 → not yet escalated (need 3 warnings)
|
|
358
|
+
# If ≥3 → should have escalated (handler bug)
|
|
359
|
+
|
|
360
|
+
# Check warning TTL
|
|
361
|
+
redis-cli TTL "blocking:warning:$COORDINATOR_ID"
|
|
362
|
+
# Should be 300s (5 minutes)
|
|
363
|
+
```
|
|
364
|
+
|
|
365
|
+
### Solutions
|
|
366
|
+
|
|
367
|
+
**Solution 1: Start Timeout Handler**
|
|
368
|
+
```bash
|
|
369
|
+
# If using systemd
|
|
370
|
+
sudo systemctl start coordinator-timeout-handler
|
|
371
|
+
sudo systemctl enable coordinator-timeout-handler # Auto-start on boot
|
|
372
|
+
|
|
373
|
+
# If using cron
|
|
374
|
+
crontab -e
|
|
375
|
+
# Add line: */5 * * * * /usr/bin/node /opt/cfn-loop/src/cfn-loop/coordinator-timeout-handler.js
|
|
376
|
+
|
|
377
|
+
# If using Docker
|
|
378
|
+
docker run -d --name timeout-handler \
|
|
379
|
+
--network cfn-loop \
|
|
380
|
+
-e REDIS_URL=redis://redis:6379 \
|
|
381
|
+
cfn-loop:latest node src/cfn-loop/coordinator-timeout-handler.js
|
|
382
|
+
```
|
|
383
|
+
|
|
384
|
+
**Solution 2: Fix Heartbeat TTL**
|
|
385
|
+
```bash
|
|
386
|
+
# Check if heartbeat keys have TTL
|
|
387
|
+
redis-cli --scan --pattern "blocking:heartbeat:*" | while read key; do
|
|
388
|
+
ttl=$(redis-cli TTL "$key")
|
|
389
|
+
if [ "$ttl" -eq -1 ]; then
|
|
390
|
+
echo "WARNING: $key has no TTL (will never expire)"
|
|
391
|
+
# Fix by setting TTL
|
|
392
|
+
redis-cli EXPIRE "$key" 90
|
|
393
|
+
fi
|
|
394
|
+
done
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
**Solution 3: Adjust Detection Timing**
|
|
398
|
+
```typescript
|
|
399
|
+
// If coordinators are frequently marked dead incorrectly,
|
|
400
|
+
// increase thresholds in coordinator-timeout-handler.ts
|
|
401
|
+
|
|
402
|
+
// Before (too aggressive):
|
|
403
|
+
const HEARTBEAT_TTL = 90; // 90s
|
|
404
|
+
const DEAD_THRESHOLD = 120; // 120s
|
|
405
|
+
|
|
406
|
+
// After (more tolerant):
|
|
407
|
+
const HEARTBEAT_TTL = 180; // 3 minutes
|
|
408
|
+
const DEAD_THRESHOLD = 240; // 4 minutes (≈1.33× TTL, same ratio as before)
|
|
409
|
+
|
|
410
|
+
// Update heartbeat interval in blocking-coordinator.ts
|
|
411
|
+
const HEARTBEAT_INTERVAL = 30000; // 30s (6x intervals before expiration)
|
|
412
|
+
```
|
|
413
|
+
|
|
414
|
+
**Solution 4: Manual Escalation**
|
|
415
|
+
```bash
|
|
416
|
+
# Force escalation for dead coordinator
|
|
417
|
+
COORDINATOR_ID="coord-dead"
|
|
418
|
+
|
|
419
|
+
# Set warning count to 3
|
|
420
|
+
redis-cli SET "blocking:warning:$COORDINATOR_ID" 3
|
|
421
|
+
redis-cli EXPIRE "blocking:warning:$COORDINATOR_ID" 300
|
|
422
|
+
|
|
423
|
+
# Trigger timeout handler
|
|
424
|
+
node -e "
|
|
425
|
+
const handler = require('./src/cfn-loop/coordinator-timeout-handler');
|
|
426
|
+
handler.checkCoordinatorActivity();
|
|
427
|
+
"
|
|
428
|
+
|
|
429
|
+
# Check logs for "Dead coordinator detected: $COORDINATOR_ID"
|
|
430
|
+
```
|
|
431
|
+
|
|
432
|
+
### Prevention
|
|
433
|
+
- Set up systemd service for timeout handler with auto-restart
|
|
434
|
+
- Monitor timeout handler process with Prometheus (process_up gauge)
|
|
435
|
+
- Add integration test: kill coordinator, verify detection within 3 minutes
|
|
436
|
+
- Set up alert for missing heartbeats (no coordinator heartbeats >5 minutes)
|
|
437
|
+
|
|
438
|
+
---
|
|
439
|
+
|
|
440
|
+
## Issue 4: Redis Connection Keeps Failing
|
|
441
|
+
|
|
442
|
+
### Symptoms
|
|
443
|
+
- Log shows: `"Circuit breaker open: Redis unavailable"`
|
|
444
|
+
- Repeated connection attempts with exponential backoff
|
|
445
|
+
- All 4 retry attempts fail
|
|
446
|
+
- Other services can connect to Redis successfully
|
|
447
|
+
|
|
448
|
+
### Diagnosis Steps
|
|
449
|
+
|
|
450
|
+
**Step 1: Test Basic Connectivity**
|
|
451
|
+
```bash
|
|
452
|
+
# From coordinator host
|
|
453
|
+
redis-cli -h <redis-host> -p 6379 ping
|
|
454
|
+
|
|
455
|
+
# If "PONG" → Redis is reachable
|
|
456
|
+
# If "Connection refused" → Redis not listening or firewall
|
|
457
|
+
# If timeout → Network issue
|
|
458
|
+
```
|
|
459
|
+
|
|
460
|
+
**Step 2: Check Authentication**
|
|
461
|
+
```bash
|
|
462
|
+
# Test with password
|
|
463
|
+
redis-cli -h <redis-host> -p 6379 -a <password> ping
|
|
464
|
+
|
|
465
|
+
# If "PONG" → password is correct
|
|
466
|
+
# If "NOAUTH Authentication required" → password missing
|
|
467
|
+
# If "ERR invalid password" (Redis <6) or "WRONGPASS invalid username-password pair" (Redis ≥6) → wrong password
|
|
468
|
+
```
|
|
469
|
+
|
|
470
|
+
**Step 3: Verify Environment Variables**
|
|
471
|
+
```bash
|
|
472
|
+
# Check coordinator environment
|
|
473
|
+
tr '\0' '\n' < "/proc/$(pgrep -of coordinator)/environ" | grep '^REDIS_'  # ps aux does not show environment variables
|
|
474
|
+
|
|
475
|
+
# Should show:
|
|
476
|
+
# REDIS_URL=redis://redis-host:6379
|
|
477
|
+
# REDIS_PASSWORD=<password>
|
|
478
|
+
|
|
479
|
+
# Check if variables are set
|
|
480
|
+
echo $REDIS_URL
|
|
481
|
+
echo $REDIS_PASSWORD
|
|
482
|
+
```
|
|
483
|
+
|
|
484
|
+
**Step 4: Check Redis Performance**
|
|
485
|
+
```bash
|
|
486
|
+
# Check slow log
|
|
487
|
+
redis-cli SLOWLOG GET 10
|
|
488
|
+
|
|
489
|
+
# If many entries → Redis is overloaded
|
|
490
|
+
|
|
491
|
+
# Check memory usage
|
|
492
|
+
redis-cli INFO memory | grep used_memory_human
|
|
493
|
+
|
|
494
|
+
# If >80% maxmemory → Redis is full
|
|
495
|
+
|
|
496
|
+
# Check client connections
|
|
497
|
+
redis-cli INFO clients | grep connected_clients
|
|
498
|
+
|
|
499
|
+
# If >10000 → too many connections
|
|
500
|
+
```
|
|
501
|
+
|
|
502
|
+
### Solutions
|
|
503
|
+
|
|
504
|
+
**Solution 1: Fix Authentication**
|
|
505
|
+
```bash
|
|
506
|
+
# Update coordinator environment variables
|
|
507
|
+
# In /etc/coordinator/env or docker-compose.yml
|
|
508
|
+
|
|
509
|
+
REDIS_URL=redis://:${REDIS_PASSWORD}@redis-host:6379
|
|
510
|
+
# Note the `:` before password
|
|
511
|
+
|
|
512
|
+
# Or use separate password parameter
|
|
513
|
+
REDIS_HOST=redis-host
|
|
514
|
+
REDIS_PORT=6379
|
|
515
|
+
REDIS_PASSWORD=your-password
|
|
516
|
+
|
|
517
|
+
# Restart coordinator
|
|
518
|
+
systemctl restart coordinator
|
|
519
|
+
```
|
|
520
|
+
|
|
521
|
+
**Solution 2: Scale Redis**
|
|
522
|
+
```bash
|
|
523
|
+
# If Redis is overloaded, scale vertically or horizontally
|
|
524
|
+
|
|
525
|
+
# Vertical scaling (more resources)
|
|
526
|
+
# Update Redis configuration
|
|
527
|
+
redis-cli CONFIG SET maxmemory 8gb
|
|
528
|
+
redis-cli CONFIG SET maxclients 20000
|
|
529
|
+
|
|
530
|
+
# Horizontal scaling (Redis Cluster)
|
|
531
|
+
# Set up 3-node cluster
|
|
532
|
+
redis-cli --cluster create \
|
|
533
|
+
redis-1:6379 redis-2:6379 redis-3:6379 \
|
|
534
|
+
--cluster-replicas 0
|
|
535
|
+
|
|
536
|
+
# Update coordinator to use cluster
|
|
537
|
+
REDIS_URL=redis://redis-1:6379,redis-2:6379,redis-3:6379
|
|
538
|
+
```
|
|
539
|
+
|
|
540
|
+
**Solution 3: Fix Network Issues**
|
|
541
|
+
```bash
|
|
542
|
+
# Check firewall rules
|
|
543
|
+
sudo iptables -L | grep 6379
|
|
544
|
+
|
|
545
|
+
# If no rule, add it
|
|
546
|
+
sudo iptables -A INPUT -p tcp --dport 6379 -j ACCEPT
|
|
547
|
+
|
|
548
|
+
# For AWS Security Groups
|
|
549
|
+
aws ec2 authorize-security-group-ingress \
|
|
550
|
+
--group-id sg-xxxxx \
|
|
551
|
+
--protocol tcp \
|
|
552
|
+
--port 6379 \
|
|
553
|
+
--source-group sg-yyyyy # Coordinator security group
|
|
554
|
+
```
|
|
555
|
+
|
|
556
|
+
**Solution 4: Increase Circuit Breaker Tolerance**
|
|
557
|
+
```typescript
|
|
558
|
+
// If Redis is occasionally slow but not down,
|
|
559
|
+
// increase retry attempts and delays
|
|
560
|
+
|
|
561
|
+
// In blocking-coordinator.ts:
|
|
562
|
+
private async redisOperationWithCircuitBreaker<T>(
|
|
563
|
+
operation: () => Promise<T>
|
|
564
|
+
): Promise<T> {
|
|
565
|
+
const maxAttempts = 6; // Increased from 4
|
|
566
|
+
const delays = [1000, 2000, 4000, 8000, 16000, 32000]; // Longer backoff
|
|
567
|
+
|
|
568
|
+
// ... rest of implementation
|
|
569
|
+
}
|
|
570
|
+
```
|
|
571
|
+
|
|
572
|
+
### Prevention
|
|
573
|
+
- Store Redis credentials in secret manager (Vault, AWS Secrets Manager)
|
|
574
|
+
- Set up Redis monitoring with alerts (memory >80%, connections >90% max)
|
|
575
|
+
- Use Redis Sentinel for automatic failover
|
|
576
|
+
- Configure connection pooling in coordinator (max 10 connections per coordinator)
|
|
577
|
+
|
|
578
|
+
---
|
|
579
|
+
|
|
580
|
+
## Issue 5: Cleanup Script Not Removing Stale State
|
|
581
|
+
|
|
582
|
+
### Symptoms
|
|
583
|
+
- Old coordinator heartbeat keys remain in Redis
|
|
584
|
+
- `redis-cli KEYS "blocking:heartbeat:*"` shows coordinators from >24h ago
|
|
585
|
+
- Redis memory usage growing over time
|
|
586
|
+
- Cleanup script logs show "0 keys cleaned up"
|
|
587
|
+
|
|
588
|
+
### Diagnosis Steps
|
|
589
|
+
|
|
590
|
+
**Step 1: Verify Cleanup Script Running**
|
|
591
|
+
```bash
|
|
592
|
+
# Check if running as cron job
|
|
593
|
+
crontab -l | grep cleanup
|
|
594
|
+
|
|
595
|
+
# Should show:
|
|
596
|
+
# */5 * * * * /usr/bin/node /opt/cfn-loop/config/hooks/cleanup-stale-coordinators.js
|
|
597
|
+
|
|
598
|
+
# Check if running as systemd timer
|
|
599
|
+
systemctl list-timers | grep cleanup
|
|
600
|
+
|
|
601
|
+
# Should show timer active
|
|
602
|
+
```
|
|
603
|
+
|
|
604
|
+
**Step 2: Check Cleanup Script Logs**
|
|
605
|
+
```bash
|
|
606
|
+
# View recent logs
|
|
607
|
+
tail -50 /var/log/cfn-loop/cleanup.log
|
|
608
|
+
|
|
609
|
+
# Look for:
|
|
610
|
+
# "Cleaned up N keys" (should be >0 if stale keys exist)
|
|
611
|
+
# "Error: ..." (script failing)
|
|
612
|
+
# "No stale keys found" (threshold too high)
|
|
613
|
+
```
|
|
614
|
+
|
|
615
|
+
**Step 3: Manually Run Cleanup**
|
|
616
|
+
```bash
|
|
617
|
+
# Run script manually to see output
|
|
618
|
+
node config/hooks/cleanup-stale-coordinators.js
|
|
619
|
+
|
|
620
|
+
# Should output:
|
|
621
|
+
# Found 5 heartbeat keys
|
|
622
|
+
# Cleaned up 2 stale coordinators
|
|
623
|
+
# Cleanup complete
|
|
624
|
+
```
|
|
625
|
+
|
|
626
|
+
**Step 4: Check Timestamp Validation**
|
|
627
|
+
```bash
|
|
628
|
+
# Check heartbeat timestamps
|
|
629
|
+
redis-cli --scan --pattern "blocking:heartbeat:*" | while read key; do
|
|
630
|
+
data=$(redis-cli GET "$key")
|
|
631
|
+
timestamp=$(echo "$data" | jq -r '.timestamp')
|
|
632
|
+
age=$(( $(date +%s) - timestamp / 1000 ))
|
|
633
|
+
echo "$key: ${age}s old"
|
|
634
|
+
done
|
|
635
|
+
|
|
636
|
+
# If ages are <600s (10 minutes) → no stale keys
|
|
637
|
+
# If ages are >600s but not cleaned → cleanup script bug
|
|
638
|
+
```
|
|
639
|
+
|
|
640
|
+
### Solutions
|
|
641
|
+
|
|
642
|
+
**Solution 1: Fix Cron Schedule**
|
|
643
|
+
```bash
|
|
644
|
+
# Edit crontab
|
|
645
|
+
crontab -e
|
|
646
|
+
|
|
647
|
+
# Correct entry (every 5 minutes):
|
|
648
|
+
*/5 * * * * /usr/bin/node /opt/cfn-loop/config/hooks/cleanup-stale-coordinators.js >> /var/log/cfn-loop/cleanup.log 2>&1
|
|
649
|
+
|
|
650
|
+
# Verify cron is running
|
|
651
|
+
sudo systemctl status cron
|
|
652
|
+
|
|
653
|
+
# Check cron logs
|
|
654
|
+
grep CRON /var/log/syslog | tail -20
|
|
655
|
+
```
|
|
656
|
+
|
|
657
|
+
**Solution 2: Adjust Stale Threshold**
|
|
658
|
+
```typescript
|
|
659
|
+
// In cleanup-stale-coordinators.js:
|
|
660
|
+
const STALE_THRESHOLD = 600000; // 10 minutes (current)
|
|
661
|
+
|
|
662
|
+
// If cleanup is too aggressive, increase:
|
|
663
|
+
const STALE_THRESHOLD = 900000; // 15 minutes
|
|
664
|
+
|
|
665
|
+
// If cleanup misses old coordinators, decrease:
|
|
666
|
+
const STALE_THRESHOLD = 300000; // 5 minutes
|
|
667
|
+
```
|
|
668
|
+
|
|
669
|
+
**Solution 3: Fix SCAN Implementation**
|
|
670
|
+
```typescript
|
|
671
|
+
// Incorrect (uses KEYS - blocks Redis):
|
|
672
|
+
const keys = await redis.keys('blocking:heartbeat:*'); // ❌
|
|
673
|
+
|
|
674
|
+
// Correct (uses SCAN - non-blocking):
|
|
675
|
+
const keys: string[] = [];
|
|
676
|
+
let cursor = '0';
|
|
677
|
+
|
|
678
|
+
do {
|
|
679
|
+
const [nextCursor, batch] = await redis.scan(
|
|
680
|
+
cursor,
|
|
681
|
+
'MATCH', 'blocking:heartbeat:*',
|
|
682
|
+
'COUNT', 100
|
|
683
|
+
);
|
|
684
|
+
keys.push(...batch);
|
|
685
|
+
cursor = nextCursor;
|
|
686
|
+
} while (cursor !== '0'); // ✅
|
|
687
|
+
```
|
|
688
|
+
|
|
689
|
+
**Solution 4: Add Explicit TTL Cleanup**
|
|
690
|
+
```typescript
|
|
691
|
+
// In cleanup script, add TTL verification:
|
|
692
|
+
async function cleanupStaleKeys() {
|
|
693
|
+
const keys = await scanKeys('blocking:*');
|
|
694
|
+
|
|
695
|
+
for (const key of keys) {
|
|
696
|
+
const ttl = await redis.ttl(key);
|
|
697
|
+
|
|
698
|
+
// If key has no TTL, set default
|
|
699
|
+
if (ttl === -1) {
|
|
700
|
+
console.warn(`Key ${key} missing TTL, setting to 24h`);
|
|
701
|
+
await redis.expire(key, 86400);
|
|
702
|
+
}
|
|
703
|
+
|
|
704
|
+
// If heartbeat key is very old, delete immediately
|
|
705
|
+
if (key.startsWith('blocking:heartbeat:')) {
|
|
706
|
+
const data = await redis.get(key);
|
|
707
|
+
if (data) {
|
|
708
|
+
const { timestamp } = JSON.parse(data);
|
|
709
|
+
const age = Date.now() - timestamp;
|
|
710
|
+
|
|
711
|
+
if (age > 3600000) { // >1 hour old
|
|
712
|
+
console.warn(`Deleting very old heartbeat: ${key}`);
|
|
713
|
+
await redis.del(key);
|
|
714
|
+
}
|
|
715
|
+
}
|
|
716
|
+
}
|
|
717
|
+
}
|
|
718
|
+
}
|
|
719
|
+
```
|
|
720
|
+
|
|
721
|
+
### Prevention
|
|
722
|
+
- Set up systemd timer instead of cron (more reliable, better logging)
|
|
723
|
+
- Monitor cleanup script execution with Prometheus (script_runs_total counter)
|
|
724
|
+
- Add integration test: create fake heartbeat, wait 15min, verify cleanup
|
|
725
|
+
- Set up alert for old heartbeat keys (any heartbeat >1 hour old)
|
|
726
|
+
|
|
727
|
+
---
|
|
728
|
+
|
|
729
|
+
## Issue 6: High Signal Delivery Latency
|
|
730
|
+
|
|
731
|
+
### Symptoms
|
|
732
|
+
- Prometheus metric `signal_delivery_latency_seconds` P95 >5s
|
|
733
|
+
- Coordinators report slow signal ACK response
|
|
734
|
+
- System overall feels sluggish
|
|
735
|
+
- No obvious Redis or network errors
|
|
736
|
+
|
|
737
|
+
### Diagnosis Steps
|
|
738
|
+
|
|
739
|
+
**Step 1: Check Redis Performance**
|
|
740
|
+
```bash
|
|
741
|
+
# Check latency
|
|
742
|
+
redis-cli --latency-history
|
|
743
|
+
|
|
744
|
+
# Target: P99 <10ms
|
|
745
|
+
# Warning: P99 >50ms
|
|
746
|
+
# Critical: P99 >100ms
|
|
747
|
+
|
|
748
|
+
# Check slow queries
|
|
749
|
+
redis-cli SLOWLOG GET 10
|
|
750
|
+
```
|
|
751
|
+
|
|
752
|
+
**Step 2: Check Network Latency**
|
|
753
|
+
```bash
|
|
754
|
+
# Ping Redis host from coordinator
|
|
755
|
+
ping -c 10 <redis-host>
|
|
756
|
+
|
|
757
|
+
# Check packet loss and RTT
|
|
758
|
+
# Target: 0% loss, <1ms RTT
|
|
759
|
+
|
|
760
|
+
# Trace route to Redis
|
|
761
|
+
traceroute <redis-host>
|
|
762
|
+
|
|
763
|
+
# Should be direct (1-2 hops)
|
|
764
|
+
```
|
|
765
|
+
|
|
766
|
+
**Step 3: Profile Coordinator Process**
|
|
767
|
+
```bash
|
|
768
|
+
# Check CPU usage
|
|
769
|
+
top -p "$(pgrep -d, -f coordinator)"
|
|
770
|
+
|
|
771
|
+
# If >80% CPU → coordinator overloaded
|
|
772
|
+
|
|
773
|
+
# Check event loop lag (Node.js)
|
|
774
|
+
node --inspect coordinator.js
|
|
775
|
+
# Open chrome://inspect
|
|
776
|
+
# Check "Event Loop Delay" in performance tab
|
|
777
|
+
|
|
778
|
+
# If >100ms → event loop blocked
|
|
779
|
+
```
|
|
780
|
+
|
|
781
|
+
**Step 4: Count Active Coordinators**
|
|
782
|
+
```bash
|
|
783
|
+
# Too many coordinators = contention
|
|
784
|
+
redis-cli --scan --pattern "blocking:heartbeat:*" | wc -l
|
|
785
|
+
|
|
786
|
+
# Target: <10 coordinators
|
|
787
|
+
# Warning: 10-20 coordinators
|
|
788
|
+
# Critical: >20 coordinators
|
|
789
|
+
```
|
|
790
|
+
|
|
791
|
+
### Solutions
|
|
792
|
+
|
|
793
|
+
**Solution 1: Optimize Redis**
|
|
794
|
+
```bash
|
|
795
|
+
# Tune server-side connection handling (pipelining itself is enabled in the client, not via CONFIG SET)
|
|
796
|
+
# tcp-backlog cannot be changed at runtime: set "tcp-backlog 511" in redis.conf and restart Redis
|
|
797
|
+
redis-cli CONFIG SET timeout 300
|
|
798
|
+
|
|
799
|
+
# Raise the maximum number of simultaneous client connections
|
|
800
|
+
redis-cli CONFIG SET maxclients 20000
|
|
801
|
+
|
|
802
|
+
# Enable lazy freeing (async deletion)
|
|
803
|
+
redis-cli CONFIG SET lazyfree-lazy-eviction yes
|
|
804
|
+
redis-cli CONFIG SET lazyfree-lazy-expire yes
|
|
805
|
+
```
|
|
806
|
+
|
|
807
|
+
**Solution 2: Reduce Coordinator Count**
|
|
808
|
+
```typescript
|
|
809
|
+
// Batch work to fewer coordinators instead of spawning many
|
|
810
|
+
|
|
811
|
+
// Before (too many coordinators):
|
|
812
|
+
for (const task of tasks) {
|
|
813
|
+
await spawnCoordinator(`coord-${task.id}`, task); // 100 coordinators
|
|
814
|
+
}
|
|
815
|
+
|
|
816
|
+
// After (batched):
|
|
817
|
+
const batchSize = 10;
|
|
818
|
+
const batches = chunk(tasks, batchSize);
|
|
819
|
+
|
|
820
|
+
for (let i = 0; i < batches.length; i++) {
|
|
821
|
+
await spawnCoordinator(`coord-batch-${i}`, batches[i]); // 10 coordinators
|
|
822
|
+
}
|
|
823
|
+
```
|
|
824
|
+
|
|
825
|
+
**Solution 3: Optimize Signal Format**
|
|
826
|
+
```typescript
|
|
827
|
+
// Reduce signal size by removing unnecessary fields
|
|
828
|
+
|
|
829
|
+
// Before (384 bytes):
|
|
830
|
+
const signal = {
|
|
831
|
+
senderId: "coordinator-with-very-long-uuid-123456789",
|
|
832
|
+
receiverId: "another-coordinator-with-long-uuid-987654321",
|
|
833
|
+
type: "wake",
|
|
834
|
+
timestamp: Date.now(),
|
|
835
|
+
metadata: {
|
|
836
|
+
hostname: "coordinator-host-1.example.com",
|
|
837
|
+
version: "1.2.3",
|
|
838
|
+
environment: "production"
|
|
839
|
+
}
|
|
840
|
+
};
|
|
841
|
+
|
|
842
|
+
// After (128 bytes):
|
|
843
|
+
const signal = {
|
|
844
|
+
s: "coord-123", // Shortened sender ID
|
|
845
|
+
r: "coord-456", // Shortened receiver ID
|
|
846
|
+
t: "w", // Signal type (w=wake, v=validate, d=decide)
|
|
847
|
+
ts: Date.now()
|
|
848
|
+
};
|
|
849
|
+
```
|
|
850
|
+
|
|
851
|
+
**Solution 4: Use Redis Pub/Sub Instead of Polling**
|
|
852
|
+
```typescript
|
|
853
|
+
// Before (polling - slow):
|
|
854
|
+
async waitForSignal() {
|
|
855
|
+
while (true) {
|
|
856
|
+
const signal = await this.redis.get(`blocking:signal:${this.id}`);
|
|
857
|
+
if (signal) return JSON.parse(signal);
|
|
858
|
+
await new Promise(r => setTimeout(r, 1000)); // Poll every 1s
|
|
859
|
+
}
|
|
860
|
+
}
|
|
861
|
+
|
|
862
|
+
// After (pub/sub - instant):
|
|
863
|
+
async waitForSignal() {
|
|
864
|
+
return new Promise((resolve) => {
|
|
865
|
+
const subscriber = this.redis.duplicate();
|
|
866
|
+
subscriber.subscribe(`signal:${this.id}`);
|
|
867
|
+
subscriber.on('message', (channel, message) => {
|
|
868
|
+
resolve(JSON.parse(message));
|
|
869
|
+
subscriber.quit();
|
|
870
|
+
});
|
|
871
|
+
});
|
|
872
|
+
}
|
|
873
|
+
```
|
|
874
|
+
|
|
875
|
+
### Prevention
|
|
876
|
+
- Set up Redis monitoring with Grafana dashboard
|
|
877
|
+
- Use Redis Cluster for horizontal scaling (>1000 coordinators)
|
|
878
|
+
- Implement signal batching (send multiple signals in one Redis operation)
|
|
879
|
+
- Monitor coordinator count with alert (>20 active coordinators)
|
|
880
|
+
|
|
881
|
+
---
|
|
882
|
+
|
|
883
|
+
## Issue 7: Memory Leak in Coordinator Process
|
|
884
|
+
|
|
885
|
+
### Symptoms
|
|
886
|
+
- Coordinator process memory grows unbounded
|
|
887
|
+
- `ps aux` shows coordinator using >2GB RSS
|
|
888
|
+
- Eventually crashes with `Out of memory` error
|
|
889
|
+
- Restart fixes temporarily but leak returns
|
|
890
|
+
|
|
891
|
+
### Diagnosis Steps
|
|
892
|
+
|
|
893
|
+
**Step 1: Monitor Memory Growth**
|
|
894
|
+
```bash
|
|
895
|
+
# Check current memory
|
|
896
|
+
ps aux | grep coordinator
|
|
897
|
+
|
|
898
|
+
# Monitor over time
|
|
899
|
+
watch -n 5 'ps aux | grep coordinator'
|
|
900
|
+
|
|
901
|
+
# If memory grows >100MB per hour → leak
|
|
902
|
+
```
|
|
903
|
+
|
|
904
|
+
**Step 2: Generate Heap Snapshot**
|
|
905
|
+
```bash
|
|
906
|
+
# Start coordinator with inspector
|
|
907
|
+
node --inspect coordinator.js
|
|
908
|
+
|
|
909
|
+
# In Chrome DevTools (chrome://inspect)
|
|
910
|
+
# Memory tab → Take heap snapshot
|
|
911
|
+
# Compare snapshots over time
|
|
912
|
+
|
|
913
|
+
# Look for objects growing:
|
|
914
|
+
# - Event listeners
|
|
915
|
+
# - Redis keys cache
|
|
916
|
+
# - Promise chains
|
|
917
|
+
```
|
|
918
|
+
|
|
919
|
+
**Step 3: Check Event Listener Cleanup**
|
|
920
|
+
```bash
|
|
921
|
+
# Check listener count
|
|
922
|
+
node -e "
|
|
923
|
+
const coordinator = require('./coordinator');
|
|
924
|
+
setInterval(() => {
|
|
925
|
+
console.log('Listeners:', coordinator.eventEmitter.listenerCount('signal'));
|
|
926
|
+
}, 5000);
|
|
927
|
+
"
|
|
928
|
+
|
|
929
|
+
# If count grows → listeners not removed
|
|
930
|
+
```
|
|
931
|
+
|
|
932
|
+
**Step 4: Check Redis Key Accumulation**
|
|
933
|
+
```bash
|
|
934
|
+
# Count keys owned by coordinator
|
|
935
|
+
COORDINATOR_ID="coord-123"
|
|
936
|
+
redis-cli --scan --pattern "*${COORDINATOR_ID}*" | wc -l
|
|
937
|
+
|
|
938
|
+
# If >1000 keys → not cleaning up
|
|
939
|
+
```
|
|
940
|
+
|
|
941
|
+
### Solutions
|
|
942
|
+
|
|
943
|
+
**Solution 1: Fix Event Listener Cleanup**
|
|
944
|
+
```typescript
|
|
945
|
+
// Incorrect (leak):
|
|
946
|
+
async waitForSignal() {
|
|
947
|
+
this.eventEmitter.on('signal', (signal) => { // ❌ Never removed
|
|
948
|
+
this.handleSignal(signal);
|
|
949
|
+
});
|
|
950
|
+
}
|
|
951
|
+
|
|
952
|
+
// Correct (cleanup):
|
|
953
|
+
async waitForSignal() {
|
|
954
|
+
return new Promise((resolve) => {
|
|
955
|
+
const handler = (signal) => {
|
|
956
|
+
this.handleSignal(signal);
|
|
957
|
+
resolve();
|
|
958
|
+
};
|
|
959
|
+
|
|
960
|
+
this.eventEmitter.once('signal', handler); // ✅ Auto-removed
|
|
961
|
+
|
|
962
|
+
// Cleanup on timeout
|
|
963
|
+
setTimeout(() => {
|
|
964
|
+
this.eventEmitter.off('signal', handler);
|
|
965
|
+
resolve();
|
|
966
|
+
}, this.timeout);
|
|
967
|
+
});
|
|
968
|
+
}
|
|
969
|
+
```
|
|
970
|
+
|
|
971
|
+
**Solution 2: Set TTL on All Redis Keys**
|
|
972
|
+
```typescript
|
|
973
|
+
// Incorrect (keys live forever):
|
|
974
|
+
await this.redis.set(key, value); // ❌
|
|
975
|
+
|
|
976
|
+
// Correct (24h TTL):
|
|
977
|
+
await this.redis.setex(key, 86400, value); // ✅
|
|
978
|
+
|
|
979
|
+
// Or use default TTL helper:
|
|
980
|
+
async setWithTTL(key: string, value: string, ttl: number = 86400) {
|
|
981
|
+
await this.redis.setex(key, ttl, value);
|
|
982
|
+
}
|
|
983
|
+
```
|
|
984
|
+
|
|
985
|
+
**Solution 3: Implement Graceful Shutdown**
|
|
986
|
+
```typescript
|
|
987
|
+
// Add cleanup on SIGTERM/SIGINT
|
|
988
|
+
process.on('SIGTERM', async () => {
|
|
989
|
+
console.log('SIGTERM received, cleaning up...');
|
|
990
|
+
|
|
991
|
+
// Stop heartbeat
|
|
992
|
+
if (this.heartbeatInterval) {
|
|
993
|
+
clearInterval(this.heartbeatInterval);
|
|
994
|
+
}
|
|
995
|
+
|
|
996
|
+
// Remove event listeners
|
|
997
|
+
this.eventEmitter.removeAllListeners();
|
|
998
|
+
|
|
999
|
+
// Delete coordinator state (must run before the connection is closed)
|
|
1000
|
+
await this.redis.del(`blocking:heartbeat:${this.id}`);
|
|
1001
|
+
await this.redis.del(`blocking:signal:${this.id}`);
|
|
1002
|
+
|
|
1003
|
+
// Close Redis connections
|
|
1004
|
+
await this.redis.quit();
|
|
1005
|
+
await this.subscriber.quit();
|
|
1006
|
+
|
|
1007
|
+
process.exit(0);
|
|
1008
|
+
});
|
|
1009
|
+
```
|
|
1010
|
+
|
|
1011
|
+
**Solution 4: Use WeakMap for Caching**
|
|
1012
|
+
```typescript
|
|
1013
|
+
// Incorrect (strong references prevent GC):
|
|
1014
|
+
private signalCache = new Map<string, Signal>(); // ❌
|
|
1015
|
+
|
|
1016
|
+
// Correct (weak references allow GC):
|
|
1017
|
+
private signalCache = new WeakMap<object, Signal>(); // ✅
|
|
1018
|
+
|
|
1019
|
+
// Or use LRU cache with max size:
|
|
1020
|
+
import LRU from 'lru-cache';
|
|
1021
|
+
|
|
1022
|
+
private signalCache = new LRU<string, Signal>({
|
|
1023
|
+
max: 500, // Max 500 cached signals
|
|
1024
|
+
ttl: 60000 // 1 minute TTL
|
|
1025
|
+
});
|
|
1026
|
+
```
|
|
1027
|
+
|
|
1028
|
+
### Prevention
|
|
1029
|
+
- Run coordinator with memory limit (`--max-old-space-size=512`)
|
|
1030
|
+
- Monitor memory with Prometheus (process_resident_memory_bytes)
|
|
1031
|
+
- Add memory leak test (run coordinator for 1 hour, verify memory <100MB growth)
|
|
1032
|
+
- Use `clinic` or `0x` profiling tools in development
|
|
1033
|
+
|
|
1034
|
+
---
|
|
1035
|
+
|
|
1036
|
+
## Issue 8: Timeout Events Spike Unexpectedly
|
|
1037
|
+
|
|
1038
|
+
### Symptoms
|
|
1039
|
+
- Prometheus metric `timeout_events_total` sudden increase
|
|
1040
|
+
- Rate goes from 0/min to 5+/min
|
|
1041
|
+
- No code changes or increased load
|
|
1042
|
+
- Affects multiple coordinators simultaneously
|
|
1043
|
+
|
|
1044
|
+
### Diagnosis Steps
|
|
1045
|
+
|
|
1046
|
+
**Step 1: Check Recent Changes**
|
|
1047
|
+
```bash
|
|
1048
|
+
# Check recent deployments
|
|
1049
|
+
git log --oneline --since="1 hour ago"
|
|
1050
|
+
|
|
1051
|
+
# Check infrastructure changes
|
|
1052
|
+
kubectl get events --sort-by='.lastTimestamp' | head -20
|
|
1053
|
+
|
|
1054
|
+
# AWS CloudTrail events
|
|
1055
|
+
aws cloudtrail lookup-events --start-time "$(date -u -d '1 hour ago' +%Y-%m-%dT%H:%M:%SZ)"
|
|
1056
|
+
```
|
|
1057
|
+
|
|
1058
|
+
**Step 2: Check External Dependencies**
|
|
1059
|
+
```bash
|
|
1060
|
+
# Check validator agent health
|
|
1061
|
+
curl http://validator-1:3000/health
|
|
1062
|
+
curl http://validator-2:3000/health
|
|
1063
|
+
|
|
1064
|
+
# Check database health
|
|
1065
|
+
pg_isready -h postgres-host -p 5432
|
|
1066
|
+
|
|
1067
|
+
# Check Redis health
|
|
1068
|
+
redis-cli ping
|
|
1069
|
+
redis-cli INFO stats | grep total_error_replies
|
|
1070
|
+
```
|
|
1071
|
+
|
|
1072
|
+
**Step 3: Check Timeout Configuration**
|
|
1073
|
+
```bash
|
|
1074
|
+
# Verify timeout values haven't changed
|
|
1075
|
+
grep -r "timeout.*60000" src/cfn-loop/
|
|
1076
|
+
|
|
1077
|
+
# Check environment variables
|
|
1078
|
+
env | grep TIMEOUT
|
|
1079
|
+
```
|
|
1080
|
+
|
|
1081
|
+
**Step 4: Profile Blocking Tasks**
|
|
1082
|
+
```bash
|
|
1083
|
+
# Check what coordinators are blocked on
|
|
1084
|
+
redis-cli --scan --pattern "blocking:signal:*" | while read key; do
|
|
1085
|
+
data=$(redis-cli GET "$key")
|
|
1086
|
+
echo "$key: $data"
|
|
1087
|
+
done
|
|
1088
|
+
|
|
1089
|
+
# Look for common patterns (e.g., all waiting for same validator)
|
|
1090
|
+
```
|
|
1091
|
+
|
|
1092
|
+
### Solutions
|
|
1093
|
+
|
|
1094
|
+
**Solution 1: Increase Timeout Duration**
|
|
1095
|
+
```typescript
|
|
1096
|
+
// If timeouts are legitimate (task takes longer than expected)
|
|
1097
|
+
|
|
1098
|
+
// In blocking-coordinator.ts:
|
|
1099
|
+
const DEFAULT_TIMEOUT = 600000; // 10 minutes (current)
|
|
1100
|
+
|
|
1101
|
+
// Increase for production:
|
|
1102
|
+
const DEFAULT_TIMEOUT = 1800000; // 30 minutes
|
|
1103
|
+
|
|
1104
|
+
// Or make configurable:
|
|
1105
|
+
const timeout = process.env.BLOCKING_TIMEOUT
|
|
1106
|
+
? parseInt(process.env.BLOCKING_TIMEOUT, 10)
|
|
1107
|
+
: 600000;
|
|
1108
|
+
```
|
|
1109
|
+
|
|
1110
|
+
**Solution 2: Fix Slow External Dependency**
|
|
1111
|
+
```bash
|
|
1112
|
+
# If validator agents are slow:
|
|
1113
|
+
|
|
1114
|
+
# Check validator logs
|
|
1115
|
+
kubectl logs -f deployment/validator-agent
|
|
1116
|
+
|
|
1117
|
+
# Scale validator agents
|
|
1118
|
+
kubectl scale deployment/validator-agent --replicas=5
|
|
1119
|
+
|
|
1120
|
+
# Add timeout to validator calls
|
|
1121
|
+
VALIDATOR_TIMEOUT=30000 # 30s max per validator
|
|
1122
|
+
```
|
|
1123
|
+
|
|
1124
|
+
**Solution 3: Add Circuit Breaker for Blocking Operations**
|
|
1125
|
+
```typescript
|
|
1126
|
+
// Prevent timeout cascade by failing fast
|
|
1127
|
+
|
|
1128
|
+
import CircuitBreaker from 'opossum';
|
|
1129
|
+
|
|
1130
|
+
const breaker = new CircuitBreaker(this.blockUntilSignal.bind(this), {
|
|
1131
|
+
timeout: 600000, // 10 min
|
|
1132
|
+
errorThresholdPercentage: 50, // Open after 50% failures
|
|
1133
|
+
resetTimeout: 30000 // Try again after 30s
|
|
1134
|
+
});
|
|
1135
|
+
|
|
1136
|
+
breaker.on('open', () => {
|
|
1137
|
+
console.error('Circuit breaker opened for blocking operations');
|
|
1138
|
+
// Alert or fallback logic
|
|
1139
|
+
});
|
|
1140
|
+
|
|
1141
|
+
// Use breaker instead of direct call
|
|
1142
|
+
await breaker.fire();
|
|
1143
|
+
```
|
|
1144
|
+
|
|
1145
|
+
**Solution 4: Implement Retry with Backoff**
|
|
1146
|
+
```typescript
|
|
1147
|
+
// If timeouts are transient, retry with backoff
|
|
1148
|
+
|
|
1149
|
+
async blockUntilSignalWithRetry(maxRetries = 3): Promise<void> {
|
|
1150
|
+
for (let attempt = 0; attempt < maxRetries; attempt++) {
|
|
1151
|
+
try {
|
|
1152
|
+
await this.blockUntilSignal();
|
|
1153
|
+
return; // Success
|
|
1154
|
+
} catch (error) {
|
|
1155
|
+
if (error.message.includes('Timeout') && attempt < maxRetries - 1) {
|
|
1156
|
+
const delay = Math.pow(2, attempt) * 60000; // 1min, 2min (4min only when maxRetries > 3)
|
|
1157
|
+
console.warn(`Timeout attempt ${attempt + 1}, retrying in ${delay}ms`);
|
|
1158
|
+
await new Promise(r => setTimeout(r, delay));
|
|
1159
|
+
} else {
|
|
1160
|
+
throw error; // Final failure
|
|
1161
|
+
}
|
|
1162
|
+
}
|
|
1163
|
+
}
|
|
1164
|
+
}
|
|
1165
|
+
```
|
|
1166
|
+
|
|
1167
|
+
### Prevention
|
|
1168
|
+
- Set up gradual rollout for configuration changes (canary deployment)
|
|
1169
|
+
- Monitor external dependency health with synthetic checks
|
|
1170
|
+
- Add timeout histogram to track distribution (not just count)
|
|
1171
|
+
- Set up alert for timeout rate increase >2x baseline
|
|
1172
|
+
|
|
1173
|
+
---
|
|
1174
|
+
|
|
1175
|
+
## Emergency Procedures
|
|
1176
|
+
|
|
1177
|
+
### Complete System Reset
|
|
1178
|
+
```bash
|
|
1179
|
+
# ⚠️ WARNING: This deletes all coordinator state!
|
|
1180
|
+
|
|
1181
|
+
# 1. Stop all coordinators
|
|
1182
|
+
sudo systemctl stop 'coordinator@*'
|
|
1183
|
+
|
|
1184
|
+
# 2. Flush Redis coordinator data
|
|
1185
|
+
redis-cli --scan --pattern "blocking:*" | xargs -r redis-cli DEL
|
|
1186
|
+
|
|
1187
|
+
# 3. Restart timeout handler
|
|
1188
|
+
sudo systemctl restart coordinator-timeout-handler
|
|
1189
|
+
|
|
1190
|
+
# 4. Start coordinators
|
|
1191
|
+
sudo systemctl start coordinator@<instance>  # name each instance: globs only match units already loaded, so they miss stopped ones
|
|
1192
|
+
|
|
1193
|
+
# 5. Verify health
|
|
1194
|
+
redis-cli --scan --pattern "blocking:heartbeat:*" | wc -l # Should be >0 within 10s
|
|
1195
|
+
```
|
|
1196
|
+
|
|
1197
|
+
### Coordinator Process Stuck (Force Kill)
|
|
1198
|
+
```bash
|
|
1199
|
+
# Find coordinator PID
|
|
1200
|
+
ps aux | grep coordinator
|
|
1201
|
+
|
|
1202
|
+
# Try graceful shutdown first
|
|
1203
|
+
kill -TERM <pid>
|
|
1204
|
+
sleep 10
|
|
1205
|
+
|
|
1206
|
+
# If still running, force kill
|
|
1207
|
+
kill -9 <pid>
|
|
1208
|
+
|
|
1209
|
+
# Verify process gone
|
|
1210
|
+
ps -p <pid>
|
|
1211
|
+
|
|
1212
|
+
# Cleanup state
|
|
1213
|
+
redis-cli DEL "blocking:heartbeat:<coordinator-id>"
|
|
1214
|
+
redis-cli DEL "blocking:signal:<coordinator-id>"
|
|
1215
|
+
```
|
|
1216
|
+
|
|
1217
|
+
### Redis Full (Eviction)
|
|
1218
|
+
```bash
|
|
1219
|
+
# Check memory
|
|
1220
|
+
redis-cli INFO memory | grep used_memory_human
|
|
1221
|
+
|
|
1222
|
+
# If >90% full, evict old keys
|
|
1223
|
+
redis-cli --scan --pattern "blocking:*" | while read key; do
|
|
1224
|
+
ttl=$(redis-cli TTL "$key")
|
|
1225
|
+
if [ "$ttl" -gt 86400 ]; then # >24h TTL
|
|
1226
|
+
redis-cli EXPIRE "$key" 3600 # Reduce to 1h
|
|
1227
|
+
fi
|
|
1228
|
+
done
|
|
1229
|
+
|
|
1230
|
+
# Or increase maxmemory
|
|
1231
|
+
redis-cli CONFIG SET maxmemory 16gb
|
|
1232
|
+
```
|
|
1233
|
+
|
|
1234
|
+
---
|
|
1235
|
+
|
|
1236
|
+
## Getting Help
|
|
1237
|
+
|
|
1238
|
+
### Log Collection Script
|
|
1239
|
+
```bash
|
|
1240
|
+
#!/bin/bash
|
|
1241
|
+
# collect-logs.sh - Gather diagnostics for support
|
|
1242
|
+
|
|
1243
|
+
COORDINATOR_ID="${1:-unknown}"
|
|
1244
|
+
OUTPUT_DIR="/tmp/coordinator-debug-$(date +%s)"
|
|
1245
|
+
|
|
1246
|
+
mkdir -p "$OUTPUT_DIR"
|
|
1247
|
+
|
|
1248
|
+
# Coordinator logs
|
|
1249
|
+
cp /var/log/cfn-loop/coordinator-*.log "$OUTPUT_DIR/"
|
|
1250
|
+
|
|
1251
|
+
# Timeout handler logs
|
|
1252
|
+
cp /var/log/cfn-loop/timeout-handler.log "$OUTPUT_DIR/"
|
|
1253
|
+
|
|
1254
|
+
# Redis state
|
|
1255
|
+
redis-cli --scan --pattern "blocking:*" > "$OUTPUT_DIR/redis-keys.txt"
|
|
1256
|
+
redis-cli INFO > "$OUTPUT_DIR/redis-info.txt"
|
|
1257
|
+
|
|
1258
|
+
# System info
|
|
1259
|
+
uname -a > "$OUTPUT_DIR/system-info.txt"
|
|
1260
|
+
ps aux | grep coordinator > "$OUTPUT_DIR/processes.txt"
|
|
1261
|
+
|
|
1262
|
+
# Network
|
|
1263
|
+
ping -c 5 "${REDIS_HOST:-localhost}" > "$OUTPUT_DIR/network-ping.txt"
|
|
1264
|
+
|
|
1265
|
+
# Package
|
|
1266
|
+
tar -czf "${OUTPUT_DIR##*/}.tar.gz" "$OUTPUT_DIR"
|
|
1267
|
+
echo "Logs collected: ${OUTPUT_DIR##*/}.tar.gz"
|
|
1268
|
+
```
|
|
1269
|
+
|
|
1270
|
+
### Support Channels
|
|
1271
|
+
- **Slack:** #cfn-loop-support
|
|
1272
|
+
- **Email:** devops-team@example.com
|
|
1273
|
+
- **On-call:** PagerDuty escalation policy "CFN Loop"
|
|
1274
|
+
- **Documentation:** https://docs.example.com/cfn-loop
|
|
1275
|
+
|
|
1276
|
+
### Escalation Criteria
|
|
1277
|
+
- **P1 (Page immediately):** All coordinators down, production outage
|
|
1278
|
+
- **P2 (2-hour response):** >50% coordinators failing, degraded performance
|
|
1279
|
+
- **P3 (Next business day):** Single coordinator issue, non-prod environment
|