agentic-qe 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. package/.claude/agents/qx-partner.md +17 -4
  2. package/.claude/skills/accessibility-testing/SKILL.md +144 -692
  3. package/.claude/skills/agentic-quality-engineering/SKILL.md +176 -529
  4. package/.claude/skills/api-testing-patterns/SKILL.md +180 -560
  5. package/.claude/skills/brutal-honesty-review/SKILL.md +113 -603
  6. package/.claude/skills/bug-reporting-excellence/SKILL.md +116 -517
  7. package/.claude/skills/chaos-engineering-resilience/SKILL.md +127 -72
  8. package/.claude/skills/cicd-pipeline-qe-orchestrator/SKILL.md +209 -404
  9. package/.claude/skills/code-review-quality/SKILL.md +158 -608
  10. package/.claude/skills/compatibility-testing/SKILL.md +148 -38
  11. package/.claude/skills/compliance-testing/SKILL.md +132 -63
  12. package/.claude/skills/consultancy-practices/SKILL.md +114 -446
  13. package/.claude/skills/context-driven-testing/SKILL.md +117 -381
  14. package/.claude/skills/contract-testing/SKILL.md +176 -141
  15. package/.claude/skills/database-testing/SKILL.md +137 -130
  16. package/.claude/skills/exploratory-testing-advanced/SKILL.md +160 -629
  17. package/.claude/skills/holistic-testing-pact/SKILL.md +140 -188
  18. package/.claude/skills/localization-testing/SKILL.md +145 -33
  19. package/.claude/skills/mobile-testing/SKILL.md +132 -448
  20. package/.claude/skills/mutation-testing/SKILL.md +147 -41
  21. package/.claude/skills/performance-testing/SKILL.md +200 -546
  22. package/.claude/skills/quality-metrics/SKILL.md +164 -519
  23. package/.claude/skills/refactoring-patterns/SKILL.md +132 -699
  24. package/.claude/skills/regression-testing/SKILL.md +120 -926
  25. package/.claude/skills/risk-based-testing/SKILL.md +157 -660
  26. package/.claude/skills/security-testing/SKILL.md +199 -538
  27. package/.claude/skills/sherlock-review/SKILL.md +163 -699
  28. package/.claude/skills/shift-left-testing/SKILL.md +161 -465
  29. package/.claude/skills/shift-right-testing/SKILL.md +161 -519
  30. package/.claude/skills/six-thinking-hats/SKILL.md +175 -1110
  31. package/.claude/skills/skills-manifest.json +71 -20
  32. package/.claude/skills/tdd-london-chicago/SKILL.md +131 -448
  33. package/.claude/skills/technical-writing/SKILL.md +103 -154
  34. package/.claude/skills/test-automation-strategy/SKILL.md +166 -772
  35. package/.claude/skills/test-data-management/SKILL.md +126 -910
  36. package/.claude/skills/test-design-techniques/SKILL.md +179 -89
  37. package/.claude/skills/test-environment-management/SKILL.md +136 -91
  38. package/.claude/skills/test-reporting-analytics/SKILL.md +169 -92
  39. package/.claude/skills/testability-scoring/SKILL.md +172 -538
  40. package/.claude/skills/testability-scoring/scripts/generate-html-report.js +0 -0
  41. package/.claude/skills/visual-testing-advanced/SKILL.md +155 -78
  42. package/.claude/skills/xp-practices/SKILL.md +151 -587
  43. package/CHANGELOG.md +48 -0
  44. package/README.md +23 -16
  45. package/dist/agents/QXPartnerAgent.d.ts +8 -1
  46. package/dist/agents/QXPartnerAgent.d.ts.map +1 -1
  47. package/dist/agents/QXPartnerAgent.js +1174 -112
  48. package/dist/agents/QXPartnerAgent.js.map +1 -1
  49. package/dist/agents/lifecycle/AgentLifecycleManager.d.ts.map +1 -1
  50. package/dist/agents/lifecycle/AgentLifecycleManager.js +34 -31
  51. package/dist/agents/lifecycle/AgentLifecycleManager.js.map +1 -1
  52. package/dist/cli/commands/init-claude-md-template.d.ts.map +1 -1
  53. package/dist/cli/commands/init-claude-md-template.js +14 -0
  54. package/dist/cli/commands/init-claude-md-template.js.map +1 -1
  55. package/dist/core/SwarmCoordinator.d.ts +180 -0
  56. package/dist/core/SwarmCoordinator.d.ts.map +1 -0
  57. package/dist/core/SwarmCoordinator.js +473 -0
  58. package/dist/core/SwarmCoordinator.js.map +1 -0
  59. package/dist/core/metrics/MetricsAggregator.d.ts +228 -0
  60. package/dist/core/metrics/MetricsAggregator.d.ts.map +1 -0
  61. package/dist/core/metrics/MetricsAggregator.js +482 -0
  62. package/dist/core/metrics/MetricsAggregator.js.map +1 -0
  63. package/dist/core/metrics/index.d.ts +5 -0
  64. package/dist/core/metrics/index.d.ts.map +1 -0
  65. package/dist/core/metrics/index.js +11 -0
  66. package/dist/core/metrics/index.js.map +1 -0
  67. package/dist/core/optimization/SwarmOptimizer.d.ts +5 -0
  68. package/dist/core/optimization/SwarmOptimizer.d.ts.map +1 -1
  69. package/dist/core/optimization/SwarmOptimizer.js +17 -0
  70. package/dist/core/optimization/SwarmOptimizer.js.map +1 -1
  71. package/dist/core/orchestration/AdaptiveScheduler.d.ts +190 -0
  72. package/dist/core/orchestration/AdaptiveScheduler.d.ts.map +1 -0
  73. package/dist/core/orchestration/AdaptiveScheduler.js +460 -0
  74. package/dist/core/orchestration/AdaptiveScheduler.js.map +1 -0
  75. package/dist/core/orchestration/WorkflowOrchestrator.d.ts +13 -0
  76. package/dist/core/orchestration/WorkflowOrchestrator.d.ts.map +1 -1
  77. package/dist/core/orchestration/WorkflowOrchestrator.js +32 -0
  78. package/dist/core/orchestration/WorkflowOrchestrator.js.map +1 -1
  79. package/dist/core/recovery/CircuitBreaker.d.ts +176 -0
  80. package/dist/core/recovery/CircuitBreaker.d.ts.map +1 -0
  81. package/dist/core/recovery/CircuitBreaker.js +382 -0
  82. package/dist/core/recovery/CircuitBreaker.js.map +1 -0
  83. package/dist/core/recovery/RecoveryOrchestrator.d.ts +186 -0
  84. package/dist/core/recovery/RecoveryOrchestrator.d.ts.map +1 -0
  85. package/dist/core/recovery/RecoveryOrchestrator.js +476 -0
  86. package/dist/core/recovery/RecoveryOrchestrator.js.map +1 -0
  87. package/dist/core/recovery/RetryStrategy.d.ts +127 -0
  88. package/dist/core/recovery/RetryStrategy.d.ts.map +1 -0
  89. package/dist/core/recovery/RetryStrategy.js +314 -0
  90. package/dist/core/recovery/RetryStrategy.js.map +1 -0
  91. package/dist/core/recovery/index.d.ts +8 -0
  92. package/dist/core/recovery/index.d.ts.map +1 -0
  93. package/dist/core/recovery/index.js +27 -0
  94. package/dist/core/recovery/index.js.map +1 -0
  95. package/dist/core/skills/DependencyResolver.d.ts +99 -0
  96. package/dist/core/skills/DependencyResolver.d.ts.map +1 -0
  97. package/dist/core/skills/DependencyResolver.js +260 -0
  98. package/dist/core/skills/DependencyResolver.js.map +1 -0
  99. package/dist/core/skills/ManifestGenerator.d.ts +114 -0
  100. package/dist/core/skills/ManifestGenerator.d.ts.map +1 -0
  101. package/dist/core/skills/ManifestGenerator.js +449 -0
  102. package/dist/core/skills/ManifestGenerator.js.map +1 -0
  103. package/dist/core/skills/index.d.ts +9 -0
  104. package/dist/core/skills/index.d.ts.map +1 -0
  105. package/dist/core/skills/index.js +24 -0
  106. package/dist/core/skills/index.js.map +1 -0
  107. package/dist/mcp/server.d.ts +9 -9
  108. package/dist/mcp/server.d.ts.map +1 -1
  109. package/dist/mcp/server.js +1 -2
  110. package/dist/mcp/server.js.map +1 -1
  111. package/dist/types/qx.d.ts +39 -7
  112. package/dist/types/qx.d.ts.map +1 -1
  113. package/dist/types/qx.js.map +1 -1
  114. package/dist/visualization/api/RestEndpoints.js +1 -1
  115. package/dist/visualization/api/RestEndpoints.js.map +1 -1
  116. package/package.json +13 -55
@@ -1,79 +1,103 @@
1
1
  ---
2
2
  name: chaos-engineering-resilience
3
- description: Chaos engineering principles, controlled failure injection, resilience testing, and system recovery validation. Use when testing distributed systems, building confidence in fault tolerance, or validating disaster recovery.
3
+ description: "Chaos engineering principles, controlled failure injection, resilience testing, and system recovery validation. Use when testing distributed systems, building confidence in fault tolerance, or validating disaster recovery."
4
+ category: specialized-testing
5
+ priority: high
6
+ tokenEstimate: 900
7
+ agents: [qe-chaos-engineer, qe-performance-tester, qe-production-intelligence]
8
+ implementation_status: optimized
9
+ optimization_version: 1.0
10
+ last_optimized: 2025-12-02
11
+ dependencies: []
12
+ quick_reference_card: true
13
+ tags: [chaos, resilience, fault-injection, distributed-systems, recovery, netflix]
4
14
  ---
5
15
 
6
16
  # Chaos Engineering & Resilience Testing
7
17
 
8
- ## Core Principle
9
-
10
- **Systems fail. Build systems that fail gracefully.**
11
-
12
- Chaos engineering proactively introduces failures to discover weaknesses before they cause outages. Resilience testing validates recovery capabilities.
13
-
14
- ## What is Chaos Engineering?
15
-
16
- **Chaos Engineering:** Experimenting on distributed systems to build confidence in system ability to withstand turbulent conditions.
17
-
18
- **Principles:**
19
- 1. Define steady state (normal metrics)
20
- 2. Hypothesize steady state continues
21
- 3. Introduce real-world failures
22
- 4. Try to disprove hypothesis
23
- 5. Fix weaknesses, repeat
24
-
25
- ## Types of Failures to Inject
26
-
27
- ### Network Failures
28
- - Latency injection
29
- - Packet loss
30
- - Network partitions
31
- - DNS failures
32
- - Connection timeouts
18
+ <default_to_action>
19
+ When testing system resilience or injecting failures:
20
+ 1. DEFINE steady state (normal metrics: error rate, latency, throughput)
21
+ 2. HYPOTHESIZE system continues in steady state during failure
22
+ 3. INJECT real-world failures (network, instance, disk, CPU)
23
+ 4. OBSERVE and measure deviation from steady state
24
+ 5. FIX weaknesses discovered, document runbooks, repeat
25
+
26
+ **Quick Chaos Steps:**
27
+ - Start small: Dev → Staging → 1% prod → gradual rollout
28
+ - Define clear rollback triggers (error_rate > 5%)
29
+ - Measure blast radius, never exceed planned scope
30
+ - Document findings → runbooks → improved resilience
31
+
32
+ **Critical Success Factors:**
33
+ - Controlled experiments with automatic rollback
34
+ - Steady state must be measurable
35
+ - Start in non-production, graduate to production
36
+ </default_to_action>
37
+
38
+ ## Quick Reference Card
39
+
40
+ ### When to Use
41
+ - Distributed systems validation
42
+ - Disaster recovery testing
43
+ - Building confidence in fault tolerance
44
+ - Pre-production resilience verification
45
+
46
+ ### Failure Types to Inject
47
+ | Category | Failures | Tools |
48
+ |----------|----------|-------|
49
+ | **Network** | Latency, packet loss, partition | tc, toxiproxy |
50
+ | **Infrastructure** | Instance kill, disk failure, CPU exhaustion | Chaos Monkey |
51
+ | **Application** | Exceptions, slow responses, resource leaks | Gremlin, LitmusChaos |
52
+ | **Dependencies** | Service outage, timeout | WireMock |
53
+
54
+ ### Blast Radius Progression
55
+ ```
56
+ Dev (safe) → Staging → 1% prod → 10% → 50% → 100%
57
+     ↓           ↓         ↓                   ↓
58
+   Learn     Validate   Careful          Full confidence
59
+ ```
33
60
 
34
- ### Infrastructure Failures
35
- - Instance termination
36
- - Disk failures
37
- - CPU exhaustion
38
- - Memory pressure
39
- - Cascading failures
61
+ ### Steady State Metrics
62
+ | Metric | Normal | Alert Threshold |
63
+ |--------|--------|-----------------|
64
+ | Error rate | < 0.1% | > 1% |
65
+ | p99 latency | < 200ms | > 500ms |
66
+ | Throughput | baseline | -20% |
40
67
 
41
- ### Application Failures
42
- - Exceptions
43
- - Slow responses
44
- - Resource leaks
45
- - Deadlocks
68
+ ---
46
69
 
47
- ## Controlled Experiments
70
+ ## Chaos Experiment Structure
48
71
 
49
- **Start small, increase blast radius gradually:**
50
- ```
51
- 1. Development: Test locally
52
- 2. Staging: Test in staging environment
53
- 3. Production Canary: 1% of traffic
54
- 4. Production Gradual: 10% → 50% → 100%
72
+ ```typescript
73
+ // Chaos experiment definition
74
+ const experiment = {
75
+ name: 'Database latency injection',
76
+ hypothesis: 'System handles 500ms DB latency gracefully',
77
+ steadyState: {
78
+ errorRate: '< 0.1%',
79
+ p99Latency: '< 300ms'
80
+ },
81
+ method: {
82
+ type: 'network-latency',
83
+ target: 'database',
84
+ delay: '500ms',
85
+ duration: '5m'
86
+ },
87
+ rollback: {
88
+ automatic: true,
89
+ trigger: 'errorRate > 5%'
90
+ }
91
+ };
55
92
  ```
56
93
 
57
- ## Netflix Chaos Monkey
58
-
59
- **Randomly terminates instances:**
60
- ```javascript
61
- // Chaos Monkey configuration
62
- {
63
- "enabled": true,
64
- "meanTimeBetweenKillsInWorkDays": 2,
65
- "minTimeBetweenKillsInWorkDays": 1,
66
- "grouping": "cluster",
67
- "regions": ["us-east-1"],
68
- "exceptions": ["production-critical"]
69
- }
70
- ```
94
+ ---
71
95
 
72
- ## With qe-chaos-engineer Agent
96
+ ## Agent-Driven Chaos
73
97
 
74
98
  ```typescript
75
- // Agent runs controlled chaos experiments
76
- const experiment = await agent.runChaosExperiment({
99
+ // qe-chaos-engineer runs controlled experiments
100
+ await Task("Chaos Experiment", {
77
101
  target: 'payment-service',
78
102
  failure: 'terminate-random-instance',
79
103
  blastRadius: '10%',
@@ -81,23 +105,54 @@ const experiment = await agent.runChaosExperiment({
81
105
  steadyStateHypothesis: {
82
106
  metric: 'success-rate',
83
107
  threshold: 0.99
84
- }
85
- });
108
+ },
109
+ autoRollback: true
110
+ }, "qe-chaos-engineer");
86
111
 
87
- // Verifies:
112
+ // Validates:
88
113
  // - System recovers automatically
89
- // - Error rate stays below threshold
114
+ // - Error rate stays within threshold
90
115
  // - No data loss
91
116
  // - Alerts triggered appropriately
92
117
  ```
93
118
 
94
- ## Remember
119
+ ---
120
+
121
+ ## Agent Coordination Hints
122
+
123
+ ### Memory Namespace
124
+ ```
125
+ aqe/chaos-engineering/
126
+ ├── experiments/* - Experiment definitions & results
127
+ ├── steady-states/* - Baseline measurements
128
+ ├── runbooks/* - Generated recovery procedures
129
+ └── blast-radius/* - Impact analysis
130
+ ```
131
+
132
+ ### Fleet Coordination
133
+ ```typescript
134
+ const chaosFleet = await FleetManager.coordinate({
135
+ strategy: 'chaos-engineering',
136
+ agents: [
137
+ 'qe-chaos-engineer', // Experiment execution
138
+ 'qe-performance-tester', // Baseline metrics
139
+ 'qe-production-intelligence' // Production monitoring
140
+ ],
141
+ topology: 'sequential'
142
+ });
143
+ ```
144
+
145
+ ---
95
146
 
96
- **Break things on purpose to prevent unplanned outages.**
147
+ ## Related Skills
148
+ - [shift-right-testing](../shift-right-testing/) - Production testing
149
+ - [performance-testing](../performance-testing/) - Load testing
150
+ - [test-environment-management](../test-environment-management/) - Environment stability
151
+
152
+ ---
153
+
154
+ ## Remember
97
155
 
98
- - Find weaknesses before users do
99
- - Build confidence in system resilience
100
- - Validate recovery procedures work
101
- - Create runbooks from experiments
156
+ **Break things on purpose to prevent unplanned outages.** Find weaknesses before users do. Define steady state, inject failures, measure impact, fix weaknesses, create runbooks. Start small, increase blast radius gradually.
102
157
 
103
- **With Agents:** `qe-chaos-engineer` automates chaos experiments with blast radius control, automatic rollback, and comprehensive resilience validation.
158
+ **With Agents:** `qe-chaos-engineer` automates chaos experiments with blast radius control, automatic rollback, and comprehensive resilience validation. Generates runbooks from experiment results.