agentic-qe 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. package/.claude/agents/qx-partner.md +17 -4
  2. package/.claude/skills/accessibility-testing/SKILL.md +144 -692
  3. package/.claude/skills/agentic-quality-engineering/SKILL.md +176 -529
  4. package/.claude/skills/api-testing-patterns/SKILL.md +180 -560
  5. package/.claude/skills/brutal-honesty-review/SKILL.md +113 -603
  6. package/.claude/skills/bug-reporting-excellence/SKILL.md +116 -517
  7. package/.claude/skills/chaos-engineering-resilience/SKILL.md +127 -72
  8. package/.claude/skills/cicd-pipeline-qe-orchestrator/SKILL.md +209 -404
  9. package/.claude/skills/code-review-quality/SKILL.md +158 -608
  10. package/.claude/skills/compatibility-testing/SKILL.md +148 -38
  11. package/.claude/skills/compliance-testing/SKILL.md +132 -63
  12. package/.claude/skills/consultancy-practices/SKILL.md +114 -446
  13. package/.claude/skills/context-driven-testing/SKILL.md +117 -381
  14. package/.claude/skills/contract-testing/SKILL.md +176 -141
  15. package/.claude/skills/database-testing/SKILL.md +137 -130
  16. package/.claude/skills/exploratory-testing-advanced/SKILL.md +160 -629
  17. package/.claude/skills/holistic-testing-pact/SKILL.md +140 -188
  18. package/.claude/skills/localization-testing/SKILL.md +145 -33
  19. package/.claude/skills/mobile-testing/SKILL.md +132 -448
  20. package/.claude/skills/mutation-testing/SKILL.md +147 -41
  21. package/.claude/skills/performance-testing/SKILL.md +200 -546
  22. package/.claude/skills/quality-metrics/SKILL.md +164 -519
  23. package/.claude/skills/refactoring-patterns/SKILL.md +132 -699
  24. package/.claude/skills/regression-testing/SKILL.md +120 -926
  25. package/.claude/skills/risk-based-testing/SKILL.md +157 -660
  26. package/.claude/skills/security-testing/SKILL.md +199 -538
  27. package/.claude/skills/sherlock-review/SKILL.md +163 -699
  28. package/.claude/skills/shift-left-testing/SKILL.md +161 -465
  29. package/.claude/skills/shift-right-testing/SKILL.md +161 -519
  30. package/.claude/skills/six-thinking-hats/SKILL.md +175 -1110
  31. package/.claude/skills/skills-manifest.json +71 -20
  32. package/.claude/skills/tdd-london-chicago/SKILL.md +131 -448
  33. package/.claude/skills/technical-writing/SKILL.md +103 -154
  34. package/.claude/skills/test-automation-strategy/SKILL.md +166 -772
  35. package/.claude/skills/test-data-management/SKILL.md +126 -910
  36. package/.claude/skills/test-design-techniques/SKILL.md +179 -89
  37. package/.claude/skills/test-environment-management/SKILL.md +136 -91
  38. package/.claude/skills/test-reporting-analytics/SKILL.md +169 -92
  39. package/.claude/skills/testability-scoring/SKILL.md +172 -538
  40. package/.claude/skills/testability-scoring/scripts/generate-html-report.js +0 -0
  41. package/.claude/skills/visual-testing-advanced/SKILL.md +155 -78
  42. package/.claude/skills/xp-practices/SKILL.md +151 -587
  43. package/CHANGELOG.md +48 -0
  44. package/README.md +23 -16
  45. package/dist/agents/QXPartnerAgent.d.ts +8 -1
  46. package/dist/agents/QXPartnerAgent.d.ts.map +1 -1
  47. package/dist/agents/QXPartnerAgent.js +1174 -112
  48. package/dist/agents/QXPartnerAgent.js.map +1 -1
  49. package/dist/agents/lifecycle/AgentLifecycleManager.d.ts.map +1 -1
  50. package/dist/agents/lifecycle/AgentLifecycleManager.js +34 -31
  51. package/dist/agents/lifecycle/AgentLifecycleManager.js.map +1 -1
  52. package/dist/cli/commands/init-claude-md-template.d.ts.map +1 -1
  53. package/dist/cli/commands/init-claude-md-template.js +14 -0
  54. package/dist/cli/commands/init-claude-md-template.js.map +1 -1
  55. package/dist/core/SwarmCoordinator.d.ts +180 -0
  56. package/dist/core/SwarmCoordinator.d.ts.map +1 -0
  57. package/dist/core/SwarmCoordinator.js +473 -0
  58. package/dist/core/SwarmCoordinator.js.map +1 -0
  59. package/dist/core/metrics/MetricsAggregator.d.ts +228 -0
  60. package/dist/core/metrics/MetricsAggregator.d.ts.map +1 -0
  61. package/dist/core/metrics/MetricsAggregator.js +482 -0
  62. package/dist/core/metrics/MetricsAggregator.js.map +1 -0
  63. package/dist/core/metrics/index.d.ts +5 -0
  64. package/dist/core/metrics/index.d.ts.map +1 -0
  65. package/dist/core/metrics/index.js +11 -0
  66. package/dist/core/metrics/index.js.map +1 -0
  67. package/dist/core/optimization/SwarmOptimizer.d.ts +5 -0
  68. package/dist/core/optimization/SwarmOptimizer.d.ts.map +1 -1
  69. package/dist/core/optimization/SwarmOptimizer.js +17 -0
  70. package/dist/core/optimization/SwarmOptimizer.js.map +1 -1
  71. package/dist/core/orchestration/AdaptiveScheduler.d.ts +190 -0
  72. package/dist/core/orchestration/AdaptiveScheduler.d.ts.map +1 -0
  73. package/dist/core/orchestration/AdaptiveScheduler.js +460 -0
  74. package/dist/core/orchestration/AdaptiveScheduler.js.map +1 -0
  75. package/dist/core/orchestration/WorkflowOrchestrator.d.ts +13 -0
  76. package/dist/core/orchestration/WorkflowOrchestrator.d.ts.map +1 -1
  77. package/dist/core/orchestration/WorkflowOrchestrator.js +32 -0
  78. package/dist/core/orchestration/WorkflowOrchestrator.js.map +1 -1
  79. package/dist/core/recovery/CircuitBreaker.d.ts +176 -0
  80. package/dist/core/recovery/CircuitBreaker.d.ts.map +1 -0
  81. package/dist/core/recovery/CircuitBreaker.js +382 -0
  82. package/dist/core/recovery/CircuitBreaker.js.map +1 -0
  83. package/dist/core/recovery/RecoveryOrchestrator.d.ts +186 -0
  84. package/dist/core/recovery/RecoveryOrchestrator.d.ts.map +1 -0
  85. package/dist/core/recovery/RecoveryOrchestrator.js +476 -0
  86. package/dist/core/recovery/RecoveryOrchestrator.js.map +1 -0
  87. package/dist/core/recovery/RetryStrategy.d.ts +127 -0
  88. package/dist/core/recovery/RetryStrategy.d.ts.map +1 -0
  89. package/dist/core/recovery/RetryStrategy.js +314 -0
  90. package/dist/core/recovery/RetryStrategy.js.map +1 -0
  91. package/dist/core/recovery/index.d.ts +8 -0
  92. package/dist/core/recovery/index.d.ts.map +1 -0
  93. package/dist/core/recovery/index.js +27 -0
  94. package/dist/core/recovery/index.js.map +1 -0
  95. package/dist/core/skills/DependencyResolver.d.ts +99 -0
  96. package/dist/core/skills/DependencyResolver.d.ts.map +1 -0
  97. package/dist/core/skills/DependencyResolver.js +260 -0
  98. package/dist/core/skills/DependencyResolver.js.map +1 -0
  99. package/dist/core/skills/ManifestGenerator.d.ts +114 -0
  100. package/dist/core/skills/ManifestGenerator.d.ts.map +1 -0
  101. package/dist/core/skills/ManifestGenerator.js +449 -0
  102. package/dist/core/skills/ManifestGenerator.js.map +1 -0
  103. package/dist/core/skills/index.d.ts +9 -0
  104. package/dist/core/skills/index.d.ts.map +1 -0
  105. package/dist/core/skills/index.js +24 -0
  106. package/dist/core/skills/index.js.map +1 -0
  107. package/dist/mcp/server.d.ts +9 -9
  108. package/dist/mcp/server.d.ts.map +1 -1
  109. package/dist/mcp/server.js +1 -2
  110. package/dist/mcp/server.js.map +1 -1
  111. package/dist/types/qx.d.ts +39 -7
  112. package/dist/types/qx.d.ts.map +1 -1
  113. package/dist/types/qx.js.map +1 -1
  114. package/dist/visualization/api/RestEndpoints.js +1 -1
  115. package/dist/visualization/api/RestEndpoints.js.map +1 -1
  116. package/package.json +13 -55
@@ -1,297 +1,149 @@
1
1
  ---
2
2
  name: performance-testing
3
- description: Test application performance, scalability, and resilience. Use when planning load testing, stress testing, or optimizing system performance.
3
+ description: "Test application performance, scalability, and resilience. Use when planning load testing, stress testing, or optimizing system performance."
4
+ category: specialized-testing
5
+ priority: high
6
+ tokenEstimate: 1100
7
+ agents: [qe-performance-tester, qe-quality-analyzer, qe-production-intelligence]
8
+ implementation_status: optimized
9
+ optimization_version: 1.0
10
+ last_optimized: 2025-12-02
11
+ dependencies: []
12
+ quick_reference_card: true
13
+ tags: [performance, load-testing, stress-testing, scalability, k6, bottlenecks]
4
14
  ---
5
15
 
6
16
  # Performance Testing
7
17
 
8
- ## Core Principle
18
+ <default_to_action>
19
+ When testing performance or planning load tests:
20
+ 1. DEFINE SLOs: p95 response time, throughput, error rate targets
21
+ 2. IDENTIFY critical paths: revenue flows, high-traffic pages, key APIs
22
+ 3. CREATE realistic scenarios: user journeys, think time, varied data
23
+ 4. EXECUTE with monitoring: CPU, memory, DB queries, network
24
+ 5. ANALYZE bottlenecks and fix before production
25
+
26
+ **Quick Test Type Selection:**
27
+ - Expected load validation → Load testing
28
+ - Find breaking point → Stress testing
29
+ - Sudden traffic spike → Spike testing
30
+ - Memory leaks, resource exhaustion → Endurance/soak testing
31
+ - Horizontal/vertical scaling → Scalability testing
32
+
33
+ **Critical Success Factors:**
34
+ - Performance is a feature, not an afterthought
35
+ - Test early and often, not just before release
36
+ - Focus on user-impacting bottlenecks
37
+ </default_to_action>
38
+
39
+ ## Quick Reference Card
40
+
41
+ ### When to Use
42
+ - Before major releases
43
+ - After infrastructure changes
44
+ - Before scaling events (Black Friday)
45
+ - When setting SLAs/SLOs
46
+
47
+ ### Test Types
48
+ | Type | Purpose | When |
49
+ |------|---------|------|
50
+ | **Load** | Expected traffic | Every release |
51
+ | **Stress** | Beyond capacity | Quarterly |
52
+ | **Spike** | Sudden surge | Before events |
53
+ | **Endurance** | Memory leaks | After code changes |
54
+ | **Scalability** | Scaling validation | Infrastructure changes |
9
55
 
10
- **Performance is a feature, not an afterthought.**
11
-
12
- Test performance like you test functionality: continuously, automatically, and with clear acceptance criteria.
13
-
14
- ## Why Performance Testing Matters
15
-
16
- ### User Impact
17
- - 100ms delay = 1% drop in conversions (Amazon)
18
- - 53% of mobile users abandon sites taking > 3 seconds (Google)
19
- - Slow = Broken (in users' eyes)
20
-
21
- ### Business Impact
22
- - Lost revenue from abandoned transactions
23
- - Increased infrastructure costs
24
- - Degraded user experience
25
- - Reputation damage
26
-
27
- ### Technical Impact
28
- - Scalability limits
29
- - Infrastructure bottlenecks
30
- - Hidden architectural problems
31
-
32
- ## Types of Performance Testing
33
-
34
- ### 1. Load Testing
35
-
36
- **What**: System behavior under expected load
37
-
38
- **Goal**: Verify the system handles typical usage
39
-
40
- **Example**:
41
- - E-commerce site handling 1,000 concurrent users
42
- - API serving 10,000 requests/minute
43
- - Database processing 500 transactions/second
44
-
45
- **When**: Before every major release
46
-
47
- **Tools**: k6, JMeter, Gatling, Artillery
48
-
49
- ### 2. Stress Testing
50
-
51
- **What**: System behavior under extreme load (beyond capacity)
52
-
53
- **Goal**: Find breaking point, see how system fails
54
-
55
- **Example**:
56
- - Ramping up from 1,000 to 10,000 concurrent users
57
- - Pushing API until response time degrades
58
- - Filling database until queries slow
59
-
60
- **When**: Before scaling infrastructure, quarterly at minimum
61
-
62
- **What to look for**: Graceful degradation, not catastrophic failure
63
-
64
- ### 3. Spike Testing
65
-
66
- **What**: System behavior under sudden load increase
67
-
68
- **Goal**: Test auto-scaling, handling unexpected traffic
69
-
70
- **Example**:
71
- - Black Friday sale announcement
72
- - Viral social media post
73
- - Marketing campaign launch
74
-
75
- **When**: Before major events, after infrastructure changes
76
-
77
- **Pattern**: Instant ramp from normal to 5-10x load
78
-
79
- ### 4. Endurance/Soak Testing
80
-
81
- **What**: System behavior over extended time
82
-
83
- **Goal**: Find memory leaks, resource exhaustion, gradual degradation
84
-
85
- **Example**:
86
- - Run at normal load for 24-72 hours
87
- - Monitor memory, connections, file handles
88
- - Check for resource leaks
89
-
90
- **When**: After significant code changes, quarterly
91
-
92
- **What to look for**: Stable resource usage over time
93
-
94
- ### 5. Scalability Testing
95
-
96
- **What**: How system performs as load increases
97
-
98
- **Goal**: Validate horizontal/vertical scaling
99
-
100
- **Example**:
101
- - Add servers, measure throughput improvement
102
- - Test auto-scaling triggers
103
- - Find scaling limits
104
-
105
- **When**: Before capacity planning, infrastructure changes
106
-
107
- ## Performance Testing Strategy
108
-
109
- ### Start with Requirements
110
-
111
- **Bad**: "The system should be fast"
112
- **Good**: "95th percentile response time < 200ms under 1,000 concurrent users"
113
-
114
- **Define SLOs (Service Level Objectives)**:
115
- - **Response Time**: 95th percentile < 200ms
116
- - **Throughput**: 10,000 requests/minute minimum
117
- - **Error Rate**: < 0.1% under load
118
- - **Resource Usage**: CPU < 70%, Memory < 80%
119
-
120
- ### Identify Critical Paths
121
-
122
- Don't test everything equally. Focus on:
123
- - Revenue-generating flows (checkout, payment)
124
- - High-traffic pages (homepage, product pages)
125
- - Critical APIs (authentication, data access)
126
- - Resource-intensive operations (search, reports)
127
-
128
- ### Realistic Scenarios
129
-
130
- **Bad**: Every user hits homepage repeatedly
131
- **Good**:
132
- - 40% browse products
133
- - 30% search
134
- - 20% view product details
135
- - 10% checkout
136
-
137
- Include:
138
- - Think time (users don't click instantly)
139
- - Varied data (different products, users, queries)
140
- - Realistic workflows (browse → search → add to cart → checkout)
141
-
142
- ## Setting Up Performance Tests
143
-
144
- ### Test Environment
145
-
146
- **Ideal**: Production-like infrastructure
147
- - Same server specs
148
- - Same database size
149
- - Same network topology
150
- - Same third-party integrations (or mocks)
56
+ ### Key Metrics
57
+ | Metric | Target | Why |
58
+ |--------|--------|-----|
59
+ | p95 response | < 200ms | User experience |
60
+ | Throughput | 10k req/min | Capacity |
61
+ | Error rate | < 0.1% | Reliability |
62
+ | CPU | < 70% | Headroom |
63
+ | Memory | < 80% | Stability |
64
+
65
+ ### Tools
66
+ - **k6**: Modern, JS-based, CI/CD friendly
67
+ - **JMeter**: Enterprise, feature-rich
68
+ - **Artillery**: Simple YAML configs
69
+ - **Gatling**: Scala, great reporting
70
+
71
+ ### Agent Coordination
72
+ - `qe-performance-tester`: Load test orchestration
73
+ - `qe-quality-analyzer`: Results analysis
74
+ - `qe-production-intelligence`: Production comparison
151
75
 
152
- **Reality**: Often scaled-down version
153
- - Document differences
154
- - Extrapolate results carefully
155
- - Validate with production monitoring
76
+ ---
156
77
 
157
- ### Test Data
78
+ ## Defining SLOs
158
79
 
159
- **Requirements**:
160
- - Realistic volume (don't test with 100 users when you have 10M)
161
- - Varied data (avoid cache hits skewing results)
162
- - Production-like distribution (80/20 rule applies)
80
+ **Bad:** "The system should be fast"
81
+ **Good:** "p95 response time < 200ms under 1,000 concurrent users"
163
82
 
164
- **Example**:
165
- ```
166
- Products: 100,000 (matching production)
167
- Users: 50,000 test accounts
168
- Orders: 1M historical orders
169
- Search queries: Real query distribution
83
+ ```javascript
84
+ export const options = {
85
+ thresholds: {
86
+ http_req_duration: ['p(95)<200'], // 95% < 200ms
87
+ http_req_failed: ['rate<0.01'], // < 1% failures
88
+ },
89
+ };
170
90
  ```
171
91
 
172
- ### Monitoring During Tests
92
+ ---
173
93
 
174
- **Essential metrics**:
175
- - Response time (avg, 50th, 95th, 99th percentile)
176
- - Throughput (requests/second)
177
- - Error rate
178
- - CPU, memory, disk I/O
179
- - Database query time
180
- - Network latency
94
+ ## Realistic Scenarios
181
95
 
182
- **Tools**:
183
- - Application: New Relic, Datadog, Dynatrace
184
- - Infrastructure: Prometheus, Grafana
185
- - Database: Query analyzers, slow query logs
96
+ **Bad:** Every user hits homepage repeatedly
97
+ **Good:** Model actual user behavior
186
98
 
187
- ## Common Performance Bottlenecks
99
+ ```javascript
100
+ // Realistic distribution
101
+ // 40% browse, 30% search, 20% details, 10% checkout
102
+ export default function () {
103
+ const action = Math.random();
104
+ if (action < 0.4) browse();
105
+ else if (action < 0.7) search();
106
+ else if (action < 0.9) viewProduct();
107
+ else checkout();
188
108
 
189
- ### 1. Database
109
+ sleep(randomInt(1, 5)); // Think time
110
+ }
111
+ ```
190
112
 
191
- **Symptoms**:
192
- - Slow queries under load
193
- - Connection pool exhaustion
194
- - Lock contention
113
+ ---
195
114
 
196
- **Solutions**:
197
- - Add indexes on filtered columns
198
- - Optimize N+1 queries
199
- - Increase connection pool size
200
- - Add read replicas
201
- - Implement caching
115
+ ## Common Bottlenecks
202
116
 
203
- ### 2. N+1 Queries
117
+ ### Database
118
+ **Symptoms:** Slow queries under load, connection pool exhaustion
119
+ **Fixes:** Add indexes, optimize N+1 queries, increase pool size, read replicas
204
120
 
205
- **Problem**:
121
+ ### N+1 Queries
206
122
  ```javascript
207
- // Load 100 orders
123
+ // BAD: 100 orders = 101 queries
208
124
  const orders = await Order.findAll();
209
-
210
- // For each order, load customer (100 queries!)
211
125
  for (const order of orders) {
212
126
  const customer = await Customer.findById(order.customerId);
213
127
  }
214
- ```
215
128
 
216
- **Fix**:
217
- ```javascript
218
- // Load orders with customers in one query
219
- const orders = await Order.findAll({
220
- include: [Customer]
221
- });
129
+ // GOOD: 1 query
130
+ const orders = await Order.findAll({ include: [Customer] });
222
131
  ```
223
132
 
224
- ### 3. Synchronous Processing
225
-
226
- **Problem**: Blocking operations in request path
227
-
228
- **Example**: Sending email during checkout
229
-
230
- **Fix**:
231
- - Use message queues (RabbitMQ, SQS)
232
- - Process asynchronously
233
- - Return response immediately
234
-
235
- ### 4. Memory Leaks
133
+ ### Synchronous Processing
134
+ **Problem:** Blocking operations in request path (sending email during checkout)
135
+ **Fix:** Use message queues, process async, return immediately
236
136
 
237
- **Symptoms**:
238
- - Memory usage grows over time
239
- - Performance degrades gradually
240
- - Eventually crashes
137
+ ### Memory Leaks
138
+ **Detection:** Endurance testing, memory profiling
139
+ **Common causes:** Event listeners not cleaned, caches without eviction
241
140
 
242
- **Detection**:
243
- - Endurance testing
244
- - Memory profiling (heap dumps)
245
- - Monitor garbage collection
141
+ ### External Dependencies
142
+ **Solutions:** Aggressive timeouts, circuit breakers, caching, graceful degradation
246
143
 
247
- **Common causes**:
248
- - Event listeners not cleaned up
249
- - Caches without eviction
250
- - Circular references
251
- - Global state accumulation
252
-
253
- ### 5. Inadequate Caching
254
-
255
- **Problem**: Recalculating same results repeatedly
256
-
257
- **Strategy**:
258
- - Cache expensive operations
259
- - Use CDN for static assets
260
- - Implement application-level caching (Redis)
261
- - Browser caching (Cache-Control headers)
262
-
263
- **Balance**: Cache hit rate vs. memory usage
264
-
265
- ### 6. External Dependencies
266
-
267
- **Problem**: Third-party APIs slow or unavailable
268
-
269
- **Solutions**:
270
- - Set aggressive timeouts
271
- - Implement circuit breakers
272
- - Cache responses when possible
273
- - Degrade gracefully if unavailable
274
-
275
- ## Performance Testing in CI/CD
276
-
277
- ### Continuous Performance Testing
278
-
279
- **Approach 1: Smoke Tests**
280
- - Run small load test on every commit
281
- - 10 concurrent users for 1 minute
282
- - Catch major regressions quickly
283
-
284
- **Approach 2: Nightly Tests**
285
- - Full load test overnight
286
- - More comprehensive scenarios
287
- - Trend analysis over time
288
-
289
- **Approach 3: Pre-Production Gate**
290
- - Load test before production deploy
291
- - Automated pass/fail criteria
292
- - Block deployment if performance degrades
144
+ ---
293
145
 
294
- ### Example: k6 in CI/CD
146
+ ## k6 CI/CD Example
295
147
 
296
148
  ```javascript
297
149
  // performance-test.js
@@ -301,275 +153,76 @@ import { check, sleep } from 'k6';
301
153
  export const options = {
302
154
  stages: [
303
155
  { duration: '1m', target: 50 }, // Ramp up
304
- { duration: '3m', target: 50 }, // Stay at 50 users
156
+ { duration: '3m', target: 50 }, // Steady
305
157
  { duration: '1m', target: 0 }, // Ramp down
306
158
  ],
307
159
  thresholds: {
308
- http_req_duration: ['p(95)<200'], // 95% of requests < 200ms
309
- http_req_failed: ['rate<0.01'], // < 1% failures
160
+ http_req_duration: ['p(95)<200'],
161
+ http_req_failed: ['rate<0.01'],
310
162
  },
311
163
  };
312
164
 
313
165
  export default function () {
314
166
  const res = http.get('https://api.example.com/products');
315
-
316
167
  check(res, {
317
168
  'status is 200': (r) => r.status === 200,
318
169
  'response time < 200ms': (r) => r.timings.duration < 200,
319
170
  });
320
-
321
171
  sleep(1);
322
172
  }
323
173
  ```
324
174
 
325
175
  ```yaml
326
- # .github/workflows/performance.yml
327
- name: Performance Tests
328
-
329
- on:
330
- pull_request:
331
- branches: [main]
332
-
333
- jobs:
334
- performance:
335
- runs-on: ubuntu-latest
336
- steps:
337
- - uses: actions/checkout@v2
338
-
339
- - name: Run k6 test
340
- uses: grafana/k6-action@v0.3.0
341
- with:
342
- filename: performance-test.js
343
-
344
- - name: Upload results
345
- uses: actions/upload-artifact@v2
346
- with:
347
- name: k6-results
348
- path: results.json
176
+ # GitHub Actions
177
+ - name: Run k6 test
178
+ uses: grafana/k6-action@v0.3.0
179
+ with:
180
+ filename: performance-test.js
349
181
  ```
350
182
 
351
- ## Analyzing Performance Test Results
352
-
353
- ### Key Metrics
354
-
355
- **Response Time Distribution**:
356
- - **Average**: Misleading (outliers skew it)
357
- - **Median (50th percentile)**: Typical user experience
358
- - **95th percentile**: "Slow but acceptable"
359
- - **99th percentile**: Worst user experience
360
-
361
- **Throughput**:
362
- - Requests/second sustained
363
- - How it changes with load
364
- - Where it plateaus (capacity)
365
-
366
- **Error Rate**:
367
- - Should stay flat as load increases
368
- - Spike indicates breaking point
183
+ ---
369
184
 
370
- ### Interpreting Results
185
+ ## Analyzing Results
371
186
 
372
- **Good**:
187
+ ### Good Results
373
188
  ```
374
- Load: 1,000 users
375
- Response time p95: 180ms
376
- Throughput: 5,000 req/s
377
- Error rate: 0.05%
378
- CPU: 65%, Memory: 70%
189
+ Load: 1,000 users | p95: 180ms | Throughput: 5,000 req/s
190
+ Error rate: 0.05% | CPU: 65% | Memory: 70%
379
191
  ```
380
192
 
381
- **Problems**:
193
+ ### Problems
382
194
  ```
383
- Load: 1,000 users
384
- Response time p95: 3,500ms ❌ (too slow)
385
- Throughput: 500 req/s ❌ (way below target)
386
- Error rate: 5% ❌ (too many errors)
387
- CPU: 95%, Memory: 90% ❌ (maxed out)
195
+ Load: 1,000 users | p95: 3,500ms ❌ | Throughput: 500 req/s ❌
196
+ Error rate: 5% ❌ | CPU: 95% ❌ | Memory: 90% ❌
388
197
  ```
389
198
 
390
199
  ### Root Cause Analysis
200
+ 1. Correlate metrics: When response time spikes, what changes?
201
+ 2. Check logs: Errors, warnings, slow queries
202
+ 3. Profile code: Where is time spent?
203
+ 4. Monitor resources: CPU, memory, disk
204
+ 5. Trace requests: End-to-end flow
391
205
 
392
- 1. **Correlate metrics**: When response time spikes, what else changes?
393
- 2. **Check logs**: Errors, warnings, slow queries
394
- 3. **Profile code**: Where is time spent?
395
- 4. **Monitor resources**: CPU, memory, network, disk
396
- 5. **Trace requests**: End-to-end request flow
397
-
398
- ## Production Performance Monitoring
399
-
400
- ### Synthetic Monitoring
401
-
402
- **What**: Automated tests hitting production
403
-
404
- **Example**:
405
- - Every 5 minutes, test critical flows
406
- - Measure response time from multiple locations
407
- - Alert if degradation detected
408
-
409
- **Tools**: Pingdom, Datadog Synthetics, New Relic Synthetics
410
-
411
- ### Real User Monitoring (RUM)
412
-
413
- **What**: Measure actual user experience
414
-
415
- **Metrics**:
416
- - Page load time
417
- - Time to interactive
418
- - API response times
419
- - Error rates
420
-
421
- **Tools**: Google Analytics, New Relic Browser, Datadog RUM
422
-
423
- ### Alerting
424
-
425
- **Set alerts on**:
426
- - p95 response time > threshold
427
- - Error rate > 1%
428
- - Throughput drops suddenly
429
- - Queue depth growing
430
-
431
- **Don't alert on**:
432
- - Average response time (too noisy)
433
- - Single slow request (outliers happen)
434
-
435
- ## Performance Testing Anti-Patterns
436
-
437
- ### ❌ Testing Too Late
438
-
439
- **Problem**: Find performance issues in production
440
-
441
- **Fix**: Test early and often, catch issues before release
442
-
443
- ### ❌ Unrealistic Scenarios
444
-
445
- **Problem**: Test doesn't match real usage
446
-
447
- **Example**: All users hitting same endpoint simultaneously
448
-
449
- **Fix**: Model realistic user journeys, think time, data distribution
450
-
451
- ### ❌ Ignoring Ramp-Up
452
-
453
- **Problem**: 0 to 1,000 users instantly
454
-
455
- **Real world**: Traffic grows gradually (usually)
456
-
457
- **Fix**: Ramp up over time, see how system adapts
458
-
459
- ### ❌ Testing Without Monitoring
460
-
461
- **Problem**: Can't see what's happening during test
462
-
463
- **Fix**: Monitor everything during tests
464
-
465
- ### ❌ No Baseline
466
-
467
- **Problem**: Don't know if results are good or bad
468
-
469
- **Fix**: Establish baseline, track trends over time
470
-
471
- ### ❌ One-Time Testing
472
-
473
- **Problem**: Test once before launch, never again
474
-
475
- **Fix**: Continuous performance testing, trend monitoring
476
-
477
- ## Tools Overview
478
-
479
- ### Load Testing Tools
480
-
481
- **k6**: Modern, developer-friendly, JavaScript-based
482
- - Good for: CI/CD integration, API testing
483
- - Learning curve: Low
484
-
485
- **JMeter**: Mature, feature-rich, GUI-based
486
- - Good for: Complex scenarios, extensive protocols
487
- - Learning curve: Medium
488
-
489
- **Gatling**: Scala-based, great reporting
490
- - Good for: High load, detailed metrics
491
- - Learning curve: Medium
492
-
493
- **Artillery**: Node.js, simple YAML configs
494
- - Good for: Quick tests, simple scenarios
495
- - Learning curve: Very low
496
-
497
- **Locust**: Python-based, distributed testing
498
- - Good for: Custom user behavior, Python ecosystems
499
- - Learning curve: Low-Medium
500
-
501
- ### APM (Application Performance Monitoring)
502
-
503
- - **New Relic**: Comprehensive, expensive
504
- - **Datadog**: Infrastructure + APM combined
505
- - **Dynatrace**: AI-powered root cause analysis
506
- - **AppDynamics**: Enterprise-focused
507
-
508
- ### Database Profiling
509
-
510
- - **pg_stat_statements** (PostgreSQL)
511
- - **MySQL slow query log**
512
- - **MongoDB profiler**
513
- - **Redis SLOWLOG**
514
-
515
- ## Real-World Example
516
-
517
- ### Scenario: E-Commerce Checkout Slow
518
-
519
- **Problem**: Checkout taking 5+ seconds under load
520
-
521
- **Investigation**:
522
- 1. Load test: Reproduce issue
523
- 2. Monitor: Database CPU at 95%
524
- 3. Profile: Slow query on order creation
525
- 4. Root cause: Missing index on `orders.user_id`
526
-
527
- **Fix**:
528
- ```sql
529
- CREATE INDEX idx_orders_user_id ON orders(user_id);
530
- ```
531
-
532
- **Result**:
533
- - Checkout time: 5s → 300ms
534
- - Database CPU: 95% → 40%
535
- - Throughput: 5x improvement
536
-
537
- ## When NOT to Performance Test
538
-
539
- - **MVPs/Prototypes**: Focus on validating idea first
540
- - **Internal tools**: With < 10 users, performance rarely matters
541
- - **One-time scripts**: Not worth the effort
542
- - **Before optimization**: Profile first, optimize second, then test
543
-
544
- ## Checklist: Before Going to Production
545
-
546
- - [ ] Load test passed (expected traffic)
547
- - [ ] Stress test passed (2-3x expected traffic)
548
- - [ ] Spike test passed (sudden traffic surge)
549
- - [ ] Endurance test passed (24+ hours)
550
- - [ ] Database indexes in place
551
- - [ ] Caching configured
552
- - [ ] Monitoring and alerting set up
553
- - [ ] Auto-scaling configured (if applicable)
554
- - [ ] Performance baseline established
206
+ ---
555
207
 
556
- ## Remember
208
+ ## Anti-Patterns
557
209
 
558
- **Performance is a feature**: Test it like functionality
559
- **Test continuously**: Not just before launch
560
- **Monitor production**: Synthetic + real user monitoring
561
- **Set realistic goals**: Based on business requirements
562
- **Fix what matters**: Focus on user-impacting bottlenecks
563
- **Trend over time**: Performance degrades gradually, catch it early
210
+ | Anti-Pattern | Better |
211
+ |----------------|-----------|
212
+ | Testing too late | Test early and often |
213
+ | Unrealistic scenarios | Model real user behavior |
214
+ | 0 to 1000 users instantly | Ramp up gradually |
215
+ | No monitoring during tests | Monitor everything |
216
+ | No baseline | Establish and track trends |
217
+ | One-time testing | Continuous performance testing |
564
218
 
565
- ## Using with QE Agents
219
+ ---
566
220
 
567
- ### Automated Performance Testing
221
+ ## Agent-Assisted Performance Testing
568
222
 
569
- **qe-performance-tester** orchestrates load testing:
570
223
  ```typescript
571
- // Agent runs comprehensive load test
572
- const perfTest = await agent.runLoadTest({
224
+ // Comprehensive load test
225
+ await Task("Load Test", {
573
226
  target: 'https://api.example.com',
574
227
  scenarios: {
575
228
  checkout: { vus: 100, duration: '5m' },
@@ -578,57 +231,47 @@ const perfTest = await agent.runLoadTest({
578
231
  },
579
232
  thresholds: {
580
233
  'http_req_duration': ['p(95)<200'],
581
- 'http_req_failed': ['rate<0.01'],
582
- 'http_reqs': ['rate>100']
234
+ 'http_req_failed': ['rate<0.01']
583
235
  }
584
- });
585
-
586
- // Returns detailed performance report
587
- ```
236
+ }, "qe-performance-tester");
588
237
 
589
- ### Bottleneck Analysis
590
-
591
- ```typescript
592
- // Agent identifies performance bottlenecks
593
- const analysis = await qe-performance-tester.analyzeBottlenecks({
238
+ // Bottleneck analysis
239
+ await Task("Analyze Bottlenecks", {
594
240
  testResults: perfTest,
595
- metrics: ['cpu', 'memory', 'db_queries', 'network', 'cache_hits']
596
- });
597
-
598
- // Returns:
599
- // {
600
- // bottlenecks: [
601
- // { component: 'database', severity: 'high', suggestion: 'Add index on orders.created_at' },
602
- // { component: 'api', severity: 'medium', suggestion: 'Enable response caching' }
603
- // ]
604
- // }
605
- ```
241
+ metrics: ['cpu', 'memory', 'db_queries', 'network']
242
+ }, "qe-performance-tester");
606
243
 
607
- ### Continuous Performance Monitoring
608
-
609
- ```typescript
610
- // Agent integrates performance testing in CI/CD
611
- const ciPerformance = await qe-performance-tester.ciIntegration({
612
- mode: 'smoke', // Quick test on every commit
244
+ // CI integration
245
+ await Task("CI Performance Gate", {
246
+ mode: 'smoke',
613
247
  duration: '1m',
614
248
  vus: 10,
615
- failOn: {
616
- 'p95_response_time': 300, // ms
617
- 'error_rate': 0.01 // 1%
618
- }
619
- });
249
+ failOn: { 'p95_response_time': 300, 'error_rate': 0.01 }
250
+ }, "qe-performance-tester");
620
251
  ```
621
252
 
622
- ### Fleet Coordination for Performance
253
+ ---
254
+
255
+ ## Agent Coordination Hints
623
256
 
257
+ ### Memory Namespace
258
+ ```
259
+ aqe/performance/
260
+ ├── results/* - Test execution results
261
+ ├── baselines/* - Performance baselines
262
+ ├── bottlenecks/* - Identified bottlenecks
263
+ └── trends/* - Historical trends
264
+ ```
265
+
266
+ ### Fleet Coordination
624
267
  ```typescript
625
- const performanceFleet = await FleetManager.coordinate({
268
+ const perfFleet = await FleetManager.coordinate({
626
269
  strategy: 'performance-testing',
627
270
  agents: [
628
- 'qe-performance-tester', // Run load tests
629
- 'qe-quality-analyzer', // Analyze results
630
- 'qe-production-intelligence', // Compare to production
631
- 'qe-deployment-readiness' // Go/no-go decision
271
+ 'qe-performance-tester',
272
+ 'qe-quality-analyzer',
273
+ 'qe-production-intelligence',
274
+ 'qe-deployment-readiness'
632
275
  ],
633
276
  topology: 'sequential'
634
277
  });
@@ -636,21 +279,32 @@ const performanceFleet = await FleetManager.coordinate({
636
279
 
637
280
  ---
638
281
 
639
- ## Related Skills
282
+ ## Pre-Production Checklist
283
+
284
+ - [ ] Load test passed (expected traffic)
285
+ - [ ] Stress test passed (2-3x expected)
286
+ - [ ] Spike test passed (sudden surge)
287
+ - [ ] Endurance test passed (24+ hours)
288
+ - [ ] Database indexes in place
289
+ - [ ] Caching configured
290
+ - [ ] Monitoring and alerting set up
291
+ - [ ] Performance baseline established
640
292
 
641
- **Testing:**
293
+ ---
294
+
295
+ ## Related Skills
642
296
  - [agentic-quality-engineering](../agentic-quality-engineering/) - Agent coordination
643
- - [api-testing-patterns](../api-testing-patterns/) - API performance testing
297
+ - [api-testing-patterns](../api-testing-patterns/) - API performance
298
+ - [chaos-engineering-resilience](../chaos-engineering-resilience/) - Resilience testing
644
299
 
645
- **Quality:**
646
- - [quality-metrics](../quality-metrics/) - Performance metrics tracking
647
- - [risk-based-testing](../risk-based-testing/) - Performance risk assessment
300
+ ---
648
301
 
649
- ## Resources
302
+ ## Remember
650
303
 
651
- - **k6 Documentation**: k6.io/docs
652
- - **Google Web Fundamentals**: Performance optimization guides
653
- - **"Release It!"** by Michael Nygard: Production-ready patterns
654
- - **High Performance Browser Networking** by Ilya Grigorik
304
+ **Performance is a feature:** Test it like functionality
305
+ **Test continuously:** Not just before launch
306
+ **Monitor production:** Synthetic + real user monitoring
307
+ **Fix what matters:** Focus on user-impacting bottlenecks
308
+ **Trend over time:** Catch degradation early
655
309
 
656
- Performance testing isn't optional—it's how you ensure your system works when it matters most.
310
+ **With Agents:** Agents automate load testing, analyze bottlenecks, and compare with production. Use agents to maintain performance at scale.