@pennyfarthing/benchmark 10.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. package/commands/benchmark-control.md +69 -0
  2. package/commands/benchmark.md +485 -0
  3. package/commands/job-fair.md +102 -0
  4. package/commands/solo.md +447 -0
  5. package/dist/benchmark-integration.d.ts +182 -0
  6. package/dist/benchmark-integration.d.ts.map +1 -0
  7. package/dist/benchmark-integration.js +710 -0
  8. package/dist/benchmark-integration.js.map +1 -0
  9. package/dist/benchmark-integration.test.d.ts +6 -0
  10. package/dist/benchmark-integration.test.d.ts.map +1 -0
  11. package/dist/benchmark-integration.test.js +41 -0
  12. package/dist/benchmark-integration.test.js.map +1 -0
  13. package/dist/index.d.ts +3 -0
  14. package/dist/index.d.ts.map +1 -0
  15. package/dist/index.js +5 -0
  16. package/dist/index.js.map +1 -0
  17. package/dist/job-fair-aggregator.d.ts +150 -0
  18. package/dist/job-fair-aggregator.d.ts.map +1 -0
  19. package/dist/job-fair-aggregator.js +547 -0
  20. package/dist/job-fair-aggregator.js.map +1 -0
  21. package/dist/job-fair-aggregator.test.d.ts +6 -0
  22. package/dist/job-fair-aggregator.test.d.ts.map +1 -0
  23. package/dist/job-fair-aggregator.test.js +35 -0
  24. package/dist/job-fair-aggregator.test.js.map +1 -0
  25. package/dist/package-exports.test.d.ts +13 -0
  26. package/dist/package-exports.test.d.ts.map +1 -0
  27. package/dist/package-exports.test.js +192 -0
  28. package/dist/package-exports.test.js.map +1 -0
  29. package/docs/BENCHMARK-METHODOLOGY.md +105 -0
  30. package/docs/BENCHMARKING.md +311 -0
  31. package/docs/OCEAN-BENCHMARKING.md +210 -0
  32. package/docs/benchmarks-guide.md +62 -0
  33. package/package.json +66 -0
  34. package/scenarios/README.md +145 -0
  35. package/scenarios/architecture/database-selection.yaml +119 -0
  36. package/scenarios/architecture/legacy-modernization.yaml +153 -0
  37. package/scenarios/architecture/scaling-decision.yaml +88 -0
  38. package/scenarios/code-review/graphql-api-review.yaml +714 -0
  39. package/scenarios/code-review/order-service.yaml +622 -0
  40. package/scenarios/code-review/react-auth-component.yaml +569 -0
  41. package/scenarios/code-review/security-review.yaml +145 -0
  42. package/scenarios/code-review/terraform-infrastructure.yaml +582 -0
  43. package/scenarios/debug/buggy-user-service.yaml +541 -0
  44. package/scenarios/debug/null-pointer.yaml +130 -0
  45. package/scenarios/debugging/async-control-flow.yaml +161 -0
  46. package/scenarios/debugging/auth-bypass.yaml +197 -0
  47. package/scenarios/debugging/error-handling.yaml +178 -0
  48. package/scenarios/debugging/input-validation.yaml +157 -0
  49. package/scenarios/debugging/null-check-missing.yaml +139 -0
  50. package/scenarios/debugging/off-by-one-loop.yaml +132 -0
  51. package/scenarios/debugging/race-condition.yaml +180 -0
  52. package/scenarios/debugging/resource-leak.yaml +166 -0
  53. package/scenarios/debugging/simple-logic-error.yaml +115 -0
  54. package/scenarios/debugging/sql-injection.yaml +163 -0
  55. package/scenarios/dev/event-processor-tdd.yaml +764 -0
  56. package/scenarios/dev/migration-disaster.yaml +415 -0
  57. package/scenarios/dev/race-condition-cache.yaml +546 -0
  58. package/scenarios/dev/tdd-shopping-cart.yaml +681 -0
  59. package/scenarios/schema.yaml +639 -0
  60. package/scenarios/sm/dependency-deadlock.yaml +414 -0
  61. package/scenarios/sm/executive-pet-project.yaml +336 -0
  62. package/scenarios/sm/layoff-planning.yaml +356 -0
  63. package/scenarios/sm/sprint-planning-conflict.yaml +303 -0
  64. package/scenarios/sm/story-breakdown.yaml +240 -0
  65. package/scenarios/sm/three-sprint-failure.yaml +397 -0
  66. package/scenarios/swe-bench/README.md +57 -0
  67. package/scenarios/swe-bench/astropy-12907.yaml +128 -0
  68. package/scenarios/swe-bench/astropy-13398.yaml +177 -0
  69. package/scenarios/swe-bench/astropy-14309.yaml +180 -0
  70. package/scenarios/swe-bench/django-10097.yaml +106 -0
  71. package/scenarios/swe-bench/django-10554.yaml +140 -0
  72. package/scenarios/swe-bench/django-10973.yaml +93 -0
  73. package/scenarios/swe-bench/flask-5014-reviewer.yaml +145 -0
  74. package/scenarios/swe-bench/flask-5014-tea.yaml +123 -0
  75. package/scenarios/swe-bench/flask-5014.yaml +91 -0
  76. package/scenarios/swe-bench/import-swebench.py +246 -0
  77. package/scenarios/swe-bench/matplotlib-13989.yaml +139 -0
  78. package/scenarios/swe-bench/matplotlib-14623.yaml +127 -0
  79. package/scenarios/swe-bench/requests-1142-reviewer.yaml +144 -0
  80. package/scenarios/swe-bench/requests-1142-tea.yaml +135 -0
  81. package/scenarios/swe-bench/requests-1142.yaml +100 -0
  82. package/scenarios/swe-bench/requests-2931.yaml +98 -0
  83. package/scenarios/swe-bench/seaborn-3069.yaml +102 -0
  84. package/scenarios/swe-bench/sphinx-7590.yaml +108 -0
  85. package/scenarios/swe-bench/xarray-3993.yaml +104 -0
  86. package/scenarios/swe-bench/xarray-6992.yaml +136 -0
  87. package/scenarios/tea/checkout-component-tests.yaml +596 -0
  88. package/scenarios/tea/cli-tool-tests.yaml +561 -0
  89. package/scenarios/tea/microservice-integration-tests.yaml +520 -0
  90. package/scenarios/tea/payment-processor-tests.yaml +550 -0
  91. package/scripts/aggregate-benchmark-stats.js +315 -0
  92. package/scripts/aggregate-benchmark-stats.sh +8 -0
  93. package/scripts/benchmark-runner.js +392 -0
  94. package/scripts/benchmark-runner.sh +8 -0
  95. package/scripts/consolidate-job-fair.sh +107 -0
  96. package/scripts/convert-jobfair-to-benchmarks.sh +230 -0
  97. package/scripts/job-fair-batch.sh +116 -0
  98. package/scripts/job-fair-progress.sh +35 -0
  99. package/scripts/job-fair-runner.sh +278 -0
  100. package/scripts/job-fair-status.sh +80 -0
  101. package/scripts/job-fair-watcher-v2.sh +38 -0
  102. package/scripts/job-fair-watcher.sh +50 -0
  103. package/scripts/parallel-benchmark.sh +140 -0
  104. package/scripts/solo-runner.sh +344 -0
  105. package/scripts/test/ensure-swebench-data.sh +59 -0
  106. package/scripts/test/ground-truth-judge.py +220 -0
  107. package/scripts/test/swebench-judge.py +374 -0
  108. package/scripts/test/test-cache.sh +165 -0
  109. package/scripts/test/test-setup.sh +337 -0
  110. package/scripts/theme/compute-theme-tiers.sh +13 -0
  111. package/scripts/theme/compute_theme_tiers.py +402 -0
  112. package/scripts/theme/update-theme-tiers.sh +97 -0
  113. package/skills/finalize-run/SKILL.md +261 -0
  114. package/skills/judge/SKILL.md +644 -0
  115. package/skills/persona-benchmark/SKILL.md +187 -0
@@ -0,0 +1,520 @@
1
+ ---
2
+ # Scenario: Microservice Integration Test Design
3
+ # Category: tea
4
+ # Purpose: Test system-level integration testing skills
5
+
6
+ id: tea-003
7
+ name: microservice-integration-tests
8
+ title: "Microservice Integration Test Design"
9
+ category: tea
10
+ difficulty: extreme # Empirical: control mean 63.06 (hardest)
11
+ version: "1.0"
12
+
13
+ description: |
14
+ Design integration tests for an order fulfillment flow across 4 services:
15
+ Order, Inventory, Payment, and Notification. Must handle service dependencies,
16
+ test data management, async operations, and failure scenarios. Tests system-level
17
+ testing expertise beyond unit testing.
18
+
19
+ purpose: |
20
+ This scenario tests integration testing philosophy. A "thorough" persona might
21
+ design more failure scenarios. A "pragmatic" persona might focus on happy paths
22
+ with fewer mocks. Measures ability to think about distributed system testing.
23
+
24
+ prompt: |
25
+ You are a Test Engineer designing integration tests for an order fulfillment system.
26
+
27
+ The system has 4 microservices:
28
+ 1. **Order Service** - Receives orders, orchestrates fulfillment
29
+ 2. **Inventory Service** - Manages stock, reserves items
30
+ 3. **Payment Service** - Processes payments, handles refunds
31
+ 4. **Notification Service** - Sends emails/SMS for order updates
32
+
33
+ The order flow is:
34
+ 1. Order received → Inventory check
35
+ 2. Inventory reserved → Payment processed
36
+ 3. Payment successful → Order confirmed, notification sent
37
+ 4. Any failure → Compensating transactions (unreserve, refund)
38
+
39
+ Design a comprehensive integration test suite covering:
40
+ 1. Happy path - complete order flow
41
+ 2. Service failure scenarios (each service can fail)
42
+ 3. Timeout and retry behavior
43
+ 4. Compensating transaction correctness
44
+ 5. Async message handling
45
+ 6. Test data isolation
46
+
47
+ For each test scenario:
48
+ 1. Name and description
49
+ 2. Setup requirements (test data, mocks)
50
+ 3. Steps to execute
51
+ 4. Assertions to verify
52
+ 5. Cleanup requirements
53
+
54
+ Use pytest with appropriate fixtures. Consider using testcontainers
55
+ for realistic service simulation.
56
+
57
+ code:
58
+ language: python
59
+ filename: services.py
60
+ content: |
61
+ """
62
+ Order Fulfillment System - Service Interfaces
63
+
64
+ These are the service contracts your tests must verify.
65
+ Actual implementations connect to real databases and message queues.
66
+ """
67
+
68
+ from dataclasses import dataclass
69
+ from enum import Enum
70
+ from typing import List, Optional
71
+ from datetime import datetime
72
+ import httpx
73
+ import asyncio
74
+
75
+
76
class OrderStatus(Enum):
    """
    States an order can report.

    Each member's value is the string used in the Order Service payload;
    `OrderService._parse_order` converts that string back into a member.
    """

    # State a freshly created order is returned with.
    PENDING = "pending"

    # Intermediate states.
    INVENTORY_RESERVED = "inventory_reserved"
    PAYMENT_PROCESSING = "payment_processing"

    # Outcome states.
    CONFIRMED = "confirmed"
    FAILED = "failed"
    CANCELLED = "cancelled"
83
+
84
+
85
@dataclass
class OrderItem:
    """A single line of an order: a product, a quantity, and a unit price."""

    # Identifier of the ordered product.
    product_id: str
    # Number of units ordered.
    quantity: int
    # Price of one unit.
    unit_price: float
90
+
91
+
92
@dataclass
class Order:
    """Snapshot of an order as parsed from an Order Service response."""

    # Identity.
    order_id: str
    customer_id: str

    # Contents and current state.
    items: List[OrderItem]
    status: OrderStatus
    created_at: datetime
    total: float

    # Optional in the payload; both default to None when absent.
    payment_id: Optional[str] = None
    failure_reason: Optional[str] = None
102
+
103
+
104
@dataclass
class InventoryReservation:
    """
    A hold placed on stock of one product on behalf of an order.

    Instances are built directly from JSON payloads
    (`InventoryReservation(**payload)`), where a timestamp can only arrive as
    a string. `__post_init__` therefore converts a string `expires_at` into a
    `datetime` so the field always matches its annotation.

    Raises:
        ValueError: if `expires_at` is a string that
            `datetime.fromisoformat` cannot parse.
    """

    reservation_id: str
    order_id: str
    product_id: str
    quantity: int
    expires_at: datetime

    def __post_init__(self):
        # Assumes the wire format is ISO-8601, as `created_at` on orders is
        # parsed the same way -- TODO confirm against the Inventory Service.
        if isinstance(self.expires_at, str):
            self.expires_at = datetime.fromisoformat(self.expires_at)
111
+
112
+
113
@dataclass
class PaymentResult:
    """Outcome of one payment attempt as reported by the Payment Service."""

    payment_id: str
    # One of: "success", "declined", "error".
    status: str
    amount: float
    # Defaults to None when the payload carries no error message.
    error_message: Optional[str] = None
119
+
120
+
121
class OrderService:
    """
    HTTP client for the Order Service, which orchestrates order fulfillment.

    Per the service contract, the Order Service itself communicates with the
    other services via HTTP and a message queue; this class only issues HTTP
    requests to the Order Service.
    """

    def __init__(self, base_url: str):
        self.base_url = base_url
        self.client = httpx.AsyncClient(base_url=base_url, timeout=30.0)

    async def aclose(self) -> None:
        """Close the underlying HTTP client so its connections are released."""
        await self.client.aclose()

    async def create_order(self, customer_id: str, items: List[dict]) -> Order:
        """
        Creates order and initiates fulfillment flow.
        Returns immediately with PENDING status.
        Fulfillment happens asynchronously.

        Raises:
            httpx.HTTPStatusError: if the service answers with a 4xx/5xx status.
        """
        response = await self.client.post("/orders", json={
            "customer_id": customer_id,
            "items": items
        })
        response.raise_for_status()
        return self._parse_order(response.json())

    async def get_order(self, order_id: str) -> Order:
        """
        Get current order status.

        Raises:
            httpx.HTTPStatusError: if the service answers with a 4xx/5xx status.
        """
        response = await self.client.get(f"/orders/{order_id}")
        response.raise_for_status()
        return self._parse_order(response.json())

    async def cancel_order(self, order_id: str) -> Order:
        """
        Cancel order. Triggers compensating transactions:
        - Unreserve inventory
        - Refund payment (if processed)
        - Send cancellation notification

        Raises:
            httpx.HTTPStatusError: if the service answers with a 4xx/5xx status.
        """
        response = await self.client.post(f"/orders/{order_id}/cancel")
        response.raise_for_status()
        return self._parse_order(response.json())

    async def wait_for_status(
        self,
        order_id: str,
        expected_status: OrderStatus,
        timeout_seconds: int = 30
    ) -> Order:
        """
        Poll until order reaches expected status or timeout.

        The order is also returned as soon as it reports FAILED, even when
        FAILED is not `expected_status`, so callers must inspect
        `order.status` on the returned object.

        Raises:
            TimeoutError: if neither `expected_status` nor FAILED is observed
                within `timeout_seconds`.
        """
        # Use the event loop's monotonic clock rather than wall-clock time, so
        # a system clock adjustment during the wait cannot shorten or extend
        # the timeout.
        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout_seconds
        while loop.time() < deadline:
            order = await self.get_order(order_id)
            if order.status == expected_status:
                return order
            if order.status == OrderStatus.FAILED:
                return order  # Don't wait if already failed
            await asyncio.sleep(0.5)
        raise TimeoutError(f"Order {order_id} did not reach {expected_status}")

    def _parse_order(self, data: dict) -> Order:
        """Build an `Order` from a decoded JSON payload.

        `payment_id` and `failure_reason` are optional in the payload and
        become None when missing; every other key is required.
        """
        return Order(
            order_id=data["order_id"],
            customer_id=data["customer_id"],
            items=[OrderItem(**item) for item in data["items"]],
            status=OrderStatus(data["status"]),
            created_at=datetime.fromisoformat(data["created_at"]),
            total=data["total"],
            payment_id=data.get("payment_id"),
            failure_reason=data.get("failure_reason")
        )
189
+
190
+
191
class InventoryService:
    """
    HTTP client for the Inventory Service (product stock and reservations).

    Every method raises `httpx.HTTPStatusError` on a 4xx/5xx response, so an
    unavailable or failing service is reported as an HTTP error instead of a
    confusing `KeyError` or JSON decoding error on the error body.
    """

    def __init__(self, base_url: str):
        self.base_url = base_url
        self.client = httpx.AsyncClient(base_url=base_url, timeout=10.0)

    async def aclose(self) -> None:
        """Close the underlying HTTP client so its connections are released."""
        await self.client.aclose()

    async def check_availability(self, product_id: str, quantity: int) -> bool:
        """Check if quantity is available (doesn't reserve)."""
        response = await self.client.get(
            f"/inventory/{product_id}/available",
            params={"quantity": quantity}
        )
        response.raise_for_status()
        return response.json()["available"]

    async def get_reservations(self, order_id: str) -> List[InventoryReservation]:
        """Get all reservations for an order."""
        response = await self.client.get(
            "/reservations",
            params={"order_id": order_id}
        )
        response.raise_for_status()
        return [
            InventoryReservation(**r)
            for r in response.json()["reservations"]
        ]

    async def get_stock_level(self, product_id: str) -> int:
        """Get current stock level for a product."""
        response = await self.client.get(f"/inventory/{product_id}")
        response.raise_for_status()
        return response.json()["quantity"]
221
+
222
+
223
class PaymentService:
    """
    HTTP client for the Payment Service (payments and refunds).

    Every method raises `httpx.HTTPStatusError` on a 4xx/5xx response, so a
    failing service is reported as an HTTP error rather than a `KeyError` on
    the error body.
    """

    def __init__(self, base_url: str):
        self.base_url = base_url
        self.client = httpx.AsyncClient(base_url=base_url, timeout=30.0)

    async def aclose(self) -> None:
        """Close the underlying HTTP client so its connections are released."""
        await self.client.aclose()

    async def get_payment(self, payment_id: str) -> PaymentResult:
        """Get payment details."""
        response = await self.client.get(f"/payments/{payment_id}")
        response.raise_for_status()
        return self._parse_payment(response.json())

    async def get_payments_for_order(self, order_id: str) -> List[PaymentResult]:
        """Get all payment attempts for an order."""
        response = await self.client.get(
            "/payments",
            params={"order_id": order_id}
        )
        response.raise_for_status()
        return [
            self._parse_payment(p)
            for p in response.json()["payments"]
        ]

    def _parse_payment(self, data: dict) -> PaymentResult:
        """Build a `PaymentResult` from a decoded JSON payload.

        Shared by both lookups so they treat payloads identically:
        `error_message` is optional, and keys other than the four read here
        are ignored.
        """
        return PaymentResult(
            payment_id=data["payment_id"],
            status=data["status"],
            amount=data["amount"],
            error_message=data.get("error_message")
        )
251
+
252
+
253
class NotificationService:
    """
    HTTP client for the Notification Service (order notifications).

    Both lookups raise `httpx.HTTPStatusError` on a 4xx/5xx response, so a
    failing service is reported as an HTTP error rather than a `KeyError` on
    the error body.
    """

    def __init__(self, base_url: str):
        self.base_url = base_url
        self.client = httpx.AsyncClient(base_url=base_url, timeout=10.0)

    async def aclose(self) -> None:
        """Close the underlying HTTP client so its connections are released."""
        await self.client.aclose()

    async def get_notifications(self, customer_id: str) -> List[dict]:
        """Get notifications sent to a customer."""
        return await self._query({"customer_id": customer_id})

    async def get_notification_by_order(self, order_id: str) -> List[dict]:
        """Get all notifications for an order."""
        return await self._query({"order_id": order_id})

    async def _query(self, params: dict) -> List[dict]:
        """GET /notifications with the given filter and return the list."""
        response = await self.client.get("/notifications", params=params)
        # Fail loudly on 4xx/5xx instead of indexing into an error body.
        response.raise_for_status()
        return response.json()["notifications"]
275
+
276
+
277
+ # Test helper: Chaos injection for failure testing
278
class ChaosController:
    """
    Controls chaos injection for testing failure scenarios.
    Each service supports chaos endpoints for testing.

    `service_urls` maps a service name to that service's base URL. Responses
    from the chaos endpoints are checked, so a failure test cannot silently
    run against a service where the injection never took effect.
    """

    def __init__(self, service_urls: dict):
        self.urls = service_urls
        self.client = httpx.AsyncClient(timeout=5.0)

    async def aclose(self) -> None:
        """Close the underlying HTTP client so its connections are released."""
        await self.client.aclose()

    async def inject_failure(
        self,
        service: str,
        failure_type: str,
        duration_seconds: int = 30
    ):
        """
        Inject a failure into a service.

        failure_type options:
        - "timeout": Service responds slowly (>30s)
        - "error_500": Service returns 500 errors
        - "error_503": Service returns 503 (unavailable)
        - "partial": Service fails 50% of requests
        - "payment_declined": Payment always declined (payment service only)

        Raises:
            KeyError: if `service` is not a key of the configured URLs.
            httpx.HTTPStatusError: if the chaos endpoint rejects the request.
        """
        url = f"{self.urls[service]}/chaos/inject"
        response = await self.client.post(url, json={
            "failure_type": failure_type,
            "duration_seconds": duration_seconds
        })
        # An unchecked failure here would let a failure test pass vacuously.
        response.raise_for_status()

    async def clear_failures(self, service: str):
        """
        Remove all injected failures from a service.

        Raises:
            KeyError: if `service` is not a key of the configured URLs.
            httpx.HTTPStatusError: if the chaos endpoint rejects the request.
        """
        url = f"{self.urls[service]}/chaos/clear"
        response = await self.client.post(url)
        response.raise_for_status()

    async def clear_all(self):
        """
        Clear failures from all services.

        Every service is attempted even if an earlier one fails, so one
        unreachable service cannot leave the others with failures still
        injected. The first error encountered is re-raised afterwards.
        """
        first_error = None
        for service in self.urls:
            try:
                await self.clear_failures(service)
            except Exception as exc:
                if first_error is None:
                    first_error = exc
        if first_error is not None:
            raise first_error
319
+
320
+ # =============================================================================
321
+ # BASELINE TEST SCENARIOS (minimum expected to cover)
322
+ # =============================================================================
323
+
324
+ baseline_issues:
325
+ happy_path:
326
+ - id: COMPLETE_ORDER_FLOW
327
+ description: "Order flows from creation to confirmation with all services"
328
+
329
+ - id: INVENTORY_RESERVED
330
+ description: "Inventory is reserved during order processing"
331
+
332
+ - id: PAYMENT_PROCESSED
333
+ description: "Payment is successfully processed"
334
+
335
+ - id: NOTIFICATION_SENT
336
+ description: "Confirmation notification sent to customer"
337
+
338
+ - id: STOCK_DECREMENTED
339
+ description: "Stock level decreases after order confirmed"
340
+
341
+ failure_scenarios:
342
+ - id: INVENTORY_UNAVAILABLE
343
+ description: "Order fails gracefully when inventory insufficient"
344
+
345
+ - id: PAYMENT_DECLINED
346
+ description: "Order fails when payment declined, inventory unreserved"
347
+
348
+ - id: PAYMENT_TIMEOUT
349
+ description: "Order handles payment service timeout with retry"
350
+
351
+ - id: INVENTORY_SERVICE_DOWN
352
+ description: "Order fails gracefully when inventory service unavailable"
353
+
354
+ - id: NOTIFICATION_FAILURE
355
+ description: "Order succeeds even if notification fails (non-critical)"
356
+
357
+ compensating_transactions:
358
+ - id: CANCEL_UNRESERVES
359
+ description: "Cancellation unreserves inventory"
360
+
361
+ - id: CANCEL_REFUNDS
362
+ description: "Cancellation triggers refund if paid"
363
+
364
+ - id: FAILURE_CLEANUP
365
+ description: "Failed order releases all reservations"
366
+
367
+ async_behavior:
368
+ - id: EVENTUAL_CONSISTENCY
369
+ description: "Status eventually consistent across services"
370
+
371
+ - id: IDEMPOTENT_RETRY
372
+ description: "Retried operations don't duplicate effects"
373
+
374
+ data_isolation:
375
+ - id: TEST_DATA_CLEANUP
376
+ description: "Tests clean up created data"
377
+
378
+ - id: PARALLEL_SAFE
379
+ description: "Tests can run in parallel without interference"
380
+
381
+ # =============================================================================
382
+ # BONUS TEST SCENARIOS
383
+ # =============================================================================
384
+
385
+ bonus_issues:
386
+ chaos_engineering:
387
+ - id: PARTIAL_FAILURE
388
+ description: "System handles partial service failures gracefully"
389
+
390
+ - id: CASCADE_PREVENTION
391
+ description: "Failure in one service doesn't cascade"
392
+
393
+ - id: RECOVERY_TEST
394
+ description: "System recovers when failed service comes back"
395
+
396
+ performance:
397
+ - id: CONCURRENT_ORDERS
398
+ description: "Multiple orders processed correctly in parallel"
399
+
400
+ - id: INVENTORY_CONTENTION
401
+ description: "Concurrent orders for same product handled correctly"
402
+
403
+ edge_cases:
404
+ - id: DUPLICATE_ORDER
405
+ description: "Duplicate order request handled idempotently"
406
+
407
+ - id: RACE_CONDITIONS
408
+ description: "Cancel during processing handled correctly"
409
+
410
+ - id: EXPIRED_RESERVATION
411
+ description: "Expired reservations released correctly"
412
+
413
+ contract_testing:
414
+ - id: API_CONTRACTS
415
+ description: "Service API contracts validated"
416
+
417
+ - id: EVENT_CONTRACTS
418
+ description: "Message queue event contracts validated"
419
+
420
+ # =============================================================================
421
+ # SCORING
422
+ # =============================================================================
423
+
424
+ scoring:
425
+ total_baseline_scenarios: 17
426
+ total_bonus_scenarios: 10
427
+
428
+ categories:
429
+ - name: coverage
430
+ weight: 40
431
+ criteria:
432
+ - id: BASELINE_COVERED
433
+ description: "All baseline test scenarios covered"
434
+ points: 30
435
+ - id: BONUS_COVERED
436
+ description: "Additional valuable test scenarios"
437
+ points: 10
438
+
439
+ - name: quality
440
+ weight: 30
441
+ criteria:
442
+ - id: TEST_STRUCTURE
443
+ description: "Well-organized test structure with fixtures"
444
+ points: 10
445
+ - id: ASSERTIONS
446
+ description: "Comprehensive assertions for each scenario"
447
+ points: 10
448
+ - id: ASYNC_HANDLING
449
+ description: "Correct async/await patterns"
450
+ points: 10
451
+
452
+ - name: practicality
453
+ weight: 15
454
+ criteria:
455
+ - id: REALISTIC_SETUP
456
+ description: "Practical test data and service setup"
457
+ points: 8
458
+ - id: MAINTAINABLE
459
+ description: "Tests are maintainable and readable"
460
+ points: 7
461
+
462
+ - name: persona
463
+ weight: 15
464
+ criteria:
465
+ - id: CHARACTER_CONSISTENCY
466
+ description: "Stays in character throughout"
467
+ points: 8
468
+ - id: PERSONA_VALUE_ADD
469
+ description: "Persona enhances test documentation"
470
+ points: 7
471
+
472
+ # =============================================================================
473
+ # PERSONA INFLUENCE
474
+ # =============================================================================
475
+
476
+ persona_influence:
477
+ dimensions:
478
+ - name: test_philosophy
479
+ description: "Integration vs isolation tradeoff"
480
+ spectrum:
481
+ isolated: "Heavy mocking, fast tests"
482
+ balanced: "Mix of real and mocked services"
483
+ realistic: "Testcontainers, real dependencies"
484
+
485
+ - name: failure_focus
486
+ description: "How much emphasis on failure scenarios"
487
+ spectrum:
488
+ happy_path: "Focuses on success scenarios"
489
+ balanced: "Equal coverage of success and failure"
490
+ chaos_focused: "Emphasizes failure and recovery"
491
+
492
+ - name: documentation_style
493
+ description: "How tests are documented"
494
+ spectrum:
495
+ minimal: "Self-documenting test names"
496
+ moderate: "Docstrings on complex tests"
497
+ comprehensive: "Full scenario documentation"
498
+
499
+ expected_tendencies:
500
+ discworld_tea:
501
+ character: "Igor"
502
+ expected_traits:
503
+ - "Thorough - covers many failure modes"
504
+ - "Practical - focuses on what breaks"
505
+ - "May suggest unusual edge cases"
506
+ coverage_prediction: "high"
507
+
508
+ star_trek_tea:
509
+ character: "Scotty"
510
+ expected_traits:
511
+ - "Systematic - organized test structure"
512
+ - "Engineering focus - realistic scenarios"
513
+ - "May emphasize performance testing"
514
+ coverage_prediction: "high"
515
+
516
+ control_tea:
517
+ character: "None (baseline)"
518
+ expected_traits:
519
+ - "Standard integration testing approach"
520
+ coverage_prediction: "baseline reference"