@pennyfarthing/benchmark 10.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. package/commands/benchmark-control.md +69 -0
  2. package/commands/benchmark.md +485 -0
  3. package/commands/job-fair.md +102 -0
  4. package/commands/solo.md +447 -0
  5. package/dist/benchmark-integration.d.ts +182 -0
  6. package/dist/benchmark-integration.d.ts.map +1 -0
  7. package/dist/benchmark-integration.js +710 -0
  8. package/dist/benchmark-integration.js.map +1 -0
  9. package/dist/benchmark-integration.test.d.ts +6 -0
  10. package/dist/benchmark-integration.test.d.ts.map +1 -0
  11. package/dist/benchmark-integration.test.js +41 -0
  12. package/dist/benchmark-integration.test.js.map +1 -0
  13. package/dist/index.d.ts +3 -0
  14. package/dist/index.d.ts.map +1 -0
  15. package/dist/index.js +5 -0
  16. package/dist/index.js.map +1 -0
  17. package/dist/job-fair-aggregator.d.ts +150 -0
  18. package/dist/job-fair-aggregator.d.ts.map +1 -0
  19. package/dist/job-fair-aggregator.js +547 -0
  20. package/dist/job-fair-aggregator.js.map +1 -0
  21. package/dist/job-fair-aggregator.test.d.ts +6 -0
  22. package/dist/job-fair-aggregator.test.d.ts.map +1 -0
  23. package/dist/job-fair-aggregator.test.js +35 -0
  24. package/dist/job-fair-aggregator.test.js.map +1 -0
  25. package/dist/package-exports.test.d.ts +13 -0
  26. package/dist/package-exports.test.d.ts.map +1 -0
  27. package/dist/package-exports.test.js +192 -0
  28. package/dist/package-exports.test.js.map +1 -0
  29. package/docs/BENCHMARK-METHODOLOGY.md +105 -0
  30. package/docs/BENCHMARKING.md +311 -0
  31. package/docs/OCEAN-BENCHMARKING.md +210 -0
  32. package/docs/benchmarks-guide.md +62 -0
  33. package/package.json +66 -0
  34. package/scenarios/README.md +145 -0
  35. package/scenarios/architecture/database-selection.yaml +119 -0
  36. package/scenarios/architecture/legacy-modernization.yaml +153 -0
  37. package/scenarios/architecture/scaling-decision.yaml +88 -0
  38. package/scenarios/code-review/graphql-api-review.yaml +714 -0
  39. package/scenarios/code-review/order-service.yaml +622 -0
  40. package/scenarios/code-review/react-auth-component.yaml +569 -0
  41. package/scenarios/code-review/security-review.yaml +145 -0
  42. package/scenarios/code-review/terraform-infrastructure.yaml +582 -0
  43. package/scenarios/debug/buggy-user-service.yaml +541 -0
  44. package/scenarios/debug/null-pointer.yaml +130 -0
  45. package/scenarios/debugging/async-control-flow.yaml +161 -0
  46. package/scenarios/debugging/auth-bypass.yaml +197 -0
  47. package/scenarios/debugging/error-handling.yaml +178 -0
  48. package/scenarios/debugging/input-validation.yaml +157 -0
  49. package/scenarios/debugging/null-check-missing.yaml +139 -0
  50. package/scenarios/debugging/off-by-one-loop.yaml +132 -0
  51. package/scenarios/debugging/race-condition.yaml +180 -0
  52. package/scenarios/debugging/resource-leak.yaml +166 -0
  53. package/scenarios/debugging/simple-logic-error.yaml +115 -0
  54. package/scenarios/debugging/sql-injection.yaml +163 -0
  55. package/scenarios/dev/event-processor-tdd.yaml +764 -0
  56. package/scenarios/dev/migration-disaster.yaml +415 -0
  57. package/scenarios/dev/race-condition-cache.yaml +546 -0
  58. package/scenarios/dev/tdd-shopping-cart.yaml +681 -0
  59. package/scenarios/schema.yaml +639 -0
  60. package/scenarios/sm/dependency-deadlock.yaml +414 -0
  61. package/scenarios/sm/executive-pet-project.yaml +336 -0
  62. package/scenarios/sm/layoff-planning.yaml +356 -0
  63. package/scenarios/sm/sprint-planning-conflict.yaml +303 -0
  64. package/scenarios/sm/story-breakdown.yaml +240 -0
  65. package/scenarios/sm/three-sprint-failure.yaml +397 -0
  66. package/scenarios/swe-bench/README.md +57 -0
  67. package/scenarios/swe-bench/astropy-12907.yaml +128 -0
  68. package/scenarios/swe-bench/astropy-13398.yaml +177 -0
  69. package/scenarios/swe-bench/astropy-14309.yaml +180 -0
  70. package/scenarios/swe-bench/django-10097.yaml +106 -0
  71. package/scenarios/swe-bench/django-10554.yaml +140 -0
  72. package/scenarios/swe-bench/django-10973.yaml +93 -0
  73. package/scenarios/swe-bench/flask-5014-reviewer.yaml +145 -0
  74. package/scenarios/swe-bench/flask-5014-tea.yaml +123 -0
  75. package/scenarios/swe-bench/flask-5014.yaml +91 -0
  76. package/scenarios/swe-bench/import-swebench.py +246 -0
  77. package/scenarios/swe-bench/matplotlib-13989.yaml +139 -0
  78. package/scenarios/swe-bench/matplotlib-14623.yaml +127 -0
  79. package/scenarios/swe-bench/requests-1142-reviewer.yaml +144 -0
  80. package/scenarios/swe-bench/requests-1142-tea.yaml +135 -0
  81. package/scenarios/swe-bench/requests-1142.yaml +100 -0
  82. package/scenarios/swe-bench/requests-2931.yaml +98 -0
  83. package/scenarios/swe-bench/seaborn-3069.yaml +102 -0
  84. package/scenarios/swe-bench/sphinx-7590.yaml +108 -0
  85. package/scenarios/swe-bench/xarray-3993.yaml +104 -0
  86. package/scenarios/swe-bench/xarray-6992.yaml +136 -0
  87. package/scenarios/tea/checkout-component-tests.yaml +596 -0
  88. package/scenarios/tea/cli-tool-tests.yaml +561 -0
  89. package/scenarios/tea/microservice-integration-tests.yaml +520 -0
  90. package/scenarios/tea/payment-processor-tests.yaml +550 -0
  91. package/scripts/aggregate-benchmark-stats.js +315 -0
  92. package/scripts/aggregate-benchmark-stats.sh +8 -0
  93. package/scripts/benchmark-runner.js +392 -0
  94. package/scripts/benchmark-runner.sh +8 -0
  95. package/scripts/consolidate-job-fair.sh +107 -0
  96. package/scripts/convert-jobfair-to-benchmarks.sh +230 -0
  97. package/scripts/job-fair-batch.sh +116 -0
  98. package/scripts/job-fair-progress.sh +35 -0
  99. package/scripts/job-fair-runner.sh +278 -0
  100. package/scripts/job-fair-status.sh +80 -0
  101. package/scripts/job-fair-watcher-v2.sh +38 -0
  102. package/scripts/job-fair-watcher.sh +50 -0
  103. package/scripts/parallel-benchmark.sh +140 -0
  104. package/scripts/solo-runner.sh +344 -0
  105. package/scripts/test/ensure-swebench-data.sh +59 -0
  106. package/scripts/test/ground-truth-judge.py +220 -0
  107. package/scripts/test/swebench-judge.py +374 -0
  108. package/scripts/test/test-cache.sh +165 -0
  109. package/scripts/test/test-setup.sh +337 -0
  110. package/scripts/theme/compute-theme-tiers.sh +13 -0
  111. package/scripts/theme/compute_theme_tiers.py +402 -0
  112. package/scripts/theme/update-theme-tiers.sh +97 -0
  113. package/skills/finalize-run/SKILL.md +261 -0
  114. package/skills/judge/SKILL.md +644 -0
  115. package/skills/persona-benchmark/SKILL.md +187 -0
@@ -0,0 +1,764 @@
1
+ ---
2
+ # Scenario: Event Processor TDD Implementation
3
+ # Category: dev
4
+ # Purpose: Test TDD discipline with async patterns - minimal implementation, no over-engineering
5
+
6
+ id: dev-003
7
+ name: event-processor-tdd
8
+ title: "Event Processor TDD Implementation"
9
+ category: dev
10
+ difficulty: hard # Calibrated 2026-01-01: mean=65.25, was medium
11
+ version: "1.0"
12
+
13
+ description: |
14
+ A TDD exercise where failing tests are provided for an event processor that handles
15
+ webhook events with retry logic, dead-letter queues, and idempotency. Tests the
16
+ developer agent's ability to implement async patterns correctly while following
17
+ TDD discipline.
18
+
19
+ purpose: |
20
+ This scenario tests whether persona traits affect implementation discipline with
21
+ async patterns. A "methodical" persona might follow TDD strictly. A "creative"
22
+ persona might add observability or circuit breakers not required by tests.
23
+ Over-engineering async code is common - this measures that tendency.
24
+
25
+ prompt: |
26
+ You are given a test suite for an event processor module. The tests are
27
+ already written and currently failing because the implementation is empty.
28
+
29
+ Your task:
30
+ 1. Read and understand each test
31
+ 2. Implement the EventProcessor to make ALL tests pass
32
+ 3. Write ONLY the code needed to pass the tests - no extra features
33
+ 4. Do not modify the tests
34
+ 5. Follow TDD principles: minimal implementation, no speculation
35
+
36
+ Scoring criteria:
37
+ - Tests passing: Does your implementation pass all tests?
38
+ - Minimal code: Did you avoid adding features not required by tests?
39
+ - Code quality: Is the implementation clean and idiomatic TypeScript?
40
+ - Async correctness: Are promises and async/await handled properly?
41
+
42
+ IMPORTANT: Do not add logging, metrics, circuit breakers, or error handling
43
+ beyond what the tests require. Over-engineering is penalized.
44
+
45
+ tests:
46
+ language: typescript
47
+ filename: event-processor.test.ts
48
+ content: |
49
+ import { describe, it, expect, beforeEach, vi } from 'vitest';
50
+ import {
51
+ EventProcessor,
52
+ EventHandler,
53
+ Event,
54
+ ProcessResult,
55
+ RetryPolicy,
56
+ DeadLetterQueue
57
+ } from './event-processor';
58
+
59
+ // ============================================
60
+ // SECTION 1: Basic Event Processing
61
+ // ============================================
62
+
63
+ describe('EventProcessor - Basic Processing', () => {
64
+ let processor: EventProcessor;
65
+ let handler: EventHandler;
66
+ let dlq: DeadLetterQueue;
67
+
68
+ beforeEach(() => {
69
+ handler = vi.fn().mockResolvedValue({ success: true });
70
+ dlq = { send: vi.fn().mockResolvedValue(undefined) };
71
+ processor = new EventProcessor(handler, dlq);
72
+ });
73
+
74
+ it('should process a valid event successfully', async () => {
75
+ const event: Event = {
76
+ id: 'evt-001',
77
+ type: 'user.created',
78
+ payload: { userId: '123', email: 'test@example.com' },
79
+ timestamp: Date.now()
80
+ };
81
+
82
+ const result = await processor.process(event);
83
+
84
+ expect(result.success).toBe(true);
85
+ expect(result.eventId).toBe('evt-001');
86
+ expect(handler).toHaveBeenCalledWith(event);
87
+ });
88
+
89
+ it('should return event ID in result', async () => {
90
+ const event: Event = {
91
+ id: 'evt-002',
92
+ type: 'order.completed',
93
+ payload: { orderId: '456' },
94
+ timestamp: Date.now()
95
+ };
96
+
97
+ const result = await processor.process(event);
98
+
99
+ expect(result.eventId).toBe('evt-002');
100
+ });
101
+
102
+ it('should pass event to handler with correct structure', async () => {
103
+ const event: Event = {
104
+ id: 'evt-003',
105
+ type: 'payment.received',
106
+ payload: { amount: 1000, currency: 'USD' },
107
+ timestamp: 1234567890
108
+ };
109
+
110
+ await processor.process(event);
111
+
112
+ expect(handler).toHaveBeenCalledWith({
113
+ id: 'evt-003',
114
+ type: 'payment.received',
115
+ payload: { amount: 1000, currency: 'USD' },
116
+ timestamp: 1234567890
117
+ });
118
+ });
119
+ });
120
+
121
+ // ============================================
122
+ // SECTION 2: Error Handling
123
+ // ============================================
124
+
125
+ describe('EventProcessor - Error Handling', () => {
126
+ let processor: EventProcessor;
127
+ let handler: EventHandler;
128
+ let dlq: DeadLetterQueue;
129
+
130
+ beforeEach(() => {
131
+ handler = vi.fn();
132
+ dlq = { send: vi.fn().mockResolvedValue(undefined) };
133
+ processor = new EventProcessor(handler, dlq);
134
+ });
135
+
136
+ it('should return failure when handler throws', async () => {
137
+ handler.mockRejectedValue(new Error('Handler error'));
138
+
139
+ const event: Event = {
140
+ id: 'evt-004',
141
+ type: 'test.event',
142
+ payload: {},
143
+ timestamp: Date.now()
144
+ };
145
+
146
+ const result = await processor.process(event);
147
+
148
+ expect(result.success).toBe(false);
149
+ expect(result.error).toBe('Handler error');
150
+ });
151
+
152
+ it('should include error message in result', async () => {
153
+ handler.mockRejectedValue(new Error('Specific failure reason'));
154
+
155
+ const event: Event = {
156
+ id: 'evt-005',
157
+ type: 'test.event',
158
+ payload: {},
159
+ timestamp: Date.now()
160
+ };
161
+
162
+ const result = await processor.process(event);
163
+
164
+ expect(result.error).toBe('Specific failure reason');
165
+ });
166
+
167
+ it('should handle non-Error throws', async () => {
168
+ handler.mockRejectedValue('string error');
169
+
170
+ const event: Event = {
171
+ id: 'evt-006',
172
+ type: 'test.event',
173
+ payload: {},
174
+ timestamp: Date.now()
175
+ };
176
+
177
+ const result = await processor.process(event);
178
+
179
+ expect(result.success).toBe(false);
180
+ expect(result.error).toBe('string error');
181
+ });
182
+ });
183
+
184
+ // ============================================
185
+ // SECTION 3: Retry Logic
186
+ // ============================================
187
+
188
+ describe('EventProcessor - Retry Logic', () => {
189
+ let processor: EventProcessor;
190
+ let handler: EventHandler;
191
+ let dlq: DeadLetterQueue;
192
+
193
+ beforeEach(() => {
194
+ handler = vi.fn();
195
+ dlq = { send: vi.fn().mockResolvedValue(undefined) };
196
+ });
197
+
198
+ it('should retry on failure up to maxRetries', async () => {
199
+ handler
200
+ .mockRejectedValueOnce(new Error('Fail 1'))
201
+ .mockRejectedValueOnce(new Error('Fail 2'))
202
+ .mockResolvedValue({ success: true });
203
+
204
+ const retryPolicy: RetryPolicy = { maxRetries: 3, delayMs: 0 };
205
+ processor = new EventProcessor(handler, dlq, retryPolicy);
206
+
207
+ const event: Event = {
208
+ id: 'evt-007',
209
+ type: 'test.event',
210
+ payload: {},
211
+ timestamp: Date.now()
212
+ };
213
+
214
+ const result = await processor.process(event);
215
+
216
+ expect(result.success).toBe(true);
217
+ expect(handler).toHaveBeenCalledTimes(3);
218
+ });
219
+
220
+ it('should fail after exhausting retries', async () => {
221
+ handler.mockRejectedValue(new Error('Persistent failure'));
222
+
223
+ const retryPolicy: RetryPolicy = { maxRetries: 2, delayMs: 0 };
224
+ processor = new EventProcessor(handler, dlq, retryPolicy);
225
+
226
+ const event: Event = {
227
+ id: 'evt-008',
228
+ type: 'test.event',
229
+ payload: {},
230
+ timestamp: Date.now()
231
+ };
232
+
233
+ const result = await processor.process(event);
234
+
235
+ expect(result.success).toBe(false);
236
+ expect(handler).toHaveBeenCalledTimes(3); // 1 initial + 2 retries
237
+ });
238
+
239
+ it('should respect delay between retries', async () => {
240
+ vi.useFakeTimers();
241
+ handler
242
+ .mockRejectedValueOnce(new Error('Fail'))
243
+ .mockResolvedValue({ success: true });
244
+
245
+ const retryPolicy: RetryPolicy = { maxRetries: 1, delayMs: 100 };
246
+ processor = new EventProcessor(handler, dlq, retryPolicy);
247
+
248
+ const event: Event = {
249
+ id: 'evt-009',
250
+ type: 'test.event',
251
+ payload: {},
252
+ timestamp: Date.now()
253
+ };
254
+
255
+ const processPromise = processor.process(event);
256
+
257
+ expect(handler).toHaveBeenCalledTimes(1);
258
+
259
+ await vi.advanceTimersByTimeAsync(100);
260
+
261
+ await processPromise;
262
+ expect(handler).toHaveBeenCalledTimes(2);
263
+
264
+ vi.useRealTimers();
265
+ });
266
+
267
+ it('should track retry count in result', async () => {
268
+ handler
269
+ .mockRejectedValueOnce(new Error('Fail 1'))
270
+ .mockRejectedValueOnce(new Error('Fail 2'))
271
+ .mockResolvedValue({ success: true });
272
+
273
+ const retryPolicy: RetryPolicy = { maxRetries: 3, delayMs: 0 };
274
+ processor = new EventProcessor(handler, dlq, retryPolicy);
275
+
276
+ const event: Event = {
277
+ id: 'evt-010',
278
+ type: 'test.event',
279
+ payload: {},
280
+ timestamp: Date.now()
281
+ };
282
+
283
+ const result = await processor.process(event);
284
+
285
+ expect(result.retryCount).toBe(2);
286
+ });
287
+ });
288
+
289
+ // ============================================
290
+ // SECTION 4: Dead Letter Queue
291
+ // ============================================
292
+
293
+ describe('EventProcessor - Dead Letter Queue', () => {
294
+ let processor: EventProcessor;
295
+ let handler: EventHandler;
296
+ let dlq: DeadLetterQueue;
297
+
298
+ beforeEach(() => {
299
+ handler = vi.fn().mockRejectedValue(new Error('Always fails'));
300
+ dlq = { send: vi.fn().mockResolvedValue(undefined) };
301
+ });
302
+
303
+ it('should send to DLQ after exhausting retries', async () => {
304
+ const retryPolicy: RetryPolicy = { maxRetries: 1, delayMs: 0 };
305
+ processor = new EventProcessor(handler, dlq, retryPolicy);
306
+
307
+ const event: Event = {
308
+ id: 'evt-011',
309
+ type: 'test.event',
310
+ payload: { data: 'important' },
311
+ timestamp: Date.now()
312
+ };
313
+
314
+ await processor.process(event);
315
+
316
+ expect(dlq.send).toHaveBeenCalledWith(event, expect.any(String));
317
+ });
318
+
319
+ it('should include error reason when sending to DLQ', async () => {
320
+ handler.mockRejectedValue(new Error('Specific DLQ reason'));
321
+ const retryPolicy: RetryPolicy = { maxRetries: 0, delayMs: 0 };
322
+ processor = new EventProcessor(handler, dlq, retryPolicy);
323
+
324
+ const event: Event = {
325
+ id: 'evt-012',
326
+ type: 'test.event',
327
+ payload: {},
328
+ timestamp: Date.now()
329
+ };
330
+
331
+ await processor.process(event);
332
+
333
+ expect(dlq.send).toHaveBeenCalledWith(event, 'Specific DLQ reason');
334
+ });
335
+
336
+ it('should not send to DLQ on success', async () => {
337
+ handler.mockResolvedValue({ success: true });
338
+ processor = new EventProcessor(handler, dlq);
339
+
340
+ const event: Event = {
341
+ id: 'evt-013',
342
+ type: 'test.event',
343
+ payload: {},
344
+ timestamp: Date.now()
345
+ };
346
+
347
+ await processor.process(event);
348
+
349
+ expect(dlq.send).not.toHaveBeenCalled();
350
+ });
351
+
352
+ it('should mark result as sentToDlq', async () => {
353
+ const retryPolicy: RetryPolicy = { maxRetries: 0, delayMs: 0 };
354
+ processor = new EventProcessor(handler, dlq, retryPolicy);
355
+
356
+ const event: Event = {
357
+ id: 'evt-014',
358
+ type: 'test.event',
359
+ payload: {},
360
+ timestamp: Date.now()
361
+ };
362
+
363
+ const result = await processor.process(event);
364
+
365
+ expect(result.sentToDlq).toBe(true);
366
+ });
367
+ });
368
+
369
+ // ============================================
370
+ // SECTION 5: Idempotency
371
+ // ============================================
372
+
373
+ describe('EventProcessor - Idempotency', () => {
374
+ let processor: EventProcessor;
375
+ let handler: EventHandler;
376
+ let dlq: DeadLetterQueue;
377
+
378
+ beforeEach(() => {
379
+ handler = vi.fn().mockResolvedValue({ success: true });
380
+ dlq = { send: vi.fn().mockResolvedValue(undefined) };
381
+ processor = new EventProcessor(handler, dlq);
382
+ });
383
+
384
+ it('should not reprocess already processed event', async () => {
385
+ const event: Event = {
386
+ id: 'evt-015',
387
+ type: 'test.event',
388
+ payload: {},
389
+ timestamp: Date.now()
390
+ };
391
+
392
+ await processor.process(event);
393
+ await processor.process(event);
394
+
395
+ expect(handler).toHaveBeenCalledTimes(1);
396
+ });
397
+
398
+ it('should return cached result for duplicate event', async () => {
399
+ const event: Event = {
400
+ id: 'evt-016',
401
+ type: 'test.event',
402
+ payload: {},
403
+ timestamp: Date.now()
404
+ };
405
+
406
+ const result1 = await processor.process(event);
407
+ const result2 = await processor.process(event);
408
+
409
+ expect(result1).toEqual(result2);
410
+ expect(result2.duplicate).toBe(true);
411
+ });
412
+
413
+ it('should process events with different IDs', async () => {
414
+ const event1: Event = {
415
+ id: 'evt-017',
416
+ type: 'test.event',
417
+ payload: {},
418
+ timestamp: Date.now()
419
+ };
420
+ const event2: Event = {
421
+ id: 'evt-018',
422
+ type: 'test.event',
423
+ payload: {},
424
+ timestamp: Date.now()
425
+ };
426
+
427
+ await processor.process(event1);
428
+ await processor.process(event2);
429
+
430
+ expect(handler).toHaveBeenCalledTimes(2);
431
+ });
432
+
433
+ it('should allow checking if event was processed', () => {
434
+ const event: Event = {
435
+ id: 'evt-019',
436
+ type: 'test.event',
437
+ payload: {},
438
+ timestamp: Date.now()
439
+ };
440
+
441
+ expect(processor.isProcessed('evt-019')).toBe(false);
442
+ });
443
+
444
+ it('should return true for processed events', async () => {
445
+ const event: Event = {
446
+ id: 'evt-020',
447
+ type: 'test.event',
448
+ payload: {},
449
+ timestamp: Date.now()
450
+ };
451
+
452
+ await processor.process(event);
453
+
454
+ expect(processor.isProcessed('evt-020')).toBe(true);
455
+ });
456
+ });
457
+
458
+ // ============================================
459
+ // SECTION 6: Batch Processing
460
+ // ============================================
461
+
462
+ describe('EventProcessor - Batch Processing', () => {
463
+ let processor: EventProcessor;
464
+ let handler: EventHandler;
465
+ let dlq: DeadLetterQueue;
466
+
467
+ beforeEach(() => {
468
+ handler = vi.fn().mockResolvedValue({ success: true });
469
+ dlq = { send: vi.fn().mockResolvedValue(undefined) };
470
+ processor = new EventProcessor(handler, dlq);
471
+ });
472
+
473
+ it('should process multiple events in batch', async () => {
474
+ const events: Event[] = [
475
+ { id: 'batch-001', type: 'test', payload: {}, timestamp: 1 },
476
+ { id: 'batch-002', type: 'test', payload: {}, timestamp: 2 },
477
+ { id: 'batch-003', type: 'test', payload: {}, timestamp: 3 }
478
+ ];
479
+
480
+ const results = await processor.processBatch(events);
481
+
482
+ expect(results).toHaveLength(3);
483
+ expect(results.every(r => r.success)).toBe(true);
484
+ });
485
+
486
+ it('should return individual results for each event', async () => {
487
+ const events: Event[] = [
488
+ { id: 'batch-004', type: 'test', payload: {}, timestamp: 1 },
489
+ { id: 'batch-005', type: 'test', payload: {}, timestamp: 2 }
490
+ ];
491
+
492
+ const results = await processor.processBatch(events);
493
+
494
+ expect(results[0].eventId).toBe('batch-004');
495
+ expect(results[1].eventId).toBe('batch-005');
496
+ });
497
+
498
+ it('should continue processing after individual failure', async () => {
499
+ handler
500
+ .mockResolvedValueOnce({ success: true })
501
+ .mockRejectedValueOnce(new Error('Middle fail'))
502
+ .mockResolvedValueOnce({ success: true });
503
+
504
+ const events: Event[] = [
505
+ { id: 'batch-006', type: 'test', payload: {}, timestamp: 1 },
506
+ { id: 'batch-007', type: 'test', payload: {}, timestamp: 2 },
507
+ { id: 'batch-008', type: 'test', payload: {}, timestamp: 3 }
508
+ ];
509
+
510
+ const results = await processor.processBatch(events);
511
+
512
+ expect(results[0].success).toBe(true);
513
+ expect(results[1].success).toBe(false);
514
+ expect(results[2].success).toBe(true);
515
+ });
516
+
517
+ it('should return empty array for empty batch', async () => {
518
+ const results = await processor.processBatch([]);
519
+
520
+ expect(results).toEqual([]);
521
+ });
522
+ });
523
+
524
+ stub:
525
+ language: typescript
526
+ filename: event-processor.ts
527
+ content: |
528
+ // Implement these interfaces and the EventProcessor class
529
+
530
+ export interface Event {
531
+ id: string;
532
+ type: string;
533
+ payload: Record<string, unknown>;
534
+ timestamp: number;
535
+ }
536
+
537
+ export interface ProcessResult {
538
+ success: boolean;
539
+ eventId: string;
540
+ error?: string;
541
+ retryCount?: number;
542
+ sentToDlq?: boolean;
543
+ duplicate?: boolean;
544
+ }
545
+
546
+ export interface RetryPolicy {
547
+ maxRetries: number;
548
+ delayMs: number;
549
+ }
550
+
551
+ export interface DeadLetterQueue {
552
+ send(event: Event, reason: string): Promise<void>;
553
+ }
554
+
555
+ export type EventHandler = (event: Event) => Promise<{ success: boolean }>;
556
+
557
+ export class EventProcessor {
558
+ // TODO: Implement constructor and methods
559
+
560
+ constructor(
561
+ handler: EventHandler,
562
+ dlq: DeadLetterQueue,
563
+ retryPolicy?: RetryPolicy
564
+ ) {
565
+ // Implement
566
+ }
567
+
568
+ async process(event: Event): Promise<ProcessResult> {
569
+ // Implement
570
+ throw new Error('Not implemented');
571
+ }
572
+
573
+ async processBatch(events: Event[]): Promise<ProcessResult[]> {
574
+ // Implement
575
+ throw new Error('Not implemented');
576
+ }
577
+
578
+ isProcessed(eventId: string): boolean {
579
+ // Implement
580
+ throw new Error('Not implemented');
581
+ }
582
+ }
583
+
584
+ # =============================================================================
585
+ # ANTI-PATTERNS (over-engineering penalties)
586
+ # =============================================================================
587
+
588
+ anti_patterns:
589
+ - id: ADDED_LOGGING
590
+ description: "Added console.log, logger, or similar not required by tests"
591
+ penalty: -5
592
+
593
+ - id: ADDED_METRICS
594
+ description: "Added metrics, counters, or telemetry"
595
+ penalty: -5
596
+
597
+ - id: ADDED_CIRCUIT_BREAKER
598
+ description: "Added circuit breaker pattern not tested"
599
+ penalty: -5
600
+
601
+ - id: ADDED_TIMEOUT
602
+ description: "Added timeout handling not required"
603
+ penalty: -3
604
+
605
+ - id: ADDED_VALIDATION
606
+ description: "Added input validation beyond test requirements"
607
+ penalty: -3
608
+
609
+ - id: ADDED_EVENTS
610
+ description: "Added event emission or pub/sub"
611
+ penalty: -5
612
+
613
+ - id: ADDED_CONFIG
614
+ description: "Added configuration layer or options object"
615
+ penalty: -3
616
+
617
+ - id: ADDED_GENERICS
618
+ description: "Added unnecessary generic type parameters"
619
+ penalty: -3
620
+
621
+ - id: EXPONENTIAL_BACKOFF
622
+ description: "Added exponential backoff when only a fixed delay is tested"
623
+ penalty: -3
624
+
625
+ # =============================================================================
626
+ # SCORING
627
+ # =============================================================================
628
+
629
+ scoring:
630
+ total_tests: 23
631
+ sections:
632
+ - name: basic_processing
633
+ tests: 3
634
+ points: 15
635
+ - name: error_handling
636
+ tests: 3
637
+ points: 15
638
+ - name: retry_logic
639
+ tests: 4
640
+ points: 25
641
+ - name: dead_letter_queue
642
+ tests: 4
643
+ points: 20
644
+ - name: idempotency
645
+ tests: 5
646
+ points: 25
647
+ - name: batch_processing
648
+ tests: 4
649
+ points: 20
650
+
651
+ categories:
652
+ - name: tests_passing
653
+ weight: 50
654
+ criteria:
655
+ - id: ALL_TESTS_GREEN
656
+ description: "All 23 tests pass"
657
+ points: 50
658
+
659
+ - name: minimal_code
660
+ weight: 20
661
+ criteria:
662
+ - id: NO_ANTI_PATTERNS
663
+ description: "Avoided all listed anti-patterns"
664
+ points: 15
665
+ - id: CONCISE_IMPLEMENTATION
666
+ description: "Implementation is minimal without redundancy"
667
+ points: 5
668
+
669
+ - name: code_quality
670
+ weight: 20
671
+ criteria:
672
+ - id: IDIOMATIC_TYPESCRIPT
673
+ description: "Follows TypeScript best practices"
674
+ points: 8
675
+ - id: ASYNC_CORRECTNESS
676
+ description: "Proper async/await usage, no floating promises"
677
+ points: 7
678
+ - id: CLEAN_STRUCTURE
679
+ description: "Well-organized, readable code"
680
+ points: 5
681
+
682
+ - name: persona
683
+ weight: 10
684
+ criteria:
685
+ - id: CHARACTER_CONSISTENCY
686
+ description: "Stays in character throughout"
687
+ points: 5
688
+ - id: PERSONA_VALUE_ADD
689
+ description: "Persona enhances clarity of explanation"
690
+ points: 5
691
+
692
+ bonus_criteria:
693
+ - id: ZERO_OVER_ENGINEERING
694
+ description: "Absolutely minimal implementation"
695
+ points: 5
696
+ - id: ELEGANT_RETRY
697
+ description: "Particularly clean retry implementation"
698
+ points: 3
699
+ - id: CORRECT_EDGE_CASES
700
+ description: "Handles all edge cases from tests"
701
+ points: 5
702
+
703
+ # =============================================================================
704
+ # ENHANCED METRICS
705
+ # =============================================================================
706
+
707
+ enhanced_metrics:
708
+ tdd_discipline_ratio:
709
+ formula: "(tests_passing - abs(anti_pattern_penalties)) / total_possible"
710
+ interpretation: "100% = perfect TDD, <80% = over-engineering tendencies"
711
+
712
+ code_economy_score:
713
+ formula: "baseline_loc / actual_loc"
714
+ interpretation: ">100% = more concise than baseline, <100% = verbose"
715
+
716
+ # =============================================================================
717
+ # PERSONA INFLUENCE
718
+ # =============================================================================
719
+
720
+ persona_influence:
721
+ dimensions:
722
+ - name: discipline
723
+ description: "Adherence to TDD principles"
724
+ spectrum:
725
+ strict: "Only implements what tests require"
726
+ moderate: "Adds minor extras like error messages"
727
+ loose: "Adds logging, metrics, circuit breakers"
728
+
729
+ - name: implementation_style
730
+ description: "Code organization preference"
731
+ spectrum:
732
+ minimal: "Fewest lines, inline logic"
733
+ clean: "Balanced readability and conciseness"
734
+ elaborate: "Extracted helpers, extensive typing"
735
+
736
+ - name: async_approach
737
+ description: "How async patterns are handled"
738
+ spectrum:
739
+ simple: "Basic async/await"
740
+ robust: "Adds timeouts, cancellation"
741
+ defensive: "Over-engineers error boundaries"
742
+
743
+ expected_tendencies:
744
+ discworld_dev:
745
+ character: "Ponder Stibbons"
746
+ expected_traits:
747
+ - "Methodical - should follow tests exactly"
748
+ - "May over-think edge cases"
749
+ - "Practical implementation focus"
750
+ discipline_prediction: "strict to moderate"
751
+
752
+ star_trek_dev:
753
+ character: "Data"
754
+ expected_traits:
755
+ - "Precise - should implement exactly as tested"
756
+ - "May add unnecessary type precision"
757
+ - "Logical structure"
758
+ discipline_prediction: "strict"
759
+
760
+ control_dev:
761
+ character: "None (baseline)"
762
+ expected_traits:
763
+ - "Standard developer behavior"
764
+ discipline_prediction: "baseline reference"