npm - @pennyfarthing/benchmark - Versions diffs - 10.2.0 - Mend

@pennyfarthing/benchmark 10.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (115)

package/commands/benchmark-control.md +69 -0
package/commands/benchmark.md +485 -0
package/commands/job-fair.md +102 -0
package/commands/solo.md +447 -0
package/dist/benchmark-integration.d.ts +182 -0
package/dist/benchmark-integration.d.ts.map +1 -0
package/dist/benchmark-integration.js +710 -0
package/dist/benchmark-integration.js.map +1 -0
package/dist/benchmark-integration.test.d.ts +6 -0
package/dist/benchmark-integration.test.d.ts.map +1 -0
package/dist/benchmark-integration.test.js +41 -0
package/dist/benchmark-integration.test.js.map +1 -0
package/dist/index.d.ts +3 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +5 -0
package/dist/index.js.map +1 -0
package/dist/job-fair-aggregator.d.ts +150 -0
package/dist/job-fair-aggregator.d.ts.map +1 -0
package/dist/job-fair-aggregator.js +547 -0
package/dist/job-fair-aggregator.js.map +1 -0
package/dist/job-fair-aggregator.test.d.ts +6 -0
package/dist/job-fair-aggregator.test.d.ts.map +1 -0
package/dist/job-fair-aggregator.test.js +35 -0
package/dist/job-fair-aggregator.test.js.map +1 -0
package/dist/package-exports.test.d.ts +13 -0
package/dist/package-exports.test.d.ts.map +1 -0
package/dist/package-exports.test.js +192 -0
package/dist/package-exports.test.js.map +1 -0
package/docs/BENCHMARK-METHODOLOGY.md +105 -0
package/docs/BENCHMARKING.md +311 -0
package/docs/OCEAN-BENCHMARKING.md +210 -0
package/docs/benchmarks-guide.md +62 -0
package/package.json +66 -0
package/scenarios/README.md +145 -0
package/scenarios/architecture/database-selection.yaml +119 -0
package/scenarios/architecture/legacy-modernization.yaml +153 -0
package/scenarios/architecture/scaling-decision.yaml +88 -0
package/scenarios/code-review/graphql-api-review.yaml +714 -0
package/scenarios/code-review/order-service.yaml +622 -0
package/scenarios/code-review/react-auth-component.yaml +569 -0
package/scenarios/code-review/security-review.yaml +145 -0
package/scenarios/code-review/terraform-infrastructure.yaml +582 -0
package/scenarios/debug/buggy-user-service.yaml +541 -0
package/scenarios/debug/null-pointer.yaml +130 -0
package/scenarios/debugging/async-control-flow.yaml +161 -0
package/scenarios/debugging/auth-bypass.yaml +197 -0
package/scenarios/debugging/error-handling.yaml +178 -0
package/scenarios/debugging/input-validation.yaml +157 -0
package/scenarios/debugging/null-check-missing.yaml +139 -0
package/scenarios/debugging/off-by-one-loop.yaml +132 -0
package/scenarios/debugging/race-condition.yaml +180 -0
package/scenarios/debugging/resource-leak.yaml +166 -0
package/scenarios/debugging/simple-logic-error.yaml +115 -0
package/scenarios/debugging/sql-injection.yaml +163 -0
package/scenarios/dev/event-processor-tdd.yaml +764 -0
package/scenarios/dev/migration-disaster.yaml +415 -0
package/scenarios/dev/race-condition-cache.yaml +546 -0
package/scenarios/dev/tdd-shopping-cart.yaml +681 -0
package/scenarios/schema.yaml +639 -0
package/scenarios/sm/dependency-deadlock.yaml +414 -0
package/scenarios/sm/executive-pet-project.yaml +336 -0
package/scenarios/sm/layoff-planning.yaml +356 -0
package/scenarios/sm/sprint-planning-conflict.yaml +303 -0
package/scenarios/sm/story-breakdown.yaml +240 -0
package/scenarios/sm/three-sprint-failure.yaml +397 -0
package/scenarios/swe-bench/README.md +57 -0
package/scenarios/swe-bench/astropy-12907.yaml +128 -0
package/scenarios/swe-bench/astropy-13398.yaml +177 -0
package/scenarios/swe-bench/astropy-14309.yaml +180 -0
package/scenarios/swe-bench/django-10097.yaml +106 -0
package/scenarios/swe-bench/django-10554.yaml +140 -0
package/scenarios/swe-bench/django-10973.yaml +93 -0
package/scenarios/swe-bench/flask-5014-reviewer.yaml +145 -0
package/scenarios/swe-bench/flask-5014-tea.yaml +123 -0
package/scenarios/swe-bench/flask-5014.yaml +91 -0
package/scenarios/swe-bench/import-swebench.py +246 -0
package/scenarios/swe-bench/matplotlib-13989.yaml +139 -0
package/scenarios/swe-bench/matplotlib-14623.yaml +127 -0
package/scenarios/swe-bench/requests-1142-reviewer.yaml +144 -0
package/scenarios/swe-bench/requests-1142-tea.yaml +135 -0
package/scenarios/swe-bench/requests-1142.yaml +100 -0
package/scenarios/swe-bench/requests-2931.yaml +98 -0
package/scenarios/swe-bench/seaborn-3069.yaml +102 -0
package/scenarios/swe-bench/sphinx-7590.yaml +108 -0
package/scenarios/swe-bench/xarray-3993.yaml +104 -0
package/scenarios/swe-bench/xarray-6992.yaml +136 -0
package/scenarios/tea/checkout-component-tests.yaml +596 -0
package/scenarios/tea/cli-tool-tests.yaml +561 -0
package/scenarios/tea/microservice-integration-tests.yaml +520 -0
package/scenarios/tea/payment-processor-tests.yaml +550 -0
package/scripts/aggregate-benchmark-stats.js +315 -0
package/scripts/aggregate-benchmark-stats.sh +8 -0
package/scripts/benchmark-runner.js +392 -0
package/scripts/benchmark-runner.sh +8 -0
package/scripts/consolidate-job-fair.sh +107 -0
package/scripts/convert-jobfair-to-benchmarks.sh +230 -0
package/scripts/job-fair-batch.sh +116 -0
package/scripts/job-fair-progress.sh +35 -0
package/scripts/job-fair-runner.sh +278 -0
package/scripts/job-fair-status.sh +80 -0
package/scripts/job-fair-watcher-v2.sh +38 -0
package/scripts/job-fair-watcher.sh +50 -0
package/scripts/parallel-benchmark.sh +140 -0
package/scripts/solo-runner.sh +344 -0
package/scripts/test/ensure-swebench-data.sh +59 -0
package/scripts/test/ground-truth-judge.py +220 -0
package/scripts/test/swebench-judge.py +374 -0
package/scripts/test/test-cache.sh +165 -0
package/scripts/test/test-setup.sh +337 -0
package/scripts/theme/compute-theme-tiers.sh +13 -0
package/scripts/theme/compute_theme_tiers.py +402 -0
package/scripts/theme/update-theme-tiers.sh +97 -0
package/skills/finalize-run/SKILL.md +261 -0
package/skills/judge/SKILL.md +644 -0
package/skills/persona-benchmark/SKILL.md +187 -0

package/scenarios/dev/event-processor-tdd.yaml ADDED Viewed

@@ -0,0 +1,764 @@
+---
+# Scenario: Event Processor TDD Implementation
+# Category: dev
+# Purpose: Test TDD discipline with async patterns - minimal implementation, no over-engineering
+#
+# File layout: identifying metadata and the agent-facing prompt come first,
+# followed by the test suite (`tests`), the starting implementation (`stub`),
+# and the grading data (`anti_patterns`, `scoring`, `enhanced_metrics`, ...).
+id: dev-003
+name: event-processor-tdd
+title: "Event Processor TDD Implementation"
+category: dev
+difficulty: hard  # Calibrated 2026-01-01: mean=65.25, was medium
+# Quoted so the value stays the string "1.0" instead of being parsed as a float.
+version: "1.0"
+# Short summary of what the exercise contains.
+description: |
+  A TDD exercise where failing tests are provided for an event processor that handles
+  webhook events with retry logic, dead-letter queues, and idempotency. Tests the
+  developer agent's ability to implement async patterns correctly while following
+  TDD discipline.
+# What the benchmark is trying to measure (persona effect on over-engineering).
+purpose: |
+  This scenario tests whether persona traits affect implementation discipline with
+  async patterns. A "methodical" persona might follow TDD strictly. A "creative"
+  persona might add observability or circuit breakers not required by tests.
+  Over-engineering async code is common - this measures that tendency.
+# Task instructions, written in second person to the agent under test.
+# NOTE(review): the criteria listed here omit the `persona` category that
+# `scoring.categories` also grades - presumably intentional; confirm.
+prompt: |
+  You are given a test suite for an event processor module. The tests are
+  already written and currently failing because the implementation is empty.
+  Your task:
+  1. Read and understand each test
+  2. Implement the EventProcessor to make ALL tests pass
+  3. Write ONLY the code needed to pass the tests - no extra features
+  4. Do not modify the tests
+  5. Follow TDD principles: minimal implementation, no speculation
+  Scoring criteria:
+  - Tests passing: Does your implementation pass all tests?
+  - Minimal code: Did you avoid adding features not required by tests?
+  - Code quality: Is the implementation clean and idiomatic TypeScript?
+  - Async correctness: Are promises and async/await handled properly?
+  IMPORTANT: Do not add logging, metrics, circuit breakers, or error handling
+  beyond what the tests require. Over-engineering is penalized.
+# =============================================================================
+# TEST SUITE (vitest; imports the implementation from './event-processor')
+# =============================================================================
+tests:
+  language: typescript
+  filename: event-processor.test.ts
+  content: |
+    import { describe, it, expect, beforeEach, vi } from 'vitest';
+    import {
+      EventProcessor,
+      EventHandler,
+      Event,
+      ProcessResult,
+      RetryPolicy,
+      DeadLetterQueue
+    } from './event-processor';
+    // ============================================
+    // SECTION 1: Basic Event Processing
+    // ============================================
+    // Happy path: the handler mock always resolves { success: true } and the
+    // processor is constructed without a RetryPolicy.
+    describe('EventProcessor - Basic Processing', () => {
+      let processor: EventProcessor;
+      let handler: EventHandler;
+      let dlq: DeadLetterQueue;
+      // Fresh mocks and a fresh processor are created before every test.
+      beforeEach(() => {
+        handler = vi.fn().mockResolvedValue({ success: true });
+        dlq = { send: vi.fn().mockResolvedValue(undefined) };
+        processor = new EventProcessor(handler, dlq);
+      });
+      it('should process a valid event successfully', async () => {
+        const event: Event = {
+          id: 'evt-001',
+          type: 'user.created',
+          payload: { userId: '123', email: 'test@example.com' },
+          timestamp: Date.now()
+        };
+        const result = await processor.process(event);
+        expect(result.success).toBe(true);
+        expect(result.eventId).toBe('evt-001');
+        expect(handler).toHaveBeenCalledWith(event);
+      });
+      it('should return event ID in result', async () => {
+        const event: Event = {
+          id: 'evt-002',
+          type: 'order.completed',
+          payload: { orderId: '456' },
+          timestamp: Date.now()
+        };
+        const result = await processor.process(event);
+        expect(result.eventId).toBe('evt-002');
+      });
+      it('should pass event to handler with correct structure', async () => {
+        // Fixed timestamp so the expected argument can be written out literally.
+        const event: Event = {
+          id: 'evt-003',
+          type: 'payment.received',
+          payload: { amount: 1000, currency: 'USD' },
+          timestamp: 1234567890
+        };
+        await processor.process(event);
+        expect(handler).toHaveBeenCalledWith({
+          id: 'evt-003',
+          type: 'payment.received',
+          payload: { amount: 1000, currency: 'USD' },
+          timestamp: 1234567890
+        });
+      });
+    });
+    // ============================================
+    // SECTION 2: Error Handling
+    // ============================================
+    // No RetryPolicy is passed, so each test observes how a processor built
+    // with its default policy reports a rejecting handler.
+    describe('EventProcessor - Error Handling', () => {
+      let processor: EventProcessor;
+      let handler: EventHandler;
+      let dlq: DeadLetterQueue;
+      // The handler starts as a bare mock; each test configures its rejection.
+      beforeEach(() => {
+        handler = vi.fn();
+        dlq = { send: vi.fn().mockResolvedValue(undefined) };
+        processor = new EventProcessor(handler, dlq);
+      });
+      it('should return failure when handler throws', async () => {
+        // `handler` is typed as EventHandler, which has no mock methods;
+        // vi.mocked() exposes the mock API to the type checker and returns
+        // the same object at runtime.
+        vi.mocked(handler).mockRejectedValue(new Error('Handler error'));
+        const event: Event = {
+          id: 'evt-004',
+          type: 'test.event',
+          payload: {},
+          timestamp: Date.now()
+        };
+        const result = await processor.process(event);
+        expect(result.success).toBe(false);
+        expect(result.error).toBe('Handler error');
+      });
+      it('should include error message in result', async () => {
+        vi.mocked(handler).mockRejectedValue(new Error('Specific failure reason'));
+        const event: Event = {
+          id: 'evt-005',
+          type: 'test.event',
+          payload: {},
+          timestamp: Date.now()
+        };
+        const result = await processor.process(event);
+        expect(result.error).toBe('Specific failure reason');
+      });
+      it('should handle non-Error throws', async () => {
+        // Rejects with a plain string rather than an Error instance.
+        vi.mocked(handler).mockRejectedValue('string error');
+        const event: Event = {
+          id: 'evt-006',
+          type: 'test.event',
+          payload: {},
+          timestamp: Date.now()
+        };
+        const result = await processor.process(event);
+        expect(result.success).toBe(false);
+        expect(result.error).toBe('string error');
+      });
+    });
+    // ============================================
+    // SECTION 3: Retry Logic
+    // ============================================
+    // Each test builds its own processor with an explicit RetryPolicy;
+    // maxRetries counts retries after the initial attempt.
+    describe('EventProcessor - Retry Logic', () => {
+      let processor: EventProcessor;
+      let handler: EventHandler;
+      let dlq: DeadLetterQueue;
+      beforeEach(() => {
+        handler = vi.fn();
+        dlq = { send: vi.fn().mockResolvedValue(undefined) };
+      });
+      it('should retry on failure up to maxRetries', async () => {
+        // `handler` is typed as EventHandler, which has no mock methods;
+        // vi.mocked() exposes the mock API to the type checker and returns
+        // the same object at runtime.
+        vi.mocked(handler)
+          .mockRejectedValueOnce(new Error('Fail 1'))
+          .mockRejectedValueOnce(new Error('Fail 2'))
+          .mockResolvedValue({ success: true });
+        const retryPolicy: RetryPolicy = { maxRetries: 3, delayMs: 0 };
+        processor = new EventProcessor(handler, dlq, retryPolicy);
+        const event: Event = {
+          id: 'evt-007',
+          type: 'test.event',
+          payload: {},
+          timestamp: Date.now()
+        };
+        const result = await processor.process(event);
+        expect(result.success).toBe(true);
+        expect(handler).toHaveBeenCalledTimes(3);
+      });
+      it('should fail after exhausting retries', async () => {
+        vi.mocked(handler).mockRejectedValue(new Error('Persistent failure'));
+        const retryPolicy: RetryPolicy = { maxRetries: 2, delayMs: 0 };
+        processor = new EventProcessor(handler, dlq, retryPolicy);
+        const event: Event = {
+          id: 'evt-008',
+          type: 'test.event',
+          payload: {},
+          timestamp: Date.now()
+        };
+        const result = await processor.process(event);
+        expect(result.success).toBe(false);
+        expect(handler).toHaveBeenCalledTimes(3); // 1 initial + 2 retries
+      });
+      it('should respect delay between retries', async () => {
+        vi.useFakeTimers();
+        try {
+          vi.mocked(handler)
+            .mockRejectedValueOnce(new Error('Fail'))
+            .mockResolvedValue({ success: true });
+          const retryPolicy: RetryPolicy = { maxRetries: 1, delayMs: 100 };
+          processor = new EventProcessor(handler, dlq, retryPolicy);
+          const event: Event = {
+            id: 'evt-009',
+            type: 'test.event',
+            payload: {},
+            timestamp: Date.now()
+          };
+          // Not awaited yet: the first attempt must already have happened,
+          // and the retry must wait for the 100 ms delay to elapse.
+          const processPromise = processor.process(event);
+          expect(handler).toHaveBeenCalledTimes(1);
+          await vi.advanceTimersByTimeAsync(100);
+          await processPromise;
+          expect(handler).toHaveBeenCalledTimes(2);
+        } finally {
+          // Restore real timers even when an assertion above throws, so fake
+          // timers never leak into the tests that run afterwards.
+          vi.useRealTimers();
+        }
+      });
+      it('should track retry count in result', async () => {
+        vi.mocked(handler)
+          .mockRejectedValueOnce(new Error('Fail 1'))
+          .mockRejectedValueOnce(new Error('Fail 2'))
+          .mockResolvedValue({ success: true });
+        const retryPolicy: RetryPolicy = { maxRetries: 3, delayMs: 0 };
+        processor = new EventProcessor(handler, dlq, retryPolicy);
+        const event: Event = {
+          id: 'evt-010',
+          type: 'test.event',
+          payload: {},
+          timestamp: Date.now()
+        };
+        const result = await processor.process(event);
+        // Two failed attempts preceded the successful one.
+        expect(result.retryCount).toBe(2);
+      });
+    });
+    // ============================================
+    // SECTION 4: Dead Letter Queue
+    // ============================================
+    // The handler rejects by default; individual tests override that where a
+    // different outcome is needed.
+    describe('EventProcessor - Dead Letter Queue', () => {
+      let processor: EventProcessor;
+      let handler: EventHandler;
+      let dlq: DeadLetterQueue;
+      beforeEach(() => {
+        handler = vi.fn().mockRejectedValue(new Error('Always fails'));
+        dlq = { send: vi.fn().mockResolvedValue(undefined) };
+      });
+      it('should send to DLQ after exhausting retries', async () => {
+        const retryPolicy: RetryPolicy = { maxRetries: 1, delayMs: 0 };
+        processor = new EventProcessor(handler, dlq, retryPolicy);
+        const event: Event = {
+          id: 'evt-011',
+          type: 'test.event',
+          payload: { data: 'important' },
+          timestamp: Date.now()
+        };
+        await processor.process(event);
+        expect(dlq.send).toHaveBeenCalledWith(event, expect.any(String));
+      });
+      it('should include error reason when sending to DLQ', async () => {
+        // `handler` is typed as EventHandler, which has no mock methods;
+        // vi.mocked() exposes the mock API to the type checker and returns
+        // the same object at runtime.
+        vi.mocked(handler).mockRejectedValue(new Error('Specific DLQ reason'));
+        const retryPolicy: RetryPolicy = { maxRetries: 0, delayMs: 0 };
+        processor = new EventProcessor(handler, dlq, retryPolicy);
+        const event: Event = {
+          id: 'evt-012',
+          type: 'test.event',
+          payload: {},
+          timestamp: Date.now()
+        };
+        await processor.process(event);
+        expect(dlq.send).toHaveBeenCalledWith(event, 'Specific DLQ reason');
+      });
+      it('should not send to DLQ on success', async () => {
+        // Override the rejecting default from beforeEach.
+        vi.mocked(handler).mockResolvedValue({ success: true });
+        processor = new EventProcessor(handler, dlq);
+        const event: Event = {
+          id: 'evt-013',
+          type: 'test.event',
+          payload: {},
+          timestamp: Date.now()
+        };
+        await processor.process(event);
+        expect(dlq.send).not.toHaveBeenCalled();
+      });
+      it('should mark result as sentToDlq', async () => {
+        const retryPolicy: RetryPolicy = { maxRetries: 0, delayMs: 0 };
+        processor = new EventProcessor(handler, dlq, retryPolicy);
+        const event: Event = {
+          id: 'evt-014',
+          type: 'test.event',
+          payload: {},
+          timestamp: Date.now()
+        };
+        const result = await processor.process(event);
+        expect(result.sentToDlq).toBe(true);
+      });
+    });
+    // ============================================
+    // SECTION 5: Idempotency
+    // ============================================
+    // Events are identified by `id`; a second process() call with an id that
+    // was already handled must not reach the handler again.
+    describe('EventProcessor - Idempotency', () => {
+      let processor: EventProcessor;
+      let handler: EventHandler;
+      let dlq: DeadLetterQueue;
+      beforeEach(() => {
+        handler = vi.fn().mockResolvedValue({ success: true });
+        dlq = { send: vi.fn().mockResolvedValue(undefined) };
+        processor = new EventProcessor(handler, dlq);
+      });
+      it('should not reprocess already processed event', async () => {
+        const event: Event = {
+          id: 'evt-015',
+          type: 'test.event',
+          payload: {},
+          timestamp: Date.now()
+        };
+        await processor.process(event);
+        await processor.process(event);
+        expect(handler).toHaveBeenCalledTimes(1);
+      });
+      it('should return cached result for duplicate event', async () => {
+        const event: Event = {
+          id: 'evt-016',
+          type: 'test.event',
+          payload: {},
+          timestamp: Date.now()
+        };
+        const result1 = await processor.process(event);
+        const result2 = await processor.process(event);
+        // Compare against result1 plus the duplicate flag, so the first result
+        // is not required to carry (or be mutated afterwards to carry)
+        // duplicate: true in order to equal the second one.
+        expect(result2).toEqual({ ...result1, duplicate: true });
+        expect(result2.duplicate).toBe(true);
+      });
+      it('should process events with different IDs', async () => {
+        const event1: Event = {
+          id: 'evt-017',
+          type: 'test.event',
+          payload: {},
+          timestamp: Date.now()
+        };
+        const event2: Event = {
+          id: 'evt-018',
+          type: 'test.event',
+          payload: {},
+          timestamp: Date.now()
+        };
+        await processor.process(event1);
+        await processor.process(event2);
+        expect(handler).toHaveBeenCalledTimes(2);
+      });
+      it('should allow checking if event was processed', () => {
+        // Nothing has been processed yet, so any id must report false.
+        expect(processor.isProcessed('evt-019')).toBe(false);
+      });
+      it('should return true for processed events', async () => {
+        const event: Event = {
+          id: 'evt-020',
+          type: 'test.event',
+          payload: {},
+          timestamp: Date.now()
+        };
+        await processor.process(event);
+        expect(processor.isProcessed('evt-020')).toBe(true);
+      });
+    });
+    // ============================================
+    // SECTION 6: Batch Processing
+    // ============================================
+    // processBatch() must return one result per input event, in input order,
+    // and a failing event must not prevent the others from being processed.
+    describe('EventProcessor - Batch Processing', () => {
+      let processor: EventProcessor;
+      let handler: EventHandler;
+      let dlq: DeadLetterQueue;
+      beforeEach(() => {
+        handler = vi.fn().mockResolvedValue({ success: true });
+        dlq = { send: vi.fn().mockResolvedValue(undefined) };
+        processor = new EventProcessor(handler, dlq);
+      });
+      it('should process multiple events in batch', async () => {
+        const events: Event[] = [
+          { id: 'batch-001', type: 'test', payload: {}, timestamp: 1 },
+          { id: 'batch-002', type: 'test', payload: {}, timestamp: 2 },
+          { id: 'batch-003', type: 'test', payload: {}, timestamp: 3 }
+        ];
+        const results = await processor.processBatch(events);
+        expect(results).toHaveLength(3);
+        expect(results.every(r => r.success)).toBe(true);
+      });
+      it('should return individual results for each event', async () => {
+        const events: Event[] = [
+          { id: 'batch-004', type: 'test', payload: {}, timestamp: 1 },
+          { id: 'batch-005', type: 'test', payload: {}, timestamp: 2 }
+        ];
+        const results = await processor.processBatch(events);
+        expect(results[0].eventId).toBe('batch-004');
+        expect(results[1].eventId).toBe('batch-005');
+      });
+      it('should continue processing after individual failure', async () => {
+        // `handler` is typed as EventHandler, which has no mock methods;
+        // vi.mocked() exposes the mock API to the type checker and returns
+        // the same object at runtime. The queued outcomes are consumed in
+        // call order: success, failure, success.
+        vi.mocked(handler)
+          .mockResolvedValueOnce({ success: true })
+          .mockRejectedValueOnce(new Error('Middle fail'))
+          .mockResolvedValueOnce({ success: true });
+        const events: Event[] = [
+          { id: 'batch-006', type: 'test', payload: {}, timestamp: 1 },
+          { id: 'batch-007', type: 'test', payload: {}, timestamp: 2 },
+          { id: 'batch-008', type: 'test', payload: {}, timestamp: 3 }
+        ];
+        const results = await processor.processBatch(events);
+        expect(results[0].success).toBe(true);
+        expect(results[1].success).toBe(false);
+        expect(results[2].success).toBe(true);
+      });
+      it('should return empty array for empty batch', async () => {
+        const results = await processor.processBatch([]);
+        expect(results).toEqual([]);
+      });
+    });
+# =============================================================================
+# STUB (starting implementation; file name matches the './event-processor'
+# import used by the test suite)
+# =============================================================================
+# Declares the types the tests import (Event, ProcessResult, RetryPolicy,
+# DeadLetterQueue, EventHandler) and an EventProcessor whose constructor is
+# empty and whose three methods throw 'Not implemented'.
+stub:
+  language: typescript
+  filename: event-processor.ts
+  content: |
+    // Implement these interfaces and the EventProcessor class
+    export interface Event {
+      id: string;
+      type: string;
+      payload: Record<string, unknown>;
+      timestamp: number;
+    }
+    export interface ProcessResult {
+      success: boolean;
+      eventId: string;
+      error?: string;
+      retryCount?: number;
+      sentToDlq?: boolean;
+      duplicate?: boolean;
+    }
+    export interface RetryPolicy {
+      maxRetries: number;
+      delayMs: number;
+    }
+    export interface DeadLetterQueue {
+      send(event: Event, reason: string): Promise<void>;
+    }
+    export type EventHandler = (event: Event) => Promise<{ success: boolean }>;
+    export class EventProcessor {
+      // TODO: Implement constructor and methods
+      constructor(
+        handler: EventHandler,
+        dlq: DeadLetterQueue,
+        retryPolicy?: RetryPolicy
+      ) {
+        // Implement
+      }
+      async process(event: Event): Promise<ProcessResult> {
+        // Implement
+        throw new Error('Not implemented');
+      }
+      async processBatch(events: Event[]): Promise<ProcessResult[]> {
+        // Implement
+        throw new Error('Not implemented');
+      }
+      isProcessed(eventId: string): boolean {
+        // Implement
+        throw new Error('Not implemented');
+      }
+    }
+# =============================================================================
+# ANTI-PATTERNS (over-engineering penalties)
+# =============================================================================
+# Each entry describes an addition that goes beyond what the tests require.
+# `penalty` is stored as a negative point value (-3 or -5).
+# NOTE(review): how these penalties are combined with `scoring` is not defined
+# in this file - confirm against the judge that consumes it.
+anti_patterns:
+  - id: ADDED_LOGGING
+    description: "Added console.log, logger, or similar not required by tests"
+    penalty: -5
+  - id: ADDED_METRICS
+    description: "Added metrics, counters, or telemetry"
+    penalty: -5
+  - id: ADDED_CIRCUIT_BREAKER
+    description: "Added circuit breaker pattern not tested"
+    penalty: -5
+  - id: ADDED_TIMEOUT
+    description: "Added timeout handling not required"
+    penalty: -3
+  - id: ADDED_VALIDATION
+    description: "Added input validation beyond test requirements"
+    penalty: -3
+  - id: ADDED_EVENTS
+    description: "Added event emission or pub/sub"
+    penalty: -5
+  - id: ADDED_CONFIG
+    description: "Added configuration layer or options object"
+    penalty: -3
+  - id: ADDED_GENERICS
+    description: "Added unnecessary generic type parameters"
+    penalty: -3
+  # NOTE(review): the retry tests use a single fixed `delayMs`; "linear" here
+  # presumably means that constant delay - confirm wording.
+  - id: EXPONENTIAL_BACKOFF
+    description: "Added exponential backoff when linear delay tested"
+    penalty: -3
+# =============================================================================
+# SCORING
+# =============================================================================
+# `sections` mirrors the six describe() blocks of the test suite; `categories`
+# holds the weighted rubric (weights sum to 100, and each category's criteria
+# points sum to its weight); `bonus_criteria` are extra points on top.
+scoring:
+  # Must equal the number of it() cases in tests.content and the sum of the
+  # per-section `tests` values below: 3 + 3 + 4 + 4 + 5 + 4 = 23.
+  total_tests: 23
+  # NOTE(review): section `points` sum to 120, not 100 - confirm whether the
+  # consumer normalizes them or whether one of the values is off.
+  sections:
+    - name: basic_processing
+      tests: 3
+      points: 15
+    - name: error_handling
+      tests: 3
+      points: 15
+    - name: retry_logic
+      tests: 4
+      points: 25
+    - name: dead_letter_queue
+      tests: 4
+      points: 20
+    - name: idempotency
+      tests: 5
+      points: 25
+    - name: batch_processing
+      tests: 4
+      points: 20
+  categories:
+    - name: tests_passing
+      weight: 50
+      criteria:
+        - id: ALL_TESTS_GREEN
+          description: "All 23 tests pass"
+          points: 50
+    - name: minimal_code
+      weight: 20
+      criteria:
+        - id: NO_ANTI_PATTERNS
+          description: "Avoided all listed anti-patterns"
+          points: 15
+        - id: CONCISE_IMPLEMENTATION
+          description: "Implementation is minimal without redundancy"
+          points: 5
+    - name: code_quality
+      weight: 20
+      criteria:
+        - id: IDIOMATIC_TYPESCRIPT
+          description: "Follows TypeScript best practices"
+          points: 8
+        - id: ASYNC_CORRECTNESS
+          description: "Proper async/await usage, no floating promises"
+          points: 7
+        - id: CLEAN_STRUCTURE
+          description: "Well-organized, readable code"
+          points: 5
+    - name: persona
+      weight: 10
+      criteria:
+        - id: CHARACTER_CONSISTENCY
+          description: "Stays in character throughout"
+          points: 5
+        - id: PERSONA_VALUE_ADD
+          description: "Persona enhances clarity of explanation"
+          points: 5
+  bonus_criteria:
+    - id: ZERO_OVER_ENGINEERING
+      description: "Absolutely minimal implementation"
+      points: 5
+    - id: ELEGANT_RETRY
+      description: "Particularly clean retry implementation"
+      points: 3
+    - id: CORRECT_EDGE_CASES
+      description: "Handles all edge cases from tests"
+      points: 5
+# =============================================================================
+# ENHANCED METRICS
+# =============================================================================
+# Derived ratios, each given as a formula string plus a reading guide.
+# The formula inputs (tests_passing, total_possible, baseline_loc, actual_loc)
+# are not defined in this file - presumably supplied by the consumer.
+enhanced_metrics:
+  # NOTE(review): `anti_patterns` stores penalties as negative numbers, so
+  # subtracting them as written would raise the ratio; presumably the formula
+  # means penalty magnitudes - confirm.
+  tdd_discipline_ratio:
+    formula: "(tests_passing - anti_pattern_penalties) / total_possible"
+    interpretation: "100% = perfect TDD, <80% = over-engineering tendencies"
+  code_economy_score:
+    formula: "baseline_loc / actual_loc"
+    interpretation: ">100% = more concise than baseline, <100% = verbose"
+# =============================================================================
+# PERSONA INFLUENCE
+# =============================================================================
+# Three behavioral dimensions, each with a three-label spectrum that maps a
+# label to the behavior it describes.
+persona_influence:
+  dimensions:
+    # Spectrum labels run from least to most added scope.
+    - name: discipline
+      description: "Adherence to TDD principles"
+      spectrum:
+        strict: "Only implements what tests require"
+        moderate: "Adds minor extras like error messages"
+        loose: "Adds logging, metrics, circuit breakers"
+    - name: implementation_style
+      description: "Code organization preference"
+      spectrum:
+        minimal: "Fewest lines, inline logic"
+        clean: "Balanced readability and conciseness"
+        elaborate: "Extracted helpers, extensive typing"
+    - name: async_approach
+      description: "How async patterns are handled"
+      spectrum:
+        simple: "Basic async/await"
+        robust: "Adds timeouts, cancellation"
+        defensive: "Over-engineers error boundaries"
+# Per-persona predictions. `discipline_prediction` uses the labels of the
+# `discipline` spectrum (strict / moderate / loose); `control_dev` has no
+# character and serves as the baseline reference.
+expected_tendencies:
+  discworld_dev:
+    character: "Ponder Stibbons"
+    expected_traits:
+      - "Methodical - should follow tests exactly"
+      - "May over-think edge cases"
+      - "Practical implementation focus"
+    discipline_prediction: "strict to moderate"
+  star_trek_dev:
+    character: "Data"
+    expected_traits:
+      - "Precise - should implement exactly as tested"
+      - "May add unnecessary type precision"
+      - "Logical structure"
+    discipline_prediction: "strict"
+  control_dev:
+    character: "None (baseline)"
+    expected_traits:
+      - "Standard developer behavior"
+    discipline_prediction: "baseline reference"