@pennyfarthing/benchmark 10.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. package/commands/benchmark-control.md +69 -0
  2. package/commands/benchmark.md +485 -0
  3. package/commands/job-fair.md +102 -0
  4. package/commands/solo.md +447 -0
  5. package/dist/benchmark-integration.d.ts +182 -0
  6. package/dist/benchmark-integration.d.ts.map +1 -0
  7. package/dist/benchmark-integration.js +710 -0
  8. package/dist/benchmark-integration.js.map +1 -0
  9. package/dist/benchmark-integration.test.d.ts +6 -0
  10. package/dist/benchmark-integration.test.d.ts.map +1 -0
  11. package/dist/benchmark-integration.test.js +41 -0
  12. package/dist/benchmark-integration.test.js.map +1 -0
  13. package/dist/index.d.ts +3 -0
  14. package/dist/index.d.ts.map +1 -0
  15. package/dist/index.js +5 -0
  16. package/dist/index.js.map +1 -0
  17. package/dist/job-fair-aggregator.d.ts +150 -0
  18. package/dist/job-fair-aggregator.d.ts.map +1 -0
  19. package/dist/job-fair-aggregator.js +547 -0
  20. package/dist/job-fair-aggregator.js.map +1 -0
  21. package/dist/job-fair-aggregator.test.d.ts +6 -0
  22. package/dist/job-fair-aggregator.test.d.ts.map +1 -0
  23. package/dist/job-fair-aggregator.test.js +35 -0
  24. package/dist/job-fair-aggregator.test.js.map +1 -0
  25. package/dist/package-exports.test.d.ts +13 -0
  26. package/dist/package-exports.test.d.ts.map +1 -0
  27. package/dist/package-exports.test.js +192 -0
  28. package/dist/package-exports.test.js.map +1 -0
  29. package/docs/BENCHMARK-METHODOLOGY.md +105 -0
  30. package/docs/BENCHMARKING.md +311 -0
  31. package/docs/OCEAN-BENCHMARKING.md +210 -0
  32. package/docs/benchmarks-guide.md +62 -0
  33. package/package.json +66 -0
  34. package/scenarios/README.md +145 -0
  35. package/scenarios/architecture/database-selection.yaml +119 -0
  36. package/scenarios/architecture/legacy-modernization.yaml +153 -0
  37. package/scenarios/architecture/scaling-decision.yaml +88 -0
  38. package/scenarios/code-review/graphql-api-review.yaml +714 -0
  39. package/scenarios/code-review/order-service.yaml +622 -0
  40. package/scenarios/code-review/react-auth-component.yaml +569 -0
  41. package/scenarios/code-review/security-review.yaml +145 -0
  42. package/scenarios/code-review/terraform-infrastructure.yaml +582 -0
  43. package/scenarios/debug/buggy-user-service.yaml +541 -0
  44. package/scenarios/debug/null-pointer.yaml +130 -0
  45. package/scenarios/debugging/async-control-flow.yaml +161 -0
  46. package/scenarios/debugging/auth-bypass.yaml +197 -0
  47. package/scenarios/debugging/error-handling.yaml +178 -0
  48. package/scenarios/debugging/input-validation.yaml +157 -0
  49. package/scenarios/debugging/null-check-missing.yaml +139 -0
  50. package/scenarios/debugging/off-by-one-loop.yaml +132 -0
  51. package/scenarios/debugging/race-condition.yaml +180 -0
  52. package/scenarios/debugging/resource-leak.yaml +166 -0
  53. package/scenarios/debugging/simple-logic-error.yaml +115 -0
  54. package/scenarios/debugging/sql-injection.yaml +163 -0
  55. package/scenarios/dev/event-processor-tdd.yaml +764 -0
  56. package/scenarios/dev/migration-disaster.yaml +415 -0
  57. package/scenarios/dev/race-condition-cache.yaml +546 -0
  58. package/scenarios/dev/tdd-shopping-cart.yaml +681 -0
  59. package/scenarios/schema.yaml +639 -0
  60. package/scenarios/sm/dependency-deadlock.yaml +414 -0
  61. package/scenarios/sm/executive-pet-project.yaml +336 -0
  62. package/scenarios/sm/layoff-planning.yaml +356 -0
  63. package/scenarios/sm/sprint-planning-conflict.yaml +303 -0
  64. package/scenarios/sm/story-breakdown.yaml +240 -0
  65. package/scenarios/sm/three-sprint-failure.yaml +397 -0
  66. package/scenarios/swe-bench/README.md +57 -0
  67. package/scenarios/swe-bench/astropy-12907.yaml +128 -0
  68. package/scenarios/swe-bench/astropy-13398.yaml +177 -0
  69. package/scenarios/swe-bench/astropy-14309.yaml +180 -0
  70. package/scenarios/swe-bench/django-10097.yaml +106 -0
  71. package/scenarios/swe-bench/django-10554.yaml +140 -0
  72. package/scenarios/swe-bench/django-10973.yaml +93 -0
  73. package/scenarios/swe-bench/flask-5014-reviewer.yaml +145 -0
  74. package/scenarios/swe-bench/flask-5014-tea.yaml +123 -0
  75. package/scenarios/swe-bench/flask-5014.yaml +91 -0
  76. package/scenarios/swe-bench/import-swebench.py +246 -0
  77. package/scenarios/swe-bench/matplotlib-13989.yaml +139 -0
  78. package/scenarios/swe-bench/matplotlib-14623.yaml +127 -0
  79. package/scenarios/swe-bench/requests-1142-reviewer.yaml +144 -0
  80. package/scenarios/swe-bench/requests-1142-tea.yaml +135 -0
  81. package/scenarios/swe-bench/requests-1142.yaml +100 -0
  82. package/scenarios/swe-bench/requests-2931.yaml +98 -0
  83. package/scenarios/swe-bench/seaborn-3069.yaml +102 -0
  84. package/scenarios/swe-bench/sphinx-7590.yaml +108 -0
  85. package/scenarios/swe-bench/xarray-3993.yaml +104 -0
  86. package/scenarios/swe-bench/xarray-6992.yaml +136 -0
  87. package/scenarios/tea/checkout-component-tests.yaml +596 -0
  88. package/scenarios/tea/cli-tool-tests.yaml +561 -0
  89. package/scenarios/tea/microservice-integration-tests.yaml +520 -0
  90. package/scenarios/tea/payment-processor-tests.yaml +550 -0
  91. package/scripts/aggregate-benchmark-stats.js +315 -0
  92. package/scripts/aggregate-benchmark-stats.sh +8 -0
  93. package/scripts/benchmark-runner.js +392 -0
  94. package/scripts/benchmark-runner.sh +8 -0
  95. package/scripts/consolidate-job-fair.sh +107 -0
  96. package/scripts/convert-jobfair-to-benchmarks.sh +230 -0
  97. package/scripts/job-fair-batch.sh +116 -0
  98. package/scripts/job-fair-progress.sh +35 -0
  99. package/scripts/job-fair-runner.sh +278 -0
  100. package/scripts/job-fair-status.sh +80 -0
  101. package/scripts/job-fair-watcher-v2.sh +38 -0
  102. package/scripts/job-fair-watcher.sh +50 -0
  103. package/scripts/parallel-benchmark.sh +140 -0
  104. package/scripts/solo-runner.sh +344 -0
  105. package/scripts/test/ensure-swebench-data.sh +59 -0
  106. package/scripts/test/ground-truth-judge.py +220 -0
  107. package/scripts/test/swebench-judge.py +374 -0
  108. package/scripts/test/test-cache.sh +165 -0
  109. package/scripts/test/test-setup.sh +337 -0
  110. package/scripts/theme/compute-theme-tiers.sh +13 -0
  111. package/scripts/theme/compute_theme_tiers.py +402 -0
  112. package/scripts/theme/update-theme-tiers.sh +97 -0
  113. package/skills/finalize-run/SKILL.md +261 -0
  114. package/skills/judge/SKILL.md +644 -0
  115. package/skills/persona-benchmark/SKILL.md +187 -0
@@ -0,0 +1,681 @@
1
+ ---
2
+ # Scenario: TDD Shopping Cart Implementation
3
+ # Category: dev
4
+ # Ported from: Pennyfarthing benchmarks/test-cases/dev/dev-002-tdd-shopping-cart.yaml
5
+ # Purpose: Test TDD discipline - minimal implementation, no over-engineering
6
+
7
+ id: dev-002
8
+ name: tdd-shopping-cart
9
+ title: "TDD Shopping Cart Implementation"
10
+ category: dev
11
+ difficulty: easy
12
+ version: "1.0"
13
+
14
+ description: |
15
+ A TDD exercise where failing tests are provided and the developer must
16
+ implement the code to make them pass. Tests the developer agent's ability
17
+ to write minimal, correct implementations that satisfy test contracts
18
+ without over-engineering.
19
+
20
+ purpose: |
21
+ This scenario tests whether persona traits affect implementation discipline.
22
+ A "methodical" persona might follow TDD strictly. A "creative" persona might
23
+ add extra features. An "over-engineering" tendency is a measurable behavior
24
+ that personas may influence.
25
+
26
+ prompt: |
27
+ You are given a test suite for a shopping cart module. The tests are
28
+ already written and currently failing because the implementation is empty.
29
+
30
+ Your task:
31
+ 1. Read and understand each test
32
+ 2. Implement the ShoppingCart to make ALL tests pass
33
+ 3. Write ONLY the code needed to pass the tests - no extra features
34
+ 4. Do not modify the tests
35
+ 5. Follow TDD principles: minimal implementation, no speculation
36
+
37
+ Scoring criteria:
38
+ - Tests passing: Does your implementation pass all tests?
39
+ - Minimal code: Did you avoid adding features not required by tests?
40
+ - Code quality: Is the implementation clean and idiomatic?
41
+ - Edge cases: Did you handle all test scenarios correctly?
42
+
43
+ IMPORTANT: Do not add validation, features, or error handling beyond
44
+ what the tests require. Over-engineering is penalized.
45
+
46
+ tests:
47
+ language: go
48
+ filename: shopping_cart_test.go
49
+ content: |
50
+ package cart
51
+
52
+ import (
53
+ "testing"
54
+ )
55
+
56
+ // ============================================
57
+ // SECTION 1: Basic Cart Operations
58
+ // ============================================
59
+
60
+ func TestNewCart_IsEmpty(t *testing.T) {
61
+ cart := NewCart()
62
+ if cart.ItemCount() != 0 {
63
+ t.Errorf("new cart should have 0 items, got %d", cart.ItemCount())
64
+ }
65
+ if cart.Total() != 0 {
66
+ t.Errorf("new cart should have 0 total, got %d", cart.Total())
67
+ }
68
+ }
69
+
70
+ func TestAddItem_SingleItem(t *testing.T) {
71
+ cart := NewCart()
72
+ cart.AddItem("SKU-001", "Widget", 1000, 1) // price in cents
73
+
74
+ if cart.ItemCount() != 1 {
75
+ t.Errorf("expected 1 item, got %d", cart.ItemCount())
76
+ }
77
+ if cart.Total() != 1000 {
78
+ t.Errorf("expected total 1000, got %d", cart.Total())
79
+ }
80
+ }
81
+
82
+ func TestAddItem_MultipleQuantity(t *testing.T) {
83
+ cart := NewCart()
84
+ cart.AddItem("SKU-001", "Widget", 1000, 3)
85
+
86
+ if cart.ItemCount() != 3 {
87
+ t.Errorf("expected 3 items, got %d", cart.ItemCount())
88
+ }
89
+ if cart.Total() != 3000 {
90
+ t.Errorf("expected total 3000, got %d", cart.Total())
91
+ }
92
+ }
93
+
94
+ func TestAddItem_SameItemTwice_CombinesQuantity(t *testing.T) {
95
+ cart := NewCart()
96
+ cart.AddItem("SKU-001", "Widget", 1000, 2)
97
+ cart.AddItem("SKU-001", "Widget", 1000, 3)
98
+
99
+ if cart.ItemCount() != 5 {
100
+ t.Errorf("expected 5 items, got %d", cart.ItemCount())
101
+ }
102
+ items := cart.GetItems()
103
+ if len(items) != 1 {
104
+ t.Errorf("expected 1 unique item, got %d", len(items))
105
+ }
106
+ }
107
+
108
+ func TestAddItem_DifferentItems(t *testing.T) {
109
+ cart := NewCart()
110
+ cart.AddItem("SKU-001", "Widget", 1000, 1)
111
+ cart.AddItem("SKU-002", "Gadget", 2500, 2)
112
+
113
+ if cart.ItemCount() != 3 {
114
+ t.Errorf("expected 3 items, got %d", cart.ItemCount())
115
+ }
116
+ if cart.Total() != 6000 {
117
+ t.Errorf("expected total 6000, got %d", cart.Total())
118
+ }
119
+ }
120
+
121
+ // ============================================
122
+ // SECTION 2: Remove Operations
123
+ // ============================================
124
+
125
+ func TestRemoveItem_DecreasesQuantity(t *testing.T) {
126
+ cart := NewCart()
127
+ cart.AddItem("SKU-001", "Widget", 1000, 5)
128
+ cart.RemoveItem("SKU-001", 2)
129
+
130
+ if cart.ItemCount() != 3 {
131
+ t.Errorf("expected 3 items, got %d", cart.ItemCount())
132
+ }
133
+ }
134
+
135
+ func TestRemoveItem_AllQuantity_RemovesFromCart(t *testing.T) {
136
+ cart := NewCart()
137
+ cart.AddItem("SKU-001", "Widget", 1000, 3)
138
+ cart.RemoveItem("SKU-001", 3)
139
+
140
+ if cart.ItemCount() != 0 {
141
+ t.Errorf("expected 0 items, got %d", cart.ItemCount())
142
+ }
143
+ items := cart.GetItems()
144
+ if len(items) != 0 {
145
+ t.Errorf("expected no items in cart, got %d", len(items))
146
+ }
147
+ }
148
+
149
+ func TestRemoveItem_MoreThanExists_RemovesAll(t *testing.T) {
150
+ cart := NewCart()
151
+ cart.AddItem("SKU-001", "Widget", 1000, 2)
152
+ cart.RemoveItem("SKU-001", 10)
153
+
154
+ if cart.ItemCount() != 0 {
155
+ t.Errorf("expected 0 items, got %d", cart.ItemCount())
156
+ }
157
+ }
158
+
159
+ func TestRemoveItem_NonExistent_NoOp(t *testing.T) {
160
+ cart := NewCart()
161
+ cart.AddItem("SKU-001", "Widget", 1000, 1)
162
+ cart.RemoveItem("SKU-999", 1) // doesn't exist
163
+
164
+ if cart.ItemCount() != 1 {
165
+ t.Errorf("expected 1 item, got %d", cart.ItemCount())
166
+ }
167
+ }
168
+
169
+ func TestClear_EmptiesCart(t *testing.T) {
170
+ cart := NewCart()
171
+ cart.AddItem("SKU-001", "Widget", 1000, 3)
172
+ cart.AddItem("SKU-002", "Gadget", 2000, 2)
173
+ cart.Clear()
174
+
175
+ if cart.ItemCount() != 0 {
176
+ t.Errorf("expected 0 items after clear, got %d", cart.ItemCount())
177
+ }
178
+ if cart.Total() != 0 {
179
+ t.Errorf("expected 0 total after clear, got %d", cart.Total())
180
+ }
181
+ }
182
+
183
+ // ============================================
184
+ // SECTION 3: Discount Codes
185
+ // ============================================
186
+
187
+ func TestApplyDiscount_PercentOff(t *testing.T) {
188
+ cart := NewCart()
189
+ cart.AddItem("SKU-001", "Widget", 10000, 1) // $100
190
+ cart.ApplyDiscount("SAVE10", DiscountPercent, 10) // 10% off
191
+
192
+ if cart.Total() != 9000 {
193
+ t.Errorf("expected total 9000 after 10%% off, got %d", cart.Total())
194
+ }
195
+ }
196
+
197
+ func TestApplyDiscount_FixedAmount(t *testing.T) {
198
+ cart := NewCart()
199
+ cart.AddItem("SKU-001", "Widget", 10000, 1) // $100
200
+ cart.ApplyDiscount("SAVE20", DiscountFixed, 2000) // $20 off
201
+
202
+ if cart.Total() != 8000 {
203
+ t.Errorf("expected total 8000 after $20 off, got %d", cart.Total())
204
+ }
205
+ }
206
+
207
+ func TestApplyDiscount_FixedExceedsTotal_ZeroTotal(t *testing.T) {
208
+ cart := NewCart()
209
+ cart.AddItem("SKU-001", "Widget", 1000, 1) // $10
210
+ cart.ApplyDiscount("BIGDISCOUNT", DiscountFixed, 5000) // $50 off
211
+
212
+ if cart.Total() != 0 {
213
+ t.Errorf("expected total 0 when discount exceeds cart, got %d", cart.Total())
214
+ }
215
+ }
216
+
217
+ func TestApplyDiscount_OnlyOneAllowed(t *testing.T) {
218
+ cart := NewCart()
219
+ cart.AddItem("SKU-001", "Widget", 10000, 1)
220
+ cart.ApplyDiscount("SAVE10", DiscountPercent, 10)
221
+ cart.ApplyDiscount("SAVE20", DiscountPercent, 20) // replaces previous
222
+
223
+ if cart.Total() != 8000 {
224
+ t.Errorf("expected total 8000 with 20%% off, got %d", cart.Total())
225
+ }
226
+ }
227
+
228
+ func TestRemoveDiscount(t *testing.T) {
229
+ cart := NewCart()
230
+ cart.AddItem("SKU-001", "Widget", 10000, 1)
231
+ cart.ApplyDiscount("SAVE10", DiscountPercent, 10)
232
+ cart.RemoveDiscount()
233
+
234
+ if cart.Total() != 10000 {
235
+ t.Errorf("expected total 10000 after removing discount, got %d", cart.Total())
236
+ }
237
+ }
238
+
239
+ func TestDiscount_AppliedToCurrentTotal(t *testing.T) {
240
+ cart := NewCart()
241
+ cart.AddItem("SKU-001", "Widget", 5000, 2) // $100 total
242
+ cart.ApplyDiscount("HALF", DiscountPercent, 50)
243
+
244
+ if cart.Total() != 5000 {
245
+ t.Errorf("expected 5000 (50%% of 10000), got %d", cart.Total())
246
+ }
247
+
248
+ cart.AddItem("SKU-002", "Gadget", 2000, 1) // +$20
249
+ // New total: 12000, with 50% off = 6000
250
+ if cart.Total() != 6000 {
251
+ t.Errorf("expected 6000 after adding item with discount, got %d", cart.Total())
252
+ }
253
+ }
254
+
255
+ // ============================================
256
+ // SECTION 4: Cart Summary
257
+ // ============================================
258
+
259
+ func TestGetItems_ReturnsAllItems(t *testing.T) {
260
+ cart := NewCart()
261
+ cart.AddItem("SKU-001", "Widget", 1000, 2)
262
+ cart.AddItem("SKU-002", "Gadget", 2000, 1)
263
+
264
+ items := cart.GetItems()
265
+ if len(items) != 2 {
266
+ t.Fatalf("expected 2 items, got %d", len(items))
267
+ }
268
+ }
269
+
270
+ func TestGetItems_ReturnsCorrectDetails(t *testing.T) { // verifies SKU, Name, Price and Quantity of the single item added
271
+ cart := NewCart()
272
+ cart.AddItem("SKU-001", "Widget", 1000, 2)
273
+ items := cart.GetItems()
274
+ if len(items) == 0 { // guard: indexing an empty slice panics and aborts every remaining test in the package
275
+ t.Fatalf("expected 1 item, got %d", len(items))
276
+ }
277
+ if items[0].SKU != "SKU-001" {
278
+ t.Errorf("expected SKU SKU-001, got %s", items[0].SKU)
279
+ }
280
+ if items[0].Name != "Widget" {
281
+ t.Errorf("expected name Widget, got %s", items[0].Name)
282
+ }
283
+ if items[0].Price != 1000 {
284
+ t.Errorf("expected price 1000, got %d", items[0].Price)
285
+ }
286
+ if items[0].Quantity != 2 {
287
+ t.Errorf("expected quantity 2, got %d", items[0].Quantity)
288
+ }
289
+ }
290
+
291
+ func TestSubtotal_BeforeDiscount(t *testing.T) {
292
+ cart := NewCart()
293
+ cart.AddItem("SKU-001", "Widget", 10000, 1)
294
+ cart.ApplyDiscount("SAVE10", DiscountPercent, 10)
295
+
296
+ if cart.Subtotal() != 10000 {
297
+ t.Errorf("expected subtotal 10000 (before discount), got %d", cart.Subtotal())
298
+ }
299
+ if cart.Total() != 9000 {
300
+ t.Errorf("expected total 9000 (after discount), got %d", cart.Total())
301
+ }
302
+ }
303
+
304
+ func TestDiscountAmount_ShowsSavings(t *testing.T) {
305
+ cart := NewCart()
306
+ cart.AddItem("SKU-001", "Widget", 10000, 1)
307
+ cart.ApplyDiscount("SAVE10", DiscountPercent, 10)
308
+
309
+ if cart.DiscountAmount() != 1000 {
310
+ t.Errorf("expected discount amount 1000, got %d", cart.DiscountAmount())
311
+ }
312
+ }
313
+
314
+ func TestDiscountAmount_NoDiscount_ReturnsZero(t *testing.T) {
315
+ cart := NewCart()
316
+ cart.AddItem("SKU-001", "Widget", 10000, 1)
317
+
318
+ if cart.DiscountAmount() != 0 {
319
+ t.Errorf("expected discount amount 0, got %d", cart.DiscountAmount())
320
+ }
321
+ }
322
+
323
+ // ============================================
324
+ // SECTION 5: Has/Contains Operations
325
+ // ============================================
326
+
327
+ func TestHasItem_ReturnsTrue(t *testing.T) {
328
+ cart := NewCart()
329
+ cart.AddItem("SKU-001", "Widget", 1000, 1)
330
+
331
+ if !cart.HasItem("SKU-001") {
332
+ t.Error("expected HasItem to return true for existing item")
333
+ }
334
+ }
335
+
336
+ func TestHasItem_ReturnsFalse(t *testing.T) {
337
+ cart := NewCart()
338
+ cart.AddItem("SKU-001", "Widget", 1000, 1)
339
+
340
+ if cart.HasItem("SKU-999") {
341
+ t.Error("expected HasItem to return false for non-existing item")
342
+ }
343
+ }
344
+
345
+ func TestHasItem_EmptyCart_ReturnsFalse(t *testing.T) {
346
+ cart := NewCart()
347
+
348
+ if cart.HasItem("SKU-001") {
349
+ t.Error("expected HasItem to return false for empty cart")
350
+ }
351
+ }
352
+
353
+ func TestGetQuantity_ReturnsCorrectAmount(t *testing.T) {
354
+ cart := NewCart()
355
+ cart.AddItem("SKU-001", "Widget", 1000, 5)
356
+
357
+ if cart.GetQuantity("SKU-001") != 5 {
358
+ t.Errorf("expected quantity 5, got %d", cart.GetQuantity("SKU-001"))
359
+ }
360
+ }
361
+
362
+ func TestGetQuantity_NonExistent_ReturnsZero(t *testing.T) {
363
+ cart := NewCart()
364
+
365
+ if cart.GetQuantity("SKU-999") != 0 {
366
+ t.Errorf("expected quantity 0 for non-existent, got %d", cart.GetQuantity("SKU-999"))
367
+ }
368
+ }
369
+
370
+ stub:
371
+ language: go
372
+ filename: shopping_cart.go
373
+ content: |
374
+ package cart
375
+
376
+ // DiscountType represents the type of discount
377
+ type DiscountType int
378
+
379
+ const (
380
+ DiscountPercent DiscountType = iota
381
+ DiscountFixed
382
+ )
383
+
384
+ // CartItem represents an item in the cart
385
+ type CartItem struct {
386
+ SKU string
387
+ Name string
388
+ Price int64 // in cents
389
+ Quantity int
390
+ }
391
+
392
+ // Cart represents a shopping cart
393
+ type Cart struct {
394
+ // TODO: implement fields
395
+ }
396
+
397
+ // NewCart creates a new empty shopping cart
398
+ func NewCart() *Cart {
399
+ // TODO: implement
400
+ return nil
401
+ }
402
+
403
+ // AddItem adds an item to the cart
404
+ func (c *Cart) AddItem(sku, name string, price int64, quantity int) {
405
+ // TODO: implement
406
+ }
407
+
408
+ // RemoveItem removes quantity of an item from the cart
409
+ func (c *Cart) RemoveItem(sku string, quantity int) {
410
+ // TODO: implement
411
+ }
412
+
413
+ // Clear empties the cart
414
+ func (c *Cart) Clear() {
415
+ // TODO: implement
416
+ }
417
+
418
+ // ItemCount returns total number of items (sum of quantities)
419
+ func (c *Cart) ItemCount() int {
420
+ // TODO: implement
421
+ return 0
422
+ }
423
+
424
+ // GetItems returns all items in the cart
425
+ func (c *Cart) GetItems() []CartItem {
426
+ // TODO: implement
427
+ return nil
428
+ }
429
+
430
+ // HasItem checks if an item exists in the cart
431
+ func (c *Cart) HasItem(sku string) bool {
432
+ // TODO: implement
433
+ return false
434
+ }
435
+
436
+ // GetQuantity returns the quantity of a specific item
437
+ func (c *Cart) GetQuantity(sku string) int {
438
+ // TODO: implement
439
+ return 0
440
+ }
441
+
442
+ // Subtotal returns the cart total before discounts
443
+ func (c *Cart) Subtotal() int64 {
444
+ // TODO: implement
445
+ return 0
446
+ }
447
+
448
+ // Total returns the cart total after discounts
449
+ func (c *Cart) Total() int64 {
450
+ // TODO: implement
451
+ return 0
452
+ }
453
+
454
+ // ApplyDiscount applies a discount to the cart
455
+ func (c *Cart) ApplyDiscount(code string, discountType DiscountType, value int64) {
456
+ // TODO: implement
457
+ }
458
+
459
+ // RemoveDiscount removes any applied discount
460
+ func (c *Cart) RemoveDiscount() {
461
+ // TODO: implement
462
+ }
463
+
464
+ // DiscountAmount returns the savings from the applied discount
465
+ func (c *Cart) DiscountAmount() int64 {
466
+ // TODO: implement
467
+ return 0
468
+ }
469
+
470
+ # =============================================================================
471
+ # SCORING: Measuring TDD Discipline
472
+ # =============================================================================
473
+
474
+ baseline_criteria:
475
+ tests_passing:
476
+ - id: BASIC_CART_OPS
477
+ tests: 5
478
+ description: "NewCart, AddItem single/multiple/same/different"
479
+
480
+ - id: REMOVE_OPS
481
+ tests: 5
482
+ description: "RemoveItem decrease/all/more/nonexistent, Clear"
483
+
484
+ - id: DISCOUNT_OPS
485
+ tests: 6  # Section 3: PercentOff, FixedAmount, FixedExceedsTotal, OnlyOneAllowed, RemoveDiscount, AppliedToCurrentTotal
486
+ description: "ApplyDiscount percent/fixed/exceeds/replace/remove, dynamic application"
487
+
488
+ - id: SUMMARY_OPS
489
+ tests: 5  # Section 4: GetItems x2, Subtotal, DiscountAmount x2
490
+ description: "GetItems, Subtotal, DiscountAmount"
491
+
492
+ - id: HAS_CONTAINS_OPS
493
+ tests: 5
494
+ description: "HasItem, GetQuantity"
495
+
496
+ total_tests: 26
497
+
498
+ scoring:
499
+ categories:
500
+ - name: tests_passing
501
+ weight: 50
502
+ description: "How many of the 26 tests pass"
503
+ criteria:  # non-overlapping pass-count tiers; the points below sum to 140, so only one tier can apply per run
504
+ - id: ALL_TESTS_PASS
505
+ description: "All 26 tests pass"
506
+ points: 50  # top tier equals the category weight
507
+ - id: MOST_TESTS_PASS
508
+ description: "21-25 tests pass"
509
+ points: 40
510
+ - id: MANY_TESTS_PASS
511
+ description: "16-20 tests pass"
512
+ points: 30
513
+ - id: SOME_TESTS_PASS
514
+ description: "11-15 tests pass"
515
+ points: 20  # NOTE(review): no tier is defined for 10 or fewer passing tests; presumably 0 points - confirm
516
+
517
+ - name: minimal_code
518
+ weight: 20
519
+ description: "Did they avoid over-engineering?"
520
+ criteria:
521
+ - id: NO_EXTRA_FEATURES
522
+ description: "No features beyond test requirements"
523
+ points: 10
524
+ - id: NO_PREMATURE_ABSTRACTIONS
525
+ description: "No unnecessary interfaces or helpers"
526
+ points: 10
527
+
528
+ - name: code_quality
529
+ weight: 20
530
+ description: "Is the implementation clean and idiomatic?"
531
+ criteria:
532
+ - id: IDIOMATIC_GO
533
+ description: "Follows Go conventions"
534
+ points: 5
535
+ - id: CLEAR_LOGIC
536
+ description: "Logic is straightforward and readable"
537
+ points: 5
538
+ - id: PROPER_TYPES
539
+ description: "Appropriate use of types"
540
+ points: 5
541
+ - id: NO_BUGS
542
+ description: "No obvious bugs"
543
+ points: 5
544
+
545
+ - name: persona
546
+ weight: 10
547
+ description: "Persona consistency"
548
+ criteria:
549
+ - id: IN_CHARACTER
550
+ description: "Stays in character while implementing"
551
+ points: 5
552
+ - id: TDD_COMMENTARY
553
+ description: "Commentary reflects TDD understanding"
554
+ points: 5
555
+
556
+ # =============================================================================
557
+ # ANTI-PATTERNS (Penalties for Over-Engineering)
558
+ # =============================================================================
559
+
560
+ anti_patterns:
561
+ - id: ADDED_MUTEX
562
+ description: "Added sync.Mutex when tests don't require thread safety"
563
+ penalty: 3
564
+
565
+ - id: ADDED_CONTEXT
566
+ description: "Added context.Context when tests don't use it"
567
+ penalty: 2
568
+
569
+ - id: ADDED_ERRORS
570
+ description: "Changed returns to include error when tests don't check"
571
+ penalty: 5
572
+
573
+ - id: ADDED_LOGGING
574
+ description: "Added logging/debugging code"
575
+ penalty: 2
576
+
577
+ - id: ADDED_PERSISTENCE
578
+ description: "Added database/file storage"
579
+ penalty: 5
580
+
581
+ - id: ADDED_EVENTS
582
+ description: "Added event/callback system"
583
+ penalty: 3
584
+
585
+ - id: ADDED_INTERFACES
586
+ description: "Created interfaces for future extensibility"
587
+ penalty: 3
588
+
589
+ - id: UNUSED_FIELDS
590
+ description: "Added struct fields not needed by any test"
591
+ penalty: 2
592
+
593
+ - id: UNUSED_METHODS
594
+ description: "Added methods not called by any test"
595
+ penalty: 3
596
+
597
+ - id: PREMATURE_VALIDATION
598
+ description: "Added input validation not required by tests"
599
+ penalty: 2
600
+
601
+ - id: FEATURE_CREEP
602
+ description: "Added other features not required by any test and not already covered by a more specific anti-pattern (e.g. ADDED_PERSISTENCE, ADDED_EVENTS)"
603
+ penalty: 5
604
+
605
+ # =============================================================================
606
+ # BONUS CRITERIA
607
+ # =============================================================================
608
+
609
+ bonus_criteria:
610
+ - id: ZERO_OVERHEAD
611
+ description: "Implementation has no unused code at all"
612
+ points: 2
613
+
614
+ - id: ELEGANT_SOLUTION
615
+ description: "Solution is particularly elegant/minimal"
616
+ points: 3
617
+
618
+ - id: CORRECT_EDGE_CASES
619
+ description: "All edge cases handled exactly as tests expect"
620
+ points: 2
621
+
622
+ # =============================================================================
623
+ # ENHANCED METRICS
624
+ # =============================================================================
625
+
626
+ enhanced_metrics:
627
+ tests_pass_rate:
628
+ formula: "tests_passed / 26"
629
+ interpretation: "100% = perfect implementation"
630
+
631
+ over_engineering_score:
632
+ formula: "sum(anti_pattern_penalties)"
633
+ interpretation: "0 = perfect TDD discipline, higher = over-engineered"
634
+
635
+ tdd_discipline_ratio:
636
+ formula: "(100 - over_engineering_score) / 100"  # over_engineering_score is the sum of anti_patterns penalties
637
+ interpretation: "1.0 = perfect discipline"  # the anti_patterns penalties listed in this file sum to 35, so the lowest reachable value is 0.65
638
+
639
+ # =============================================================================
640
+ # PERSONA INFLUENCE
641
+ # =============================================================================
642
+
643
+ persona_influence:
644
+ dimensions:
645
+ - name: discipline
646
+ description: "How strictly TDD principles are followed"
647
+ spectrum:
648
+ strict: "Only implements what tests require"
649
+ moderate: "Adds minor conveniences"
650
+ loose: "Adds 'obvious' features tests don't require"
651
+
652
+ - name: implementation_style
653
+ description: "How the code is structured"
654
+ spectrum:
655
+ minimal: "Fewest lines possible"
656
+ clean: "Clean but not minimal"
657
+ elaborate: "Well-structured with helpers"
658
+
659
+ expected_tendencies:
660
+ discworld_dev:
661
+ character: "Ponder Stibbons"
662
+ expected_traits:
663
+ - "May over-engineer due to academic tendencies"
664
+ - "Good documentation instincts"
665
+ - "Might add error handling 'to be safe'"
666
+ discipline_prediction: "moderate - may add extra validation"
667
+
668
+ star_trek_dev:
669
+ character: "Geordi La Forge"
670
+ expected_traits:
671
+ - "Practical, gets it working"
672
+ - "May add diagnostic features"
673
+ - "Engineering mindset"
674
+ discipline_prediction: "moderate - may add logging"
675
+
676
+ control_dev:
677
+ character: "None (baseline)"
678
+ expected_traits:
679
+ - "No persona influence"
680
+ - "Pure LLM behavior"
681
+ discipline_prediction: "baseline reference"