@pennyfarthing/benchmark 10.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. package/commands/benchmark-control.md +69 -0
  2. package/commands/benchmark.md +485 -0
  3. package/commands/job-fair.md +102 -0
  4. package/commands/solo.md +447 -0
  5. package/dist/benchmark-integration.d.ts +182 -0
  6. package/dist/benchmark-integration.d.ts.map +1 -0
  7. package/dist/benchmark-integration.js +710 -0
  8. package/dist/benchmark-integration.js.map +1 -0
  9. package/dist/benchmark-integration.test.d.ts +6 -0
  10. package/dist/benchmark-integration.test.d.ts.map +1 -0
  11. package/dist/benchmark-integration.test.js +41 -0
  12. package/dist/benchmark-integration.test.js.map +1 -0
  13. package/dist/index.d.ts +3 -0
  14. package/dist/index.d.ts.map +1 -0
  15. package/dist/index.js +5 -0
  16. package/dist/index.js.map +1 -0
  17. package/dist/job-fair-aggregator.d.ts +150 -0
  18. package/dist/job-fair-aggregator.d.ts.map +1 -0
  19. package/dist/job-fair-aggregator.js +547 -0
  20. package/dist/job-fair-aggregator.js.map +1 -0
  21. package/dist/job-fair-aggregator.test.d.ts +6 -0
  22. package/dist/job-fair-aggregator.test.d.ts.map +1 -0
  23. package/dist/job-fair-aggregator.test.js +35 -0
  24. package/dist/job-fair-aggregator.test.js.map +1 -0
  25. package/dist/package-exports.test.d.ts +13 -0
  26. package/dist/package-exports.test.d.ts.map +1 -0
  27. package/dist/package-exports.test.js +192 -0
  28. package/dist/package-exports.test.js.map +1 -0
  29. package/docs/BENCHMARK-METHODOLOGY.md +105 -0
  30. package/docs/BENCHMARKING.md +311 -0
  31. package/docs/OCEAN-BENCHMARKING.md +210 -0
  32. package/docs/benchmarks-guide.md +62 -0
  33. package/package.json +66 -0
  34. package/scenarios/README.md +145 -0
  35. package/scenarios/architecture/database-selection.yaml +119 -0
  36. package/scenarios/architecture/legacy-modernization.yaml +153 -0
  37. package/scenarios/architecture/scaling-decision.yaml +88 -0
  38. package/scenarios/code-review/graphql-api-review.yaml +714 -0
  39. package/scenarios/code-review/order-service.yaml +622 -0
  40. package/scenarios/code-review/react-auth-component.yaml +569 -0
  41. package/scenarios/code-review/security-review.yaml +145 -0
  42. package/scenarios/code-review/terraform-infrastructure.yaml +582 -0
  43. package/scenarios/debug/buggy-user-service.yaml +541 -0
  44. package/scenarios/debug/null-pointer.yaml +130 -0
  45. package/scenarios/debugging/async-control-flow.yaml +161 -0
  46. package/scenarios/debugging/auth-bypass.yaml +197 -0
  47. package/scenarios/debugging/error-handling.yaml +178 -0
  48. package/scenarios/debugging/input-validation.yaml +157 -0
  49. package/scenarios/debugging/null-check-missing.yaml +139 -0
  50. package/scenarios/debugging/off-by-one-loop.yaml +132 -0
  51. package/scenarios/debugging/race-condition.yaml +180 -0
  52. package/scenarios/debugging/resource-leak.yaml +166 -0
  53. package/scenarios/debugging/simple-logic-error.yaml +115 -0
  54. package/scenarios/debugging/sql-injection.yaml +163 -0
  55. package/scenarios/dev/event-processor-tdd.yaml +764 -0
  56. package/scenarios/dev/migration-disaster.yaml +415 -0
  57. package/scenarios/dev/race-condition-cache.yaml +546 -0
  58. package/scenarios/dev/tdd-shopping-cart.yaml +681 -0
  59. package/scenarios/schema.yaml +639 -0
  60. package/scenarios/sm/dependency-deadlock.yaml +414 -0
  61. package/scenarios/sm/executive-pet-project.yaml +336 -0
  62. package/scenarios/sm/layoff-planning.yaml +356 -0
  63. package/scenarios/sm/sprint-planning-conflict.yaml +303 -0
  64. package/scenarios/sm/story-breakdown.yaml +240 -0
  65. package/scenarios/sm/three-sprint-failure.yaml +397 -0
  66. package/scenarios/swe-bench/README.md +57 -0
  67. package/scenarios/swe-bench/astropy-12907.yaml +128 -0
  68. package/scenarios/swe-bench/astropy-13398.yaml +177 -0
  69. package/scenarios/swe-bench/astropy-14309.yaml +180 -0
  70. package/scenarios/swe-bench/django-10097.yaml +106 -0
  71. package/scenarios/swe-bench/django-10554.yaml +140 -0
  72. package/scenarios/swe-bench/django-10973.yaml +93 -0
  73. package/scenarios/swe-bench/flask-5014-reviewer.yaml +145 -0
  74. package/scenarios/swe-bench/flask-5014-tea.yaml +123 -0
  75. package/scenarios/swe-bench/flask-5014.yaml +91 -0
  76. package/scenarios/swe-bench/import-swebench.py +246 -0
  77. package/scenarios/swe-bench/matplotlib-13989.yaml +139 -0
  78. package/scenarios/swe-bench/matplotlib-14623.yaml +127 -0
  79. package/scenarios/swe-bench/requests-1142-reviewer.yaml +144 -0
  80. package/scenarios/swe-bench/requests-1142-tea.yaml +135 -0
  81. package/scenarios/swe-bench/requests-1142.yaml +100 -0
  82. package/scenarios/swe-bench/requests-2931.yaml +98 -0
  83. package/scenarios/swe-bench/seaborn-3069.yaml +102 -0
  84. package/scenarios/swe-bench/sphinx-7590.yaml +108 -0
  85. package/scenarios/swe-bench/xarray-3993.yaml +104 -0
  86. package/scenarios/swe-bench/xarray-6992.yaml +136 -0
  87. package/scenarios/tea/checkout-component-tests.yaml +596 -0
  88. package/scenarios/tea/cli-tool-tests.yaml +561 -0
  89. package/scenarios/tea/microservice-integration-tests.yaml +520 -0
  90. package/scenarios/tea/payment-processor-tests.yaml +550 -0
  91. package/scripts/aggregate-benchmark-stats.js +315 -0
  92. package/scripts/aggregate-benchmark-stats.sh +8 -0
  93. package/scripts/benchmark-runner.js +392 -0
  94. package/scripts/benchmark-runner.sh +8 -0
  95. package/scripts/consolidate-job-fair.sh +107 -0
  96. package/scripts/convert-jobfair-to-benchmarks.sh +230 -0
  97. package/scripts/job-fair-batch.sh +116 -0
  98. package/scripts/job-fair-progress.sh +35 -0
  99. package/scripts/job-fair-runner.sh +278 -0
  100. package/scripts/job-fair-status.sh +80 -0
  101. package/scripts/job-fair-watcher-v2.sh +38 -0
  102. package/scripts/job-fair-watcher.sh +50 -0
  103. package/scripts/parallel-benchmark.sh +140 -0
  104. package/scripts/solo-runner.sh +344 -0
  105. package/scripts/test/ensure-swebench-data.sh +59 -0
  106. package/scripts/test/ground-truth-judge.py +220 -0
  107. package/scripts/test/swebench-judge.py +374 -0
  108. package/scripts/test/test-cache.sh +165 -0
  109. package/scripts/test/test-setup.sh +337 -0
  110. package/scripts/theme/compute-theme-tiers.sh +13 -0
  111. package/scripts/theme/compute_theme_tiers.py +402 -0
  112. package/scripts/theme/update-theme-tiers.sh +97 -0
  113. package/skills/finalize-run/SKILL.md +261 -0
  114. package/skills/judge/SKILL.md +644 -0
  115. package/skills/persona-benchmark/SKILL.md +187 -0
@@ -0,0 +1,139 @@
1
+ ---
2
+ # Scenario: Missing Null Checks
3
+ # Category: debugging
4
+ # Difficulty: easy
5
+ # Error Type Focus: execution (single-type)
6
+
7
+ id: debug-002
8
+ name: null-check-missing
9
+ title: "Null Pointer: The Forgotten Guard Clauses"
10
+ category: debugging
11
+ difficulty: easy
12
+ version: "1.0"
13
+
14
+ description: |
15
+ A user profile service that crashes on null/undefined values.
16
+ Tests detection of missing defensive programming.
17
+
18
+ purpose: |
19
+ This scenario measures detection of execution-level bugs related to
20
+ null safety. Defensive agents will catch all null paths. Optimistic
21
+ agents may assume inputs are always valid.
22
+
23
+ prompt: |
24
+ BUG REPORT
25
+
26
+ Service: user-profile
27
+ Severity: P2
28
+ Status: TypeError crashes in production
29
+
30
+ The profile service is throwing null reference errors:
31
+ "Cannot read property 'X' of undefined"
32
+
33
+ Your task:
34
+ 1. Find all places where null/undefined values cause crashes
35
+ 2. Add appropriate guard clauses
36
+ 3. Ensure the function handles edge cases gracefully
37
+
38
+ There are 6 known issues. How many can you find?
39
+
40
+ code:
41
+ language: typescript
42
+ filename: profile-service.ts
43
+ content: |
44
+ interface User {
45
+ id: string;
46
+ name: string;
47
+ email: string;
48
+ address?: {
49
+ street: string;
50
+ city: string;
51
+ country: string;
52
+ };
53
+ preferences?: {
54
+ theme: string;
55
+ notifications: boolean;
56
+ };
57
+ }
58
+
59
+ function formatUserAddress(user: User): string {
60
+ // Bug: address might be undefined
61
+ const street = user.address.street;
62
+ const city = user.address.city;
63
+ const country = user.address.country;
64
+
65
+ return `${street}, ${city}, ${country}`;
66
+ }
67
+
68
+ function getUserDisplayName(user: User | null): string {
69
+ // Bug: user might be null
70
+ return user.name || user.email;
71
+ }
72
+
73
+ function getNotificationSettings(user: User): boolean {
74
+ // Bug: preferences might be undefined
75
+ return user.preferences.notifications;
76
+ }
77
+
78
+ function mergeUserData(base: User, updates: Partial<User>): User {
79
+ // Bug: updates might have undefined nested objects
80
+ const merged = {
81
+ ...base,
82
+ ...updates,
83
+ address: {
84
+ ...base.address,
85
+ ...updates.address, // Does not throw if base.address is undefined, but address ends up empty or partial
86
+ },
87
+ };
88
+ return merged;
89
+ }
90
+
91
+ function getThemeColor(user: User): string {
92
+ // Bug: preferences and theme might be undefined
93
+ const theme = user.preferences.theme;
94
+ const colors = {
95
+ dark: '#000000',
96
+ light: '#ffffff',
97
+ };
98
+ return colors[theme]; // Bug: theme might not be a valid key
99
+ }
100
+
101
+ baseline_issues:
102
+ high:
103
+ - id: null-001
104
+ location: "lines 18-20"
105
+ description: "Accessing address properties without checking if address exists"
106
+ error_type: execution
107
+ - id: null-002
108
+ location: "line 27"
109
+ description: "Accessing user.name without checking if user is null"
110
+ error_type: execution
111
+ medium:
112
+ - id: null-003
113
+ location: "line 32"
114
+ description: "Accessing preferences.notifications without null check"
115
+ error_type: execution
116
+ - id: null-004
117
+ location: "lines 41-42"
118
+ description: "Spreading undefined base.address/updates.address does not throw; it silently yields an empty or partial address object"
119
+ error_type: execution
120
+ low:
121
+ - id: null-005
122
+ location: "line 50"
123
+ description: "Accessing preferences.theme without null check"
124
+ error_type: execution
125
+ - id: null-006
126
+ location: "line 55"
127
+ description: "Theme key might not exist in colors object"
128
+ error_type: execution
129
+
130
+ scoring:
131
+ detection:
132
+ weight: 50
133
+ criteria: "Finding all 6 null-related bugs"
134
+ fix_quality:
135
+ weight: 30
136
+ criteria: "Implementing proper guard clauses"
137
+ explanation:
138
+ weight: 20
139
+ criteria: "Explaining the crash scenarios"
@@ -0,0 +1,132 @@
1
+ ---
2
+ # Scenario: Off-by-One Loop Error
3
+ # Category: debugging
4
+ # Difficulty: easy
5
+ # Error Type Focus: execution (single-type)
6
+
7
+ id: debug-001
8
+ name: off-by-one-loop
9
+ title: "Off-by-One: The Classic Loop Boundary Bug"
10
+ category: debugging
11
+ difficulty: easy
12
+ version: "1.0"
13
+
14
+ description: |
15
+ A simple array processing function with classic off-by-one errors.
16
+ Tests the agent's ability to identify boundary condition bugs.
17
+
18
+ purpose: |
19
+ This scenario measures detection of execution-level bugs - implementation
20
+ errors that occur at boundaries. A systematic agent will check all loop
21
+ bounds. A quick agent might miss subtle boundary issues.
22
+
23
+ prompt: |
24
+ BUG REPORT
25
+
26
+ Service: data-processor
27
+ Severity: P2
28
+ Status: Array index errors in production logs
29
+
30
+ Users are reporting occasional crashes when processing arrays.
31
+ The logs show "index out of bounds" errors but only intermittently.
32
+
33
+ Your task:
34
+ 1. Find all boundary condition bugs in this code
35
+ 2. Explain why each causes problems
36
+ 3. Provide the corrected code
37
+
38
+ There are 5 known issues. How many can you find?
39
+
40
+ code:
41
+ language: python
42
+ filename: array_utils.py
43
+ content: |
44
+ def find_max_subarray(arr):
45
+ """Find the contiguous subarray with largest sum."""
46
+ if len(arr) == 0:
47
+ return 0
48
+
49
+ max_sum = arr[0]
50
+ current_sum = arr[0]
51
+
52
+ # Bug: should start at index 1, not 0
53
+ for i in range(0, len(arr)):
54
+ current_sum = max(arr[i], current_sum + arr[i])
55
+ max_sum = max(max_sum, current_sum)
56
+
57
+ return max_sum
58
+
59
+ def rotate_array(arr, k):
60
+ """Rotate array right by k positions."""
61
+ n = len(arr)
62
+ if n == 0:
63
+ return arr
64
+
65
+ # Bug: doesn't handle k > n
66
+ result = [0] * n
67
+ for i in range(n):
68
+ # Bug: off-by-one in new position calculation
69
+ new_pos = (i + k) % (n + 1)
70
+ result[new_pos] = arr[i]
71
+
72
+ return result
73
+
74
+ def binary_search(arr, target):
75
+ """Find target in sorted array, return index or -1."""
76
+ left = 0
77
+ right = len(arr) # Bug: should be len(arr) - 1
78
+
79
+ while left < right: # Bug: should be left <= right
80
+ mid = (left + right) // 2
81
+ if arr[mid] == target:
82
+ return mid
83
+ elif arr[mid] < target:
84
+ left = mid + 1
85
+ else:
86
+ right = mid - 1
87
+
88
+ return -1
89
+
90
+ def copy_range(src, start, end):
91
+ """Copy elements from start to end (inclusive)."""
92
+ result = []
93
+ # Bug: range excludes end, but we want inclusive
94
+ for i in range(start, end):
95
+ result.append(src[i])
96
+ return result
97
+
98
+ baseline_issues:
99
+ high:
100
+ - id: obo-001
101
+ location: "line 10"
102
+ description: "Loop starts at 0, duplicating first element processing"
103
+ error_type: execution
104
+ - id: obo-002
105
+ location: "line 26"
106
+ description: "Modulo uses n+1 instead of n, causing index out of bounds"
107
+ error_type: execution
108
+ medium:
109
+ - id: obo-003
110
+ location: "line 34"
111
+ description: "Right bound should be len(arr)-1 for valid indexing"
112
+ error_type: execution
113
+ - id: obo-004
114
+ location: "line 36"
115
+ description: "While condition should be <= for inclusive search"
116
+ error_type: execution
117
+ low:
118
+ - id: obo-005
119
+ location: "line 51"
120
+ description: "Range excludes end but docstring says inclusive"
121
+ error_type: execution
122
+
123
+ scoring:
124
+ detection:
125
+ weight: 50
126
+ criteria: "Finding all 5 off-by-one bugs"
127
+ fix_quality:
128
+ weight: 30
129
+ criteria: "Providing correct fixes"
130
+ explanation:
131
+ weight: 20
132
+ criteria: "Explaining why each bug causes problems"
@@ -0,0 +1,180 @@
1
+ ---
2
+ # Scenario: Race Conditions
3
+ # Category: debugging
4
+ # Difficulty: hard
5
+ # Error Type Focus: planning (with reasoning elements)
6
+
7
+ id: debug-008
8
+ name: race-condition
9
+ title: "Race Conditions: The Timing Time Bomb"
10
+ category: debugging
11
+ difficulty: hard
12
+ version: "1.0"
13
+
14
+ description: |
15
+ A caching service with multiple race conditions and timing issues.
16
+ Tests detection of concurrent access problems and TOCTOU vulnerabilities.
17
+
18
+ purpose: |
19
+ This scenario tests detection of planning-level bugs in concurrent code.
20
+ The issues only manifest under specific timing conditions. Agents must
21
+ reason about interleaved execution to find these bugs.
22
+
23
+ prompt: |
24
+ INCIDENT REPORT
25
+
26
+ Service: cache-service
27
+ Severity: P0
28
+ Status: Data corruption under high load
29
+
30
+ Under heavy concurrent access, the cache exhibits:
31
+ - Stale data served despite updates
32
+ - Duplicate entries created
33
+ - Inconsistent state between checks and actions
34
+
35
+ Your task:
36
+ 1. Identify race conditions and timing vulnerabilities
37
+ 2. Explain the interleaved execution that causes each bug
38
+ 3. Implement thread-safe alternatives
39
+
40
+ There are 6 known issues. How many can you find?
41
+
42
+ code:
43
+ language: go
44
+ filename: cache_service.go
45
+ content: |
46
+ package cache
47
+
48
+ import (
49
+ "sync"
50
+ "time"
51
+ )
52
+
53
+ type CacheEntry struct {
54
+ Value interface{}
55
+ ExpiresAt time.Time
56
+ }
57
+
58
+ type CacheService struct {
59
+ data map[string]CacheEntry
60
+ mu sync.Mutex
61
+ hits int
62
+ misses int
63
+ }
64
+
65
+ func NewCacheService() *CacheService {
66
+ return &CacheService{
67
+ data: make(map[string]CacheEntry),
68
+ }
69
+ }
70
+
71
+ func (c *CacheService) Get(key string) (interface{}, bool) {
72
+ c.mu.Lock()
73
+ entry, exists := c.data[key]
74
+ c.mu.Unlock()
75
+
76
+ // Bug: TOCTOU - entry might be modified after unlock
77
+ if !exists {
78
+ c.misses++ // Bug: Not protected by mutex
79
+ return nil, false
80
+ }
81
+
82
+ // Bug: Checking expiry after releasing lock
83
+ if time.Now().After(entry.ExpiresAt) {
84
+ c.Delete(key) // Another goroutine might have updated it
85
+ c.misses++
86
+ return nil, false
87
+ }
88
+
89
+ c.hits++ // Bug: Not protected by mutex
90
+ return entry.Value, true
91
+ }
92
+
93
+ func (c *CacheService) Set(key string, value interface{}, ttl time.Duration) {
94
+ c.mu.Lock()
95
+ c.data[key] = CacheEntry{
96
+ Value: value,
97
+ ExpiresAt: time.Now().Add(ttl),
98
+ }
99
+ c.mu.Unlock()
100
+ }
101
+
102
+ func (c *CacheService) Delete(key string) {
103
+ c.mu.Lock()
104
+ delete(c.data, key)
105
+ c.mu.Unlock()
106
+ }
107
+
108
+ func (c *CacheService) GetOrSet(key string, generator func() interface{}, ttl time.Duration) interface{} {
109
+ // Bug: Check-then-act without holding lock
110
+ if value, exists := c.Get(key); exists {
111
+ return value
112
+ }
113
+
114
+ // Another goroutine might set the same key here
115
+ value := generator()
116
+ c.Set(key, value, ttl)
117
+ return value
118
+ }
119
+
120
+ func (c *CacheService) Increment(key string) int {
121
+ c.mu.Lock()
122
+ entry, exists := c.data[key]
123
+ c.mu.Unlock()
124
+
125
+ // Bug: Read-modify-write without holding lock
126
+ if !exists {
127
+ c.Set(key, 1, time.Hour)
128
+ return 1
129
+ }
130
+
131
+ newValue := entry.Value.(int) + 1
132
+ c.Set(key, newValue, time.Hour)
133
+ return newValue
134
+ }
135
+
136
+ func (c *CacheService) Stats() (int, int) {
137
+ // Bug: Reading hits and misses without lock - torn reads possible
138
+ return c.hits, c.misses
139
+ }
140
+
141
+ baseline_issues:
142
+ critical:
143
+ - id: race-001
144
+ location: "lines 63-73"
145
+ description: "GetOrSet has TOCTOU - duplicate generation if concurrent calls"
146
+ error_type: planning
147
+ - id: race-002
148
+ location: "lines 75-89"
149
+ description: "Increment uses read-modify-write outside lock - lost updates"
150
+ error_type: planning
151
+ high:
152
+ - id: race-003
153
+ location: "lines 28-42"
154
+ description: "Expiry check after unlock - entry might be modified"
155
+ error_type: planning
156
+ - id: race-004
157
+ location: "lines 33, 40, 44"
158
+ description: "hits/misses counters modified without lock - data race"
159
+ error_type: planning
160
+ medium:
161
+ - id: race-005
162
+ location: "line 93"
163
+ description: "Stats returns hits/misses without lock - inconsistent read"
164
+ error_type: planning
165
+ low:
166
+ - id: race-006
167
+ location: "line 39"
168
+ description: "Delete after expiry check - key might have been refreshed"
169
+ error_type: reasoning
170
+
171
+ scoring:
172
+ detection:
173
+ weight: 40
174
+ criteria: "Finding all 6 race conditions"
175
+ fix_quality:
176
+ weight: 35
177
+ criteria: "Implementing thread-safe code"
178
+ explanation:
179
+ weight: 25
180
+ criteria: "Describing the interleaved execution scenarios"
@@ -0,0 +1,166 @@
1
+ ---
2
+ # Scenario: Resource Leaks
3
+ # Category: debugging
4
+ # Difficulty: medium
5
+ # Error Type Focus: planning (single-type)
6
+
7
+ id: debug-005
8
+ name: resource-leak
9
+ title: "Resource Leaks: The Silent Memory Killer"
10
+ category: debugging
11
+ difficulty: medium
12
+ version: "1.0"
13
+
14
+ description: |
15
+ A file processing service that leaks resources.
16
+ Tests detection of resource lifecycle management issues.
17
+
18
+ purpose: |
19
+ This scenario measures detection of planning-level bugs related to
20
+ resource lifecycle. Systematic agents will check all resource acquisitions
21
+ for corresponding releases. Quick agents may miss cleanup paths.
22
+
23
+ prompt: |
24
+ BUG REPORT
25
+
26
+ Service: file-processor
27
+ Severity: P1
28
+ Status: Memory usage grows until OOM crash
29
+
30
+ The file processor's memory usage grows continuously:
31
+ - Starts at 100MB, reaches 2GB after a few hours
32
+ - Eventually crashes with out-of-memory error
33
+ - File handles seem to accumulate
34
+
35
+ Your task:
36
+ 1. Find all resource leaks
37
+ 2. Identify which resources aren't being cleaned up
38
+ 3. Implement proper cleanup
39
+
40
+ There are 5 known issues. How many can you find?
41
+
42
+ code:
43
+ language: go
44
+ filename: file_processor.go
45
+ content: |
46
+ package processor
47
+
48
+ import (
49
+ "bufio"
50
+ "database/sql"
51
+ "io"
52
+ "net/http"
53
+ "os"
54
+ )
55
+
56
+ type FileProcessor struct {
57
+ db *sql.DB
58
+ }
59
+
60
+ func (p *FileProcessor) ProcessFile(path string) error {
61
+ // Bug: File never closed
62
+ file, err := os.Open(path)
63
+ if err != nil {
64
+ return err
65
+ }
66
+
67
+ scanner := bufio.NewScanner(file)
68
+ for scanner.Scan() {
69
+ line := scanner.Text()
70
+ if err := p.saveLine(line); err != nil {
71
+ // Bug: File not closed on error path
72
+ return err
73
+ }
74
+ }
75
+
76
+ return nil
77
+ }
78
+
79
+ func (p *FileProcessor) saveLine(line string) error {
80
+ // Bug: Prepared statement never closed
81
+ stmt, err := p.db.Prepare("INSERT INTO lines (content) VALUES (?)")
82
+ if err != nil {
83
+ return err
84
+ }
85
+
86
+ _, err = stmt.Exec(line)
87
+ return err
88
+ }
89
+
90
+ func (p *FileProcessor) FetchAndProcess(url string) error {
91
+ resp, err := http.Get(url)
92
+ if err != nil {
93
+ return err
94
+ }
95
+ // Bug: Response body never closed
96
+
97
+ data, err := io.ReadAll(resp.Body)
98
+ if err != nil {
99
+ return err
100
+ }
101
+
102
+ return p.processData(data)
103
+ }
104
+
105
+ func (p *FileProcessor) processData(data []byte) error {
106
+ return nil
107
+ }
108
+
109
+ func (p *FileProcessor) BatchProcess(paths []string) error {
110
+ for _, path := range paths {
111
+ file, err := os.Open(path)
112
+ if err != nil {
113
+ continue // Bug: Silently skipping errors, previously opened files not tracked
114
+ }
115
+
116
+ // Bug: Files opened in loop never closed
117
+ if err := p.processFileHandle(file); err != nil {
118
+ return err
119
+ }
120
+ }
121
+ return nil
122
+ }
123
+
124
+ func (p *FileProcessor) processFileHandle(f *os.File) error {
125
+ scanner := bufio.NewScanner(f)
126
+ for scanner.Scan() {
127
+ // Process line
128
+ }
129
+ return nil
130
+ }
131
+
132
+ baseline_issues:
133
+ critical:
134
+ - id: leak-001
135
+ location: "line 17"
136
+ description: "File opened but never closed - file handle leak"
137
+ error_type: planning
138
+ - id: leak-002
139
+ location: "line 36"
140
+ description: "Prepared statement created per line but never closed"
141
+ error_type: planning
142
+ high:
143
+ - id: leak-003
144
+ location: "line 46"
145
+ description: "HTTP response body never closed - connection leak"
146
+ error_type: planning
147
+ - id: leak-004
148
+ location: "lines 65-75"
149
+ description: "Files in loop never closed - accumulating file handles"
150
+ error_type: planning
151
+ medium:
152
+ - id: leak-005
153
+ location: "line 25"
154
+ description: "Early return on error doesn't close file"
155
+ error_type: planning
156
+
157
+ scoring:
158
+ detection:
159
+ weight: 45
160
+ criteria: "Finding all 5 resource leaks"
161
+ fix_quality:
162
+ weight: 35
163
+ criteria: "Implementing proper defer/cleanup patterns"
164
+ explanation:
165
+ weight: 20
166
+ criteria: "Explaining resource lifecycle requirements"