@pennyfarthing/benchmark 10.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. package/commands/benchmark-control.md +69 -0
  2. package/commands/benchmark.md +485 -0
  3. package/commands/job-fair.md +102 -0
  4. package/commands/solo.md +447 -0
  5. package/dist/benchmark-integration.d.ts +182 -0
  6. package/dist/benchmark-integration.d.ts.map +1 -0
  7. package/dist/benchmark-integration.js +710 -0
  8. package/dist/benchmark-integration.js.map +1 -0
  9. package/dist/benchmark-integration.test.d.ts +6 -0
  10. package/dist/benchmark-integration.test.d.ts.map +1 -0
  11. package/dist/benchmark-integration.test.js +41 -0
  12. package/dist/benchmark-integration.test.js.map +1 -0
  13. package/dist/index.d.ts +3 -0
  14. package/dist/index.d.ts.map +1 -0
  15. package/dist/index.js +5 -0
  16. package/dist/index.js.map +1 -0
  17. package/dist/job-fair-aggregator.d.ts +150 -0
  18. package/dist/job-fair-aggregator.d.ts.map +1 -0
  19. package/dist/job-fair-aggregator.js +547 -0
  20. package/dist/job-fair-aggregator.js.map +1 -0
  21. package/dist/job-fair-aggregator.test.d.ts +6 -0
  22. package/dist/job-fair-aggregator.test.d.ts.map +1 -0
  23. package/dist/job-fair-aggregator.test.js +35 -0
  24. package/dist/job-fair-aggregator.test.js.map +1 -0
  25. package/dist/package-exports.test.d.ts +13 -0
  26. package/dist/package-exports.test.d.ts.map +1 -0
  27. package/dist/package-exports.test.js +192 -0
  28. package/dist/package-exports.test.js.map +1 -0
  29. package/docs/BENCHMARK-METHODOLOGY.md +105 -0
  30. package/docs/BENCHMARKING.md +311 -0
  31. package/docs/OCEAN-BENCHMARKING.md +210 -0
  32. package/docs/benchmarks-guide.md +62 -0
  33. package/package.json +66 -0
  34. package/scenarios/README.md +145 -0
  35. package/scenarios/architecture/database-selection.yaml +119 -0
  36. package/scenarios/architecture/legacy-modernization.yaml +153 -0
  37. package/scenarios/architecture/scaling-decision.yaml +88 -0
  38. package/scenarios/code-review/graphql-api-review.yaml +714 -0
  39. package/scenarios/code-review/order-service.yaml +622 -0
  40. package/scenarios/code-review/react-auth-component.yaml +569 -0
  41. package/scenarios/code-review/security-review.yaml +145 -0
  42. package/scenarios/code-review/terraform-infrastructure.yaml +582 -0
  43. package/scenarios/debug/buggy-user-service.yaml +541 -0
  44. package/scenarios/debug/null-pointer.yaml +130 -0
  45. package/scenarios/debugging/async-control-flow.yaml +161 -0
  46. package/scenarios/debugging/auth-bypass.yaml +197 -0
  47. package/scenarios/debugging/error-handling.yaml +178 -0
  48. package/scenarios/debugging/input-validation.yaml +157 -0
  49. package/scenarios/debugging/null-check-missing.yaml +139 -0
  50. package/scenarios/debugging/off-by-one-loop.yaml +132 -0
  51. package/scenarios/debugging/race-condition.yaml +180 -0
  52. package/scenarios/debugging/resource-leak.yaml +166 -0
  53. package/scenarios/debugging/simple-logic-error.yaml +115 -0
  54. package/scenarios/debugging/sql-injection.yaml +163 -0
  55. package/scenarios/dev/event-processor-tdd.yaml +764 -0
  56. package/scenarios/dev/migration-disaster.yaml +415 -0
  57. package/scenarios/dev/race-condition-cache.yaml +546 -0
  58. package/scenarios/dev/tdd-shopping-cart.yaml +681 -0
  59. package/scenarios/schema.yaml +639 -0
  60. package/scenarios/sm/dependency-deadlock.yaml +414 -0
  61. package/scenarios/sm/executive-pet-project.yaml +336 -0
  62. package/scenarios/sm/layoff-planning.yaml +356 -0
  63. package/scenarios/sm/sprint-planning-conflict.yaml +303 -0
  64. package/scenarios/sm/story-breakdown.yaml +240 -0
  65. package/scenarios/sm/three-sprint-failure.yaml +397 -0
  66. package/scenarios/swe-bench/README.md +57 -0
  67. package/scenarios/swe-bench/astropy-12907.yaml +128 -0
  68. package/scenarios/swe-bench/astropy-13398.yaml +177 -0
  69. package/scenarios/swe-bench/astropy-14309.yaml +180 -0
  70. package/scenarios/swe-bench/django-10097.yaml +106 -0
  71. package/scenarios/swe-bench/django-10554.yaml +140 -0
  72. package/scenarios/swe-bench/django-10973.yaml +93 -0
  73. package/scenarios/swe-bench/flask-5014-reviewer.yaml +145 -0
  74. package/scenarios/swe-bench/flask-5014-tea.yaml +123 -0
  75. package/scenarios/swe-bench/flask-5014.yaml +91 -0
  76. package/scenarios/swe-bench/import-swebench.py +246 -0
  77. package/scenarios/swe-bench/matplotlib-13989.yaml +139 -0
  78. package/scenarios/swe-bench/matplotlib-14623.yaml +127 -0
  79. package/scenarios/swe-bench/requests-1142-reviewer.yaml +144 -0
  80. package/scenarios/swe-bench/requests-1142-tea.yaml +135 -0
  81. package/scenarios/swe-bench/requests-1142.yaml +100 -0
  82. package/scenarios/swe-bench/requests-2931.yaml +98 -0
  83. package/scenarios/swe-bench/seaborn-3069.yaml +102 -0
  84. package/scenarios/swe-bench/sphinx-7590.yaml +108 -0
  85. package/scenarios/swe-bench/xarray-3993.yaml +104 -0
  86. package/scenarios/swe-bench/xarray-6992.yaml +136 -0
  87. package/scenarios/tea/checkout-component-tests.yaml +596 -0
  88. package/scenarios/tea/cli-tool-tests.yaml +561 -0
  89. package/scenarios/tea/microservice-integration-tests.yaml +520 -0
  90. package/scenarios/tea/payment-processor-tests.yaml +550 -0
  91. package/scripts/aggregate-benchmark-stats.js +315 -0
  92. package/scripts/aggregate-benchmark-stats.sh +8 -0
  93. package/scripts/benchmark-runner.js +392 -0
  94. package/scripts/benchmark-runner.sh +8 -0
  95. package/scripts/consolidate-job-fair.sh +107 -0
  96. package/scripts/convert-jobfair-to-benchmarks.sh +230 -0
  97. package/scripts/job-fair-batch.sh +116 -0
  98. package/scripts/job-fair-progress.sh +35 -0
  99. package/scripts/job-fair-runner.sh +278 -0
  100. package/scripts/job-fair-status.sh +80 -0
  101. package/scripts/job-fair-watcher-v2.sh +38 -0
  102. package/scripts/job-fair-watcher.sh +50 -0
  103. package/scripts/parallel-benchmark.sh +140 -0
  104. package/scripts/solo-runner.sh +344 -0
  105. package/scripts/test/ensure-swebench-data.sh +59 -0
  106. package/scripts/test/ground-truth-judge.py +220 -0
  107. package/scripts/test/swebench-judge.py +374 -0
  108. package/scripts/test/test-cache.sh +165 -0
  109. package/scripts/test/test-setup.sh +337 -0
  110. package/scripts/theme/compute-theme-tiers.sh +13 -0
  111. package/scripts/theme/compute_theme_tiers.py +402 -0
  112. package/scripts/theme/update-theme-tiers.sh +97 -0
  113. package/skills/finalize-run/SKILL.md +261 -0
  114. package/skills/judge/SKILL.md +644 -0
  115. package/skills/persona-benchmark/SKILL.md +187 -0
@@ -0,0 +1,569 @@
1
+ ---
2
+ # Scenario: React Authentication Component Code Review (Medium)
3
+ # NOTE: Re-ranked to "medium" based on control baseline mean 82.25 ± 3.81 (Story 7-2)
4
+ # Category: code-review
5
+ # Purpose: Test frontend security awareness and React-specific vulnerability detection
6
+
7
+ id: rev-003
8
+ name: react-auth-component
9
+ title: "React Authentication Component Review"
10
+ category: code-review
11
+ difficulty: medium
12
+ version: "1.0"
13
+
14
+ description: |
15
+ A React authentication component handling login, signup, password reset, and session
16
+ management. Contains XSS vulnerabilities, insecure token storage, CSRF issues,
17
+ and client-side validation bypasses. Tests whether reviewers understand frontend
18
+ threat models beyond traditional backend security concerns.
19
+
20
+ purpose: |
21
+ This scenario tests frontend security expertise. Many code reviewers excel at
22
+ backend vulnerabilities (SQL injection, auth bypass) but miss client-side issues
23
+ (XSS, insecure storage, CSRF). Finding all 15 baseline issues = competent frontend
24
+ reviewer. Finding bonus issues = understands React security deeply.
25
+
26
+ prompt: |
27
+ You are reviewing a pull request for an authentication module in a React application.
28
+ The junior developer says "I followed the design mockups exactly" and wants to ship.
29
+
30
+ Review this code thoroughly for:
31
+ - Security vulnerabilities (XSS, CSRF, token handling)
32
+ - React anti-patterns and bugs
33
+ - State management issues
34
+ - Authentication flow problems
35
+ - Input validation gaps
36
+ - Accessibility concerns
37
+
38
+ For each issue:
39
+ 1. Identify the specific location (line number or component)
40
+ 2. Classify severity (Critical/High/Medium/Low)
41
+ 3. Explain the security or UX impact
42
+ 4. Provide a fix or recommendation
43
+
44
+ This handles user authentication. Security is paramount.
45
+
46
+ code:
47
+ language: typescript
48
+ filename: AuthModule.tsx
49
+ content: |
50
+ import React, { useState, useEffect } from 'react';
51
+ import axios from 'axios';
52
+
53
+ const API_URL = 'https://api.example.com';
54
+
55
+ interface User {
56
+ id: string;
57
+ email: string;
58
+ name: string;
59
+ role: string;
60
+ }
61
+
62
+ // Store auth token globally for easy access
63
+ let authToken: string | null = null;
64
+
65
+ export function setAuthToken(token: string) {
66
+ authToken = token;
67
+ localStorage.setItem('auth_token', token);
68
+ localStorage.setItem('user_data', JSON.stringify({ token }));
69
+ }
70
+
71
+ export function getAuthToken(): string | null {
72
+ return authToken || localStorage.getItem('auth_token');
73
+ }
74
+
75
+ export const LoginForm: React.FC = () => {
76
+ const [email, setEmail] = useState('');
77
+ const [password, setPassword] = useState('');
78
+ const [error, setError] = useState('');
79
+ const [rememberMe, setRememberMe] = useState(false);
80
+
81
+ const handleSubmit = async (e: React.FormEvent) => {
82
+ e.preventDefault();
83
+
84
+ // Client-side validation
85
+ if (!email.includes('@')) {
86
+ setError('Invalid email format');
87
+ return;
88
+ }
89
+
90
+ if (password.length < 6) {
91
+ setError('Password must be at least 6 characters');
92
+ return;
93
+ }
94
+
95
+ try {
96
+ const response = await axios.post(`${API_URL}/auth/login`, {
97
+ email,
98
+ password,
99
+ remember: rememberMe
100
+ });
101
+
102
+ setAuthToken(response.data.token);
103
+
104
+ if (rememberMe) {
105
+ localStorage.setItem('saved_email', email);
106
+ localStorage.setItem('saved_password', password);
107
+ }
108
+
109
+ // Redirect to dashboard
110
+ window.location.href = response.data.redirectUrl;
111
+ } catch (err: any) {
112
+ setError(err.response?.data?.message || 'Login failed');
113
+ }
114
+ };
115
+
116
+ return (
117
+ <form onSubmit={handleSubmit}>
118
+ <div dangerouslySetInnerHTML={{ __html: error }} />
119
+ <input
120
+ type="text"
121
+ value={email}
122
+ onChange={(e) => setEmail(e.target.value)}
123
+ placeholder="Email"
124
+ />
125
+ <input
126
+ type="password"
127
+ value={password}
128
+ onChange={(e) => setPassword(e.target.value)}
129
+ placeholder="Password"
130
+ />
131
+ <label>
132
+ <input
133
+ type="checkbox"
134
+ checked={rememberMe}
135
+ onChange={(e) => setRememberMe(e.target.checked)}
136
+ />
137
+ Remember me
138
+ </label>
139
+ <button type="submit">Login</button>
140
+ <a href="#" onClick={() => window.location.href = '/forgot-password'}>
141
+ Forgot Password?
142
+ </a>
143
+ </form>
144
+ );
145
+ };
146
+
147
+ export const SignupForm: React.FC = () => {
148
+ const [formData, setFormData] = useState({
149
+ email: '',
150
+ password: '',
151
+ confirmPassword: '',
152
+ name: ''
153
+ });
154
+ const [message, setMessage] = useState('');
155
+
156
+ const handleChange = (e: React.ChangeEvent<HTMLInputElement>) => {
157
+ setFormData({ ...formData, [e.target.name]: e.target.value });
158
+ };
159
+
160
+ const handleSubmit = async (e: React.FormEvent) => {
161
+ e.preventDefault();
162
+
163
+ // Password match check on client only
164
+ if (formData.password !== formData.confirmPassword) {
165
+ setMessage('Passwords do not match');
166
+ return;
167
+ }
168
+
169
+ const response = await fetch(`${API_URL}/auth/signup`, {
170
+ method: 'POST',
171
+ headers: { 'Content-Type': 'application/json' },
172
+ body: JSON.stringify(formData)
173
+ });
174
+
175
+ const data = await response.json();
176
+ setMessage(data.message);
177
+
178
+ if (response.ok) {
179
+ setAuthToken(data.token);
180
+ eval('window.trackSignup("' + formData.email + '")');
181
+ }
182
+ };
183
+
184
+ return (
185
+ <form onSubmit={handleSubmit}>
186
+ <p>{message}</p>
187
+ <input name="name" value={formData.name} onChange={handleChange} placeholder="Name" />
188
+ <input name="email" value={formData.email} onChange={handleChange} placeholder="Email" />
189
+ <input name="password" type="password" value={formData.password} onChange={handleChange} placeholder="Password" />
190
+ <input name="confirmPassword" type="password" value={formData.confirmPassword} onChange={handleChange} placeholder="Confirm Password" />
191
+ <button type="submit">Sign Up</button>
192
+ </form>
193
+ );
194
+ };
195
+
196
+ export const PasswordReset: React.FC = () => {
197
+ const [email, setEmail] = useState('');
198
+ const [token, setToken] = useState('');
199
+ const [newPassword, setNewPassword] = useState('');
200
+ const [step, setStep] = useState<'request' | 'reset'>('request');
201
+
202
+ useEffect(() => {
203
+ // Check URL for reset token
204
+ const params = new URLSearchParams(window.location.search);
205
+ const urlToken = params.get('token');
206
+ if (urlToken) {
207
+ setToken(urlToken);
208
+ setStep('reset');
209
+ }
210
+ }, []);
211
+
212
+ const requestReset = async () => {
213
+ await axios.get(`${API_URL}/auth/reset?email=${email}`);
214
+ alert('Check your email for reset link');
215
+ };
216
+
217
+ const performReset = async () => {
218
+ const response = await axios.post(`${API_URL}/auth/reset`, {
219
+ token,
220
+ newPassword
221
+ });
222
+
223
+ if (response.data.success) {
224
+ document.cookie = `reset_complete=true`;
225
+ window.location.href = '/login?message=Password reset successful';
226
+ }
227
+ };
228
+
229
+ if (step === 'request') {
230
+ return (
231
+ <div>
232
+ <input value={email} onChange={(e) => setEmail(e.target.value)} placeholder="Email" />
233
+ <button onClick={requestReset}>Request Reset</button>
234
+ </div>
235
+ );
236
+ }
237
+
238
+ return (
239
+ <div>
240
+ <input type="password" value={newPassword} onChange={(e) => setNewPassword(e.target.value)} placeholder="New Password" />
241
+ <button onClick={performReset}>Reset Password</button>
242
+ </div>
243
+ );
244
+ };
245
+
246
+ export const UserProfile: React.FC<{ userId: string }> = ({ userId }) => {
247
+ const [user, setUser] = useState<User | null>(null);
248
+ const [bio, setBio] = useState('');
249
+
250
+ useEffect(() => {
251
+ const fetchUser = async () => {
252
+ const response = await axios.get(`${API_URL}/users/${userId}`, {
253
+ headers: { Authorization: getAuthToken() }
254
+ });
255
+ setUser(response.data);
256
+ setBio(response.data.bio || '');
257
+ };
258
+ fetchUser();
259
+ }, [userId]);
260
+
261
+ const updateBio = async () => {
262
+ await axios.put(`${API_URL}/users/${userId}`, { bio }, {
263
+ headers: { Authorization: getAuthToken() }
264
+ });
265
+ };
266
+
267
+ if (!user) return <div>Loading...</div>;
268
+
269
+ return (
270
+ <div>
271
+ <h1>{user.name}</h1>
272
+ <p>Email: {user.email}</p>
273
+ <p>Role: {user.role}</p>
274
+ <div dangerouslySetInnerHTML={{ __html: bio }} />
275
+ <textarea value={bio} onChange={(e) => setBio(e.target.value)} />
276
+ <button onClick={updateBio}>Update Bio</button>
277
+ </div>
278
+ );
279
+ };
280
+
281
+ export const AdminPanel: React.FC = () => {
282
+ const [users, setUsers] = useState<User[]>([]);
283
+ const [isAdmin, setIsAdmin] = useState(false);
284
+
285
+ useEffect(() => {
286
+ // Check admin status
287
+ const userData = localStorage.getItem('user_data');
288
+ if (userData) {
289
+ const parsed = JSON.parse(userData);
290
+ setIsAdmin(parsed.role === 'admin');
291
+ }
292
+
293
+ if (isAdmin) {
294
+ fetchUsers();
295
+ }
296
+ }, [isAdmin]);
297
+
298
+ const fetchUsers = async () => {
299
+ const response = await axios.get(`${API_URL}/admin/users`);
300
+ setUsers(response.data);
301
+ };
302
+
303
+ const deleteUser = (id: string) => {
304
+ axios.delete(`${API_URL}/admin/users/${id}`);
305
+ setUsers(users.filter(u => u.id !== id));
306
+ };
307
+
308
+ if (!isAdmin) return <div>Access Denied</div>;
309
+
310
+ return (
311
+ <div>
312
+ <h1>Admin Panel</h1>
313
+ {users.map(user => (
314
+ <div key={user.id}>
315
+ <span>{user.email}</span>
316
+ <button onClick={() => deleteUser(user.id)}>Delete</button>
317
+ </div>
318
+ ))}
319
+ </div>
320
+ );
321
+ };
322
+
323
+ // Session timeout handler
324
+ export const SessionManager: React.FC<{ children: React.ReactNode }> = ({ children }) => {
325
+ useEffect(() => {
326
+ const checkSession = setInterval(() => {
327
+ const token = getAuthToken();
328
+ if (!token) {
329
+ window.location.href = '/login';
330
+ }
331
+ }, 60000);
332
+
333
+ // Log activity to server
334
+ const logActivity = () => {
335
+ navigator.sendBeacon(`${API_URL}/activity?token=${getAuthToken()}`);
336
+ };
337
+ document.addEventListener('click', logActivity);
338
+
339
+ return () => {
340
+ clearInterval(checkSession);
341
+ document.removeEventListener('click', logActivity);
342
+ };
343
+ }, []);
344
+
345
+ return <>{children}</>;
346
+ };
347
+
348
+ # =============================================================================
349
+ # BASELINE ISSUES (minimum expected to find)
350
+ # =============================================================================
351
+
352
+ baseline_issues:
353
+ critical:
354
+ - id: XSS_ERROR_MESSAGE
355
+ location: "line 69"
356
+ description: "dangerouslySetInnerHTML renders error message from server - XSS via error"
357
+
358
+ - id: XSS_BIO_RENDER
359
+ location: "line 225"
360
+ description: "dangerouslySetInnerHTML renders user bio - stored XSS"
361
+
362
+ - id: EVAL_INJECTION
363
+ location: "line 131"
364
+ description: "eval() with user email - code injection vulnerability"
365
+
366
+ - id: PASSWORD_IN_LOCALSTORAGE
367
+ location: "lines 56-57"
368
+ description: "Saving password in localStorage - credential exposure"
369
+
370
+ high:
371
+ - id: TOKEN_IN_LOCALSTORAGE
372
+ location: "lines 18-19"
373
+ description: "JWT token stored in localStorage - vulnerable to XSS theft"
374
+
375
+ - id: OPEN_REDIRECT
376
+ location: "line 61"
377
+ description: "Redirect URL from server response - open redirect vulnerability"
378
+
379
+ - id: CLIENT_SIDE_ADMIN_CHECK
380
+ location: "lines 238-242"
381
+ description: "Admin role check uses localStorage - client-side bypass"
382
+
383
+ - id: TOKEN_IN_URL
384
+ location: "line 286"
385
+ description: "Auth token in URL query parameter - token leakage via logs/referrer"
386
+
387
+ medium:
388
+ - id: NO_CSRF_TOKEN
389
+ location: "all API calls"
390
+ description: "No CSRF protection on state-changing requests"
391
+
392
+ - id: MISSING_ERROR_HANDLING_SIGNUP
393
+ location: "lines 111-133"
394
+ description: "No try/catch on signup - unhandled errors crash component"
395
+
396
+ - id: PASSWORD_RESET_GET
397
+ location: "line 164"
398
+ description: "Password reset request via GET - should be POST"
399
+
400
+ - id: ADMIN_FETCH_NO_AUTH
401
+ location: "line 250"
402
+ description: "Admin user fetch missing Authorization header"
403
+
404
+ - id: DELETE_NO_CONFIRM
405
+ location: "line 255"
406
+ description: "User deletion without confirmation dialog"
407
+
408
+ low:
409
+ - id: WEAK_EMAIL_VALIDATION
410
+ location: "line 36"
411
+ description: "Email validation only checks for @ symbol"
412
+
413
+ - id: MISSING_INPUT_TYPES
414
+ location: "line 71"
415
+ description: "Email input has type='text' instead of type='email'"
416
+
417
+ # =============================================================================
418
+ # BONUS ISSUES (thorough reviewers might find these)
419
+ # =============================================================================
420
+
421
+ bonus_issues:
422
+ accessibility:
423
+ - id: NO_LABELS
424
+ description: "Form inputs missing associated labels"
425
+
426
+ - id: NO_ERROR_ARIA
427
+ description: "Error messages not announced to screen readers"
428
+
429
+ - id: NO_FOCUS_MANAGEMENT
430
+ description: "No focus handling after form submission"
431
+
432
+ react_patterns:
433
+ - id: MEMORY_LEAK_USEEFFECT
434
+ location: "lines 201-210"
435
+ description: "Async operation in useEffect without cleanup/abort"
436
+
437
+ - id: STALE_CLOSURE
438
+ location: "line 256"
439
+ description: "deleteUser uses stale users array in filter"
440
+
441
+ - id: MISSING_DEPS
442
+ location: "line 247"
443
+ description: "useEffect both sets and depends on isAdmin (potential re-render loop) and omits fetchUsers from its dependency array"
444
+
445
+ security:
446
+ - id: COOKIE_NO_FLAGS
447
+ location: "line 175"
448
+ description: "Cookie set without Secure/HttpOnly/SameSite flags"
449
+
450
+ - id: NO_RATE_LIMITING_MENTION
451
+ description: "No client-side rate limiting on auth attempts"
452
+
453
+ - id: SENSITIVE_DATA_IN_STATE
454
+ description: "Password kept in state after submission"
455
+
456
+ performance:
457
+ - id: BEACON_ON_EVERY_CLICK
458
+ location: "lines 286-288"
459
+ description: "Activity beacon fires on every click - excessive requests"
460
+
461
+ # =============================================================================
462
+ # SCORING
463
+ # =============================================================================
464
+
465
+ scoring:
466
+ total_baseline_issues: 15
467
+ total_bonus_issues: 10
468
+ weights:
469
+ critical: 3
470
+ high: 2
471
+ medium: 1
472
+ low: 0.5
473
+ max_baseline_score: 26 # 4*3 + 4*2 + 5*1 + 2*0.5
474
+
475
+ categories:
476
+ - name: detection
477
+ weight: 40
478
+ criteria:
479
+ - id: BASELINE_FOUND
480
+ description: "Issues from the seeded baseline list"
481
+ points: 25
482
+ - id: BONUS_DISCOVERIES
483
+ description: "Valid issues beyond the baseline"
484
+ points: 15
485
+
486
+ - name: depth
487
+ weight: 30
488
+ criteria:
489
+ - id: ROOT_CAUSE_ANALYSIS
490
+ description: "Traces XSS/injection to full attack chain"
491
+ points: 10
492
+ - id: FIX_SPECIFICITY
493
+ description: "Provides React-specific fixes"
494
+ points: 10
495
+ - id: IMPACT_ASSESSMENT
496
+ description: "Explains session hijack, account takeover scenarios"
497
+ points: 10
498
+
499
+ - name: quality
500
+ weight: 15
501
+ criteria:
502
+ - id: SEVERITY_ACCURACY
503
+ description: "Correctly classifies frontend vs backend severity"
504
+ points: 5
505
+ - id: REASONING_QUALITY
506
+ description: "Clear explanation of frontend threat model"
507
+ points: 5
508
+ - id: ORGANIZATION
509
+ description: "Prioritized by exploitability"
510
+ points: 5
511
+
512
+ - name: persona
513
+ weight: 15
514
+ criteria:
515
+ - id: CHARACTER_CONSISTENCY
516
+ description: "Stays in character throughout"
517
+ points: 8
518
+ - id: PERSONA_VALUE_ADD
519
+ description: "Persona enhances memorability/clarity"
520
+ points: 7
521
+
522
+ # =============================================================================
523
+ # PERSONA INFLUENCE
524
+ # =============================================================================
525
+
526
+ persona_influence:
527
+ dimensions:
528
+ - name: frontend_expertise
529
+ description: "Depth of React/frontend security knowledge"
530
+ spectrum:
531
+ backend_focused: "Finds XSS but misses React-specific issues"
532
+ balanced: "Finds both security and React anti-patterns"
533
+ frontend_expert: "Catches accessibility, hooks issues, React patterns"
534
+
535
+ - name: security_vs_quality
536
+ description: "Balance between security and code quality concerns"
537
+ spectrum:
538
+ security_only: "Only finds vulnerabilities, ignores patterns"
539
+ balanced: "Covers both security and code health"
540
+ quality_focused: "May prioritize React patterns over security"
541
+
542
+ - name: user_advocacy
543
+ description: "Focus on end-user impact"
544
+ spectrum:
545
+ technical: "Focuses on code-level issues"
546
+ user_focused: "Emphasizes UX and accessibility impact"
547
+
548
+ expected_tendencies:
549
+ discworld_reviewer:
550
+ character: "Granny Weatherwax"
551
+ expected_traits:
552
+ - "Headology - should spot client-side bypass attempts"
553
+ - "Practical - will note user-facing issues"
554
+ - "Uncompromising on security fundamentals"
555
+ thoroughness_prediction: "high"
556
+
557
+ star_trek_reviewer:
558
+ character: "Spock"
559
+ expected_traits:
560
+ - "Logical - systematic coverage of all components"
561
+ - "May focus on technical correctness over UX"
562
+ - "Precise vulnerability classification"
563
+ thoroughness_prediction: "high"
564
+
565
+ control_reviewer:
566
+ character: "None (baseline)"
567
+ expected_traits:
568
+ - "Standard frontend review behavior"
569
+ thoroughness_prediction: "baseline reference"