@pennyfarthing/benchmark 10.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. package/commands/benchmark-control.md +69 -0
  2. package/commands/benchmark.md +485 -0
  3. package/commands/job-fair.md +102 -0
  4. package/commands/solo.md +447 -0
  5. package/dist/benchmark-integration.d.ts +182 -0
  6. package/dist/benchmark-integration.d.ts.map +1 -0
  7. package/dist/benchmark-integration.js +710 -0
  8. package/dist/benchmark-integration.js.map +1 -0
  9. package/dist/benchmark-integration.test.d.ts +6 -0
  10. package/dist/benchmark-integration.test.d.ts.map +1 -0
  11. package/dist/benchmark-integration.test.js +41 -0
  12. package/dist/benchmark-integration.test.js.map +1 -0
  13. package/dist/index.d.ts +3 -0
  14. package/dist/index.d.ts.map +1 -0
  15. package/dist/index.js +5 -0
  16. package/dist/index.js.map +1 -0
  17. package/dist/job-fair-aggregator.d.ts +150 -0
  18. package/dist/job-fair-aggregator.d.ts.map +1 -0
  19. package/dist/job-fair-aggregator.js +547 -0
  20. package/dist/job-fair-aggregator.js.map +1 -0
  21. package/dist/job-fair-aggregator.test.d.ts +6 -0
  22. package/dist/job-fair-aggregator.test.d.ts.map +1 -0
  23. package/dist/job-fair-aggregator.test.js +35 -0
  24. package/dist/job-fair-aggregator.test.js.map +1 -0
  25. package/dist/package-exports.test.d.ts +13 -0
  26. package/dist/package-exports.test.d.ts.map +1 -0
  27. package/dist/package-exports.test.js +192 -0
  28. package/dist/package-exports.test.js.map +1 -0
  29. package/docs/BENCHMARK-METHODOLOGY.md +105 -0
  30. package/docs/BENCHMARKING.md +311 -0
  31. package/docs/OCEAN-BENCHMARKING.md +210 -0
  32. package/docs/benchmarks-guide.md +62 -0
  33. package/package.json +66 -0
  34. package/scenarios/README.md +145 -0
  35. package/scenarios/architecture/database-selection.yaml +119 -0
  36. package/scenarios/architecture/legacy-modernization.yaml +153 -0
  37. package/scenarios/architecture/scaling-decision.yaml +88 -0
  38. package/scenarios/code-review/graphql-api-review.yaml +714 -0
  39. package/scenarios/code-review/order-service.yaml +622 -0
  40. package/scenarios/code-review/react-auth-component.yaml +569 -0
  41. package/scenarios/code-review/security-review.yaml +145 -0
  42. package/scenarios/code-review/terraform-infrastructure.yaml +582 -0
  43. package/scenarios/debug/buggy-user-service.yaml +541 -0
  44. package/scenarios/debug/null-pointer.yaml +130 -0
  45. package/scenarios/debugging/async-control-flow.yaml +161 -0
  46. package/scenarios/debugging/auth-bypass.yaml +197 -0
  47. package/scenarios/debugging/error-handling.yaml +178 -0
  48. package/scenarios/debugging/input-validation.yaml +157 -0
  49. package/scenarios/debugging/null-check-missing.yaml +139 -0
  50. package/scenarios/debugging/off-by-one-loop.yaml +132 -0
  51. package/scenarios/debugging/race-condition.yaml +180 -0
  52. package/scenarios/debugging/resource-leak.yaml +166 -0
  53. package/scenarios/debugging/simple-logic-error.yaml +115 -0
  54. package/scenarios/debugging/sql-injection.yaml +163 -0
  55. package/scenarios/dev/event-processor-tdd.yaml +764 -0
  56. package/scenarios/dev/migration-disaster.yaml +415 -0
  57. package/scenarios/dev/race-condition-cache.yaml +546 -0
  58. package/scenarios/dev/tdd-shopping-cart.yaml +681 -0
  59. package/scenarios/schema.yaml +639 -0
  60. package/scenarios/sm/dependency-deadlock.yaml +414 -0
  61. package/scenarios/sm/executive-pet-project.yaml +336 -0
  62. package/scenarios/sm/layoff-planning.yaml +356 -0
  63. package/scenarios/sm/sprint-planning-conflict.yaml +303 -0
  64. package/scenarios/sm/story-breakdown.yaml +240 -0
  65. package/scenarios/sm/three-sprint-failure.yaml +397 -0
  66. package/scenarios/swe-bench/README.md +57 -0
  67. package/scenarios/swe-bench/astropy-12907.yaml +128 -0
  68. package/scenarios/swe-bench/astropy-13398.yaml +177 -0
  69. package/scenarios/swe-bench/astropy-14309.yaml +180 -0
  70. package/scenarios/swe-bench/django-10097.yaml +106 -0
  71. package/scenarios/swe-bench/django-10554.yaml +140 -0
  72. package/scenarios/swe-bench/django-10973.yaml +93 -0
  73. package/scenarios/swe-bench/flask-5014-reviewer.yaml +145 -0
  74. package/scenarios/swe-bench/flask-5014-tea.yaml +123 -0
  75. package/scenarios/swe-bench/flask-5014.yaml +91 -0
  76. package/scenarios/swe-bench/import-swebench.py +246 -0
  77. package/scenarios/swe-bench/matplotlib-13989.yaml +139 -0
  78. package/scenarios/swe-bench/matplotlib-14623.yaml +127 -0
  79. package/scenarios/swe-bench/requests-1142-reviewer.yaml +144 -0
  80. package/scenarios/swe-bench/requests-1142-tea.yaml +135 -0
  81. package/scenarios/swe-bench/requests-1142.yaml +100 -0
  82. package/scenarios/swe-bench/requests-2931.yaml +98 -0
  83. package/scenarios/swe-bench/seaborn-3069.yaml +102 -0
  84. package/scenarios/swe-bench/sphinx-7590.yaml +108 -0
  85. package/scenarios/swe-bench/xarray-3993.yaml +104 -0
  86. package/scenarios/swe-bench/xarray-6992.yaml +136 -0
  87. package/scenarios/tea/checkout-component-tests.yaml +596 -0
  88. package/scenarios/tea/cli-tool-tests.yaml +561 -0
  89. package/scenarios/tea/microservice-integration-tests.yaml +520 -0
  90. package/scenarios/tea/payment-processor-tests.yaml +550 -0
  91. package/scripts/aggregate-benchmark-stats.js +315 -0
  92. package/scripts/aggregate-benchmark-stats.sh +8 -0
  93. package/scripts/benchmark-runner.js +392 -0
  94. package/scripts/benchmark-runner.sh +8 -0
  95. package/scripts/consolidate-job-fair.sh +107 -0
  96. package/scripts/convert-jobfair-to-benchmarks.sh +230 -0
  97. package/scripts/job-fair-batch.sh +116 -0
  98. package/scripts/job-fair-progress.sh +35 -0
  99. package/scripts/job-fair-runner.sh +278 -0
  100. package/scripts/job-fair-status.sh +80 -0
  101. package/scripts/job-fair-watcher-v2.sh +38 -0
  102. package/scripts/job-fair-watcher.sh +50 -0
  103. package/scripts/parallel-benchmark.sh +140 -0
  104. package/scripts/solo-runner.sh +344 -0
  105. package/scripts/test/ensure-swebench-data.sh +59 -0
  106. package/scripts/test/ground-truth-judge.py +220 -0
  107. package/scripts/test/swebench-judge.py +374 -0
  108. package/scripts/test/test-cache.sh +165 -0
  109. package/scripts/test/test-setup.sh +337 -0
  110. package/scripts/theme/compute-theme-tiers.sh +13 -0
  111. package/scripts/theme/compute_theme_tiers.py +402 -0
  112. package/scripts/theme/update-theme-tiers.sh +97 -0
  113. package/skills/finalize-run/SKILL.md +261 -0
  114. package/skills/judge/SKILL.md +644 -0
  115. package/skills/persona-benchmark/SKILL.md +187 -0
@@ -0,0 +1,714 @@
1
+ ---
2
+ # Scenario: GraphQL API Code Review (Medium)
3
+ # NOTE: Re-ranked to "medium" based on control baseline mean 79.5 ± 1.0 (Story 7-2)
4
+ # Category: code-review
5
+ # Purpose: Test GraphQL-specific security knowledge and API vulnerability detection
6
+
7
+ id: rev-005
8
+ name: graphql-api-review
9
+ title: "GraphQL API Security Review"
10
+ category: code-review
11
+ difficulty: medium
12
+ version: "1.0"
13
+
14
+ description: |
15
+ GraphQL resolvers for a user/order management API with nested queries, mutations,
16
+ and subscriptions. Contains N+1 query problems, unbounded query depth, no rate
17
+ limiting, authorization bypass through nested queries, and information disclosure.
18
+ Tests deep GraphQL security knowledge beyond traditional REST API concerns.
19
+
20
+ purpose: |
21
+ This scenario tests GraphQL-specific security expertise. GraphQL has unique
22
+ vulnerabilities (query complexity attacks, nested auth bypass, introspection abuse)
23
+ that differ from REST. Finding all 20 baseline issues = expert GraphQL reviewer.
24
+ Finding bonus issues = understands GraphQL attack surface deeply.
25
+
26
+ prompt: |
27
+ You are reviewing a pull request for a GraphQL API implementation.
28
+ The developer says "the schema is intuitive and the resolvers are clean."
29
+
30
+ Review this code thoroughly for:
31
+ - GraphQL-specific vulnerabilities (query complexity, depth limits)
32
+ - Authorization bypass via nested queries
33
+ - N+1 query problems and performance issues
34
+ - Information disclosure through introspection or errors
35
+ - Input validation gaps
36
+ - Subscription security
37
+
38
+ For each issue:
39
+ 1. Identify the specific resolver or schema location
40
+ 2. Classify severity (Critical/High/Medium/Low)
41
+ 3. Explain the attack vector with example query
42
+ 4. Provide a fix or recommendation
43
+
44
+ This API handles sensitive user and financial data. Security is paramount.
45
+
46
+ code:
47
+ language: typescript
48
+ filename: graphql-api.ts
49
+ content: |
50
+ import { ApolloServer } from '@apollo/server';
51
+ import { makeExecutableSchema } from '@graphql-tools/schema';
52
+ import { PubSub } from 'graphql-subscriptions';
53
+ import { pool } from './database';
54
+
55
+ const pubsub = new PubSub();
56
+
57
+ const typeDefs = `
58
+ type Query {
59
+ user(id: ID!): User
60
+ users(limit: Int, offset: Int): [User!]!
61
+ order(id: ID!): Order
62
+ orders(userId: ID, status: String): [Order!]!
63
+ searchUsers(query: String!): [User!]!
64
+ adminStats: AdminStats
65
+ }
66
+
67
+ type Mutation {
68
+ createUser(input: CreateUserInput!): User!
69
+ updateUser(id: ID!, input: UpdateUserInput!): User!
70
+ deleteUser(id: ID!): Boolean!
71
+ createOrder(input: CreateOrderInput!): Order!
72
+ updateOrderStatus(id: ID!, status: String!): Order!
73
+ processRefund(orderId: ID!, amount: Float!): RefundResult!
74
+ resetPassword(email: String!): Boolean!
75
+ updatePassword(token: String!, newPassword: String!): Boolean!
76
+ }
77
+
78
+ type Subscription {
79
+ orderStatusChanged(userId: ID): Order!
80
+ newOrder: Order!
81
+ userActivity: ActivityEvent!
82
+ }
83
+
84
+ type User {
85
+ id: ID!
86
+ email: String!
87
+ name: String!
88
+ password: String!
89
+ role: String!
90
+ ssn: String
91
+ creditCards: [CreditCard!]!
92
+ orders: [Order!]!
93
+ createdAt: String!
94
+ lastLogin: String
95
+ resetToken: String
96
+ apiKey: String
97
+ }
98
+
99
+ type CreditCard {
100
+ id: ID!
101
+ number: String!
102
+ expiry: String!
103
+ cvv: String!
104
+ user: User!
105
+ }
106
+
107
+ type Order {
108
+ id: ID!
109
+ userId: ID!
110
+ user: User!
111
+ items: [OrderItem!]!
112
+ total: Float!
113
+ status: String!
114
+ paymentDetails: PaymentDetails!
115
+ shippingAddress: Address!
116
+ notes: String
117
+ internalNotes: String
118
+ createdAt: String!
119
+ }
120
+
121
+ type OrderItem {
122
+ id: ID!
123
+ product: Product!
124
+ quantity: Int!
125
+ price: Float!
126
+ }
127
+
128
+ type Product {
129
+ id: ID!
130
+ name: String!
131
+ price: Float!
132
+ inventory: Int!
133
+ supplier: Supplier!
134
+ reviews: [Review!]!
135
+ }
136
+
137
+ type Supplier {
138
+ id: ID!
139
+ name: String!
140
+ contact: String!
141
+ products: [Product!]!
142
+ }
143
+
144
+ type Review {
145
+ id: ID!
146
+ user: User!
147
+ product: Product!
148
+ rating: Int!
149
+ comment: String!
150
+ }
151
+
152
+ type PaymentDetails {
153
+ method: String!
154
+ cardLast4: String!
155
+ fullCardNumber: String!
156
+ transactionId: String!
157
+ }
158
+
159
+ type Address {
160
+ street: String!
161
+ city: String!
162
+ state: String!
163
+ zip: String!
164
+ phone: String!
165
+ }
166
+
167
+ type AdminStats {
168
+ totalUsers: Int!
169
+ totalOrders: Int!
170
+ totalRevenue: Float!
171
+ recentErrors: [ErrorLog!]!
172
+ }
173
+
174
+ type ErrorLog {
175
+ timestamp: String!
176
+ message: String!
177
+ stackTrace: String!
178
+ userId: ID
179
+ }
180
+
181
+ type RefundResult {
182
+ success: Boolean!
183
+ message: String!
184
+ refundId: String
185
+ }
186
+
187
+ type ActivityEvent {
188
+ userId: ID!
189
+ action: String!
190
+ details: String!
191
+ ipAddress: String!
192
+ }
193
+
194
+ input CreateUserInput {
195
+ email: String!
196
+ password: String!
197
+ name: String!
198
+ role: String
199
+ ssn: String
200
+ }
201
+
202
+ input UpdateUserInput {
203
+ email: String
204
+ name: String
205
+ role: String
206
+ ssn: String
207
+ }
208
+
209
+ input CreateOrderInput {
210
+ items: [OrderItemInput!]!
211
+ paymentMethod: String!
212
+ cardNumber: String!
213
+ cvv: String!
214
+ shippingAddress: AddressInput!
215
+ }
216
+
217
+ input OrderItemInput {
218
+ productId: ID!
219
+ quantity: Int!
220
+ }
221
+
222
+ input AddressInput {
223
+ street: String!
224
+ city: String!
225
+ state: String!
226
+ zip: String!
227
+ phone: String!
228
+ }
229
+ `;
230
+
231
+ const resolvers = {
232
+ Query: {
233
+ user: async (_: any, { id }: { id: string }, context: any) => {
234
+ const result = await pool.query(`SELECT * FROM users WHERE id = ${id}`);
235
+ return result.rows[0];
236
+ },
237
+
238
+ users: async (_: any, { limit = 100, offset = 0 }: any) => {
239
+ const result = await pool.query(
240
+ `SELECT * FROM users LIMIT ${limit} OFFSET ${offset}`
241
+ );
242
+ return result.rows;
243
+ },
244
+
245
+ order: async (_: any, { id }: { id: string }) => {
246
+ const result = await pool.query(`SELECT * FROM orders WHERE id = '${id}'`);
247
+ return result.rows[0];
248
+ },
249
+
250
+ orders: async (_: any, { userId, status }: any) => {
251
+ let query = 'SELECT * FROM orders WHERE 1=1';
252
+ if (userId) query += ` AND user_id = '${userId}'`;
253
+ if (status) query += ` AND status = '${status}'`;
254
+ const result = await pool.query(query);
255
+ return result.rows;
256
+ },
257
+
258
+ searchUsers: async (_: any, { query }: { query: string }) => {
259
+ const result = await pool.query(
260
+ `SELECT * FROM users WHERE name ILIKE '%${query}%' OR email ILIKE '%${query}%'`
261
+ );
262
+ return result.rows;
263
+ },
264
+
265
+ adminStats: async () => {
266
+ const users = await pool.query('SELECT COUNT(*) FROM users');
267
+ const orders = await pool.query('SELECT COUNT(*), SUM(total) FROM orders');
268
+ const errors = await pool.query('SELECT * FROM error_logs ORDER BY timestamp DESC LIMIT 50');
269
+ return {
270
+ totalUsers: users.rows[0].count,
271
+ totalOrders: orders.rows[0].count,
272
+ totalRevenue: orders.rows[0].sum,
273
+ recentErrors: errors.rows
274
+ };
275
+ }
276
+ },
277
+
278
+ Mutation: {
279
+ createUser: async (_: any, { input }: any) => {
280
+ const result = await pool.query(
281
+ `INSERT INTO users (email, password, name, role, ssn)
282
+ VALUES ('${input.email}', '${input.password}', '${input.name}', '${input.role || 'user'}', '${input.ssn}')
283
+ RETURNING *`
284
+ );
285
+ return result.rows[0];
286
+ },
287
+
288
+ updateUser: async (_: any, { id, input }: any, context: any) => {
289
+ const sets = Object.entries(input)
290
+ .map(([k, v]) => `${k} = '${v}'`)
291
+ .join(', ');
292
+ const result = await pool.query(
293
+ `UPDATE users SET ${sets} WHERE id = ${id} RETURNING *`
294
+ );
295
+ return result.rows[0];
296
+ },
297
+
298
+ deleteUser: async (_: any, { id }: { id: string }) => {
299
+ await pool.query(`DELETE FROM users WHERE id = ${id}`);
300
+ return true;
301
+ },
302
+
303
+ createOrder: async (_: any, { input }: any, context: any) => {
304
+ const userId = context.user?.id;
305
+ const result = await pool.query(
306
+ `INSERT INTO orders (user_id, status, card_number, cvv)
307
+ VALUES (${userId}, 'pending', '${input.cardNumber}', '${input.cvv}')
308
+ RETURNING *`
309
+ );
310
+ return result.rows[0];
311
+ },
312
+
313
+ updateOrderStatus: async (_: any, { id, status }: any) => {
314
+ const result = await pool.query(
315
+ `UPDATE orders SET status = '${status}' WHERE id = ${id} RETURNING *`
316
+ );
317
+ pubsub.publish('ORDER_STATUS_CHANGED', { orderStatusChanged: result.rows[0] });
318
+ return result.rows[0];
319
+ },
320
+
321
+ processRefund: async (_: any, { orderId, amount }: any) => {
322
+ // Process refund without validation
323
+ await pool.query(
324
+ `INSERT INTO refunds (order_id, amount) VALUES (${orderId}, ${amount})`
325
+ );
326
+ return { success: true, message: 'Refund processed', refundId: 'ref_' + Date.now() };
327
+ },
328
+
329
+ resetPassword: async (_: any, { email }: { email: string }) => {
330
+ const token = Math.random().toString(36).substring(7);
331
+ await pool.query(
332
+ `UPDATE users SET reset_token = '${token}' WHERE email = '${email}'`
333
+ );
334
+ console.log(`Password reset token for ${email}: ${token}`);
335
+ return true;
336
+ },
337
+
338
+ updatePassword: async (_: any, { token, newPassword }: any) => {
339
+ const result = await pool.query(
340
+ `UPDATE users SET password = '${newPassword}', reset_token = NULL
341
+ WHERE reset_token = '${token}' RETURNING *`
342
+ );
343
+ return result.rowCount > 0;
344
+ }
345
+ },
346
+
347
+ Subscription: {
348
+ orderStatusChanged: {
349
+ subscribe: () => pubsub.asyncIterator(['ORDER_STATUS_CHANGED'])
350
+ },
351
+ newOrder: {
352
+ subscribe: () => pubsub.asyncIterator(['NEW_ORDER'])
353
+ },
354
+ userActivity: {
355
+ subscribe: () => pubsub.asyncIterator(['USER_ACTIVITY'])
356
+ }
357
+ },
358
+
359
+ User: {
360
+ orders: async (parent: any) => {
361
+ const result = await pool.query(
362
+ `SELECT * FROM orders WHERE user_id = ${parent.id}`
363
+ );
364
+ return result.rows;
365
+ },
366
+ creditCards: async (parent: any) => {
367
+ const result = await pool.query(
368
+ `SELECT * FROM credit_cards WHERE user_id = ${parent.id}`
369
+ );
370
+ return result.rows;
371
+ }
372
+ },
373
+
374
+ Order: {
375
+ user: async (parent: any) => {
376
+ const result = await pool.query(
377
+ `SELECT * FROM users WHERE id = ${parent.user_id}`
378
+ );
379
+ return result.rows[0];
380
+ },
381
+ items: async (parent: any) => {
382
+ const result = await pool.query(
383
+ `SELECT * FROM order_items WHERE order_id = ${parent.id}`
384
+ );
385
+ return result.rows;
386
+ }
387
+ },
388
+
389
+ OrderItem: {
390
+ product: async (parent: any) => {
391
+ const result = await pool.query(
392
+ `SELECT * FROM products WHERE id = ${parent.product_id}`
393
+ );
394
+ return result.rows[0];
395
+ }
396
+ },
397
+
398
+ Product: {
399
+ supplier: async (parent: any) => {
400
+ const result = await pool.query(
401
+ `SELECT * FROM suppliers WHERE id = ${parent.supplier_id}`
402
+ );
403
+ return result.rows[0];
404
+ },
405
+ reviews: async (parent: any) => {
406
+ const result = await pool.query(
407
+ `SELECT * FROM reviews WHERE product_id = ${parent.id}`
408
+ );
409
+ return result.rows;
410
+ }
411
+ },
412
+
413
+ Review: {
414
+ user: async (parent: any) => {
415
+ const result = await pool.query(
416
+ `SELECT * FROM users WHERE id = ${parent.user_id}`
417
+ );
418
+ return result.rows[0];
419
+ },
420
+ product: async (parent: any) => {
421
+ const result = await pool.query(
422
+ `SELECT * FROM products WHERE id = ${parent.product_id}`
423
+ );
424
+ return result.rows[0];
425
+ }
426
+ },
427
+
428
+ CreditCard: {
429
+ user: async (parent: any) => {
430
+ const result = await pool.query(
431
+ `SELECT * FROM users WHERE id = ${parent.user_id}`
432
+ );
433
+ return result.rows[0];
434
+ }
435
+ }
436
+ };
437
+
438
+ const schema = makeExecutableSchema({ typeDefs, resolvers });
439
+
440
+ const server = new ApolloServer({
441
+ schema,
442
+ introspection: true,
443
+ formatError: (error) => {
444
+ console.error('GraphQL Error:', error);
445
+ return {
446
+ message: error.message,
447
+ path: error.path,
448
+ extensions: {
449
+ code: error.extensions?.code,
450
+ stacktrace: error.extensions?.stacktrace
451
+ }
452
+ };
453
+ }
454
+ });
455
+
456
+ export { server };
457
+
458
+ # =============================================================================
459
+ # BASELINE ISSUES (minimum expected to find)
460
+ # =============================================================================
461
+
462
+ baseline_issues:
463
+ critical:
464
+ - id: SQL_INJECTION_MULTIPLE
465
+ location: "All resolvers using string interpolation"
466
+ description: "SQL injection in every query - user, order, searchUsers, mutations"
467
+
468
+ - id: PASSWORD_EXPOSED
469
+ location: "User type, line 39"
470
+ description: "Password field exposed in User type schema"
471
+
472
+ - id: FULL_CARD_NUMBER
473
+ location: "PaymentDetails type, line 106"
474
+ description: "Full credit card number exposed in PaymentDetails"
475
+
476
+ - id: CVV_STORED
477
+ location: "CreditCard type, line 54"
478
+ description: "CVV stored and exposed - PCI DSS violation"
479
+
480
+ - id: PASSWORD_PLAINTEXT
481
+ location: "createUser, updatePassword mutations"
482
+ description: "Passwords stored in plaintext, no hashing"
483
+
484
+ high:
485
+ - id: NO_AUTH_CHECK
486
+ location: "All resolvers"
487
+ description: "No authorization checks on any resolver"
488
+
489
+ - id: NESTED_AUTH_BYPASS
490
+ location: "Order.user, CreditCard.user resolvers"
491
+ description: "Can access any user's data through nested queries"
492
+
493
+ - id: SSN_EXPOSED
494
+ location: "User type, line 41"
495
+ description: "Social Security Number exposed in API"
496
+
497
+ - id: RESET_TOKEN_EXPOSED
498
+ location: "User type, line 46"
499
+ description: "Password reset token exposed in schema"
500
+
501
+ - id: API_KEY_EXPOSED
502
+ location: "User type, line 47"
503
+ description: "API key exposed in User type"
504
+
505
+ - id: STACKTRACE_LEAK
506
+ location: "formatError, line 401"
507
+ description: "Stack traces returned to client in errors"
508
+
509
+ medium:
510
+ - id: NO_QUERY_DEPTH_LIMIT
511
+ location: "ApolloServer config"
512
+ description: "No depth limit allows infinitely nested queries"
513
+
514
+ - id: NO_QUERY_COMPLEXITY
515
+ location: "ApolloServer config"
516
+ description: "No complexity limit allows expensive queries"
517
+
518
+ - id: N_PLUS_1_QUERIES
519
+ location: "All nested resolvers"
520
+ description: "Every nested field triggers separate DB query"
521
+
522
+ - id: INTROSPECTION_ENABLED
523
+ location: "ApolloServer config, line 393"
524
+ description: "Introspection enabled in production exposes schema"
525
+
526
+ - id: WEAK_RESET_TOKEN
527
+ location: "resetPassword mutation, line 281"
528
+ description: "Reset token generated with Math.random() - predictable"
529
+
530
+ - id: REFUND_NO_VALIDATION
531
+ location: "processRefund mutation, lines 273-277"
532
+ description: "No validation on refund amount - can refund any amount"
533
+
534
+ low:
535
+ - id: TOKEN_LOGGED
536
+ location: "resetPassword mutation, line 285"
537
+ description: "Reset token logged to console"
538
+
539
+ - id: NO_RATE_LIMITING
540
+ location: "All endpoints"
541
+ description: "No rate limiting on mutations or queries"
542
+
543
+ - id: INTERNAL_NOTES_EXPOSED
544
+ location: "Order type, line 68"
545
+ description: "Internal notes visible in Order type"
546
+
547
+ # =============================================================================
548
+ # BONUS ISSUES (thorough reviewers might find these)
549
+ # =============================================================================
550
+
551
+ bonus_issues:
552
+ graphql_specific:
553
+ - id: SUBSCRIPTION_NO_AUTH
554
+ description: "Subscriptions have no authentication or filtering"
555
+
556
+ - id: BATCHING_ATTACK
557
+ description: "No limit on query batching - can DoS server"
558
+
559
+ - id: ALIAS_ATTACK
560
+ description: "Field aliases allow bypassing rate limits"
561
+
562
+ - id: DIRECTIVE_ABUSE
563
+ description: "No custom directives for field-level auth"
564
+
565
+ data_exposure:
566
+ - id: ERROR_LOG_EXPOSURE
567
+ location: "AdminStats.recentErrors"
568
+ description: "Error logs with stack traces exposed via API"
569
+
570
+ - id: ACTIVITY_IP_EXPOSED
571
+ location: "ActivityEvent type"
572
+ description: "User IP addresses exposed in activity events"
573
+
574
+ - id: SUPPLIER_CONTACT_EXPOSED
575
+ description: "Supplier contact info potentially sensitive"
576
+
577
+ performance:
578
+ - id: NO_DATALOADER
579
+ description: "Missing DataLoader causes N+1 on every request"
580
+
581
+ - id: UNBOUNDED_LIMIT
582
+ location: "users query"
583
+ description: "No max limit on users query - can fetch all"
584
+
585
+ - id: NO_PAGINATION_CURSOR
586
+ description: "Offset pagination inefficient at scale"
587
+
588
+ architecture:
589
+ - id: NO_INPUT_SANITIZATION
590
+ description: "No input validation or sanitization layer"
591
+
592
+ - id: NO_FIELD_MASKING
593
+ description: "No field-level visibility control"
594
+
595
+ - id: CIRCULAR_REFS_RISK
596
+ description: "User->Orders->User->Orders creates infinite loop potential"
597
+
598
+ - id: NO_PERSISTED_QUERIES
599
+ description: "Arbitrary queries accepted - should use persisted queries"
600
+
601
+ - id: NO_COST_ANALYSIS
602
+ description: "No query cost analysis before execution"
603
+
604
+ # =============================================================================
605
+ # SCORING
606
+ # =============================================================================
607
+
608
+ scoring:
609
+ total_baseline_issues: 20
610
+ total_bonus_issues: 15
611
+ weights:
612
+ critical: 3
613
+ high: 2
614
+ medium: 1
615
+ low: 0.5
616
+ max_baseline_score: 34.5 # 5*3 + 6*2 + 6*1 + 3*0.5
617
+
618
+ categories:
619
+ - name: detection
620
+ weight: 40
621
+ criteria:
622
+ - id: BASELINE_FOUND
623
+ description: "Issues from the seeded baseline list"
624
+ points: 25
625
+ - id: BONUS_DISCOVERIES
626
+ description: "Valid issues beyond the baseline"
627
+ points: 15
628
+
629
+ - name: depth
630
+ weight: 30
631
+ criteria:
632
+ - id: ROOT_CAUSE_ANALYSIS
633
+ description: "Traces GraphQL attack vectors completely"
634
+ points: 10
635
+ - id: FIX_SPECIFICITY
636
+ description: "Provides GraphQL-specific fixes (directives, DataLoader, etc.)"
637
+ points: 10
638
+ - id: IMPACT_ASSESSMENT
639
+ description: "Explains DoS, data exfiltration scenarios with example queries"
640
+ points: 10
641
+
642
+ - name: quality
643
+ weight: 15
644
+ criteria:
645
+ - id: SEVERITY_ACCURACY
646
+ description: "Correctly classifies GraphQL-specific severity"
647
+ points: 5
648
+ - id: REASONING_QUALITY
649
+ description: "Clear explanation of GraphQL security model"
650
+ points: 5
651
+ - id: ORGANIZATION
652
+ description: "Prioritized by exploitability"
653
+ points: 5
654
+
655
+ - name: persona
656
+ weight: 15
657
+ criteria:
658
+ - id: CHARACTER_CONSISTENCY
659
+ description: "Stays in character throughout"
660
+ points: 8
661
+ - id: PERSONA_VALUE_ADD
662
+ description: "Persona enhances memorability/clarity"
663
+ points: 7
664
+
665
+ # =============================================================================
666
+ # PERSONA INFLUENCE
667
+ # =============================================================================
668
+
669
+ persona_influence:
670
+ dimensions:
671
+ - name: graphql_expertise
672
+ description: "Depth of GraphQL-specific security knowledge"
673
+ spectrum:
674
+ rest_focused: "Finds SQL injection but misses GraphQL-specific issues"
675
+ balanced: "Finds both traditional and GraphQL vulnerabilities"
676
+ graphql_expert: "Catches complexity attacks, nested auth, batching"
677
+
678
+ - name: attack_creativity
679
+ description: "Ability to construct exploit queries"
680
+ spectrum:
681
+ issue_identifier: "Lists problems without exploitation"
682
+ moderate: "Provides simple attack examples"
683
+ exploit_crafter: "Constructs complex nested attack queries"
684
+
685
+ - name: fix_completeness
686
+ description: "Quality of remediation suggestions"
687
+ spectrum:
688
+ problem_focused: "Identifies issues only"
689
+ practical: "Suggests common fixes"
690
+ comprehensive: "Provides full GraphQL security architecture"
691
+
692
+ expected_tendencies:
693
+ discworld_reviewer:
694
+ character: "Granny Weatherwax"
695
+ expected_traits:
696
+ - "Headology - should recognize social engineering via nested queries"
697
+ - "May focus on obvious issues (passwords) over GraphQL-specific"
698
+ - "Practical fixes over architectural rewrites"
699
+ thoroughness_prediction: "medium-high"
700
+
701
+ star_trek_reviewer:
702
+ character: "Spock"
703
+ expected_traits:
704
+ - "Logical - systematic type-by-type analysis"
705
+ - "Technical - may catch complexity/performance issues"
706
+ - "Precise exploit query construction"
707
+ thoroughness_prediction: "high"
708
+
709
+ control_reviewer:
710
+ character: "None (baseline)"
711
+ expected_traits:
712
+ - "Standard API review behavior"
713
+ - "May miss GraphQL-specific concerns"
714
+ thoroughness_prediction: "baseline reference"