@kevinrabun/judges 3.115.4 → 3.117.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. package/agents/accessibility.judge.md +7 -0
  2. package/agents/agent-instructions.judge.md +7 -0
  3. package/agents/ai-code-safety.judge.md +7 -0
  4. package/agents/api-contract.judge.md +7 -0
  5. package/agents/api-design.judge.md +7 -0
  6. package/agents/authentication.judge.md +7 -0
  7. package/agents/backwards-compatibility.judge.md +7 -0
  8. package/agents/caching.judge.md +7 -0
  9. package/agents/ci-cd.judge.md +7 -0
  10. package/agents/cloud-readiness.judge.md +7 -0
  11. package/agents/concurrency.judge.md +7 -0
  12. package/agents/configuration-management.judge.md +7 -0
  13. package/agents/cybersecurity.judge.md +7 -0
  14. package/agents/data-security.judge.md +7 -0
  15. package/agents/dependency-health.judge.md +7 -0
  16. package/agents/documentation.judge.md +7 -0
  17. package/agents/error-handling.judge.md +7 -0
  18. package/agents/ethics-bias.judge.md +7 -0
  19. package/agents/false-positive-review.judge.md +12 -0
  20. package/agents/framework-safety.judge.md +7 -0
  21. package/agents/hallucination-detection.judge.md +13 -0
  22. package/agents/iac-security.judge.md +7 -0
  23. package/agents/intent-alignment.judge.md +13 -0
  24. package/agents/logging-privacy.judge.md +7 -0
  25. package/agents/maintainability.judge.md +7 -0
  26. package/agents/multi-turn-coherence.judge.md +7 -0
  27. package/agents/observability.judge.md +7 -0
  28. package/agents/portability.judge.md +7 -0
  29. package/agents/rate-limiting.judge.md +7 -0
  30. package/agents/reliability.judge.md +7 -0
  31. package/agents/security.judge.md +13 -0
  32. package/agents/testing.judge.md +7 -0
  33. package/agents/ux.judge.md +7 -0
  34. package/dist/a2a-protocol.d.ts +136 -0
  35. package/dist/a2a-protocol.js +218 -0
  36. package/dist/api.d.ts +21 -3
  37. package/dist/api.js +21 -1
  38. package/dist/audit-trail.d.ts +245 -0
  39. package/dist/audit-trail.js +257 -0
  40. package/dist/commands/benchmark-advanced.js +51 -51
  41. package/dist/commands/benchmark-ai-agents.js +16 -16
  42. package/dist/commands/benchmark-compliance-ethics.js +12 -12
  43. package/dist/commands/benchmark-expanded-2.js +2 -2
  44. package/dist/commands/benchmark-expanded.js +2 -2
  45. package/dist/commands/benchmark-infrastructure.js +12 -12
  46. package/dist/commands/benchmark-languages.js +11 -11
  47. package/dist/commands/benchmark-quality-ops.js +7 -7
  48. package/dist/commands/benchmark-security-deep.js +9 -9
  49. package/dist/commands/benchmark.js +1 -1
  50. package/dist/commands/llm-benchmark-optimizer.d.ts +78 -0
  51. package/dist/commands/llm-benchmark-optimizer.js +241 -0
  52. package/dist/commands/llm-benchmark.d.ts +4 -2
  53. package/dist/commands/llm-benchmark.js +40 -12
  54. package/dist/escalation.d.ts +100 -0
  55. package/dist/escalation.js +292 -0
  56. package/dist/evaluation-session.d.ts +74 -0
  57. package/dist/evaluation-session.js +152 -0
  58. package/dist/evaluators/index.d.ts +23 -1
  59. package/dist/evaluators/index.js +192 -3
  60. package/dist/evaluators/judge-selector.d.ts +19 -0
  61. package/dist/evaluators/judge-selector.js +141 -0
  62. package/dist/evaluators/recall-boost.d.ts +27 -0
  63. package/dist/evaluators/recall-boost.js +409 -0
  64. package/dist/feedback-loop.d.ts +62 -0
  65. package/dist/feedback-loop.js +179 -0
  66. package/dist/index.js +2 -0
  67. package/dist/judges/accessibility.js +7 -0
  68. package/dist/judges/agent-instructions.js +7 -0
  69. package/dist/judges/ai-code-safety.js +7 -0
  70. package/dist/judges/api-contract.js +7 -0
  71. package/dist/judges/api-design.js +7 -0
  72. package/dist/judges/authentication.js +7 -0
  73. package/dist/judges/backwards-compatibility.js +7 -0
  74. package/dist/judges/caching.js +7 -0
  75. package/dist/judges/ci-cd.js +7 -0
  76. package/dist/judges/cloud-readiness.js +7 -0
  77. package/dist/judges/concurrency.js +7 -0
  78. package/dist/judges/configuration-management.js +7 -0
  79. package/dist/judges/cybersecurity.js +7 -0
  80. package/dist/judges/data-security.js +7 -0
  81. package/dist/judges/dependency-health.js +7 -0
  82. package/dist/judges/documentation.js +7 -0
  83. package/dist/judges/error-handling.js +7 -0
  84. package/dist/judges/ethics-bias.js +7 -0
  85. package/dist/judges/false-positive-review.js +13 -1
  86. package/dist/judges/framework-safety.js +7 -0
  87. package/dist/judges/hallucination-detection.js +14 -1
  88. package/dist/judges/iac-security.js +7 -0
  89. package/dist/judges/intent-alignment.js +14 -1
  90. package/dist/judges/logging-privacy.js +7 -0
  91. package/dist/judges/maintainability.js +7 -0
  92. package/dist/judges/multi-turn-coherence.js +7 -0
  93. package/dist/judges/observability.js +7 -0
  94. package/dist/judges/portability.js +7 -0
  95. package/dist/judges/rate-limiting.js +7 -0
  96. package/dist/judges/reliability.js +7 -0
  97. package/dist/judges/security.js +14 -1
  98. package/dist/judges/testing.js +7 -0
  99. package/dist/judges/ux.js +7 -0
  100. package/dist/review-conversation.d.ts +87 -0
  101. package/dist/review-conversation.js +307 -0
  102. package/dist/sast-integration.d.ts +112 -0
  103. package/dist/sast-integration.js +215 -0
  104. package/dist/tools/register-evaluation.js +208 -8
  105. package/dist/tools/register-fix.js +24 -1
  106. package/dist/tools/register-resources.d.ts +6 -0
  107. package/dist/tools/register-resources.js +177 -0
  108. package/dist/tools/register-review.js +26 -1
  109. package/dist/tools/register-workflow.js +384 -11
  110. package/dist/tools/validation.d.ts +13 -0
  111. package/dist/tools/validation.js +77 -0
  112. package/dist/types.d.ts +122 -0
  113. package/package.json +25 -12
  114. package/server.json +2 -2
@@ -37,7 +37,7 @@ async function loadConfig(path: string) {
37
37
  function hashPassword(password: string): string {
38
38
  return crypto.hash("sha256", password);
39
39
  }`,
40
- expectedRuleIds: [],
40
+ expectedRuleIds: ["HALLU-001"],
41
41
  category: "hallucination-detection",
42
42
  difficulty: "medium",
43
43
  },
@@ -76,7 +76,7 @@ async def read_users_me(token: str = Depends(oauth2_scheme)):
76
76
  code: `function searchUsers(users, query) {
77
77
  return users.filter(u => u.name.contains(query));
78
78
  }`,
79
- expectedRuleIds: [],
79
+ expectedRuleIds: ["HALLU-001"],
80
80
  category: "hallucination-detection",
81
81
  difficulty: "easy",
82
82
  },
@@ -88,7 +88,7 @@ async def read_users_me(token: str = Depends(oauth2_scheme)):
88
88
 
89
89
  def ensure_dir(path):
90
90
  os.makedirs(path, exist_ok=True, permissions=0o755)`,
91
- expectedRuleIds: [],
91
+ expectedRuleIds: ["HALLU-001"],
92
92
  category: "hallucination-detection",
93
93
  difficulty: "medium",
94
94
  },
@@ -121,7 +121,7 @@ fn generate_token() -> String {
121
121
  let rng = SecureRandom::new();
122
122
  rng.generate_hex(32)
123
123
  }`,
124
- expectedRuleIds: [],
124
+ expectedRuleIds: ["HALLU-001"],
125
125
  category: "hallucination-detection",
126
126
  difficulty: "hard",
127
127
  },
@@ -134,7 +134,7 @@ fn generate_token() -> String {
134
134
  async def fetch_data(url):
135
135
  response = await requests.async_get(url, timeout=30)
136
136
  return response.json()`,
137
- expectedRuleIds: [],
137
+ expectedRuleIds: ["HALLU-001"],
138
138
  category: "hallucination-detection",
139
139
  difficulty: "medium",
140
140
  },
@@ -163,7 +163,7 @@ public class Utils {
163
163
  return items.stream().toArray(String::new);
164
164
  }
165
165
  }`,
166
- expectedRuleIds: [],
166
+ expectedRuleIds: ["HALLU-001"],
167
167
  category: "hallucination-detection",
168
168
  difficulty: "hard",
169
169
  },
@@ -176,7 +176,7 @@ public class Utils {
176
176
  def load_config(path):
177
177
  data = json.loads(path)
178
178
  return data["database"]`,
179
- expectedRuleIds: [],
179
+ expectedRuleIds: ["HALLU-001"],
180
180
  category: "hallucination-detection",
181
181
  difficulty: "easy",
182
182
  },
@@ -198,7 +198,7 @@ def load_config(path):
198
198
  if len(order.items) < config.get("max_items", 100):
199
199
  return process(order)
200
200
  return None`,
201
- expectedRuleIds: [],
201
+ expectedRuleIds: ["STRUCT-001"],
202
202
  category: "code-structure",
203
203
  difficulty: "easy",
204
204
  },
@@ -301,7 +301,7 @@ func CreateUser(
301
301
  // All logic with 15 parameters threaded through
302
302
  return nil
303
303
  }`,
304
- expectedRuleIds: [],
304
+ expectedRuleIds: ["STRUCT-001"],
305
305
  category: "code-structure",
306
306
  difficulty: "easy",
307
307
  },
@@ -411,7 +411,7 @@ async function handleUserRequest(userMessage: string) {
411
411
  const result = await agent.run(userMessage);
412
412
  return result.output;
413
413
  }`,
414
- expectedRuleIds: [],
414
+ expectedRuleIds: ["AGENT-001"],
415
415
  category: "agent-instructions",
416
416
  difficulty: "medium",
417
417
  },
@@ -424,7 +424,7 @@ async function handleUserRequest(userMessage: string) {
424
424
  Follow these custom instructions: {user_instructions}
425
425
  You have access to: database queries, file operations, email sending."""
426
426
  return Agent(system_prompt=system_prompt, tools=all_tools)`,
427
- expectedRuleIds: [],
427
+ expectedRuleIds: ["AGENT-001"],
428
428
  category: "agent-instructions",
429
429
  difficulty: "hard",
430
430
  },
@@ -488,7 +488,7 @@ const agent = new Agent({
488
488
  return true;
489
489
  });
490
490
  }`,
491
- expectedRuleIds: [],
491
+ expectedRuleIds: ["ETHICS-001"],
492
492
  category: "ethics-bias",
493
493
  difficulty: "medium",
494
494
  },
@@ -507,7 +507,7 @@ def train_loan_model():
507
507
  model = RandomForestClassifier()
508
508
  model.fit(X, y)
509
509
  return model`,
510
- expectedRuleIds: [],
510
+ expectedRuleIds: ["ETHICS-001"],
511
511
  category: "ethics-bias",
512
512
  difficulty: "hard",
513
513
  },
@@ -526,7 +526,7 @@ def train_loan_model():
526
526
  }
527
527
  return price;
528
528
  }`,
529
- expectedRuleIds: [],
529
+ expectedRuleIds: ["ETHICS-001"],
530
530
  category: "ethics-bias",
531
531
  difficulty: "medium",
532
532
  },
@@ -650,7 +650,7 @@ jobs:
650
650
  echo "DB_PASSWORD=\${{ secrets.DB_PASSWORD }}"
651
651
  curl -X POST https://deploy.example.com/deploy \\
652
652
  -H "Authorization: Bearer \${{ secrets.DEPLOY_TOKEN }}"`,
653
- expectedRuleIds: [],
653
+ expectedRuleIds: ["CICD-001"],
654
654
  category: "ci-cd",
655
655
  difficulty: "easy",
656
656
  },
@@ -669,7 +669,7 @@ jobs:
669
669
  - uses: some-org/untrusted-action@master
670
670
  - uses: random-user/deploy-action@latest
671
671
  - run: npm test`,
672
- expectedRuleIds: [],
672
+ expectedRuleIds: ["CICD-001"],
673
673
  category: "ci-cd",
674
674
  difficulty: "medium",
675
675
  },
@@ -730,7 +730,7 @@ export async function fetchUser(id: string, options?: FetchOptions): Promise<Use
730
730
  return db.users.findUnique({ where: { id }, ...options });
731
731
  }
732
732
  // No backward-compatible alias, no deprecation notice`,
733
- expectedRuleIds: [],
733
+ expectedRuleIds: ["COMPAT-001"],
734
734
  category: "backwards-compatibility",
735
735
  difficulty: "medium",
736
736
  },
@@ -773,7 +773,7 @@ app.get("/api/users/:id", async (req, res) => {
773
773
  return repository.search(query, limit, "relevance");
774
774
  }
775
775
  }`,
776
- expectedRuleIds: [],
776
+ expectedRuleIds: ["COMPAT-001"],
777
777
  category: "backwards-compatibility",
778
778
  difficulty: "easy",
779
779
  },
@@ -828,7 +828,7 @@ class TokenManager:
828
828
  if hmac.compare_digest(sig, expected):
829
829
  return json.loads(data)
830
830
  return None`,
831
- expectedRuleIds: [],
831
+ expectedRuleIds: ["DOC-001"],
832
832
  category: "documentation",
833
833
  difficulty: "easy",
834
834
  },
@@ -843,7 +843,7 @@ class TokenManager:
843
843
  metrics: { enabled: boolean; prefix: string; tags: Record<string, string>; interval: number; };
844
844
  logging: { level: string; format: string; destination: string; };
845
845
  }`,
846
- expectedRuleIds: [],
846
+ expectedRuleIds: ["DOC-001"],
847
847
  category: "documentation",
848
848
  difficulty: "medium",
849
849
  },
@@ -881,7 +881,7 @@ func Auth(validator func(string) bool) func(http.Handler) http.Handler {
881
881
  })
882
882
  }
883
883
  }`,
884
- expectedRuleIds: [],
884
+ expectedRuleIds: ["DOC-001"],
885
885
  category: "documentation",
886
886
  difficulty: "easy",
887
887
  },
@@ -1026,7 +1026,7 @@ def get_product(product_id):
1026
1026
  if not product:
1027
1027
  return jsonify({"status": 404, "detail": "Product not found"}), 404 # different JSON format
1028
1028
  return jsonify(product)`,
1029
- expectedRuleIds: [],
1029
+ expectedRuleIds: ["API-001"],
1030
1030
  category: "api-design",
1031
1031
  difficulty: "medium",
1032
1032
  },
@@ -1115,7 +1115,7 @@ AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE
1115
1115
  AWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
1116
1116
  STRIPE_SECRET_KEY=sk_test_FAKE_KEY_FOR_BENCHMARK_TEST
1117
1117
  API_SECRET=my-super-secret-api-key-do-not-share`,
1118
- expectedRuleIds: [],
1118
+ expectedRuleIds: ["SWDEV-001"],
1119
1119
  category: "software-practices",
1120
1120
  difficulty: "easy",
1121
1121
  },
@@ -1438,7 +1438,7 @@ def kill_process(pid):
1438
1438
  def get_disk_usage():
1439
1439
  result = subprocess.run(["wmic", "logicaldisk", "get", "size,freespace"], capture_output=True, text=True, shell=True)
1440
1440
  return result.stdout`,
1441
- expectedRuleIds: [],
1441
+ expectedRuleIds: ["PORTA-001"],
1442
1442
  category: "portability",
1443
1443
  difficulty: "easy",
1444
1444
  },
@@ -1464,7 +1464,7 @@ public class AppConfig
1464
1464
  key.SetValue(name, value);
1465
1465
  }
1466
1466
  }`,
1467
- expectedRuleIds: [],
1467
+ expectedRuleIds: ["PORTA-001"],
1468
1468
  category: "portability",
1469
1469
  difficulty: "medium",
1470
1470
  },
@@ -1481,7 +1481,7 @@ public class AppConfig
1481
1481
  const activeUsers = await db.query("SELECT COUNT(DISTINCT user_id) FROM sessions WHERE last_active > NOW() - INTERVAL '5 minutes'");
1482
1482
  res.json({ stats: stats.rows[0], topProducts: topProducts.rows, activeUsers: activeUsers.rows[0] });
1483
1483
  });`,
1484
- expectedRuleIds: [],
1484
+ expectedRuleIds: ["CACHE-001"],
1485
1485
  category: "caching",
1486
1486
  difficulty: "easy",
1487
1487
  },
@@ -1624,7 +1624,7 @@ const smtpHost = process.env.SMTP_HOST;
1624
1624
  const smtpPort = parseInt(process.env.SMTP_PORT || "587");
1625
1625
  const smtpUser = process.env.SMTP_USER;
1626
1626
  const smtpPass = process.env.SMTP_PASS;`,
1627
- expectedRuleIds: [],
1627
+ expectedRuleIds: ["CFG-001"],
1628
1628
  category: "configuration",
1629
1629
  difficulty: "easy",
1630
1630
  },
@@ -1858,7 +1858,7 @@ def handler(event, context):
1858
1858
  "statusCode": 200,
1859
1859
  "body": json.dumps({"message": f"Hello, {name}!"})
1860
1860
  }`,
1861
- expectedRuleIds: [],
1861
+ expectedRuleIds: ["COST-001"],
1862
1862
  category: "cost-effectiveness",
1863
1863
  difficulty: "easy",
1864
1864
  },
@@ -2240,7 +2240,7 @@ class User:
2240
2240
 
2241
2241
  # Protocol.implements() does not exist
2242
2242
  assert Serializable.implements(User)`,
2243
- expectedRuleIds: [],
2243
+ expectedRuleIds: ["HALLU-001"],
2244
2244
  category: "hallucination-detection",
2245
2245
  difficulty: "medium",
2246
2246
  },
@@ -2252,7 +2252,7 @@ assert Serializable.implements(User)`,
2252
2252
  setTimeout(() => {
2253
2253
  request.abort(); // fetch returns a Promise, not an abortable request
2254
2254
  }, 5000);`,
2255
- expectedRuleIds: [],
2255
+ expectedRuleIds: ["HALLU-001"],
2256
2256
  category: "hallucination-detection",
2257
2257
  difficulty: "easy",
2258
2258
  },
@@ -2295,7 +2295,7 @@ export class OrderService {
2295
2295
  return order;
2296
2296
  }
2297
2297
  }`,
2298
- expectedRuleIds: [],
2298
+ expectedRuleIds: ["STRUCT-001"],
2299
2299
  category: "code-structure",
2300
2300
  difficulty: "medium",
2301
2301
  },
@@ -2416,7 +2416,7 @@ export interface V2Response<T> {
2416
2416
 
2417
2417
  // v1 used: { result: T, error?: string, timestamp: number }
2418
2418
  // These types are incompatible and no documentation explains the migration`,
2419
- expectedRuleIds: [],
2419
+ expectedRuleIds: ["DOC-001"],
2420
2420
  category: "documentation",
2421
2421
  difficulty: "medium",
2422
2422
  },
@@ -2443,7 +2443,7 @@ export interface V2Response<T> {
2443
2443
  return count < 100;
2444
2444
  }
2445
2445
  }`,
2446
- expectedRuleIds: [],
2446
+ expectedRuleIds: ["CLOUD-001"],
2447
2447
  category: "cloud-readiness",
2448
2448
  difficulty: "medium",
2449
2449
  },
@@ -2505,7 +2505,7 @@ const server = new ApolloServer({
2505
2505
 
2506
2506
  // Allows recursive queries:
2507
2507
  // { user { orders { user { orders { user { orders ... } } } } } }`,
2508
- expectedRuleIds: [],
2508
+ expectedRuleIds: ["DATA-001"],
2509
2509
  category: "data-security",
2510
2510
  difficulty: "hard",
2511
2511
  },
@@ -2534,7 +2534,7 @@ async function handleRequest(req: Request) {
2534
2534
  return { status: 500, message: "Internal error" };
2535
2535
  }
2536
2536
  }`,
2537
- expectedRuleIds: [],
2537
+ expectedRuleIds: ["OBS-001"],
2538
2538
  category: "observability",
2539
2539
  difficulty: "medium",
2540
2540
  },
@@ -2620,7 +2620,7 @@ app.post("/api/upload", upload.array("files"), async (req, res) => {
2620
2620
  "clientSecret": "oauth-secret-do-not-share"
2621
2621
  }
2622
2622
  }`,
2623
- expectedRuleIds: [],
2623
+ expectedRuleIds: ["CFG-001"],
2624
2624
  category: "configuration",
2625
2625
  difficulty: "easy",
2626
2626
  },
@@ -2730,7 +2730,7 @@ async function loadProfile() {
2730
2730
  showToast(\`ECONNREFUSED 127.0.0.1:5432 - \${err.code}\`);
2731
2731
  }
2732
2732
  }`,
2733
- expectedRuleIds: [],
2733
+ expectedRuleIds: ["UX-001"],
2734
2734
  category: "user-experience",
2735
2735
  difficulty: "easy",
2736
2736
  },
@@ -2748,7 +2748,7 @@ async function loadProfile() {
2748
2748
  "expresss": "4.18.2"
2749
2749
  }
2750
2750
  }`,
2751
- expectedRuleIds: [],
2751
+ expectedRuleIds: ["DEPS-001"],
2752
2752
  category: "supply-chain",
2753
2753
  difficulty: "medium",
2754
2754
  },
@@ -2842,7 +2842,7 @@ async def task2():
2842
2842
 
2843
2843
  async def main():
2844
2844
  await asyncio.gather(task1(), task2()) # potential deadlock`,
2845
- expectedRuleIds: [],
2845
+ expectedRuleIds: ["CONC-001"],
2846
2846
  category: "concurrency",
2847
2847
  difficulty: "hard",
2848
2848
  },
@@ -2931,7 +2931,7 @@ def run_analysis(data):
2931
2931
  result = process_on_instance(instance_id, data)
2932
2932
  return result
2933
2933
  # Instance never terminated — runs (and costs money) forever`,
2934
- expectedRuleIds: [],
2934
+ expectedRuleIds: ["COST-001"],
2935
2935
  category: "cost-effectiveness",
2936
2936
  difficulty: "hard",
2937
2937
  },
@@ -3157,7 +3157,7 @@ function joinPaths(...parts: string[]): string {
3157
3157
  function normalizePath(p: string): string {
3158
3158
  return p.replace(/\\//g, "\\\\");
3159
3159
  }`,
3160
- expectedRuleIds: [],
3160
+ expectedRuleIds: ["PORTA-001"],
3161
3161
  category: "portability",
3162
3162
  difficulty: "easy",
3163
3163
  },
@@ -3418,7 +3418,7 @@ function signPayload(payload, privateKey) {
3418
3418
  return crypto.signMessage(payload, privateKey, "sha256");
3419
3419
  // crypto.signMessage does not exist; should use crypto.sign() or crypto.createSign()
3420
3420
  }`,
3421
- expectedRuleIds: [],
3421
+ expectedRuleIds: ["HALLU-001"],
3422
3422
  category: "hallucination-detection",
3423
3423
  difficulty: "medium",
3424
3424
  },
@@ -3436,7 +3436,7 @@ public class UserFilter {
3436
3436
  .collect(Collectors.toList());
3437
3437
  }
3438
3438
  }`,
3439
- expectedRuleIds: [],
3439
+ expectedRuleIds: ["HALLU-001"],
3440
3440
  category: "hallucination-detection",
3441
3441
  difficulty: "medium",
3442
3442
  },
@@ -3452,7 +3452,7 @@ public class UserFilter {
3452
3452
  function truncate(text: string, maxLen: number): string {
3453
3453
  return text.slice(0, maxLen); // may split surrogate pairs
3454
3454
  }`,
3455
- expectedRuleIds: [],
3455
+ expectedRuleIds: ["I18N-001"],
3456
3456
  category: "internationalization",
3457
3457
  difficulty: "hard",
3458
3458
  },
@@ -3512,7 +3512,7 @@ export class BillingService {
3512
3512
  return { customerId, amount: usage * rate, currency: "USD" };
3513
3513
  }
3514
3514
  }`,
3515
- expectedRuleIds: [],
3515
+ expectedRuleIds: ["COMP-001"],
3516
3516
  category: "compliance",
3517
3517
  difficulty: "easy",
3518
3518
  },
@@ -3873,7 +3873,7 @@ export async function createUser(
3873
3873
  // Actual is (data, options?) -> User
3874
3874
  return db.insert("users", data);
3875
3875
  }`,
3876
- expectedRuleIds: [],
3876
+ expectedRuleIds: ["DOC-001"],
3877
3877
  category: "documentation",
3878
3878
  difficulty: "easy",
3879
3879
  },
@@ -3892,7 +3892,7 @@ export async function createUser(
3892
3892
  )
3893
3893
  # Race condition: two concurrent requests read same count,
3894
3894
  # both increment to same value, losing one increment`,
3895
- expectedRuleIds: [],
3895
+ expectedRuleIds: ["DB-001"],
3896
3896
  category: "database",
3897
3897
  difficulty: "medium",
3898
3898
  },
@@ -3970,7 +3970,7 @@ func Filter[T implements Comparable](slice []T, pred func(T) bool) []T {
3970
3970
  }
3971
3971
  return result
3972
3972
  }`,
3973
- expectedRuleIds: [],
3973
+ expectedRuleIds: ["HALLU-001"],
3974
3974
  category: "hallucination-detection",
3975
3975
  difficulty: "hard",
3976
3976
  },
@@ -3985,7 +3985,7 @@ func Filter[T implements Comparable](slice []T, pred func(T) bool) []T {
3985
3985
  .error-message:parent(.form-group) {
3986
3986
  border: 2px solid red;
3987
3987
  }`,
3988
- expectedRuleIds: [],
3988
+ expectedRuleIds: ["HALLU-001"],
3989
3989
  category: "hallucination-detection",
3990
3990
  difficulty: "easy",
3991
3991
  },
@@ -4252,7 +4252,7 @@ export async function query(sql: string, params?: any[]) {
4252
4252
 
4253
4253
  // App starts even if critical config is missing/invalid
4254
4254
  // Fails at random point when config is first accessed`,
4255
- expectedRuleIds: [],
4255
+ expectedRuleIds: ["CFG-001"],
4256
4256
  category: "configuration",
4257
4257
  difficulty: "easy",
4258
4258
  },
@@ -4696,7 +4696,7 @@ def verify_token(token):
4696
4696
  Purpose = "backup"
4697
4697
  }
4698
4698
  }`,
4699
- expectedRuleIds: [],
4699
+ expectedRuleIds: ["IAC-001"],
4700
4700
  category: "iac-security",
4701
4701
  difficulty: "easy",
4702
4702
  },
@@ -166,7 +166,7 @@ export default defineConfig({
166
166
  frameguard: "deny",
167
167
  },
168
168
  });`,
169
- expectedRuleIds: [],
169
+ expectedRuleIds: ["HALLU-001"],
170
170
  category: "hallucination",
171
171
  difficulty: "medium",
172
172
  },
@@ -203,7 +203,7 @@ export const Card = styled.div\`
203
203
  outline: 2px solid #007bff;
204
204
  }
205
205
  \`;`,
206
- expectedRuleIds: [],
206
+ expectedRuleIds: ["HALLU-001"],
207
207
  category: "hallucination",
208
208
  difficulty: "medium",
209
209
  },
@@ -233,7 +233,7 @@ def process_config(data: StrictDict[str, int]) -> OrderedDefaultDict:
233
233
  result[secure_key].append(expensive_compute(str(value)))
234
234
 
235
235
  return result`,
236
- expectedRuleIds: [],
236
+ expectedRuleIds: ["HALLU-001"],
237
237
  category: "hallucination",
238
238
  difficulty: "easy",
239
239
  },
@@ -1133,7 +1133,7 @@ export function shouldRetry(statusCode: number, attempt: number): boolean {
1133
1133
 
1134
1134
  return { results, successful, withTimeout };
1135
1135
  }`,
1136
- expectedRuleIds: [],
1136
+ expectedRuleIds: ["HALLU-001"],
1137
1137
  category: "hallucination",
1138
1138
  difficulty: "medium",
1139
1139
  },
@@ -1166,7 +1166,7 @@ type ImmutableConfig = Frozen<UserConfig>; // Completely fabricated
1166
1166
  function applyConfig(config: ValidatedConfig): void {
1167
1167
  console.log(config.host, config.port);
1168
1168
  }`,
1169
- expectedRuleIds: [],
1169
+ expectedRuleIds: ["HALLU-001"],
1170
1170
  category: "hallucination",
1171
1171
  difficulty: "hard",
1172
1172
  },
@@ -1320,7 +1320,7 @@ fn main() {
1320
1320
  let nested: Result<Result<i32, &str>, &str> = Ok(Ok(42));
1321
1321
  let flat = nested.flatten();
1322
1322
  }`,
1323
- expectedRuleIds: [],
1323
+ expectedRuleIds: ["HALLU-001"],
1324
1324
  category: "hallucination",
1325
1325
  difficulty: "hard",
1326
1326
  },
@@ -1354,7 +1354,7 @@ public class DataProcessor {
1354
1354
  return grouped;
1355
1355
  }
1356
1356
  }`,
1357
- expectedRuleIds: [],
1357
+ expectedRuleIds: ["HALLU-001"],
1358
1358
  category: "hallucination",
1359
1359
  difficulty: "medium",
1360
1360
  },
@@ -1498,7 +1498,7 @@ public class DataService {
1498
1498
  var stats = orders.Statistics(o => o.Total); // Statistics doesn't exist
1499
1499
  }
1500
1500
  }`,
1501
- expectedRuleIds: [],
1501
+ expectedRuleIds: ["HALLU-001"],
1502
1502
  category: "hallucination",
1503
1503
  difficulty: "medium",
1504
1504
  },
@@ -2004,7 +2004,7 @@ resource "aws_lambda_auto_scale" "api" { # Resource doesn't exist
2004
2004
  max_concurrency = 1000
2005
2005
  auto_warm = true
2006
2006
  }`,
2007
- expectedRuleIds: [],
2007
+ expectedRuleIds: ["HALLU-001"],
2008
2008
  category: "hallucination",
2009
2009
  difficulty: "hard",
2010
2010
  },
@@ -2098,7 +2098,7 @@ module.exports = {
2098
2098
  }),
2099
2099
  ],
2100
2100
  };`,
2101
- expectedRuleIds: [],
2101
+ expectedRuleIds: ["HALLU-001"],
2102
2102
  category: "hallucination",
2103
2103
  difficulty: "medium",
2104
2104
  },
@@ -2143,7 +2143,7 @@ export async function getAnalytics() {
2143
2143
 
2144
2144
  return { usersByRole, orders, stats };
2145
2145
  }`,
2146
- expectedRuleIds: [],
2146
+ expectedRuleIds: ["HALLU-001"],
2147
2147
  category: "hallucination",
2148
2148
  difficulty: "medium",
2149
2149
  },
@@ -2202,7 +2202,7 @@ type Mutation {
2202
2202
  rateLimitMode: process.env.NODE_RATE_LIMIT || "sliding-window",
2203
2203
  };
2204
2204
  }`,
2205
- expectedRuleIds: [],
2205
+ expectedRuleIds: ["HALLU-001"],
2206
2206
  category: "hallucination",
2207
2207
  difficulty: "easy",
2208
2208
  },
@@ -2231,7 +2231,7 @@ HAVING COUNT(*) > 5
2231
2231
  ORDER BY AVG(salary) DESC
2232
2232
  FILL_GAPS(date, INTERVAL '1 day') -- Not real SQL
2233
2233
  LIMIT 100;`,
2234
- expectedRuleIds: [],
2234
+ expectedRuleIds: ["HALLU-001"],
2235
2235
  category: "hallucination",
2236
2236
  difficulty: "medium",
2237
2237
  },
@@ -2268,7 +2268,7 @@ export async function initServer() {
2268
2268
 
2269
2269
  return { pool, pipeline, cpuUsage };
2270
2270
  }`,
2271
- expectedRuleIds: [],
2271
+ expectedRuleIds: ["HALLU-001"],
2272
2272
  category: "hallucination",
2273
2273
  difficulty: "medium",
2274
2274
  },
@@ -2699,7 +2699,7 @@ export async function setupInfrastructure() {
2699
2699
  KeepWarm: true,
2700
2700
  }));
2701
2701
  }`,
2702
- expectedRuleIds: [],
2702
+ expectedRuleIds: ["HALLU-001"],
2703
2703
  category: "hallucination",
2704
2704
  difficulty: "medium",
2705
2705
  },
@@ -2742,7 +2742,7 @@ export async function analyzeRepo(owner: string, repo: string) {
2742
2742
 
2743
2743
  return { security, codeReview, deps, metrics };
2744
2744
  }`,
2745
- expectedRuleIds: [],
2745
+ expectedRuleIds: ["HALLU-001"],
2746
2746
  category: "hallucination",
2747
2747
  difficulty: "hard",
2748
2748
  },
@@ -251,7 +251,7 @@ export function storeUserDocuments(userId: string, documents: Document[]) {
251
251
 
252
252
  return user;
253
253
  }`,
254
- expectedRuleIds: [],
254
+ expectedRuleIds: ["COMP-001"],
255
255
  category: "compliance",
256
256
  difficulty: "hard",
257
257
  },
@@ -347,7 +347,7 @@ export function trackUserBehavior(userId: string, event: string, properties: any
347
347
  base_rate *= 1.1
348
348
 
349
349
  return base_rate`,
350
- expectedRuleIds: [],
350
+ expectedRuleIds: ["ETHICS-001"],
351
351
  category: "ethics",
352
352
  difficulty: "easy",
353
353
  },
@@ -462,7 +462,7 @@ export function trackUserBehavior(userId: string, event: string, properties: any
462
462
  });
463
463
  }
464
464
  }`,
465
- expectedRuleIds: [],
465
+ expectedRuleIds: ["ETHICS-001"],
466
466
  category: "ethics",
467
467
  difficulty: "hard",
468
468
  },
@@ -751,7 +751,7 @@ export function getTimeAgo(seconds: number): string {
751
751
  const days = Math.floor(hours / 24);
752
752
  return days + " day" + (days !== 1 ? "s" : "") + " ago";
753
753
  }`,
754
- expectedRuleIds: [],
754
+ expectedRuleIds: ["I18N-001"],
755
755
  category: "internationalization",
756
756
  difficulty: "medium",
757
757
  },
@@ -781,7 +781,7 @@ export function sanitizeUsername(username: string): string {
781
781
  return username.replace(/[^a-zA-Z0-9_]/g, "");
782
782
  // Removes valid Unicode letters
783
783
  }`,
784
- expectedRuleIds: [],
784
+ expectedRuleIds: ["I18N-001"],
785
785
  category: "internationalization",
786
786
  difficulty: "medium",
787
787
  },
@@ -1147,7 +1147,7 @@ export function runBackup() {
1147
1147
  // inferred profiles, third-party shared data
1148
1148
  }
1149
1149
  }`,
1150
- expectedRuleIds: [],
1150
+ expectedRuleIds: ["COMP-001"],
1151
1151
  category: "compliance",
1152
1152
  difficulty: "hard",
1153
1153
  },
@@ -1214,7 +1214,7 @@ app.listen(3000, () => {
1214
1214
  return match.rows[0]?.user_id;
1215
1215
  }
1216
1216
  }`,
1217
- expectedRuleIds: [],
1217
+ expectedRuleIds: ["COMP-001"],
1218
1218
  category: "compliance",
1219
1219
  difficulty: "hard",
1220
1220
  },
@@ -1251,7 +1251,7 @@ export async function createCDNDistribution(originBucket: string) {
1251
1251
  },
1252
1252
  });
1253
1253
  }`,
1254
- expectedRuleIds: [],
1254
+ expectedRuleIds: ["SOV-001"],
1255
1255
  category: "sovereignty",
1256
1256
  difficulty: "hard",
1257
1257
  },
@@ -1363,7 +1363,7 @@ export async function createCDNDistribution(originBucket: string) {
1363
1363
  // No transparency, no due process
1364
1364
  }
1365
1365
  }`,
1366
- expectedRuleIds: [],
1366
+ expectedRuleIds: ["ETHICS-001"],
1367
1367
  category: "ethics",
1368
1368
  difficulty: "hard",
1369
1369
  },
@@ -1406,7 +1406,7 @@ def screen_resume(model, resume_data):
1406
1406
  # No explanation for rejection
1407
1407
  # No human review requirement
1408
1408
  }`,
1409
- expectedRuleIds: [],
1409
+ expectedRuleIds: ["ETHICS-001"],
1410
1410
  category: "ethics",
1411
1411
  difficulty: "hard",
1412
1412
  },
@@ -1555,7 +1555,7 @@ def screen_resume(model, resume_data):
1555
1555
  </div>
1556
1556
  );
1557
1557
  }`,
1558
- expectedRuleIds: [],
1558
+ expectedRuleIds: ["I18N-001"],
1559
1559
  category: "internationalization",
1560
1560
  difficulty: "hard",
1561
1561
  },
@@ -1795,7 +1795,7 @@ export class OrderService {
1795
1795
  "webpackk": "^5.90.0"
1796
1796
  }
1797
1797
  }`,
1798
- expectedRuleIds: [],
1798
+ expectedRuleIds: ["DEPS-001"],
1799
1799
  category: "dependency-health",
1800
1800
  difficulty: "easy",
1801
1801
  },