@kevinrabun/judges 3.115.4 → 3.117.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/accessibility.judge.md +7 -0
- package/agents/agent-instructions.judge.md +7 -0
- package/agents/ai-code-safety.judge.md +7 -0
- package/agents/api-contract.judge.md +7 -0
- package/agents/api-design.judge.md +7 -0
- package/agents/authentication.judge.md +7 -0
- package/agents/backwards-compatibility.judge.md +7 -0
- package/agents/caching.judge.md +7 -0
- package/agents/ci-cd.judge.md +7 -0
- package/agents/cloud-readiness.judge.md +7 -0
- package/agents/concurrency.judge.md +7 -0
- package/agents/configuration-management.judge.md +7 -0
- package/agents/cybersecurity.judge.md +7 -0
- package/agents/data-security.judge.md +7 -0
- package/agents/dependency-health.judge.md +7 -0
- package/agents/documentation.judge.md +7 -0
- package/agents/error-handling.judge.md +7 -0
- package/agents/ethics-bias.judge.md +7 -0
- package/agents/false-positive-review.judge.md +12 -0
- package/agents/framework-safety.judge.md +7 -0
- package/agents/hallucination-detection.judge.md +13 -0
- package/agents/iac-security.judge.md +7 -0
- package/agents/intent-alignment.judge.md +13 -0
- package/agents/logging-privacy.judge.md +7 -0
- package/agents/maintainability.judge.md +7 -0
- package/agents/multi-turn-coherence.judge.md +7 -0
- package/agents/observability.judge.md +7 -0
- package/agents/portability.judge.md +7 -0
- package/agents/rate-limiting.judge.md +7 -0
- package/agents/reliability.judge.md +7 -0
- package/agents/security.judge.md +13 -0
- package/agents/testing.judge.md +7 -0
- package/agents/ux.judge.md +7 -0
- package/dist/a2a-protocol.d.ts +136 -0
- package/dist/a2a-protocol.js +218 -0
- package/dist/api.d.ts +21 -3
- package/dist/api.js +21 -1
- package/dist/audit-trail.d.ts +245 -0
- package/dist/audit-trail.js +257 -0
- package/dist/commands/benchmark-advanced.js +51 -51
- package/dist/commands/benchmark-ai-agents.js +16 -16
- package/dist/commands/benchmark-compliance-ethics.js +12 -12
- package/dist/commands/benchmark-expanded-2.js +2 -2
- package/dist/commands/benchmark-expanded.js +2 -2
- package/dist/commands/benchmark-infrastructure.js +12 -12
- package/dist/commands/benchmark-languages.js +11 -11
- package/dist/commands/benchmark-quality-ops.js +7 -7
- package/dist/commands/benchmark-security-deep.js +9 -9
- package/dist/commands/benchmark.js +1 -1
- package/dist/commands/llm-benchmark-optimizer.d.ts +78 -0
- package/dist/commands/llm-benchmark-optimizer.js +241 -0
- package/dist/commands/llm-benchmark.d.ts +4 -2
- package/dist/commands/llm-benchmark.js +40 -12
- package/dist/escalation.d.ts +100 -0
- package/dist/escalation.js +292 -0
- package/dist/evaluation-session.d.ts +74 -0
- package/dist/evaluation-session.js +152 -0
- package/dist/evaluators/index.d.ts +23 -1
- package/dist/evaluators/index.js +192 -3
- package/dist/evaluators/judge-selector.d.ts +19 -0
- package/dist/evaluators/judge-selector.js +141 -0
- package/dist/evaluators/recall-boost.d.ts +27 -0
- package/dist/evaluators/recall-boost.js +409 -0
- package/dist/feedback-loop.d.ts +62 -0
- package/dist/feedback-loop.js +179 -0
- package/dist/index.js +2 -0
- package/dist/judges/accessibility.js +7 -0
- package/dist/judges/agent-instructions.js +7 -0
- package/dist/judges/ai-code-safety.js +7 -0
- package/dist/judges/api-contract.js +7 -0
- package/dist/judges/api-design.js +7 -0
- package/dist/judges/authentication.js +7 -0
- package/dist/judges/backwards-compatibility.js +7 -0
- package/dist/judges/caching.js +7 -0
- package/dist/judges/ci-cd.js +7 -0
- package/dist/judges/cloud-readiness.js +7 -0
- package/dist/judges/concurrency.js +7 -0
- package/dist/judges/configuration-management.js +7 -0
- package/dist/judges/cybersecurity.js +7 -0
- package/dist/judges/data-security.js +7 -0
- package/dist/judges/dependency-health.js +7 -0
- package/dist/judges/documentation.js +7 -0
- package/dist/judges/error-handling.js +7 -0
- package/dist/judges/ethics-bias.js +7 -0
- package/dist/judges/false-positive-review.js +13 -1
- package/dist/judges/framework-safety.js +7 -0
- package/dist/judges/hallucination-detection.js +14 -1
- package/dist/judges/iac-security.js +7 -0
- package/dist/judges/intent-alignment.js +14 -1
- package/dist/judges/logging-privacy.js +7 -0
- package/dist/judges/maintainability.js +7 -0
- package/dist/judges/multi-turn-coherence.js +7 -0
- package/dist/judges/observability.js +7 -0
- package/dist/judges/portability.js +7 -0
- package/dist/judges/rate-limiting.js +7 -0
- package/dist/judges/reliability.js +7 -0
- package/dist/judges/security.js +14 -1
- package/dist/judges/testing.js +7 -0
- package/dist/judges/ux.js +7 -0
- package/dist/review-conversation.d.ts +87 -0
- package/dist/review-conversation.js +307 -0
- package/dist/sast-integration.d.ts +112 -0
- package/dist/sast-integration.js +215 -0
- package/dist/tools/register-evaluation.js +208 -8
- package/dist/tools/register-fix.js +24 -1
- package/dist/tools/register-resources.d.ts +6 -0
- package/dist/tools/register-resources.js +177 -0
- package/dist/tools/register-review.js +26 -1
- package/dist/tools/register-workflow.js +384 -11
- package/dist/tools/validation.d.ts +13 -0
- package/dist/tools/validation.js +77 -0
- package/dist/types.d.ts +122 -0
- package/package.json +25 -12
- package/server.json +2 -2
|
@@ -37,7 +37,7 @@ async function loadConfig(path: string) {
|
|
|
37
37
|
function hashPassword(password: string): string {
|
|
38
38
|
return crypto.hash("sha256", password);
|
|
39
39
|
}`,
|
|
40
|
-
expectedRuleIds: [],
|
|
40
|
+
expectedRuleIds: ["HALLU-001"],
|
|
41
41
|
category: "hallucination-detection",
|
|
42
42
|
difficulty: "medium",
|
|
43
43
|
},
|
|
@@ -76,7 +76,7 @@ async def read_users_me(token: str = Depends(oauth2_scheme)):
|
|
|
76
76
|
code: `function searchUsers(users, query) {
|
|
77
77
|
return users.filter(u => u.name.contains(query));
|
|
78
78
|
}`,
|
|
79
|
-
expectedRuleIds: [],
|
|
79
|
+
expectedRuleIds: ["HALLU-001"],
|
|
80
80
|
category: "hallucination-detection",
|
|
81
81
|
difficulty: "easy",
|
|
82
82
|
},
|
|
@@ -88,7 +88,7 @@ async def read_users_me(token: str = Depends(oauth2_scheme)):
|
|
|
88
88
|
|
|
89
89
|
def ensure_dir(path):
|
|
90
90
|
os.makedirs(path, exist_ok=True, permissions=0o755)`,
|
|
91
|
-
expectedRuleIds: [],
|
|
91
|
+
expectedRuleIds: ["HALLU-001"],
|
|
92
92
|
category: "hallucination-detection",
|
|
93
93
|
difficulty: "medium",
|
|
94
94
|
},
|
|
@@ -121,7 +121,7 @@ fn generate_token() -> String {
|
|
|
121
121
|
let rng = SecureRandom::new();
|
|
122
122
|
rng.generate_hex(32)
|
|
123
123
|
}`,
|
|
124
|
-
expectedRuleIds: [],
|
|
124
|
+
expectedRuleIds: ["HALLU-001"],
|
|
125
125
|
category: "hallucination-detection",
|
|
126
126
|
difficulty: "hard",
|
|
127
127
|
},
|
|
@@ -134,7 +134,7 @@ fn generate_token() -> String {
|
|
|
134
134
|
async def fetch_data(url):
|
|
135
135
|
response = await requests.async_get(url, timeout=30)
|
|
136
136
|
return response.json()`,
|
|
137
|
-
expectedRuleIds: [],
|
|
137
|
+
expectedRuleIds: ["HALLU-001"],
|
|
138
138
|
category: "hallucination-detection",
|
|
139
139
|
difficulty: "medium",
|
|
140
140
|
},
|
|
@@ -163,7 +163,7 @@ public class Utils {
|
|
|
163
163
|
return items.stream().toArray(String::new);
|
|
164
164
|
}
|
|
165
165
|
}`,
|
|
166
|
-
expectedRuleIds: [],
|
|
166
|
+
expectedRuleIds: ["HALLU-001"],
|
|
167
167
|
category: "hallucination-detection",
|
|
168
168
|
difficulty: "hard",
|
|
169
169
|
},
|
|
@@ -176,7 +176,7 @@ public class Utils {
|
|
|
176
176
|
def load_config(path):
|
|
177
177
|
data = json.loads(path)
|
|
178
178
|
return data["database"]`,
|
|
179
|
-
expectedRuleIds: [],
|
|
179
|
+
expectedRuleIds: ["HALLU-001"],
|
|
180
180
|
category: "hallucination-detection",
|
|
181
181
|
difficulty: "easy",
|
|
182
182
|
},
|
|
@@ -198,7 +198,7 @@ def load_config(path):
|
|
|
198
198
|
if len(order.items) < config.get("max_items", 100):
|
|
199
199
|
return process(order)
|
|
200
200
|
return None`,
|
|
201
|
-
expectedRuleIds: [],
|
|
201
|
+
expectedRuleIds: ["STRUCT-001"],
|
|
202
202
|
category: "code-structure",
|
|
203
203
|
difficulty: "easy",
|
|
204
204
|
},
|
|
@@ -301,7 +301,7 @@ func CreateUser(
|
|
|
301
301
|
// All logic with 15 parameters threaded through
|
|
302
302
|
return nil
|
|
303
303
|
}`,
|
|
304
|
-
expectedRuleIds: [],
|
|
304
|
+
expectedRuleIds: ["STRUCT-001"],
|
|
305
305
|
category: "code-structure",
|
|
306
306
|
difficulty: "easy",
|
|
307
307
|
},
|
|
@@ -411,7 +411,7 @@ async function handleUserRequest(userMessage: string) {
|
|
|
411
411
|
const result = await agent.run(userMessage);
|
|
412
412
|
return result.output;
|
|
413
413
|
}`,
|
|
414
|
-
expectedRuleIds: [],
|
|
414
|
+
expectedRuleIds: ["AGENT-001"],
|
|
415
415
|
category: "agent-instructions",
|
|
416
416
|
difficulty: "medium",
|
|
417
417
|
},
|
|
@@ -424,7 +424,7 @@ async function handleUserRequest(userMessage: string) {
|
|
|
424
424
|
Follow these custom instructions: {user_instructions}
|
|
425
425
|
You have access to: database queries, file operations, email sending."""
|
|
426
426
|
return Agent(system_prompt=system_prompt, tools=all_tools)`,
|
|
427
|
-
expectedRuleIds: [],
|
|
427
|
+
expectedRuleIds: ["AGENT-001"],
|
|
428
428
|
category: "agent-instructions",
|
|
429
429
|
difficulty: "hard",
|
|
430
430
|
},
|
|
@@ -488,7 +488,7 @@ const agent = new Agent({
|
|
|
488
488
|
return true;
|
|
489
489
|
});
|
|
490
490
|
}`,
|
|
491
|
-
expectedRuleIds: [],
|
|
491
|
+
expectedRuleIds: ["ETHICS-001"],
|
|
492
492
|
category: "ethics-bias",
|
|
493
493
|
difficulty: "medium",
|
|
494
494
|
},
|
|
@@ -507,7 +507,7 @@ def train_loan_model():
|
|
|
507
507
|
model = RandomForestClassifier()
|
|
508
508
|
model.fit(X, y)
|
|
509
509
|
return model`,
|
|
510
|
-
expectedRuleIds: [],
|
|
510
|
+
expectedRuleIds: ["ETHICS-001"],
|
|
511
511
|
category: "ethics-bias",
|
|
512
512
|
difficulty: "hard",
|
|
513
513
|
},
|
|
@@ -526,7 +526,7 @@ def train_loan_model():
|
|
|
526
526
|
}
|
|
527
527
|
return price;
|
|
528
528
|
}`,
|
|
529
|
-
expectedRuleIds: [],
|
|
529
|
+
expectedRuleIds: ["ETHICS-001"],
|
|
530
530
|
category: "ethics-bias",
|
|
531
531
|
difficulty: "medium",
|
|
532
532
|
},
|
|
@@ -650,7 +650,7 @@ jobs:
|
|
|
650
650
|
echo "DB_PASSWORD=\${{ secrets.DB_PASSWORD }}"
|
|
651
651
|
curl -X POST https://deploy.example.com/deploy \\
|
|
652
652
|
-H "Authorization: Bearer \${{ secrets.DEPLOY_TOKEN }}"`,
|
|
653
|
-
expectedRuleIds: [],
|
|
653
|
+
expectedRuleIds: ["CICD-001"],
|
|
654
654
|
category: "ci-cd",
|
|
655
655
|
difficulty: "easy",
|
|
656
656
|
},
|
|
@@ -669,7 +669,7 @@ jobs:
|
|
|
669
669
|
- uses: some-org/untrusted-action@master
|
|
670
670
|
- uses: random-user/deploy-action@latest
|
|
671
671
|
- run: npm test`,
|
|
672
|
-
expectedRuleIds: [],
|
|
672
|
+
expectedRuleIds: ["CICD-001"],
|
|
673
673
|
category: "ci-cd",
|
|
674
674
|
difficulty: "medium",
|
|
675
675
|
},
|
|
@@ -730,7 +730,7 @@ export async function fetchUser(id: string, options?: FetchOptions): Promise<Use
|
|
|
730
730
|
return db.users.findUnique({ where: { id }, ...options });
|
|
731
731
|
}
|
|
732
732
|
// No backward-compatible alias, no deprecation notice`,
|
|
733
|
-
expectedRuleIds: [],
|
|
733
|
+
expectedRuleIds: ["COMPAT-001"],
|
|
734
734
|
category: "backwards-compatibility",
|
|
735
735
|
difficulty: "medium",
|
|
736
736
|
},
|
|
@@ -773,7 +773,7 @@ app.get("/api/users/:id", async (req, res) => {
|
|
|
773
773
|
return repository.search(query, limit, "relevance");
|
|
774
774
|
}
|
|
775
775
|
}`,
|
|
776
|
-
expectedRuleIds: [],
|
|
776
|
+
expectedRuleIds: ["COMPAT-001"],
|
|
777
777
|
category: "backwards-compatibility",
|
|
778
778
|
difficulty: "easy",
|
|
779
779
|
},
|
|
@@ -828,7 +828,7 @@ class TokenManager:
|
|
|
828
828
|
if hmac.compare_digest(sig, expected):
|
|
829
829
|
return json.loads(data)
|
|
830
830
|
return None`,
|
|
831
|
-
expectedRuleIds: [],
|
|
831
|
+
expectedRuleIds: ["DOC-001"],
|
|
832
832
|
category: "documentation",
|
|
833
833
|
difficulty: "easy",
|
|
834
834
|
},
|
|
@@ -843,7 +843,7 @@ class TokenManager:
|
|
|
843
843
|
metrics: { enabled: boolean; prefix: string; tags: Record<string, string>; interval: number; };
|
|
844
844
|
logging: { level: string; format: string; destination: string; };
|
|
845
845
|
}`,
|
|
846
|
-
expectedRuleIds: [],
|
|
846
|
+
expectedRuleIds: ["DOC-001"],
|
|
847
847
|
category: "documentation",
|
|
848
848
|
difficulty: "medium",
|
|
849
849
|
},
|
|
@@ -881,7 +881,7 @@ func Auth(validator func(string) bool) func(http.Handler) http.Handler {
|
|
|
881
881
|
})
|
|
882
882
|
}
|
|
883
883
|
}`,
|
|
884
|
-
expectedRuleIds: [],
|
|
884
|
+
expectedRuleIds: ["DOC-001"],
|
|
885
885
|
category: "documentation",
|
|
886
886
|
difficulty: "easy",
|
|
887
887
|
},
|
|
@@ -1026,7 +1026,7 @@ def get_product(product_id):
|
|
|
1026
1026
|
if not product:
|
|
1027
1027
|
return jsonify({"status": 404, "detail": "Product not found"}), 404 # different JSON format
|
|
1028
1028
|
return jsonify(product)`,
|
|
1029
|
-
expectedRuleIds: [],
|
|
1029
|
+
expectedRuleIds: ["API-001"],
|
|
1030
1030
|
category: "api-design",
|
|
1031
1031
|
difficulty: "medium",
|
|
1032
1032
|
},
|
|
@@ -1115,7 +1115,7 @@ AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE
|
|
|
1115
1115
|
AWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
|
|
1116
1116
|
STRIPE_SECRET_KEY=sk_test_FAKE_KEY_FOR_BENCHMARK_TEST
|
|
1117
1117
|
API_SECRET=my-super-secret-api-key-do-not-share`,
|
|
1118
|
-
expectedRuleIds: [],
|
|
1118
|
+
expectedRuleIds: ["SWDEV-001"],
|
|
1119
1119
|
category: "software-practices",
|
|
1120
1120
|
difficulty: "easy",
|
|
1121
1121
|
},
|
|
@@ -1438,7 +1438,7 @@ def kill_process(pid):
|
|
|
1438
1438
|
def get_disk_usage():
|
|
1439
1439
|
result = subprocess.run(["wmic", "logicaldisk", "get", "size,freespace"], capture_output=True, text=True, shell=True)
|
|
1440
1440
|
return result.stdout`,
|
|
1441
|
-
expectedRuleIds: [],
|
|
1441
|
+
expectedRuleIds: ["PORTA-001"],
|
|
1442
1442
|
category: "portability",
|
|
1443
1443
|
difficulty: "easy",
|
|
1444
1444
|
},
|
|
@@ -1464,7 +1464,7 @@ public class AppConfig
|
|
|
1464
1464
|
key.SetValue(name, value);
|
|
1465
1465
|
}
|
|
1466
1466
|
}`,
|
|
1467
|
-
expectedRuleIds: [],
|
|
1467
|
+
expectedRuleIds: ["PORTA-001"],
|
|
1468
1468
|
category: "portability",
|
|
1469
1469
|
difficulty: "medium",
|
|
1470
1470
|
},
|
|
@@ -1481,7 +1481,7 @@ public class AppConfig
|
|
|
1481
1481
|
const activeUsers = await db.query("SELECT COUNT(DISTINCT user_id) FROM sessions WHERE last_active > NOW() - INTERVAL '5 minutes'");
|
|
1482
1482
|
res.json({ stats: stats.rows[0], topProducts: topProducts.rows, activeUsers: activeUsers.rows[0] });
|
|
1483
1483
|
});`,
|
|
1484
|
-
expectedRuleIds: [],
|
|
1484
|
+
expectedRuleIds: ["CACHE-001"],
|
|
1485
1485
|
category: "caching",
|
|
1486
1486
|
difficulty: "easy",
|
|
1487
1487
|
},
|
|
@@ -1624,7 +1624,7 @@ const smtpHost = process.env.SMTP_HOST;
|
|
|
1624
1624
|
const smtpPort = parseInt(process.env.SMTP_PORT || "587");
|
|
1625
1625
|
const smtpUser = process.env.SMTP_USER;
|
|
1626
1626
|
const smtpPass = process.env.SMTP_PASS;`,
|
|
1627
|
-
expectedRuleIds: [],
|
|
1627
|
+
expectedRuleIds: ["CFG-001"],
|
|
1628
1628
|
category: "configuration",
|
|
1629
1629
|
difficulty: "easy",
|
|
1630
1630
|
},
|
|
@@ -1858,7 +1858,7 @@ def handler(event, context):
|
|
|
1858
1858
|
"statusCode": 200,
|
|
1859
1859
|
"body": json.dumps({"message": f"Hello, {name}!"})
|
|
1860
1860
|
}`,
|
|
1861
|
-
expectedRuleIds: [],
|
|
1861
|
+
expectedRuleIds: ["COST-001"],
|
|
1862
1862
|
category: "cost-effectiveness",
|
|
1863
1863
|
difficulty: "easy",
|
|
1864
1864
|
},
|
|
@@ -2240,7 +2240,7 @@ class User:
|
|
|
2240
2240
|
|
|
2241
2241
|
# Protocol.implements() does not exist
|
|
2242
2242
|
assert Serializable.implements(User)`,
|
|
2243
|
-
expectedRuleIds: [],
|
|
2243
|
+
expectedRuleIds: ["HALLU-001"],
|
|
2244
2244
|
category: "hallucination-detection",
|
|
2245
2245
|
difficulty: "medium",
|
|
2246
2246
|
},
|
|
@@ -2252,7 +2252,7 @@ assert Serializable.implements(User)`,
|
|
|
2252
2252
|
setTimeout(() => {
|
|
2253
2253
|
request.abort(); // fetch returns a Promise, not an abortable request
|
|
2254
2254
|
}, 5000);`,
|
|
2255
|
-
expectedRuleIds: [],
|
|
2255
|
+
expectedRuleIds: ["HALLU-001"],
|
|
2256
2256
|
category: "hallucination-detection",
|
|
2257
2257
|
difficulty: "easy",
|
|
2258
2258
|
},
|
|
@@ -2295,7 +2295,7 @@ export class OrderService {
|
|
|
2295
2295
|
return order;
|
|
2296
2296
|
}
|
|
2297
2297
|
}`,
|
|
2298
|
-
expectedRuleIds: [],
|
|
2298
|
+
expectedRuleIds: ["STRUCT-001"],
|
|
2299
2299
|
category: "code-structure",
|
|
2300
2300
|
difficulty: "medium",
|
|
2301
2301
|
},
|
|
@@ -2416,7 +2416,7 @@ export interface V2Response<T> {
|
|
|
2416
2416
|
|
|
2417
2417
|
// v1 used: { result: T, error?: string, timestamp: number }
|
|
2418
2418
|
// These types are incompatible and no documentation explains the migration`,
|
|
2419
|
-
expectedRuleIds: [],
|
|
2419
|
+
expectedRuleIds: ["DOC-001"],
|
|
2420
2420
|
category: "documentation",
|
|
2421
2421
|
difficulty: "medium",
|
|
2422
2422
|
},
|
|
@@ -2443,7 +2443,7 @@ export interface V2Response<T> {
|
|
|
2443
2443
|
return count < 100;
|
|
2444
2444
|
}
|
|
2445
2445
|
}`,
|
|
2446
|
-
expectedRuleIds: [],
|
|
2446
|
+
expectedRuleIds: ["CLOUD-001"],
|
|
2447
2447
|
category: "cloud-readiness",
|
|
2448
2448
|
difficulty: "medium",
|
|
2449
2449
|
},
|
|
@@ -2505,7 +2505,7 @@ const server = new ApolloServer({
|
|
|
2505
2505
|
|
|
2506
2506
|
// Allows recursive queries:
|
|
2507
2507
|
// { user { orders { user { orders { user { orders ... } } } } } }`,
|
|
2508
|
-
expectedRuleIds: [],
|
|
2508
|
+
expectedRuleIds: ["DATA-001"],
|
|
2509
2509
|
category: "data-security",
|
|
2510
2510
|
difficulty: "hard",
|
|
2511
2511
|
},
|
|
@@ -2534,7 +2534,7 @@ async function handleRequest(req: Request) {
|
|
|
2534
2534
|
return { status: 500, message: "Internal error" };
|
|
2535
2535
|
}
|
|
2536
2536
|
}`,
|
|
2537
|
-
expectedRuleIds: [],
|
|
2537
|
+
expectedRuleIds: ["OBS-001"],
|
|
2538
2538
|
category: "observability",
|
|
2539
2539
|
difficulty: "medium",
|
|
2540
2540
|
},
|
|
@@ -2620,7 +2620,7 @@ app.post("/api/upload", upload.array("files"), async (req, res) => {
|
|
|
2620
2620
|
"clientSecret": "oauth-secret-do-not-share"
|
|
2621
2621
|
}
|
|
2622
2622
|
}`,
|
|
2623
|
-
expectedRuleIds: [],
|
|
2623
|
+
expectedRuleIds: ["CFG-001"],
|
|
2624
2624
|
category: "configuration",
|
|
2625
2625
|
difficulty: "easy",
|
|
2626
2626
|
},
|
|
@@ -2730,7 +2730,7 @@ async function loadProfile() {
|
|
|
2730
2730
|
showToast(\`ECONNREFUSED 127.0.0.1:5432 - \${err.code}\`);
|
|
2731
2731
|
}
|
|
2732
2732
|
}`,
|
|
2733
|
-
expectedRuleIds: [],
|
|
2733
|
+
expectedRuleIds: ["UX-001"],
|
|
2734
2734
|
category: "user-experience",
|
|
2735
2735
|
difficulty: "easy",
|
|
2736
2736
|
},
|
|
@@ -2748,7 +2748,7 @@ async function loadProfile() {
|
|
|
2748
2748
|
"expresss": "4.18.2"
|
|
2749
2749
|
}
|
|
2750
2750
|
}`,
|
|
2751
|
-
expectedRuleIds: [],
|
|
2751
|
+
expectedRuleIds: ["DEPS-001"],
|
|
2752
2752
|
category: "supply-chain",
|
|
2753
2753
|
difficulty: "medium",
|
|
2754
2754
|
},
|
|
@@ -2842,7 +2842,7 @@ async def task2():
|
|
|
2842
2842
|
|
|
2843
2843
|
async def main():
|
|
2844
2844
|
await asyncio.gather(task1(), task2()) # potential deadlock`,
|
|
2845
|
-
expectedRuleIds: [],
|
|
2845
|
+
expectedRuleIds: ["CONC-001"],
|
|
2846
2846
|
category: "concurrency",
|
|
2847
2847
|
difficulty: "hard",
|
|
2848
2848
|
},
|
|
@@ -2931,7 +2931,7 @@ def run_analysis(data):
|
|
|
2931
2931
|
result = process_on_instance(instance_id, data)
|
|
2932
2932
|
return result
|
|
2933
2933
|
# Instance never terminated — runs (and costs money) forever`,
|
|
2934
|
-
expectedRuleIds: [],
|
|
2934
|
+
expectedRuleIds: ["COST-001"],
|
|
2935
2935
|
category: "cost-effectiveness",
|
|
2936
2936
|
difficulty: "hard",
|
|
2937
2937
|
},
|
|
@@ -3157,7 +3157,7 @@ function joinPaths(...parts: string[]): string {
|
|
|
3157
3157
|
function normalizePath(p: string): string {
|
|
3158
3158
|
return p.replace(/\\//g, "\\\\");
|
|
3159
3159
|
}`,
|
|
3160
|
-
expectedRuleIds: [],
|
|
3160
|
+
expectedRuleIds: ["PORTA-001"],
|
|
3161
3161
|
category: "portability",
|
|
3162
3162
|
difficulty: "easy",
|
|
3163
3163
|
},
|
|
@@ -3418,7 +3418,7 @@ function signPayload(payload, privateKey) {
|
|
|
3418
3418
|
return crypto.signMessage(payload, privateKey, "sha256");
|
|
3419
3419
|
// crypto.signMessage does not exist; should use crypto.sign() or crypto.createSign()
|
|
3420
3420
|
}`,
|
|
3421
|
-
expectedRuleIds: [],
|
|
3421
|
+
expectedRuleIds: ["HALLU-001"],
|
|
3422
3422
|
category: "hallucination-detection",
|
|
3423
3423
|
difficulty: "medium",
|
|
3424
3424
|
},
|
|
@@ -3436,7 +3436,7 @@ public class UserFilter {
|
|
|
3436
3436
|
.collect(Collectors.toList());
|
|
3437
3437
|
}
|
|
3438
3438
|
}`,
|
|
3439
|
-
expectedRuleIds: [],
|
|
3439
|
+
expectedRuleIds: ["HALLU-001"],
|
|
3440
3440
|
category: "hallucination-detection",
|
|
3441
3441
|
difficulty: "medium",
|
|
3442
3442
|
},
|
|
@@ -3452,7 +3452,7 @@ public class UserFilter {
|
|
|
3452
3452
|
function truncate(text: string, maxLen: number): string {
|
|
3453
3453
|
return text.slice(0, maxLen); // may split surrogate pairs
|
|
3454
3454
|
}`,
|
|
3455
|
-
expectedRuleIds: [],
|
|
3455
|
+
expectedRuleIds: ["I18N-001"],
|
|
3456
3456
|
category: "internationalization",
|
|
3457
3457
|
difficulty: "hard",
|
|
3458
3458
|
},
|
|
@@ -3512,7 +3512,7 @@ export class BillingService {
|
|
|
3512
3512
|
return { customerId, amount: usage * rate, currency: "USD" };
|
|
3513
3513
|
}
|
|
3514
3514
|
}`,
|
|
3515
|
-
expectedRuleIds: [],
|
|
3515
|
+
expectedRuleIds: ["COMP-001"],
|
|
3516
3516
|
category: "compliance",
|
|
3517
3517
|
difficulty: "easy",
|
|
3518
3518
|
},
|
|
@@ -3873,7 +3873,7 @@ export async function createUser(
|
|
|
3873
3873
|
// Actual is (data, options?) -> User
|
|
3874
3874
|
return db.insert("users", data);
|
|
3875
3875
|
}`,
|
|
3876
|
-
expectedRuleIds: [],
|
|
3876
|
+
expectedRuleIds: ["DOC-001"],
|
|
3877
3877
|
category: "documentation",
|
|
3878
3878
|
difficulty: "easy",
|
|
3879
3879
|
},
|
|
@@ -3892,7 +3892,7 @@ export async function createUser(
|
|
|
3892
3892
|
)
|
|
3893
3893
|
# Race condition: two concurrent requests read same count,
|
|
3894
3894
|
# both increment to same value, losing one increment`,
|
|
3895
|
-
expectedRuleIds: [],
|
|
3895
|
+
expectedRuleIds: ["DB-001"],
|
|
3896
3896
|
category: "database",
|
|
3897
3897
|
difficulty: "medium",
|
|
3898
3898
|
},
|
|
@@ -3970,7 +3970,7 @@ func Filter[T implements Comparable](slice []T, pred func(T) bool) []T {
|
|
|
3970
3970
|
}
|
|
3971
3971
|
return result
|
|
3972
3972
|
}`,
|
|
3973
|
-
expectedRuleIds: [],
|
|
3973
|
+
expectedRuleIds: ["HALLU-001"],
|
|
3974
3974
|
category: "hallucination-detection",
|
|
3975
3975
|
difficulty: "hard",
|
|
3976
3976
|
},
|
|
@@ -3985,7 +3985,7 @@ func Filter[T implements Comparable](slice []T, pred func(T) bool) []T {
|
|
|
3985
3985
|
.error-message:parent(.form-group) {
|
|
3986
3986
|
border: 2px solid red;
|
|
3987
3987
|
}`,
|
|
3988
|
-
expectedRuleIds: [],
|
|
3988
|
+
expectedRuleIds: ["HALLU-001"],
|
|
3989
3989
|
category: "hallucination-detection",
|
|
3990
3990
|
difficulty: "easy",
|
|
3991
3991
|
},
|
|
@@ -4252,7 +4252,7 @@ export async function query(sql: string, params?: any[]) {
|
|
|
4252
4252
|
|
|
4253
4253
|
// App starts even if critical config is missing/invalid
|
|
4254
4254
|
// Fails at random point when config is first accessed`,
|
|
4255
|
-
expectedRuleIds: [],
|
|
4255
|
+
expectedRuleIds: ["CFG-001"],
|
|
4256
4256
|
category: "configuration",
|
|
4257
4257
|
difficulty: "easy",
|
|
4258
4258
|
},
|
|
@@ -4696,7 +4696,7 @@ def verify_token(token):
|
|
|
4696
4696
|
Purpose = "backup"
|
|
4697
4697
|
}
|
|
4698
4698
|
}`,
|
|
4699
|
-
expectedRuleIds: [],
|
|
4699
|
+
expectedRuleIds: ["IAC-001"],
|
|
4700
4700
|
category: "iac-security",
|
|
4701
4701
|
difficulty: "easy",
|
|
4702
4702
|
},
|
|
@@ -166,7 +166,7 @@ export default defineConfig({
|
|
|
166
166
|
frameguard: "deny",
|
|
167
167
|
},
|
|
168
168
|
});`,
|
|
169
|
-
expectedRuleIds: [],
|
|
169
|
+
expectedRuleIds: ["HALLU-001"],
|
|
170
170
|
category: "hallucination",
|
|
171
171
|
difficulty: "medium",
|
|
172
172
|
},
|
|
@@ -203,7 +203,7 @@ export const Card = styled.div\`
|
|
|
203
203
|
outline: 2px solid #007bff;
|
|
204
204
|
}
|
|
205
205
|
\`;`,
|
|
206
|
-
expectedRuleIds: [],
|
|
206
|
+
expectedRuleIds: ["HALLU-001"],
|
|
207
207
|
category: "hallucination",
|
|
208
208
|
difficulty: "medium",
|
|
209
209
|
},
|
|
@@ -233,7 +233,7 @@ def process_config(data: StrictDict[str, int]) -> OrderedDefaultDict:
|
|
|
233
233
|
result[secure_key].append(expensive_compute(str(value)))
|
|
234
234
|
|
|
235
235
|
return result`,
|
|
236
|
-
expectedRuleIds: [],
|
|
236
|
+
expectedRuleIds: ["HALLU-001"],
|
|
237
237
|
category: "hallucination",
|
|
238
238
|
difficulty: "easy",
|
|
239
239
|
},
|
|
@@ -1133,7 +1133,7 @@ export function shouldRetry(statusCode: number, attempt: number): boolean {
|
|
|
1133
1133
|
|
|
1134
1134
|
return { results, successful, withTimeout };
|
|
1135
1135
|
}`,
|
|
1136
|
-
expectedRuleIds: [],
|
|
1136
|
+
expectedRuleIds: ["HALLU-001"],
|
|
1137
1137
|
category: "hallucination",
|
|
1138
1138
|
difficulty: "medium",
|
|
1139
1139
|
},
|
|
@@ -1166,7 +1166,7 @@ type ImmutableConfig = Frozen<UserConfig>; // Completely fabricated
|
|
|
1166
1166
|
function applyConfig(config: ValidatedConfig): void {
|
|
1167
1167
|
console.log(config.host, config.port);
|
|
1168
1168
|
}`,
|
|
1169
|
-
expectedRuleIds: [],
|
|
1169
|
+
expectedRuleIds: ["HALLU-001"],
|
|
1170
1170
|
category: "hallucination",
|
|
1171
1171
|
difficulty: "hard",
|
|
1172
1172
|
},
|
|
@@ -1320,7 +1320,7 @@ fn main() {
|
|
|
1320
1320
|
let nested: Result<Result<i32, &str>, &str> = Ok(Ok(42));
|
|
1321
1321
|
let flat = nested.flatten();
|
|
1322
1322
|
}`,
|
|
1323
|
-
expectedRuleIds: [],
|
|
1323
|
+
expectedRuleIds: ["HALLU-001"],
|
|
1324
1324
|
category: "hallucination",
|
|
1325
1325
|
difficulty: "hard",
|
|
1326
1326
|
},
|
|
@@ -1354,7 +1354,7 @@ public class DataProcessor {
|
|
|
1354
1354
|
return grouped;
|
|
1355
1355
|
}
|
|
1356
1356
|
}`,
|
|
1357
|
-
expectedRuleIds: [],
|
|
1357
|
+
expectedRuleIds: ["HALLU-001"],
|
|
1358
1358
|
category: "hallucination",
|
|
1359
1359
|
difficulty: "medium",
|
|
1360
1360
|
},
|
|
@@ -1498,7 +1498,7 @@ public class DataService {
|
|
|
1498
1498
|
var stats = orders.Statistics(o => o.Total); // Statistics doesn't exist
|
|
1499
1499
|
}
|
|
1500
1500
|
}`,
|
|
1501
|
-
expectedRuleIds: [],
|
|
1501
|
+
expectedRuleIds: ["HALLU-001"],
|
|
1502
1502
|
category: "hallucination",
|
|
1503
1503
|
difficulty: "medium",
|
|
1504
1504
|
},
|
|
@@ -2004,7 +2004,7 @@ resource "aws_lambda_auto_scale" "api" { # Resource doesn't exist
|
|
|
2004
2004
|
max_concurrency = 1000
|
|
2005
2005
|
auto_warm = true
|
|
2006
2006
|
}`,
|
|
2007
|
-
expectedRuleIds: [],
|
|
2007
|
+
expectedRuleIds: ["HALLU-001"],
|
|
2008
2008
|
category: "hallucination",
|
|
2009
2009
|
difficulty: "hard",
|
|
2010
2010
|
},
|
|
@@ -2098,7 +2098,7 @@ module.exports = {
|
|
|
2098
2098
|
}),
|
|
2099
2099
|
],
|
|
2100
2100
|
};`,
|
|
2101
|
-
expectedRuleIds: [],
|
|
2101
|
+
expectedRuleIds: ["HALLU-001"],
|
|
2102
2102
|
category: "hallucination",
|
|
2103
2103
|
difficulty: "medium",
|
|
2104
2104
|
},
|
|
@@ -2143,7 +2143,7 @@ export async function getAnalytics() {
|
|
|
2143
2143
|
|
|
2144
2144
|
return { usersByRole, orders, stats };
|
|
2145
2145
|
}`,
|
|
2146
|
-
expectedRuleIds: [],
|
|
2146
|
+
expectedRuleIds: ["HALLU-001"],
|
|
2147
2147
|
category: "hallucination",
|
|
2148
2148
|
difficulty: "medium",
|
|
2149
2149
|
},
|
|
@@ -2202,7 +2202,7 @@ type Mutation {
|
|
|
2202
2202
|
rateLimitMode: process.env.NODE_RATE_LIMIT || "sliding-window",
|
|
2203
2203
|
};
|
|
2204
2204
|
}`,
|
|
2205
|
-
expectedRuleIds: [],
|
|
2205
|
+
expectedRuleIds: ["HALLU-001"],
|
|
2206
2206
|
category: "hallucination",
|
|
2207
2207
|
difficulty: "easy",
|
|
2208
2208
|
},
|
|
@@ -2231,7 +2231,7 @@ HAVING COUNT(*) > 5
|
|
|
2231
2231
|
ORDER BY AVG(salary) DESC
|
|
2232
2232
|
FILL_GAPS(date, INTERVAL '1 day') -- Not real SQL
|
|
2233
2233
|
LIMIT 100;`,
|
|
2234
|
-
expectedRuleIds: [],
|
|
2234
|
+
expectedRuleIds: ["HALLU-001"],
|
|
2235
2235
|
category: "hallucination",
|
|
2236
2236
|
difficulty: "medium",
|
|
2237
2237
|
},
|
|
@@ -2268,7 +2268,7 @@ export async function initServer() {
|
|
|
2268
2268
|
|
|
2269
2269
|
return { pool, pipeline, cpuUsage };
|
|
2270
2270
|
}`,
|
|
2271
|
-
expectedRuleIds: [],
|
|
2271
|
+
expectedRuleIds: ["HALLU-001"],
|
|
2272
2272
|
category: "hallucination",
|
|
2273
2273
|
difficulty: "medium",
|
|
2274
2274
|
},
|
|
@@ -2699,7 +2699,7 @@ export async function setupInfrastructure() {
|
|
|
2699
2699
|
KeepWarm: true,
|
|
2700
2700
|
}));
|
|
2701
2701
|
}`,
|
|
2702
|
-
expectedRuleIds: [],
|
|
2702
|
+
expectedRuleIds: ["HALLU-001"],
|
|
2703
2703
|
category: "hallucination",
|
|
2704
2704
|
difficulty: "medium",
|
|
2705
2705
|
},
|
|
@@ -2742,7 +2742,7 @@ export async function analyzeRepo(owner: string, repo: string) {
|
|
|
2742
2742
|
|
|
2743
2743
|
return { security, codeReview, deps, metrics };
|
|
2744
2744
|
}`,
|
|
2745
|
-
expectedRuleIds: [],
|
|
2745
|
+
expectedRuleIds: ["HALLU-001"],
|
|
2746
2746
|
category: "hallucination",
|
|
2747
2747
|
difficulty: "hard",
|
|
2748
2748
|
},
|
|
@@ -251,7 +251,7 @@ export function storeUserDocuments(userId: string, documents: Document[]) {
|
|
|
251
251
|
|
|
252
252
|
return user;
|
|
253
253
|
}`,
|
|
254
|
-
expectedRuleIds: [],
|
|
254
|
+
expectedRuleIds: ["COMP-001"],
|
|
255
255
|
category: "compliance",
|
|
256
256
|
difficulty: "hard",
|
|
257
257
|
},
|
|
@@ -347,7 +347,7 @@ export function trackUserBehavior(userId: string, event: string, properties: any
|
|
|
347
347
|
base_rate *= 1.1
|
|
348
348
|
|
|
349
349
|
return base_rate`,
|
|
350
|
-
expectedRuleIds: [],
|
|
350
|
+
expectedRuleIds: ["ETHICS-001"],
|
|
351
351
|
category: "ethics",
|
|
352
352
|
difficulty: "easy",
|
|
353
353
|
},
|
|
@@ -462,7 +462,7 @@ export function trackUserBehavior(userId: string, event: string, properties: any
|
|
|
462
462
|
});
|
|
463
463
|
}
|
|
464
464
|
}`,
|
|
465
|
-
expectedRuleIds: [],
|
|
465
|
+
expectedRuleIds: ["ETHICS-001"],
|
|
466
466
|
category: "ethics",
|
|
467
467
|
difficulty: "hard",
|
|
468
468
|
},
|
|
@@ -751,7 +751,7 @@ export function getTimeAgo(seconds: number): string {
|
|
|
751
751
|
const days = Math.floor(hours / 24);
|
|
752
752
|
return days + " day" + (days !== 1 ? "s" : "") + " ago";
|
|
753
753
|
}`,
|
|
754
|
-
expectedRuleIds: [],
|
|
754
|
+
expectedRuleIds: ["I18N-001"],
|
|
755
755
|
category: "internationalization",
|
|
756
756
|
difficulty: "medium",
|
|
757
757
|
},
|
|
@@ -781,7 +781,7 @@ export function sanitizeUsername(username: string): string {
|
|
|
781
781
|
return username.replace(/[^a-zA-Z0-9_]/g, "");
|
|
782
782
|
// Removes valid Unicode letters
|
|
783
783
|
}`,
|
|
784
|
-
expectedRuleIds: [],
|
|
784
|
+
expectedRuleIds: ["I18N-001"],
|
|
785
785
|
category: "internationalization",
|
|
786
786
|
difficulty: "medium",
|
|
787
787
|
},
|
|
@@ -1147,7 +1147,7 @@ export function runBackup() {
|
|
|
1147
1147
|
// inferred profiles, third-party shared data
|
|
1148
1148
|
}
|
|
1149
1149
|
}`,
|
|
1150
|
-
expectedRuleIds: [],
|
|
1150
|
+
expectedRuleIds: ["COMP-001"],
|
|
1151
1151
|
category: "compliance",
|
|
1152
1152
|
difficulty: "hard",
|
|
1153
1153
|
},
|
|
@@ -1214,7 +1214,7 @@ app.listen(3000, () => {
|
|
|
1214
1214
|
return match.rows[0]?.user_id;
|
|
1215
1215
|
}
|
|
1216
1216
|
}`,
|
|
1217
|
-
expectedRuleIds: [],
|
|
1217
|
+
expectedRuleIds: ["COMP-001"],
|
|
1218
1218
|
category: "compliance",
|
|
1219
1219
|
difficulty: "hard",
|
|
1220
1220
|
},
|
|
@@ -1251,7 +1251,7 @@ export async function createCDNDistribution(originBucket: string) {
|
|
|
1251
1251
|
},
|
|
1252
1252
|
});
|
|
1253
1253
|
}`,
|
|
1254
|
-
expectedRuleIds: [],
|
|
1254
|
+
expectedRuleIds: ["SOV-001"],
|
|
1255
1255
|
category: "sovereignty",
|
|
1256
1256
|
difficulty: "hard",
|
|
1257
1257
|
},
|
|
@@ -1363,7 +1363,7 @@ export async function createCDNDistribution(originBucket: string) {
|
|
|
1363
1363
|
// No transparency, no due process
|
|
1364
1364
|
}
|
|
1365
1365
|
}`,
|
|
1366
|
-
expectedRuleIds: [],
|
|
1366
|
+
expectedRuleIds: ["ETHICS-001"],
|
|
1367
1367
|
category: "ethics",
|
|
1368
1368
|
difficulty: "hard",
|
|
1369
1369
|
},
|
|
@@ -1406,7 +1406,7 @@ def screen_resume(model, resume_data):
|
|
|
1406
1406
|
# No explanation for rejection
|
|
1407
1407
|
# No human review requirement
|
|
1408
1408
|
}`,
|
|
1409
|
-
expectedRuleIds: [],
|
|
1409
|
+
expectedRuleIds: ["ETHICS-001"],
|
|
1410
1410
|
category: "ethics",
|
|
1411
1411
|
difficulty: "hard",
|
|
1412
1412
|
},
|
|
@@ -1555,7 +1555,7 @@ def screen_resume(model, resume_data):
|
|
|
1555
1555
|
</div>
|
|
1556
1556
|
);
|
|
1557
1557
|
}`,
|
|
1558
|
-
expectedRuleIds: [],
|
|
1558
|
+
expectedRuleIds: ["I18N-001"],
|
|
1559
1559
|
category: "internationalization",
|
|
1560
1560
|
difficulty: "hard",
|
|
1561
1561
|
},
|
|
@@ -1795,7 +1795,7 @@ export class OrderService {
|
|
|
1795
1795
|
"webpackk": "^5.90.0"
|
|
1796
1796
|
}
|
|
1797
1797
|
}`,
|
|
1798
|
-
expectedRuleIds: [],
|
|
1798
|
+
expectedRuleIds: ["DEPS-001"],
|
|
1799
1799
|
category: "dependency-health",
|
|
1800
1800
|
difficulty: "easy",
|
|
1801
1801
|
},
|