@kevinrabun/judges-cli 3.124.5 → 3.126.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/accessibility.judge.md +1 -1
- package/agents/agent-instructions.judge.md +1 -1
- package/agents/ai-code-safety.judge.md +10 -1
- package/agents/api-design.judge.md +1 -1
- package/agents/authentication.judge.md +1 -1
- package/agents/backwards-compatibility.judge.md +1 -1
- package/agents/caching.judge.md +1 -1
- package/agents/ci-cd.judge.md +1 -1
- package/agents/cloud-readiness.judge.md +1 -1
- package/agents/code-structure.judge.md +1 -1
- package/agents/compliance.judge.md +1 -1
- package/agents/concurrency.judge.md +1 -1
- package/agents/configuration-management.judge.md +1 -1
- package/agents/cost-effectiveness.judge.md +9 -1
- package/agents/cybersecurity.judge.md +1 -1
- package/agents/data-security.judge.md +1 -1
- package/agents/data-sovereignty.judge.md +1 -1
- package/agents/database.judge.md +1 -1
- package/agents/dependency-health.judge.md +1 -1
- package/agents/documentation.judge.md +1 -1
- package/agents/error-handling.judge.md +1 -1
- package/agents/ethics-bias.judge.md +1 -1
- package/agents/framework-safety.judge.md +9 -1
- package/agents/hallucination-detection.judge.md +1 -1
- package/agents/iac-security.judge.md +1 -1
- package/agents/intent-alignment.judge.md +1 -1
- package/agents/internationalization.judge.md +1 -1
- package/agents/logging-privacy.judge.md +1 -1
- package/agents/logic-review.judge.md +8 -0
- package/agents/maintainability.judge.md +10 -1
- package/agents/observability.judge.md +1 -1
- package/agents/performance.judge.md +1 -1
- package/agents/portability.judge.md +1 -1
- package/agents/rate-limiting.judge.md +1 -1
- package/agents/reliability.judge.md +1 -1
- package/agents/scalability.judge.md +1 -1
- package/agents/security.judge.md +1 -1
- package/agents/software-practices.judge.md +1 -1
- package/agents/testing.judge.md +1 -1
- package/agents/ux.judge.md +1 -1
- package/dist/api.d.ts +2 -1
- package/dist/api.js +2 -0
- package/dist/cli-formatters.js +38 -0
- package/dist/cli.js +27 -1
- package/dist/commands/llm-benchmark.js +18 -5
- package/dist/evaluators/index.js +163 -1
- package/dist/evaluators/shared.js +33 -0
- package/dist/judges/accessibility.js +1 -1
- package/dist/judges/agent-instructions.js +1 -1
- package/dist/judges/ai-code-safety.js +10 -1
- package/dist/judges/api-design.js +1 -1
- package/dist/judges/authentication.js +1 -1
- package/dist/judges/backwards-compatibility.js +1 -1
- package/dist/judges/caching.js +1 -1
- package/dist/judges/ci-cd.js +1 -1
- package/dist/judges/cloud-readiness.js +1 -1
- package/dist/judges/code-structure.js +1 -1
- package/dist/judges/compliance.js +1 -1
- package/dist/judges/concurrency.js +1 -1
- package/dist/judges/configuration-management.js +1 -1
- package/dist/judges/cost-effectiveness.js +9 -1
- package/dist/judges/cybersecurity.js +1 -1
- package/dist/judges/data-security.js +1 -1
- package/dist/judges/data-sovereignty.js +1 -1
- package/dist/judges/database.js +1 -1
- package/dist/judges/dependency-health.js +1 -1
- package/dist/judges/documentation.js +1 -1
- package/dist/judges/error-handling.js +1 -1
- package/dist/judges/ethics-bias.js +1 -1
- package/dist/judges/framework-safety.js +9 -1
- package/dist/judges/hallucination-detection.js +1 -1
- package/dist/judges/iac-security.js +1 -1
- package/dist/judges/intent-alignment.js +1 -1
- package/dist/judges/internationalization.js +1 -1
- package/dist/judges/logging-privacy.js +1 -1
- package/dist/judges/logic-review.js +9 -1
- package/dist/judges/maintainability.js +10 -1
- package/dist/judges/observability.js +1 -1
- package/dist/judges/performance.js +1 -1
- package/dist/judges/portability.js +1 -1
- package/dist/judges/rate-limiting.js +1 -1
- package/dist/judges/reliability.js +1 -1
- package/dist/judges/scalability.js +1 -1
- package/dist/judges/security.js +1 -1
- package/dist/judges/software-practices.js +1 -1
- package/dist/judges/testing.js +1 -1
- package/dist/judges/ux.js +1 -1
- package/dist/regulatory-scope.d.ts +27 -0
- package/dist/regulatory-scope.js +181 -0
- package/dist/tools/prompts.d.ts +1 -1
- package/dist/tools/prompts.js +3 -1
- package/dist/types.d.ts +87 -0
- package/package.json +1 -1
package/dist/evaluators/index.js
CHANGED
|
@@ -6,6 +6,7 @@ import { analyzeStructure } from "../ast/index.js";
|
|
|
6
6
|
import { analyzeTaintFlows } from "../ast/index.js";
|
|
7
7
|
import { LRUCache, contentHash } from "../cache.js";
|
|
8
8
|
import { getSharedDiskCache } from "../disk-cache.js";
|
|
9
|
+
import { filterByRegulatoryScope } from "../regulatory-scope.js";
|
|
9
10
|
// ─── Shared Utilities ────────────────────────────────────────────────────────
|
|
10
11
|
import { calculateScore, deriveVerdict, buildSummary, buildTribunalSummary, formatVerdictAsMarkdown, formatEvaluationAsMarkdown, classifyFile, shouldRunAbsenceRules, applyConfig, applyFrameworkAwareness, } from "./shared.js";
|
|
11
12
|
// ─── Extracted Modules ───────────────────────────────────────────────────────
|
|
@@ -414,6 +415,137 @@ function synthesizeReviewDecision(findings) {
|
|
|
414
415
|
blockingIssues,
|
|
415
416
|
};
|
|
416
417
|
}
|
|
418
|
+
// ─── Human Focus Guide ────────────────────────────────────────────────────────
|
|
419
|
+
/**
 * Synthesize a Human Focus Guide from tribunal findings.
 *
 * Categorizes findings into three buckets:
 * - **Trust**: findings with confidence ≥ 0.8 that are not absence-based
 * - **Verify**: absence-based findings (regardless of confidence) and
 *   findings with confidence < 0.8
 * - **Blind spots**: areas automated analysis cannot evaluate (business logic,
 *   architecture, and code characteristics detected heuristically below)
 *
 * A finding without a `confidence` value is treated as 0.7 (→ Verify).
 *
 * @param {Array<object>} findings - Findings to categorize. The fields read are
 *   `ruleId`, `title`, `severity`, `confidence`, `lineNumbers`,
 *   `isAbsenceBased` and `provenance`. A nullish value is treated as empty.
 * @param {string} [code] - Source text scanned for blind-spot heuristics.
 *   When empty or not a string, only the core blind spot is reported.
 * @param {string} [language] - Currently unused; kept so existing callers
 *   that pass it keep working.
 * @returns {{trust: object[], verify: object[], blindSpots: Array<{area: string, guidance: string}>, summary: string}}
 */
function synthesizeHumanFocusGuide(findings, code, language) {
    const trust = [];
    const verify = [];
    for (const f of findings ?? []) {
        const conf = f.confidence ?? 0.7;
        const item = {
            ruleId: f.ruleId,
            title: f.title,
            severity: f.severity,
            confidence: conf,
            lineNumbers: f.lineNumbers,
            reason: "",
        };
        // Absence-based findings are checked first: they always need human
        // verification, even when reported with high confidence.
        if (f.isAbsenceBased) {
            item.reason = "Absence-based — the detected issue may be handled in another file";
            verify.push(item);
        }
        else if (conf >= 0.8) {
            const structurallyConfirmed = f.provenance === "ast-confirmed" || f.provenance === "taint-flow";
            item.reason = structurallyConfirmed
                ? "AST/taint-flow confirmed with high confidence"
                : "High confidence with concrete evidence";
            trust.push(item);
        }
        else {
            const pct = Math.round(conf * 100);
            item.reason = conf >= 0.5
                ? `Moderate confidence (${pct}%) — verify manually`
                : `Low confidence (${pct}%) — may be a false positive`;
            verify.push(item);
        }
    }
    // ── Blind spots: areas automated analysis cannot evaluate ──
    // The business-logic entry is always present, so blindSpots is never empty.
    const blindSpots = [
        {
            area: "Business Logic Correctness",
            guidance: "Verify that the code implements the intended requirements correctly. Automated analysis checks for patterns and vulnerabilities but cannot validate business rules, domain constraints, or functional correctness.",
        },
    ];
    // Code-characteristic-based blind spots (all keyword/regex heuristics).
    if (typeof code === "string" && code.length > 0) {
        // Split once; the count is reused by the branching and architecture checks.
        const lineCount = code.split("\n").length;
        // Complex branching. Note: `\?\s*:` only matches a `?` followed (after
        // optional whitespace) by `:`; it is a rough signal, not a parser.
        const branchCount = (code.match(/\bif\b|\belse\b|\bswitch\b|\bcase\b|\?\s*:/g) || []).length;
        if (branchCount > lineCount * 0.15 && branchCount > 10) {
            blindSpots.push({
                area: "Complex Control Flow",
                guidance: `This code has dense branching logic (~${branchCount} branch points). Review edge cases, boundary conditions, and off-by-one errors that pattern matching cannot reliably detect.`,
            });
        }
        // External API/service calls
        if (/fetch\(|axios\.|http\.|https\.|\.request\(|urllib|requests\.|HttpClient|WebClient/i.test(code)) {
            blindSpots.push({
                area: "External Service Integration",
                guidance: "This code calls external services. Verify timeout behavior, retry logic, circuit breaking, and graceful degradation when services are unavailable. Automated analysis can detect missing patterns but cannot validate the integration logic.",
            });
        }
        // Financial/monetary operations
        if (/price|amount|balance|payment|invoice|refund|discount|tax|currency|decimal|money/i.test(code)) {
            blindSpots.push({
                area: "Financial/Monetary Calculations",
                guidance: "This code handles monetary values. Verify rounding behavior, currency precision, and that floating-point arithmetic is not used for financial calculations.",
            });
        }
        // Complex regex: slash-delimited runs of 30+ characters on one line
        const complexRegex = (code.match(/\/[^/\n]{30,}\//g) || []).length;
        if (complexRegex > 0) {
            blindSpots.push({
                area: "Complex Regular Expressions",
                guidance: `Found ${complexRegex} complex regex pattern(s). Verify they match the intended inputs and don't have catastrophic backtracking on adversarial input.`,
            });
        }
        // State machines / workflow
        if (/state\s*[=:]\s*['"][^'"]+['"]|status\s*===?\s*['"]|transition|workflow|step.*next/i.test(code)) {
            blindSpots.push({
                area: "State Management / Workflow Logic",
                guidance: "This code manages state transitions or workflow steps. Verify that all valid state transitions are handled and invalid transitions are rejected. Automated analysis cannot validate state machine correctness.",
            });
        }
        // PII/sensitive data handling
        if (/\b(email|ssn|social.security|phone.number|address|birth.date|passport|national.id|credit.card)\b/i.test(code)) {
            blindSpots.push({
                area: "PII / Sensitive Data Handling",
                guidance: "This code handles personally identifiable information. Verify data minimization, consent tracking, retention policies, and that PII is not logged or transmitted unnecessarily.",
            });
        }
        // Architecture blind spot (always relevant for non-trivial code)
        if (lineCount > 50) {
            blindSpots.push({
                area: "Architectural Fit",
                guidance: "Verify this code fits the project's architectural patterns (service boundaries, dependency direction, naming conventions). Automated analysis evaluates code in isolation and cannot assess architectural context.",
            });
        }
    }
    // ── Build summary ──
    const trustCount = trust.length;
    const verifyCount = verify.length;
    const blindCount = blindSpots.length;
    const pluralSuffix = (count) => (count > 1 ? "s" : "");
    const parts = [];
    if (trustCount > 0) {
        parts.push(`${trustCount} high-confidence finding${pluralSuffix(trustCount)} you can act on directly`);
    }
    if (verifyCount > 0) {
        parts.push(`${verifyCount} finding${pluralSuffix(verifyCount)} that need your judgment`);
    }
    if (blindCount > 0) {
        parts.push(`${blindCount} area${pluralSuffix(blindCount)} that automated analysis cannot evaluate`);
    }
    // Choose the message by whether any findings exist. Testing `parts.length`
    // would never select the clean-code message, because blindSpots always
    // contributes a part.
    const summary = trustCount + verifyCount > 0
        ? `Human reviewer: ${parts.join(", ")}. Focus your review time on the "Verify" and "Blind Spots" sections — the "Trust" findings have strong automated evidence.`
        : "No findings — code looks clean. Focus your review on business logic correctness and architectural fit.";
    return { trust, verify, blindSpots, summary };
}
|
|
417
549
|
/**
|
|
418
550
|
* Cap the number of findings by priority-sorting and keeping only
|
|
419
551
|
* the top N. Ensures high-severity / high-confidence findings always survive.
|
|
@@ -571,6 +703,16 @@ export function evaluateWithTribunal(code, language, context, options) {
|
|
|
571
703
|
}
|
|
572
704
|
}
|
|
573
705
|
}
|
|
706
|
+
// ── Regulatory scope filtering ──
|
|
707
|
+
// When regulatoryScope is set in config, suppress findings that cite ONLY
|
|
708
|
+
// out-of-scope regulatory frameworks.
|
|
709
|
+
let regulatorySuppressed = 0;
|
|
710
|
+
if (options?.config?.regulatoryScope && options.config.regulatoryScope.length > 0) {
|
|
711
|
+
const scopeResult = filterByRegulatoryScope(configFiltered, options.config.regulatoryScope);
|
|
712
|
+
configFiltered.length = 0;
|
|
713
|
+
configFiltered.push(...scopeResult.findings);
|
|
714
|
+
regulatorySuppressed = scopeResult.suppressed;
|
|
715
|
+
}
|
|
574
716
|
// ── Feedback-driven confidence calibration & auto-tuning ──
|
|
575
717
|
// When options.calibrate is set, load the feedback store and apply:
|
|
576
718
|
// 1. Auto-suppression of rules with FP rate ≥ 80%
|
|
@@ -709,8 +851,27 @@ export function evaluateWithTribunal(code, language, context, options) {
|
|
|
709
851
|
...(owaspLlmTop10 ? { owaspLlmTop10 } : {}),
|
|
710
852
|
};
|
|
711
853
|
});
|
|
854
|
+
// ── Consensus-based suppression ──
|
|
855
|
+
// When consensusThreshold is set: if a supermajority of judges reported
|
|
856
|
+
// zero findings, suppress findings from the minority outlier judges.
|
|
857
|
+
// This catches cases where most judges agree code is clean but a few
|
|
858
|
+
// structurally over-flag (e.g. error-handling, testing).
|
|
859
|
+
let consensusSuppressed = 0;
|
|
860
|
+
let postConsensuFindings = allFindings;
|
|
861
|
+
const consensusThreshold = options?.config?.consensusThreshold;
|
|
862
|
+
if (consensusThreshold !== undefined && consensusThreshold > 0 && evaluations.length > 0) {
|
|
863
|
+
const zeroFindingJudges = evaluations.filter((e) => e.findings.length === 0).length;
|
|
864
|
+
const totalJudges = evaluations.length;
|
|
865
|
+
const cleanRatio = zeroFindingJudges / totalJudges;
|
|
866
|
+
if (cleanRatio >= consensusThreshold) {
|
|
867
|
+
// Majority says clean — suppress minority findings (keep critical severity)
|
|
868
|
+
const before = postConsensuFindings.length;
|
|
869
|
+
postConsensuFindings = postConsensuFindings.filter((f) => f.severity === "critical");
|
|
870
|
+
consensusSuppressed = before - postConsensuFindings.length;
|
|
871
|
+
}
|
|
872
|
+
}
|
|
712
873
|
// ── Structured CWE/OWASP IDs and Learn More URLs ──
|
|
713
|
-
const enrichedFindings = enrichWithSecurityIds(
|
|
874
|
+
const enrichedFindings = enrichWithSecurityIds(postConsensuFindings);
|
|
714
875
|
const mustFixGate = evaluateMustFixGate(enrichedFindings, options?.mustFixGate);
|
|
715
876
|
const criticalCount = enrichedFindings.filter((f) => f.severity === "critical").length;
|
|
716
877
|
const highCount = enrichedFindings.filter((f) => f.severity === "high").length;
|
|
@@ -741,6 +902,7 @@ export function evaluateWithTribunal(code, language, context, options) {
|
|
|
741
902
|
})),
|
|
742
903
|
},
|
|
743
904
|
reviewDecision: synthesizeReviewDecision(enrichedFindings),
|
|
905
|
+
humanFocusGuide: synthesizeHumanFocusGuide(enrichedFindings, code, language),
|
|
744
906
|
};
|
|
745
907
|
// ── Deep review prompt attachment (P0.1) ──
|
|
746
908
|
// When deepReview is enabled, build and attach a structured LLM prompt
|
|
@@ -1036,6 +1036,39 @@ export function formatVerdictAsMarkdown(verdict) {
|
|
|
1036
1036
|
md += `---\n\n`;
|
|
1037
1037
|
}
|
|
1038
1038
|
}
|
|
1039
|
+
// Human Focus Guide
|
|
1040
|
+
if (verdict.humanFocusGuide) {
|
|
1041
|
+
const guide = verdict.humanFocusGuide;
|
|
1042
|
+
md += `## 👤 Human Reviewer Focus Guide\n\n`;
|
|
1043
|
+
md += `${guide.summary}\n\n`;
|
|
1044
|
+
if (guide.trust.length > 0) {
|
|
1045
|
+
md += `### ✅ Trust (act on these directly)\n\n`;
|
|
1046
|
+
md += `| Severity | Rule | Finding | Reason |\n|---|---|---|---|\n`;
|
|
1047
|
+
for (const item of guide.trust.slice(0, 15)) {
|
|
1048
|
+
md += `| ${item.severity} | \`${item.ruleId}\` | ${item.title} | ${item.reason} |\n`;
|
|
1049
|
+
}
|
|
1050
|
+
if (guide.trust.length > 15)
|
|
1051
|
+
md += `\n*...and ${guide.trust.length - 15} more*\n`;
|
|
1052
|
+
md += `\n`;
|
|
1053
|
+
}
|
|
1054
|
+
if (guide.verify.length > 0) {
|
|
1055
|
+
md += `### 🔍 Verify (use your judgment)\n\n`;
|
|
1056
|
+
md += `| Severity | Rule | Finding | Reason |\n|---|---|---|---|\n`;
|
|
1057
|
+
for (const item of guide.verify.slice(0, 15)) {
|
|
1058
|
+
md += `| ${item.severity} | \`${item.ruleId}\` | ${item.title} | ${item.reason} |\n`;
|
|
1059
|
+
}
|
|
1060
|
+
if (guide.verify.length > 15)
|
|
1061
|
+
md += `\n*...and ${guide.verify.length - 15} more*\n`;
|
|
1062
|
+
md += `\n`;
|
|
1063
|
+
}
|
|
1064
|
+
if (guide.blindSpots.length > 0) {
|
|
1065
|
+
md += `### 🔦 Blind Spots (automated analysis cannot evaluate)\n\n`;
|
|
1066
|
+
for (const spot of guide.blindSpots) {
|
|
1067
|
+
md += `- **${spot.area}** — ${spot.guidance}\n`;
|
|
1068
|
+
}
|
|
1069
|
+
md += `\n`;
|
|
1070
|
+
}
|
|
1071
|
+
}
|
|
1039
1072
|
return md;
|
|
1040
1073
|
}
|
|
1041
1074
|
// ─── Shared Credential / Placeholder Detection ──────────────────────────────
|
|
@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
|
|
|
40
40
|
- Your role is adversarial: assume the code has accessibility defects and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
|
|
41
41
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
42
42
|
- If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
43
|
-
-
|
|
43
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
|
|
44
44
|
analyze: analyzeAccessibility,
|
|
45
45
|
};
|
|
46
46
|
defaultRegistry.register(accessibilityJudge);
|
|
@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
|
|
|
40
40
|
- Assume instruction files are brittle until proven robust.
|
|
41
41
|
- Never praise or compliment; report risks, ambiguities, and missing controls.
|
|
42
42
|
- If uncertain, flag likely ambiguity only when you can cite specific evidence from the instruction file. Speculative findings without concrete evidence erode trust.
|
|
43
|
-
-
|
|
43
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code.`,
|
|
44
44
|
analyze: analyzeAgentInstructions,
|
|
45
45
|
};
|
|
46
46
|
defaultRegistry.register(agentInstructionsJudge);
|
|
@@ -47,11 +47,20 @@ FALSE POSITIVE AVOIDANCE:
|
|
|
47
47
|
- Missing AI-specific guardrails (content filtering, toxicity detection) are only relevant for AI-facing code.
|
|
48
48
|
- Framework-level AI safety features (OpenAI content policy, Anthropic safety layers) are external controls — code calling these APIs is correctly delegating safety.
|
|
49
49
|
|
|
50
|
+
CLEAN CODE RECOGNITION (if ALL of the following are true, report ZERO findings):
|
|
51
|
+
- Input validation present on user-facing entry points
|
|
52
|
+
- No eval(), exec(), or dynamic code generation from untrusted input
|
|
53
|
+
- API keys/secrets not hardcoded (using environment variables or secret managers)
|
|
54
|
+
- Dependencies from standard registries with no placeholder/example credentials
|
|
55
|
+
- Error handling does not expose internal details to callers
|
|
56
|
+
- No disabled security features (TLS verification, CORS restrictions)
|
|
57
|
+
- Standard application code without AI/LLM interactions does not need AI safety review
|
|
58
|
+
|
|
50
59
|
ADVERSARIAL MANDATE:
|
|
51
60
|
- Assume the code was generated by an AI and has not been security-reviewed. Hunt for the patterns LLMs typically get wrong.
|
|
52
61
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
53
62
|
- If uncertain, flag the issue only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
54
|
-
-
|
|
63
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
|
|
55
64
|
analyze: analyzeAiCodeSafety,
|
|
56
65
|
};
|
|
57
66
|
defaultRegistry.register(aiCodeSafetyJudge);
|
|
@@ -51,7 +51,7 @@ ADVERSARIAL MANDATE:
|
|
|
51
51
|
- Your role is adversarial: assume the API has design flaws and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
|
|
52
52
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
53
53
|
- If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
54
|
-
-
|
|
54
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
|
|
55
55
|
analyze: analyzeApiDesign,
|
|
56
56
|
};
|
|
57
57
|
defaultRegistry.register(apiDesignJudge);
|
|
@@ -57,7 +57,7 @@ ADVERSARIAL MANDATE:
|
|
|
57
57
|
- Your role is adversarial: assume authentication is broken and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
|
|
58
58
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
59
59
|
- If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
60
|
-
-
|
|
60
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
|
|
61
61
|
analyze: analyzeAuthentication,
|
|
62
62
|
};
|
|
63
63
|
defaultRegistry.register(authenticationJudge);
|
|
@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
|
|
|
40
40
|
- Your role is adversarial: assume backwards compatibility is not considered and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
|
|
41
41
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
42
42
|
- If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
43
|
-
-
|
|
43
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
|
|
44
44
|
analyze: analyzeBackwardsCompatibility,
|
|
45
45
|
};
|
|
46
46
|
defaultRegistry.register(backwardsCompatibilityJudge);
|
package/dist/judges/caching.js
CHANGED
|
@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
|
|
|
40
40
|
- Your role is adversarial: assume the caching strategy is flawed or absent and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
|
|
41
41
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
42
42
|
- If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
43
|
-
-
|
|
43
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
|
|
44
44
|
analyze: analyzeCaching,
|
|
45
45
|
};
|
|
46
46
|
defaultRegistry.register(cachingJudge);
|
package/dist/judges/ci-cd.js
CHANGED
|
@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
|
|
|
40
40
|
- Your role is adversarial: assume the CI/CD posture is weak and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
|
|
41
41
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
42
42
|
- If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
43
|
-
-
|
|
43
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
|
|
44
44
|
analyze: analyzeCiCd,
|
|
45
45
|
};
|
|
46
46
|
defaultRegistry.register(ciCdJudge);
|
|
@@ -47,7 +47,7 @@ ADVERSARIAL MANDATE:
|
|
|
47
47
|
- Your role is adversarial: assume the code is not cloud-ready and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
|
|
48
48
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
49
49
|
- If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
50
|
-
-
|
|
50
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
|
|
51
51
|
analyze: analyzeCloudReadiness,
|
|
52
52
|
};
|
|
53
53
|
defaultRegistry.register(cloudReadinessJudge);
|
|
@@ -39,7 +39,7 @@ ADVERSARIAL MANDATE:
|
|
|
39
39
|
- Your role is adversarial: assume the code has structural problems and actively hunt for complexity, dead code, and over-sized functions. Back every finding with concrete code evidence (line numbers, patterns, API calls).
|
|
40
40
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
41
41
|
- If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
42
|
-
-
|
|
42
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.
|
|
43
43
|
|
|
44
44
|
FALSE POSITIVE AVOIDANCE:
|
|
45
45
|
- **Dict[str, Any] at serialization boundaries**: When code deserializes JSON (json.loads, JSON.parse, API responses), Dict[str, Any] / Record<string, any> is the correct type until schema validation narrows it. Do not flag dynamic types at JSON I/O boundaries when the schema is defined elsewhere (Pydantic model, TypedDict, Zod schema).
|
|
@@ -43,7 +43,7 @@ ADVERSARIAL MANDATE:
|
|
|
43
43
|
- Your role is adversarial: assume the code has compliance gaps and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
|
|
44
44
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
45
45
|
- If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
46
|
-
-
|
|
46
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
|
|
47
47
|
analyze: analyzeCompliance,
|
|
48
48
|
};
|
|
49
49
|
defaultRegistry.register(complianceJudge);
|
|
@@ -42,7 +42,7 @@ ADVERSARIAL MANDATE:
|
|
|
42
42
|
- Your role is adversarial: assume the code has concurrency bugs and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
|
|
43
43
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
44
44
|
- If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
45
|
-
-
|
|
45
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
|
|
46
46
|
analyze: analyzeConcurrency,
|
|
47
47
|
};
|
|
48
48
|
defaultRegistry.register(concurrencyJudge);
|
|
@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
|
|
|
40
40
|
- Your role is adversarial: assume configuration management is inadequate and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
|
|
41
41
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
42
42
|
- If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
43
|
-
-
|
|
43
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
|
|
44
44
|
analyze: analyzeConfigurationManagement,
|
|
45
45
|
};
|
|
46
46
|
defaultRegistry.register(configurationManagementJudge);
|
|
@@ -32,11 +32,19 @@ FALSE POSITIVE AVOIDANCE:
|
|
|
32
32
|
- **Tree/hierarchy traversal**: Nested loops that iterate parent → children (e.g., chapters → sections → articles) visit each element once. Total work is O(total_items), NOT O(n²). Only flag quadratic cost when two independent collections are cross-joined.
|
|
33
33
|
- **Bounded reference datasets**: Loaders for fixed-size data (regulations, schemas, configs with <1000 items) have bounded cost regardless of algorithm choice. Do not flag these as scaling cost concerns.
|
|
34
34
|
|
|
35
|
+
CLEAN CODE RECOGNITION (if ALL of the following are true, report ZERO findings):
|
|
36
|
+
- Database queries are targeted (no SELECT * on large tables without limits)
|
|
37
|
+
- No unbounded loops or recursive calls on external data
|
|
38
|
+
- Resources (connections, file handles, streams) cleaned up after use
|
|
39
|
+
- No redundant network calls or duplicate computations in hot paths
|
|
40
|
+
- Appropriate use of caching or memoization where data is re-read
|
|
41
|
+
- Small utility functions, type definitions, and configuration code are inherently cost-neutral
|
|
42
|
+
|
|
35
43
|
ADVERSARIAL MANDATE:
|
|
36
44
|
- Your role is adversarial: assume the code wastes resources and actively hunt for inefficiencies. Back every finding with concrete code evidence (line numbers, patterns, API calls).
|
|
37
45
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
38
46
|
- If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
39
|
-
-
|
|
47
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
|
|
40
48
|
analyze: analyzeCostEffectiveness,
|
|
41
49
|
};
|
|
42
50
|
defaultRegistry.register(costEffectivenessJudge);
|
|
@@ -57,7 +57,7 @@ ADVERSARIAL MANDATE:
|
|
|
57
57
|
- Your role is adversarial: assume the code is vulnerable and actively hunt for exploits. Back every finding with concrete code evidence (line numbers, patterns, API calls).
|
|
58
58
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
59
59
|
- If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
60
|
-
-
|
|
60
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
|
|
61
61
|
analyze: analyzeCybersecurity,
|
|
62
62
|
};
|
|
63
63
|
defaultRegistry.register(cybersecurityJudge);
|
|
@@ -44,7 +44,7 @@ ADVERSARIAL MANDATE:
|
|
|
44
44
|
- Your role is adversarial: assume the code leaks or mishandles data and actively hunt for exposures. Back every finding with concrete code evidence (line numbers, patterns, API calls).
|
|
45
45
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
46
46
|
- If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
47
|
-
-
|
|
47
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
|
|
48
48
|
analyze: analyzeDataSecurity,
|
|
49
49
|
};
|
|
50
50
|
defaultRegistry.register(dataSecurityJudge);
|
|
@@ -54,7 +54,7 @@ ADVERSARIAL MANDATE:
|
|
|
54
54
|
- Your role is adversarial: assume sovereignty controls are missing unless explicitly shown.
|
|
55
55
|
- Never praise or compliment the code. Report only gaps, risks, and deficiencies.
|
|
56
56
|
- If uncertain, flag potential sovereignty exposure only when you can cite specific code evidence. Speculative findings without concrete evidence erode trust.
|
|
57
|
-
-
|
|
57
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code.`,
|
|
58
58
|
analyze: analyzeDataSovereignty,
|
|
59
59
|
};
|
|
60
60
|
defaultRegistry.register(dataSovereigntyJudge);
|
package/dist/judges/database.js
CHANGED
|
@@ -45,7 +45,7 @@ ADVERSARIAL MANDATE:
|
|
|
45
45
|
- Your role is adversarial: assume database usage is unsafe and inefficient and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
|
|
46
46
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
47
47
|
- If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
48
|
-
-
|
|
48
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
|
|
49
49
|
analyze: analyzeDatabase,
|
|
50
50
|
};
|
|
51
51
|
defaultRegistry.register(databaseJudge);
|
|
@@ -42,7 +42,7 @@ ADVERSARIAL MANDATE:
|
|
|
42
42
|
- Your role is adversarial: assume the dependency tree has risks and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
|
|
43
43
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
44
44
|
- If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
45
|
-
-
|
|
45
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
|
|
46
46
|
analyze: analyzeDependencyHealth,
|
|
47
47
|
};
|
|
48
48
|
defaultRegistry.register(dependencyHealthJudge);
|
|
@@ -49,7 +49,7 @@ ADVERSARIAL MANDATE:
|
|
|
49
49
|
- Your role is adversarial: assume the documentation is inadequate and actively hunt for gaps. Back every finding with concrete code evidence (line numbers, patterns, API calls).
|
|
50
50
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
51
51
|
- If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
52
|
-
-
|
|
52
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
|
|
53
53
|
analyze: analyzeDocumentation,
|
|
54
54
|
};
|
|
55
55
|
defaultRegistry.register(documentationJudge);
|
|
@@ -49,7 +49,7 @@ ADVERSARIAL MANDATE:
|
|
|
49
49
|
- Your role is adversarial: assume error handling is insufficient and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
|
|
50
50
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
51
51
|
- If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
52
|
-
-
|
|
52
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
|
|
53
53
|
analyze: analyzeErrorHandling,
|
|
54
54
|
};
|
|
55
55
|
defaultRegistry.register(errorHandlingJudge);
|
|
@@ -42,7 +42,7 @@ ADVERSARIAL MANDATE:
|
|
|
42
42
|
- Your role is adversarial: assume the code has ethical risks or bias and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
|
|
43
43
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
44
44
|
- If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
45
|
-
-
|
|
45
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
|
|
46
46
|
analyze: analyzeEthicsBias,
|
|
47
47
|
};
|
|
48
48
|
defaultRegistry.register(ethicsBiasJudge);
|
|
@@ -39,11 +39,19 @@ FALSE POSITIVE AVOIDANCE:
|
|
|
39
39
|
- Missing framework features (no CSRF middleware, no rate limiting) should be deferred to specialized judges (SEC, RATE) unless the framework provides them as defaults that were explicitly disabled.
|
|
40
40
|
- Do NOT flag non-web code (CLI tools, scripts, libraries) for web framework safety issues.
|
|
41
41
|
|
|
42
|
+
CLEAN CODE RECOGNITION (if ALL of the following are true, report ZERO findings):
|
|
43
|
+
- Framework middleware/plugins used per official documentation
|
|
44
|
+
- Security middleware enabled (helmet, CSRF protection, etc.) where applicable
|
|
45
|
+
- No explicitly disabled built-in protections
|
|
46
|
+
- Route handlers follow framework conventions
|
|
47
|
+
- Template rendering uses auto-escaping (not disabled)
|
|
48
|
+
- Non-web code (CLI tools, libraries, scripts) does not need web framework review
|
|
49
|
+
|
|
42
50
|
ADVERSARIAL MANDATE:
|
|
43
51
|
- Your role is adversarial: assume the code misuses framework APIs and actively hunt for violations. Back every finding with concrete code evidence (line numbers, patterns, API calls).
|
|
44
52
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
45
53
|
- If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
46
|
-
-
|
|
54
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
|
|
47
55
|
analyze: analyzeFrameworkSafety,
|
|
48
56
|
};
|
|
49
57
|
defaultRegistry.register(frameworkSafetyJudge);
|
|
@@ -42,7 +42,7 @@ ADVERSARIAL MANDATE:
|
|
|
42
42
|
- Assume every API call could be hallucinated. Hunt for subtle mismatches between documented APIs and actual usage.
|
|
43
43
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
44
44
|
- If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
45
|
-
-
|
|
45
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
|
|
46
46
|
analyze: analyzeHallucinationDetection,
|
|
47
47
|
};
|
|
48
48
|
defaultRegistry.register(hallucinationDetectionJudge);
|
|
@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
|
|
|
40
40
|
- Your role is adversarial: assume the infrastructure code is insecure and actively hunt for misconfigurations. Back every finding with concrete code evidence (line numbers, resource definitions, configuration blocks).
|
|
41
41
|
- Never praise or compliment the code. Report only problems, risks, and security gaps.
|
|
42
42
|
- If you are uncertain whether something is a misconfiguration, flag it only when you can cite specific code evidence (line numbers, patterns, resource definitions). Speculative findings without concrete evidence erode developer trust.
|
|
43
|
-
-
|
|
43
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.
|
|
44
44
|
- Pay special attention to defaults that are insecure when not explicitly configured (e.g., public access defaults, missing encryption defaults).`,
|
|
45
45
|
analyze: analyzeIacSecurity,
|
|
46
46
|
};
|
|
@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
|
|
|
40
40
|
- Assume every comment could be lying. Verify that implementations match their stated intent.
|
|
41
41
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
42
42
|
- If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
43
|
-
-
|
|
43
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
|
|
44
44
|
analyze: analyzeIntentAlignment,
|
|
45
45
|
};
|
|
46
46
|
defaultRegistry.register(intentAlignmentJudge);
|
|
@@ -38,7 +38,7 @@ ADVERSARIAL MANDATE:
|
|
|
38
38
|
- Your role is adversarial: assume the code will break in non-English locales and actively hunt for i18n defects. Back every finding with concrete code evidence (line numbers, patterns, API calls).
|
|
39
39
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
40
40
|
- If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
41
|
-
-
|
|
41
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
|
|
42
42
|
analyze: analyzeInternationalization,
|
|
43
43
|
};
|
|
44
44
|
defaultRegistry.register(internationalizationJudge);
|
|
@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
|
|
|
40
40
|
- Your role is adversarial: assume logs contain sensitive data and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
|
|
41
41
|
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
42
42
|
- If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
|
|
43
|
-
-
|
|
43
|
+
- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
|
|
44
44
|
analyze: analyzeLoggingPrivacy,
|
|
45
45
|
};
|
|
46
46
|
defaultRegistry.register(loggingPrivacyJudge);
|
|
@@ -30,7 +30,15 @@ FALSE POSITIVE AVOIDANCE:
|
|
|
30
30
|
- Guard clauses that return early are NOT dead code
|
|
31
31
|
- Feature flags intentionally create "dead" branches — skip if flag-guarded
|
|
32
32
|
- Test files may intentionally test edge cases with unusual conditions
|
|
33
|
-
- Framework-required patterns (e.g., exhaustive switch in Redux) are intentional
|
|
33
|
+
- Framework-required patterns (e.g., exhaustive switch in Redux) are intentional
|
|
34
|
+
|
|
35
|
+
CLEAN CODE RECOGNITION (if ALL of the following are true, report ZERO findings):
|
|
36
|
+
- Control flow is straightforward with no inverted conditions or unreachable code
|
|
37
|
+
- Functions return consistent types and handle edge cases
|
|
38
|
+
- Boolean expressions read naturally without double negatives
|
|
39
|
+
- Switch/match statements cover expected cases
|
|
40
|
+
- No partial refactor artifacts, dead code, or contradictory logic
|
|
41
|
+
- Guard clauses and early returns used appropriately`,
|
|
34
42
|
analyze: analyzeLogicReview,
|
|
35
43
|
};
|
|
36
44
|
defaultRegistry.register(logicReviewJudge);
|