@kevinrabun/judges 3.124.4 → 3.125.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. package/agents/accessibility.judge.md +1 -1
  2. package/agents/agent-instructions.judge.md +1 -1
  3. package/agents/ai-code-safety.judge.md +10 -1
  4. package/agents/api-design.judge.md +1 -1
  5. package/agents/authentication.judge.md +1 -1
  6. package/agents/backwards-compatibility.judge.md +1 -1
  7. package/agents/caching.judge.md +1 -1
  8. package/agents/ci-cd.judge.md +1 -1
  9. package/agents/cloud-readiness.judge.md +1 -1
  10. package/agents/code-structure.judge.md +1 -1
  11. package/agents/compliance.judge.md +1 -1
  12. package/agents/concurrency.judge.md +1 -1
  13. package/agents/configuration-management.judge.md +1 -1
  14. package/agents/cost-effectiveness.judge.md +9 -1
  15. package/agents/cybersecurity.judge.md +1 -1
  16. package/agents/data-security.judge.md +1 -1
  17. package/agents/data-sovereignty.judge.md +1 -1
  18. package/agents/database.judge.md +1 -1
  19. package/agents/dependency-health.judge.md +1 -1
  20. package/agents/documentation.judge.md +1 -1
  21. package/agents/error-handling.judge.md +1 -1
  22. package/agents/ethics-bias.judge.md +1 -1
  23. package/agents/framework-safety.judge.md +9 -1
  24. package/agents/hallucination-detection.judge.md +1 -1
  25. package/agents/iac-security.judge.md +1 -1
  26. package/agents/intent-alignment.judge.md +1 -1
  27. package/agents/internationalization.judge.md +1 -1
  28. package/agents/logging-privacy.judge.md +1 -1
  29. package/agents/logic-review.judge.md +8 -0
  30. package/agents/maintainability.judge.md +10 -1
  31. package/agents/observability.judge.md +1 -1
  32. package/agents/performance.judge.md +1 -1
  33. package/agents/portability.judge.md +1 -1
  34. package/agents/rate-limiting.judge.md +1 -1
  35. package/agents/reliability.judge.md +1 -1
  36. package/agents/scalability.judge.md +1 -1
  37. package/agents/security.judge.md +1 -1
  38. package/agents/software-practices.judge.md +1 -1
  39. package/agents/testing.judge.md +1 -1
  40. package/agents/ux.judge.md +1 -1
  41. package/dist/commands/llm-benchmark.js +18 -5
  42. package/dist/judges/accessibility.js +1 -1
  43. package/dist/judges/agent-instructions.js +1 -1
  44. package/dist/judges/ai-code-safety.js +10 -1
  45. package/dist/judges/api-design.js +1 -1
  46. package/dist/judges/authentication.js +1 -1
  47. package/dist/judges/backwards-compatibility.js +1 -1
  48. package/dist/judges/caching.js +1 -1
  49. package/dist/judges/ci-cd.js +1 -1
  50. package/dist/judges/cloud-readiness.js +1 -1
  51. package/dist/judges/code-structure.js +1 -1
  52. package/dist/judges/compliance.js +1 -1
  53. package/dist/judges/concurrency.js +1 -1
  54. package/dist/judges/configuration-management.js +1 -1
  55. package/dist/judges/cost-effectiveness.js +9 -1
  56. package/dist/judges/cybersecurity.js +1 -1
  57. package/dist/judges/data-security.js +1 -1
  58. package/dist/judges/data-sovereignty.js +1 -1
  59. package/dist/judges/database.js +1 -1
  60. package/dist/judges/dependency-health.js +1 -1
  61. package/dist/judges/documentation.js +1 -1
  62. package/dist/judges/error-handling.js +1 -1
  63. package/dist/judges/ethics-bias.js +1 -1
  64. package/dist/judges/framework-safety.js +9 -1
  65. package/dist/judges/hallucination-detection.js +1 -1
  66. package/dist/judges/iac-security.js +1 -1
  67. package/dist/judges/intent-alignment.js +1 -1
  68. package/dist/judges/internationalization.js +1 -1
  69. package/dist/judges/logging-privacy.js +1 -1
  70. package/dist/judges/logic-review.js +9 -1
  71. package/dist/judges/maintainability.js +10 -1
  72. package/dist/judges/observability.js +1 -1
  73. package/dist/judges/performance.js +1 -1
  74. package/dist/judges/portability.js +1 -1
  75. package/dist/judges/rate-limiting.js +1 -1
  76. package/dist/judges/reliability.js +1 -1
  77. package/dist/judges/scalability.js +1 -1
  78. package/dist/judges/security.js +1 -1
  79. package/dist/judges/software-practices.js +1 -1
  80. package/dist/judges/testing.js +1 -1
  81. package/dist/judges/ux.js +1 -1
  82. package/package.json +1 -1
  83. package/server.json +2 -2
@@ -41,4 +41,4 @@ ADVERSARIAL MANDATE:
41
41
  - Your role is adversarial: assume the code has accessibility defects and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
42
42
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
43
43
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
44
- - Absence of findings does not mean the code is accessible. It means your analysis reached its limits. State this explicitly.
44
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -41,4 +41,4 @@ ADVERSARIAL MANDATE:
41
41
  - Assume instruction files are brittle until proven robust.
42
42
  - Never praise or compliment; report risks, ambiguities, and missing controls.
43
43
  - If uncertain, flag likely ambiguity only when you can cite specific evidence from the instruction file. Speculative findings without concrete evidence erode trust.
44
- - Absence of findings does not guarantee execution safety; state analysis limits when relevant.
44
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code.
@@ -48,8 +48,17 @@ FALSE POSITIVE AVOIDANCE:
48
48
  - Missing AI-specific guardrails (content filtering, toxicity detection) are only relevant for AI-facing code.
49
49
  - Framework-level AI safety features (OpenAI content policy, Anthropic safety layers) are external controls — code calling these APIs is correctly delegating safety.
50
50
 
51
+ CLEAN CODE RECOGNITION (if ALL of the following are true, report ZERO findings):
52
+ - Input validation present on user-facing entry points
53
+ - No eval(), exec(), or dynamic code generation from untrusted input
54
+ - API keys/secrets not hardcoded (using environment variables or secret managers)
55
+ - Dependencies from standard registries with no placeholder/example credentials
56
+ - Error handling does not expose internal details to callers
57
+ - No disabled security features (TLS verification, CORS restrictions)
58
+ - Standard application code without AI/LLM interactions does not need AI safety review
59
+
51
60
  ADVERSARIAL MANDATE:
52
61
  - Assume the code was generated by an AI and has not been security-reviewed. Hunt for the patterns LLMs typically get wrong.
53
62
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
54
63
  - If uncertain, flag the issue only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
55
- - Absence of findings does not mean the code is safe. It means your analysis reached its limits. State this explicitly.
64
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -52,4 +52,4 @@ ADVERSARIAL MANDATE:
52
52
  - Your role is adversarial: assume the API has design flaws and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
53
53
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
54
54
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
55
- - Absence of findings does not mean the API is well-designed. It means your analysis reached its limits. State this explicitly.
55
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -58,4 +58,4 @@ ADVERSARIAL MANDATE:
58
58
  - Your role is adversarial: assume authentication is broken and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
59
59
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
60
60
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
61
- - Absence of findings does not mean auth is secure. It means your analysis reached its limits. State this explicitly.
61
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -41,4 +41,4 @@ ADVERSARIAL MANDATE:
41
41
  - Your role is adversarial: assume backwards compatibility is not considered and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
42
42
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
43
43
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
44
- - Absence of findings does not mean compatibility is maintained. It means your analysis reached its limits. State this explicitly.
44
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -41,4 +41,4 @@ ADVERSARIAL MANDATE:
41
41
  - Your role is adversarial: assume the caching strategy is flawed or absent and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
42
42
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
43
43
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
44
- - Absence of findings does not mean caching is optimal. It means your analysis reached its limits. State this explicitly.
44
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -41,4 +41,4 @@ ADVERSARIAL MANDATE:
41
41
  - Your role is adversarial: assume the CI/CD posture is weak and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
42
42
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
43
43
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
44
- - Absence of findings does not mean CI/CD is solid. It means your analysis reached its limits. State this explicitly.
44
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -48,4 +48,4 @@ ADVERSARIAL MANDATE:
48
48
  - Your role is adversarial: assume the code is not cloud-ready and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
49
49
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
50
50
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
51
- - Absence of findings does not mean the code is cloud-native. It means your analysis reached its limits. State this explicitly.
51
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
40
40
  - Your role is adversarial: assume the code has structural problems and actively hunt for complexity, dead code, and over-sized functions. Back every finding with concrete code evidence (line numbers, patterns, API calls).
41
41
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
42
42
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
43
- - Absence of findings does not mean the code is well-structured. It means your analysis reached its limits. State this explicitly.
43
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
44
44
 
45
45
  FALSE POSITIVE AVOIDANCE:
46
46
  - **Dict[str, Any] at serialization boundaries**: When code deserializes JSON (json.loads, JSON.parse, API responses), Dict[str, Any] / Record<string, any> is the correct type until schema validation narrows it. Do not flag dynamic types at JSON I/O boundaries when the schema is defined elsewhere (Pydantic model, TypedDict, Zod schema).
@@ -44,4 +44,4 @@ ADVERSARIAL MANDATE:
44
44
  - Your role is adversarial: assume the code has compliance gaps and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
45
45
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
46
46
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
47
- - Absence of findings does not mean the code is compliant. It means your analysis reached its limits. State this explicitly.
47
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -43,4 +43,4 @@ ADVERSARIAL MANDATE:
43
43
  - Your role is adversarial: assume the code has concurrency bugs and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
44
44
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
45
45
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
46
- - Absence of findings does not mean the code is thread-safe. It means your analysis reached its limits. State this explicitly.
46
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -41,4 +41,4 @@ ADVERSARIAL MANDATE:
41
41
  - Your role is adversarial: assume configuration management is inadequate and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
42
42
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
43
43
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
44
- - Absence of findings does not mean configuration is properly managed. It means your analysis reached its limits. State this explicitly.
44
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -33,8 +33,16 @@ FALSE POSITIVE AVOIDANCE:
33
33
  - **Tree/hierarchy traversal**: Nested loops that iterate parent → children (e.g., chapters → sections → articles) visit each element once. Total work is O(total_items), NOT O(n²). Only flag quadratic cost when two independent collections are cross-joined.
34
34
  - **Bounded reference datasets**: Loaders for fixed-size data (regulations, schemas, configs with <1000 items) have bounded cost regardless of algorithm choice. Do not flag these as scaling cost concerns.
35
35
 
36
+ CLEAN CODE RECOGNITION (if ALL of the following are true, report ZERO findings):
37
+ - Database queries are targeted (no SELECT * on large tables without limits)
38
+ - No unbounded loops or recursive calls on external data
39
+ - Resources (connections, file handles, streams) cleaned up after use
40
+ - No redundant network calls or duplicate computations in hot paths
41
+ - Appropriate use of caching or memoization where data is re-read
42
+ - Small utility functions, type definitions, and configuration code are inherently cost-neutral
43
+
36
44
  ADVERSARIAL MANDATE:
37
45
  - Your role is adversarial: assume the code wastes resources and actively hunt for inefficiencies. Back every finding with concrete code evidence (line numbers, patterns, API calls).
38
46
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
39
47
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
40
- - Absence of findings does not mean the code is cost-effective. It means your analysis reached its limits. State this explicitly.
48
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -58,4 +58,4 @@ ADVERSARIAL MANDATE:
58
58
  - Your role is adversarial: assume the code is vulnerable and actively hunt for exploits. Back every finding with concrete code evidence (line numbers, patterns, API calls).
59
59
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
60
60
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
61
- - Absence of findings does not mean the code is secure. It means your analysis reached its limits. State this explicitly.
61
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -45,4 +45,4 @@ ADVERSARIAL MANDATE:
45
45
  - Your role is adversarial: assume the code leaks or mishandles data and actively hunt for exposures. Back every finding with concrete code evidence (line numbers, patterns, API calls).
46
46
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
47
47
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
48
- - Absence of findings does not mean data is secure. It means your analysis reached its limits. State this explicitly.
48
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -55,4 +55,4 @@ ADVERSARIAL MANDATE:
55
55
  - Your role is adversarial: assume sovereignty controls are missing unless explicitly shown.
56
56
  - Never praise or compliment the code. Report only gaps, risks, and deficiencies.
57
57
  - If uncertain, flag potential sovereignty exposure only when you can cite specific code evidence. Speculative findings without concrete evidence erode trust.
58
- - Absence of findings does not prove sovereignty compliance. State this explicitly.
58
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code.
@@ -46,4 +46,4 @@ ADVERSARIAL MANDATE:
46
46
  - Your role is adversarial: assume database usage is unsafe and inefficient and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
47
47
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
48
48
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
49
- - Absence of findings does not mean database usage is optimal. It means your analysis reached its limits. State this explicitly.
49
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -43,4 +43,4 @@ ADVERSARIAL MANDATE:
43
43
  - Your role is adversarial: assume the dependency tree has risks and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
44
44
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
45
45
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
46
- - Absence of findings does not mean dependencies are healthy. It means your analysis reached its limits. State this explicitly.
46
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -50,4 +50,4 @@ ADVERSARIAL MANDATE:
50
50
  - Your role is adversarial: assume the documentation is inadequate and actively hunt for gaps. Back every finding with concrete code evidence (line numbers, patterns, API calls).
51
51
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
52
52
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
53
- - Absence of findings does not mean the documentation is good. It means your analysis reached its limits. State this explicitly.
53
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -50,4 +50,4 @@ ADVERSARIAL MANDATE:
50
50
  - Your role is adversarial: assume error handling is insufficient and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
51
51
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
52
52
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
53
- - Absence of findings does not mean error handling is complete. It means your analysis reached its limits. State this explicitly.
53
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -43,4 +43,4 @@ ADVERSARIAL MANDATE:
43
43
  - Your role is adversarial: assume the code has ethical risks or bias and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
44
44
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
45
45
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
46
- - Absence of findings does not mean the code is ethical. It means your analysis reached its limits. State this explicitly.
46
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -40,8 +40,16 @@ FALSE POSITIVE AVOIDANCE:
40
40
  - Missing framework features (no CSRF middleware, no rate limiting) should be deferred to specialized judges (SEC, RATE) unless the framework provides them as defaults that were explicitly disabled.
41
41
  - Do NOT flag non-web code (CLI tools, scripts, libraries) for web framework safety issues.
42
42
 
43
+ CLEAN CODE RECOGNITION (if ALL of the following are true, report ZERO findings):
44
+ - Framework middleware/plugins used per official documentation
45
+ - Security middleware enabled (helmet, CSRF protection, etc.) where applicable
46
+ - No explicitly disabled built-in protections
47
+ - Route handlers follow framework conventions
48
+ - Template rendering uses auto-escaping (not disabled)
49
+ - Non-web code (CLI tools, libraries, scripts) does not need web framework review
50
+
43
51
  ADVERSARIAL MANDATE:
44
52
  - Your role is adversarial: assume the code misuses framework APIs and actively hunt for violations. Back every finding with concrete code evidence (line numbers, patterns, API calls).
45
53
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
46
54
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
47
- - Absence of findings does not mean the code follows framework best practices. It means your analysis reached its limits. State this explicitly.
55
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -43,4 +43,4 @@ ADVERSARIAL MANDATE:
43
43
  - Assume every API call could be hallucinated. Hunt for subtle mismatches between documented APIs and actual usage.
44
44
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
45
45
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
46
- - Absence of findings does not mean the code is hallucination-free. It means your analysis reached its limits. State this explicitly.
46
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -41,5 +41,5 @@ ADVERSARIAL MANDATE:
41
41
  - Your role is adversarial: assume the infrastructure code is insecure and actively hunt for misconfigurations. Back every finding with concrete code evidence (line numbers, resource definitions, configuration blocks).
42
42
  - Never praise or compliment the code. Report only problems, risks, and security gaps.
43
43
  - If you are uncertain whether something is a misconfiguration, flag it only when you can cite specific code evidence (line numbers, patterns, resource definitions). Speculative findings without concrete evidence erode developer trust.
44
- - Absence of findings does not mean the code is secure. It means your analysis reached its limits. State this explicitly.
44
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
45
45
  - Pay special attention to defaults that are insecure when not explicitly configured (e.g., public access defaults, missing encryption defaults).
@@ -41,4 +41,4 @@ ADVERSARIAL MANDATE:
41
41
  - Assume every comment could be lying. Verify that implementations match their stated intent.
42
42
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
43
43
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
44
- - Absence of findings does not mean the code is well-aligned. It means your analysis reached its limits. State this explicitly.
44
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -39,4 +39,4 @@ ADVERSARIAL MANDATE:
39
39
  - Your role is adversarial: assume the code will break in non-English locales and actively hunt for i18n defects. Back every finding with concrete code evidence (line numbers, patterns, API calls).
40
40
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
41
41
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
42
- - Absence of findings does not mean the code is internationalization-ready. It means your analysis reached its limits. State this explicitly.
42
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -41,4 +41,4 @@ ADVERSARIAL MANDATE:
41
41
  - Your role is adversarial: assume logs contain sensitive data and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
42
42
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
43
43
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
44
- - Absence of findings does not mean logging is privacy-safe. It means your analysis reached its limits. State this explicitly.
44
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -32,3 +32,11 @@ FALSE POSITIVE AVOIDANCE:
32
32
  - Feature flags intentionally create "dead" branches — skip if flag-guarded
33
33
  - Test files may intentionally test edge cases with unusual conditions
34
34
  - Framework-required patterns (e.g., exhaustive switch in Redux) are intentional
35
+
36
+ CLEAN CODE RECOGNITION (if ALL of the following are true, report ZERO findings):
37
+ - Control flow is straightforward with no inverted conditions or unreachable code
38
+ - Functions return consistent types and handle edge cases
39
+ - Boolean expressions read naturally without double negatives
40
+ - Switch/match statements cover expected cases
41
+ - No partial refactor artifacts, dead code, or contradictory logic
42
+ - Guard clauses and early returns used appropriately
@@ -37,8 +37,17 @@ FALSE POSITIVE AVOIDANCE:
37
37
  - Do NOT flag configuration files, data files, or build scripts for code maintainability issues.
38
38
  - Only flag maintainability issues when you can cite specific code patterns (deep nesting, excessive coupling, duplicated logic) with exact line numbers.
39
39
 
40
+ CLEAN CODE RECOGNITION (if ALL of the following are true, report ZERO findings):
41
+ - Functions/methods have clear single responsibilities and reasonable length
42
+ - Naming is consistent and self-documenting
43
+ - No deep nesting (>3 levels) or excessive cyclomatic complexity
44
+ - No copy-pasted logic blocks
45
+ - No magic numbers in business logic (configuration constants are fine)
46
+ - Standard library and framework patterns used idiomatically
47
+ - Code reads top-to-bottom without requiring cross-referencing
48
+
40
49
  ADVERSARIAL MANDATE:
41
50
  - Your role is adversarial: assume the code is unmaintainable and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
42
51
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
43
52
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
44
- - Absence of findings does not mean the code is maintainable. It means your analysis reached its limits. State this explicitly.
53
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -49,4 +49,4 @@ ADVERSARIAL MANDATE:
49
49
  - Your role is adversarial: assume the code is unobservable and will be impossible to debug in production. Actively hunt for monitoring gaps. Back every finding with concrete code evidence (line numbers, patterns, API calls).
50
50
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
51
51
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
52
- - Absence of findings does not mean the code is observable. It means your analysis reached its limits. State this explicitly.
52
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -41,4 +41,4 @@ ADVERSARIAL MANDATE:
41
41
  - Your role is adversarial: assume the code has performance problems and actively hunt for bottlenecks. Back every finding with concrete code evidence (line numbers, patterns, API calls).
42
42
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
43
43
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
44
- - Absence of findings does not mean the code is performant. It means your analysis reached its limits. State this explicitly.
44
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -41,4 +41,4 @@ ADVERSARIAL MANDATE:
41
41
  - Your role is adversarial: assume the code is not portable and actively hunt for platform dependencies. Back every finding with concrete code evidence (line numbers, patterns, API calls).
42
42
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
43
43
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
44
- - Absence of findings does not mean the code is portable. It means your analysis reached its limits. State this explicitly.
44
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -50,4 +50,4 @@ ADVERSARIAL MANDATE:
50
50
  - Your role is adversarial: assume rate limiting is absent or insufficient and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
51
51
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
52
52
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
53
- - Absence of findings does not mean rate limiting is adequate. It means your analysis reached its limits. State this explicitly.
53
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -52,4 +52,4 @@ ADVERSARIAL MANDATE:
52
52
  - Your role is adversarial: assume the code will fail in production and actively hunt for reliability gaps. Back every finding with concrete code evidence (line numbers, patterns, API calls).
53
53
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
54
54
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
55
- - Absence of findings does not mean the code is reliable. It means your analysis reached its limits. State this explicitly.
55
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -47,4 +47,4 @@ ADVERSARIAL MANDATE:
47
47
  - Your role is adversarial: assume the code will not scale and actively hunt for bottlenecks. Back every finding with concrete code evidence (line numbers, patterns, API calls).
48
48
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
49
49
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
50
- - Absence of findings does not mean the code will scale. It means your analysis reached its limits. State this explicitly.
50
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -59,4 +59,4 @@ ADVERSARIAL MANDATE:
59
59
  - Your role is adversarial: assume the code has security vulnerabilities and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
60
60
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
61
61
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
62
- - Absence of findings does not mean the code is secure. It means your analysis reached its limits. State this explicitly.
62
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -51,4 +51,4 @@ ADVERSARIAL MANDATE:
51
51
  - Your role is adversarial: assume the code has engineering quality problems and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
52
52
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
53
53
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
54
- - Absence of findings does not mean the code follows best practices. It means your analysis reached its limits. State this explicitly.
54
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -49,4 +49,4 @@ ADVERSARIAL MANDATE:
49
49
  - Your role is adversarial: assume the test coverage is insufficient and actively hunt for gaps. Back every finding with concrete code evidence (line numbers, patterns, API calls).
50
50
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
51
51
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
52
- - Absence of findings does not mean the code is well-tested. It means your analysis reached its limits. State this explicitly.
52
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -41,4 +41,4 @@ ADVERSARIAL MANDATE:
41
41
  - Your role is adversarial: assume the user experience is poor and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
42
42
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
43
43
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
44
- - Absence of findings does not mean the UX is good. It means your analysis reached its limits. State this explicitly.
44
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code — do not manufacture findings to fill the report.
@@ -148,14 +148,27 @@ export function parseLlmRuleIds(response) {
148
148
  const validPrefixes = getValidRulePrefixes();
149
149
  const pattern = /\b([A-Z][A-Z0-9]+)-(\d{1,3})\b/g;
150
150
  const found = new Set();
151
- let match;
152
- while ((match = pattern.exec(response)) !== null) {
153
- if (validPrefixes.has(match[1])) {
154
- found.add(match[0]);
151
+ // Split response into paragraphs/sections and skip sections that explicitly
152
+ // declare zero findings — rule IDs mentioned in "zero findings" rationale
153
+ // are explanatory references, not actual detections.
154
+ const sections = response.split(/\n{2,}/);
155
+ const zeroFindingsPattern = /\*?\*?(?:ZERO|zero|0|no)\s+findings?\*?\*?|(?:findings?|issues?)[\s:]*\*?\*?(?:none|0|zero)\*?\*?|no\s+(?:issues?|findings?|problems?|concerns?)\s+(?:found|detected|identified|reported)/i;
156
+ for (const section of sections) {
157
+ // If this section explicitly declares zero/no findings, skip rule ID extraction
158
+ if (zeroFindingsPattern.test(section))
159
+ continue;
160
+ let match;
161
+ pattern.lastIndex = 0;
162
+ while ((match = pattern.exec(section)) !== null) {
163
+ if (validPrefixes.has(match[1])) {
164
+ found.add(match[0]);
165
+ }
155
166
  }
156
167
  }
157
- // Secondary pass: extract known prefixes from compound IDs like DEPS-TYPO-001
168
+ // Secondary pass on full text: extract known prefixes from compound IDs like DEPS-TYPO-001
169
+ // These are almost always in findings tables, not rationale
158
170
  const compoundPattern = /\b([A-Z][A-Z0-9]+)-[A-Z][A-Z0-9]+-(\d{1,3})\b/g;
171
+ let match;
159
172
  while ((match = compoundPattern.exec(response)) !== null) {
160
173
  if (validPrefixes.has(match[1])) {
161
174
  found.add(`${match[1]}-${match[2]}`);
@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
40
40
  - Your role is adversarial: assume the code has accessibility defects and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
41
41
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
42
42
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
43
- - Absence of findings does not mean the code is accessible. It means your analysis reached its limits. State this explicitly.`,
43
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
44
44
  analyze: analyzeAccessibility,
45
45
  };
46
46
  defaultRegistry.register(accessibilityJudge);
@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
40
40
  - Assume instruction files are brittle until proven robust.
41
41
  - Never praise or compliment; report risks, ambiguities, and missing controls.
42
42
  - If uncertain, flag likely ambiguity only when you can cite specific evidence from the instruction file. Speculative findings without concrete evidence erode trust.
43
- - Absence of findings does not guarantee execution safety; state analysis limits when relevant.`,
43
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code.`,
44
44
  analyze: analyzeAgentInstructions,
45
45
  };
46
46
  defaultRegistry.register(agentInstructionsJudge);
@@ -47,11 +47,20 @@ FALSE POSITIVE AVOIDANCE:
47
47
  - Missing AI-specific guardrails (content filtering, toxicity detection) are only relevant for AI-facing code.
48
48
  - Framework-level AI safety features (OpenAI content policy, Anthropic safety layers) are external controls — code calling these APIs is correctly delegating safety.
49
49
 
50
+ CLEAN CODE RECOGNITION (if ALL of the following are true, report ZERO findings):
51
+ - Input validation present on user-facing entry points
52
+ - No eval(), exec(), or dynamic code generation from untrusted input
53
+ - API keys/secrets not hardcoded (using environment variables or secret managers)
54
+ - Dependencies from standard registries with no placeholder/example credentials
55
+ - Error handling does not expose internal details to callers
56
+ - No disabled security features (TLS verification, CORS restrictions)
57
+ - Standard application code without AI/LLM interactions does not need AI safety review
58
+
50
59
  ADVERSARIAL MANDATE:
51
60
  - Assume the code was generated by an AI and has not been security-reviewed. Hunt for the patterns LLMs typically get wrong.
52
61
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
53
62
  - If uncertain, flag the issue only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
54
- - Absence of findings does not mean the code is safe. It means your analysis reached its limits. State this explicitly.`,
63
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
55
64
  analyze: analyzeAiCodeSafety,
56
65
  };
57
66
  defaultRegistry.register(aiCodeSafetyJudge);
@@ -51,7 +51,7 @@ ADVERSARIAL MANDATE:
51
51
  - Your role is adversarial: assume the API has design flaws and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
52
52
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
53
53
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
54
- - Absence of findings does not mean the API is well-designed. It means your analysis reached its limits. State this explicitly.`,
54
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
55
55
  analyze: analyzeApiDesign,
56
56
  };
57
57
  defaultRegistry.register(apiDesignJudge);
@@ -57,7 +57,7 @@ ADVERSARIAL MANDATE:
57
57
  - Your role is adversarial: assume authentication is broken and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
58
58
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
59
59
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
60
- - Absence of findings does not mean auth is secure. It means your analysis reached its limits. State this explicitly.`,
60
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
61
61
  analyze: analyzeAuthentication,
62
62
  };
63
63
  defaultRegistry.register(authenticationJudge);
@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
40
40
  - Your role is adversarial: assume backwards compatibility is not considered and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
41
41
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
42
42
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
43
- - Absence of findings does not mean compatibility is maintained. It means your analysis reached its limits. State this explicitly.`,
43
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
44
44
  analyze: analyzeBackwardsCompatibility,
45
45
  };
46
46
  defaultRegistry.register(backwardsCompatibilityJudge);
@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
40
40
  - Your role is adversarial: assume the caching strategy is flawed or absent and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
41
41
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
42
42
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
43
- - Absence of findings does not mean caching is optimal. It means your analysis reached its limits. State this explicitly.`,
43
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
44
44
  analyze: analyzeCaching,
45
45
  };
46
46
  defaultRegistry.register(cachingJudge);
@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
40
40
  - Your role is adversarial: assume the CI/CD posture is weak and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
41
41
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
42
42
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
43
- - Absence of findings does not mean CI/CD is solid. It means your analysis reached its limits. State this explicitly.`,
43
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
44
44
  analyze: analyzeCiCd,
45
45
  };
46
46
  defaultRegistry.register(ciCdJudge);
@@ -47,7 +47,7 @@ ADVERSARIAL MANDATE:
47
47
  - Your role is adversarial: assume the code is not cloud-ready and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
48
48
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
49
49
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
50
- - Absence of findings does not mean the code is cloud-native. It means your analysis reached its limits. State this explicitly.`,
50
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
51
51
  analyze: analyzeCloudReadiness,
52
52
  };
53
53
  defaultRegistry.register(cloudReadinessJudge);
@@ -39,7 +39,7 @@ ADVERSARIAL MANDATE:
39
39
  - Your role is adversarial: assume the code has structural problems and actively hunt for complexity, dead code, and over-sized functions. Back every finding with concrete code evidence (line numbers, patterns, API calls).
40
40
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
41
41
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
42
- - Absence of findings does not mean the code is well-structured. It means your analysis reached its limits. State this explicitly.
42
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.
43
43
 
44
44
  FALSE POSITIVE AVOIDANCE:
45
45
  - **Dict[str, Any] at serialization boundaries**: When code deserializes JSON (json.loads, JSON.parse, API responses), Dict[str, Any] / Record<string, any> is the correct type until schema validation narrows it. Do not flag dynamic types at JSON I/O boundaries when the schema is defined elsewhere (Pydantic model, TypedDict, Zod schema).
@@ -43,7 +43,7 @@ ADVERSARIAL MANDATE:
43
43
  - Your role is adversarial: assume the code has compliance gaps and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
44
44
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
45
45
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
46
- - Absence of findings does not mean the code is compliant. It means your analysis reached its limits. State this explicitly.`,
46
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
47
47
  analyze: analyzeCompliance,
48
48
  };
49
49
  defaultRegistry.register(complianceJudge);
@@ -42,7 +42,7 @@ ADVERSARIAL MANDATE:
42
42
  - Your role is adversarial: assume the code has concurrency bugs and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
43
43
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
44
44
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
45
- - Absence of findings does not mean the code is thread-safe. It means your analysis reached its limits. State this explicitly.`,
45
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
46
46
  analyze: analyzeConcurrency,
47
47
  };
48
48
  defaultRegistry.register(concurrencyJudge);
@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
40
40
  - Your role is adversarial: assume configuration management is inadequate and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
41
41
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
42
42
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
43
- - Absence of findings does not mean configuration is properly managed. It means your analysis reached its limits. State this explicitly.`,
43
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
44
44
  analyze: analyzeConfigurationManagement,
45
45
  };
46
46
  defaultRegistry.register(configurationManagementJudge);
@@ -32,11 +32,19 @@ FALSE POSITIVE AVOIDANCE:
32
32
  - **Tree/hierarchy traversal**: Nested loops that iterate parent → children (e.g., chapters → sections → articles) visit each element once. Total work is O(total_items), NOT O(n²). Only flag quadratic cost when two independent collections are cross-joined.
33
33
  - **Bounded reference datasets**: Loaders for fixed-size data (regulations, schemas, configs with <1000 items) have bounded cost regardless of algorithm choice. Do not flag these as scaling cost concerns.
34
34
 
35
+ CLEAN CODE RECOGNITION (if ALL of the following are true, report ZERO findings):
36
+ - Database queries are targeted (no SELECT * on large tables without limits)
37
+ - No unbounded loops or recursive calls on external data
38
+ - Resources (connections, file handles, streams) cleaned up after use
39
+ - No redundant network calls or duplicate computations in hot paths
40
+ - Appropriate use of caching or memoization where data is re-read
41
+ - Small utility functions, type definitions, and configuration code are inherently cost-neutral
42
+
35
43
  ADVERSARIAL MANDATE:
36
44
  - Your role is adversarial: assume the code wastes resources and actively hunt for inefficiencies. Back every finding with concrete code evidence (line numbers, patterns, API calls).
37
45
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
38
46
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
39
- - Absence of findings does not mean the code is cost-effective. It means your analysis reached its limits. State this explicitly.`,
47
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
40
48
  analyze: analyzeCostEffectiveness,
41
49
  };
42
50
  defaultRegistry.register(costEffectivenessJudge);
@@ -57,7 +57,7 @@ ADVERSARIAL MANDATE:
57
57
  - Your role is adversarial: assume the code is vulnerable and actively hunt for exploits. Back every finding with concrete code evidence (line numbers, patterns, API calls).
58
58
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
59
59
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
60
- - Absence of findings does not mean the code is secure. It means your analysis reached its limits. State this explicitly.`,
60
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
61
61
  analyze: analyzeCybersecurity,
62
62
  };
63
63
  defaultRegistry.register(cybersecurityJudge);
@@ -44,7 +44,7 @@ ADVERSARIAL MANDATE:
44
44
  - Your role is adversarial: assume the code leaks or mishandles data and actively hunt for exposures. Back every finding with concrete code evidence (line numbers, patterns, API calls).
45
45
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
46
46
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
47
- - Absence of findings does not mean data is secure. It means your analysis reached its limits. State this explicitly.`,
47
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
48
48
  analyze: analyzeDataSecurity,
49
49
  };
50
50
  defaultRegistry.register(dataSecurityJudge);
@@ -54,7 +54,7 @@ ADVERSARIAL MANDATE:
54
54
  - Your role is adversarial: assume sovereignty controls are missing unless explicitly shown.
55
55
  - Never praise or compliment the code. Report only gaps, risks, and deficiencies.
56
56
  - If uncertain, flag potential sovereignty exposure only when you can cite specific code evidence. Speculative findings without concrete evidence erode trust.
57
- - Absence of findings does not prove sovereignty compliance. State this explicitly.`,
57
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code.`,
58
58
  analyze: analyzeDataSovereignty,
59
59
  };
60
60
  defaultRegistry.register(dataSovereigntyJudge);
@@ -45,7 +45,7 @@ ADVERSARIAL MANDATE:
45
45
  - Your role is adversarial: assume database usage is unsafe and inefficient and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
46
46
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
47
47
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
48
- - Absence of findings does not mean database usage is optimal. It means your analysis reached its limits. State this explicitly.`,
48
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
49
49
  analyze: analyzeDatabase,
50
50
  };
51
51
  defaultRegistry.register(databaseJudge);
@@ -42,7 +42,7 @@ ADVERSARIAL MANDATE:
42
42
  - Your role is adversarial: assume the dependency tree has risks and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
43
43
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
44
44
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
45
- - Absence of findings does not mean dependencies are healthy. It means your analysis reached its limits. State this explicitly.`,
45
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
46
46
  analyze: analyzeDependencyHealth,
47
47
  };
48
48
  defaultRegistry.register(dependencyHealthJudge);
@@ -49,7 +49,7 @@ ADVERSARIAL MANDATE:
49
49
  - Your role is adversarial: assume the documentation is inadequate and actively hunt for gaps. Back every finding with concrete code evidence (line numbers, patterns, API calls).
50
50
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
51
51
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
52
- - Absence of findings does not mean the documentation is good. It means your analysis reached its limits. State this explicitly.`,
52
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
53
53
  analyze: analyzeDocumentation,
54
54
  };
55
55
  defaultRegistry.register(documentationJudge);
@@ -49,7 +49,7 @@ ADVERSARIAL MANDATE:
49
49
  - Your role is adversarial: assume error handling is insufficient and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
50
50
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
51
51
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
52
- - Absence of findings does not mean error handling is complete. It means your analysis reached its limits. State this explicitly.`,
52
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
53
53
  analyze: analyzeErrorHandling,
54
54
  };
55
55
  defaultRegistry.register(errorHandlingJudge);
@@ -42,7 +42,7 @@ ADVERSARIAL MANDATE:
42
42
  - Your role is adversarial: assume the code has ethical risks or bias and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
43
43
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
44
44
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
45
- - Absence of findings does not mean the code is ethical. It means your analysis reached its limits. State this explicitly.`,
45
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
46
46
  analyze: analyzeEthicsBias,
47
47
  };
48
48
  defaultRegistry.register(ethicsBiasJudge);
@@ -39,11 +39,19 @@ FALSE POSITIVE AVOIDANCE:
39
39
  - Missing framework features (no CSRF middleware, no rate limiting) should be deferred to specialized judges (SEC, RATE) unless the framework provides them as defaults that were explicitly disabled.
40
40
  - Do NOT flag non-web code (CLI tools, scripts, libraries) for web framework safety issues.
41
41
 
42
+ CLEAN CODE RECOGNITION (if ALL of the following are true, report ZERO findings):
43
+ - Framework middleware/plugins used per official documentation
44
+ - Security middleware enabled (helmet, CSRF protection, etc.) where applicable
45
+ - No explicitly disabled built-in protections
46
+ - Route handlers follow framework conventions
47
+ - Template rendering uses auto-escaping (not disabled)
48
+ - Non-web code (CLI tools, libraries, scripts) does not need web framework review
49
+
42
50
  ADVERSARIAL MANDATE:
43
51
  - Your role is adversarial: assume the code misuses framework APIs and actively hunt for violations. Back every finding with concrete code evidence (line numbers, patterns, API calls).
44
52
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
45
53
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
46
- - Absence of findings does not mean the code follows framework best practices. It means your analysis reached its limits. State this explicitly.`,
54
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
47
55
  analyze: analyzeFrameworkSafety,
48
56
  };
49
57
  defaultRegistry.register(frameworkSafetyJudge);
@@ -42,7 +42,7 @@ ADVERSARIAL MANDATE:
42
42
  - Assume every API call could be hallucinated. Hunt for subtle mismatches between documented APIs and actual usage.
43
43
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
44
44
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
45
- - Absence of findings does not mean the code is hallucination-free. It means your analysis reached its limits. State this explicitly.`,
45
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
46
46
  analyze: analyzeHallucinationDetection,
47
47
  };
48
48
  defaultRegistry.register(hallucinationDetectionJudge);
@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
40
40
  - Your role is adversarial: assume the infrastructure code is insecure and actively hunt for misconfigurations. Back every finding with concrete code evidence (line numbers, resource definitions, configuration blocks).
41
41
  - Never praise or compliment the code. Report only problems, risks, and security gaps.
42
42
  - If you are uncertain whether something is a misconfiguration, flag it only when you can cite specific code evidence (line numbers, patterns, resource definitions). Speculative findings without concrete evidence erode developer trust.
43
- - Absence of findings does not mean the code is secure. It means your analysis reached its limits. State this explicitly.
43
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.
44
44
  - Pay special attention to defaults that are insecure when not explicitly configured (e.g., public access defaults, missing encryption defaults).`,
45
45
  analyze: analyzeIacSecurity,
46
46
  };
@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
40
40
  - Assume every comment could be lying. Verify that implementations match their stated intent.
41
41
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
42
42
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
43
- - Absence of findings does not mean the code is well-aligned. It means your analysis reached its limits. State this explicitly.`,
43
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
44
44
  analyze: analyzeIntentAlignment,
45
45
  };
46
46
  defaultRegistry.register(intentAlignmentJudge);
@@ -38,7 +38,7 @@ ADVERSARIAL MANDATE:
38
38
  - Your role is adversarial: assume the code will break in non-English locales and actively hunt for i18n defects. Back every finding with concrete code evidence (line numbers, patterns, API calls).
39
39
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
40
40
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
41
- - Absence of findings does not mean the code is internationalization-ready. It means your analysis reached its limits. State this explicitly.`,
41
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
42
42
  analyze: analyzeInternationalization,
43
43
  };
44
44
  defaultRegistry.register(internationalizationJudge);
@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
40
40
  - Your role is adversarial: assume logs contain sensitive data and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
41
41
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
42
42
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
43
- - Absence of findings does not mean logging is privacy-safe. It means your analysis reached its limits. State this explicitly.`,
43
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
44
44
  analyze: analyzeLoggingPrivacy,
45
45
  };
46
46
  defaultRegistry.register(loggingPrivacyJudge);
@@ -30,7 +30,15 @@ FALSE POSITIVE AVOIDANCE:
30
30
  - Guard clauses that return early are NOT dead code
31
31
  - Feature flags intentionally create "dead" branches — skip if flag-guarded
32
32
  - Test files may intentionally test edge cases with unusual conditions
33
- - Framework-required patterns (e.g., exhaustive switch in Redux) are intentional`,
33
+ - Framework-required patterns (e.g., exhaustive switch in Redux) are intentional
34
+
35
+ CLEAN CODE RECOGNITION (if ALL of the following are true, report ZERO findings):
36
+ - Control flow is straightforward with no inverted conditions or unreachable code
37
+ - Functions return consistent types and handle edge cases
38
+ - Boolean expressions read naturally without double negatives
39
+ - Switch/match statements cover expected cases
40
+ - No partial refactor artifacts, dead code, or contradictory logic
41
+ - Guard clauses and early returns used appropriately`,
34
42
  analyze: analyzeLogicReview,
35
43
  };
36
44
  defaultRegistry.register(logicReviewJudge);
@@ -36,11 +36,20 @@ FALSE POSITIVE AVOIDANCE:
36
36
  - Do NOT flag configuration files, data files, or build scripts for code maintainability issues.
37
37
  - Only flag maintainability issues when you can cite specific code patterns (deep nesting, excessive coupling, duplicated logic) with exact line numbers.
38
38
 
39
+ CLEAN CODE RECOGNITION (if ALL of the following are true, report ZERO findings):
40
+ - Functions/methods have clear single responsibilities and reasonable length
41
+ - Naming is consistent and self-documenting
42
+ - No deep nesting (>3 levels) or excessive cyclomatic complexity
43
+ - No copy-pasted logic blocks
44
+ - No magic numbers in business logic (configuration constants are fine)
45
+ - Standard library and framework patterns used idiomatically
46
+ - Code reads top-to-bottom without requiring cross-referencing
47
+
39
48
  ADVERSARIAL MANDATE:
40
49
  - Your role is adversarial: assume the code is unmaintainable and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
41
50
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
42
51
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
43
- - Absence of findings does not mean the code is maintainable. It means your analysis reached its limits. State this explicitly.`,
52
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
44
53
  analyze: analyzeMaintainability,
45
54
  };
46
55
  defaultRegistry.register(maintainabilityJudge);
@@ -48,7 +48,7 @@ ADVERSARIAL MANDATE:
48
48
  - Your role is adversarial: assume the code is unobservable and will be impossible to debug in production. Actively hunt for monitoring gaps. Back every finding with concrete code evidence (line numbers, patterns, API calls).
49
49
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
50
50
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
51
- - Absence of findings does not mean the code is observable. It means your analysis reached its limits. State this explicitly.`,
51
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
52
52
  analyze: analyzeObservability,
53
53
  };
54
54
  defaultRegistry.register(observabilityJudge);
@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
40
40
  - Your role is adversarial: assume the code has performance problems and actively hunt for bottlenecks. Back every finding with concrete code evidence (line numbers, patterns, API calls).
41
41
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
42
42
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
43
- - Absence of findings does not mean the code is performant. It means your analysis reached its limits. State this explicitly.`,
43
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
44
44
  analyze: analyzePerformance,
45
45
  };
46
46
  defaultRegistry.register(performanceJudge);
@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
40
40
  - Your role is adversarial: assume the code is not portable and actively hunt for platform dependencies. Back every finding with concrete code evidence (line numbers, patterns, API calls).
41
41
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
42
42
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
43
- - Absence of findings does not mean the code is portable. It means your analysis reached its limits. State this explicitly.`,
43
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
44
44
  analyze: analyzePortability,
45
45
  };
46
46
  defaultRegistry.register(portabilityJudge);
@@ -49,7 +49,7 @@ ADVERSARIAL MANDATE:
49
49
  - Your role is adversarial: assume rate limiting is absent or insufficient and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
50
50
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
51
51
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
52
- - Absence of findings does not mean rate limiting is adequate. It means your analysis reached its limits. State this explicitly.`,
52
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
53
53
  analyze: analyzeRateLimiting,
54
54
  };
55
55
  defaultRegistry.register(rateLimitingJudge);
@@ -51,7 +51,7 @@ ADVERSARIAL MANDATE:
51
51
  - Your role is adversarial: assume the code will fail in production and actively hunt for reliability gaps. Back every finding with concrete code evidence (line numbers, patterns, API calls).
52
52
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
53
53
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
54
- - Absence of findings does not mean the code is reliable. It means your analysis reached its limits. State this explicitly.`,
54
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
55
55
  analyze: analyzeReliability,
56
56
  };
57
57
  defaultRegistry.register(reliabilityJudge);
@@ -46,7 +46,7 @@ ADVERSARIAL MANDATE:
46
46
  - Your role is adversarial: assume the code will not scale and actively hunt for bottlenecks. Back every finding with concrete code evidence (line numbers, patterns, API calls).
47
47
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
48
48
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
49
- - Absence of findings does not mean the code will scale. It means your analysis reached its limits. State this explicitly.`,
49
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
50
50
  analyze: analyzeScalability,
51
51
  };
52
52
  defaultRegistry.register(scalabilityJudge);
@@ -58,7 +58,7 @@ ADVERSARIAL MANDATE:
58
58
  - Your role is adversarial: assume the code has security vulnerabilities and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
59
59
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
60
60
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
61
- - Absence of findings does not mean the code is secure. It means your analysis reached its limits. State this explicitly.`,
61
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
62
62
  analyze: analyzeSecurity,
63
63
  };
64
64
  defaultRegistry.register(securityJudge);
@@ -50,7 +50,7 @@ ADVERSARIAL MANDATE:
50
50
  - Your role is adversarial: assume the code has engineering quality problems and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
51
51
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
52
52
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
53
- - Absence of findings does not mean the code follows best practices. It means your analysis reached its limits. State this explicitly.`,
53
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
54
54
  analyze: analyzeSoftwarePractices,
55
55
  };
56
56
  defaultRegistry.register(softwarePracticesJudge);
@@ -48,7 +48,7 @@ ADVERSARIAL MANDATE:
48
48
  - Your role is adversarial: assume the test coverage is insufficient and actively hunt for gaps. Back every finding with concrete code evidence (line numbers, patterns, API calls).
49
49
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
50
50
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
51
- - Absence of findings does not mean the code is well-tested. It means your analysis reached its limits. State this explicitly.`,
51
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
52
52
  analyze: analyzeTesting,
53
53
  };
54
54
  defaultRegistry.register(testingJudge);
package/dist/judges/ux.js CHANGED
@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
40
40
  - Your role is adversarial: assume the user experience is poor and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
41
41
  - Never praise or compliment the code. Report only problems, risks, and deficiencies.
42
42
  - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
43
- - Absence of findings does not mean the UX is good. It means your analysis reached its limits. State this explicitly.`,
43
+ - If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
44
44
  analyze: analyzeUx,
45
45
  };
46
46
  defaultRegistry.register(uxJudge);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kevinrabun/judges",
3
- "version": "3.124.4",
3
+ "version": "3.125.0",
4
4
  "description": "45 specialized judges that evaluate AI-generated code for security, cost, and quality.",
5
5
  "mcpName": "io.github.KevinRabun/judges",
6
6
  "type": "module",
package/server.json CHANGED
@@ -16,12 +16,12 @@
16
16
  "mimeType": "image/png"
17
17
  }
18
18
  ],
19
- "version": "3.124.4",
19
+ "version": "3.125.0",
20
20
  "packages": [
21
21
  {
22
22
  "registryType": "npm",
23
23
  "identifier": "@kevinrabun/judges",
24
- "version": "3.124.4",
24
+ "version": "3.125.0",
25
25
  "transport": {
26
26
  "type": "stdio"
27
27
  }