@kevinrabun/judges 3.37.0 → 3.40.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (373)
  1. package/CHANGELOG.md +77 -0
  2. package/README.md +5 -4
  3. package/dist/api.d.ts +5 -2
  4. package/dist/api.d.ts.map +1 -1
  5. package/dist/api.js +5 -1
  6. package/dist/api.js.map +1 -1
  7. package/dist/ast/structural-parser.js +3 -3
  8. package/dist/ast/structural-parser.js.map +1 -1
  9. package/dist/calibration.d.ts +35 -0
  10. package/dist/calibration.d.ts.map +1 -1
  11. package/dist/calibration.js +52 -0
  12. package/dist/calibration.js.map +1 -1
  13. package/dist/cli.d.ts.map +1 -1
  14. package/dist/cli.js +307 -16
  15. package/dist/cli.js.map +1 -1
  16. package/dist/commands/benchmark-advanced.d.ts.map +1 -1
  17. package/dist/commands/benchmark-advanced.js +38 -17
  18. package/dist/commands/benchmark-advanced.js.map +1 -1
  19. package/dist/commands/benchmark-ai-agents.js +1 -1
  20. package/dist/commands/benchmark-ai-agents.js.map +1 -1
  21. package/dist/commands/benchmark-ai-output.js +1 -1
  22. package/dist/commands/benchmark-ai-output.js.map +1 -1
  23. package/dist/commands/benchmark-compliance-ethics.js +4 -4
  24. package/dist/commands/benchmark-compliance-ethics.js.map +1 -1
  25. package/dist/commands/benchmark-expanded.js +2 -2
  26. package/dist/commands/benchmark-expanded.js.map +1 -1
  27. package/dist/commands/benchmark-languages.js +5 -5
  28. package/dist/commands/benchmark-languages.js.map +1 -1
  29. package/dist/commands/benchmark-quality-ops.js +3 -3
  30. package/dist/commands/benchmark-quality-ops.js.map +1 -1
  31. package/dist/commands/benchmark-security-deep.js +2 -2
  32. package/dist/commands/benchmark-security-deep.js.map +1 -1
  33. package/dist/commands/benchmark.d.ts +2 -1
  34. package/dist/commands/benchmark.d.ts.map +1 -1
  35. package/dist/commands/benchmark.js +100 -6
  36. package/dist/commands/benchmark.js.map +1 -1
  37. package/dist/commands/calibration-dashboard.d.ts.map +1 -1
  38. package/dist/commands/calibration-dashboard.js +198 -0
  39. package/dist/commands/calibration-dashboard.js.map +1 -1
  40. package/dist/commands/calibration-share.d.ts +31 -0
  41. package/dist/commands/calibration-share.d.ts.map +1 -0
  42. package/dist/commands/calibration-share.js +183 -0
  43. package/dist/commands/calibration-share.js.map +1 -0
  44. package/dist/commands/compliance-report.d.ts +35 -0
  45. package/dist/commands/compliance-report.d.ts.map +1 -0
  46. package/dist/commands/compliance-report.js +162 -0
  47. package/dist/commands/compliance-report.js.map +1 -0
  48. package/dist/commands/diff.d.ts.map +1 -1
  49. package/dist/commands/diff.js +8 -3
  50. package/dist/commands/diff.js.map +1 -1
  51. package/dist/commands/feedback-rules.d.ts +29 -0
  52. package/dist/commands/feedback-rules.d.ts.map +1 -0
  53. package/dist/commands/feedback-rules.js +174 -0
  54. package/dist/commands/feedback-rules.js.map +1 -0
  55. package/dist/commands/feedback.d.ts +12 -0
  56. package/dist/commands/feedback.d.ts.map +1 -1
  57. package/dist/commands/feedback.js +16 -0
  58. package/dist/commands/feedback.js.map +1 -1
  59. package/dist/commands/fix.d.ts.map +1 -1
  60. package/dist/commands/fix.js +33 -1
  61. package/dist/commands/fix.js.map +1 -1
  62. package/dist/commands/governance.d.ts +32 -0
  63. package/dist/commands/governance.d.ts.map +1 -0
  64. package/dist/commands/governance.js +203 -0
  65. package/dist/commands/governance.js.map +1 -0
  66. package/dist/commands/help.d.ts +8 -0
  67. package/dist/commands/help.d.ts.map +1 -0
  68. package/dist/commands/help.js +303 -0
  69. package/dist/commands/help.js.map +1 -0
  70. package/dist/commands/hook.d.ts.map +1 -1
  71. package/dist/commands/hook.js +17 -20
  72. package/dist/commands/hook.js.map +1 -1
  73. package/dist/commands/llm-benchmark.d.ts +119 -0
  74. package/dist/commands/llm-benchmark.d.ts.map +1 -0
  75. package/dist/commands/llm-benchmark.js +396 -0
  76. package/dist/commands/llm-benchmark.js.map +1 -0
  77. package/dist/commands/metrics-dashboard.d.ts +22 -0
  78. package/dist/commands/metrics-dashboard.d.ts.map +1 -0
  79. package/dist/commands/metrics-dashboard.js +335 -0
  80. package/dist/commands/metrics-dashboard.js.map +1 -0
  81. package/dist/commands/metrics.d.ts +58 -0
  82. package/dist/commands/metrics.d.ts.map +1 -0
  83. package/dist/commands/metrics.js +242 -0
  84. package/dist/commands/metrics.js.map +1 -0
  85. package/dist/commands/onboard.d.ts +13 -0
  86. package/dist/commands/onboard.d.ts.map +1 -0
  87. package/dist/commands/onboard.js +179 -0
  88. package/dist/commands/onboard.js.map +1 -0
  89. package/dist/commands/org-metrics.d.ts +24 -0
  90. package/dist/commands/org-metrics.d.ts.map +1 -0
  91. package/dist/commands/org-metrics.js +238 -0
  92. package/dist/commands/org-metrics.js.map +1 -0
  93. package/dist/commands/override.d.ts +62 -0
  94. package/dist/commands/override.d.ts.map +1 -0
  95. package/dist/commands/override.js +264 -0
  96. package/dist/commands/override.js.map +1 -0
  97. package/dist/commands/parity.d.ts +31 -0
  98. package/dist/commands/parity.d.ts.map +1 -0
  99. package/dist/commands/parity.js +213 -0
  100. package/dist/commands/parity.js.map +1 -0
  101. package/dist/commands/plugin-search.d.ts +40 -0
  102. package/dist/commands/plugin-search.d.ts.map +1 -0
  103. package/dist/commands/plugin-search.js +328 -0
  104. package/dist/commands/plugin-search.js.map +1 -0
  105. package/dist/commands/plugins.d.ts +13 -0
  106. package/dist/commands/plugins.d.ts.map +1 -0
  107. package/dist/commands/plugins.js +105 -0
  108. package/dist/commands/plugins.js.map +1 -0
  109. package/dist/commands/review.js +1 -1
  110. package/dist/commands/review.js.map +1 -1
  111. package/dist/commands/snapshot.d.ts +27 -0
  112. package/dist/commands/snapshot.d.ts.map +1 -1
  113. package/dist/commands/snapshot.js +99 -0
  114. package/dist/commands/snapshot.js.map +1 -1
  115. package/dist/commands/trace.d.ts +65 -0
  116. package/dist/commands/trace.d.ts.map +1 -0
  117. package/dist/commands/trace.js +246 -0
  118. package/dist/commands/trace.js.map +1 -0
  119. package/dist/commands/trust-ramp.d.ts +30 -0
  120. package/dist/commands/trust-ramp.d.ts.map +1 -0
  121. package/dist/commands/trust-ramp.js +190 -0
  122. package/dist/commands/trust-ramp.js.map +1 -0
  123. package/dist/config.d.ts +5 -0
  124. package/dist/config.d.ts.map +1 -1
  125. package/dist/config.js +65 -0
  126. package/dist/config.js.map +1 -1
  127. package/dist/data-adapter.d.ts +124 -0
  128. package/dist/data-adapter.d.ts.map +1 -0
  129. package/dist/data-adapter.js +213 -0
  130. package/dist/data-adapter.js.map +1 -0
  131. package/dist/evaluators/accessibility.js +1 -1
  132. package/dist/evaluators/accessibility.js.map +1 -1
  133. package/dist/evaluators/ai-code-safety.d.ts.map +1 -1
  134. package/dist/evaluators/ai-code-safety.js +1 -4
  135. package/dist/evaluators/ai-code-safety.js.map +1 -1
  136. package/dist/evaluators/code-structure.d.ts.map +1 -1
  137. package/dist/evaluators/code-structure.js +7 -16
  138. package/dist/evaluators/code-structure.js.map +1 -1
  139. package/dist/evaluators/compliance.js +1 -1
  140. package/dist/evaluators/compliance.js.map +1 -1
  141. package/dist/evaluators/concurrency.d.ts.map +1 -1
  142. package/dist/evaluators/concurrency.js +7 -2
  143. package/dist/evaluators/concurrency.js.map +1 -1
  144. package/dist/evaluators/cost-effectiveness.js +1 -1
  145. package/dist/evaluators/cost-effectiveness.js.map +1 -1
  146. package/dist/evaluators/data-sovereignty.d.ts.map +1 -1
  147. package/dist/evaluators/data-sovereignty.js +10 -3
  148. package/dist/evaluators/data-sovereignty.js.map +1 -1
  149. package/dist/evaluators/documentation.d.ts.map +1 -1
  150. package/dist/evaluators/documentation.js +21 -2
  151. package/dist/evaluators/documentation.js.map +1 -1
  152. package/dist/evaluators/false-positive-review.js +9 -4
  153. package/dist/evaluators/false-positive-review.js.map +1 -1
  154. package/dist/evaluators/hallucination-detection.d.ts.map +1 -1
  155. package/dist/evaluators/hallucination-detection.js +41 -0
  156. package/dist/evaluators/hallucination-detection.js.map +1 -1
  157. package/dist/evaluators/iac-security.d.ts.map +1 -1
  158. package/dist/evaluators/iac-security.js +5 -3
  159. package/dist/evaluators/iac-security.js.map +1 -1
  160. package/dist/evaluators/index.d.ts.map +1 -1
  161. package/dist/evaluators/index.js +64 -11
  162. package/dist/evaluators/index.js.map +1 -1
  163. package/dist/evaluators/intent-alignment.d.ts +4 -0
  164. package/dist/evaluators/intent-alignment.d.ts.map +1 -1
  165. package/dist/evaluators/intent-alignment.js +163 -0
  166. package/dist/evaluators/intent-alignment.js.map +1 -1
  167. package/dist/evaluators/internationalization.d.ts.map +1 -1
  168. package/dist/evaluators/internationalization.js +46 -17
  169. package/dist/evaluators/internationalization.js.map +1 -1
  170. package/dist/evaluators/logic-review.d.ts.map +1 -1
  171. package/dist/evaluators/logic-review.js +60 -33
  172. package/dist/evaluators/logic-review.js.map +1 -1
  173. package/dist/evaluators/maintainability.d.ts.map +1 -1
  174. package/dist/evaluators/maintainability.js +82 -3
  175. package/dist/evaluators/maintainability.js.map +1 -1
  176. package/dist/evaluators/over-engineering.js +3 -3
  177. package/dist/evaluators/over-engineering.js.map +1 -1
  178. package/dist/evaluators/project.d.ts +12 -0
  179. package/dist/evaluators/project.d.ts.map +1 -1
  180. package/dist/evaluators/project.js +86 -0
  181. package/dist/evaluators/project.js.map +1 -1
  182. package/dist/evaluators/security.js +2 -2
  183. package/dist/evaluators/security.js.map +1 -1
  184. package/dist/evaluators/shared.d.ts.map +1 -1
  185. package/dist/evaluators/shared.js +13 -1
  186. package/dist/evaluators/shared.js.map +1 -1
  187. package/dist/evaluators/ux.js +1 -1
  188. package/dist/evaluators/ux.js.map +1 -1
  189. package/dist/finding-lifecycle.d.ts +9 -0
  190. package/dist/finding-lifecycle.d.ts.map +1 -1
  191. package/dist/finding-lifecycle.js +15 -0
  192. package/dist/finding-lifecycle.js.map +1 -1
  193. package/dist/fix-history.d.ts +9 -0
  194. package/dist/fix-history.d.ts.map +1 -1
  195. package/dist/fix-history.js +15 -0
  196. package/dist/fix-history.js.map +1 -1
  197. package/dist/formatters/sarif.d.ts +3 -0
  198. package/dist/formatters/sarif.d.ts.map +1 -1
  199. package/dist/formatters/sarif.js +36 -12
  200. package/dist/formatters/sarif.js.map +1 -1
  201. package/dist/github-app.d.ts +16 -1
  202. package/dist/github-app.d.ts.map +1 -1
  203. package/dist/github-app.js +85 -2
  204. package/dist/github-app.js.map +1 -1
  205. package/dist/index.js +5 -0
  206. package/dist/index.js.map +1 -1
  207. package/dist/judge-registry.d.ts +157 -0
  208. package/dist/judge-registry.d.ts.map +1 -0
  209. package/dist/judge-registry.js +273 -0
  210. package/dist/judge-registry.js.map +1 -0
  211. package/dist/judges/accessibility.d.ts.map +1 -1
  212. package/dist/judges/accessibility.js +4 -0
  213. package/dist/judges/accessibility.js.map +1 -1
  214. package/dist/judges/agent-instructions.d.ts.map +1 -1
  215. package/dist/judges/agent-instructions.js +4 -0
  216. package/dist/judges/agent-instructions.js.map +1 -1
  217. package/dist/judges/ai-code-safety.d.ts.map +1 -1
  218. package/dist/judges/ai-code-safety.js +4 -0
  219. package/dist/judges/ai-code-safety.js.map +1 -1
  220. package/dist/judges/api-contract.d.ts.map +1 -1
  221. package/dist/judges/api-contract.js +4 -0
  222. package/dist/judges/api-contract.js.map +1 -1
  223. package/dist/judges/api-design.d.ts.map +1 -1
  224. package/dist/judges/api-design.js +4 -0
  225. package/dist/judges/api-design.js.map +1 -1
  226. package/dist/judges/authentication.d.ts.map +1 -1
  227. package/dist/judges/authentication.js +4 -0
  228. package/dist/judges/authentication.js.map +1 -1
  229. package/dist/judges/backwards-compatibility.d.ts.map +1 -1
  230. package/dist/judges/backwards-compatibility.js +4 -0
  231. package/dist/judges/backwards-compatibility.js.map +1 -1
  232. package/dist/judges/caching.d.ts.map +1 -1
  233. package/dist/judges/caching.js +4 -0
  234. package/dist/judges/caching.js.map +1 -1
  235. package/dist/judges/ci-cd.d.ts.map +1 -1
  236. package/dist/judges/ci-cd.js +4 -0
  237. package/dist/judges/ci-cd.js.map +1 -1
  238. package/dist/judges/cloud-readiness.d.ts.map +1 -1
  239. package/dist/judges/cloud-readiness.js +4 -0
  240. package/dist/judges/cloud-readiness.js.map +1 -1
  241. package/dist/judges/code-structure.d.ts.map +1 -1
  242. package/dist/judges/code-structure.js +4 -0
  243. package/dist/judges/code-structure.js.map +1 -1
  244. package/dist/judges/compliance.d.ts.map +1 -1
  245. package/dist/judges/compliance.js +4 -0
  246. package/dist/judges/compliance.js.map +1 -1
  247. package/dist/judges/concurrency.d.ts.map +1 -1
  248. package/dist/judges/concurrency.js +4 -0
  249. package/dist/judges/concurrency.js.map +1 -1
  250. package/dist/judges/configuration-management.d.ts.map +1 -1
  251. package/dist/judges/configuration-management.js +4 -0
  252. package/dist/judges/configuration-management.js.map +1 -1
  253. package/dist/judges/cost-effectiveness.d.ts.map +1 -1
  254. package/dist/judges/cost-effectiveness.js +4 -0
  255. package/dist/judges/cost-effectiveness.js.map +1 -1
  256. package/dist/judges/cybersecurity.d.ts.map +1 -1
  257. package/dist/judges/cybersecurity.js +4 -0
  258. package/dist/judges/cybersecurity.js.map +1 -1
  259. package/dist/judges/data-security.d.ts.map +1 -1
  260. package/dist/judges/data-security.js +4 -0
  261. package/dist/judges/data-security.js.map +1 -1
  262. package/dist/judges/data-sovereignty.d.ts.map +1 -1
  263. package/dist/judges/data-sovereignty.js +4 -0
  264. package/dist/judges/data-sovereignty.js.map +1 -1
  265. package/dist/judges/database.d.ts.map +1 -1
  266. package/dist/judges/database.js +4 -0
  267. package/dist/judges/database.js.map +1 -1
  268. package/dist/judges/dependency-health.d.ts.map +1 -1
  269. package/dist/judges/dependency-health.js +4 -0
  270. package/dist/judges/dependency-health.js.map +1 -1
  271. package/dist/judges/documentation.d.ts.map +1 -1
  272. package/dist/judges/documentation.js +4 -0
  273. package/dist/judges/documentation.js.map +1 -1
  274. package/dist/judges/error-handling.d.ts.map +1 -1
  275. package/dist/judges/error-handling.js +4 -0
  276. package/dist/judges/error-handling.js.map +1 -1
  277. package/dist/judges/ethics-bias.d.ts.map +1 -1
  278. package/dist/judges/ethics-bias.js +4 -0
  279. package/dist/judges/ethics-bias.js.map +1 -1
  280. package/dist/judges/false-positive-review.d.ts.map +1 -1
  281. package/dist/judges/false-positive-review.js +2 -0
  282. package/dist/judges/false-positive-review.js.map +1 -1
  283. package/dist/judges/framework-safety.d.ts.map +1 -1
  284. package/dist/judges/framework-safety.js +4 -0
  285. package/dist/judges/framework-safety.js.map +1 -1
  286. package/dist/judges/hallucination-detection.d.ts.map +1 -1
  287. package/dist/judges/hallucination-detection.js +4 -0
  288. package/dist/judges/hallucination-detection.js.map +1 -1
  289. package/dist/judges/iac-security.d.ts.map +1 -1
  290. package/dist/judges/iac-security.js +4 -0
  291. package/dist/judges/iac-security.js.map +1 -1
  292. package/dist/judges/index.d.ts +59 -0
  293. package/dist/judges/index.d.ts.map +1 -1
  294. package/dist/judges/index.js +65 -189
  295. package/dist/judges/index.js.map +1 -1
  296. package/dist/judges/intent-alignment.d.ts.map +1 -1
  297. package/dist/judges/intent-alignment.js +4 -0
  298. package/dist/judges/intent-alignment.js.map +1 -1
  299. package/dist/judges/internationalization.d.ts.map +1 -1
  300. package/dist/judges/internationalization.js +4 -0
  301. package/dist/judges/internationalization.js.map +1 -1
  302. package/dist/judges/logging-privacy.d.ts.map +1 -1
  303. package/dist/judges/logging-privacy.js +4 -0
  304. package/dist/judges/logging-privacy.js.map +1 -1
  305. package/dist/judges/logic-review.d.ts.map +1 -1
  306. package/dist/judges/logic-review.js +4 -0
  307. package/dist/judges/logic-review.js.map +1 -1
  308. package/dist/judges/maintainability.d.ts.map +1 -1
  309. package/dist/judges/maintainability.js +4 -0
  310. package/dist/judges/maintainability.js.map +1 -1
  311. package/dist/judges/model-fingerprint.d.ts.map +1 -1
  312. package/dist/judges/model-fingerprint.js +4 -0
  313. package/dist/judges/model-fingerprint.js.map +1 -1
  314. package/dist/judges/multi-turn-coherence.d.ts.map +1 -1
  315. package/dist/judges/multi-turn-coherence.js +4 -0
  316. package/dist/judges/multi-turn-coherence.js.map +1 -1
  317. package/dist/judges/observability.d.ts.map +1 -1
  318. package/dist/judges/observability.js +4 -0
  319. package/dist/judges/observability.js.map +1 -1
  320. package/dist/judges/over-engineering.d.ts.map +1 -1
  321. package/dist/judges/over-engineering.js +4 -0
  322. package/dist/judges/over-engineering.js.map +1 -1
  323. package/dist/judges/performance.d.ts.map +1 -1
  324. package/dist/judges/performance.js +4 -0
  325. package/dist/judges/performance.js.map +1 -1
  326. package/dist/judges/portability.d.ts.map +1 -1
  327. package/dist/judges/portability.js +4 -0
  328. package/dist/judges/portability.js.map +1 -1
  329. package/dist/judges/rate-limiting.d.ts.map +1 -1
  330. package/dist/judges/rate-limiting.js +4 -0
  331. package/dist/judges/rate-limiting.js.map +1 -1
  332. package/dist/judges/reliability.d.ts.map +1 -1
  333. package/dist/judges/reliability.js +4 -0
  334. package/dist/judges/reliability.js.map +1 -1
  335. package/dist/judges/scalability.d.ts.map +1 -1
  336. package/dist/judges/scalability.js +4 -0
  337. package/dist/judges/scalability.js.map +1 -1
  338. package/dist/judges/security.d.ts.map +1 -1
  339. package/dist/judges/security.js +4 -0
  340. package/dist/judges/security.js.map +1 -1
  341. package/dist/judges/software-practices.d.ts.map +1 -1
  342. package/dist/judges/software-practices.js +4 -0
  343. package/dist/judges/software-practices.js.map +1 -1
  344. package/dist/judges/testing.d.ts.map +1 -1
  345. package/dist/judges/testing.js +4 -0
  346. package/dist/judges/testing.js.map +1 -1
  347. package/dist/judges/ux.d.ts.map +1 -1
  348. package/dist/judges/ux.js +4 -0
  349. package/dist/judges/ux.js.map +1 -1
  350. package/dist/plugins.d.ts +8 -51
  351. package/dist/plugins.d.ts.map +1 -1
  352. package/dist/plugins.js +16 -125
  353. package/dist/plugins.js.map +1 -1
  354. package/dist/security-ids.d.ts +24 -0
  355. package/dist/security-ids.d.ts.map +1 -0
  356. package/dist/security-ids.js +240 -0
  357. package/dist/security-ids.js.map +1 -0
  358. package/dist/tools/prompts.d.ts +4 -0
  359. package/dist/tools/prompts.d.ts.map +1 -1
  360. package/dist/tools/prompts.js +6 -4
  361. package/dist/tools/prompts.js.map +1 -1
  362. package/dist/tools/register-scaffold.d.ts +3 -0
  363. package/dist/tools/register-scaffold.d.ts.map +1 -0
  364. package/dist/tools/register-scaffold.js +399 -0
  365. package/dist/tools/register-scaffold.js.map +1 -0
  366. package/dist/tools/register.d.ts +1 -1
  367. package/dist/tools/register.d.ts.map +1 -1
  368. package/dist/tools/register.js +3 -1
  369. package/dist/tools/register.js.map +1 -1
  370. package/dist/types.d.ts +75 -0
  371. package/dist/types.d.ts.map +1 -1
  372. package/package.json +3 -2
  373. package/server.json +2 -2
package/dist/cli.js CHANGED
@@ -42,6 +42,8 @@ import { generateGitLabCi, generateAzurePipelines, generateBitbucketPipelines }
42
42
  import { getPreset, listPresets, composePresets } from "./presets.js";
43
43
  import { parseConfig } from "./config.js";
44
44
  import { applyPatches } from "./commands/fix.js";
45
+ import { DiskCache } from "./disk-cache.js";
46
+ import { contentHash } from "./cache.js";
45
47
  import { runFeedback } from "./commands/feedback.js";
46
48
  import { runBenchmark } from "./commands/benchmark.js";
47
49
  import { runRule } from "./commands/rule.js";
@@ -50,6 +52,7 @@ import { runConfig } from "./commands/config-share.js";
50
52
  import { runDoctor } from "./commands/doctor.js";
51
53
  import { runTriage } from "./commands/triage.js";
52
54
  import { formatComparisonReport, formatFullComparisonMatrix, TOOL_PROFILES } from "./comparison.js";
55
+ import { runOverride, loadOverrideStore, applyOverrides } from "./commands/override.js";
53
56
  // ─── Language Detection from Extension ──────────────────────────────────────
54
57
  const EXT_TO_LANG = {
55
58
  ".ts": "typescript",
@@ -117,8 +120,12 @@ function parseCliArgs(argv) {
117
120
  include: [],
118
121
  maxFiles: undefined,
119
122
  changedOnly: false,
123
+ stagedOnly: false,
120
124
  explain: false,
121
125
  sample: false,
126
+ trace: false,
127
+ incremental: false,
128
+ noCache: false,
122
129
  };
123
130
  // First non-flag arg is the command
124
131
  let i = 2; // skip node + script
@@ -185,6 +192,9 @@ function parseCliArgs(argv) {
185
192
  case "--changed-only":
186
193
  args.changedOnly = true;
187
194
  break;
195
+ case "--staged-only":
196
+ args.stagedOnly = true;
197
+ break;
188
198
  case "--explain":
189
199
  args.explain = true;
190
200
  break;
@@ -202,6 +212,15 @@ function parseCliArgs(argv) {
202
212
  case "--sample":
203
213
  args.sample = true;
204
214
  break;
215
+ case "--trace":
216
+ args.trace = true;
217
+ break;
218
+ case "--incremental":
219
+ args.incremental = true;
220
+ break;
221
+ case "--no-cache":
222
+ args.noCache = true;
223
+ break;
205
224
  default:
206
225
  // If it looks like a file path (not a flag), treat as --file
207
226
  if (!arg.startsWith("-") && !args.file) {
@@ -273,6 +292,7 @@ EVAL OPTIONS:
273
292
  --fix Auto-fix findings after evaluation (applies patches in-place)
274
293
  --changed-only Only evaluate files changed since last commit (uses git diff)
275
294
  --explain Enrich findings with OWASP/CWE learning context
295
+ --trace Show detailed decision trace for every finding
276
296
  --help, -h Show this help
277
297
 
278
298
  FIX OPTIONS:
@@ -526,6 +546,23 @@ function getGitChangedFiles(cwd) {
526
546
  return [];
527
547
  }
528
548
  }
549
+ function getStagedFiles(cwd) {
550
+ try {
551
+ const resolvedCwd = resolve(cwd);
552
+ const output = execSync("git diff --cached --name-only --diff-filter=ACM", {
553
+ cwd: resolvedCwd,
554
+ encoding: "utf-8",
555
+ stdio: ["pipe", "pipe", "pipe"],
556
+ }).trim();
557
+ return output
558
+ .split("\n")
559
+ .filter(Boolean)
560
+ .map((f) => resolve(resolvedCwd, f));
561
+ }
562
+ catch {
563
+ return [];
564
+ }
565
+ }
529
566
  // ─── Format Output ──────────────────────────────────────────────────────────
530
567
  function formatTribunalOutput(verdict, format, filePath) {
531
568
  switch (format) {
@@ -613,10 +650,33 @@ function formatTextOutput(verdict) {
613
650
  lines.push(" " + "─".repeat(60));
614
651
  for (const f of critical.slice(0, 20)) {
615
652
  const fixTag = f.patch ? " 🔧" : "";
616
- lines.push(` [${f.severity.toUpperCase().padEnd(8)}] ${f.ruleId}: ${f.title}${fixTag}`);
653
+ const confTag = f.confidence !== undefined ? ` (${Math.round(f.confidence * 100)}% confidence)` : "";
654
+ lines.push(` [${f.severity.toUpperCase().padEnd(8)}] ${f.ruleId}: ${f.title}${fixTag}${confTag}`);
617
655
  if (f.lineNumbers && f.lineNumbers.length > 0) {
618
656
  lines.push(` Line ${f.lineNumbers[0]}: ${f.description.slice(0, 100)}`);
619
657
  }
658
+ if (f.provenance) {
659
+ lines.push(` Evidence: ${f.provenance}`);
660
+ }
661
+ if (f.evidenceBasis) {
662
+ lines.push(` Basis: ${f.evidenceBasis}`);
663
+ }
664
+ if (f.evidenceChain && f.evidenceChain.steps.length > 0) {
665
+ lines.push(` Impact: ${f.evidenceChain.impactStatement}`);
666
+ for (const step of f.evidenceChain.steps.slice(0, 3)) {
667
+ const loc = step.line ? ` (L${step.line})` : "";
668
+ lines.push(` → [${step.source}]${loc} ${step.observation}`);
669
+ }
670
+ }
671
+ if (f.cweIds && f.cweIds.length > 0) {
672
+ lines.push(` CWE: ${f.cweIds.join(", ")}`);
673
+ }
674
+ if (f.owaspLlmTop10) {
675
+ lines.push(` OWASP LLM: ${f.owaspLlmTop10}`);
676
+ }
677
+ if (f.learnMoreUrl) {
678
+ lines.push(` 📖 Learn more: ${f.learnMoreUrl}`);
679
+ }
620
680
  }
621
681
  if (critical.length > 20) {
622
682
  lines.push(` ... and ${critical.length - 20} more critical/high findings`);
@@ -650,13 +710,23 @@ function formatSingleJudgeTextOutput(evaluation) {
650
710
  lines.push(` Findings : ${evaluation.findings.length}`);
651
711
  lines.push("");
652
712
  for (const f of evaluation.findings) {
653
- lines.push(` [${f.severity.toUpperCase().padEnd(8)}] ${f.ruleId}: ${f.title}`);
713
+ const confTag = f.confidence !== undefined ? ` (${Math.round(f.confidence * 100)}%)` : "";
714
+ lines.push(` [${f.severity.toUpperCase().padEnd(8)}] ${f.ruleId}: ${f.title}${confTag}`);
654
715
  if (f.lineNumbers && f.lineNumbers.length > 0) {
655
716
  lines.push(` Line ${f.lineNumbers[0]}: ${f.description.slice(0, 120)}`);
656
717
  }
718
+ if (f.provenance) {
719
+ lines.push(` Evidence: ${f.provenance}`);
720
+ }
721
+ if (f.evidenceChain && f.evidenceChain.steps.length > 0) {
722
+ lines.push(` Impact: ${f.evidenceChain.impactStatement}`);
723
+ }
657
724
  if (f.suggestedFix) {
658
725
  lines.push(` Fix: ${f.suggestedFix.slice(0, 120)}`);
659
726
  }
727
+ if (f.learnMoreUrl) {
728
+ lines.push(` 📖 ${f.learnMoreUrl}`);
729
+ }
660
730
  }
661
731
  lines.push("");
662
732
  return lines.join("\n");
@@ -782,6 +852,50 @@ export async function runCli(argv) {
782
852
  runFeedback(argv);
783
853
  return;
784
854
  }
855
+ // ─── Override Command ─────────────────────────────────────────────────
856
+ if (args.command === "override") {
857
+ runOverride(argv);
858
+ return;
859
+ }
860
+ // ─── Feedback-Rules Command ───────────────────────────────────────────
861
+ if (args.command === "feedback-rules") {
862
+ const { runFeedbackRules } = await import("./commands/feedback-rules.js");
863
+ runFeedbackRules(argv);
864
+ return;
865
+ }
866
+ // ─── Governance Command ───────────────────────────────────────────────
867
+ if (args.command === "governance") {
868
+ const { runGovernance } = await import("./commands/governance.js");
869
+ runGovernance(argv);
870
+ return;
871
+ }
872
+ // ─── Parity Command ──────────────────────────────────────────────────
873
+ if (args.command === "parity") {
874
+ const { runParity } = await import("./commands/parity.js");
875
+ runParity(argv);
876
+ return;
877
+ }
878
+ // ─── Compliance-Report Command ────────────────────────────────────────
879
+ if (args.command === "compliance-report") {
880
+ const { buildComplianceReport, formatComplianceReportText } = await import("./commands/compliance-report.js");
881
+ const target = args.file || ".";
882
+ const code = args.file ? (await import("fs")).readFileSync(args.file, "utf-8") : "";
883
+ let findings = [];
884
+ if (code) {
885
+ const lang = detectLanguage(args.file) || "typescript";
886
+ const result = evaluateWithTribunal(code, lang);
887
+ findings = result.findings;
888
+ }
889
+ const framework = argv.find((a, i) => argv[i - 1] === "--framework") || undefined;
890
+ const report = buildComplianceReport(target, findings, framework);
891
+ if (argv.includes("--json")) {
892
+ console.log(JSON.stringify(report, null, 2));
893
+ }
894
+ else {
895
+ console.log(formatComplianceReportText(report));
896
+ }
897
+ return;
898
+ }
785
899
  // ─── Triage Command ───────────────────────────────────────────────────
786
900
  if (args.command === "triage") {
787
901
  runTriage(argv);
@@ -837,6 +951,12 @@ export async function runCli(argv) {
837
951
  await runCommunityPatterns(argv);
838
952
  process.exit(0);
839
953
  }
954
+ // ─── Calibration Share Command ───────────────────────────────────────
955
+ if (args.command === "calibration-share") {
956
+ const { runCalibrationShare } = await import("./commands/calibration-share.js");
957
+ runCalibrationShare(argv);
958
+ process.exit(0);
959
+ }
840
960
  // ─── Compare Command ─────────────────────────────────────────────────
841
961
  if (args.command === "compare") {
842
962
  const toolName = argv[3];
@@ -856,9 +976,11 @@ export async function runCli(argv) {
856
976
  }
857
977
  // ─── Trend Command ───────────────────────────────────────────────────
858
978
  if (args.command === "trend") {
859
- const { loadSnapshotStore, computeTrend, formatTrendReport, formatTrendReportHtml } = await import("./commands/snapshot.js");
860
- const snapshotFile = argv.find((a, i) => i >= 3 && !a.startsWith("-")) || ".judges-snapshots.json";
979
+ const { loadSnapshotStore, computeTrend, formatTrendReport, formatTrendReportHtml, detectRegressions, formatRegressionAlerts, } = await import("./commands/snapshot.js");
980
+ const snapshotFile = argv.find((a, i) => i >= 3 && !a.startsWith("-") && !["html", "json", "text"].includes(a)) ||
981
+ ".judges-snapshots.json";
861
982
  const formatArg = argv.includes("--format") ? argv[argv.indexOf("--format") + 1] : "text";
983
+ const outputArg = argv.includes("--output") ? argv[argv.indexOf("--output") + 1] : undefined;
862
984
  const store = loadSnapshotStore(snapshotFile);
863
985
  if (store.snapshots.length === 0) {
864
986
  console.log("No snapshot data found. Run evaluations with --snapshot to collect trend data.");
@@ -866,14 +988,30 @@ export async function runCli(argv) {
866
988
  }
867
989
  else {
868
990
  const report = computeTrend(store);
991
+ let output;
869
992
  if (formatArg === "html") {
870
- console.log(formatTrendReportHtml(report));
993
+ output = formatTrendReportHtml(report);
871
994
  }
872
995
  else if (formatArg === "json") {
873
- console.log(JSON.stringify(report, null, 2));
996
+ output = JSON.stringify(report, null, 2);
997
+ }
998
+ else {
999
+ output = formatTrendReport(report);
1000
+ }
1001
+ if (outputArg) {
1002
+ writeFileSync(outputArg, output, "utf-8");
1003
+ console.log(` ✅ Trend report written to ${outputArg}`);
874
1004
  }
875
1005
  else {
876
- console.log(formatTrendReport(report));
1006
+ console.log(output);
1007
+ }
1008
+ // Regression alerts
1009
+ const regressions = detectRegressions(store);
1010
+ if (regressions.length > 0) {
1011
+ console.log(formatRegressionAlerts(regressions));
1012
+ if (args.failOnFindings && regressions.some((r) => r.severity === "error")) {
1013
+ process.exit(1);
1014
+ }
877
1015
  }
878
1016
  }
879
1017
  process.exit(0);
@@ -884,6 +1022,54 @@ export async function runCli(argv) {
884
1022
  runScaffoldPlugin(argv);
885
1023
  process.exit(0);
886
1024
  }
1025
+ // ─── Plugin Search Command ───────────────────────────────────────────
1026
+ if (args.command === "plugin") {
1027
+ const { runPluginSearch } = await import("./commands/plugin-search.js");
1028
+ runPluginSearch(argv);
1029
+ process.exit(0);
1030
+ }
1031
+ // ─── Trust Ramp Command ──────────────────────────────────────────────
1032
+ if (args.command === "trust-ramp") {
1033
+ const { runTrustRamp } = await import("./commands/trust-ramp.js");
1034
+ runTrustRamp(argv);
1035
+ process.exit(0);
1036
+ }
1037
+ // ─── Metrics Command ────────────────────────────────────────────────
1038
+ if (args.command === "metrics") {
1039
+ const { runMetrics } = await import("./commands/metrics.js");
1040
+ runMetrics(argv);
1041
+ process.exit(0);
1042
+ }
1043
+ // ─── Metrics Dashboard Command ────────────────────────────────────────
1044
+ if (args.command === "metrics-dashboard") {
1045
+ const { runMetricsDashboard } = await import("./commands/metrics-dashboard.js");
1046
+ runMetricsDashboard(argv);
1047
+ process.exit(0);
1048
+ }
1049
+ // ─── Help Command ────────────────────────────────────────────────────
1050
+ if (args.command === "help") {
1051
+ const { runHelp } = await import("./commands/help.js");
1052
+ runHelp(argv);
1053
+ process.exit(0);
1054
+ }
1055
+ // ─── Onboard Command ─────────────────────────────────────────────────
1056
+ if (args.command === "onboard") {
1057
+ const { runOnboard } = await import("./commands/onboard.js");
1058
+ await runOnboard(argv);
1059
+ process.exit(0);
1060
+ }
1061
+ // ─── Org Metrics Command ──────────────────────────────────────────────
1062
+ if (args.command === "org-metrics") {
1063
+ const { runOrgMetrics } = await import("./commands/org-metrics.js");
1064
+ runOrgMetrics(argv);
1065
+ process.exit(0);
1066
+ }
1067
+ // ─── Plugins Command ──────────────────────────────────────────────────
1068
+ if (args.command === "plugins") {
1069
+ const { runPlugins } = await import("./commands/plugins.js");
1070
+ runPlugins(argv);
1071
+ process.exit(0);
1072
+ }
887
1073
  // ─── List Command ────────────────────────────────────────────────────
888
1074
  if (args.command === "list") {
889
1075
  listJudges();
@@ -920,6 +1106,12 @@ export async function runCli(argv) {
920
1106
  const changedSet = new Set(changedFiles.map((f) => resolve(f)));
921
1107
  files = files.filter((f) => changedSet.has(resolve(f)));
922
1108
  }
1109
+ // ── --staged-only: scope to git-staged files ──
1110
+ if (args.stagedOnly) {
1111
+ const stagedFiles = getStagedFiles(target);
1112
+ const stagedSet = new Set(stagedFiles.map((f) => resolve(f)));
1113
+ files = files.filter((f) => stagedSet.has(resolve(f)));
1114
+ }
923
1115
  if (files.length === 0) {
924
1116
  console.error(`No supported source files found in: ${target}${args.changedOnly ? " (changed-only)" : ""}`);
925
1117
  process.exit(1);
@@ -933,6 +1125,9 @@ export async function runCli(argv) {
933
1125
  let failCount = 0;
934
1126
  let totalFixed = 0;
935
1127
  let totalFixable = 0;
1128
+ let cacheHits = 0;
1129
+ // Incremental evaluation: use disk cache to skip unchanged files
1130
+ const diskCache = args.noCache ? undefined : new DiskCache();
936
1131
  for (let idx = 0; idx < files.length; idx++) {
937
1132
  const filePath = files[idx];
938
1133
  const relPath = relative(resolve("."), filePath);
@@ -941,7 +1136,21 @@ export async function runCli(argv) {
941
1136
  }
942
1137
  const fileCode = readFileSync(filePath, "utf-8");
943
1138
  const fileLang = args.language || detectLanguage(filePath) || "typescript";
944
- const verdict = evaluateWithTribunal(fileCode, fileLang, undefined, evalOptions);
1139
+ // Check disk cache for incremental mode (always when cache available)
1140
+ const hash = contentHash(fileCode, fileLang);
1141
+ let verdict;
1142
+ if (diskCache) {
1143
+ verdict = diskCache.get(hash);
1144
+ }
1145
+ if (verdict) {
1146
+ cacheHits++;
1147
+ }
1148
+ else {
1149
+ verdict = evaluateWithTribunal(fileCode, fileLang, undefined, evalOptions);
1150
+ if (diskCache) {
1151
+ diskCache.set(hash, verdict, relPath);
1152
+ }
1153
+ }
945
1154
  // Apply baseline suppression
946
1155
  if (loadedBaseline) {
947
1156
  for (const evaluation of verdict.evaluations) {
@@ -949,6 +1158,18 @@ export async function runCli(argv) {
949
1158
  }
950
1159
  verdict.findings = verdict.findings.filter((f) => !isBaselined(f, loadedBaseline, fileCode, relPath));
951
1160
  }
1161
+ // Apply override suppressions for multi-file mode
1162
+ {
1163
+ const overrideStore = loadOverrideStore();
1164
+ if (overrideStore.overrides.length > 0) {
1165
+ for (const evaluation of verdict.evaluations) {
1166
+ const result = applyOverrides(evaluation.findings, overrideStore, relPath);
1167
+ evaluation.findings = result.active;
1168
+ }
1169
+ const topResult = applyOverrides(verdict.findings, overrideStore, relPath);
1170
+ verdict.findings = topResult.active;
1171
+ }
1172
+ }
952
1173
  const fileFindings = verdict.evaluations.reduce((s, e) => s + e.findings.length, 0);
953
1174
  const fileFixable = verdict.evaluations.reduce((s, e) => s + e.findings.filter((f) => f.patch).length, 0);
954
1175
  totalFindings += fileFindings;
@@ -996,6 +1217,9 @@ export async function runCli(argv) {
996
1217
  if (args.fix && totalFixed > 0) {
997
1218
  console.log(` Fixed : ${totalFixed} patch(es) applied`);
998
1219
  }
1220
+ if (cacheHits > 0) {
1221
+ console.log(` Cached : ${cacheHits} file(s) unchanged (skipped re-evaluation)`);
1222
+ }
999
1223
  console.log(` Time : ${elapsed}ms`);
1000
1224
  console.log("");
1001
1225
  if (args.failOnFindings && failCount > 0)
@@ -1069,6 +1293,27 @@ export async function runCli(argv) {
1069
1293
  if (args.verbose) {
1070
1294
  console.log(` ⏱ Evaluated in ${elapsed}ms`);
1071
1295
  }
1296
+ // Trace output — show pipeline decision trace
1297
+ if (args.trace) {
1298
+ const { buildEvaluationTrace, formatTraceText } = await import("./commands/trace.js");
1299
+ const wrappedForTrace = {
1300
+ overallVerdict: evaluation.verdict,
1301
+ overallScore: evaluation.score,
1302
+ summary: evaluation.summary,
1303
+ evaluations: [evaluation],
1304
+ findings: evaluation.findings,
1305
+ criticalCount: evaluation.findings.filter((f) => f.severity === "critical").length,
1306
+ highCount: evaluation.findings.filter((f) => f.severity === "high").length,
1307
+ timestamp: new Date().toISOString(),
1308
+ };
1309
+ const trace = buildEvaluationTrace(wrappedForTrace, resolvedPath || args.file, language);
1310
+ if (args.format === "json") {
1311
+ console.log(JSON.stringify(trace, null, 2));
1312
+ }
1313
+ else {
1314
+ console.log(formatTraceText(trace));
1315
+ }
1316
+ }
1072
1317
  // Exit code — fail-on-findings or min-score
1073
1318
  if (args.failOnFindings && evaluation.verdict === "fail")
1074
1319
  process.exit(1);
@@ -1117,6 +1362,22 @@ export async function runCli(argv) {
1117
1362
  }
1118
1363
  verdict.findings = filterBySeverity(verdict.findings, evalConfig.minSeverity);
1119
1364
  }
1365
+ // Apply override suppressions
1366
+ {
1367
+ const overrideStore = loadOverrideStore();
1368
+ if (overrideStore.overrides.length > 0) {
1369
+ const fileSrc = resolvedPath || args.file;
1370
+ for (const evaluation of verdict.evaluations) {
1371
+ const result = applyOverrides(evaluation.findings, overrideStore, fileSrc);
1372
+ evaluation.findings = result.active;
1373
+ }
1374
+ const topResult = applyOverrides(verdict.findings, overrideStore, fileSrc);
1375
+ verdict.findings = topResult.active;
1376
+ if (topResult.overridden.length > 0 && !args.quiet) {
1377
+ console.log(` ℹ️ ${topResult.overridden.length} finding(s) suppressed by overrides`);
1378
+ }
1379
+ }
1380
+ }
1120
1381
  // Enrich with learning context when --explain is set
1121
1382
  if (args.explain) {
1122
1383
  for (const evaluation of verdict.evaluations) {
@@ -1149,6 +1410,17 @@ export async function runCli(argv) {
1149
1410
  console.log(` ⏱ Evaluated in ${elapsed}ms`);
1150
1411
  console.log(` 📊 ${verdict.evaluations.length} judges, ${verdict.findings.length} total findings`);
1151
1412
  }
1413
+ // Trace output — show pipeline decision trace
1414
+ if (args.trace) {
1415
+ const { buildEvaluationTrace, formatTraceText } = await import("./commands/trace.js");
1416
+ const trace = buildEvaluationTrace(verdict, resolvedPath || args.file, language);
1417
+ if (args.format === "json") {
1418
+ console.log(JSON.stringify(trace, null, 2));
1419
+ }
1420
+ else {
1421
+ console.log(formatTraceText(trace));
1422
+ }
1423
+ }
1152
1424
  // Exit code — fail-on-findings or min-score
1153
1425
  if (args.failOnFindings && verdict.overallVerdict === "fail")
1154
1426
  process.exit(1);
@@ -1380,18 +1652,37 @@ function enrichWithExplanations(findings) {
1380
1652
  return findings.map((f) => {
1381
1653
  const prefix = f.ruleId.replace(/-\d+$/, "");
1382
1654
  const ctx = RULE_PREFIX_CONTEXT[prefix];
1383
- if (!ctx)
1384
- return f;
1385
1655
  const parts = [f.description];
1386
- if (ctx.owasp)
1387
- parts.push(`\n📚 OWASP: ${ctx.owasp}`);
1388
- if (ctx.cwe)
1389
- parts.push(`CWE: ${ctx.cwe}`);
1390
- parts.push(`💡 ${ctx.learn}`);
1656
+ // Layer 2: evidence-based explanation
1657
+ if (f.confidence !== undefined) {
1658
+ parts.push(`\n🎯 Confidence: ${Math.round(f.confidence * 100)}%`);
1659
+ }
1660
+ if (f.provenance) {
1661
+ parts.push(`🔍 Detection: ${f.provenance}`);
1662
+ }
1663
+ if (f.evidenceBasis) {
1664
+ parts.push(`📊 Evidence: ${f.evidenceBasis}`);
1665
+ }
1666
+ if (f.evidenceChain && f.evidenceChain.steps.length > 0) {
1667
+ parts.push(`\n⚡ Why this matters: ${f.evidenceChain.impactStatement}`);
1668
+ parts.push(" Evidence chain:");
1669
+ for (const step of f.evidenceChain.steps.slice(0, 5)) {
1670
+ const loc = step.line ? ` (L${step.line})` : "";
1671
+ parts.push(` → [${step.source}]${loc} ${step.observation}`);
1672
+ }
1673
+ }
1674
+ // Layer 1: OWASP/CWE reference context
1675
+ if (ctx) {
1676
+ if (ctx.owasp)
1677
+ parts.push(`\n📚 OWASP: ${ctx.owasp}`);
1678
+ if (ctx.cwe)
1679
+ parts.push(`CWE: ${ctx.cwe}`);
1680
+ parts.push(`💡 ${ctx.learn}`);
1681
+ }
1391
1682
  return {
1392
1683
  ...f,
1393
1684
  description: parts.join(" "),
1394
- reference: f.reference || [ctx.owasp, ctx.cwe].filter(Boolean).join(" / ") || f.reference,
1685
+ reference: f.reference || (ctx ? [ctx.owasp, ctx.cwe].filter(Boolean).join(" / ") : undefined) || f.reference,
1395
1686
  };
1396
1687
  });
1397
1688
  }