@kevinrabun/judges 3.119.0 → 3.122.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/api.d.ts +2 -1
- package/dist/api.js +3 -1
- package/dist/cli-dispatch.d.ts +7 -0
- package/dist/cli-dispatch.js +654 -0
- package/dist/cli-formatters.d.ts +6 -0
- package/dist/cli-formatters.js +186 -0
- package/dist/cli.js +69 -4159
- package/dist/commands/baseline.js +2 -42
- package/dist/commands/coverage.js +3 -39
- package/dist/commands/diff.js +2 -38
- package/dist/commands/fix-pr.js +2 -23
- package/dist/commands/fix.js +3 -27
- package/dist/commands/llm-benchmark.d.ts +7 -0
- package/dist/commands/llm-benchmark.js +27 -1
- package/dist/commands/quality-gate.js +1 -12
- package/dist/commands/review-parallel.js +1 -19
- package/dist/commands/review.js +2 -33
- package/dist/commands/rule-test.js +1 -15
- package/dist/commands/tune.js +2 -29
- package/dist/commands/watch.js +3 -42
- package/dist/config.js +1 -1
- package/dist/evaluators/hallucination-detection.js +343 -0
- package/dist/evaluators/index.d.ts +2 -11
- package/dist/evaluators/index.js +3 -181
- package/dist/evaluators/security.js +226 -2
- package/dist/evaluators/suppressions.d.ts +49 -0
- package/dist/evaluators/suppressions.js +185 -0
- package/dist/ext-to-lang.d.ts +16 -0
- package/dist/ext-to-lang.js +60 -0
- package/dist/github-app.d.ts +1 -3
- package/dist/github-app.js +2 -34
- package/dist/parallel.js +2 -14
- package/dist/probabilistic/llm-response-validator.js +1 -1
- package/dist/reports/public-repo-report.js +9 -1
- package/dist/skill-loader.js +9 -6
- package/dist/tools/register-evaluation.js +2 -29
- package/package.json +1 -1
- package/server.json +2 -2
- package/src/skill-loader.ts +9 -6
package/README.md
CHANGED
|
@@ -15,7 +15,7 @@ An MCP (Model Context Protocol) server that provides a panel of **45 specialized
|
|
|
15
15
|
[](https://www.npmjs.com/package/@kevinrabun/judges)
|
|
16
16
|
[](https://www.npmjs.com/package/@kevinrabun/judges)
|
|
17
17
|
[](https://opensource.org/licenses/MIT)
|
|
18
|
-
[](https://github.com/KevinRabun/judges/actions)
|
|
19
19
|
|
|
20
20
|
> 🔰 **Packages**
|
|
21
21
|
> - **CLI**: `@kevinrabun/judges-cli` → binary `judges` (use `npx @kevinrabun/judges-cli eval --file app.ts`).
|
package/dist/api.d.ts
CHANGED
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
export type { Severity, Verdict, Finding, Patch, LangFamily, JudgesConfig, RuleOverride, ProjectFile, ProjectVerdict, DiffVerdict, DependencyEntry, DependencyVerdict, JudgeEvaluation, TribunalVerdict, JudgeDefinition, EvaluationContextV2, EvidenceBundleV2, SpecializedFindingV2, TribunalVerdictV2, MustFixGateOptions, MustFixGateResult, AppBuilderWorkflowResult, PlainLanguageFinding, WorkflowTask, PolicyProfile, SuppressionRecord, SuppressionResult, ExecutionTrace, RuleTrace, StreamingBatch, JudgeSelectionContext, JudgeSelectionResult, SessionContext, } from "./types.js";
|
|
12
12
|
export { JudgesError, ConfigError, EvaluationError, ParseError } from "./errors.js";
|
|
13
13
|
export { parseConfig, defaultConfig, mergeConfigs, discoverCascadingConfigs, loadCascadingConfig, loadConfigFile, expandEnvPlaceholders, loadPluginJudges, validatePluginSpecifiers, isValidJudgeDefinition, validateJudgeDefinition, applyOverridesForFile, applyLanguageProfile, resolveExtendsConfig, } from "./config.js";
|
|
14
|
+
export { EXT_TO_LANG, SUPPORTED_EXTENSIONS, detectLanguageFromPath } from "./ext-to-lang.js";
|
|
14
15
|
export { JUDGES, getJudge, getJudgeSummaries } from "./judges/index.js";
|
|
15
16
|
export { evaluateWithJudge, evaluateWithTribunal, evaluateWithTribunalStreaming, evaluateProject, evaluateDiff, analyzeDependencies, enrichWithPatches, crossEvaluatorDedup, crossFileDedup, diffFindings, formatFindingDiff, evaluateNetChangeGate, applyInlineSuppressions, applyInlineSuppressionsWithAudit, runAppBuilderWorkflow, formatVerdictAsMarkdown, formatEvaluationAsMarkdown, clearEvaluationCaches, scanProjectWideSecurityPatterns, } from "./evaluators/index.js";
|
|
16
17
|
export type { FindingDiff, NetChangeGateOptions, NetChangeGateResult, EvaluationOptions } from "./evaluators/index.js";
|
|
@@ -69,7 +70,7 @@ export { compareCapabilities, formatComparisonReport, formatFullComparisonMatrix
|
|
|
69
70
|
export type { ToolProfile, ToolCapability, ComparisonResult } from "./comparison.js";
|
|
70
71
|
export { runBenchmarkSuite, benchmarkGate, formatBenchmarkReport, formatBenchmarkMarkdown, analyzeL2Coverage, formatL2CoverageReport, ingestFindingsAsBenchmarkCases, deduplicateIngestCases, BENCHMARK_CASES, } from "./commands/benchmark.js";
|
|
71
72
|
export type { BenchmarkCase, BenchmarkResult, BenchmarkGateOptions, BenchmarkGateResult, L2CoverageAnalysis, L2JudgeCoverage, L2CategoryCoverage, } from "./commands/benchmark.js";
|
|
72
|
-
export { parseLlmRuleIds, constructPerJudgePrompt, constructTribunalPrompt, selectStratifiedSample, scoreLlmCase, computeLlmMetrics, formatLlmSnapshotMarkdown, formatLayerComparisonMarkdown, extractValidatedLlmFindings, getValidRulePrefixes, } from "./commands/llm-benchmark.js";
|
|
73
|
+
export { parseLlmRuleIds, constructPerJudgePrompt, constructTribunalPrompt, selectStratifiedSample, scoreLlmCase, computeLlmMetrics, formatLlmSnapshotMarkdown, formatLayerComparisonMarkdown, extractValidatedLlmFindings, getValidRulePrefixes, getTribunalValidPrefixes, } from "./commands/llm-benchmark.js";
|
|
73
74
|
export type { LlmBenchmarkSnapshot, LlmCaseResult } from "./commands/llm-benchmark.js";
|
|
74
75
|
export type { LlmFinding, ValidationResult } from "./probabilistic/llm-response-validator.js";
|
|
75
76
|
export { optimizeBenchmark, formatAmendmentSection, createEmptyStore, mergeAmendments, } from "./commands/llm-benchmark-optimizer.js";
|
package/dist/api.js
CHANGED
|
@@ -12,6 +12,8 @@
|
|
|
12
12
|
export { JudgesError, ConfigError, EvaluationError, ParseError } from "./errors.js";
|
|
13
13
|
// ─── Config ──────────────────────────────────────────────────────────────────
|
|
14
14
|
export { parseConfig, defaultConfig, mergeConfigs, discoverCascadingConfigs, loadCascadingConfig, loadConfigFile, expandEnvPlaceholders, loadPluginJudges, validatePluginSpecifiers, isValidJudgeDefinition, validateJudgeDefinition, applyOverridesForFile, applyLanguageProfile, resolveExtendsConfig, } from "./config.js";
|
|
15
|
+
// ─── Language Detection ──────────────────────────────────────────────────────
|
|
16
|
+
export { EXT_TO_LANG, SUPPORTED_EXTENSIONS, detectLanguageFromPath } from "./ext-to-lang.js";
|
|
15
17
|
// ─── Judge Registry ──────────────────────────────────────────────────────────
|
|
16
18
|
export { JUDGES, getJudge, getJudgeSummaries } from "./judges/index.js";
|
|
17
19
|
// ─── Core Evaluation Functions ───────────────────────────────────────────────
|
|
@@ -78,7 +80,7 @@ export { compareCapabilities, formatComparisonReport, formatFullComparisonMatrix
|
|
|
78
80
|
// ─── Benchmark Gate ──────────────────────────────────────────────────────────
|
|
79
81
|
export { runBenchmarkSuite, benchmarkGate, formatBenchmarkReport, formatBenchmarkMarkdown, analyzeL2Coverage, formatL2CoverageReport, ingestFindingsAsBenchmarkCases, deduplicateIngestCases, BENCHMARK_CASES, } from "./commands/benchmark.js";
|
|
80
82
|
// ─── LLM Benchmark ──────────────────────────────────────────────────────────
|
|
81
|
-
export { parseLlmRuleIds, constructPerJudgePrompt, constructTribunalPrompt, selectStratifiedSample, scoreLlmCase, computeLlmMetrics, formatLlmSnapshotMarkdown, formatLayerComparisonMarkdown, extractValidatedLlmFindings, getValidRulePrefixes, } from "./commands/llm-benchmark.js";
|
|
83
|
+
export { parseLlmRuleIds, constructPerJudgePrompt, constructTribunalPrompt, selectStratifiedSample, scoreLlmCase, computeLlmMetrics, formatLlmSnapshotMarkdown, formatLayerComparisonMarkdown, extractValidatedLlmFindings, getValidRulePrefixes, getTribunalValidPrefixes, } from "./commands/llm-benchmark.js";
|
|
82
84
|
// ─── LLM Benchmark Optimizer (Self-Teaching) ────────────────────────────────
|
|
83
85
|
export { optimizeBenchmark, formatAmendmentSection, createEmptyStore, mergeAmendments, } from "./commands/llm-benchmark-optimizer.js";
|
|
84
86
|
// Review autopilot (GitHub App / scripts)
|
|
package/dist/cli-dispatch.d.ts
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Command dispatch table — maps CLI command names to their module path
|
|
3
|
+
* and exported handler function name. Each entry is lazily imported.
|
|
4
|
+
*
|
|
5
|
+
* Format: "command-name": ["./module-path.js", "exportedFunctionName"]
|
|
6
|
+
*/
|
|
7
|
+
export declare const COMMAND_TABLE: Record<string, [string, string]>;
|