npm - @kevinrabun/judges - Versions diffs - 3.118.0 → 3.121.0 - Mend

@kevinrabun/judges 3.118.0 → 3.121.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39)

package/README.md +31 -1
package/dist/api.d.ts +3 -2
package/dist/api.js +4 -2
package/dist/cli-dispatch.d.ts +7 -0
package/dist/cli-dispatch.js +654 -0
package/dist/cli-formatters.d.ts +6 -0
package/dist/cli-formatters.js +186 -0
package/dist/cli.js +69 -4159
package/dist/commands/baseline.js +2 -42
package/dist/commands/coverage.js +3 -39
package/dist/commands/diff.js +2 -38
package/dist/commands/fix-pr.js +2 -23
package/dist/commands/fix.js +3 -27
package/dist/commands/llm-benchmark.d.ts +7 -0
package/dist/commands/llm-benchmark.js +27 -1
package/dist/commands/quality-gate.js +1 -12
package/dist/commands/review-parallel.js +1 -19
package/dist/commands/review.js +2 -33
package/dist/commands/rule-test.js +1 -15
package/dist/commands/tune.js +2 -29
package/dist/commands/watch.js +3 -42
package/dist/evaluators/hallucination-detection.js +343 -0
package/dist/evaluators/index.d.ts +14 -11
package/dist/evaluators/index.js +4 -182
package/dist/evaluators/security.js +226 -2
package/dist/evaluators/suppressions.d.ts +49 -0
package/dist/evaluators/suppressions.js +185 -0
package/dist/ext-to-lang.d.ts +16 -0
package/dist/ext-to-lang.js +60 -0
package/dist/github-app.d.ts +1 -3
package/dist/github-app.js +2 -34
package/dist/parallel.js +2 -14
package/dist/tools/deep-review.d.ts +13 -2
package/dist/tools/deep-review.js +76 -16
package/dist/tools/register-evaluation.js +2 -29
package/dist/tools/register-review.js +17 -3
package/dist/tools/register-workflow.js +7 -1
package/package.json +1 -1
package/server.json +2 -2

package/README.md CHANGED Viewed

@@ -15,7 +15,7 @@ An MCP (Model Context Protocol) server that provides a panel of **45 specialized
 [![npm](https://img.shields.io/npm/v/@kevinrabun/judges)](https://www.npmjs.com/package/@kevinrabun/judges)
 [![npm downloads](https://img.shields.io/npm/dw/@kevinrabun/judges)](https://www.npmjs.com/package/@kevinrabun/judges)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
-[![Tests](https://img.shields.io/badge/tests-2412-brightgreen)](https://github.com/KevinRabun/judges/actions)
+[![Tests](https://img.shields.io/badge/tests-2481-brightgreen)](https://github.com/KevinRabun/judges/actions)
 > 🔰 **Packages**
 > - **CLI**: `@kevinrabun/judges-cli` → binary `judges` (use `npx @kevinrabun/judges-cli eval --file app.ts`).
@@ -1098,6 +1098,36 @@ Analyze a dependency manifest file for supply-chain risks, version pinning issue
 | `manifestType` | string | yes | File type: `package.json`, `requirements.txt`, etc. |
 | `context` | string | no | Optional context |
+### `evaluate_git_diff`
+Evaluate only **changed lines** from a git diff. Provide either `repoPath` for a live git diff or `diffText` for a pre-computed unified diff.
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `repoPath` | string | conditional | Absolute path to the git repository |
+| `base` | string | no | Git ref to diff against (default: `HEAD~1`) |
+| `diffText` | string | conditional | Pre-computed unified diff text |
+| `confidenceFilter` | number | no | Minimum confidence threshold for findings (0–1) |
+| `autoTune` | boolean | no | Apply feedback-driven auto-tuning (default: false) |
+| `maxPromptChars` | number | no | Max character budget for LLM prompts (default: 100000, 0 = unlimited) |
+| `config` | object | no | Inline configuration |
+### `re_evaluate_with_context`
+Re-run the tribunal with **prior findings as context** for iterative refinement. Supports dispute resolution, developer context injection, and focus-area filtering.
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `code` | string | yes | Source code to re-evaluate |
+| `language` | string | yes | Programming language |
+| `disputedRuleIds` | string[] | no | Rule IDs the developer disputes as false positives |
+| `acceptedRuleIds` | string[] | no | Rule IDs the developer accepts |
+| `developerContext` | string | no | Free-form explanation of developer intent |
+| `focusAreas` | string[] | no | Specific areas to focus on (e.g., `["security"]`) |
+| `confidenceFilter` | number | no | Minimum confidence threshold (default: 0.5) |
+| `filePath` | string | no | File path for context-aware evaluation |
+| `deepReview` | boolean | no | Include LLM deep-review prompt section |
+| `relatedFiles` | array | no | Cross-file context `{ path, snippet, relationship? }[]` |
+| `maxPromptChars` | number | no | Max character budget for LLM prompts (default: 100000, 0 = unlimited) |
 #### Judge IDs
 `data-security` · `cybersecurity` · `cost-effectiveness` · `scalability` · `cloud-readiness` · `software-practices` · `accessibility` · `api-design` · `reliability` · `observability` · `performance` · `compliance` · `data-sovereignty` · `testing` · `documentation` · `internationalization` · `dependency-health` · `concurrency` · `ethics-bias` · `maintainability` · `error-handling` · `authentication` · `database` · `caching` · `configuration-management` · `backwards-compatibility` · `portability` · `ux` · `logging-privacy` · `rate-limiting` · `ci-cd` · `code-structure` · `agent-instructions` · `ai-code-safety` · `framework-safety` · `iac-security` · `false-positive-review`

package/dist/api.d.ts CHANGED Viewed

@@ -11,6 +11,7 @@
 export type { Severity, Verdict, Finding, Patch, LangFamily, JudgesConfig, RuleOverride, ProjectFile, ProjectVerdict, DiffVerdict, DependencyEntry, DependencyVerdict, JudgeEvaluation, TribunalVerdict, JudgeDefinition, EvaluationContextV2, EvidenceBundleV2, SpecializedFindingV2, TribunalVerdictV2, MustFixGateOptions, MustFixGateResult, AppBuilderWorkflowResult, PlainLanguageFinding, WorkflowTask, PolicyProfile, SuppressionRecord, SuppressionResult, ExecutionTrace, RuleTrace, StreamingBatch, JudgeSelectionContext, JudgeSelectionResult, SessionContext, } from "./types.js";
 export { JudgesError, ConfigError, EvaluationError, ParseError } from "./errors.js";
 export { parseConfig, defaultConfig, mergeConfigs, discoverCascadingConfigs, loadCascadingConfig, loadConfigFile, expandEnvPlaceholders, loadPluginJudges, validatePluginSpecifiers, isValidJudgeDefinition, validateJudgeDefinition, applyOverridesForFile, applyLanguageProfile, resolveExtendsConfig, } from "./config.js";
+export { EXT_TO_LANG, SUPPORTED_EXTENSIONS, detectLanguageFromPath } from "./ext-to-lang.js";
 export { JUDGES, getJudge, getJudgeSummaries } from "./judges/index.js";
 export { evaluateWithJudge, evaluateWithTribunal, evaluateWithTribunalStreaming, evaluateProject, evaluateDiff, analyzeDependencies, enrichWithPatches, crossEvaluatorDedup, crossFileDedup, diffFindings, formatFindingDiff, evaluateNetChangeGate, applyInlineSuppressions, applyInlineSuppressionsWithAudit, runAppBuilderWorkflow, formatVerdictAsMarkdown, formatEvaluationAsMarkdown, clearEvaluationCaches, scanProjectWideSecurityPatterns, } from "./evaluators/index.js";
 export type { FindingDiff, NetChangeGateOptions, NetChangeGateResult, EvaluationOptions } from "./evaluators/index.js";
@@ -20,7 +21,7 @@ export { getPreset, composePresets, listPresets, PRESETS } from "./presets.js";
 export type { Preset } from "./presets.js";
 export { evaluateCodeV2, evaluateProjectV2, getSupportedPolicyProfiles } from "./evaluators/v2.js";
 export { analyzeCrossFileTaint } from "./ast/cross-file-taint.js";
-export { buildSingleJudgeDeepReviewSection, buildTribunalDeepReviewSection, buildSimplifiedDeepReviewSection, isContentPolicyRefusal, DEEP_REVIEW_PROMPT_INTRO, DEEP_REVIEW_IDENTITY, } from "./tools/deep-review.js";
+export { buildSingleJudgeDeepReviewSection, buildTribunalDeepReviewSection, buildSimplifiedDeepReviewSection, formatRelatedFilesSection, isContentPolicyRefusal, DEEP_REVIEW_PROMPT_INTRO, DEEP_REVIEW_IDENTITY, DEFAULT_MAX_PROMPT_CHARS, } from "./tools/deep-review.js";
 export type { RelatedFileSnippet } from "./tools/deep-review.js";
 export { getCondensedCriteria } from "./tools/prompts.js";
 export { parseDismissedFindings, recordL2Feedback, loadFeedbackStore, saveFeedbackStore, addFeedback, computeFeedbackStats, getFpRateByRule, mergeFeedbackStores, computeTeamFeedbackStats, formatTeamStatsOutput, } from "./commands/feedback.js";
@@ -69,7 +70,7 @@ export { compareCapabilities, formatComparisonReport, formatFullComparisonMatrix
 export type { ToolProfile, ToolCapability, ComparisonResult } from "./comparison.js";
 export { runBenchmarkSuite, benchmarkGate, formatBenchmarkReport, formatBenchmarkMarkdown, analyzeL2Coverage, formatL2CoverageReport, ingestFindingsAsBenchmarkCases, deduplicateIngestCases, BENCHMARK_CASES, } from "./commands/benchmark.js";
 export type { BenchmarkCase, BenchmarkResult, BenchmarkGateOptions, BenchmarkGateResult, L2CoverageAnalysis, L2JudgeCoverage, L2CategoryCoverage, } from "./commands/benchmark.js";
-export { parseLlmRuleIds, constructPerJudgePrompt, constructTribunalPrompt, selectStratifiedSample, scoreLlmCase, computeLlmMetrics, formatLlmSnapshotMarkdown, formatLayerComparisonMarkdown, extractValidatedLlmFindings, getValidRulePrefixes, } from "./commands/llm-benchmark.js";
+export { parseLlmRuleIds, constructPerJudgePrompt, constructTribunalPrompt, selectStratifiedSample, scoreLlmCase, computeLlmMetrics, formatLlmSnapshotMarkdown, formatLayerComparisonMarkdown, extractValidatedLlmFindings, getValidRulePrefixes, getTribunalValidPrefixes, } from "./commands/llm-benchmark.js";
 export type { LlmBenchmarkSnapshot, LlmCaseResult } from "./commands/llm-benchmark.js";
 export type { LlmFinding, ValidationResult } from "./probabilistic/llm-response-validator.js";
 export { optimizeBenchmark, formatAmendmentSection, createEmptyStore, mergeAmendments, } from "./commands/llm-benchmark-optimizer.js";

package/dist/api.js CHANGED Viewed

@@ -12,6 +12,8 @@
 export { JudgesError, ConfigError, EvaluationError, ParseError } from "./errors.js";
 // ─── Config ──────────────────────────────────────────────────────────────────
 export { parseConfig, defaultConfig, mergeConfigs, discoverCascadingConfigs, loadCascadingConfig, loadConfigFile, expandEnvPlaceholders, loadPluginJudges, validatePluginSpecifiers, isValidJudgeDefinition, validateJudgeDefinition, applyOverridesForFile, applyLanguageProfile, resolveExtendsConfig, } from "./config.js";
+// ─── Language Detection ──────────────────────────────────────────────────────
+export { EXT_TO_LANG, SUPPORTED_EXTENSIONS, detectLanguageFromPath } from "./ext-to-lang.js";
 // ─── Judge Registry ──────────────────────────────────────────────────────────
 export { JUDGES, getJudge, getJudgeSummaries } from "./judges/index.js";
 // ─── Core Evaluation Functions ───────────────────────────────────────────────
@@ -27,7 +29,7 @@ export { evaluateCodeV2, evaluateProjectV2, getSupportedPolicyProfiles } from ".
 // ─── Cross-File Taint Analysis ───────────────────────────────────────────────
 export { analyzeCrossFileTaint } from "./ast/cross-file-taint.js";
 // ─── Deep Review Prompts ─────────────────────────────────────────────────────
-export { buildSingleJudgeDeepReviewSection, buildTribunalDeepReviewSection, buildSimplifiedDeepReviewSection, isContentPolicyRefusal, DEEP_REVIEW_PROMPT_INTRO, DEEP_REVIEW_IDENTITY, } from "./tools/deep-review.js";
+export { buildSingleJudgeDeepReviewSection, buildTribunalDeepReviewSection, buildSimplifiedDeepReviewSection, formatRelatedFilesSection, isContentPolicyRefusal, DEEP_REVIEW_PROMPT_INTRO, DEEP_REVIEW_IDENTITY, DEFAULT_MAX_PROMPT_CHARS, } from "./tools/deep-review.js";
 // ─── Prompt Utilities ────────────────────────────────────────────────────────
 export { getCondensedCriteria } from "./tools/prompts.js";
 // ─── Feedback & Calibration ─────────────────────────────────────────────────
@@ -78,7 +80,7 @@ export { compareCapabilities, formatComparisonReport, formatFullComparisonMatrix
 // ─── Benchmark Gate ──────────────────────────────────────────────────────────
 export { runBenchmarkSuite, benchmarkGate, formatBenchmarkReport, formatBenchmarkMarkdown, analyzeL2Coverage, formatL2CoverageReport, ingestFindingsAsBenchmarkCases, deduplicateIngestCases, BENCHMARK_CASES, } from "./commands/benchmark.js";
 // ─── LLM Benchmark ──────────────────────────────────────────────────────────
-export { parseLlmRuleIds, constructPerJudgePrompt, constructTribunalPrompt, selectStratifiedSample, scoreLlmCase, computeLlmMetrics, formatLlmSnapshotMarkdown, formatLayerComparisonMarkdown, extractValidatedLlmFindings, getValidRulePrefixes, } from "./commands/llm-benchmark.js";
+export { parseLlmRuleIds, constructPerJudgePrompt, constructTribunalPrompt, selectStratifiedSample, scoreLlmCase, computeLlmMetrics, formatLlmSnapshotMarkdown, formatLayerComparisonMarkdown, extractValidatedLlmFindings, getValidRulePrefixes, getTribunalValidPrefixes, } from "./commands/llm-benchmark.js";
 // ─── LLM Benchmark Optimizer (Self-Teaching) ────────────────────────────────
 export { optimizeBenchmark, formatAmendmentSection, createEmptyStore, mergeAmendments, } from "./commands/llm-benchmark-optimizer.js";
 // Review autopilot (GitHub App / scripts)

package/dist/cli-dispatch.d.ts ADDED Viewed

@@ -0,0 +1,7 @@
+/**
+ * Command dispatch table — maps CLI command names to their module path
+ * and exported handler function name. Each entry is lazily imported.
+ *
+ * Format: "command-name": ["./module-path.js", "exportedFunctionName"]
+ */
+export declare const COMMAND_TABLE: Record<string, [string, string]>;