@kevinrabun/judges 3.115.4 → 3.117.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/accessibility.judge.md +7 -0
- package/agents/agent-instructions.judge.md +7 -0
- package/agents/ai-code-safety.judge.md +7 -0
- package/agents/api-contract.judge.md +7 -0
- package/agents/api-design.judge.md +7 -0
- package/agents/authentication.judge.md +7 -0
- package/agents/backwards-compatibility.judge.md +7 -0
- package/agents/caching.judge.md +7 -0
- package/agents/ci-cd.judge.md +7 -0
- package/agents/cloud-readiness.judge.md +7 -0
- package/agents/concurrency.judge.md +7 -0
- package/agents/configuration-management.judge.md +7 -0
- package/agents/cybersecurity.judge.md +7 -0
- package/agents/data-security.judge.md +7 -0
- package/agents/dependency-health.judge.md +7 -0
- package/agents/documentation.judge.md +7 -0
- package/agents/error-handling.judge.md +7 -0
- package/agents/ethics-bias.judge.md +7 -0
- package/agents/false-positive-review.judge.md +12 -0
- package/agents/framework-safety.judge.md +7 -0
- package/agents/hallucination-detection.judge.md +13 -0
- package/agents/iac-security.judge.md +7 -0
- package/agents/intent-alignment.judge.md +13 -0
- package/agents/logging-privacy.judge.md +7 -0
- package/agents/maintainability.judge.md +7 -0
- package/agents/multi-turn-coherence.judge.md +7 -0
- package/agents/observability.judge.md +7 -0
- package/agents/portability.judge.md +7 -0
- package/agents/rate-limiting.judge.md +7 -0
- package/agents/reliability.judge.md +7 -0
- package/agents/security.judge.md +13 -0
- package/agents/testing.judge.md +7 -0
- package/agents/ux.judge.md +7 -0
- package/dist/a2a-protocol.d.ts +136 -0
- package/dist/a2a-protocol.js +218 -0
- package/dist/api.d.ts +21 -3
- package/dist/api.js +21 -1
- package/dist/audit-trail.d.ts +245 -0
- package/dist/audit-trail.js +257 -0
- package/dist/commands/benchmark-advanced.js +51 -51
- package/dist/commands/benchmark-ai-agents.js +16 -16
- package/dist/commands/benchmark-compliance-ethics.js +12 -12
- package/dist/commands/benchmark-expanded-2.js +2 -2
- package/dist/commands/benchmark-expanded.js +2 -2
- package/dist/commands/benchmark-infrastructure.js +12 -12
- package/dist/commands/benchmark-languages.js +11 -11
- package/dist/commands/benchmark-quality-ops.js +7 -7
- package/dist/commands/benchmark-security-deep.js +9 -9
- package/dist/commands/benchmark.js +1 -1
- package/dist/commands/llm-benchmark-optimizer.d.ts +78 -0
- package/dist/commands/llm-benchmark-optimizer.js +241 -0
- package/dist/commands/llm-benchmark.d.ts +4 -2
- package/dist/commands/llm-benchmark.js +40 -12
- package/dist/escalation.d.ts +100 -0
- package/dist/escalation.js +292 -0
- package/dist/evaluation-session.d.ts +74 -0
- package/dist/evaluation-session.js +152 -0
- package/dist/evaluators/index.d.ts +23 -1
- package/dist/evaluators/index.js +192 -3
- package/dist/evaluators/judge-selector.d.ts +19 -0
- package/dist/evaluators/judge-selector.js +141 -0
- package/dist/evaluators/recall-boost.d.ts +27 -0
- package/dist/evaluators/recall-boost.js +409 -0
- package/dist/feedback-loop.d.ts +62 -0
- package/dist/feedback-loop.js +179 -0
- package/dist/index.js +2 -0
- package/dist/judges/accessibility.js +7 -0
- package/dist/judges/agent-instructions.js +7 -0
- package/dist/judges/ai-code-safety.js +7 -0
- package/dist/judges/api-contract.js +7 -0
- package/dist/judges/api-design.js +7 -0
- package/dist/judges/authentication.js +7 -0
- package/dist/judges/backwards-compatibility.js +7 -0
- package/dist/judges/caching.js +7 -0
- package/dist/judges/ci-cd.js +7 -0
- package/dist/judges/cloud-readiness.js +7 -0
- package/dist/judges/concurrency.js +7 -0
- package/dist/judges/configuration-management.js +7 -0
- package/dist/judges/cybersecurity.js +7 -0
- package/dist/judges/data-security.js +7 -0
- package/dist/judges/dependency-health.js +7 -0
- package/dist/judges/documentation.js +7 -0
- package/dist/judges/error-handling.js +7 -0
- package/dist/judges/ethics-bias.js +7 -0
- package/dist/judges/false-positive-review.js +13 -1
- package/dist/judges/framework-safety.js +7 -0
- package/dist/judges/hallucination-detection.js +14 -1
- package/dist/judges/iac-security.js +7 -0
- package/dist/judges/intent-alignment.js +14 -1
- package/dist/judges/logging-privacy.js +7 -0
- package/dist/judges/maintainability.js +7 -0
- package/dist/judges/multi-turn-coherence.js +7 -0
- package/dist/judges/observability.js +7 -0
- package/dist/judges/portability.js +7 -0
- package/dist/judges/rate-limiting.js +7 -0
- package/dist/judges/reliability.js +7 -0
- package/dist/judges/security.js +14 -1
- package/dist/judges/testing.js +7 -0
- package/dist/judges/ux.js +7 -0
- package/dist/review-conversation.d.ts +87 -0
- package/dist/review-conversation.js +307 -0
- package/dist/sast-integration.d.ts +112 -0
- package/dist/sast-integration.js +215 -0
- package/dist/tools/register-evaluation.js +208 -8
- package/dist/tools/register-fix.js +24 -1
- package/dist/tools/register-resources.d.ts +6 -0
- package/dist/tools/register-resources.js +177 -0
- package/dist/tools/register-review.js +26 -1
- package/dist/tools/register-workflow.js +384 -11
- package/dist/tools/validation.d.ts +13 -0
- package/dist/tools/validation.js +77 -0
- package/dist/types.d.ts +122 -0
- package/package.json +25 -12
- package/server.json +2 -2
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent-to-Agent (A2A) Protocol Support
|
|
3
|
+
*
|
|
4
|
+
* Implements the agent card and task exchange protocol enabling Judges
|
|
5
|
+
* to participate in multi-agent orchestration ecosystems. Compatible
|
|
6
|
+
* with Google's A2A protocol and similar agent discovery patterns.
|
|
7
|
+
*
|
|
8
|
+
* Capabilities:
|
|
9
|
+
* - Agent Card: advertises Judges' capabilities to orchestrators
|
|
10
|
+
* - Task Reception: accepts code review requests from other agents
|
|
11
|
+
* - Task Delegation: forwards specialized work to sub-agents
|
|
12
|
+
* - Result Reporting: returns structured findings to callers
|
|
13
|
+
*
|
|
14
|
+
* Wire format: JSON-RPC 2.0 over HTTP or stdio (MCP-compatible)
|
|
15
|
+
*/
|
|
16
|
+
import type { Finding, ReviewDecision } from "./types.js";
|
|
17
|
+
/**
 * Discovery document that describes an agent to orchestrators.
 */
export interface AgentCard {
    /** Identifier of the agent. */
    id: string;
    /** Display name meant for humans. */
    name: string;
    /** Version string advertised by the agent. */
    version: string;
    /** Free-text summary of what the agent can do. */
    description: string;
    /** Capabilities the agent exposes. */
    capabilities: Array<AgentCapability>;
    /** Names of the output formats the agent supports. */
    outputFormats: Array<string>;
    /** Names of the communication protocols the agent supports. */
    protocols: Array<string>;
    /** Names of the authentication methods the agent accepts. */
    authMethods: Array<string>;
    /** Additional agent metadata as arbitrary key/value pairs. */
    metadata: Record<string, unknown>;
}
|
|
37
|
+
/**
 * A single capability entry listed on an {@link AgentCard}.
 */
export interface AgentCapability {
    /** Name of the capability. */
    name: string;
    /** What the capability does. */
    description: string;
    /** Optional schema for the capability's input (JSON Schema subset). */
    inputSchema?: Record<string, unknown>;
    /** Optional schema for the capability's output. */
    outputSchema?: Record<string, unknown>;
}
|
|
47
|
+
/**
 * Build the Judges agent card that orchestrators use for discovery.
 *
 * @param options - Optional overrides.
 * @param options.version - Version string to advertise on the card.
 * @param options.baseUrl - Base URL associated with the agent; see the
 *   implementation for how (and whether) it is used.
 * @returns The agent card.
 */
export declare function getAgentCard(
    options?: {
        version?: string;
        baseUrl?: string;
    },
): AgentCard;
|
|
54
|
+
/** Lifecycle states an {@link A2ATask} can be in. */
export type TaskStatus =
    | "pending"
    | "in-progress"
    | "completed"
    | "failed"
    | "cancelled";
|
|
55
|
+
/**
 * A unit of work requested from this agent by another agent.
 */
export interface A2ATask {
    /** Unique identifier of the task. */
    taskId: string;
    /** Name of the capability being invoked. */
    capability: string;
    /** Current lifecycle state. */
    status: TaskStatus;
    /** Parameters supplied with the request. */
    input: Record<string, unknown>;
    /** Result of the task (when completed). */
    output?: A2ATaskResult;
    /** Error details (when failed). */
    error?: {
        code: string;
        message: string;
    };
    /** Identifier of the agent that requested the task. */
    requesterId: string;
    /** Timestamp at which the task was created. */
    createdAt: string;
    /** Timestamp at which the task finished, if it has. */
    completedAt?: string;
}
|
|
78
|
+
/**
 * Payload attached to a completed {@link A2ATask}.
 */
export interface A2ATaskResult {
    /** Findings produced by the evaluation. */
    findings: Array<Finding>;
    /** Overall verdict, if one was produced. */
    verdict?: string;
    /** Overall score (0-100), if one was produced. */
    score?: number;
    /** Review decision, if one was produced. */
    reviewDecision?: ReviewDecision;
    /** Summary in markdown. */
    summary?: string;
    /** SARIF output (if requested). */
    sarif?: unknown;
}
|
|
92
|
+
/**
 * Create a new A2A task from an incoming request.
 *
 * @param capability - Name of the capability being invoked.
 * @param input - Input parameters for the task.
 * @param requesterId - Identifier of the requesting agent.
 * @returns The newly created task.
 */
export declare function createTask(
    capability: string,
    input: Record<string, unknown>,
    requesterId: string,
): A2ATask;
|
|
96
|
+
/**
 * Look up a task by its identifier.
 *
 * @param taskId - Identifier of the task.
 * @returns The task, or `undefined` when no task has that identifier.
 */
export declare function getTask(
    taskId: string,
): A2ATask | undefined;
|
|
100
|
+
/**
 * Mark a task as completed and attach its result.
 *
 * @param taskId - Identifier of the task.
 * @param result - Result to store on the task.
 * @returns The updated task, or `undefined` when no task has that identifier.
 */
export declare function completeTask(
    taskId: string,
    result: A2ATaskResult,
): A2ATask | undefined;
|
|
104
|
+
/**
 * Mark a task as failed and record why.
 *
 * @param taskId - Identifier of the task.
 * @param code - Error code to store on the task.
 * @param message - Error message to store on the task.
 * @returns The updated task, or `undefined` when no task has that identifier.
 */
export declare function failTask(
    taskId: string,
    code: string,
    message: string,
): A2ATask | undefined;
|
|
108
|
+
/**
 * List tasks, optionally restricted to a single status.
 *
 * @param status - When given, only tasks in this status are returned.
 * @returns The matching tasks.
 */
export declare function listTasks(
    status?: TaskStatus,
): Array<A2ATask>;
|
|
112
|
+
/**
 * Remove completed/failed tasks that finished longer ago than the given age.
 *
 * @param maxAgeMs - Maximum age in milliseconds; the implementation supplies
 *   a default when omitted.
 * @returns The number of tasks removed.
 */
export declare function pruneTasks(
    maxAgeMs?: number,
): number;
|
|
116
|
+
/**
 * Shape of an incoming A2A JSON-RPC 2.0 request.
 */
export interface A2ARequest {
    /** Protocol version marker. */
    jsonrpc: "2.0";
    /** Name of the method being invoked. */
    method: string;
    /** Method parameters, if any. */
    params?: Record<string, unknown>;
    /** Request identifier, if any. */
    id?: string | number;
}
|
|
122
|
+
/**
 * Shape of an A2A JSON-RPC 2.0 response.
 */
export interface A2AResponse {
    /** Protocol version marker. */
    jsonrpc: "2.0";
    /** Result payload, when present. */
    result?: unknown;
    /** Error payload, when present. */
    error?: {
        /** Numeric error code. */
        code: number;
        /** Error message. */
        message: string;
        /** Optional additional error data. */
        data?: unknown;
    };
    /** Identifier associated with the response, if any. */
    id?: string | number;
}
|
|
132
|
+
/**
 * Dispatch an incoming A2A JSON-RPC request and build the response for it.
 *
 * @param request - The request to handle.
 * @returns A JSON-RPC response.
 */
export declare function handleA2ARequest(
    request: A2ARequest,
): A2AResponse;
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent-to-Agent (A2A) Protocol Support
|
|
3
|
+
*
|
|
4
|
+
* Implements the agent card and task exchange protocol enabling Judges
|
|
5
|
+
* to participate in multi-agent orchestration ecosystems. Compatible
|
|
6
|
+
* with Google's A2A protocol and similar agent discovery patterns.
|
|
7
|
+
*
|
|
8
|
+
* Capabilities:
|
|
9
|
+
* - Agent Card: advertises Judges' capabilities to orchestrators
|
|
10
|
+
* - Task Reception: accepts code review requests from other agents
|
|
11
|
+
* - Task Delegation: forwards specialized work to sub-agents
|
|
12
|
+
* - Result Reporting: returns structured findings to callers
|
|
13
|
+
*
|
|
14
|
+
* Wire format: JSON-RPC 2.0 over HTTP or stdio (MCP-compatible)
|
|
15
|
+
*/
|
|
16
|
+
/**
 * Generate the Judges agent card for discovery by orchestrators.
 *
 * A new object is built on every call.
 *
 * @param {{ version?: string, baseUrl?: string }} [options] Optional overrides.
 *   `version` replaces the advertised version string (an omitted or empty
 *   value falls back to the built-in default). `baseUrl`, when non-empty, is
 *   echoed back as `metadata.baseUrl` so orchestrators can see where the agent
 *   is reachable; it was previously accepted but silently ignored.
 * @returns {object} The agent card: id, name, version, description,
 *   capabilities, outputFormats, protocols, authMethods and metadata.
 */
export function getAgentCard(options) {
    const metadata = {
        judgeCount: 45,
        tribunalJudges: 40,
        ruleCategories: 40,
        supportsDeterministicMode: true,
        supportsLlmTribunal: true,
        supportsStreaming: true,
        supportsEscalation: true,
        supportsAuditTrail: true,
    };
    // Only add the key when a base URL was actually supplied, so the default
    // card is unchanged for callers that pass no options.
    if (options?.baseUrl) {
        metadata.baseUrl = options.baseUrl;
    }
    return {
        id: "judges-code-reviewer",
        name: "Judges — AI Code Review Tribunal",
        // NOTE(review): this default is a hard-coded literal and is not derived
        // from package.json — confirm it matches the released package version.
        version: options?.version || "3.116.0",
        description: "Automated code review system with 45 specialized judges covering security, " +
            "performance, reliability, AI-generated code safety, and 40+ rule categories. " +
            "Uses a tribunal of LLM judges with deterministic pre-screening for high-confidence " +
            "findings plus human escalation for uncertain cases.",
        capabilities: [
            {
                name: "evaluate-code",
                description: "Evaluate a code snippet or file for issues across all judge categories",
                inputSchema: {
                    type: "object",
                    properties: {
                        code: { type: "string", description: "Source code to evaluate" },
                        language: { type: "string", description: "Programming language" },
                        filePath: { type: "string", description: "File path for context" },
                        preset: { type: "string", enum: ["default", "strict", "lenient", "security-only"] },
                    },
                    required: ["code", "language"],
                },
            },
            {
                name: "evaluate-diff",
                description: "Evaluate a code diff (PR or commit) for introduced issues",
                inputSchema: {
                    type: "object",
                    properties: {
                        diff: { type: "string", description: "Unified diff content" },
                        baseRef: { type: "string", description: "Base commit/branch reference" },
                    },
                    required: ["diff"],
                },
            },
            {
                name: "review-project",
                description: "Full project-level review with cross-file analysis",
                inputSchema: {
                    type: "object",
                    properties: {
                        files: {
                            type: "array",
                            items: {
                                type: "object",
                                properties: {
                                    path: { type: "string" },
                                    content: { type: "string" },
                                    language: { type: "string" },
                                },
                            },
                        },
                    },
                    required: ["files"],
                },
            },
            {
                name: "explain-finding",
                description: "Provide detailed explanation of a specific finding",
            },
            {
                name: "suggest-fix",
                description: "Generate fix suggestions for detected findings",
            },
        ],
        outputFormats: ["json", "sarif", "markdown", "csv", "github-actions"],
        protocols: ["mcp", "json-rpc-2.0", "http-rest"],
        authMethods: ["none", "api-key", "github-token"],
        metadata,
    };
}
|
|
100
|
+
// ─── Task Management ─────────────────────────────────────────────────────────
/** Module-level store of tasks, keyed by task ID. */
const taskQueue = new Map();
/**
 * Build a task identifier of the form `task_<time>_<random>`, where `<time>`
 * is the current epoch milliseconds in base 36 and `<random>` is six base-36
 * characters.
 *
 * @returns {string} The generated identifier.
 */
function generateTaskId() {
    const timePart = Date.now().toString(36);
    // Math.random() can stringify to fewer than six base-36 digits (0.5 is
    // "0.i", 0 is "0"), which would make the suffix short or even empty.
    // Pad so the suffix is always exactly six characters.
    const randomPart = Math.random().toString(36).slice(2, 8).padEnd(6, "0");
    return `task_${timePart}_${randomPart}`;
}
|
|
105
|
+
/**
 * Create a new A2A task from an incoming request and register it in the
 * task store with status "pending".
 *
 * @param {string} capability Name of the capability being invoked.
 * @param {object} input Input parameters; stored on the task as given (not copied).
 * @param {string} requesterId Identifier of the requesting agent.
 * @returns {object} The newly created task.
 */
export function createTask(capability, input, requesterId) {
    // Generated IDs combine a millisecond timestamp with a short random
    // suffix, so two tasks created in the same millisecond can collide.
    // Regenerate instead of silently overwriting the existing task.
    let taskId = generateTaskId();
    while (taskQueue.has(taskId)) {
        taskId = generateTaskId();
    }
    const task = {
        taskId,
        capability,
        status: "pending",
        input,
        requesterId,
        createdAt: new Date().toISOString(),
    };
    taskQueue.set(taskId, task);
    return task;
}
|
|
120
|
+
/**
 * Look up a task in the task store.
 *
 * @param {string} taskId Identifier of the task.
 * @returns {object|undefined} The stored task, or `undefined` when no task
 *   has that identifier.
 */
export function getTask(taskId) {
    const found = taskQueue.get(taskId);
    return found;
}
|
|
126
|
+
/**
 * Mark a task as completed and attach its result.
 *
 * Sets `status` to "completed", stores `result` as `output`, stamps
 * `completedAt`, and removes any `error` left over from an earlier failure so
 * a completed task never carries stale error details.
 *
 * @param {string} taskId Identifier of the task.
 * @param {object} result Result to store on the task.
 * @returns {object|undefined} The updated task, or `undefined` when no task
 *   has that identifier.
 */
export function completeTask(taskId, result) {
    const task = taskQueue.get(taskId);
    if (!task) {
        return undefined;
    }
    task.status = "completed";
    task.output = result;
    // `error` is documented as present only when failed; drop it on success.
    delete task.error;
    task.completedAt = new Date().toISOString();
    return task;
}
|
|
138
|
+
/**
 * Mark a task as failed and record the error.
 *
 * Sets `status` to "failed", stores `{ code, message }` as `error`, stamps
 * `completedAt`, and removes any `output` left over from an earlier
 * completion so a failed task never carries a stale result.
 *
 * @param {string} taskId Identifier of the task.
 * @param {string} code Error code to store.
 * @param {string} message Error message to store.
 * @returns {object|undefined} The updated task, or `undefined` when no task
 *   has that identifier.
 */
export function failTask(taskId, code, message) {
    const task = taskQueue.get(taskId);
    if (!task) {
        return undefined;
    }
    task.status = "failed";
    task.error = { code, message };
    // `output` is documented as present only when completed; drop it on failure.
    delete task.output;
    task.completedAt = new Date().toISOString();
    return task;
}
|
|
150
|
+
/**
 * Return the tasks currently in the task store as a new array.
 *
 * @param {string} [status] When truthy, only tasks whose `status` equals this
 *   value are returned; otherwise every task is returned.
 * @returns {object[]} The matching tasks.
 */
export function listTasks(status) {
    const everyTask = [...taskQueue.values()];
    if (!status) {
        return everyTask;
    }
    return everyTask.filter((candidate) => candidate.status === status);
}
|
|
157
|
+
/**
 * Remove finished tasks from the task store once they are old enough.
 *
 * A task is removed when its status is "completed" or "failed", it has a
 * truthy `completedAt`, and that timestamp is earlier than now minus
 * `maxAgeMs`. Tasks in any other status are left alone.
 *
 * @param {number} [maxAgeMs=3600000] Age threshold in milliseconds.
 * @returns {number} How many tasks were removed.
 */
export function pruneTasks(maxAgeMs = 3600000) {
    const threshold = Date.now() - maxAgeMs;
    const isFinished = (state) => state === "completed" || state === "failed";
    const expiredIds = [];
    for (const [taskId, entry] of taskQueue) {
        if (!isFinished(entry.status)) {
            continue;
        }
        if (!entry.completedAt) {
            continue;
        }
        if (new Date(entry.completedAt).getTime() < threshold) {
            expiredIds.push(taskId);
        }
    }
    for (const taskId of expiredIds) {
        taskQueue.delete(taskId);
    }
    return expiredIds.length;
}
|
|
173
|
+
/**
 * Handle an incoming A2A JSON-RPC request and return a JSON-RPC response.
 *
 * Supported methods:
 *  - "agent/discover": the full agent card
 *  - "agent/capabilities": the agent card's capability list
 *  - "task/create": creates a task from `params.capability` and `params.input`
 *  - "task/status": the task identified by `params.taskId`
 *  - "task/list": all tasks, optionally filtered by `params.status`
 *
 * Error codes used: -32600 (request is not an object), -32601 (unknown
 * method), -32602 (missing or malformed params), -32001 (task not found).
 *
 * @param {object} request The JSON-RPC request (`method`, optional `params`, optional `id`).
 * @returns {object} The JSON-RPC response, echoing the request `id`.
 */
export function handleA2ARequest(request) {
    // A null or non-object request cannot be destructured; answer with the
    // JSON-RPC "Invalid Request" error instead of throwing. The id cannot be
    // determined in that case, so it is null as JSON-RPC 2.0 prescribes.
    if (request === null || typeof request !== "object") {
        return { jsonrpc: "2.0", error: { code: -32600, message: "Invalid Request" }, id: null };
    }
    const { method, params, id } = request;
    switch (method) {
        case "agent/discover":
            return { jsonrpc: "2.0", result: getAgentCard(), id };
        case "agent/capabilities":
            return { jsonrpc: "2.0", result: getAgentCard().capabilities, id };
        case "task/create": {
            const capability = params?.capability;
            const input = params?.input;
            // `capability` must be a non-empty string and `input` a plain
            // object; truthiness alone would let e.g. a number or a bare
            // string through and store a malformed task.
            const hasCapability = typeof capability === "string" && capability !== "";
            const hasInput = input !== null && typeof input === "object" && !Array.isArray(input);
            if (!hasCapability || !hasInput) {
                return {
                    jsonrpc: "2.0",
                    error: { code: -32602, message: "Missing required params: capability, input" },
                    id,
                };
            }
            const task = createTask(capability, input, params.requesterId || "unknown");
            return { jsonrpc: "2.0", result: { taskId: task.taskId, status: task.status }, id };
        }
        case "task/status": {
            if (!params?.taskId) {
                return { jsonrpc: "2.0", error: { code: -32602, message: "Missing taskId" }, id };
            }
            const task = getTask(params.taskId);
            if (!task) {
                return { jsonrpc: "2.0", error: { code: -32001, message: "Task not found" }, id };
            }
            return { jsonrpc: "2.0", result: task, id };
        }
        case "task/list":
            return {
                jsonrpc: "2.0",
                result: listTasks(params?.status),
                id,
            };
        default:
            return {
                jsonrpc: "2.0",
                error: { code: -32601, message: `Method not found: ${method}` },
                id,
            };
    }
}
|
package/dist/api.d.ts
CHANGED
|
@@ -8,12 +8,16 @@
|
|
|
8
8
|
* const result = evaluateCode("const x = eval(input);", "typescript");
|
|
9
9
|
* ```
|
|
10
10
|
*/
|
|
11
|
-
export type { Severity, Verdict, Finding, Patch, LangFamily, JudgesConfig, RuleOverride, ProjectFile, ProjectVerdict, DiffVerdict, DependencyEntry, DependencyVerdict, JudgeEvaluation, TribunalVerdict, JudgeDefinition, EvaluationContextV2, EvidenceBundleV2, SpecializedFindingV2, TribunalVerdictV2, MustFixGateOptions, MustFixGateResult, AppBuilderWorkflowResult, PlainLanguageFinding, WorkflowTask, PolicyProfile, SuppressionRecord, SuppressionResult, } from "./types.js";
|
|
11
|
+
export type { Severity, Verdict, Finding, Patch, LangFamily, JudgesConfig, RuleOverride, ProjectFile, ProjectVerdict, DiffVerdict, DependencyEntry, DependencyVerdict, JudgeEvaluation, TribunalVerdict, JudgeDefinition, EvaluationContextV2, EvidenceBundleV2, SpecializedFindingV2, TribunalVerdictV2, MustFixGateOptions, MustFixGateResult, AppBuilderWorkflowResult, PlainLanguageFinding, WorkflowTask, PolicyProfile, SuppressionRecord, SuppressionResult, ExecutionTrace, RuleTrace, StreamingBatch, JudgeSelectionContext, JudgeSelectionResult, SessionContext, } from "./types.js";
|
|
12
12
|
export { JudgesError, ConfigError, EvaluationError, ParseError } from "./errors.js";
|
|
13
13
|
export { parseConfig, defaultConfig, mergeConfigs, discoverCascadingConfigs, loadCascadingConfig, loadPluginJudges, validatePluginSpecifiers, isValidJudgeDefinition, applyOverridesForFile, applyLanguageProfile, resolveExtendsConfig, } from "./config.js";
|
|
14
14
|
export { JUDGES, getJudge, getJudgeSummaries } from "./judges/index.js";
|
|
15
|
-
export { evaluateWithJudge, evaluateWithTribunal, evaluateProject, evaluateDiff, analyzeDependencies, enrichWithPatches, crossEvaluatorDedup, diffFindings, formatFindingDiff, evaluateNetChangeGate, applyInlineSuppressions, applyInlineSuppressionsWithAudit, runAppBuilderWorkflow, formatVerdictAsMarkdown, formatEvaluationAsMarkdown, clearEvaluationCaches, scanProjectWideSecurityPatterns, } from "./evaluators/index.js";
|
|
16
|
-
export type { FindingDiff, NetChangeGateOptions, NetChangeGateResult } from "./evaluators/index.js";
|
|
15
|
+
export { evaluateWithJudge, evaluateWithTribunal, evaluateWithTribunalStreaming, evaluateProject, evaluateDiff, analyzeDependencies, enrichWithPatches, crossEvaluatorDedup, diffFindings, formatFindingDiff, evaluateNetChangeGate, applyInlineSuppressions, applyInlineSuppressionsWithAudit, runAppBuilderWorkflow, formatVerdictAsMarkdown, formatEvaluationAsMarkdown, clearEvaluationCaches, scanProjectWideSecurityPatterns, } from "./evaluators/index.js";
|
|
16
|
+
export type { FindingDiff, NetChangeGateOptions, NetChangeGateResult, EvaluationOptions } from "./evaluators/index.js";
|
|
17
|
+
export { selectJudges } from "./evaluators/judge-selector.js";
|
|
18
|
+
export { EvaluationSession, getGlobalSession, resetGlobalSession } from "./evaluation-session.js";
|
|
19
|
+
export { getPreset, composePresets, PRESETS } from "./presets.js";
|
|
20
|
+
export type { Preset } from "./presets.js";
|
|
17
21
|
export { evaluateCodeV2, evaluateProjectV2, getSupportedPolicyProfiles } from "./evaluators/v2.js";
|
|
18
22
|
export { analyzeCrossFileTaint } from "./ast/cross-file-taint.js";
|
|
19
23
|
export { buildSingleJudgeDeepReviewSection, buildTribunalDeepReviewSection, buildSimplifiedDeepReviewSection, isContentPolicyRefusal, DEEP_REVIEW_PROMPT_INTRO, DEEP_REVIEW_IDENTITY, } from "./tools/deep-review.js";
|
|
@@ -28,6 +32,18 @@ export { findingsToSarif, evaluationToSarif, verdictToSarif, validateSarifLog }
|
|
|
28
32
|
export type { SarifValidationError } from "./formatters/sarif.js";
|
|
29
33
|
export { verdictToCsvRows, verdictsToCsv, findingsToCsv } from "./formatters/csv.js";
|
|
30
34
|
export { verdictToGitHubActions } from "./formatters/github-actions.js";
|
|
35
|
+
export { evaluateEscalations, resolveEscalation, computeEscalationSummary, shouldBlockOnEscalations, enhanceReviewWithEscalations, loadEscalationStore, saveEscalationStore, } from "./escalation.js";
|
|
36
|
+
export type { EscalatedFinding, EscalationStore, EscalationSummary, EscalationPolicy, EscalationReason, EscalationStatus, EscalationRouting, } from "./escalation.js";
|
|
37
|
+
export { appendAuditEvent, readAuditTrail, verifyAuditIntegrity, recordEvaluationStart, recordEvaluationComplete, recordFindings, recordSuppression, recordOverride, recordEscalation, recordReviewDecision, recordTriageAction, computeAuditSummary, queryAuditTrail, } from "./audit-trail.js";
|
|
38
|
+
export type { AuditEvent, AuditEventType, AuditPayload, AuditSummary } from "./audit-trail.js";
|
|
39
|
+
export { registerSastProvider, getSastProvider, listSastProviders, ingestSarifFile, ingestSarifContent, mergeSastFindings, } from "./sast-integration.js";
|
|
40
|
+
export type { SastProvider } from "./sast-integration.js";
|
|
41
|
+
export { startReviewConversation, processMessage, getOutstandingFindings, isConversationResolved, exportConversationAsMarkdown, } from "./review-conversation.js";
|
|
42
|
+
export type { ReviewConversation, ConversationMessage, ConversationRole, MessageIntent, ConversationState, DeveloperContext, } from "./review-conversation.js";
|
|
43
|
+
export { getAgentCard, createTask, getTask, completeTask, failTask, listTasks, pruneTasks, handleA2ARequest, } from "./a2a-protocol.js";
|
|
44
|
+
export type { AgentCard, AgentCapability, A2ATask, A2ATaskResult, TaskStatus, A2ARequest, A2AResponse, } from "./a2a-protocol.js";
|
|
45
|
+
export { runFeedbackLoop, formatFeedbackLoopReport } from "./feedback-loop.js";
|
|
46
|
+
export type { FeedbackLoopResult, ConfidenceAdjustment, FeedbackLoopStats } from "./feedback-loop.js";
|
|
31
47
|
export { registerPlugin, unregisterPlugin, getRegisteredPlugins, getCustomRules, getPluginJudges, evaluateCustomRules, runBeforeHooks, runAfterHooks, clearPlugins, } from "./plugins.js";
|
|
32
48
|
export type { CustomRule, JudgesPlugin, PluginRegistration } from "./plugins.js";
|
|
33
49
|
export { JudgeRegistry, defaultRegistry } from "./judge-registry.js";
|
|
@@ -56,6 +72,8 @@ export type { BenchmarkCase, BenchmarkResult, BenchmarkGateOptions, BenchmarkGat
|
|
|
56
72
|
export { parseLlmRuleIds, constructPerJudgePrompt, constructTribunalPrompt, selectStratifiedSample, scoreLlmCase, computeLlmMetrics, formatLlmSnapshotMarkdown, formatLayerComparisonMarkdown, extractValidatedLlmFindings, getValidRulePrefixes, } from "./commands/llm-benchmark.js";
|
|
57
73
|
export type { LlmBenchmarkSnapshot, LlmCaseResult } from "./commands/llm-benchmark.js";
|
|
58
74
|
export type { LlmFinding, ValidationResult } from "./probabilistic/llm-response-validator.js";
|
|
75
|
+
export { optimizeBenchmark, formatAmendmentSection, createEmptyStore, mergeAmendments, } from "./commands/llm-benchmark-optimizer.js";
|
|
76
|
+
export type { PromptAmendment, OptimizerInsight, OptimizationResult, AmendmentStore, } from "./commands/llm-benchmark-optimizer.js";
|
|
59
77
|
export { runReviewAutopilot, dedupeComments, filterAlreadyPostedComments } from "./commands/review.js";
|
|
60
78
|
export { buildContextSnippets } from "./context/context-snippets.js";
|
|
61
79
|
export { EmbeddingCache, FallbackEmbeddingProvider, getOrCreateEmbedding } from "./context/embedding-cache.js";
|
package/dist/api.js
CHANGED
|
@@ -15,7 +15,13 @@ export { parseConfig, defaultConfig, mergeConfigs, discoverCascadingConfigs, loa
|
|
|
15
15
|
// ─── Judge Registry ──────────────────────────────────────────────────────────
|
|
16
16
|
export { JUDGES, getJudge, getJudgeSummaries } from "./judges/index.js";
|
|
17
17
|
// ─── Core Evaluation Functions ───────────────────────────────────────────────
|
|
18
|
-
export { evaluateWithJudge, evaluateWithTribunal, evaluateProject, evaluateDiff, analyzeDependencies, enrichWithPatches, crossEvaluatorDedup, diffFindings, formatFindingDiff, evaluateNetChangeGate, applyInlineSuppressions, applyInlineSuppressionsWithAudit, runAppBuilderWorkflow, formatVerdictAsMarkdown, formatEvaluationAsMarkdown, clearEvaluationCaches, scanProjectWideSecurityPatterns, } from "./evaluators/index.js";
|
|
18
|
+
export { evaluateWithJudge, evaluateWithTribunal, evaluateWithTribunalStreaming, evaluateProject, evaluateDiff, analyzeDependencies, enrichWithPatches, crossEvaluatorDedup, diffFindings, formatFindingDiff, evaluateNetChangeGate, applyInlineSuppressions, applyInlineSuppressionsWithAudit, runAppBuilderWorkflow, formatVerdictAsMarkdown, formatEvaluationAsMarkdown, clearEvaluationCaches, scanProjectWideSecurityPatterns, } from "./evaluators/index.js";
|
|
19
|
+
// ─── Adaptive Judge Selection ────────────────────────────────────────────────
|
|
20
|
+
export { selectJudges } from "./evaluators/judge-selector.js";
|
|
21
|
+
// ─── Evaluation Session ─────────────────────────────────────────────────────
|
|
22
|
+
export { EvaluationSession, getGlobalSession, resetGlobalSession } from "./evaluation-session.js";
|
|
23
|
+
// ─── Presets ─────────────────────────────────────────────────────────────────
|
|
24
|
+
export { getPreset, composePresets, PRESETS } from "./presets.js";
|
|
19
25
|
// ─── V2 Policy-Aware API ────────────────────────────────────────────────────
|
|
20
26
|
export { evaluateCodeV2, evaluateProjectV2, getSupportedPolicyProfiles } from "./evaluators/v2.js";
|
|
21
27
|
// ─── Cross-File Taint Analysis ───────────────────────────────────────────────
|
|
@@ -34,6 +40,18 @@ export { clearProjectCache } from "./evaluators/project.js";
|
|
|
34
40
|
export { findingsToSarif, evaluationToSarif, verdictToSarif, validateSarifLog } from "./formatters/sarif.js";
|
|
35
41
|
export { verdictToCsvRows, verdictsToCsv, findingsToCsv } from "./formatters/csv.js";
|
|
36
42
|
export { verdictToGitHubActions } from "./formatters/github-actions.js";
|
|
43
|
+
// ─── Escalation Protocol ────────────────────────────────────────────────────
|
|
44
|
+
export { evaluateEscalations, resolveEscalation, computeEscalationSummary, shouldBlockOnEscalations, enhanceReviewWithEscalations, loadEscalationStore, saveEscalationStore, } from "./escalation.js";
|
|
45
|
+
// ─── Audit Trail ─────────────────────────────────────────────────────────────
|
|
46
|
+
export { appendAuditEvent, readAuditTrail, verifyAuditIntegrity, recordEvaluationStart, recordEvaluationComplete, recordFindings, recordSuppression, recordOverride, recordEscalation, recordReviewDecision, recordTriageAction, computeAuditSummary, queryAuditTrail, } from "./audit-trail.js";
|
|
47
|
+
// ─── SAST Integration ────────────────────────────────────────────────────────
|
|
48
|
+
export { registerSastProvider, getSastProvider, listSastProviders, ingestSarifFile, ingestSarifContent, mergeSastFindings, } from "./sast-integration.js";
|
|
49
|
+
// ─── Multi-Turn Review Conversation ──────────────────────────────────────────
|
|
50
|
+
export { startReviewConversation, processMessage, getOutstandingFindings, isConversationResolved, exportConversationAsMarkdown, } from "./review-conversation.js";
|
|
51
|
+
// ─── A2A Protocol ────────────────────────────────────────────────────────────
|
|
52
|
+
export { getAgentCard, createTask, getTask, completeTask, failTask, listTasks, pruneTasks, handleA2ARequest, } from "./a2a-protocol.js";
|
|
53
|
+
// ─── Fix-Outcome Feedback Loop ───────────────────────────────────────────────
|
|
54
|
+
export { runFeedbackLoop, formatFeedbackLoopReport } from "./feedback-loop.js";
|
|
37
55
|
// ─── Plugin API ──────────────────────────────────────────────────────────────
|
|
38
56
|
export { registerPlugin, unregisterPlugin, getRegisteredPlugins, getCustomRules, getPluginJudges, evaluateCustomRules, runBeforeHooks, runAfterHooks, clearPlugins, } from "./plugins.js";
|
|
39
57
|
// ─── Judge Registry ──────────────────────────────────────────────────────────
|
|
@@ -61,6 +79,8 @@ export { compareCapabilities, formatComparisonReport, formatFullComparisonMatrix
|
|
|
61
79
|
export { runBenchmarkSuite, benchmarkGate, formatBenchmarkReport, formatBenchmarkMarkdown, analyzeL2Coverage, formatL2CoverageReport, ingestFindingsAsBenchmarkCases, deduplicateIngestCases, BENCHMARK_CASES, } from "./commands/benchmark.js";
|
|
62
80
|
// ─── LLM Benchmark ──────────────────────────────────────────────────────────
|
|
63
81
|
export { parseLlmRuleIds, constructPerJudgePrompt, constructTribunalPrompt, selectStratifiedSample, scoreLlmCase, computeLlmMetrics, formatLlmSnapshotMarkdown, formatLayerComparisonMarkdown, extractValidatedLlmFindings, getValidRulePrefixes, } from "./commands/llm-benchmark.js";
|
|
82
|
+
// ─── LLM Benchmark Optimizer (Self-Teaching) ────────────────────────────────
|
|
83
|
+
export { optimizeBenchmark, formatAmendmentSection, createEmptyStore, mergeAmendments, } from "./commands/llm-benchmark-optimizer.js";
|
|
64
84
|
// Review autopilot (GitHub App / scripts)
|
|
65
85
|
export { runReviewAutopilot, dedupeComments, filterAlreadyPostedComments } from "./commands/review.js";
|
|
66
86
|
export { buildContextSnippets } from "./context/context-snippets.js";
|