llm-cli-gateway 1.1.0 → 1.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +87 -0
- package/README.md +226 -9
- package/dist/approval-manager.d.ts +1 -1
- package/dist/async-job-manager.d.ts +75 -4
- package/dist/async-job-manager.js +303 -19
- package/dist/auth.d.ts +15 -0
- package/dist/auth.js +46 -0
- package/dist/cli-updater.d.ts +55 -0
- package/dist/cli-updater.js +248 -0
- package/dist/codex-json-parser.d.ts +34 -0
- package/dist/codex-json-parser.js +105 -0
- package/dist/doctor.d.ts +110 -0
- package/dist/doctor.js +280 -0
- package/dist/endpoint-exposure.d.ts +22 -0
- package/dist/endpoint-exposure.js +231 -0
- package/dist/executor.d.ts +2 -0
- package/dist/executor.js +2 -2
- package/dist/flight-recorder.d.ts +3 -1
- package/dist/flight-recorder.js +31 -2
- package/dist/gateway-server.d.ts +2 -0
- package/dist/gateway-server.js +1 -0
- package/dist/gemini-json-parser.d.ts +21 -0
- package/dist/gemini-json-parser.js +47 -0
- package/dist/health.d.ts +7 -0
- package/dist/health.js +22 -0
- package/dist/http-transport.d.ts +22 -0
- package/dist/http-transport.js +164 -0
- package/dist/index.d.ts +210 -2
- package/dist/index.js +2880 -1037
- package/dist/job-store.d.ts +84 -0
- package/dist/job-store.js +251 -0
- package/dist/logger.d.ts +9 -0
- package/dist/logger.js +14 -0
- package/dist/model-registry.d.ts +14 -0
- package/dist/model-registry.js +478 -134
- package/dist/provider-login-guidance.d.ts +21 -0
- package/dist/provider-login-guidance.js +98 -0
- package/dist/provider-status.d.ts +41 -0
- package/dist/provider-status.js +203 -0
- package/dist/request-helpers.d.ts +525 -4
- package/dist/request-helpers.js +653 -0
- package/dist/resources.js +88 -0
- package/dist/session-manager-pg.js +2 -0
- package/dist/session-manager.d.ts +1 -1
- package/dist/session-manager.js +3 -1
- package/dist/validation-normalizer.d.ts +23 -0
- package/dist/validation-normalizer.js +79 -0
- package/dist/validation-orchestrator.d.ts +47 -0
- package/dist/validation-orchestrator.js +145 -0
- package/dist/validation-prompts.d.ts +15 -0
- package/dist/validation-prompts.js +52 -0
- package/dist/validation-report.d.ts +57 -0
- package/dist/validation-report.js +129 -0
- package/dist/validation-tools.d.ts +7 -0
- package/dist/validation-tools.js +198 -0
- package/package.json +16 -6
- package/setup/status.schema.json +271 -0
package/dist/resources.js
CHANGED
|
@@ -54,6 +54,28 @@ export class ResourceProvider {
|
|
|
54
54
|
priority: 0.6,
|
|
55
55
|
},
|
|
56
56
|
},
|
|
57
|
+
{
|
|
58
|
+
uri: "sessions://grok",
|
|
59
|
+
name: "Grok Sessions",
|
|
60
|
+
title: "⚡ Grok Sessions",
|
|
61
|
+
description: "List of Grok conversation sessions",
|
|
62
|
+
mimeType: "application/json",
|
|
63
|
+
annotations: {
|
|
64
|
+
audience: ["user", "assistant"],
|
|
65
|
+
priority: 0.6,
|
|
66
|
+
},
|
|
67
|
+
},
|
|
68
|
+
{
|
|
69
|
+
uri: "sessions://mistral",
|
|
70
|
+
name: "Mistral Sessions",
|
|
71
|
+
title: "🌬 Mistral Sessions",
|
|
72
|
+
description: "List of Mistral Vibe conversation sessions",
|
|
73
|
+
mimeType: "application/json",
|
|
74
|
+
annotations: {
|
|
75
|
+
audience: ["user", "assistant"],
|
|
76
|
+
priority: 0.6,
|
|
77
|
+
},
|
|
78
|
+
},
|
|
57
79
|
{
|
|
58
80
|
uri: "models://claude",
|
|
59
81
|
name: "Claude Models",
|
|
@@ -87,6 +109,28 @@ export class ResourceProvider {
|
|
|
87
109
|
priority: 0.8,
|
|
88
110
|
},
|
|
89
111
|
},
|
|
112
|
+
{
|
|
113
|
+
uri: "models://grok",
|
|
114
|
+
name: "Grok Models",
|
|
115
|
+
title: "⚡ Grok Models & Capabilities",
|
|
116
|
+
description: "Available Grok models and their capabilities",
|
|
117
|
+
mimeType: "application/json",
|
|
118
|
+
annotations: {
|
|
119
|
+
audience: ["user", "assistant"],
|
|
120
|
+
priority: 0.8,
|
|
121
|
+
},
|
|
122
|
+
},
|
|
123
|
+
{
|
|
124
|
+
uri: "models://mistral",
|
|
125
|
+
name: "Mistral Models",
|
|
126
|
+
title: "🌬 Mistral Models & Capabilities",
|
|
127
|
+
description: "Available Mistral Vibe models and their capabilities",
|
|
128
|
+
mimeType: "application/json",
|
|
129
|
+
annotations: {
|
|
130
|
+
audience: ["user", "assistant"],
|
|
131
|
+
priority: 0.8,
|
|
132
|
+
},
|
|
133
|
+
},
|
|
90
134
|
{
|
|
91
135
|
uri: "metrics://performance",
|
|
92
136
|
name: "Performance Metrics",
|
|
@@ -121,6 +165,8 @@ export class ResourceProvider {
|
|
|
121
165
|
claude: (await this.sessionManager.getActiveSession("claude"))?.id || null,
|
|
122
166
|
codex: (await this.sessionManager.getActiveSession("codex"))?.id || null,
|
|
123
167
|
gemini: (await this.sessionManager.getActiveSession("gemini"))?.id || null,
|
|
168
|
+
grok: (await this.sessionManager.getActiveSession("grok"))?.id || null,
|
|
169
|
+
mistral: (await this.sessionManager.getActiveSession("mistral"))?.id || null,
|
|
124
170
|
},
|
|
125
171
|
}, null, 2),
|
|
126
172
|
};
|
|
@@ -164,6 +210,32 @@ export class ResourceProvider {
|
|
|
164
210
|
}, null, 2),
|
|
165
211
|
};
|
|
166
212
|
}
|
|
213
|
+
if (uri === "sessions://grok") {
|
|
214
|
+
const sessions = await this.sessionManager.listSessions("grok");
|
|
215
|
+
return {
|
|
216
|
+
uri,
|
|
217
|
+
mimeType: "application/json",
|
|
218
|
+
text: JSON.stringify({
|
|
219
|
+
cli: "grok",
|
|
220
|
+
total: sessions.length,
|
|
221
|
+
sessions,
|
|
222
|
+
activeSession: (await this.sessionManager.getActiveSession("grok"))?.id || null,
|
|
223
|
+
}, null, 2),
|
|
224
|
+
};
|
|
225
|
+
}
|
|
226
|
+
if (uri === "sessions://mistral") {
|
|
227
|
+
const sessions = await this.sessionManager.listSessions("mistral");
|
|
228
|
+
return {
|
|
229
|
+
uri,
|
|
230
|
+
mimeType: "application/json",
|
|
231
|
+
text: JSON.stringify({
|
|
232
|
+
cli: "mistral",
|
|
233
|
+
total: sessions.length,
|
|
234
|
+
sessions,
|
|
235
|
+
activeSession: (await this.sessionManager.getActiveSession("mistral"))?.id || null,
|
|
236
|
+
}, null, 2),
|
|
237
|
+
};
|
|
238
|
+
}
|
|
167
239
|
// Model capability resources
|
|
168
240
|
if (uri === "models://claude") {
|
|
169
241
|
const cliInfo = getCliInfo();
|
|
@@ -189,6 +261,22 @@ export class ResourceProvider {
|
|
|
189
261
|
text: JSON.stringify(cliInfo.gemini, null, 2),
|
|
190
262
|
};
|
|
191
263
|
}
|
|
264
|
+
if (uri === "models://grok") {
|
|
265
|
+
const cliInfo = getCliInfo();
|
|
266
|
+
return {
|
|
267
|
+
uri,
|
|
268
|
+
mimeType: "application/json",
|
|
269
|
+
text: JSON.stringify(cliInfo.grok, null, 2),
|
|
270
|
+
};
|
|
271
|
+
}
|
|
272
|
+
if (uri === "models://mistral") {
|
|
273
|
+
const cliInfo = getCliInfo();
|
|
274
|
+
return {
|
|
275
|
+
uri,
|
|
276
|
+
mimeType: "application/json",
|
|
277
|
+
text: JSON.stringify(cliInfo.mistral, null, 2),
|
|
278
|
+
};
|
|
279
|
+
}
|
|
192
280
|
if (uri === "metrics://performance") {
|
|
193
281
|
return {
|
|
194
282
|
uri,
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import type { Config } from "./config.js";
|
|
2
2
|
import type { DatabaseConnection } from "./db.js";
|
|
3
3
|
import type { Logger } from "./logger.js";
|
|
4
|
-
export declare const CLI_TYPES: readonly ["claude", "codex", "gemini"];
|
|
4
|
+
export declare const CLI_TYPES: readonly ["claude", "codex", "gemini", "grok", "mistral"];
|
|
5
5
|
export type CliType = (typeof CLI_TYPES)[number];
|
|
6
6
|
export interface Session {
|
|
7
7
|
id: string;
|
package/dist/session-manager.js
CHANGED
|
@@ -4,12 +4,14 @@ import { join, dirname } from "path";
|
|
|
4
4
|
import { existsSync, mkdirSync, readFileSync, writeFileSync, renameSync, openSync, fsyncSync, closeSync, chmodSync, } from "fs";
|
|
5
5
|
import { DEFAULT_SESSION_TTL_SECONDS } from "./config.js";
|
|
6
6
|
import { noopLogger } from "./logger.js";
|
|
7
|
-
export const CLI_TYPES = ["claude", "codex", "gemini"];
|
|
7
|
+
export const CLI_TYPES = ["claude", "codex", "gemini", "grok", "mistral"];
|
|
8
8
|
const createEmptyActiveSessions = () => Object.fromEntries(CLI_TYPES.map(cli => [cli, null]));
|
|
9
9
|
const DEFAULT_SESSION_DESCRIPTIONS = {
|
|
10
10
|
claude: "Claude Session",
|
|
11
11
|
codex: "Codex Session",
|
|
12
12
|
gemini: "Gemini Session",
|
|
13
|
+
grok: "Grok Session",
|
|
14
|
+
mistral: "Mistral Session",
|
|
13
15
|
};
|
|
14
16
|
export class FileSessionManager {
|
|
15
17
|
storagePath;
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import type { AsyncJobResult, AsyncJobSnapshot } from "./async-job-manager.js";
|
|
2
|
+
export type ValidationProvider = "claude" | "codex" | "gemini" | "grok" | "mistral";
|
|
3
|
+
export type NormalizedValidationStatus = "running" | "completed" | "failed" | "canceled" | "orphaned" | "skipped";
|
|
4
|
+
export interface RawJobReference {
|
|
5
|
+
jobId: string;
|
|
6
|
+
correlationId: string;
|
|
7
|
+
statusTool: "job_status";
|
|
8
|
+
resultTool: "job_result";
|
|
9
|
+
}
|
|
10
|
+
export interface NormalizedValidationResult {
|
|
11
|
+
provider: ValidationProvider;
|
|
12
|
+
model: string | null;
|
|
13
|
+
status: NormalizedValidationStatus;
|
|
14
|
+
verdict: string | null;
|
|
15
|
+
rationale: string | null;
|
|
16
|
+
risks: string[];
|
|
17
|
+
rawJobReference: RawJobReference | null;
|
|
18
|
+
error: string | null;
|
|
19
|
+
warning?: string;
|
|
20
|
+
}
|
|
21
|
+
export declare function normalizeStartedJob(provider: ValidationProvider, model: string | null, snapshot: AsyncJobSnapshot, warning?: string): NormalizedValidationResult;
|
|
22
|
+
export declare function normalizeSkippedProvider(provider: ValidationProvider, reason: string): NormalizedValidationResult;
|
|
23
|
+
export declare function normalizeJobResult(provider: ValidationProvider, model: string | null, result: AsyncJobResult): NormalizedValidationResult;
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
 * Build the normalized validation entry for a provider job that was just started.
 *
 * While the snapshot reports "running" the entry carries a placeholder
 * "pending" verdict and rationale; for any other snapshot status both stay null.
 *
 * @param provider Provider the job was started for.
 * @param model Value reported in the `model` field of the entry.
 * @param snapshot Job snapshot; `id`, `correlationId`, `status` and `error` are read.
 * @param warning Optional warning attached to the entry unchanged.
 * @returns The normalized entry, including a reference to the underlying job.
 */
export function normalizeStartedJob(provider, model, snapshot, warning) {
    const isRunning = snapshot.status === "running";
    let verdict = null;
    let rationale = null;
    if (isRunning) {
        verdict = "pending";
        rationale = "Provider job is running asynchronously.";
    }
    // Pointer back to the job plus the tool names used to poll and collect it.
    const rawJobReference = {
        jobId: snapshot.id,
        correlationId: snapshot.correlationId,
        statusTool: "job_status",
        resultTool: "job_result",
    };
    return {
        provider,
        model,
        status: snapshot.status,
        verdict,
        rationale,
        risks: [],
        rawJobReference,
        error: snapshot.error,
        warning,
    };
}
|
|
19
|
+
/**
 * Build the normalized validation entry for a provider that was not run.
 *
 * The same reason text is reported as the rationale, as the single risk and
 * as the error; no job reference exists because no job was started.
 *
 * @param provider Provider that was skipped.
 * @param reason Human-readable explanation of why it was skipped.
 * @returns A normalized entry with status "skipped" and verdict "not_run".
 */
export function normalizeSkippedProvider(provider, reason) {
    const skippedEntry = {
        provider,
        model: null,
        status: "skipped",
        verdict: "not_run",
        rationale: reason,
        risks: [reason],
        rawJobReference: null,
        error: reason,
    };
    return skippedEntry;
}
|
|
31
|
+
/**
 * Convert a collected job result into a normalized validation entry.
 *
 * The trimmed stdout is the provider's answer. The error is `result.error`
 * when it is truthy; otherwise trimmed stderr is used, but only for jobs whose
 * status is "failed". The rationale is an excerpt of the answer (1800
 * characters at most) and falls back to the error when the answer is empty.
 *
 * @param provider Provider that produced the result.
 * @param model Value reported in the `model` field of the entry.
 * @param result Job result; `id`, `correlationId`, `status`, `stdout`, `stderr` and `error` are read.
 * @returns The normalized entry, including a reference to the underlying job.
 */
export function normalizeJobResult(provider, model, result) {
    const output = result.stdout.trim();
    let error = result.error;
    if (!error) {
        error = result.status === "failed" ? result.stderr.trim() : null;
    }
    const verdict = inferVerdict(output, result.status);
    const rationale = output ? excerpt(output, 1800) : error;
    const risks = extractRisks(output, error);
    // Pointer back to the job plus the tool names used to poll and collect it.
    const rawJobReference = {
        jobId: result.id,
        correlationId: result.correlationId,
        statusTool: "job_status",
        resultTool: "job_result",
    };
    return {
        provider,
        model,
        status: result.status,
        verdict,
        rationale,
        risks,
        rawJobReference,
        error,
    };
}
|
|
50
|
+
/**
 * Derive a short verdict string from a job's status and its answer text.
 *
 * Non-successful statuses map straight to a verdict ("pending", the status
 * itself for canceled/orphaned, or "failed"). Otherwise the first line of the
 * form `Verdict: ...` (case-insensitive) supplies the verdict, capped at 240
 * characters. Any other non-empty answer yields "answered"; an empty one null.
 *
 * @param output Trimmed provider answer.
 * @param status Job status.
 * @returns The verdict, or null when nothing can be inferred.
 */
function inferVerdict(output, status) {
    switch (status) {
        case "running":
            return "pending";
        case "canceled":
        case "orphaned":
            return status;
        case "failed":
            return "failed";
        default:
            break;
    }
    const labelled = /(?:^|\n)\s*verdict\s*:\s*(.+)/i.exec(output);
    const verdictText = labelled?.[1];
    if (verdictText) {
        return excerpt(verdictText.trim(), 240);
    }
    return output ? "answered" : null;
}
|
|
64
|
+
/**
 * Collect up to five risk statements from a provider answer.
 *
 * A line counts as a risk statement when, after trimming and an optional
 * leading `-` or `*` bullet, it starts with one of the keywords risk, concern,
 * caution or limitation (singular or plural, case-insensitive) as a whole
 * word. Each kept line is capped at 300 characters. When no line qualifies
 * and an error is present, the (capped) error is reported as the only risk.
 *
 * @param output Trimmed provider answer.
 * @param error Error text for the job, or a falsy value when there is none.
 * @returns The extracted risk lines, possibly empty.
 */
function extractRisks(output, error) {
    // Every keyword accepts an optional plural "s"; previously only "risks"
    // did, so lines such as "Concerns: ..." or "Limitations: ..." were missed.
    const riskLine = /^(?:[-*]\s*)?(?:risks?|concerns?|cautions?|limitations?)\b/i;
    const risks = output
        .split(/\r?\n/)
        .map(line => line.trim())
        .filter(line => riskLine.test(line))
        .slice(0, 5)
        .map(line => excerpt(line, 300));
    if (error && risks.length === 0) {
        risks.push(excerpt(error, 300));
    }
    return risks;
}
|
|
75
|
+
/**
 * Shorten a string to at most `max` UTF-16 code units.
 *
 * Strings that already fit are returned unchanged. Longer strings are cut and
 * suffixed with "..." so that the total length does not exceed `max`.
 *
 * @param value Text to shorten.
 * @param max Maximum length of the returned string.
 * @returns `value` itself, or a truncated copy.
 */
function excerpt(value, max) {
    if (value.length <= max) {
        return value;
    }
    const ellipsis = "...";
    if (max < ellipsis.length) {
        // No room for the ellipsis. `max - 3` would be negative here and make
        // slice() count from the end, producing a result longer than `max`.
        return value.slice(0, Math.max(0, max));
    }
    let keep = max - ellipsis.length;
    const lastUnit = value.charCodeAt(keep - 1);
    // Do not end the cut on a lone high surrogate (half of an astral character).
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        keep -= 1;
    }
    return `${value.slice(0, keep)}${ellipsis}`;
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import type { AsyncJobManager } from "./async-job-manager.js";
|
|
2
|
+
import { type ProviderRuntimeStatus } from "./provider-status.js";
|
|
3
|
+
import { type NormalizedValidationResult, type ValidationProvider } from "./validation-normalizer.js";
|
|
4
|
+
import { type ValidationReport } from "./validation-report.js";
|
|
5
|
+
import { type ValidationIntent } from "./validation-prompts.js";
|
|
6
|
+
export interface ValidationOrchestratorDeps {
|
|
7
|
+
asyncJobManager: AsyncJobManager;
|
|
8
|
+
getProviderRuntimeStatus?: (provider: ValidationProvider) => ProviderRuntimeStatus;
|
|
9
|
+
}
|
|
10
|
+
export interface StartValidationInput {
|
|
11
|
+
intent: ValidationIntent;
|
|
12
|
+
question?: string;
|
|
13
|
+
content?: string;
|
|
14
|
+
providers: ValidationProvider[];
|
|
15
|
+
focus?: string;
|
|
16
|
+
riskLevel?: "normal" | "high";
|
|
17
|
+
judgeProvider?: ValidationProvider;
|
|
18
|
+
}
|
|
19
|
+
export interface ValidationRunReport {
|
|
20
|
+
success: boolean;
|
|
21
|
+
validationId: string;
|
|
22
|
+
status: "running" | "partial" | "not_started";
|
|
23
|
+
startedAt: string;
|
|
24
|
+
intent: ValidationIntent;
|
|
25
|
+
originalRequest: {
|
|
26
|
+
question?: string;
|
|
27
|
+
content?: string;
|
|
28
|
+
focus?: string;
|
|
29
|
+
};
|
|
30
|
+
modelList: ValidationProvider[];
|
|
31
|
+
results: NormalizedValidationResult[];
|
|
32
|
+
synthesis: {
|
|
33
|
+
status: "not_requested" | "waiting_for_provider_results" | "running" | "skipped";
|
|
34
|
+
judgeModel: ValidationProvider | null;
|
|
35
|
+
rawJobReference: NormalizedValidationResult["rawJobReference"];
|
|
36
|
+
note: string;
|
|
37
|
+
};
|
|
38
|
+
report: ValidationReport;
|
|
39
|
+
next: string;
|
|
40
|
+
}
|
|
41
|
+
export declare function startValidationRun(deps: ValidationOrchestratorDeps, input: StartValidationInput): ValidationRunReport;
|
|
42
|
+
export declare function startJudgeSynthesis(deps: ValidationOrchestratorDeps, input: {
|
|
43
|
+
question: string;
|
|
44
|
+
providerResults: NormalizedValidationResult[];
|
|
45
|
+
judgeProvider: ValidationProvider;
|
|
46
|
+
}): ValidationRunReport["synthesis"];
|
|
47
|
+
export declare function collectValidationJobResult(deps: ValidationOrchestratorDeps, provider: ValidationProvider, jobId: string, model: string | null, maxChars?: number): NormalizedValidationResult | null;
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import { randomUUID } from "node:crypto";
|
|
2
|
+
import { getProviderRuntimeStatus } from "./provider-status.js";
|
|
3
|
+
import { normalizeJobResult, normalizeSkippedProvider, normalizeStartedJob, } from "./validation-normalizer.js";
|
|
4
|
+
import { buildValidationReport } from "./validation-report.js";
|
|
5
|
+
import { buildJudgePrompt, buildValidationPrompt, } from "./validation-prompts.js";
|
|
6
|
+
/**
 * Start one validation job per distinct requested provider and describe the run.
 *
 * A single prompt is built from the request and handed to every provider.
 * The overall status is "not_started" when no provider entry is running,
 * "partial" when at least one is running and at least one was skipped, and
 * "running" otherwise. `success` is true when at least one entry is running.
 *
 * @param deps Orchestrator dependencies, forwarded to the per-provider start.
 * @param input Validation request (intent, question/content, providers, options).
 * @returns The run report, including per-provider entries and the planned synthesis.
 */
export function startValidationRun(deps, input) {
    const validationId = randomUUID();
    const startedAt = new Date().toISOString();
    const { intent, question, content, focus, riskLevel } = input;
    const prompt = buildValidationPrompt({ intent, question, content, focus, riskLevel });
    const providers = uniqueProviders(input.providers);
    const results = providers.map(provider => startProviderJob(deps, provider, prompt, validationId));

    const countWithStatus = wanted => results.filter(entry => entry.status === wanted).length;
    const runningCount = countWithStatus("running");
    const skippedCount = countWithStatus("skipped");
    const synthesis = plannedJudgeSynthesis(input);

    let status;
    if (runningCount === 0) {
        status = "not_started";
    }
    else if (skippedCount > 0) {
        status = "partial";
    }
    else {
        status = "running";
    }

    // The same request object is shared by the report input and the returned run.
    const originalRequest = { question, content, focus };
    const reportInput = {
        validationId,
        status,
        startedAt,
        intent,
        originalRequest,
        modelList: providers,
        results,
        synthesis,
    };
    return {
        success: runningCount > 0,
        validationId,
        status,
        startedAt,
        intent,
        originalRequest,
        modelList: providers,
        results,
        synthesis,
        report: buildValidationReport(reportInput),
        next: "Use job_status to poll each rawJobReference.jobId, job_result to collect provider outputs, then synthesize_validation if a judge summary is needed.",
    };
}
|
|
50
|
+
/**
 * Start the judge job that synthesizes the collected provider results.
 *
 * No job is started, and an explanatory outcome is returned, when:
 *  - any provider result is still running or has a "pending" verdict,
 *  - no provider result has status "completed", or
 *  - the judge provider's runtime is reported as not installed.
 * Otherwise a job is started for the judge provider using only the completed
 * results, and its job reference is returned with status "running".
 *
 * @param deps Orchestrator dependencies; `getProviderRuntimeStatus` overrides the imported default when present.
 * @param input The original question, the provider results and the judge provider.
 * @returns The synthesis descriptor (status, judge, job reference, note).
 */
export function startJudgeSynthesis(deps, input) {
    const { judgeProvider, providerResults } = input;
    // Shape shared by every outcome in which no judge job is started.
    const withoutJob = (status, note) => ({
        status,
        judgeModel: judgeProvider,
        rawJobReference: null,
        note,
    });

    const pending = providerResults.find(entry => entry.status === "running" || entry.verdict === "pending");
    if (pending) {
        return withoutJob("waiting_for_provider_results", `Provider result for ${pending.provider} is still pending; collect terminal provider results before judge synthesis.`);
    }

    const completedResults = [];
    const omittedResults = [];
    for (const entry of providerResults) {
        if (entry.status === "completed") {
            completedResults.push(entry);
        }
        else {
            omittedResults.push(entry);
        }
    }
    if (completedResults.length === 0) {
        return withoutJob("skipped", "Judge synthesis requires at least one completed provider result; skipped, failed, canceled, or orphaned results are preserved in the report but are not judge evidence.");
    }

    const lookupRuntime = deps.getProviderRuntimeStatus ?? getProviderRuntimeStatus;
    const runtime = lookupRuntime(judgeProvider);
    if (!runtime.installed) {
        return withoutJob("skipped", `${runtime.displayName} was selected as judge but is not installed.`);
    }

    const judgePrompt = buildJudgePrompt({
        question: input.question,
        providerResults: completedResults,
    });
    const judgeArgs = buildProviderArgs(judgeProvider, judgePrompt);
    const snapshot = deps.asyncJobManager.startJob(judgeProvider, judgeArgs, `validation-judge-${randomUUID()}-${judgeProvider}`);

    let note;
    if (omittedResults.length > 0) {
        note = `Judge synthesis is running on ${runtime.displayName} using ${completedResults.length} completed provider result(s); ${omittedResults.length} non-completed result(s) were preserved but omitted.`;
    }
    else {
        note = `Judge synthesis is running on ${runtime.displayName} using completed provider results.`;
    }
    return {
        status: "running",
        judgeModel: judgeProvider,
        rawJobReference: {
            jobId: snapshot.id,
            correlationId: snapshot.correlationId,
            statusTool: "job_status",
            resultTool: "job_result",
        },
        note,
    };
}
|
|
98
|
+
/**
 * Fetch a job's result from the async job manager and normalize it.
 *
 * @param deps Orchestrator dependencies; only `asyncJobManager` is used.
 * @param provider Provider the job belongs to.
 * @param jobId Identifier of the job to collect.
 * @param model Value reported in the `model` field of the normalized entry.
 * @param maxChars Limit forwarded to `getJobResult` (defaults to 200000).
 * @returns The normalized entry, or null when the manager returns no result.
 */
export function collectValidationJobResult(deps, provider, jobId, model, maxChars = 200000) {
    const jobResult = deps.asyncJobManager.getJobResult(jobId, maxChars);
    return jobResult ? normalizeJobResult(provider, model, jobResult) : null;
}
|
|
104
|
+
/**
 * Start the validation job for one provider, or record why it was skipped.
 *
 * A provider whose runtime is reported as not installed is skipped. A provider
 * that is installed but whose login status is anything other than
 * "authenticated" is still started, with a warning attached to its entry.
 *
 * @param deps Orchestrator dependencies; `getProviderRuntimeStatus` overrides the imported default when present.
 * @param provider Provider to run.
 * @param prompt Prompt text handed to the provider CLI.
 * @param validationId Identifier of the enclosing validation run.
 * @returns The normalized entry for the started or skipped provider.
 */
function startProviderJob(deps, provider, prompt, validationId) {
    const lookupRuntime = deps.getProviderRuntimeStatus ?? getProviderRuntimeStatus;
    const runtime = lookupRuntime(provider);
    if (!runtime.installed) {
        return normalizeSkippedProvider(provider, `${runtime.displayName} runtime is not installed.`);
    }
    let warning;
    if (runtime.loginStatus !== "authenticated") {
        warning = `${runtime.displayName} login status is ${runtime.loginStatus}; the job may fail until login is complete.`;
    }
    // Third startJob argument ties the job to this run and provider
    // (presumably the job's correlation id; confirm against AsyncJobManager).
    const jobTag = `validation-${validationId}-${provider}`;
    const snapshot = deps.asyncJobManager.startJob(provider, buildProviderArgs(provider, prompt), jobTag);
    // Note: the runtime's reported version is what fills the entry's `model` field.
    return normalizeStartedJob(provider, runtime.version, snapshot, warning);
}
|
|
116
|
+
/**
 * Describe the judge synthesis step at the moment a validation run starts.
 *
 * No judge job exists yet at this point, so the job reference is always null.
 *
 * @param input Validation request; only `judgeProvider` is read.
 * @returns "not_requested" when no judge provider was given, otherwise
 *          "waiting_for_provider_results" for the requested judge.
 */
function plannedJudgeSynthesis(input) {
    const judge = input.judgeProvider;
    const requested = Boolean(judge);
    return {
        status: requested ? "waiting_for_provider_results" : "not_requested",
        judgeModel: requested ? judge : null,
        rawJobReference: null,
        note: requested
            ? "Collect provider results first, then call synthesize_validation with those results."
            : "No judge synthesis was requested; provider disagreement is preserved for the caller.",
    };
}
|
|
132
|
+
/**
 * Build the command-line arguments that hand a prompt to a provider CLI.
 *
 * @param provider Provider whose CLI will be invoked.
 * @param prompt Prompt text; always passed as the last argument.
 * @returns The argument list for the provider.
 */
function buildProviderArgs(provider, prompt) {
    switch (provider) {
        case "claude":
        case "grok":
        case "mistral":
            // Mistral Vibe mirrors Grok's `-p PROMPT` headless surface. Model selection
            // is via VIBE_ACTIVE_MODEL env var (no --model flag); for validation runs we
            // let the user's environment pick the active model.
            return ["-p", prompt];
        case "codex":
            return ["exec", "--skip-git-repo-check", prompt];
        default:
            // Any other provider receives the prompt as a bare positional argument.
            return [prompt];
    }
}
|
|
143
|
+
/**
 * Remove duplicate providers while keeping the order of first appearance.
 *
 * @param providers Requested providers, possibly with repeats.
 * @returns A new array containing each provider once.
 */
function uniqueProviders(providers) {
    return [...new Set(providers)];
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import type { NormalizedValidationResult } from "./validation-normalizer.js";
|
|
2
|
+
export type ValidationIntent = "validate" | "second_opinion" | "red_team" | "consensus" | "ask_model";
|
|
3
|
+
interface BasePromptInput {
|
|
4
|
+
intent: ValidationIntent;
|
|
5
|
+
question?: string;
|
|
6
|
+
content?: string;
|
|
7
|
+
focus?: string;
|
|
8
|
+
riskLevel?: "normal" | "high";
|
|
9
|
+
}
|
|
10
|
+
export declare function buildValidationPrompt(input: BasePromptInput): string;
|
|
11
|
+
export declare function buildJudgePrompt(input: {
|
|
12
|
+
question: string;
|
|
13
|
+
providerResults: NormalizedValidationResult[];
|
|
14
|
+
}): string;
|
|
15
|
+
export {};
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
 * Build the prompt sent to each provider for a validation run.
 *
 * Every intent except "ask_model" starts with a shared reviewer header that
 * asks for the headings Verdict, Rationale, Risks and Suggested next step.
 * The remainder depends on the intent:
 *  - "second_opinion": focus, the original question, then the answer to review.
 *  - "red_team": review intensity, an adversarial instruction, then the material.
 *  - "consensus": a truth-assessment instruction and the claim.
 *  - "ask_model": the raw question (or content) with no header at all.
 *  - anything else: focus followed by the question (or content).
 *
 * @param input Prompt input: `intent` plus optional `question`, `content`, `focus`, `riskLevel`.
 * @returns The prompt text, with sections separated by newlines.
 */
export function buildValidationPrompt(input) {
    const focus = input.focus || "correctness, missing assumptions, and practical next steps";
    const header = [
        "You are one independent reviewer in a personal cross-LLM validation run.",
        "Return a concise answer with these headings: Verdict, Rationale, Risks, Suggested next step.",
        "Do not claim consensus; other model responses will be compared separately.",
    ];
    switch (input.intent) {
        case "second_opinion":
            return [
                ...header,
                `Focus: ${focus}`,
                "",
                `Original question: ${input.question || "(not provided)"}`,
                "",
                "Answer to review:",
                input.content || "",
            ].join("\n");
        case "red_team":
            return [
                ...header,
                `Review intensity: ${input.riskLevel || "normal"}`,
                "Challenge assumptions, unsafe advice, unsupported claims, and likely failure modes.",
                "",
                // Fall back to the question, as the other intents do; otherwise a
                // request carrying only `question` gave the reviewer nothing to attack.
                input.content || input.question || "",
            ].join("\n");
        case "consensus":
            return [
                ...header,
                "Assess whether the claim is true, false, uncertain, or context-dependent.",
                "",
                `Claim: ${input.content || input.question || ""}`,
            ].join("\n");
        case "ask_model":
            // Pass-through: the provider sees exactly what the caller asked.
            return [input.question || input.content || ""].join("\n");
        default:
            return [...header, `Focus: ${focus}`, "", input.question || input.content || ""].join("\n");
    }
}
|
|
41
|
+
/**
 * Build the prompt for the judge model that synthesizes provider results.
 *
 * The prompt contains fixed judge instructions, the original request, and the
 * provider results serialized as pretty-printed JSON (two-space indent).
 *
 * @param input The original `question` and the `providerResults` to synthesize.
 * @returns The prompt text, with sections separated by newlines.
 */
export function buildJudgePrompt(input) {
    const instructions = [
        "You are the explicit judge model for a personal cross-LLM validation run.",
        "Synthesize only from the provider results below. Preserve material disagreement.",
        "Return: Summary, Agreements, Disagreements, Recommendation, Confidence, Limitations.",
    ];
    const requestLine = `Original request: ${input.question}`;
    const serializedResults = JSON.stringify(input.providerResults, null, 2);
    const sections = [...instructions, "", requestLine, "", "Provider results:", serializedResults];
    return sections.join("\n");
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import type { NormalizedValidationResult, ValidationProvider } from "./validation-normalizer.js";
|
|
2
|
+
import type { ValidationIntent } from "./validation-prompts.js";
|
|
3
|
+
export type ValidationReportConfidence = "none" | "low" | "medium" | "high";
|
|
4
|
+
export interface ValidationReportInput {
|
|
5
|
+
validationId: string;
|
|
6
|
+
status: "running" | "partial" | "not_started";
|
|
7
|
+
startedAt: string;
|
|
8
|
+
intent: ValidationIntent;
|
|
9
|
+
originalRequest: {
|
|
10
|
+
question?: string;
|
|
11
|
+
content?: string;
|
|
12
|
+
focus?: string;
|
|
13
|
+
};
|
|
14
|
+
modelList: ValidationProvider[];
|
|
15
|
+
results: NormalizedValidationResult[];
|
|
16
|
+
synthesis: {
|
|
17
|
+
status: "not_requested" | "waiting_for_provider_results" | "running" | "skipped";
|
|
18
|
+
judgeModel: ValidationProvider | null;
|
|
19
|
+
rawJobReference: NormalizedValidationResult["rawJobReference"];
|
|
20
|
+
note: string;
|
|
21
|
+
};
|
|
22
|
+
}
|
|
23
|
+
export interface ValidationReport {
|
|
24
|
+
schemaVersion: "validation-report.v1";
|
|
25
|
+
humanReadable: string;
|
|
26
|
+
structuredContent: {
|
|
27
|
+
validationId: string;
|
|
28
|
+
status: ValidationReportInput["status"];
|
|
29
|
+
startedAt: string;
|
|
30
|
+
intent: ValidationIntent;
|
|
31
|
+
originalRequest: ValidationReportInput["originalRequest"];
|
|
32
|
+
modelList: ValidationProvider[];
|
|
33
|
+
perModelOutputs: Array<{
|
|
34
|
+
provider: ValidationProvider;
|
|
35
|
+
model: string | null;
|
|
36
|
+
status: NormalizedValidationResult["status"];
|
|
37
|
+
verdict: string | null;
|
|
38
|
+
rationale: string | null;
|
|
39
|
+
risks: string[];
|
|
40
|
+
jobId: string | null;
|
|
41
|
+
correlationId: string | null;
|
|
42
|
+
warning: string | null;
|
|
43
|
+
error: string | null;
|
|
44
|
+
}>;
|
|
45
|
+
disagreements: {
|
|
46
|
+
hasMaterialDisagreement: boolean;
|
|
47
|
+
summary: string;
|
|
48
|
+
signals: string[];
|
|
49
|
+
};
|
|
50
|
+
finalRecommendation: string;
|
|
51
|
+
confidence: ValidationReportConfidence;
|
|
52
|
+
limitations: string[];
|
|
53
|
+
jobIds: string[];
|
|
54
|
+
synthesis: ValidationReportInput["synthesis"];
|
|
55
|
+
};
|
|
56
|
+
}
|
|
57
|
+
export declare function buildValidationReport(input: ValidationReportInput): ValidationReport;
|