llm-cli-gateway 2.2.0 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -57,6 +57,12 @@ export function registerValidationTools(server, deps) {
57
57
  judgeModel: providerSchema
58
58
  .optional()
59
59
  .describe("Optional provider to run an explicit judge synthesis job."),
60
+ }, {
61
+ title: "Multi-model validation",
62
+ readOnlyHint: false,
63
+ destructiveHint: true,
64
+ idempotentHint: false,
65
+ openWorldHint: true,
60
66
  }, async ({ question, models, focus, judgeModel }) => textResponse({
61
67
  success: true,
62
68
  tool: "validate_with_models",
@@ -73,6 +79,12 @@ export function registerValidationTools(server, deps) {
73
79
  answer: z.string().min(1).describe("Answer to review."),
74
80
  question: z.string().optional().describe("Original question, if available."),
75
81
  model: providerSchema.default("codex").describe("Provider to ask for the second opinion."),
82
+ }, {
83
+ title: "Second opinion",
84
+ readOnlyHint: false,
85
+ destructiveHint: true,
86
+ idempotentHint: false,
87
+ openWorldHint: true,
76
88
  }, async ({ answer, question, model }) => textResponse({
77
89
  success: true,
78
90
  tool: "second_opinion",
@@ -87,6 +99,12 @@ export function registerValidationTools(server, deps) {
87
99
  server.tool("compare_answers", "Summarize agreement/differences between caller-provided answers LOCALLY — does not call any provider.", {
88
100
  question: z.string().min(1).describe("Question the answers respond to."),
89
101
  answers: z.array(z.string().min(1)).min(2).describe("Two or more answers to compare."),
102
+ }, {
103
+ title: "Compare answers (local)",
104
+ readOnlyHint: true,
105
+ destructiveHint: false,
106
+ idempotentHint: true,
107
+ openWorldHint: false,
90
108
  }, async ({ question, answers }) => textResponse({
91
109
  success: true,
92
110
  tool: "compare_answers",
@@ -106,6 +124,12 @@ export function registerValidationTools(server, deps) {
106
124
  .default("normal")
107
125
  .describe("How aggressively to review."),
108
126
  models: providerListSchema.describe("Providers to ask for adversarial review."),
127
+ }, {
128
+ title: "Red-team review",
129
+ readOnlyHint: false,
130
+ destructiveHint: true,
131
+ idempotentHint: false,
132
+ openWorldHint: true,
109
133
  }, async ({ content, riskLevel, models }) => textResponse({
110
134
  success: true,
111
135
  tool: "red_team_review",
@@ -120,6 +144,12 @@ export function registerValidationTools(server, deps) {
120
144
  server.tool("consensus_check", "Ask provider CLIs whether they agree or disagree with a claim (starts validation jobs).", {
121
145
  claim: z.string().min(1).describe("Claim to check across providers."),
122
146
  models: providerListSchema.describe("Providers to ask for agreement or disagreement."),
147
+ }, {
148
+ title: "Consensus check",
149
+ readOnlyHint: false,
150
+ destructiveHint: true,
151
+ idempotentHint: false,
152
+ openWorldHint: true,
123
153
  }, async ({ claim, models }) => textResponse({
124
154
  success: true,
125
155
  tool: "consensus_check",
@@ -133,6 +163,12 @@ export function registerValidationTools(server, deps) {
133
163
  server.tool("ask_model", "Ask one provider CLI a question through the simplified validation surface (starts a validation job).", {
134
164
  question: z.string().min(1).describe("Question for one provider."),
135
165
  model: providerSchema.default("claude").describe("Provider to ask."),
166
+ }, {
167
+ title: "Ask one model",
168
+ readOnlyHint: false,
169
+ destructiveHint: true,
170
+ idempotentHint: false,
171
+ openWorldHint: true,
136
172
  }, async ({ question, model }) => textResponse({
137
173
  success: true,
138
174
  tool: "ask_model",
@@ -150,6 +186,12 @@ export function registerValidationTools(server, deps) {
150
186
  .min(1)
151
187
  .describe("Terminal normalized provider results from job_result."),
152
188
  judgeModel: providerSchema.default("codex").describe("Provider to run the judge synthesis."),
189
+ }, {
190
+ title: "Synthesize validation",
191
+ readOnlyHint: false,
192
+ destructiveHint: true,
193
+ idempotentHint: false,
194
+ openWorldHint: true,
153
195
  }, async ({ question, providerResults, judgeModel }) => textResponse({
154
196
  success: true,
155
197
  tool: "synthesize_validation",
@@ -160,9 +202,21 @@ export function registerValidationTools(server, deps) {
160
202
  judgeProvider: judgeModel,
161
203
  }),
162
204
  }));
163
- server.tool("list_available_models", "List models and capabilities for every available provider CLI (takes no arguments; complements per-provider list_models).", {}, async () => textResponse({ success: true, models: getAvailableCliInfo() }));
205
+ server.tool("list_available_models", "List models and capabilities for every available provider CLI (takes no arguments; complements per-provider list_models).", {}, {
206
+ title: "All provider models",
207
+ readOnlyHint: true,
208
+ destructiveHint: false,
209
+ idempotentHint: true,
210
+ openWorldHint: false,
211
+ }, async () => textResponse({ success: true, models: getAvailableCliInfo() }));
164
212
  server.tool("job_status", "Check a VALIDATION job's status (jobs started by validate_with_models/ask_model/etc.) — distinct from llm_job_status, which tracks provider request jobs.", {
165
213
  jobId: z.string().min(1).describe("Validation job ID."),
214
+ }, {
215
+ title: "Validation job status",
216
+ readOnlyHint: true,
217
+ destructiveHint: false,
218
+ idempotentHint: true,
219
+ openWorldHint: false,
166
220
  }, async ({ jobId }) => {
167
221
  const job = deps.asyncJobManager.getJobSnapshot(jobId);
168
222
  if (!job) {
@@ -182,6 +236,12 @@ export function registerValidationTools(server, deps) {
182
236
  .max(2000000)
183
237
  .default(200000)
184
238
  .describe("Maximum result size."),
239
+ }, {
240
+ title: "Validation job result",
241
+ readOnlyHint: true,
242
+ destructiveHint: false,
243
+ idempotentHint: true,
244
+ openWorldHint: false,
185
245
  }, async ({ jobId, provider, maxChars }) => {
186
246
  const result = deps.asyncJobManager.getJobResult(jobId, maxChars);
187
247
  if (!result) {
@@ -0,0 +1,43 @@
1
+ import type { Logger } from "./logger.js";
2
/** Role attached to one structured input message. */
export type XaiResponsesRole =
    | "system"
    | "user"
    | "assistant";
/** Effort level that createXaiResponse forwards as `reasoning.effort` in the request body. */
export type XaiReasoningEffort =
    | "none"
    | "low"
    | "medium"
    | "high";
4
/** One role-tagged message of a structured `input` array. */
export interface XaiResponsesInputMessage {
    /** Who the message is attributed to. */
    role: XaiResponsesRole;
    /** Plain-text body of the message. */
    content: string;
}
8
/** Parameters accepted by createXaiResponse. */
export interface XaiResponsesRequest {
    /** API base URL; "/responses" is appended. Must be https unless it targets localhost/loopback. */
    baseUrl: string;
    /** Sent as the bearer token in the `authorization` header. */
    apiKey: string;
    /** Model identifier, sent as `model`. */
    model: string;
    /** Prompt, either a single string or a list of role-tagged messages; sent as `input`. */
    input: string | Array<XaiResponsesInputMessage>;
    /** Sent as `instructions` when non-empty. */
    instructions?: string;
    /** Sent as `previous_response_id` when non-empty. */
    previousResponseId?: string;
    /** Sent as `max_output_tokens` when defined. */
    maxOutputTokens?: number;
    /** Sent as `temperature` when defined. */
    temperature?: number;
    /** Sent as `top_p` when defined. */
    topP?: number;
    /** Sent as `reasoning.effort` when defined. */
    reasoningEffort?: XaiReasoningEffort;
    /** Request timeout in milliseconds; the implementation falls back to 600000 when omitted. */
    timeoutMs?: number;
}
21
/** Usage figures normalised from the response's `usage` object; a field is undefined when the API did not report it as a number. */
export interface XaiResponsesUsage {
    /** From `usage.input_tokens`, else `usage.prompt_tokens`. */
    inputTokens?: number;
    /** From `usage.output_tokens`, else `usage.completion_tokens`. */
    outputTokens?: number;
    /** From `input_tokens_details.cached_tokens`, else `prompt_tokens_details.cached_tokens`. */
    cacheReadTokens?: number;
    /** Cost in USD derived from `cost_in_usd_ticks` or `cost_in_nano_usd`. */
    costUsd?: number;
    /** The `usage` object exactly as received. */
    raw?: unknown;
}
28
/** Normalised outcome of a successful createXaiResponse call. */
export interface XaiResponsesResult {
    /** The response `id`, or null when the API returned no string id. */
    responseId: string | null;
    /** The response `model`; "unknown" when the API returned no string model. */
    model: string;
    /** The response `status`, or null when the API returned no string status. */
    status: string | null;
    /** Concatenated message text, or "" when the response contained none. */
    text: string;
    /** Token and cost accounting. */
    usage: XaiResponsesUsage;
    /** The parsed response body exactly as received. */
    raw: unknown;
    /** HTTP status recorded for the response. */
    httpStatus: number;
}
37
/** Error type used by the xAI client for failed requests. */
export declare class XaiApiError extends Error {
    /** HTTP status of the failed response, or null when no response status applies. */
    readonly status: number | null;
    /** Raw response body that accompanied the failure ("" when there was none). */
    readonly responseText: string;
    /** Optional error code (for example "ETIMEDOUT" on a request timeout). */
    readonly code?: string | undefined;
    constructor(
        message: string,
        status?: number | null,
        responseText?: string,
        code?: string | undefined,
    );
}
43
/**
 * POST a request to `<baseUrl>/responses` and return the normalised result.
 *
 * @param params Connection details and generation options.
 * @param logger Optional logger used for retry and circuit-breaker warnings.
 */
export declare function createXaiResponse(
    params: XaiResponsesRequest,
    logger?: Logger,
): Promise<XaiResponsesResult>;
@@ -0,0 +1,191 @@
1
+ import { request as httpRequest } from "node:http";
2
+ import { request as httpsRequest } from "node:https";
3
+ import { URL } from "node:url";
4
+ import { createCircuitBreaker, withRetry } from "./retry.js";
5
+ import { logWarn, noopLogger } from "./logger.js";
6
/** Largest response body, in bytes, that is buffered before the request is aborted (50 MiB). */
const MAX_RESPONSE_BYTES = 50 * 2 ** 20;
/** Timeout in milliseconds applied when the caller does not pass `timeoutMs` (10 minutes). */
const DEFAULT_TIMEOUT_MS = 10 * 60 * 1_000;
8
/**
 * Error raised by the xAI client.
 *
 * Carries the HTTP status (null when no status applies), the raw response
 * text ("" by default) and an optional error code alongside the message.
 */
export class XaiApiError extends Error {
    status;
    responseText;
    code;
    /**
     * @param {string} message - Human-readable description.
     * @param {number | null} [status=null] - HTTP status of the failed response.
     * @param {string} [responseText=""] - Raw response body.
     * @param {string} [code] - Optional error code such as "ETIMEDOUT".
     */
    constructor(message, status = null, responseText = "", code) {
        super(message);
        Object.assign(this, { status, responseText, code, name: "XaiApiError" });
    }
}
20
/** Module-wide circuit breaker, created on first use. */
let xaiCircuitBreaker = null;
/**
 * Return the shared circuit breaker, creating it on the first call.
 *
 * The breaker is built once, so state-change warnings always go to the logger
 * passed on that first call; loggers passed later are not used by it.
 *
 * @param {unknown} logger - Logger handed to logWarn on breaker state changes.
 * @returns {unknown} The value returned by createCircuitBreaker.
 */
function getXaiCircuitBreaker(logger) {
    if (xaiCircuitBreaker == null) {
        const onStateChange = state => logWarn(logger, `[xai-api] circuit breaker state changed to ${state}`);
        xaiCircuitBreaker = createCircuitBreaker({
            failureThreshold: 3,
            resetTimeout: 60_000,
            onStateChange,
        });
    }
    return xaiCircuitBreaker;
}
29
/**
 * Decide whether a failed request is worth retrying.
 *
 * True for a numeric `status` of 429 or 500 and above, and for the socket
 * error codes ECONNRESET, ETIMEDOUT, ECONNREFUSED and EPIPE.
 *
 * @param {unknown} error - Rejection value from the request.
 * @returns {boolean} Whether the failure is considered transient.
 */
function isHttpTransient(error) {
    const status = error?.status;
    if (typeof status === "number" && (status === 429 || status >= 500)) {
        return true;
    }
    switch (String(error?.code ?? "")) {
        case "ECONNRESET":
        case "ETIMEDOUT":
        case "ECONNREFUSED":
        case "EPIPE":
            return true;
        default:
            return false;
    }
}
35
/**
 * Build the `/responses` endpoint URL from a base URL.
 *
 * Trailing slashes on the base are dropped before "/responses" is appended.
 * Plain http is accepted only for localhost, 127.0.0.1 and ::1.
 *
 * @param {string} baseUrl - API base URL.
 * @returns {URL} The endpoint URL.
 * @throws {XaiApiError} When the URL is neither https nor http to a loopback host.
 * @throws {TypeError} When the resulting string is not a valid URL.
 */
function responsesUrl(baseUrl) {
    const endpoint = new URL(`${baseUrl.replace(/\/+$/, "")}/responses`);
    if (endpoint.protocol === "https:") {
        return endpoint;
    }
    const loopbackHosts = ["localhost", "127.0.0.1", "::1", "[::1]"];
    if (endpoint.protocol === "http:" && loopbackHosts.includes(endpoint.hostname)) {
        return endpoint;
    }
    throw new XaiApiError("xAI API baseUrl must use https unless it targets localhost/loopback");
}
44
/**
 * Compose the error message for a non-2xx response.
 *
 * When the body is JSON, the first of `error.message`, `message` or `error`
 * that is a non-empty string is appended. Otherwise the first 1000 characters
 * of the raw body are appended. An empty body yields just the status line.
 *
 * @param {number} status - HTTP status code.
 * @param {string} body - Raw response body.
 * @returns {string} Message naming the status and the best available detail.
 */
function extractErrorMessage(status, body) {
    const headline = `xAI API request failed with HTTP ${status}`;
    if (!body) {
        return headline;
    }
    let detail;
    try {
        const payload = JSON.parse(body);
        detail = payload?.error?.message ?? payload?.message ?? payload?.error;
    }
    catch {
        // The body is not JSON; the raw text is used below.
    }
    const hasDetail = typeof detail === "string" && detail.length > 0;
    return hasDetail ? `${headline}: ${detail}` : `${headline}: ${body.slice(0, 1000)}`;
}
58
/**
 * Convert the cost reported in a usage object to US dollars.
 *
 * `cost_in_usd_ticks` is divided by 10,000,000,000. If it is not a finite
 * number, `cost_in_nano_usd` is divided by 1,000,000,000 instead.
 *
 * @param {unknown} usage - The response's `usage` object.
 * @returns {number | undefined} Cost in USD, or undefined when neither field is a finite number.
 */
function normalizeCostUsd(usage) {
    const { cost_in_usd_ticks: ticks, cost_in_nano_usd: nanos } = usage ?? {};
    // Number.isFinite is false for every non-number, so no separate typeof check is needed.
    if (Number.isFinite(ticks)) {
        return ticks / 1e10;
    }
    if (Number.isFinite(nanos)) {
        return nanos / 1e9;
    }
    return undefined;
}
67
/**
 * Collect the assistant text from a parsed response body.
 *
 * Concatenates the `text` of every "output_text" or "text" part found in the
 * `content` of "message" items in `output`. When no such part exists, a string
 * `output_text` on the response is returned instead, and "" as a last resort.
 *
 * @param {unknown} parsed - Parsed response body.
 * @returns {string} The extracted text.
 */
function extractResponseText(parsed) {
    const items = Array.isArray(parsed?.output) ? parsed.output : [];
    const pieces = items
        .filter(item => item?.type === "message" && Array.isArray(item.content))
        .flatMap(item => item.content)
        .filter(part => (part?.type === "output_text" || part?.type === "text") && typeof part.text === "string")
        .map(part => part.text);
    if (pieces.length > 0) {
        return pieces.join("");
    }
    return typeof parsed?.output_text === "string" ? parsed.output_text : "";
}
86
/**
 * Parse a response body and normalise it into the result shape.
 *
 * String fields fall back to null (`responseId`, `status`) or "unknown"
 * (`model`). Token counts accept either the `input_tokens`/`output_tokens`
 * names or the `prompt_tokens`/`completion_tokens` names.
 *
 * @param {number} status - HTTP status to record as `httpStatus`.
 * @param {string} body - Raw JSON response body.
 * @returns {object} Normalised result including the raw parsed body and usage.
 * @throws {SyntaxError} When `body` is not valid JSON (from JSON.parse).
 */
function parseResponsesResult(status, body) {
    const parsed = JSON.parse(body);
    const usage = parsed?.usage ?? {};
    const stringOr = (value, fallback) => (typeof value === "string" ? value : fallback);
    // First candidate that is a number, or undefined when none is.
    const firstNumber = (...candidates) => candidates.find(value => typeof value === "number");
    return {
        responseId: stringOr(parsed?.id, null),
        model: stringOr(parsed?.model, "unknown"),
        status: stringOr(parsed?.status, null),
        text: extractResponseText(parsed),
        usage: {
            inputTokens: firstNumber(usage.input_tokens, usage.prompt_tokens),
            outputTokens: firstNumber(usage.output_tokens, usage.completion_tokens),
            cacheReadTokens: firstNumber(usage?.input_tokens_details?.cached_tokens, usage?.prompt_tokens_details?.cached_tokens),
            costUsd: normalizeCostUsd(usage),
            raw: usage,
        },
        raw: parsed,
        httpStatus: status,
    };
}
117
/**
 * POST a JSON payload and resolve with the raw response body text.
 *
 * @param {URL} url - Target endpoint; its protocol selects the http or https client.
 * @param {unknown} body - Value serialised with JSON.stringify and sent as the request body.
 * @param {string} apiKey - Sent as `authorization: Bearer <apiKey>`.
 * @param {number} timeoutMs - Value passed as the request's `timeout` option.
 * @returns {Promise<string>} UTF-8 text of a 2xx response body.
 *
 * Rejects with an XaiApiError for a non-2xx status (carrying the status and
 * body), for a timeout (code "ETIMEDOUT") and for a body larger than
 * MAX_RESPONSE_BYTES. Other request or response stream errors are passed
 * through unchanged.
 */
function postJson(url, body, apiKey, timeoutMs) {
    const payload = JSON.stringify(body);
    const requester = url.protocol === "https:" ? httpsRequest : httpRequest;
    return new Promise((resolve, reject) => {
        const req = requester(url, {
            method: "POST",
            timeout: timeoutMs,
            headers: {
                authorization: `Bearer ${apiKey}`,
                "content-type": "application/json",
                accept: "application/json",
                "content-length": Buffer.byteLength(payload),
            },
        }, res => {
            const chunks = [];
            let bytes = 0;
            // Once the response has started, a dropped connection and any
            // req.destroy(err) (size limit below, timeout handler) are reported
            // as 'error' on the response stream, not on the request. Without a
            // listener that event is an uncaught exception and the promise
            // would never settle.
            res.on("error", reject);
            res.on("data", chunk => {
                const buf = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
                bytes += buf.length;
                if (bytes > MAX_RESPONSE_BYTES) {
                    req.destroy(new XaiApiError("xAI API response exceeded the 50MB limit", null));
                    return;
                }
                chunks.push(buf);
            });
            res.on("end", () => {
                const text = Buffer.concat(chunks).toString("utf8");
                const status = res.statusCode ?? 0;
                if (status < 200 || status >= 300) {
                    reject(new XaiApiError(extractErrorMessage(status, text), status, text));
                    return;
                }
                resolve(text);
            });
        });
        req.on("timeout", () => {
            // The 'timeout' event only signals inactivity; the request must be torn down explicitly.
            req.destroy(new XaiApiError("xAI API request timed out", null, "", "ETIMEDOUT"));
        });
        req.on("error", reject);
        req.end(payload);
    });
}
160
/**
 * Send one request to the `/responses` endpoint and return the normalised result.
 *
 * The request body always carries `model`, `input` and `store: true`. The
 * optional fields are added only when supplied. The POST runs through
 * withRetry with the shared circuit breaker, retrying failures that
 * isHttpTransient accepts.
 *
 * NOTE(review): the result's `httpStatus` is always 200 because postJson
 * returns only the body text, although it resolves for any 2xx status.
 *
 * @param {object} params - Connection details and generation options.
 * @param {unknown} [logger=noopLogger] - Logger for retry and breaker warnings.
 * @returns {Promise<object>} The value produced by parseResponsesResult.
 */
export async function createXaiResponse(params, logger = noopLogger) {
    const { instructions, previousResponseId, maxOutputTokens, temperature, topP, reasoningEffort } = params;
    const requestBody = {
        model: params.model,
        input: params.input,
        store: true,
        ...(instructions ? { instructions } : {}),
        ...(previousResponseId ? { previous_response_id: previousResponseId } : {}),
        ...(maxOutputTokens !== undefined ? { max_output_tokens: maxOutputTokens } : {}),
        ...(temperature !== undefined ? { temperature } : {}),
        ...(topP !== undefined ? { top_p: topP } : {}),
        ...(reasoningEffort !== undefined ? { reasoning: { effort: reasoningEffort } } : {}),
    };
    const url = responsesUrl(params.baseUrl);
    const timeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;
    const send = () => postJson(url, requestBody, params.apiKey, timeoutMs);
    const retryOptions = {
        initialDelay: 1_000,
        maxDelay: 30_000,
        factor: 2,
        isTransient: isHttpTransient,
        onRetry: (error, attempt, delay) => {
            logWarn(logger, `[xai-api] transient request failure on attempt ${attempt}; retrying in ${delay}ms: ${error.message}`);
        },
    };
    const body = await withRetry(send, getXaiCircuitBreaker(logger), retryOptions, logger);
    return parseResponsesResult(200, body);
}
@@ -0,0 +1,65 @@
1
+ -- Initial schema for llm-cli-gateway PostgreSQL backend
2
+ -- Creates the sessions and active_sessions tables, their indexes, the session_summary view, the cleanup_expired_sessions() function and the schema_migrations tracking table.
3
+
4
+ -- sessions: one row per session, keyed by a TEXT id; cli must be one of the provider names listed in the CHECK
5
+ CREATE TABLE IF NOT EXISTS sessions (
6
+ id TEXT PRIMARY KEY,
7
+ cli VARCHAR(32) NOT NULL CHECK (cli IN ('claude', 'codex', 'gemini', 'grok', 'mistral', 'grok-api')),
8
+ description TEXT,
9
+ metadata JSONB DEFAULT '{}'::JSONB,
10
+ created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
11
+ last_used_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
12
+ );
13
+
14
+ -- active_sessions: cli is the primary key, so each CLI has at most one active session; a row is removed when the session it references is deleted (ON DELETE CASCADE)
15
+ CREATE TABLE IF NOT EXISTS active_sessions (
16
+ cli VARCHAR(32) PRIMARY KEY CHECK (cli IN ('claude', 'codex', 'gemini', 'grok', 'mistral', 'grok-api')),
17
+ session_id TEXT REFERENCES sessions(id) ON DELETE CASCADE,
18
+ updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
19
+ );
20
+
21
+ -- Indexes on sessions: by cli, by last_used_at (newest first), a GIN index on the JSONB metadata, and by (cli, last_used_at newest first)
22
+ CREATE INDEX IF NOT EXISTS idx_sessions_cli ON sessions(cli);
23
+ CREATE INDEX IF NOT EXISTS idx_sessions_last_used_at ON sessions(last_used_at DESC);
24
+ CREATE INDEX IF NOT EXISTS idx_sessions_metadata ON sessions USING GIN(metadata);
25
+ CREATE INDEX IF NOT EXISTS idx_sessions_cli_last_used ON sessions(cli, last_used_at DESC);
26
+
27
+ -- session_summary: every session (sessions LEFT JOIN active_sessions) plus is_active, which is true when an active_sessions row references the session
28
+ CREATE OR REPLACE VIEW session_summary AS
29
+ SELECT
30
+ s.id,
31
+ s.cli,
32
+ s.description,
33
+ s.created_at,
34
+ s.last_used_at,
35
+ (a.session_id IS NOT NULL) AS is_active
36
+ FROM sessions s
37
+ LEFT JOIN active_sessions a ON s.id = a.session_id;
38
+
39
+ -- cleanup_expired_sessions(max_age_days, default 30): deletes sessions last used more than max_age_days days ago that are not referenced by active_sessions, and returns the number of rows deleted
40
+ CREATE OR REPLACE FUNCTION cleanup_expired_sessions(max_age_days INTEGER DEFAULT 30)
41
+ RETURNS INTEGER AS $$
42
+ DECLARE
43
+ deleted_count INTEGER;
44
+ BEGIN
45
+ -- Delete sessions older than max_age_days that are not active; NULL session_id rows are filtered out so NOT IN never compares against NULL
46
+ DELETE FROM sessions
47
+ WHERE last_used_at < NOW() - INTERVAL '1 day' * max_age_days
48
+ AND id NOT IN (SELECT session_id FROM active_sessions WHERE session_id IS NOT NULL);
49
+
50
+ GET DIAGNOSTICS deleted_count = ROW_COUNT;
51
+ RETURN deleted_count;
52
+ END;
53
+ $$ LANGUAGE plpgsql;
54
+
55
+ -- Schema migrations tracking table: one row per applied migration version
56
+ CREATE TABLE IF NOT EXISTS schema_migrations (
57
+ version INTEGER PRIMARY KEY,
58
+ name VARCHAR(255) NOT NULL,
59
+ applied_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
60
+ );
61
+
62
+ -- Record this migration; ON CONFLICT DO NOTHING leaves an existing version 1 row untouched when the script is re-run
63
+ INSERT INTO schema_migrations (version, name)
64
+ VALUES (1, '001_initial_schema')
65
+ ON CONFLICT (version) DO NOTHING;
@@ -0,0 +1,26 @@
1
+ -- Convert session identifiers from UUID to opaque string IDs (TEXT); the DO block changes nothing unless public.sessions.id is still of type uuid
2
+ -- Keeps compatibility with file-based manager and legacy custom IDs. The foreign key on active_sessions.session_id is dropped before both columns are retyped and re-created afterwards.
3
+
4
+ DO $$
5
+ BEGIN
6
+ IF EXISTS (
7
+ SELECT 1
8
+ FROM information_schema.columns
9
+ WHERE table_schema = 'public'
10
+ AND table_name = 'sessions'
11
+ AND column_name = 'id'
12
+ AND udt_name = 'uuid'
13
+ ) THEN
14
+ ALTER TABLE active_sessions DROP CONSTRAINT IF EXISTS active_sessions_session_id_fkey;
15
+ ALTER TABLE sessions ALTER COLUMN id TYPE TEXT USING id::text;
16
+ ALTER TABLE active_sessions ALTER COLUMN session_id TYPE TEXT USING session_id::text;
17
+ ALTER TABLE active_sessions
18
+ ADD CONSTRAINT active_sessions_session_id_fkey
19
+ FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE;
20
+ END IF;
21
+ END;
22
+ $$ LANGUAGE plpgsql;
23
+
24
+ INSERT INTO schema_migrations (version, name)
25
+ VALUES (2, '002_session_ids_as_text')
26
+ ON CONFLICT (version) DO NOTHING;
@@ -0,0 +1,20 @@
1
+ -- Widen session provider constraints for API-backed providers.
2
+ -- Existing PostgreSQL installations created before the Grok API provider split
3
+ -- only accepted the original CLI subset. Keep the column values opaque strings
4
+ -- but enforce the current provider set. For sessions and active_sessions alike: drop the old <table>_cli_check if present, set cli to VARCHAR(32), then add the CHECK with the full provider list.
5
+
6
+ ALTER TABLE sessions DROP CONSTRAINT IF EXISTS sessions_cli_check;
7
+ ALTER TABLE sessions ALTER COLUMN cli TYPE VARCHAR(32);
8
+ ALTER TABLE sessions
9
+ ADD CONSTRAINT sessions_cli_check
10
+ CHECK (cli IN ('claude', 'codex', 'gemini', 'grok', 'mistral', 'grok-api'));
11
+
12
+ ALTER TABLE active_sessions DROP CONSTRAINT IF EXISTS active_sessions_cli_check;
13
+ ALTER TABLE active_sessions ALTER COLUMN cli TYPE VARCHAR(32);
14
+ ALTER TABLE active_sessions
15
+ ADD CONSTRAINT active_sessions_cli_check
16
+ CHECK (cli IN ('claude', 'codex', 'gemini', 'grok', 'mistral', 'grok-api'));
17
+
18
+ INSERT INTO schema_migrations (version, name)
19
+ VALUES (3, '003_provider_type_sessions')
20
+ ON CONFLICT (version) DO NOTHING;
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "llm-cli-gateway",
3
- "version": "2.0.0",
3
+ "version": "2.4.0",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "llm-cli-gateway",
9
- "version": "2.0.0",
9
+ "version": "2.4.0",
10
10
  "license": "MIT",
11
11
  "dependencies": {
12
12
  "@modelcontextprotocol/sdk": "^1.29.0",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "llm-cli-gateway",
3
- "version": "2.2.0",
3
+ "version": "2.4.0",
4
4
  "mcpName": "io.github.verivus-oss/llm-cli-gateway",
5
5
  "description": "MCP server providing unified access to Claude Code, Codex, Gemini, Grok, and Mistral Vibe CLIs with session management, retry logic, async job orchestration, durable job results, and cross-LLM validation.",
6
6
  "license": "MIT",
@@ -46,6 +46,7 @@
46
46
  "dist/**/*.js",
47
47
  "dist/**/*.d.ts",
48
48
  "!dist/__tests__/**",
49
+ "migrations/**/*.sql",
49
50
  "npm-shrinkwrap.json",
50
51
  "setup/status.schema.json",
51
52
  "README.md",