llm-cli-gateway 1.5.34 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +111 -0
- package/README.md +31 -0
- package/dist/cache-stats.d.ts +112 -0
- package/dist/cache-stats.js +225 -0
- package/dist/config.d.ts +41 -0
- package/dist/config.js +109 -0
- package/dist/doctor.d.ts +42 -1
- package/dist/doctor.js +121 -2
- package/dist/flight-recorder.d.ts +27 -0
- package/dist/flight-recorder.js +79 -2
- package/dist/index.d.ts +46 -9
- package/dist/index.js +470 -86
- package/dist/pricing.d.ts +54 -0
- package/dist/pricing.js +100 -0
- package/dist/prompt-parts.d.ts +38 -0
- package/dist/prompt-parts.js +42 -0
- package/dist/resources.d.ts +32 -1
- package/dist/resources.js +52 -1
- package/package.json +2 -1
- package/setup/status.schema.json +39 -0
- package/socket.yml +10 -0
package/dist/index.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2
|
+
import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
3
3
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
4
4
|
import { randomUUID } from "crypto";
|
|
5
5
|
import { existsSync, readFileSync, readdirSync, renameSync, unlinkSync } from "fs";
|
|
@@ -14,7 +14,7 @@ import { createSessionManager } from "./session-manager.js";
|
|
|
14
14
|
import { ResourceProvider } from "./resources.js";
|
|
15
15
|
import { PerformanceMetrics } from "./metrics.js";
|
|
16
16
|
import { estimateTokens, optimizePrompt as optimizePromptText, optimizeResponse as optimizeResponseText, } from "./optimizer.js";
|
|
17
|
-
import { loadConfig, loadPersistenceConfig } from "./config.js";
|
|
17
|
+
import { loadConfig, loadPersistenceConfig, loadCacheAwarenessConfig, } from "./config.js";
|
|
18
18
|
import { checkHealth } from "./health.js";
|
|
19
19
|
import { clearModelRegistryCache, getAvailableCliInfo, getCliInfo, resolveModelAlias, } from "./model-registry.js";
|
|
20
20
|
import { AsyncJobManager } from "./async-job-manager.js";
|
|
@@ -24,6 +24,8 @@ import { checkReviewIntegrity } from "./review-integrity.js";
|
|
|
24
24
|
import { buildClaudeMcpConfig, CLAUDE_MCP_SERVER_NAMES, } from "./claude-mcp-config.js";
|
|
25
25
|
import { resolveGrokSessionArgs, resolveMistralSessionArgs, resolveCodexSessionArgs, sanitizeCliArgValues, prepareMistralRequest as buildMistralCliInvocation, MISTRAL_AGENT_MODES, GATEWAY_SESSION_PREFIX, resolveClaudePermissionFlags, resolveCodexSandboxFlags, CLAUDE_PERMISSION_MODES, GEMINI_APPROVAL_MODES, CODEX_SANDBOX_MODES, CODEX_ASK_FOR_APPROVAL_MODES, CLAUDE_EFFORT_LEVELS, prepareClaudeHighImpactFlags, validateClaudeAgentsMap, prepareCodexHighImpactFlags, prepareCodexForkRequest, CODEX_CONFIG_OVERRIDES_SCHEMA, prepareGeminiHighImpactFlags, prependGeminiAttachments, resolveGeminiSessionPlan, GEMINI_HIGH_IMPACT_PARAMS_SCHEMA, } from "./request-helpers.js";
|
|
26
26
|
import { createFlightRecorder } from "./flight-recorder.js";
|
|
27
|
+
import { resolvePromptInput, PromptPartsSchema } from "./prompt-parts.js";
|
|
28
|
+
import { computeSessionCacheStats, computeTtlRemaining } from "./cache-stats.js";
|
|
27
29
|
import { getCliVersions, runCliUpgrade } from "./cli-updater.js";
|
|
28
30
|
import { startHttpGateway } from "./http-transport.js";
|
|
29
31
|
import { printDoctorJson } from "./doctor.js";
|
|
@@ -108,6 +110,22 @@ const SYNC_DEADLINE_MS = (() => {
|
|
|
108
110
|
const __filename = fileURLToPath(import.meta.url);
|
|
109
111
|
const __dirname = dirname(__filename);
|
|
110
112
|
const SKILLS_DIR = join(__dirname, "..", ".agents", "skills");
|
|
113
|
+
function packageVersion() {
|
|
114
|
+
const candidates = [
|
|
115
|
+
join(__dirname, "..", "package.json"),
|
|
116
|
+
join(__dirname, "..", "..", "package.json"),
|
|
117
|
+
];
|
|
118
|
+
for (const candidate of candidates) {
|
|
119
|
+
try {
|
|
120
|
+
const parsed = JSON.parse(readFileSync(candidate, "utf8"));
|
|
121
|
+
return parsed.version || "unknown";
|
|
122
|
+
}
|
|
123
|
+
catch {
|
|
124
|
+
// Try next candidate.
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
return "unknown";
|
|
128
|
+
}
|
|
111
129
|
function loadSkills() {
|
|
112
130
|
const skills = [];
|
|
113
131
|
try {
|
|
@@ -158,29 +176,56 @@ let sessionManager;
|
|
|
158
176
|
let db = null;
|
|
159
177
|
const performanceMetrics = new PerformanceMetrics();
|
|
160
178
|
let resourceProvider;
|
|
161
|
-
|
|
179
|
+
let flightRecorder = null;
|
|
162
180
|
// Resolved persistence config — single source of truth for the async-job backend.
|
|
163
181
|
// Driven by ~/.llm-cli-gateway/config.toml (+ deprecated env-var overrides).
|
|
164
182
|
// When backend = "none", the JobStore is null AND *_request_async tools are not
|
|
165
183
|
// registered (see createGatewayServer), making silent in-memory loss
|
|
166
184
|
// structurally impossible.
|
|
167
|
-
|
|
168
|
-
|
|
185
|
+
let persistenceConfig = null;
|
|
186
|
+
let cacheAwarenessConfig = null;
|
|
187
|
+
let jobStore = null;
|
|
188
|
+
let jobStoreInitialized = false;
|
|
189
|
+
let asyncJobManager = null;
|
|
190
|
+
let approvalManager = null;
|
|
191
|
+
function getFlightRecorder(runtimeLogger = logger) {
|
|
192
|
+
flightRecorder ??= createFlightRecorder(runtimeLogger);
|
|
193
|
+
return flightRecorder;
|
|
194
|
+
}
|
|
195
|
+
function getPersistenceConfig(runtimeLogger = logger) {
|
|
196
|
+
persistenceConfig ??= loadPersistenceConfig(runtimeLogger);
|
|
197
|
+
return persistenceConfig;
|
|
198
|
+
}
|
|
199
|
+
function getCacheAwarenessConfig(runtimeLogger = logger) {
|
|
200
|
+
cacheAwarenessConfig ??= loadCacheAwarenessConfig(runtimeLogger);
|
|
201
|
+
return cacheAwarenessConfig;
|
|
202
|
+
}
|
|
203
|
+
function getJobStore(runtimeLogger = logger) {
|
|
204
|
+
if (jobStoreInitialized)
|
|
205
|
+
return jobStore;
|
|
206
|
+
jobStoreInitialized = true;
|
|
169
207
|
try {
|
|
170
|
-
|
|
208
|
+
jobStore = createJobStore(getPersistenceConfig(runtimeLogger), runtimeLogger);
|
|
171
209
|
}
|
|
172
210
|
catch (err) {
|
|
173
|
-
|
|
174
|
-
|
|
211
|
+
runtimeLogger.error("Failed to open durable job store; async tools will be unavailable", err);
|
|
212
|
+
jobStore = null;
|
|
175
213
|
}
|
|
176
|
-
|
|
177
|
-
|
|
214
|
+
return jobStore;
|
|
215
|
+
}
|
|
216
|
+
function newAsyncJobManager(metrics, runtimeLogger, store = getJobStore(runtimeLogger)) {
|
|
178
217
|
return new AsyncJobManager(runtimeLogger, (cli, durationMs, success) => {
|
|
179
218
|
metrics.recordRequest(cli, durationMs, success);
|
|
180
219
|
}, store);
|
|
181
220
|
}
|
|
182
|
-
|
|
183
|
-
|
|
221
|
+
function getAsyncJobManager(runtimeLogger = logger) {
|
|
222
|
+
asyncJobManager ??= newAsyncJobManager(performanceMetrics, runtimeLogger);
|
|
223
|
+
return asyncJobManager;
|
|
224
|
+
}
|
|
225
|
+
function getApprovalManager(runtimeLogger = logger) {
|
|
226
|
+
approvalManager ??= new ApprovalManager(undefined, runtimeLogger);
|
|
227
|
+
return approvalManager;
|
|
228
|
+
}
|
|
184
229
|
const MCP_SERVER_ENUM = z.enum(CLAUDE_MCP_SERVER_NAMES);
|
|
185
230
|
// U22: Session-provider enum extended to five providers. The storage layer's
|
|
186
231
|
// CLI_TYPES already includes "mistral"; the MCP-tool layer mirrors that here so
|
|
@@ -199,22 +244,26 @@ function resolveGatewayServerRuntime(deps = {}, options = {}) {
|
|
|
199
244
|
? // Factory-created test/HTTP session servers must not mark another instance's
|
|
200
245
|
// durable jobs orphaned. Stdio startup injects the process-global manager.
|
|
201
246
|
newAsyncJobManager(runtimePerformanceMetrics, runtimeLogger, null)
|
|
202
|
-
:
|
|
247
|
+
: getAsyncJobManager(runtimeLogger));
|
|
203
248
|
const runtimeApprovalManager = deps.approvalManager ??
|
|
204
|
-
(options.isolateState
|
|
249
|
+
(options.isolateState
|
|
250
|
+
? new ApprovalManager(undefined, runtimeLogger)
|
|
251
|
+
: getApprovalManager(runtimeLogger));
|
|
252
|
+
const runtimeFlightRecorder = deps.flightRecorder ?? getFlightRecorder(runtimeLogger);
|
|
205
253
|
return {
|
|
206
254
|
sessionManager: runtimeSessionManager,
|
|
207
255
|
resourceProvider: deps.resourceProvider ??
|
|
208
256
|
(options.isolateState
|
|
209
|
-
? new ResourceProvider(runtimeSessionManager, runtimePerformanceMetrics)
|
|
257
|
+
? new ResourceProvider(runtimeSessionManager, runtimePerformanceMetrics, runtimeFlightRecorder, deps.cacheAwareness ?? getCacheAwarenessConfig(runtimeLogger))
|
|
210
258
|
: resourceProvider),
|
|
211
259
|
db: "db" in deps ? (deps.db ?? null) : db,
|
|
212
260
|
performanceMetrics: runtimePerformanceMetrics,
|
|
213
261
|
asyncJobManager: runtimeAsyncJobManager,
|
|
214
262
|
approvalManager: runtimeApprovalManager,
|
|
215
|
-
flightRecorder:
|
|
263
|
+
flightRecorder: runtimeFlightRecorder,
|
|
216
264
|
logger: runtimeLogger,
|
|
217
|
-
persistence: deps.persistence ??
|
|
265
|
+
persistence: deps.persistence ?? getPersistenceConfig(runtimeLogger),
|
|
266
|
+
cacheAwareness: deps.cacheAwareness ?? getCacheAwarenessConfig(runtimeLogger),
|
|
218
267
|
};
|
|
219
268
|
}
|
|
220
269
|
// Per-CLI idle timeouts: kill process if no stdout/stderr activity for this duration.
|
|
@@ -664,14 +713,118 @@ function registerBaseResources(server, runtime) {
|
|
|
664
713
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
665
714
|
return { contents: contents ? [contents] : [] };
|
|
666
715
|
});
|
|
716
|
+
// Cache-state resources (slice 2). Static URI for global, templated for
|
|
717
|
+
// session/{id} and prefix/{hash}. All three return tokens/hashes/aggregates
|
|
718
|
+
// ONLY — never raw prompt or response text. The structural guarantee is in
|
|
719
|
+
// the SessionCacheStats / PrefixCacheStats / GlobalCacheStats types
|
|
720
|
+
// themselves: those shapes have no prompt/response/system/task fields.
|
|
721
|
+
server.registerResource("cache-state-global", "cache_state://global", {
|
|
722
|
+
title: "💾 Cache State (Global)",
|
|
723
|
+
description: "Aggregate cache hit/miss/savings across all CLIs in the flight recorder. Tokens/hashes only — no prompt text.",
|
|
724
|
+
mimeType: "application/json",
|
|
725
|
+
}, async (uri) => {
|
|
726
|
+
runtime.logger.debug("Reading cache_state://global resource");
|
|
727
|
+
const stats = runtime.resourceProvider.readCacheStateGlobal({
|
|
728
|
+
lastNHours: 24,
|
|
729
|
+
});
|
|
730
|
+
return {
|
|
731
|
+
contents: [
|
|
732
|
+
{
|
|
733
|
+
uri: uri.href,
|
|
734
|
+
mimeType: "application/json",
|
|
735
|
+
text: JSON.stringify(stats, null, 2),
|
|
736
|
+
},
|
|
737
|
+
],
|
|
738
|
+
};
|
|
739
|
+
});
|
|
740
|
+
server.registerResource("cache-state-session", new ResourceTemplate("cache_state://session/{sessionId}", { list: undefined }), {
|
|
741
|
+
title: "💾 Cache State (Session)",
|
|
742
|
+
description: "Per-session cache hit/miss/savings. Tokens/hashes only — no prompt text.",
|
|
743
|
+
mimeType: "application/json",
|
|
744
|
+
}, async (uri, variables) => {
|
|
745
|
+
const sessionId = Array.isArray(variables.sessionId)
|
|
746
|
+
? variables.sessionId[0]
|
|
747
|
+
: variables.sessionId;
|
|
748
|
+
runtime.logger.debug(`Reading cache_state://session/${sessionId}`);
|
|
749
|
+
const stats = runtime.resourceProvider.readCacheStateSession(String(sessionId));
|
|
750
|
+
return {
|
|
751
|
+
contents: [
|
|
752
|
+
{
|
|
753
|
+
uri: uri.href,
|
|
754
|
+
mimeType: "application/json",
|
|
755
|
+
text: JSON.stringify(stats, null, 2),
|
|
756
|
+
},
|
|
757
|
+
],
|
|
758
|
+
};
|
|
759
|
+
});
|
|
760
|
+
server.registerResource("cache-state-prefix", new ResourceTemplate("cache_state://prefix/{hash}", { list: undefined }), {
|
|
761
|
+
title: "💾 Cache State (Prefix)",
|
|
762
|
+
description: "Per-stable-prefix-hash cache hit/miss/savings, with CLI breakdown. Tokens/hashes only — no prompt text.",
|
|
763
|
+
mimeType: "application/json",
|
|
764
|
+
}, async (uri, variables) => {
|
|
765
|
+
const hash = Array.isArray(variables.hash) ? variables.hash[0] : variables.hash;
|
|
766
|
+
runtime.logger.debug(`Reading cache_state://prefix/${hash}`);
|
|
767
|
+
const stats = runtime.resourceProvider.readCacheStateForPrefix(String(hash));
|
|
768
|
+
return {
|
|
769
|
+
contents: [
|
|
770
|
+
{
|
|
771
|
+
uri: uri.href,
|
|
772
|
+
mimeType: "application/json",
|
|
773
|
+
text: JSON.stringify(stats, null, 2),
|
|
774
|
+
},
|
|
775
|
+
],
|
|
776
|
+
};
|
|
777
|
+
});
|
|
778
|
+
}
|
|
779
|
+
/**
|
|
780
|
+
* Slice 1: validate the prompt / promptParts mutex at the prep boundary and
|
|
781
|
+
* return either an error response or the resolved input. The exact error
|
|
782
|
+
* messages are part of the public contract — tests assert them verbatim.
|
|
783
|
+
*/
|
|
784
|
+
function resolvePromptOrPartsForPrep(args) {
|
|
785
|
+
const hasPrompt = typeof args.prompt === "string" && args.prompt.length > 0;
|
|
786
|
+
const hasParts = args.promptParts !== undefined;
|
|
787
|
+
if (hasPrompt && hasParts) {
|
|
788
|
+
return {
|
|
789
|
+
ok: false,
|
|
790
|
+
error: createErrorResponse(args.operation, 1, "", args.correlationId, new Error("provide exactly one of `prompt` or `promptParts`")),
|
|
791
|
+
};
|
|
792
|
+
}
|
|
793
|
+
if (!hasPrompt && !hasParts) {
|
|
794
|
+
return {
|
|
795
|
+
ok: false,
|
|
796
|
+
error: createErrorResponse(args.operation, 1, "", args.correlationId, new Error("one of `prompt` or `promptParts` is required")),
|
|
797
|
+
};
|
|
798
|
+
}
|
|
799
|
+
const resolved = resolvePromptInput({
|
|
800
|
+
prompt: args.prompt,
|
|
801
|
+
promptParts: args.promptParts,
|
|
802
|
+
});
|
|
803
|
+
return {
|
|
804
|
+
ok: true,
|
|
805
|
+
assembledPrompt: resolved.assembledPrompt,
|
|
806
|
+
stablePrefixHash: resolved.stablePrefixHash,
|
|
807
|
+
stablePrefixTokens: resolved.stablePrefixTokens,
|
|
808
|
+
};
|
|
667
809
|
}
|
|
668
810
|
export function prepareClaudeRequest(params, runtime = resolveGatewayServerRuntime()) {
|
|
669
811
|
const corrId = params.correlationId || randomUUID();
|
|
670
812
|
const cliInfo = getCliInfo();
|
|
671
813
|
const resolvedModel = resolveModelAlias("claude", params.model, cliInfo);
|
|
814
|
+
const inputResolution = resolvePromptOrPartsForPrep({
|
|
815
|
+
prompt: params.prompt,
|
|
816
|
+
promptParts: params.promptParts,
|
|
817
|
+
operation: params.operation,
|
|
818
|
+
correlationId: corrId,
|
|
819
|
+
});
|
|
820
|
+
if (!inputResolution.ok)
|
|
821
|
+
return inputResolution.error;
|
|
822
|
+
const assembledPrompt = inputResolution.assembledPrompt;
|
|
823
|
+
const stablePrefixHash = inputResolution.stablePrefixHash;
|
|
824
|
+
const stablePrefixTokens = inputResolution.stablePrefixTokens;
|
|
672
825
|
// Review integrity check on raw prompt (before optimization)
|
|
673
826
|
const reviewIntegrity = checkReviewIntegrity({
|
|
674
|
-
prompt:
|
|
827
|
+
prompt: assembledPrompt,
|
|
675
828
|
allowedTools: params.allowedTools,
|
|
676
829
|
disallowedTools: params.disallowedTools,
|
|
677
830
|
});
|
|
@@ -682,7 +835,7 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
682
835
|
score: reviewIntegrity.totalScore,
|
|
683
836
|
});
|
|
684
837
|
}
|
|
685
|
-
let effectivePrompt =
|
|
838
|
+
let effectivePrompt = assembledPrompt;
|
|
686
839
|
if (params.optimizePrompt) {
|
|
687
840
|
const optimized = optimizePromptText(effectivePrompt);
|
|
688
841
|
logOptimizationTokens("prompt", corrId, effectivePrompt, optimized);
|
|
@@ -699,7 +852,7 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
699
852
|
approvalDecision = runtime.approvalManager.decide({
|
|
700
853
|
cli: "claude",
|
|
701
854
|
operation: params.operation,
|
|
702
|
-
prompt:
|
|
855
|
+
prompt: assembledPrompt, // Use raw assembled prompt for review-context detection, not optimized
|
|
703
856
|
bypassRequested: params.dangerouslySkipPermissions,
|
|
704
857
|
fullAuto: false,
|
|
705
858
|
requestedMcpServers,
|
|
@@ -778,14 +931,27 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
778
931
|
approvalDecision,
|
|
779
932
|
reviewIntegrity,
|
|
780
933
|
args,
|
|
934
|
+
stablePrefixHash,
|
|
935
|
+
stablePrefixTokens,
|
|
781
936
|
};
|
|
782
937
|
}
|
|
783
938
|
export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntime()) {
|
|
784
939
|
const corrId = params.correlationId || randomUUID();
|
|
785
940
|
const cliInfo = getCliInfo();
|
|
786
941
|
const resolvedModel = resolveModelAlias("codex", params.model, cliInfo);
|
|
942
|
+
const inputResolution = resolvePromptOrPartsForPrep({
|
|
943
|
+
prompt: params.prompt,
|
|
944
|
+
promptParts: params.promptParts,
|
|
945
|
+
operation: params.operation,
|
|
946
|
+
correlationId: corrId,
|
|
947
|
+
});
|
|
948
|
+
if (!inputResolution.ok)
|
|
949
|
+
return inputResolution.error;
|
|
950
|
+
const assembledPrompt = inputResolution.assembledPrompt;
|
|
951
|
+
const stablePrefixHash = inputResolution.stablePrefixHash;
|
|
952
|
+
const stablePrefixTokens = inputResolution.stablePrefixTokens;
|
|
787
953
|
// Review integrity check on raw prompt (before optimization)
|
|
788
|
-
const reviewIntegrity = checkReviewIntegrity({ prompt:
|
|
954
|
+
const reviewIntegrity = checkReviewIntegrity({ prompt: assembledPrompt });
|
|
789
955
|
if (reviewIntegrity.violations.length > 0) {
|
|
790
956
|
runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
|
|
791
957
|
cli: "codex",
|
|
@@ -793,7 +959,7 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
|
|
|
793
959
|
score: reviewIntegrity.totalScore,
|
|
794
960
|
});
|
|
795
961
|
}
|
|
796
|
-
let effectivePrompt =
|
|
962
|
+
let effectivePrompt = assembledPrompt;
|
|
797
963
|
if (params.optimizePrompt) {
|
|
798
964
|
const optimized = optimizePromptText(effectivePrompt);
|
|
799
965
|
logOptimizationTokens("prompt", corrId, effectivePrompt, optimized);
|
|
@@ -805,7 +971,7 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
|
|
|
805
971
|
approvalDecision = runtime.approvalManager.decide({
|
|
806
972
|
cli: "codex",
|
|
807
973
|
operation: params.operation,
|
|
808
|
-
prompt:
|
|
974
|
+
prompt: assembledPrompt, // Use raw assembled prompt for review-context detection, not optimized
|
|
809
975
|
bypassRequested: params.dangerouslyBypassApprovalsAndSandbox,
|
|
810
976
|
fullAuto: params.fullAuto,
|
|
811
977
|
requestedMcpServers,
|
|
@@ -920,15 +1086,28 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
|
|
|
920
1086
|
reviewIntegrity,
|
|
921
1087
|
args,
|
|
922
1088
|
cleanup: highImpactCleanup,
|
|
1089
|
+
stablePrefixHash,
|
|
1090
|
+
stablePrefixTokens,
|
|
923
1091
|
};
|
|
924
1092
|
}
|
|
925
1093
|
export function prepareGeminiRequest(params, runtime = resolveGatewayServerRuntime()) {
|
|
926
1094
|
const corrId = params.correlationId || randomUUID();
|
|
927
1095
|
const cliInfo = getCliInfo();
|
|
928
1096
|
const resolvedModel = resolveModelAlias("gemini", params.model, cliInfo);
|
|
1097
|
+
const inputResolution = resolvePromptOrPartsForPrep({
|
|
1098
|
+
prompt: params.prompt,
|
|
1099
|
+
promptParts: params.promptParts,
|
|
1100
|
+
operation: params.operation,
|
|
1101
|
+
correlationId: corrId,
|
|
1102
|
+
});
|
|
1103
|
+
if (!inputResolution.ok)
|
|
1104
|
+
return inputResolution.error;
|
|
1105
|
+
const assembledPrompt = inputResolution.assembledPrompt;
|
|
1106
|
+
const stablePrefixHash = inputResolution.stablePrefixHash;
|
|
1107
|
+
const stablePrefixTokens = inputResolution.stablePrefixTokens;
|
|
929
1108
|
// Review integrity check on raw prompt (before optimization)
|
|
930
1109
|
const reviewIntegrity = checkReviewIntegrity({
|
|
931
|
-
prompt:
|
|
1110
|
+
prompt: assembledPrompt,
|
|
932
1111
|
allowedTools: params.allowedTools,
|
|
933
1112
|
});
|
|
934
1113
|
if (reviewIntegrity.violations.length > 0) {
|
|
@@ -938,7 +1117,7 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
|
|
|
938
1117
|
score: reviewIntegrity.totalScore,
|
|
939
1118
|
});
|
|
940
1119
|
}
|
|
941
|
-
let effectivePrompt =
|
|
1120
|
+
let effectivePrompt = assembledPrompt;
|
|
942
1121
|
if (params.optimizePrompt) {
|
|
943
1122
|
const optimized = optimizePromptText(effectivePrompt);
|
|
944
1123
|
logOptimizationTokens("prompt", corrId, effectivePrompt, optimized);
|
|
@@ -950,7 +1129,7 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
|
|
|
950
1129
|
approvalDecision = runtime.approvalManager.decide({
|
|
951
1130
|
cli: "gemini",
|
|
952
1131
|
operation: params.operation,
|
|
953
|
-
prompt:
|
|
1132
|
+
prompt: assembledPrompt, // Use raw assembled prompt for review-context detection, not optimized
|
|
954
1133
|
bypassRequested: params.approvalMode === "yolo",
|
|
955
1134
|
fullAuto: false,
|
|
956
1135
|
requestedMcpServers,
|
|
@@ -1020,15 +1199,28 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1020
1199
|
approvalDecision,
|
|
1021
1200
|
reviewIntegrity,
|
|
1022
1201
|
args,
|
|
1202
|
+
stablePrefixHash,
|
|
1203
|
+
stablePrefixTokens,
|
|
1023
1204
|
};
|
|
1024
1205
|
}
|
|
1025
1206
|
function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime()) {
|
|
1026
1207
|
const corrId = params.correlationId || randomUUID();
|
|
1027
1208
|
const cliInfo = getCliInfo();
|
|
1028
1209
|
const resolvedModel = resolveModelAlias("grok", params.model, cliInfo);
|
|
1210
|
+
const inputResolution = resolvePromptOrPartsForPrep({
|
|
1211
|
+
prompt: params.prompt,
|
|
1212
|
+
promptParts: params.promptParts,
|
|
1213
|
+
operation: params.operation,
|
|
1214
|
+
correlationId: corrId,
|
|
1215
|
+
});
|
|
1216
|
+
if (!inputResolution.ok)
|
|
1217
|
+
return inputResolution.error;
|
|
1218
|
+
const assembledPrompt = inputResolution.assembledPrompt;
|
|
1219
|
+
const stablePrefixHash = inputResolution.stablePrefixHash;
|
|
1220
|
+
const stablePrefixTokens = inputResolution.stablePrefixTokens;
|
|
1029
1221
|
// Review integrity check on raw prompt (before optimization)
|
|
1030
1222
|
const reviewIntegrity = checkReviewIntegrity({
|
|
1031
|
-
prompt:
|
|
1223
|
+
prompt: assembledPrompt,
|
|
1032
1224
|
allowedTools: params.allowedTools,
|
|
1033
1225
|
disallowedTools: params.disallowedTools,
|
|
1034
1226
|
});
|
|
@@ -1039,7 +1231,7 @@ function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime()) {
|
|
|
1039
1231
|
score: reviewIntegrity.totalScore,
|
|
1040
1232
|
});
|
|
1041
1233
|
}
|
|
1042
|
-
let effectivePrompt =
|
|
1234
|
+
let effectivePrompt = assembledPrompt;
|
|
1043
1235
|
if (params.optimizePrompt) {
|
|
1044
1236
|
const optimized = optimizePromptText(effectivePrompt);
|
|
1045
1237
|
logOptimizationTokens("prompt", corrId, effectivePrompt, optimized);
|
|
@@ -1051,7 +1243,7 @@ function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime()) {
|
|
|
1051
1243
|
approvalDecision = runtime.approvalManager.decide({
|
|
1052
1244
|
cli: "grok",
|
|
1053
1245
|
operation: params.operation,
|
|
1054
|
-
prompt:
|
|
1246
|
+
prompt: assembledPrompt, // Use raw assembled prompt for review-context detection, not optimized
|
|
1055
1247
|
bypassRequested: Boolean(params.alwaysApprove) || params.permissionMode === "bypassPermissions",
|
|
1056
1248
|
fullAuto: false,
|
|
1057
1249
|
requestedMcpServers,
|
|
@@ -1095,14 +1287,27 @@ function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime()) {
|
|
|
1095
1287
|
approvalDecision,
|
|
1096
1288
|
reviewIntegrity,
|
|
1097
1289
|
args,
|
|
1290
|
+
stablePrefixHash,
|
|
1291
|
+
stablePrefixTokens,
|
|
1098
1292
|
};
|
|
1099
1293
|
}
|
|
1100
1294
|
export function prepareMistralRequest(params, runtime = resolveGatewayServerRuntime()) {
|
|
1101
1295
|
const corrId = params.correlationId || randomUUID();
|
|
1102
1296
|
const cliInfo = getCliInfo();
|
|
1103
1297
|
const resolvedModel = resolveModelAlias("mistral", params.model, cliInfo);
|
|
1104
|
-
const
|
|
1298
|
+
const inputResolution = resolvePromptOrPartsForPrep({
|
|
1105
1299
|
prompt: params.prompt,
|
|
1300
|
+
promptParts: params.promptParts,
|
|
1301
|
+
operation: params.operation,
|
|
1302
|
+
correlationId: corrId,
|
|
1303
|
+
});
|
|
1304
|
+
if (!inputResolution.ok)
|
|
1305
|
+
return inputResolution.error;
|
|
1306
|
+
const assembledPrompt = inputResolution.assembledPrompt;
|
|
1307
|
+
const stablePrefixHash = inputResolution.stablePrefixHash;
|
|
1308
|
+
const stablePrefixTokens = inputResolution.stablePrefixTokens;
|
|
1309
|
+
const reviewIntegrity = checkReviewIntegrity({
|
|
1310
|
+
prompt: assembledPrompt,
|
|
1106
1311
|
allowedTools: params.allowedTools,
|
|
1107
1312
|
disallowedTools: params.disallowedTools,
|
|
1108
1313
|
});
|
|
@@ -1113,7 +1318,7 @@ export function prepareMistralRequest(params, runtime = resolveGatewayServerRunt
|
|
|
1113
1318
|
score: reviewIntegrity.totalScore,
|
|
1114
1319
|
});
|
|
1115
1320
|
}
|
|
1116
|
-
let effectivePrompt =
|
|
1321
|
+
let effectivePrompt = assembledPrompt;
|
|
1117
1322
|
if (params.optimizePrompt) {
|
|
1118
1323
|
const optimized = optimizePromptText(effectivePrompt);
|
|
1119
1324
|
logOptimizationTokens("prompt", corrId, effectivePrompt, optimized);
|
|
@@ -1125,7 +1330,7 @@ export function prepareMistralRequest(params, runtime = resolveGatewayServerRunt
|
|
|
1125
1330
|
approvalDecision = runtime.approvalManager.decide({
|
|
1126
1331
|
cli: "mistral",
|
|
1127
1332
|
operation: params.operation,
|
|
1128
|
-
prompt:
|
|
1333
|
+
prompt: assembledPrompt,
|
|
1129
1334
|
bypassRequested: params.permissionMode === "auto-approve",
|
|
1130
1335
|
fullAuto: false,
|
|
1131
1336
|
requestedMcpServers,
|
|
@@ -1170,6 +1375,8 @@ export function prepareMistralRequest(params, runtime = resolveGatewayServerRunt
|
|
|
1170
1375
|
reviewIntegrity,
|
|
1171
1376
|
args: prep.args,
|
|
1172
1377
|
mistralEnv: prep.env,
|
|
1378
|
+
stablePrefixHash,
|
|
1379
|
+
stablePrefixTokens,
|
|
1173
1380
|
};
|
|
1174
1381
|
}
|
|
1175
1382
|
function isMistralModelSelectionFailure(stderr) {
|
|
@@ -1185,7 +1392,7 @@ function selectMistralRecoveryModel(failedModel) {
|
|
|
1185
1392
|
].filter((model) => Boolean(model && model !== failedModel));
|
|
1186
1393
|
return candidates.find(model => model !== "local");
|
|
1187
1394
|
}
|
|
1188
|
-
function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep, durationMs, resumable, outputFormat) {
|
|
1395
|
+
function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep, durationMs, resumable, outputFormat, warnings) {
|
|
1189
1396
|
let finalStdout = stdout;
|
|
1190
1397
|
// Skip response optimization for JSON output to prevent corrupting structured data
|
|
1191
1398
|
if (optimizeResponse && outputFormat !== "json") {
|
|
@@ -1234,8 +1441,41 @@ function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep
|
|
|
1234
1441
|
if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
|
|
1235
1442
|
response.reviewIntegrity = prep.reviewIntegrity;
|
|
1236
1443
|
}
|
|
1444
|
+
if (warnings && warnings.length > 0) {
|
|
1445
|
+
response.warnings = warnings;
|
|
1446
|
+
}
|
|
1237
1447
|
return response;
|
|
1238
1448
|
}
|
|
1449
|
+
/**
|
|
1450
|
+
* Slice 3 helper: compute the cache_ttl_expiring_soon warning for a
|
|
1451
|
+
* claude session, if the feature is enabled, the session has prior cache
|
|
1452
|
+
* writes, and ttlRemainingMs is below the threshold (30s by default).
|
|
1453
|
+
* Returns null when no warning applies.
|
|
1454
|
+
*/
|
|
1455
|
+
function maybeBuildCacheTtlWarning(args) {
|
|
1456
|
+
if (args.cli !== "claude")
|
|
1457
|
+
return null;
|
|
1458
|
+
if (!args.sessionId)
|
|
1459
|
+
return null;
|
|
1460
|
+
if (!args.runtime.cacheAwareness?.warnOnTtlExpiry)
|
|
1461
|
+
return null;
|
|
1462
|
+
const stats = computeSessionCacheStats(args.runtime.flightRecorder, args.sessionId);
|
|
1463
|
+
if (stats.requestCount === 0 || !stats.lastRequestAt)
|
|
1464
|
+
return null;
|
|
1465
|
+
const ttl = computeTtlRemaining(stats, args.cli, {
|
|
1466
|
+
anthropicTtlSeconds: args.runtime.cacheAwareness.anthropicTtlSeconds,
|
|
1467
|
+
});
|
|
1468
|
+
if (ttl === null)
|
|
1469
|
+
return null;
|
|
1470
|
+
const threshold = args.thresholdMs ?? 30_000;
|
|
1471
|
+
if (ttl >= threshold)
|
|
1472
|
+
return null;
|
|
1473
|
+
return {
|
|
1474
|
+
code: "cache_ttl_expiring_soon",
|
|
1475
|
+
ttlRemainingMs: ttl,
|
|
1476
|
+
message: `Anthropic cache breakpoint for session ${args.sessionId} expires in ${ttl}ms (< ${threshold}ms). Subsequent requests may miss the cache.`,
|
|
1477
|
+
};
|
|
1478
|
+
}
|
|
1239
1479
|
function resolveHandlerRuntime(deps) {
|
|
1240
1480
|
if (deps.runtime)
|
|
1241
1481
|
return deps.runtime;
|
|
@@ -1259,6 +1499,7 @@ export async function handleGeminiRequest(deps, params) {
|
|
|
1259
1499
|
const startTime = Date.now();
|
|
1260
1500
|
const prep = prepareGeminiRequest({
|
|
1261
1501
|
prompt: params.prompt,
|
|
1502
|
+
promptParts: params.promptParts,
|
|
1262
1503
|
model: params.model,
|
|
1263
1504
|
approvalMode: params.approvalMode,
|
|
1264
1505
|
approvalStrategy: params.approvalStrategy,
|
|
@@ -1284,10 +1525,12 @@ export async function handleGeminiRequest(deps, params) {
|
|
|
1284
1525
|
correlationId: corrId,
|
|
1285
1526
|
cli: "gemini",
|
|
1286
1527
|
model: prep.resolvedModel || "default",
|
|
1287
|
-
prompt:
|
|
1528
|
+
prompt: prep.effectivePrompt,
|
|
1288
1529
|
sessionId: params.sessionId,
|
|
1530
|
+
stablePrefixHash: prep.stablePrefixHash ?? undefined,
|
|
1531
|
+
stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
|
|
1289
1532
|
}, runtime);
|
|
1290
|
-
deps.logger.info(`[${corrId}] gemini_request invoked with model=${prep.resolvedModel || "default"}, approvalMode=${params.approvalMode}, prompt length=${
|
|
1533
|
+
deps.logger.info(`[${corrId}] gemini_request invoked with model=${prep.resolvedModel || "default"}, approvalMode=${params.approvalMode}, prompt length=${prep.effectivePrompt.length}`);
|
|
1291
1534
|
try {
|
|
1292
1535
|
// Gemini CLI 0.43 supports `--resume`, but not a supported fresh
|
|
1293
1536
|
// `--session-id` flag. Fresh sessions emit no session flag.
|
|
@@ -1383,6 +1626,7 @@ export async function handleGeminiRequestAsync(deps, params) {
|
|
|
1383
1626
|
const runtime = resolveHandlerRuntime(deps);
|
|
1384
1627
|
const prep = prepareGeminiRequest({
|
|
1385
1628
|
prompt: params.prompt,
|
|
1629
|
+
promptParts: params.promptParts,
|
|
1386
1630
|
model: params.model,
|
|
1387
1631
|
approvalMode: params.approvalMode,
|
|
1388
1632
|
approvalStrategy: params.approvalStrategy,
|
|
@@ -1462,6 +1706,7 @@ export async function handleGrokRequest(deps, params) {
|
|
|
1462
1706
|
const startTime = Date.now();
|
|
1463
1707
|
const prep = prepareGrokRequest({
|
|
1464
1708
|
prompt: params.prompt,
|
|
1709
|
+
promptParts: params.promptParts,
|
|
1465
1710
|
model: params.model,
|
|
1466
1711
|
outputFormat: params.outputFormat,
|
|
1467
1712
|
alwaysApprove: params.alwaysApprove,
|
|
@@ -1486,10 +1731,12 @@ export async function handleGrokRequest(deps, params) {
|
|
|
1486
1731
|
correlationId: corrId,
|
|
1487
1732
|
cli: "grok",
|
|
1488
1733
|
model: prep.resolvedModel || "default",
|
|
1489
|
-
prompt:
|
|
1734
|
+
prompt: prep.effectivePrompt,
|
|
1490
1735
|
sessionId: params.sessionId,
|
|
1736
|
+
stablePrefixHash: prep.stablePrefixHash ?? undefined,
|
|
1737
|
+
stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
|
|
1491
1738
|
}, runtime);
|
|
1492
|
-
deps.logger.info(`[${corrId}] grok_request invoked with model=${prep.resolvedModel || "default"}, permissionMode=${params.permissionMode}, prompt length=${
|
|
1739
|
+
deps.logger.info(`[${corrId}] grok_request invoked with model=${prep.resolvedModel || "default"}, permissionMode=${params.permissionMode}, prompt length=${prep.effectivePrompt.length}`);
|
|
1493
1740
|
try {
|
|
1494
1741
|
// Session arg planning (pure, no I/O)
|
|
1495
1742
|
const sessionResult = resolveGrokSessionArgs({
|
|
@@ -1578,6 +1825,7 @@ export async function handleGrokRequestAsync(deps, params) {
|
|
|
1578
1825
|
const runtime = resolveHandlerRuntime(deps);
|
|
1579
1826
|
const prep = prepareGrokRequest({
|
|
1580
1827
|
prompt: params.prompt,
|
|
1828
|
+
promptParts: params.promptParts,
|
|
1581
1829
|
model: params.model,
|
|
1582
1830
|
outputFormat: params.outputFormat,
|
|
1583
1831
|
alwaysApprove: params.alwaysApprove,
|
|
@@ -1658,6 +1906,7 @@ export async function handleMistralRequest(deps, params) {
|
|
|
1658
1906
|
const startTime = Date.now();
|
|
1659
1907
|
const prep = prepareMistralRequest({
|
|
1660
1908
|
prompt: params.prompt,
|
|
1909
|
+
promptParts: params.promptParts,
|
|
1661
1910
|
model: params.model,
|
|
1662
1911
|
outputFormat: params.outputFormat,
|
|
1663
1912
|
permissionMode: params.permissionMode,
|
|
@@ -1681,10 +1930,12 @@ export async function handleMistralRequest(deps, params) {
|
|
|
1681
1930
|
correlationId: corrId,
|
|
1682
1931
|
cli: "mistral",
|
|
1683
1932
|
model: prep.resolvedModel || "default",
|
|
1684
|
-
prompt:
|
|
1933
|
+
prompt: prep.effectivePrompt,
|
|
1685
1934
|
sessionId: params.sessionId,
|
|
1935
|
+
stablePrefixHash: prep.stablePrefixHash ?? undefined,
|
|
1936
|
+
stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
|
|
1686
1937
|
}, runtime);
|
|
1687
|
-
deps.logger.info(`[${corrId}] mistral_request invoked with model=${prep.resolvedModel || "default"}, permissionMode=${params.permissionMode || "auto-approve"}, prompt length=${
|
|
1938
|
+
deps.logger.info(`[${corrId}] mistral_request invoked with model=${prep.resolvedModel || "default"}, permissionMode=${params.permissionMode || "auto-approve"}, prompt length=${prep.effectivePrompt.length}`);
|
|
1688
1939
|
try {
|
|
1689
1940
|
const sessionResult = resolveMistralSessionArgs({
|
|
1690
1941
|
sessionId: params.sessionId,
|
|
@@ -1795,6 +2046,7 @@ export async function handleMistralRequestAsync(deps, params) {
|
|
|
1795
2046
|
const runtime = resolveHandlerRuntime(deps);
|
|
1796
2047
|
const prep = prepareMistralRequest({
|
|
1797
2048
|
prompt: params.prompt,
|
|
2049
|
+
promptParts: params.promptParts,
|
|
1798
2050
|
model: params.model,
|
|
1799
2051
|
outputFormat: params.outputFormat,
|
|
1800
2052
|
permissionMode: params.permissionMode,
|
|
@@ -1870,6 +2122,7 @@ export async function handleCodexRequestAsync(deps, params) {
|
|
|
1870
2122
|
const runtime = resolveHandlerRuntime(deps);
|
|
1871
2123
|
const prep = prepareCodexRequest({
|
|
1872
2124
|
prompt: params.prompt,
|
|
2125
|
+
promptParts: params.promptParts,
|
|
1873
2126
|
model: params.model,
|
|
1874
2127
|
fullAuto: params.fullAuto,
|
|
1875
2128
|
sandboxMode: params.sandboxMode,
|
|
@@ -1986,7 +2239,14 @@ export async function handleCodexRequestAsync(deps, params) {
|
|
|
1986
2239
|
//──────────────────────────────────────────────────────────────────────────────
|
|
1987
2240
|
export function createGatewayServer(deps = {}) {
|
|
1988
2241
|
const runtime = resolveGatewayServerRuntime(deps, { isolateState: true });
|
|
1989
|
-
const { sessionManager, asyncJobManager, approvalManager, performanceMetrics, logger, persistence, } = runtime;
|
|
2242
|
+
const { sessionManager, asyncJobManager, approvalManager, performanceMetrics, logger, persistence, flightRecorder, cacheAwareness, } = runtime;
|
|
2243
|
+
// `flightRecorder` is destructured into closure scope so the session_get
|
|
2244
|
+
// handler (see ~line 5590) has the FlightRecorderQuery read capability
|
|
2245
|
+
// available without re-resolving runtime. Slice 2 will populate the
|
|
2246
|
+
// `cacheState` field of session_get's response from this read surface.
|
|
2247
|
+
// `cacheAwareness` is the loaded [cache_awareness] block (config.ts).
|
|
2248
|
+
void flightRecorder;
|
|
2249
|
+
void cacheAwareness;
|
|
1990
2250
|
// Structural invariant: tools register iff ALL THREE conditions hold:
|
|
1991
2251
|
// (1) persistence.backend !== "none" — the operator/config has not
|
|
1992
2252
|
// explicitly disabled durable persistence;
|
|
@@ -2012,7 +2272,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
2012
2272
|
.string()
|
|
2013
2273
|
.min(1, "Prompt cannot be empty")
|
|
2014
2274
|
.max(100000, "Prompt too long (max 100k chars)")
|
|
2015
|
-
.
|
|
2275
|
+
.optional()
|
|
2276
|
+
.describe("Prompt text for Claude (mutually exclusive with promptParts)"),
|
|
2277
|
+
promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
|
|
2016
2278
|
model: z
|
|
2017
2279
|
.string()
|
|
2018
2280
|
.optional()
|
|
@@ -2107,13 +2369,14 @@ export function createGatewayServer(deps = {}) {
|
|
|
2107
2369
|
.boolean()
|
|
2108
2370
|
.default(false)
|
|
2109
2371
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
2110
|
-
}, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
|
|
2372
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
|
|
2111
2373
|
const startTime = Date.now();
|
|
2112
2374
|
if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
|
|
2113
2375
|
return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
|
|
2114
2376
|
}
|
|
2115
2377
|
const prep = prepareClaudeRequest({
|
|
2116
2378
|
prompt,
|
|
2379
|
+
promptParts,
|
|
2117
2380
|
model,
|
|
2118
2381
|
outputFormat,
|
|
2119
2382
|
allowedTools,
|
|
@@ -2142,26 +2405,53 @@ export function createGatewayServer(deps = {}) {
|
|
|
2142
2405
|
const { corrId, args } = prep;
|
|
2143
2406
|
let durationMs = 0;
|
|
2144
2407
|
let wasSuccessful = false;
|
|
2408
|
+
// Session resolution happens BEFORE safeFlightStart so that:
|
|
2409
|
+
// (1) the TTL warning reads the PRIOR session's lastWriteAt
|
|
2410
|
+
// rather than the row about to be inserted (codex-r1/F1).
|
|
2411
|
+
// (2) the flight-recorder row is tagged with effectiveSessionId
|
|
2412
|
+
// (the session the CLI will actually resume), not the raw
|
|
2413
|
+
// user-provided sessionId.
|
|
2414
|
+
let effectiveSessionId = sessionId;
|
|
2415
|
+
let useContinue = continueSession;
|
|
2416
|
+
// Guard the active-session lookup: in some test harnesses the
|
|
2417
|
+
// sessionManager is undefined; the original try-catch wrapped this
|
|
2418
|
+
// block, so we replicate that tolerance here. Failure leaves
|
|
2419
|
+
// effectiveSessionId as the user-provided sessionId.
|
|
2420
|
+
let activeSession = null;
|
|
2421
|
+
try {
|
|
2422
|
+
activeSession = await sessionManager.getActiveSession("claude");
|
|
2423
|
+
}
|
|
2424
|
+
catch (err) {
|
|
2425
|
+
logger.warn(`[${corrId}] sessionManager.getActiveSession failed (non-fatal): ${err.message}`);
|
|
2426
|
+
}
|
|
2427
|
+
if (!createNewSession && !continueSession && !sessionId && activeSession) {
|
|
2428
|
+
effectiveSessionId = activeSession.id;
|
|
2429
|
+
useContinue = true;
|
|
2430
|
+
}
|
|
2431
|
+
if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
|
|
2432
|
+
useContinue = true;
|
|
2433
|
+
}
|
|
2434
|
+
// Slice 3: if the resolved session has a near-expiry Anthropic
|
|
2435
|
+
// cache breakpoint, attach a structured warning (NOT a hard error)
|
|
2436
|
+
// to the response. Computed BEFORE safeFlightStart so the current
|
|
2437
|
+
// row does not skew lastRequestAt.
|
|
2438
|
+
const ttlWarning = maybeBuildCacheTtlWarning({
|
|
2439
|
+
runtime,
|
|
2440
|
+
sessionId: effectiveSessionId,
|
|
2441
|
+
cli: "claude",
|
|
2442
|
+
});
|
|
2443
|
+
const warnings = ttlWarning ? [ttlWarning] : [];
|
|
2145
2444
|
safeFlightStart({
|
|
2146
2445
|
correlationId: corrId,
|
|
2147
2446
|
cli: "claude",
|
|
2148
2447
|
model: prep.resolvedModel || "default",
|
|
2149
|
-
prompt,
|
|
2150
|
-
sessionId,
|
|
2448
|
+
prompt: prep.effectivePrompt,
|
|
2449
|
+
sessionId: effectiveSessionId,
|
|
2450
|
+
stablePrefixHash: prep.stablePrefixHash ?? undefined,
|
|
2451
|
+
stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
|
|
2151
2452
|
}, runtime);
|
|
2152
|
-
logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${
|
|
2453
|
+
logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${prep.effectivePrompt.length}, sessionId=${effectiveSessionId}`);
|
|
2153
2454
|
try {
|
|
2154
|
-
// Session management
|
|
2155
|
-
let effectiveSessionId = sessionId;
|
|
2156
|
-
let useContinue = continueSession;
|
|
2157
|
-
const activeSession = await sessionManager.getActiveSession("claude");
|
|
2158
|
-
if (!createNewSession && !continueSession && !sessionId && activeSession) {
|
|
2159
|
-
effectiveSessionId = activeSession.id;
|
|
2160
|
-
useContinue = true;
|
|
2161
|
-
}
|
|
2162
|
-
if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
|
|
2163
|
-
useContinue = true;
|
|
2164
|
-
}
|
|
2165
2455
|
if (useContinue) {
|
|
2166
2456
|
args.push("--continue");
|
|
2167
2457
|
}
|
|
@@ -2190,7 +2480,14 @@ export function createGatewayServer(deps = {}) {
|
|
|
2190
2480
|
errorMessage: stderr || `Exit code ${code}`,
|
|
2191
2481
|
status: "failed",
|
|
2192
2482
|
}, runtime);
|
|
2193
|
-
|
|
2483
|
+
// Slice 3: attach any computed warnings to the error response so
|
|
2484
|
+
// the caller still sees cache_ttl_expiring_soon when the CLI
|
|
2485
|
+
// happens to fail for an unrelated reason.
|
|
2486
|
+
const errResp = createErrorResponse("claude", code, stderr, corrId);
|
|
2487
|
+
if (warnings.length > 0) {
|
|
2488
|
+
errResp.warnings = warnings;
|
|
2489
|
+
}
|
|
2490
|
+
return errResp;
|
|
2194
2491
|
}
|
|
2195
2492
|
wasSuccessful = true;
|
|
2196
2493
|
// If we used a session ID and it's not tracked yet, create a session record
|
|
@@ -2221,7 +2518,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
2221
2518
|
exitCode: 0,
|
|
2222
2519
|
status: "completed",
|
|
2223
2520
|
}, runtime);
|
|
2224
|
-
return buildCliResponse("claude", parsed.text, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
|
|
2521
|
+
return buildCliResponse("claude", parsed.text, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat, warnings);
|
|
2225
2522
|
}
|
|
2226
2523
|
safeFlightComplete(corrId, {
|
|
2227
2524
|
response: stdout,
|
|
@@ -2232,7 +2529,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
2232
2529
|
exitCode: 0,
|
|
2233
2530
|
status: "completed",
|
|
2234
2531
|
}, runtime);
|
|
2235
|
-
return buildCliResponse("claude", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
|
|
2532
|
+
return buildCliResponse("claude", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat, warnings);
|
|
2236
2533
|
}
|
|
2237
2534
|
catch (error) {
|
|
2238
2535
|
const elapsedMs = Math.max(0, Date.now() - startTime);
|
|
@@ -2262,7 +2559,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
2262
2559
|
.string()
|
|
2263
2560
|
.min(1, "Prompt cannot be empty")
|
|
2264
2561
|
.max(100000, "Prompt too long (max 100k chars)")
|
|
2265
|
-
.
|
|
2562
|
+
.optional()
|
|
2563
|
+
.describe("Prompt text for Codex (mutually exclusive with promptParts)"),
|
|
2564
|
+
promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
|
|
2266
2565
|
model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
|
|
2267
2566
|
fullAuto: z
|
|
2268
2567
|
.boolean()
|
|
@@ -2353,10 +2652,11 @@ export function createGatewayServer(deps = {}) {
|
|
|
2353
2652
|
.boolean()
|
|
2354
2653
|
.optional()
|
|
2355
2654
|
.describe("Codex --ignore-rules: skip project rule files for this run."),
|
|
2356
|
-
}, async ({ prompt, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, }) => {
|
|
2655
|
+
}, async ({ prompt, promptParts, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, }) => {
|
|
2357
2656
|
const startTime = Date.now();
|
|
2358
2657
|
const prep = prepareCodexRequest({
|
|
2359
2658
|
prompt,
|
|
2659
|
+
promptParts,
|
|
2360
2660
|
model,
|
|
2361
2661
|
fullAuto,
|
|
2362
2662
|
sandboxMode,
|
|
@@ -2391,10 +2691,12 @@ export function createGatewayServer(deps = {}) {
|
|
|
2391
2691
|
correlationId: corrId,
|
|
2392
2692
|
cli: "codex",
|
|
2393
2693
|
model: prep.resolvedModel || "default",
|
|
2394
|
-
prompt,
|
|
2694
|
+
prompt: prep.effectivePrompt,
|
|
2395
2695
|
sessionId,
|
|
2696
|
+
stablePrefixHash: prep.stablePrefixHash ?? undefined,
|
|
2697
|
+
stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
|
|
2396
2698
|
}, runtime);
|
|
2397
|
-
logger.info(`[${corrId}] codex_request invoked with model=${prep.resolvedModel || "default"}, fullAuto=${fullAuto}, prompt length=${
|
|
2699
|
+
logger.info(`[${corrId}] codex_request invoked with model=${prep.resolvedModel || "default"}, fullAuto=${fullAuto}, prompt length=${prep.effectivePrompt.length}`);
|
|
2398
2700
|
// U26 fix: pass the outputSchema cleanup to awaitJobOrDefer, which
|
|
2399
2701
|
// guarantees the cleanup runs exactly once — inline for direct
|
|
2400
2702
|
// execution, on terminal status for the job-backed path (sync
|
|
@@ -2587,7 +2889,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
2587
2889
|
.string()
|
|
2588
2890
|
.min(1, "Prompt cannot be empty")
|
|
2589
2891
|
.max(100000, "Prompt too long (max 100k chars)")
|
|
2590
|
-
.
|
|
2892
|
+
.optional()
|
|
2893
|
+
.describe("Prompt text for Gemini (mutually exclusive with promptParts)"),
|
|
2894
|
+
promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
|
|
2591
2895
|
model: z
|
|
2592
2896
|
.string()
|
|
2593
2897
|
.optional()
|
|
@@ -2640,9 +2944,10 @@ export function createGatewayServer(deps = {}) {
|
|
|
2640
2944
|
policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
|
|
2641
2945
|
adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
|
|
2642
2946
|
attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Absolute file paths prepended as @<path> tokens to the prompt"),
|
|
2643
|
-
}, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, }) => {
|
|
2947
|
+
}, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, }) => {
|
|
2644
2948
|
return handleGeminiRequest({ sessionManager, logger, runtime }, {
|
|
2645
2949
|
prompt,
|
|
2950
|
+
promptParts,
|
|
2646
2951
|
model,
|
|
2647
2952
|
sessionId,
|
|
2648
2953
|
resumeLatest,
|
|
@@ -2673,7 +2978,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
2673
2978
|
.string()
|
|
2674
2979
|
.min(1, "Prompt cannot be empty")
|
|
2675
2980
|
.max(100000, "Prompt too long (max 100k chars)")
|
|
2676
|
-
.
|
|
2981
|
+
.optional()
|
|
2982
|
+
.describe("Prompt text for Grok (mutually exclusive with promptParts)"),
|
|
2983
|
+
promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
|
|
2677
2984
|
model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
|
|
2678
2985
|
outputFormat: z
|
|
2679
2986
|
.enum(["plain", "json", "streaming-json"])
|
|
@@ -2735,9 +3042,10 @@ export function createGatewayServer(deps = {}) {
|
|
|
2735
3042
|
.boolean()
|
|
2736
3043
|
.default(false)
|
|
2737
3044
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
2738
|
-
}, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
|
|
3045
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
|
|
2739
3046
|
return handleGrokRequest({ sessionManager, logger, runtime }, {
|
|
2740
3047
|
prompt,
|
|
3048
|
+
promptParts,
|
|
2741
3049
|
model,
|
|
2742
3050
|
outputFormat,
|
|
2743
3051
|
sessionId,
|
|
@@ -2767,7 +3075,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
2767
3075
|
.string()
|
|
2768
3076
|
.min(1, "Prompt cannot be empty")
|
|
2769
3077
|
.max(100000, "Prompt too long (max 100k chars)")
|
|
2770
|
-
.
|
|
3078
|
+
.optional()
|
|
3079
|
+
.describe("Prompt text for Mistral Vibe (mutually exclusive with promptParts)"),
|
|
3080
|
+
promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
|
|
2771
3081
|
model: z
|
|
2772
3082
|
.string()
|
|
2773
3083
|
.optional()
|
|
@@ -2828,9 +3138,10 @@ export function createGatewayServer(deps = {}) {
|
|
|
2828
3138
|
.boolean()
|
|
2829
3139
|
.default(false)
|
|
2830
3140
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
2831
|
-
}, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
|
|
3141
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
|
|
2832
3142
|
return handleMistralRequest({ sessionManager, logger, runtime }, {
|
|
2833
3143
|
prompt,
|
|
3144
|
+
promptParts,
|
|
2834
3145
|
model,
|
|
2835
3146
|
outputFormat,
|
|
2836
3147
|
sessionId,
|
|
@@ -2867,7 +3178,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
2867
3178
|
.string()
|
|
2868
3179
|
.min(1, "Prompt cannot be empty")
|
|
2869
3180
|
.max(100000, "Prompt too long (max 100k chars)")
|
|
2870
|
-
.
|
|
3181
|
+
.optional()
|
|
3182
|
+
.describe("Prompt text for Claude (mutually exclusive with promptParts)"),
|
|
3183
|
+
promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
|
|
2871
3184
|
model: z
|
|
2872
3185
|
.string()
|
|
2873
3186
|
.optional()
|
|
@@ -2961,12 +3274,13 @@ export function createGatewayServer(deps = {}) {
|
|
|
2961
3274
|
.boolean()
|
|
2962
3275
|
.default(false)
|
|
2963
3276
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
2964
|
-
}, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
|
|
3277
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
|
|
2965
3278
|
if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
|
|
2966
3279
|
return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
|
|
2967
3280
|
}
|
|
2968
3281
|
const prep = prepareClaudeRequest({
|
|
2969
3282
|
prompt,
|
|
3283
|
+
promptParts,
|
|
2970
3284
|
model,
|
|
2971
3285
|
outputFormat,
|
|
2972
3286
|
allowedTools,
|
|
@@ -3018,6 +3332,12 @@ export function createGatewayServer(deps = {}) {
|
|
|
3018
3332
|
await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
|
|
3019
3333
|
}
|
|
3020
3334
|
}
|
|
3335
|
+
// Slice 3: TTL warning on resume (async path too).
|
|
3336
|
+
const ttlWarning = maybeBuildCacheTtlWarning({
|
|
3337
|
+
runtime,
|
|
3338
|
+
sessionId: effectiveSessionId,
|
|
3339
|
+
cli: "claude",
|
|
3340
|
+
});
|
|
3021
3341
|
// Idle timeout only for stream-json (text/json produce no output until done)
|
|
3022
3342
|
const effectiveIdleTimeout = outputFormat === "stream-json"
|
|
3023
3343
|
? resolveIdleTimeout("claude", idleTimeoutMs)
|
|
@@ -3040,6 +3360,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
3040
3360
|
if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
|
|
3041
3361
|
asyncResponse.reviewIntegrity = prep.reviewIntegrity;
|
|
3042
3362
|
}
|
|
3363
|
+
if (ttlWarning) {
|
|
3364
|
+
asyncResponse.warnings = [ttlWarning];
|
|
3365
|
+
}
|
|
3043
3366
|
return {
|
|
3044
3367
|
content: [
|
|
3045
3368
|
{
|
|
@@ -3058,7 +3381,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
3058
3381
|
.string()
|
|
3059
3382
|
.min(1, "Prompt cannot be empty")
|
|
3060
3383
|
.max(100000, "Prompt too long (max 100k chars)")
|
|
3061
|
-
.
|
|
3384
|
+
.optional()
|
|
3385
|
+
.describe("Prompt text for Codex (mutually exclusive with promptParts)"),
|
|
3386
|
+
promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
|
|
3062
3387
|
model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
|
|
3063
3388
|
fullAuto: z
|
|
3064
3389
|
.boolean()
|
|
@@ -3131,9 +3456,10 @@ export function createGatewayServer(deps = {}) {
|
|
|
3131
3456
|
images: z.array(z.string()).optional().describe("Codex -i <path>: image attachments."),
|
|
3132
3457
|
ignoreUserConfig: z.boolean().optional().describe("Codex --ignore-user-config."),
|
|
3133
3458
|
ignoreRules: z.boolean().optional().describe("Codex --ignore-rules."),
|
|
3134
|
-
}, async ({ prompt, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, }) => {
|
|
3459
|
+
}, async ({ prompt, promptParts, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, }) => {
|
|
3135
3460
|
return handleCodexRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
3136
3461
|
prompt,
|
|
3462
|
+
promptParts,
|
|
3137
3463
|
model,
|
|
3138
3464
|
fullAuto,
|
|
3139
3465
|
sandboxMode,
|
|
@@ -3166,7 +3492,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
3166
3492
|
.string()
|
|
3167
3493
|
.min(1, "Prompt cannot be empty")
|
|
3168
3494
|
.max(100000, "Prompt too long (max 100k chars)")
|
|
3169
|
-
.
|
|
3495
|
+
.optional()
|
|
3496
|
+
.describe("Prompt text for Gemini (mutually exclusive with promptParts)"),
|
|
3497
|
+
promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
|
|
3170
3498
|
model: z
|
|
3171
3499
|
.string()
|
|
3172
3500
|
.optional()
|
|
@@ -3221,9 +3549,10 @@ export function createGatewayServer(deps = {}) {
|
|
|
3221
3549
|
policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
|
|
3222
3550
|
adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
|
|
3223
3551
|
attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Absolute file paths prepended as @<path> tokens to the prompt"),
|
|
3224
|
-
}, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, }) => {
|
|
3552
|
+
}, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, }) => {
|
|
3225
3553
|
return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
3226
3554
|
prompt,
|
|
3555
|
+
promptParts,
|
|
3227
3556
|
model,
|
|
3228
3557
|
sessionId,
|
|
3229
3558
|
resumeLatest,
|
|
@@ -3250,7 +3579,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
3250
3579
|
.string()
|
|
3251
3580
|
.min(1, "Prompt cannot be empty")
|
|
3252
3581
|
.max(100000, "Prompt too long (max 100k chars)")
|
|
3253
|
-
.
|
|
3582
|
+
.optional()
|
|
3583
|
+
.describe("Prompt text for Grok (mutually exclusive with promptParts)"),
|
|
3584
|
+
promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
|
|
3254
3585
|
model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
|
|
3255
3586
|
outputFormat: z
|
|
3256
3587
|
.enum(["plain", "json", "streaming-json"])
|
|
@@ -3311,9 +3642,10 @@ export function createGatewayServer(deps = {}) {
|
|
|
3311
3642
|
.boolean()
|
|
3312
3643
|
.default(false)
|
|
3313
3644
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3314
|
-
}, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
|
|
3645
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
|
|
3315
3646
|
return handleGrokRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
3316
3647
|
prompt,
|
|
3648
|
+
promptParts,
|
|
3317
3649
|
model,
|
|
3318
3650
|
outputFormat,
|
|
3319
3651
|
sessionId,
|
|
@@ -3339,7 +3671,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
3339
3671
|
.string()
|
|
3340
3672
|
.min(1, "Prompt cannot be empty")
|
|
3341
3673
|
.max(100000, "Prompt too long (max 100k chars)")
|
|
3342
|
-
.
|
|
3674
|
+
.optional()
|
|
3675
|
+
.describe("Prompt text for Mistral Vibe (mutually exclusive with promptParts)"),
|
|
3676
|
+
promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
|
|
3343
3677
|
model: z
|
|
3344
3678
|
.string()
|
|
3345
3679
|
.optional()
|
|
@@ -3399,9 +3733,10 @@ export function createGatewayServer(deps = {}) {
|
|
|
3399
3733
|
.boolean()
|
|
3400
3734
|
.default(false)
|
|
3401
3735
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3402
|
-
}, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
|
|
3736
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
|
|
3403
3737
|
return handleMistralRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
3404
3738
|
prompt,
|
|
3739
|
+
promptParts,
|
|
3405
3740
|
model,
|
|
3406
3741
|
outputFormat,
|
|
3407
3742
|
sessionId,
|
|
@@ -3852,6 +4187,38 @@ export function createGatewayServer(deps = {}) {
|
|
|
3852
4187
|
};
|
|
3853
4188
|
}
|
|
3854
4189
|
const activeSession = await sessionManager.getActiveSession(session.cli);
|
|
4190
|
+
// Slice 2: project a compact cacheState view from the flight
|
|
4191
|
+
// recorder at read time. NOT persisted on the Session interface
|
|
4192
|
+
// (sessions.json stays content-free per the project invariant).
|
|
4193
|
+
// The field is OMITTED entirely (not null, not empty object) when
|
|
4194
|
+
// the session has zero rows in the flight recorder so the response
|
|
4195
|
+
// stays compact for fresh sessions.
|
|
4196
|
+
//
|
|
4197
|
+
// Slice 3: include ttlRemainingMs derived from the gateway's
|
|
4198
|
+
// configured TTL policy. Null for non-claude sessions.
|
|
4199
|
+
let cacheState;
|
|
4200
|
+
try {
|
|
4201
|
+
const stats = computeSessionCacheStats(flightRecorder, session.id);
|
|
4202
|
+
if (stats.requestCount > 0) {
|
|
4203
|
+
const ttlRemainingMs = computeTtlRemaining(stats, stats.cli, {
|
|
4204
|
+
anthropicTtlSeconds: cacheAwareness?.anthropicTtlSeconds ?? 300,
|
|
4205
|
+
});
|
|
4206
|
+
cacheState = {
|
|
4207
|
+
cli: stats.cli,
|
|
4208
|
+
prefixDistinct: stats.distinctPrefixCount,
|
|
4209
|
+
totalCacheReadTokens: stats.totalCacheReadTokens,
|
|
4210
|
+
totalCacheCreationTokens: stats.totalCacheCreationTokens,
|
|
4211
|
+
requestCount: stats.requestCount,
|
|
4212
|
+
hitCount: stats.hitCount,
|
|
4213
|
+
hitRate: stats.hitRate,
|
|
4214
|
+
estimatedSavingsUsd: stats.estimatedSavingsUsd,
|
|
4215
|
+
ttlRemainingMs,
|
|
4216
|
+
};
|
|
4217
|
+
}
|
|
4218
|
+
}
|
|
4219
|
+
catch (err) {
|
|
4220
|
+
logger.warn?.(`[session_get] cache-stats lookup failed (non-fatal)`, err);
|
|
4221
|
+
}
|
|
3855
4222
|
return {
|
|
3856
4223
|
content: [
|
|
3857
4224
|
{
|
|
@@ -3861,6 +4228,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
3861
4228
|
session: {
|
|
3862
4229
|
...session,
|
|
3863
4230
|
isActive: activeSession?.id === session.id,
|
|
4231
|
+
...(cacheState ? { cacheState } : {}),
|
|
3864
4232
|
},
|
|
3865
4233
|
}, null, 2),
|
|
3866
4234
|
},
|
|
@@ -3913,7 +4281,7 @@ async function initializeSessionManager() {
|
|
|
3913
4281
|
sessionManager = await createSessionManager(config, undefined, logger);
|
|
3914
4282
|
logger.info("File-based session manager initialized");
|
|
3915
4283
|
}
|
|
3916
|
-
resourceProvider = new ResourceProvider(sessionManager, performanceMetrics);
|
|
4284
|
+
resourceProvider = new ResourceProvider(sessionManager, performanceMetrics, getFlightRecorder(logger), getCacheAwarenessConfig(logger));
|
|
3917
4285
|
}
|
|
3918
4286
|
//──────────────────────────────────────────────────────────────────────────────
|
|
3919
4287
|
// Health Check Resource (only if using PostgreSQL)
|
|
@@ -3944,7 +4312,7 @@ function registerHealthResource(server) {
|
|
|
3944
4312
|
description: "Async job health (CPU, memory, zombie detection)",
|
|
3945
4313
|
mimeType: "application/json",
|
|
3946
4314
|
}, async (uri) => {
|
|
3947
|
-
const health =
|
|
4315
|
+
const health = getAsyncJobManager().getJobHealth();
|
|
3948
4316
|
return {
|
|
3949
4317
|
contents: [
|
|
3950
4318
|
{
|
|
@@ -3980,8 +4348,10 @@ async function shutdown(signal) {
|
|
|
3980
4348
|
await db.disconnect();
|
|
3981
4349
|
logger.info("Database connections closed");
|
|
3982
4350
|
}
|
|
3983
|
-
flightRecorder
|
|
3984
|
-
|
|
4351
|
+
if (flightRecorder) {
|
|
4352
|
+
flightRecorder.close();
|
|
4353
|
+
logger.info("Flight recorder closed");
|
|
4354
|
+
}
|
|
3985
4355
|
process.exit(0);
|
|
3986
4356
|
}
|
|
3987
4357
|
catch (error) {
|
|
@@ -3997,6 +4367,20 @@ process.on("SIGINT", () => shutdown("SIGINT"));
|
|
|
3997
4367
|
async function main() {
|
|
3998
4368
|
startWindowsBootstrapperSelfHeal();
|
|
3999
4369
|
const args = process.argv.slice(2);
|
|
4370
|
+
if (args[0] === "--version" || args[0] === "-version" || args[0] === "version") {
|
|
4371
|
+
process.stdout.write(`${packageVersion()}\n`);
|
|
4372
|
+
return;
|
|
4373
|
+
}
|
|
4374
|
+
if (args[0] === "--help" || args[0] === "-help" || args[0] === "/?" || args[0] === "help") {
|
|
4375
|
+
process.stdout.write([
|
|
4376
|
+
"llm-cli-gateway MCP server",
|
|
4377
|
+
"",
|
|
4378
|
+
"Usage:",
|
|
4379
|
+
" llm-cli-gateway [doctor --json|contracts --json|--transport=http|--version]",
|
|
4380
|
+
"",
|
|
4381
|
+
].join("\n"));
|
|
4382
|
+
return;
|
|
4383
|
+
}
|
|
4000
4384
|
if (args[0] === "doctor") {
|
|
4001
4385
|
if (args.includes("--json")) {
|
|
4002
4386
|
printDoctorJson();
|
|
@@ -4035,9 +4419,9 @@ async function main() {
|
|
|
4035
4419
|
resourceProvider,
|
|
4036
4420
|
db,
|
|
4037
4421
|
performanceMetrics,
|
|
4038
|
-
asyncJobManager,
|
|
4039
|
-
approvalManager,
|
|
4040
|
-
flightRecorder,
|
|
4422
|
+
asyncJobManager: getAsyncJobManager(logger),
|
|
4423
|
+
approvalManager: getApprovalManager(logger),
|
|
4424
|
+
flightRecorder: getFlightRecorder(logger),
|
|
4041
4425
|
logger,
|
|
4042
4426
|
};
|
|
4043
4427
|
if (transportMode === "http") {
|