llm-cli-gateway 1.5.35 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +201 -0
- package/README.md +35 -4
- package/dist/cache-stats.d.ts +112 -0
- package/dist/cache-stats.js +225 -0
- package/dist/config.d.ts +41 -0
- package/dist/config.js +109 -0
- package/dist/doctor.d.ts +42 -1
- package/dist/doctor.js +121 -2
- package/dist/flight-recorder.d.ts +27 -0
- package/dist/flight-recorder.js +79 -2
- package/dist/index.d.ts +46 -9
- package/dist/index.js +395 -67
- package/dist/pricing.d.ts +54 -0
- package/dist/pricing.js +100 -0
- package/dist/prompt-parts.d.ts +38 -0
- package/dist/prompt-parts.js +42 -0
- package/dist/resources.d.ts +32 -1
- package/dist/resources.js +52 -1
- package/package.json +2 -1
- package/setup/status.schema.json +39 -0
- package/socket.yml +29 -0
package/dist/index.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2
|
+
import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
3
3
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
4
4
|
import { randomUUID } from "crypto";
|
|
5
5
|
import { existsSync, readFileSync, readdirSync, renameSync, unlinkSync } from "fs";
|
|
@@ -14,7 +14,7 @@ import { createSessionManager } from "./session-manager.js";
|
|
|
14
14
|
import { ResourceProvider } from "./resources.js";
|
|
15
15
|
import { PerformanceMetrics } from "./metrics.js";
|
|
16
16
|
import { estimateTokens, optimizePrompt as optimizePromptText, optimizeResponse as optimizeResponseText, } from "./optimizer.js";
|
|
17
|
-
import { loadConfig, loadPersistenceConfig } from "./config.js";
|
|
17
|
+
import { loadConfig, loadPersistenceConfig, loadCacheAwarenessConfig, } from "./config.js";
|
|
18
18
|
import { checkHealth } from "./health.js";
|
|
19
19
|
import { clearModelRegistryCache, getAvailableCliInfo, getCliInfo, resolveModelAlias, } from "./model-registry.js";
|
|
20
20
|
import { AsyncJobManager } from "./async-job-manager.js";
|
|
@@ -24,6 +24,8 @@ import { checkReviewIntegrity } from "./review-integrity.js";
|
|
|
24
24
|
import { buildClaudeMcpConfig, CLAUDE_MCP_SERVER_NAMES, } from "./claude-mcp-config.js";
|
|
25
25
|
import { resolveGrokSessionArgs, resolveMistralSessionArgs, resolveCodexSessionArgs, sanitizeCliArgValues, prepareMistralRequest as buildMistralCliInvocation, MISTRAL_AGENT_MODES, GATEWAY_SESSION_PREFIX, resolveClaudePermissionFlags, resolveCodexSandboxFlags, CLAUDE_PERMISSION_MODES, GEMINI_APPROVAL_MODES, CODEX_SANDBOX_MODES, CODEX_ASK_FOR_APPROVAL_MODES, CLAUDE_EFFORT_LEVELS, prepareClaudeHighImpactFlags, validateClaudeAgentsMap, prepareCodexHighImpactFlags, prepareCodexForkRequest, CODEX_CONFIG_OVERRIDES_SCHEMA, prepareGeminiHighImpactFlags, prependGeminiAttachments, resolveGeminiSessionPlan, GEMINI_HIGH_IMPACT_PARAMS_SCHEMA, } from "./request-helpers.js";
|
|
26
26
|
import { createFlightRecorder } from "./flight-recorder.js";
|
|
27
|
+
import { resolvePromptInput, PromptPartsSchema } from "./prompt-parts.js";
|
|
28
|
+
import { computeSessionCacheStats, computeTtlRemaining } from "./cache-stats.js";
|
|
27
29
|
import { getCliVersions, runCliUpgrade } from "./cli-updater.js";
|
|
28
30
|
import { startHttpGateway } from "./http-transport.js";
|
|
29
31
|
import { printDoctorJson } from "./doctor.js";
|
|
@@ -181,6 +183,7 @@ let flightRecorder = null;
|
|
|
181
183
|
// registered (see createGatewayServer), making silent in-memory loss
|
|
182
184
|
// structurally impossible.
|
|
183
185
|
let persistenceConfig = null;
|
|
186
|
+
let cacheAwarenessConfig = null;
|
|
184
187
|
let jobStore = null;
|
|
185
188
|
let jobStoreInitialized = false;
|
|
186
189
|
let asyncJobManager = null;
|
|
@@ -193,6 +196,10 @@ function getPersistenceConfig(runtimeLogger = logger) {
|
|
|
193
196
|
persistenceConfig ??= loadPersistenceConfig(runtimeLogger);
|
|
194
197
|
return persistenceConfig;
|
|
195
198
|
}
|
|
199
|
+
/**
 * Return the cache-awareness configuration, loading it on first use.
 *
 * The value is memoized in the module-level `cacheAwarenessConfig` slot; the
 * loader is only invoked while that slot is still null/undefined.
 *
 * @param runtimeLogger Logger handed to `loadCacheAwarenessConfig` when a load
 *   is needed. Defaults to the module-level `logger`.
 * @returns The memoized cache-awareness configuration.
 */
function getCacheAwarenessConfig(runtimeLogger = logger) {
    const notLoadedYet = cacheAwarenessConfig === null || cacheAwarenessConfig === undefined;
    if (notLoadedYet) {
        cacheAwarenessConfig = loadCacheAwarenessConfig(runtimeLogger);
    }
    return cacheAwarenessConfig;
}
|
|
196
203
|
function getJobStore(runtimeLogger = logger) {
|
|
197
204
|
if (jobStoreInitialized)
|
|
198
205
|
return jobStore;
|
|
@@ -242,19 +249,21 @@ function resolveGatewayServerRuntime(deps = {}, options = {}) {
|
|
|
242
249
|
(options.isolateState
|
|
243
250
|
? new ApprovalManager(undefined, runtimeLogger)
|
|
244
251
|
: getApprovalManager(runtimeLogger));
|
|
252
|
+
const runtimeFlightRecorder = deps.flightRecorder ?? getFlightRecorder(runtimeLogger);
|
|
245
253
|
return {
|
|
246
254
|
sessionManager: runtimeSessionManager,
|
|
247
255
|
resourceProvider: deps.resourceProvider ??
|
|
248
256
|
(options.isolateState
|
|
249
|
-
? new ResourceProvider(runtimeSessionManager, runtimePerformanceMetrics)
|
|
257
|
+
? new ResourceProvider(runtimeSessionManager, runtimePerformanceMetrics, runtimeFlightRecorder, deps.cacheAwareness ?? getCacheAwarenessConfig(runtimeLogger))
|
|
250
258
|
: resourceProvider),
|
|
251
259
|
db: "db" in deps ? (deps.db ?? null) : db,
|
|
252
260
|
performanceMetrics: runtimePerformanceMetrics,
|
|
253
261
|
asyncJobManager: runtimeAsyncJobManager,
|
|
254
262
|
approvalManager: runtimeApprovalManager,
|
|
255
|
-
flightRecorder:
|
|
263
|
+
flightRecorder: runtimeFlightRecorder,
|
|
256
264
|
logger: runtimeLogger,
|
|
257
265
|
persistence: deps.persistence ?? getPersistenceConfig(runtimeLogger),
|
|
266
|
+
cacheAwareness: deps.cacheAwareness ?? getCacheAwarenessConfig(runtimeLogger),
|
|
258
267
|
};
|
|
259
268
|
}
|
|
260
269
|
// Per-CLI idle timeouts: kill process if no stdout/stderr activity for this duration.
|
|
@@ -704,14 +713,118 @@ function registerBaseResources(server, runtime) {
|
|
|
704
713
|
const contents = await runtime.resourceProvider.readResource(uri.href);
|
|
705
714
|
return { contents: contents ? [contents] : [] };
|
|
706
715
|
});
|
|
716
|
+
// Cache-state resources (slice 2). Static URI for global, templated for
|
|
717
|
+
// session/{id} and prefix/{hash}. All three return tokens/hashes/aggregates
|
|
718
|
+
// ONLY — never raw prompt or response text. The structural guarantee is in
|
|
719
|
+
// the SessionCacheStats / PrefixCacheStats / GlobalCacheStats types
|
|
720
|
+
// themselves: those shapes have no prompt/response/system/task fields.
|
|
721
|
+
server.registerResource("cache-state-global", "cache_state://global", {
|
|
722
|
+
title: "💾 Cache State (Global)",
|
|
723
|
+
description: "Aggregate cache hit/miss/savings across all CLIs in the flight recorder. Tokens/hashes only — no prompt text.",
|
|
724
|
+
mimeType: "application/json",
|
|
725
|
+
}, async (uri) => {
|
|
726
|
+
runtime.logger.debug("Reading cache_state://global resource");
|
|
727
|
+
const stats = runtime.resourceProvider.readCacheStateGlobal({
|
|
728
|
+
lastNHours: 24,
|
|
729
|
+
});
|
|
730
|
+
return {
|
|
731
|
+
contents: [
|
|
732
|
+
{
|
|
733
|
+
uri: uri.href,
|
|
734
|
+
mimeType: "application/json",
|
|
735
|
+
text: JSON.stringify(stats, null, 2),
|
|
736
|
+
},
|
|
737
|
+
],
|
|
738
|
+
};
|
|
739
|
+
});
|
|
740
|
+
server.registerResource("cache-state-session", new ResourceTemplate("cache_state://session/{sessionId}", { list: undefined }), {
|
|
741
|
+
title: "💾 Cache State (Session)",
|
|
742
|
+
description: "Per-session cache hit/miss/savings. Tokens/hashes only — no prompt text.",
|
|
743
|
+
mimeType: "application/json",
|
|
744
|
+
}, async (uri, variables) => {
|
|
745
|
+
const sessionId = Array.isArray(variables.sessionId)
|
|
746
|
+
? variables.sessionId[0]
|
|
747
|
+
: variables.sessionId;
|
|
748
|
+
runtime.logger.debug(`Reading cache_state://session/${sessionId}`);
|
|
749
|
+
const stats = runtime.resourceProvider.readCacheStateSession(String(sessionId));
|
|
750
|
+
return {
|
|
751
|
+
contents: [
|
|
752
|
+
{
|
|
753
|
+
uri: uri.href,
|
|
754
|
+
mimeType: "application/json",
|
|
755
|
+
text: JSON.stringify(stats, null, 2),
|
|
756
|
+
},
|
|
757
|
+
],
|
|
758
|
+
};
|
|
759
|
+
});
|
|
760
|
+
server.registerResource("cache-state-prefix", new ResourceTemplate("cache_state://prefix/{hash}", { list: undefined }), {
|
|
761
|
+
title: "💾 Cache State (Prefix)",
|
|
762
|
+
description: "Per-stable-prefix-hash cache hit/miss/savings, with CLI breakdown. Tokens/hashes only — no prompt text.",
|
|
763
|
+
mimeType: "application/json",
|
|
764
|
+
}, async (uri, variables) => {
|
|
765
|
+
const hash = Array.isArray(variables.hash) ? variables.hash[0] : variables.hash;
|
|
766
|
+
runtime.logger.debug(`Reading cache_state://prefix/${hash}`);
|
|
767
|
+
const stats = runtime.resourceProvider.readCacheStateForPrefix(String(hash));
|
|
768
|
+
return {
|
|
769
|
+
contents: [
|
|
770
|
+
{
|
|
771
|
+
uri: uri.href,
|
|
772
|
+
mimeType: "application/json",
|
|
773
|
+
text: JSON.stringify(stats, null, 2),
|
|
774
|
+
},
|
|
775
|
+
],
|
|
776
|
+
};
|
|
777
|
+
});
|
|
778
|
+
}
|
|
779
|
+
/**
 * Slice 1: validate the prompt / promptParts mutex at the prep boundary and
 * return either an error response or the resolved input. The exact error
 * messages are part of the public contract — tests assert them verbatim.
 *
 * A `prompt` counts as supplied only when it is a non-empty string.
 * `promptParts` counts as supplied when it is neither `undefined` nor `null`
 * (JSON callers commonly send `null` for an omitted field).
 *
 * @param {object} args
 * @param {string} [args.prompt] Plain prompt text.
 * @param {object|null} [args.promptParts] Structured prompt parts.
 * @param {string} args.operation Forwarded to `createErrorResponse` on failure.
 * @param {string} args.correlationId Forwarded to `createErrorResponse` on failure.
 * @returns {{ok: false, error: *} | {ok: true, assembledPrompt: *, stablePrefixHash: *, stablePrefixTokens: *}}
 *   On failure, `error` is whatever `createErrorResponse` returns. On success,
 *   the three fields are copied from the `resolvePromptInput` result.
 */
function resolvePromptOrPartsForPrep(args) {
    const hasPrompt = typeof args.prompt === "string" && args.prompt.length > 0;
    // Normalize null to undefined so an explicit `promptParts: null` behaves
    // exactly like an omitted field instead of tripping the mutex check or
    // being forwarded to resolvePromptInput as a bogus parts object.
    const promptParts = args.promptParts ?? undefined;
    const hasParts = promptParts !== undefined;
    if (hasPrompt && hasParts) {
        return {
            ok: false,
            error: createErrorResponse(args.operation, 1, "", args.correlationId, new Error("provide exactly one of `prompt` or `promptParts`")),
        };
    }
    if (!hasPrompt && !hasParts) {
        return {
            ok: false,
            error: createErrorResponse(args.operation, 1, "", args.correlationId, new Error("one of `prompt` or `promptParts` is required")),
        };
    }
    const resolved = resolvePromptInput({
        prompt: args.prompt,
        promptParts,
    });
    return {
        ok: true,
        assembledPrompt: resolved.assembledPrompt,
        stablePrefixHash: resolved.stablePrefixHash,
        stablePrefixTokens: resolved.stablePrefixTokens,
    };
}
|
|
708
810
|
export function prepareClaudeRequest(params, runtime = resolveGatewayServerRuntime()) {
|
|
709
811
|
const corrId = params.correlationId || randomUUID();
|
|
710
812
|
const cliInfo = getCliInfo();
|
|
711
813
|
const resolvedModel = resolveModelAlias("claude", params.model, cliInfo);
|
|
814
|
+
const inputResolution = resolvePromptOrPartsForPrep({
|
|
815
|
+
prompt: params.prompt,
|
|
816
|
+
promptParts: params.promptParts,
|
|
817
|
+
operation: params.operation,
|
|
818
|
+
correlationId: corrId,
|
|
819
|
+
});
|
|
820
|
+
if (!inputResolution.ok)
|
|
821
|
+
return inputResolution.error;
|
|
822
|
+
const assembledPrompt = inputResolution.assembledPrompt;
|
|
823
|
+
const stablePrefixHash = inputResolution.stablePrefixHash;
|
|
824
|
+
const stablePrefixTokens = inputResolution.stablePrefixTokens;
|
|
712
825
|
// Review integrity check on raw prompt (before optimization)
|
|
713
826
|
const reviewIntegrity = checkReviewIntegrity({
|
|
714
|
-
prompt:
|
|
827
|
+
prompt: assembledPrompt,
|
|
715
828
|
allowedTools: params.allowedTools,
|
|
716
829
|
disallowedTools: params.disallowedTools,
|
|
717
830
|
});
|
|
@@ -722,7 +835,7 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
722
835
|
score: reviewIntegrity.totalScore,
|
|
723
836
|
});
|
|
724
837
|
}
|
|
725
|
-
let effectivePrompt =
|
|
838
|
+
let effectivePrompt = assembledPrompt;
|
|
726
839
|
if (params.optimizePrompt) {
|
|
727
840
|
const optimized = optimizePromptText(effectivePrompt);
|
|
728
841
|
logOptimizationTokens("prompt", corrId, effectivePrompt, optimized);
|
|
@@ -739,7 +852,7 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
739
852
|
approvalDecision = runtime.approvalManager.decide({
|
|
740
853
|
cli: "claude",
|
|
741
854
|
operation: params.operation,
|
|
742
|
-
prompt:
|
|
855
|
+
prompt: assembledPrompt, // Use raw assembled prompt for review-context detection, not optimized
|
|
743
856
|
bypassRequested: params.dangerouslySkipPermissions,
|
|
744
857
|
fullAuto: false,
|
|
745
858
|
requestedMcpServers,
|
|
@@ -818,14 +931,27 @@ export function prepareClaudeRequest(params, runtime = resolveGatewayServerRunti
|
|
|
818
931
|
approvalDecision,
|
|
819
932
|
reviewIntegrity,
|
|
820
933
|
args,
|
|
934
|
+
stablePrefixHash,
|
|
935
|
+
stablePrefixTokens,
|
|
821
936
|
};
|
|
822
937
|
}
|
|
823
938
|
export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntime()) {
|
|
824
939
|
const corrId = params.correlationId || randomUUID();
|
|
825
940
|
const cliInfo = getCliInfo();
|
|
826
941
|
const resolvedModel = resolveModelAlias("codex", params.model, cliInfo);
|
|
942
|
+
const inputResolution = resolvePromptOrPartsForPrep({
|
|
943
|
+
prompt: params.prompt,
|
|
944
|
+
promptParts: params.promptParts,
|
|
945
|
+
operation: params.operation,
|
|
946
|
+
correlationId: corrId,
|
|
947
|
+
});
|
|
948
|
+
if (!inputResolution.ok)
|
|
949
|
+
return inputResolution.error;
|
|
950
|
+
const assembledPrompt = inputResolution.assembledPrompt;
|
|
951
|
+
const stablePrefixHash = inputResolution.stablePrefixHash;
|
|
952
|
+
const stablePrefixTokens = inputResolution.stablePrefixTokens;
|
|
827
953
|
// Review integrity check on raw prompt (before optimization)
|
|
828
|
-
const reviewIntegrity = checkReviewIntegrity({ prompt:
|
|
954
|
+
const reviewIntegrity = checkReviewIntegrity({ prompt: assembledPrompt });
|
|
829
955
|
if (reviewIntegrity.violations.length > 0) {
|
|
830
956
|
runtime.logger.info(`[${corrId}] Review integrity violations detected: ${reviewIntegrity.violations.map(v => v.type).join(", ")}`, {
|
|
831
957
|
cli: "codex",
|
|
@@ -833,7 +959,7 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
|
|
|
833
959
|
score: reviewIntegrity.totalScore,
|
|
834
960
|
});
|
|
835
961
|
}
|
|
836
|
-
let effectivePrompt =
|
|
962
|
+
let effectivePrompt = assembledPrompt;
|
|
837
963
|
if (params.optimizePrompt) {
|
|
838
964
|
const optimized = optimizePromptText(effectivePrompt);
|
|
839
965
|
logOptimizationTokens("prompt", corrId, effectivePrompt, optimized);
|
|
@@ -845,7 +971,7 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
|
|
|
845
971
|
approvalDecision = runtime.approvalManager.decide({
|
|
846
972
|
cli: "codex",
|
|
847
973
|
operation: params.operation,
|
|
848
|
-
prompt:
|
|
974
|
+
prompt: assembledPrompt, // Use raw assembled prompt for review-context detection, not optimized
|
|
849
975
|
bypassRequested: params.dangerouslyBypassApprovalsAndSandbox,
|
|
850
976
|
fullAuto: params.fullAuto,
|
|
851
977
|
requestedMcpServers,
|
|
@@ -960,15 +1086,28 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
|
|
|
960
1086
|
reviewIntegrity,
|
|
961
1087
|
args,
|
|
962
1088
|
cleanup: highImpactCleanup,
|
|
1089
|
+
stablePrefixHash,
|
|
1090
|
+
stablePrefixTokens,
|
|
963
1091
|
};
|
|
964
1092
|
}
|
|
965
1093
|
export function prepareGeminiRequest(params, runtime = resolveGatewayServerRuntime()) {
|
|
966
1094
|
const corrId = params.correlationId || randomUUID();
|
|
967
1095
|
const cliInfo = getCliInfo();
|
|
968
1096
|
const resolvedModel = resolveModelAlias("gemini", params.model, cliInfo);
|
|
1097
|
+
const inputResolution = resolvePromptOrPartsForPrep({
|
|
1098
|
+
prompt: params.prompt,
|
|
1099
|
+
promptParts: params.promptParts,
|
|
1100
|
+
operation: params.operation,
|
|
1101
|
+
correlationId: corrId,
|
|
1102
|
+
});
|
|
1103
|
+
if (!inputResolution.ok)
|
|
1104
|
+
return inputResolution.error;
|
|
1105
|
+
const assembledPrompt = inputResolution.assembledPrompt;
|
|
1106
|
+
const stablePrefixHash = inputResolution.stablePrefixHash;
|
|
1107
|
+
const stablePrefixTokens = inputResolution.stablePrefixTokens;
|
|
969
1108
|
// Review integrity check on raw prompt (before optimization)
|
|
970
1109
|
const reviewIntegrity = checkReviewIntegrity({
|
|
971
|
-
prompt:
|
|
1110
|
+
prompt: assembledPrompt,
|
|
972
1111
|
allowedTools: params.allowedTools,
|
|
973
1112
|
});
|
|
974
1113
|
if (reviewIntegrity.violations.length > 0) {
|
|
@@ -978,7 +1117,7 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
|
|
|
978
1117
|
score: reviewIntegrity.totalScore,
|
|
979
1118
|
});
|
|
980
1119
|
}
|
|
981
|
-
let effectivePrompt =
|
|
1120
|
+
let effectivePrompt = assembledPrompt;
|
|
982
1121
|
if (params.optimizePrompt) {
|
|
983
1122
|
const optimized = optimizePromptText(effectivePrompt);
|
|
984
1123
|
logOptimizationTokens("prompt", corrId, effectivePrompt, optimized);
|
|
@@ -990,7 +1129,7 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
|
|
|
990
1129
|
approvalDecision = runtime.approvalManager.decide({
|
|
991
1130
|
cli: "gemini",
|
|
992
1131
|
operation: params.operation,
|
|
993
|
-
prompt:
|
|
1132
|
+
prompt: assembledPrompt, // Use raw assembled prompt for review-context detection, not optimized
|
|
994
1133
|
bypassRequested: params.approvalMode === "yolo",
|
|
995
1134
|
fullAuto: false,
|
|
996
1135
|
requestedMcpServers,
|
|
@@ -1060,15 +1199,28 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1060
1199
|
approvalDecision,
|
|
1061
1200
|
reviewIntegrity,
|
|
1062
1201
|
args,
|
|
1202
|
+
stablePrefixHash,
|
|
1203
|
+
stablePrefixTokens,
|
|
1063
1204
|
};
|
|
1064
1205
|
}
|
|
1065
1206
|
function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime()) {
|
|
1066
1207
|
const corrId = params.correlationId || randomUUID();
|
|
1067
1208
|
const cliInfo = getCliInfo();
|
|
1068
1209
|
const resolvedModel = resolveModelAlias("grok", params.model, cliInfo);
|
|
1210
|
+
const inputResolution = resolvePromptOrPartsForPrep({
|
|
1211
|
+
prompt: params.prompt,
|
|
1212
|
+
promptParts: params.promptParts,
|
|
1213
|
+
operation: params.operation,
|
|
1214
|
+
correlationId: corrId,
|
|
1215
|
+
});
|
|
1216
|
+
if (!inputResolution.ok)
|
|
1217
|
+
return inputResolution.error;
|
|
1218
|
+
const assembledPrompt = inputResolution.assembledPrompt;
|
|
1219
|
+
const stablePrefixHash = inputResolution.stablePrefixHash;
|
|
1220
|
+
const stablePrefixTokens = inputResolution.stablePrefixTokens;
|
|
1069
1221
|
// Review integrity check on raw prompt (before optimization)
|
|
1070
1222
|
const reviewIntegrity = checkReviewIntegrity({
|
|
1071
|
-
prompt:
|
|
1223
|
+
prompt: assembledPrompt,
|
|
1072
1224
|
allowedTools: params.allowedTools,
|
|
1073
1225
|
disallowedTools: params.disallowedTools,
|
|
1074
1226
|
});
|
|
@@ -1079,7 +1231,7 @@ function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime()) {
|
|
|
1079
1231
|
score: reviewIntegrity.totalScore,
|
|
1080
1232
|
});
|
|
1081
1233
|
}
|
|
1082
|
-
let effectivePrompt =
|
|
1234
|
+
let effectivePrompt = assembledPrompt;
|
|
1083
1235
|
if (params.optimizePrompt) {
|
|
1084
1236
|
const optimized = optimizePromptText(effectivePrompt);
|
|
1085
1237
|
logOptimizationTokens("prompt", corrId, effectivePrompt, optimized);
|
|
@@ -1091,7 +1243,7 @@ function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime()) {
|
|
|
1091
1243
|
approvalDecision = runtime.approvalManager.decide({
|
|
1092
1244
|
cli: "grok",
|
|
1093
1245
|
operation: params.operation,
|
|
1094
|
-
prompt:
|
|
1246
|
+
prompt: assembledPrompt, // Use raw assembled prompt for review-context detection, not optimized
|
|
1095
1247
|
bypassRequested: Boolean(params.alwaysApprove) || params.permissionMode === "bypassPermissions",
|
|
1096
1248
|
fullAuto: false,
|
|
1097
1249
|
requestedMcpServers,
|
|
@@ -1135,14 +1287,27 @@ function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime()) {
|
|
|
1135
1287
|
approvalDecision,
|
|
1136
1288
|
reviewIntegrity,
|
|
1137
1289
|
args,
|
|
1290
|
+
stablePrefixHash,
|
|
1291
|
+
stablePrefixTokens,
|
|
1138
1292
|
};
|
|
1139
1293
|
}
|
|
1140
1294
|
export function prepareMistralRequest(params, runtime = resolveGatewayServerRuntime()) {
|
|
1141
1295
|
const corrId = params.correlationId || randomUUID();
|
|
1142
1296
|
const cliInfo = getCliInfo();
|
|
1143
1297
|
const resolvedModel = resolveModelAlias("mistral", params.model, cliInfo);
|
|
1144
|
-
const
|
|
1298
|
+
const inputResolution = resolvePromptOrPartsForPrep({
|
|
1145
1299
|
prompt: params.prompt,
|
|
1300
|
+
promptParts: params.promptParts,
|
|
1301
|
+
operation: params.operation,
|
|
1302
|
+
correlationId: corrId,
|
|
1303
|
+
});
|
|
1304
|
+
if (!inputResolution.ok)
|
|
1305
|
+
return inputResolution.error;
|
|
1306
|
+
const assembledPrompt = inputResolution.assembledPrompt;
|
|
1307
|
+
const stablePrefixHash = inputResolution.stablePrefixHash;
|
|
1308
|
+
const stablePrefixTokens = inputResolution.stablePrefixTokens;
|
|
1309
|
+
const reviewIntegrity = checkReviewIntegrity({
|
|
1310
|
+
prompt: assembledPrompt,
|
|
1146
1311
|
allowedTools: params.allowedTools,
|
|
1147
1312
|
disallowedTools: params.disallowedTools,
|
|
1148
1313
|
});
|
|
@@ -1153,7 +1318,7 @@ export function prepareMistralRequest(params, runtime = resolveGatewayServerRunt
|
|
|
1153
1318
|
score: reviewIntegrity.totalScore,
|
|
1154
1319
|
});
|
|
1155
1320
|
}
|
|
1156
|
-
let effectivePrompt =
|
|
1321
|
+
let effectivePrompt = assembledPrompt;
|
|
1157
1322
|
if (params.optimizePrompt) {
|
|
1158
1323
|
const optimized = optimizePromptText(effectivePrompt);
|
|
1159
1324
|
logOptimizationTokens("prompt", corrId, effectivePrompt, optimized);
|
|
@@ -1165,7 +1330,7 @@ export function prepareMistralRequest(params, runtime = resolveGatewayServerRunt
|
|
|
1165
1330
|
approvalDecision = runtime.approvalManager.decide({
|
|
1166
1331
|
cli: "mistral",
|
|
1167
1332
|
operation: params.operation,
|
|
1168
|
-
prompt:
|
|
1333
|
+
prompt: assembledPrompt,
|
|
1169
1334
|
bypassRequested: params.permissionMode === "auto-approve",
|
|
1170
1335
|
fullAuto: false,
|
|
1171
1336
|
requestedMcpServers,
|
|
@@ -1210,6 +1375,8 @@ export function prepareMistralRequest(params, runtime = resolveGatewayServerRunt
|
|
|
1210
1375
|
reviewIntegrity,
|
|
1211
1376
|
args: prep.args,
|
|
1212
1377
|
mistralEnv: prep.env,
|
|
1378
|
+
stablePrefixHash,
|
|
1379
|
+
stablePrefixTokens,
|
|
1213
1380
|
};
|
|
1214
1381
|
}
|
|
1215
1382
|
function isMistralModelSelectionFailure(stderr) {
|
|
@@ -1225,7 +1392,7 @@ function selectMistralRecoveryModel(failedModel) {
|
|
|
1225
1392
|
].filter((model) => Boolean(model && model !== failedModel));
|
|
1226
1393
|
return candidates.find(model => model !== "local");
|
|
1227
1394
|
}
|
|
1228
|
-
function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep, durationMs, resumable, outputFormat) {
|
|
1395
|
+
function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep, durationMs, resumable, outputFormat, warnings) {
|
|
1229
1396
|
let finalStdout = stdout;
|
|
1230
1397
|
// Skip response optimization for JSON output to prevent corrupting structured data
|
|
1231
1398
|
if (optimizeResponse && outputFormat !== "json") {
|
|
@@ -1274,8 +1441,41 @@ function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep
|
|
|
1274
1441
|
if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
|
|
1275
1442
|
response.reviewIntegrity = prep.reviewIntegrity;
|
|
1276
1443
|
}
|
|
1444
|
+
if (warnings && warnings.length > 0) {
|
|
1445
|
+
response.warnings = warnings;
|
|
1446
|
+
}
|
|
1277
1447
|
return response;
|
|
1278
1448
|
}
|
|
1449
|
+
/**
 * Slice 3 helper: compute the cache_ttl_expiring_soon warning for a
 * claude session. A warning is produced only when all of the following hold:
 *   - `args.cli` is "claude" and `args.sessionId` is set;
 *   - `runtime.cacheAwareness.warnOnTtlExpiry` is truthy;
 *   - the session stats report at least one request and a `lastRequestAt`;
 *   - the remaining TTL is a finite number below the threshold.
 *
 * @param {object} args
 * @param {string} args.cli CLI name; anything other than "claude" yields null.
 * @param {string} [args.sessionId] Session to inspect.
 * @param {object} args.runtime Provides `cacheAwareness` and `flightRecorder`.
 * @param {number} [args.thresholdMs=30000] Warn when the TTL is below this.
 * @returns {{code: string, ttlRemainingMs: number, message: string} | null}
 *   The warning object, or null when no warning applies.
 */
function maybeBuildCacheTtlWarning(args) {
    if (args.cli !== "claude")
        return null;
    if (!args.sessionId)
        return null;
    if (!args.runtime.cacheAwareness?.warnOnTtlExpiry)
        return null;
    const stats = computeSessionCacheStats(args.runtime.flightRecorder, args.sessionId);
    if (stats.requestCount === 0 || !stats.lastRequestAt)
        return null;
    const ttl = computeTtlRemaining(stats, args.cli, {
        anthropicTtlSeconds: args.runtime.cacheAwareness.anthropicTtlSeconds,
    });
    // Reject null, undefined and NaN alike: a non-finite TTL would otherwise
    // fail the `>=` comparison below and surface as "expires in NaNms".
    if (typeof ttl !== "number" || !Number.isFinite(ttl))
        return null;
    const threshold = args.thresholdMs ?? 30_000;
    if (ttl >= threshold)
        return null;
    return {
        code: "cache_ttl_expiring_soon",
        ttlRemainingMs: ttl,
        message: `Anthropic cache breakpoint for session ${args.sessionId} expires in ${ttl}ms (< ${threshold}ms). Subsequent requests may miss the cache.`,
    };
}
|
|
1279
1479
|
function resolveHandlerRuntime(deps) {
|
|
1280
1480
|
if (deps.runtime)
|
|
1281
1481
|
return deps.runtime;
|
|
@@ -1299,6 +1499,7 @@ export async function handleGeminiRequest(deps, params) {
|
|
|
1299
1499
|
const startTime = Date.now();
|
|
1300
1500
|
const prep = prepareGeminiRequest({
|
|
1301
1501
|
prompt: params.prompt,
|
|
1502
|
+
promptParts: params.promptParts,
|
|
1302
1503
|
model: params.model,
|
|
1303
1504
|
approvalMode: params.approvalMode,
|
|
1304
1505
|
approvalStrategy: params.approvalStrategy,
|
|
@@ -1324,10 +1525,12 @@ export async function handleGeminiRequest(deps, params) {
|
|
|
1324
1525
|
correlationId: corrId,
|
|
1325
1526
|
cli: "gemini",
|
|
1326
1527
|
model: prep.resolvedModel || "default",
|
|
1327
|
-
prompt:
|
|
1528
|
+
prompt: prep.effectivePrompt,
|
|
1328
1529
|
sessionId: params.sessionId,
|
|
1530
|
+
stablePrefixHash: prep.stablePrefixHash ?? undefined,
|
|
1531
|
+
stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
|
|
1329
1532
|
}, runtime);
|
|
1330
|
-
deps.logger.info(`[${corrId}] gemini_request invoked with model=${prep.resolvedModel || "default"}, approvalMode=${params.approvalMode}, prompt length=${
|
|
1533
|
+
deps.logger.info(`[${corrId}] gemini_request invoked with model=${prep.resolvedModel || "default"}, approvalMode=${params.approvalMode}, prompt length=${prep.effectivePrompt.length}`);
|
|
1331
1534
|
try {
|
|
1332
1535
|
// Gemini CLI 0.43 supports `--resume`, but not a supported fresh
|
|
1333
1536
|
// `--session-id` flag. Fresh sessions emit no session flag.
|
|
@@ -1423,6 +1626,7 @@ export async function handleGeminiRequestAsync(deps, params) {
|
|
|
1423
1626
|
const runtime = resolveHandlerRuntime(deps);
|
|
1424
1627
|
const prep = prepareGeminiRequest({
|
|
1425
1628
|
prompt: params.prompt,
|
|
1629
|
+
promptParts: params.promptParts,
|
|
1426
1630
|
model: params.model,
|
|
1427
1631
|
approvalMode: params.approvalMode,
|
|
1428
1632
|
approvalStrategy: params.approvalStrategy,
|
|
@@ -1502,6 +1706,7 @@ export async function handleGrokRequest(deps, params) {
|
|
|
1502
1706
|
const startTime = Date.now();
|
|
1503
1707
|
const prep = prepareGrokRequest({
|
|
1504
1708
|
prompt: params.prompt,
|
|
1709
|
+
promptParts: params.promptParts,
|
|
1505
1710
|
model: params.model,
|
|
1506
1711
|
outputFormat: params.outputFormat,
|
|
1507
1712
|
alwaysApprove: params.alwaysApprove,
|
|
@@ -1526,10 +1731,12 @@ export async function handleGrokRequest(deps, params) {
|
|
|
1526
1731
|
correlationId: corrId,
|
|
1527
1732
|
cli: "grok",
|
|
1528
1733
|
model: prep.resolvedModel || "default",
|
|
1529
|
-
prompt:
|
|
1734
|
+
prompt: prep.effectivePrompt,
|
|
1530
1735
|
sessionId: params.sessionId,
|
|
1736
|
+
stablePrefixHash: prep.stablePrefixHash ?? undefined,
|
|
1737
|
+
stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
|
|
1531
1738
|
}, runtime);
|
|
1532
|
-
deps.logger.info(`[${corrId}] grok_request invoked with model=${prep.resolvedModel || "default"}, permissionMode=${params.permissionMode}, prompt length=${
|
|
1739
|
+
deps.logger.info(`[${corrId}] grok_request invoked with model=${prep.resolvedModel || "default"}, permissionMode=${params.permissionMode}, prompt length=${prep.effectivePrompt.length}`);
|
|
1533
1740
|
try {
|
|
1534
1741
|
// Session arg planning (pure, no I/O)
|
|
1535
1742
|
const sessionResult = resolveGrokSessionArgs({
|
|
@@ -1618,6 +1825,7 @@ export async function handleGrokRequestAsync(deps, params) {
|
|
|
1618
1825
|
const runtime = resolveHandlerRuntime(deps);
|
|
1619
1826
|
const prep = prepareGrokRequest({
|
|
1620
1827
|
prompt: params.prompt,
|
|
1828
|
+
promptParts: params.promptParts,
|
|
1621
1829
|
model: params.model,
|
|
1622
1830
|
outputFormat: params.outputFormat,
|
|
1623
1831
|
alwaysApprove: params.alwaysApprove,
|
|
@@ -1698,6 +1906,7 @@ export async function handleMistralRequest(deps, params) {
|
|
|
1698
1906
|
const startTime = Date.now();
|
|
1699
1907
|
const prep = prepareMistralRequest({
|
|
1700
1908
|
prompt: params.prompt,
|
|
1909
|
+
promptParts: params.promptParts,
|
|
1701
1910
|
model: params.model,
|
|
1702
1911
|
outputFormat: params.outputFormat,
|
|
1703
1912
|
permissionMode: params.permissionMode,
|
|
@@ -1721,10 +1930,12 @@ export async function handleMistralRequest(deps, params) {
|
|
|
1721
1930
|
correlationId: corrId,
|
|
1722
1931
|
cli: "mistral",
|
|
1723
1932
|
model: prep.resolvedModel || "default",
|
|
1724
|
-
prompt:
|
|
1933
|
+
prompt: prep.effectivePrompt,
|
|
1725
1934
|
sessionId: params.sessionId,
|
|
1935
|
+
stablePrefixHash: prep.stablePrefixHash ?? undefined,
|
|
1936
|
+
stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
|
|
1726
1937
|
}, runtime);
|
|
1727
|
-
deps.logger.info(`[${corrId}] mistral_request invoked with model=${prep.resolvedModel || "default"}, permissionMode=${params.permissionMode || "auto-approve"}, prompt length=${
|
|
1938
|
+
deps.logger.info(`[${corrId}] mistral_request invoked with model=${prep.resolvedModel || "default"}, permissionMode=${params.permissionMode || "auto-approve"}, prompt length=${prep.effectivePrompt.length}`);
|
|
1728
1939
|
try {
|
|
1729
1940
|
const sessionResult = resolveMistralSessionArgs({
|
|
1730
1941
|
sessionId: params.sessionId,
|
|
@@ -1835,6 +2046,7 @@ export async function handleMistralRequestAsync(deps, params) {
|
|
|
1835
2046
|
const runtime = resolveHandlerRuntime(deps);
|
|
1836
2047
|
const prep = prepareMistralRequest({
|
|
1837
2048
|
prompt: params.prompt,
|
|
2049
|
+
promptParts: params.promptParts,
|
|
1838
2050
|
model: params.model,
|
|
1839
2051
|
outputFormat: params.outputFormat,
|
|
1840
2052
|
permissionMode: params.permissionMode,
|
|
@@ -1910,6 +2122,7 @@ export async function handleCodexRequestAsync(deps, params) {
|
|
|
1910
2122
|
const runtime = resolveHandlerRuntime(deps);
|
|
1911
2123
|
const prep = prepareCodexRequest({
|
|
1912
2124
|
prompt: params.prompt,
|
|
2125
|
+
promptParts: params.promptParts,
|
|
1913
2126
|
model: params.model,
|
|
1914
2127
|
fullAuto: params.fullAuto,
|
|
1915
2128
|
sandboxMode: params.sandboxMode,
|
|
@@ -2026,7 +2239,14 @@ export async function handleCodexRequestAsync(deps, params) {
|
|
|
2026
2239
|
//──────────────────────────────────────────────────────────────────────────────
|
|
2027
2240
|
export function createGatewayServer(deps = {}) {
|
|
2028
2241
|
const runtime = resolveGatewayServerRuntime(deps, { isolateState: true });
|
|
2029
|
-
const { sessionManager, asyncJobManager, approvalManager, performanceMetrics, logger, persistence, } = runtime;
|
|
2242
|
+
const { sessionManager, asyncJobManager, approvalManager, performanceMetrics, logger, persistence, flightRecorder, cacheAwareness, } = runtime;
|
|
2243
|
+
// `flightRecorder` is destructured into closure scope so the session_get
|
|
2244
|
+
// handler (see ~line 5590) has the FlightRecorderQuery read capability
|
|
2245
|
+
// available without re-resolving runtime. Slice 2 will populate the
|
|
2246
|
+
// `cacheState` field of session_get's response from this read surface.
|
|
2247
|
+
// `cacheAwareness` is the loaded [cache_awareness] block (config.ts).
|
|
2248
|
+
void flightRecorder;
|
|
2249
|
+
void cacheAwareness;
|
|
2030
2250
|
// Structural invariant: tools register iff ALL THREE conditions hold:
|
|
2031
2251
|
// (1) persistence.backend !== "none" — the operator/config has not
|
|
2032
2252
|
// explicitly disabled durable persistence;
|
|
@@ -2052,7 +2272,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
2052
2272
|
.string()
|
|
2053
2273
|
.min(1, "Prompt cannot be empty")
|
|
2054
2274
|
.max(100000, "Prompt too long (max 100k chars)")
|
|
2055
|
-
.
|
|
2275
|
+
.optional()
|
|
2276
|
+
.describe("Prompt text for Claude (mutually exclusive with promptParts)"),
|
|
2277
|
+
promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
|
|
2056
2278
|
model: z
|
|
2057
2279
|
.string()
|
|
2058
2280
|
.optional()
|
|
@@ -2147,13 +2369,14 @@ export function createGatewayServer(deps = {}) {
|
|
|
2147
2369
|
.boolean()
|
|
2148
2370
|
.default(false)
|
|
2149
2371
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
2150
|
-
}, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
|
|
2372
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
|
|
2151
2373
|
const startTime = Date.now();
|
|
2152
2374
|
if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
|
|
2153
2375
|
return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
|
|
2154
2376
|
}
|
|
2155
2377
|
const prep = prepareClaudeRequest({
|
|
2156
2378
|
prompt,
|
|
2379
|
+
promptParts,
|
|
2157
2380
|
model,
|
|
2158
2381
|
outputFormat,
|
|
2159
2382
|
allowedTools,
|
|
@@ -2182,26 +2405,53 @@ export function createGatewayServer(deps = {}) {
|
|
|
2182
2405
|
const { corrId, args } = prep;
|
|
2183
2406
|
let durationMs = 0;
|
|
2184
2407
|
let wasSuccessful = false;
|
|
2408
|
+
// Session resolution happens BEFORE safeFlightStart so that:
|
|
2409
|
+
// (1) the TTL warning reads the PRIOR session's lastWriteAt
|
|
2410
|
+
// rather than the row about to be inserted (codex-r1/F1).
|
|
2411
|
+
// (2) the flight-recorder row is tagged with effectiveSessionId
|
|
2412
|
+
// (the session the CLI will actually resume), not the raw
|
|
2413
|
+
// user-provided sessionId.
|
|
2414
|
+
let effectiveSessionId = sessionId;
|
|
2415
|
+
let useContinue = continueSession;
|
|
2416
|
+
// Guard the active-session lookup: in some test harnesses the
|
|
2417
|
+
// sessionManager is undefined; the original try-catch wrapped this
|
|
2418
|
+
// block, so we replicate that tolerance here. Failure leaves
|
|
2419
|
+
// effectiveSessionId as the user-provided sessionId.
|
|
2420
|
+
let activeSession = null;
|
|
2421
|
+
try {
|
|
2422
|
+
activeSession = await sessionManager.getActiveSession("claude");
|
|
2423
|
+
}
|
|
2424
|
+
catch (err) {
|
|
2425
|
+
logger.warn(`[${corrId}] sessionManager.getActiveSession failed (non-fatal): ${err.message}`);
|
|
2426
|
+
}
|
|
2427
|
+
if (!createNewSession && !continueSession && !sessionId && activeSession) {
|
|
2428
|
+
effectiveSessionId = activeSession.id;
|
|
2429
|
+
useContinue = true;
|
|
2430
|
+
}
|
|
2431
|
+
if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
|
|
2432
|
+
useContinue = true;
|
|
2433
|
+
}
|
|
2434
|
+
// Slice 3: if the resolved session has a near-expiry Anthropic
|
|
2435
|
+
// cache breakpoint, attach a structured warning (NOT a hard error)
|
|
2436
|
+
// to the response. Computed BEFORE safeFlightStart so the current
|
|
2437
|
+
// row does not skew lastRequestAt.
|
|
2438
|
+
const ttlWarning = maybeBuildCacheTtlWarning({
|
|
2439
|
+
runtime,
|
|
2440
|
+
sessionId: effectiveSessionId,
|
|
2441
|
+
cli: "claude",
|
|
2442
|
+
});
|
|
2443
|
+
const warnings = ttlWarning ? [ttlWarning] : [];
|
|
2185
2444
|
safeFlightStart({
|
|
2186
2445
|
correlationId: corrId,
|
|
2187
2446
|
cli: "claude",
|
|
2188
2447
|
model: prep.resolvedModel || "default",
|
|
2189
|
-
prompt,
|
|
2190
|
-
sessionId,
|
|
2448
|
+
prompt: prep.effectivePrompt,
|
|
2449
|
+
sessionId: effectiveSessionId,
|
|
2450
|
+
stablePrefixHash: prep.stablePrefixHash ?? undefined,
|
|
2451
|
+
stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
|
|
2191
2452
|
}, runtime);
|
|
2192
|
-
logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${
|
|
2453
|
+
logger.info(`[${corrId}] claude_request invoked with model=${prep.resolvedModel || "default"}, outputFormat=${outputFormat}, prompt length=${prep.effectivePrompt.length}, sessionId=${effectiveSessionId}`);
|
|
2193
2454
|
try {
|
|
2194
|
-
// Session management
|
|
2195
|
-
let effectiveSessionId = sessionId;
|
|
2196
|
-
let useContinue = continueSession;
|
|
2197
|
-
const activeSession = await sessionManager.getActiveSession("claude");
|
|
2198
|
-
if (!createNewSession && !continueSession && !sessionId && activeSession) {
|
|
2199
|
-
effectiveSessionId = activeSession.id;
|
|
2200
|
-
useContinue = true;
|
|
2201
|
-
}
|
|
2202
|
-
if (!useContinue && effectiveSessionId && activeSession?.id === effectiveSessionId) {
|
|
2203
|
-
useContinue = true;
|
|
2204
|
-
}
|
|
2205
2455
|
if (useContinue) {
|
|
2206
2456
|
args.push("--continue");
|
|
2207
2457
|
}
|
|
@@ -2230,7 +2480,14 @@ export function createGatewayServer(deps = {}) {
|
|
|
2230
2480
|
errorMessage: stderr || `Exit code ${code}`,
|
|
2231
2481
|
status: "failed",
|
|
2232
2482
|
}, runtime);
|
|
2233
|
-
|
|
2483
|
+
// Slice 3: attach any computed warnings to the error response so
|
|
2484
|
+
// the caller still sees cache_ttl_expiring_soon when the CLI
|
|
2485
|
+
// happens to fail for an unrelated reason.
|
|
2486
|
+
const errResp = createErrorResponse("claude", code, stderr, corrId);
|
|
2487
|
+
if (warnings.length > 0) {
|
|
2488
|
+
errResp.warnings = warnings;
|
|
2489
|
+
}
|
|
2490
|
+
return errResp;
|
|
2234
2491
|
}
|
|
2235
2492
|
wasSuccessful = true;
|
|
2236
2493
|
// If we used a session ID and it's not tracked yet, create a session record
|
|
@@ -2261,7 +2518,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
2261
2518
|
exitCode: 0,
|
|
2262
2519
|
status: "completed",
|
|
2263
2520
|
}, runtime);
|
|
2264
|
-
return buildCliResponse("claude", parsed.text, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
|
|
2521
|
+
return buildCliResponse("claude", parsed.text, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat, warnings);
|
|
2265
2522
|
}
|
|
2266
2523
|
safeFlightComplete(corrId, {
|
|
2267
2524
|
response: stdout,
|
|
@@ -2272,7 +2529,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
2272
2529
|
exitCode: 0,
|
|
2273
2530
|
status: "completed",
|
|
2274
2531
|
}, runtime);
|
|
2275
|
-
return buildCliResponse("claude", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat);
|
|
2532
|
+
return buildCliResponse("claude", stdout, optimizeResponse, corrId, effectiveSessionId, prep, durationMs, undefined, outputFormat, warnings);
|
|
2276
2533
|
}
|
|
2277
2534
|
catch (error) {
|
|
2278
2535
|
const elapsedMs = Math.max(0, Date.now() - startTime);
|
|
@@ -2302,7 +2559,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
2302
2559
|
.string()
|
|
2303
2560
|
.min(1, "Prompt cannot be empty")
|
|
2304
2561
|
.max(100000, "Prompt too long (max 100k chars)")
|
|
2305
|
-
.
|
|
2562
|
+
.optional()
|
|
2563
|
+
.describe("Prompt text for Codex (mutually exclusive with promptParts)"),
|
|
2564
|
+
promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
|
|
2306
2565
|
model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
|
|
2307
2566
|
fullAuto: z
|
|
2308
2567
|
.boolean()
|
|
@@ -2393,10 +2652,11 @@ export function createGatewayServer(deps = {}) {
|
|
|
2393
2652
|
.boolean()
|
|
2394
2653
|
.optional()
|
|
2395
2654
|
.describe("Codex --ignore-rules: skip project rule files for this run."),
|
|
2396
|
-
}, async ({ prompt, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, }) => {
|
|
2655
|
+
}, async ({ prompt, promptParts, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, }) => {
|
|
2397
2656
|
const startTime = Date.now();
|
|
2398
2657
|
const prep = prepareCodexRequest({
|
|
2399
2658
|
prompt,
|
|
2659
|
+
promptParts,
|
|
2400
2660
|
model,
|
|
2401
2661
|
fullAuto,
|
|
2402
2662
|
sandboxMode,
|
|
@@ -2431,10 +2691,12 @@ export function createGatewayServer(deps = {}) {
|
|
|
2431
2691
|
correlationId: corrId,
|
|
2432
2692
|
cli: "codex",
|
|
2433
2693
|
model: prep.resolvedModel || "default",
|
|
2434
|
-
prompt,
|
|
2694
|
+
prompt: prep.effectivePrompt,
|
|
2435
2695
|
sessionId,
|
|
2696
|
+
stablePrefixHash: prep.stablePrefixHash ?? undefined,
|
|
2697
|
+
stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
|
|
2436
2698
|
}, runtime);
|
|
2437
|
-
logger.info(`[${corrId}] codex_request invoked with model=${prep.resolvedModel || "default"}, fullAuto=${fullAuto}, prompt length=${
|
|
2699
|
+
logger.info(`[${corrId}] codex_request invoked with model=${prep.resolvedModel || "default"}, fullAuto=${fullAuto}, prompt length=${prep.effectivePrompt.length}`);
|
|
2438
2700
|
// U26 fix: pass the outputSchema cleanup to awaitJobOrDefer, which
|
|
2439
2701
|
// guarantees the cleanup runs exactly once — inline for direct
|
|
2440
2702
|
// execution, on terminal status for the job-backed path (sync
|
|
@@ -2627,7 +2889,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
2627
2889
|
.string()
|
|
2628
2890
|
.min(1, "Prompt cannot be empty")
|
|
2629
2891
|
.max(100000, "Prompt too long (max 100k chars)")
|
|
2630
|
-
.
|
|
2892
|
+
.optional()
|
|
2893
|
+
.describe("Prompt text for Gemini (mutually exclusive with promptParts)"),
|
|
2894
|
+
promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
|
|
2631
2895
|
model: z
|
|
2632
2896
|
.string()
|
|
2633
2897
|
.optional()
|
|
@@ -2680,9 +2944,10 @@ export function createGatewayServer(deps = {}) {
|
|
|
2680
2944
|
policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
|
|
2681
2945
|
adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
|
|
2682
2946
|
attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Absolute file paths prepended as @<path> tokens to the prompt"),
|
|
2683
|
-
}, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, }) => {
|
|
2947
|
+
}, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, }) => {
|
|
2684
2948
|
return handleGeminiRequest({ sessionManager, logger, runtime }, {
|
|
2685
2949
|
prompt,
|
|
2950
|
+
promptParts,
|
|
2686
2951
|
model,
|
|
2687
2952
|
sessionId,
|
|
2688
2953
|
resumeLatest,
|
|
@@ -2713,7 +2978,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
2713
2978
|
.string()
|
|
2714
2979
|
.min(1, "Prompt cannot be empty")
|
|
2715
2980
|
.max(100000, "Prompt too long (max 100k chars)")
|
|
2716
|
-
.
|
|
2981
|
+
.optional()
|
|
2982
|
+
.describe("Prompt text for Grok (mutually exclusive with promptParts)"),
|
|
2983
|
+
promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
|
|
2717
2984
|
model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
|
|
2718
2985
|
outputFormat: z
|
|
2719
2986
|
.enum(["plain", "json", "streaming-json"])
|
|
@@ -2775,9 +3042,10 @@ export function createGatewayServer(deps = {}) {
|
|
|
2775
3042
|
.boolean()
|
|
2776
3043
|
.default(false)
|
|
2777
3044
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
2778
|
-
}, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
|
|
3045
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
|
|
2779
3046
|
return handleGrokRequest({ sessionManager, logger, runtime }, {
|
|
2780
3047
|
prompt,
|
|
3048
|
+
promptParts,
|
|
2781
3049
|
model,
|
|
2782
3050
|
outputFormat,
|
|
2783
3051
|
sessionId,
|
|
@@ -2807,7 +3075,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
2807
3075
|
.string()
|
|
2808
3076
|
.min(1, "Prompt cannot be empty")
|
|
2809
3077
|
.max(100000, "Prompt too long (max 100k chars)")
|
|
2810
|
-
.
|
|
3078
|
+
.optional()
|
|
3079
|
+
.describe("Prompt text for Mistral Vibe (mutually exclusive with promptParts)"),
|
|
3080
|
+
promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
|
|
2811
3081
|
model: z
|
|
2812
3082
|
.string()
|
|
2813
3083
|
.optional()
|
|
@@ -2868,9 +3138,10 @@ export function createGatewayServer(deps = {}) {
|
|
|
2868
3138
|
.boolean()
|
|
2869
3139
|
.default(false)
|
|
2870
3140
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
2871
|
-
}, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
|
|
3141
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, }) => {
|
|
2872
3142
|
return handleMistralRequest({ sessionManager, logger, runtime }, {
|
|
2873
3143
|
prompt,
|
|
3144
|
+
promptParts,
|
|
2874
3145
|
model,
|
|
2875
3146
|
outputFormat,
|
|
2876
3147
|
sessionId,
|
|
@@ -2907,7 +3178,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
2907
3178
|
.string()
|
|
2908
3179
|
.min(1, "Prompt cannot be empty")
|
|
2909
3180
|
.max(100000, "Prompt too long (max 100k chars)")
|
|
2910
|
-
.
|
|
3181
|
+
.optional()
|
|
3182
|
+
.describe("Prompt text for Claude (mutually exclusive with promptParts)"),
|
|
3183
|
+
promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
|
|
2911
3184
|
model: z
|
|
2912
3185
|
.string()
|
|
2913
3186
|
.optional()
|
|
@@ -3001,12 +3274,13 @@ export function createGatewayServer(deps = {}) {
|
|
|
3001
3274
|
.boolean()
|
|
3002
3275
|
.default(false)
|
|
3003
3276
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3004
|
-
}, async ({ prompt, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
|
|
3277
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, continueSession, createNewSession, allowedTools, disallowedTools, dangerouslySkipPermissions, permissionMode, agent, agents, forkSession, systemPrompt, appendSystemPrompt, maxBudgetUsd, maxTurns, effort, excludeDynamicSystemPromptSections, approvalStrategy, approvalPolicy, mcpServers, strictMcpConfig, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
|
|
3005
3278
|
if (systemPrompt !== undefined && appendSystemPrompt !== undefined) {
|
|
3006
3279
|
return createErrorResponse("claude", 1, "", correlationId, new Error("systemPrompt and appendSystemPrompt are mutually exclusive; use one or the other (not both)."));
|
|
3007
3280
|
}
|
|
3008
3281
|
const prep = prepareClaudeRequest({
|
|
3009
3282
|
prompt,
|
|
3283
|
+
promptParts,
|
|
3010
3284
|
model,
|
|
3011
3285
|
outputFormat,
|
|
3012
3286
|
allowedTools,
|
|
@@ -3058,6 +3332,12 @@ export function createGatewayServer(deps = {}) {
|
|
|
3058
3332
|
await sessionManager.createSession("claude", "Claude Session", effectiveSessionId);
|
|
3059
3333
|
}
|
|
3060
3334
|
}
|
|
3335
|
+
// Slice 3: TTL warning on resume (async path too).
|
|
3336
|
+
const ttlWarning = maybeBuildCacheTtlWarning({
|
|
3337
|
+
runtime,
|
|
3338
|
+
sessionId: effectiveSessionId,
|
|
3339
|
+
cli: "claude",
|
|
3340
|
+
});
|
|
3061
3341
|
// Idle timeout only for stream-json (text/json produce no output until done)
|
|
3062
3342
|
const effectiveIdleTimeout = outputFormat === "stream-json"
|
|
3063
3343
|
? resolveIdleTimeout("claude", idleTimeoutMs)
|
|
@@ -3080,6 +3360,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
3080
3360
|
if (prep.reviewIntegrity && prep.reviewIntegrity.violations.length > 0) {
|
|
3081
3361
|
asyncResponse.reviewIntegrity = prep.reviewIntegrity;
|
|
3082
3362
|
}
|
|
3363
|
+
if (ttlWarning) {
|
|
3364
|
+
asyncResponse.warnings = [ttlWarning];
|
|
3365
|
+
}
|
|
3083
3366
|
return {
|
|
3084
3367
|
content: [
|
|
3085
3368
|
{
|
|
@@ -3098,7 +3381,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
3098
3381
|
.string()
|
|
3099
3382
|
.min(1, "Prompt cannot be empty")
|
|
3100
3383
|
.max(100000, "Prompt too long (max 100k chars)")
|
|
3101
|
-
.
|
|
3384
|
+
.optional()
|
|
3385
|
+
.describe("Prompt text for Codex (mutually exclusive with promptParts)"),
|
|
3386
|
+
promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
|
|
3102
3387
|
model: z.string().optional().describe("Model name or alias (e.g. gpt-5.4, latest)"),
|
|
3103
3388
|
fullAuto: z
|
|
3104
3389
|
.boolean()
|
|
@@ -3171,9 +3456,10 @@ export function createGatewayServer(deps = {}) {
|
|
|
3171
3456
|
images: z.array(z.string()).optional().describe("Codex -i <path>: image attachments."),
|
|
3172
3457
|
ignoreUserConfig: z.boolean().optional().describe("Codex --ignore-user-config."),
|
|
3173
3458
|
ignoreRules: z.boolean().optional().describe("Codex --ignore-rules."),
|
|
3174
|
-
}, async ({ prompt, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, }) => {
|
|
3459
|
+
}, async ({ prompt, promptParts, model, fullAuto, sandboxMode, askForApproval, useLegacyFullAutoFlag, dangerouslyBypassApprovalsAndSandbox, approvalStrategy, approvalPolicy, mcpServers, sessionId, resumeLatest, createNewSession, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, outputSchema, search, profile, configOverrides, ephemeral, images, ignoreUserConfig, ignoreRules, }) => {
|
|
3175
3460
|
return handleCodexRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
3176
3461
|
prompt,
|
|
3462
|
+
promptParts,
|
|
3177
3463
|
model,
|
|
3178
3464
|
fullAuto,
|
|
3179
3465
|
sandboxMode,
|
|
@@ -3206,7 +3492,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
3206
3492
|
.string()
|
|
3207
3493
|
.min(1, "Prompt cannot be empty")
|
|
3208
3494
|
.max(100000, "Prompt too long (max 100k chars)")
|
|
3209
|
-
.
|
|
3495
|
+
.optional()
|
|
3496
|
+
.describe("Prompt text for Gemini (mutually exclusive with promptParts)"),
|
|
3497
|
+
promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
|
|
3210
3498
|
model: z
|
|
3211
3499
|
.string()
|
|
3212
3500
|
.optional()
|
|
@@ -3261,9 +3549,10 @@ export function createGatewayServer(deps = {}) {
|
|
|
3261
3549
|
policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
|
|
3262
3550
|
adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
|
|
3263
3551
|
attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Absolute file paths prepended as @<path> tokens to the prompt"),
|
|
3264
|
-
}, async ({ prompt, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, }) => {
|
|
3552
|
+
}, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, }) => {
|
|
3265
3553
|
return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
3266
3554
|
prompt,
|
|
3555
|
+
promptParts,
|
|
3267
3556
|
model,
|
|
3268
3557
|
sessionId,
|
|
3269
3558
|
resumeLatest,
|
|
@@ -3290,7 +3579,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
3290
3579
|
.string()
|
|
3291
3580
|
.min(1, "Prompt cannot be empty")
|
|
3292
3581
|
.max(100000, "Prompt too long (max 100k chars)")
|
|
3293
|
-
.
|
|
3582
|
+
.optional()
|
|
3583
|
+
.describe("Prompt text for Grok (mutually exclusive with promptParts)"),
|
|
3584
|
+
promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
|
|
3294
3585
|
model: z.string().optional().describe("Model name or alias (e.g. grok-build, latest)"),
|
|
3295
3586
|
outputFormat: z
|
|
3296
3587
|
.enum(["plain", "json", "streaming-json"])
|
|
@@ -3351,9 +3642,10 @@ export function createGatewayServer(deps = {}) {
|
|
|
3351
3642
|
.boolean()
|
|
3352
3643
|
.default(false)
|
|
3353
3644
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3354
|
-
}, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
|
|
3645
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
|
|
3355
3646
|
return handleGrokRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
3356
3647
|
prompt,
|
|
3648
|
+
promptParts,
|
|
3357
3649
|
model,
|
|
3358
3650
|
outputFormat,
|
|
3359
3651
|
sessionId,
|
|
@@ -3379,7 +3671,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
3379
3671
|
.string()
|
|
3380
3672
|
.min(1, "Prompt cannot be empty")
|
|
3381
3673
|
.max(100000, "Prompt too long (max 100k chars)")
|
|
3382
|
-
.
|
|
3674
|
+
.optional()
|
|
3675
|
+
.describe("Prompt text for Mistral Vibe (mutually exclusive with promptParts)"),
|
|
3676
|
+
promptParts: PromptPartsSchema.optional().describe("Cache-aware structured prompt: { system?, tools?, context?, task }. Mutually exclusive with prompt. Stable parts hash into cache_state for prefix-discipline tracking."),
|
|
3383
3677
|
model: z
|
|
3384
3678
|
.string()
|
|
3385
3679
|
.optional()
|
|
@@ -3439,9 +3733,10 @@ export function createGatewayServer(deps = {}) {
|
|
|
3439
3733
|
.boolean()
|
|
3440
3734
|
.default(false)
|
|
3441
3735
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3442
|
-
}, async ({ prompt, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
|
|
3736
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, }) => {
|
|
3443
3737
|
return handleMistralRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
3444
3738
|
prompt,
|
|
3739
|
+
promptParts,
|
|
3445
3740
|
model,
|
|
3446
3741
|
outputFormat,
|
|
3447
3742
|
sessionId,
|
|
@@ -3892,6 +4187,38 @@ export function createGatewayServer(deps = {}) {
|
|
|
3892
4187
|
};
|
|
3893
4188
|
}
|
|
3894
4189
|
const activeSession = await sessionManager.getActiveSession(session.cli);
|
|
4190
|
+
// Slice 2: project a compact cacheState view from the flight
|
|
4191
|
+
// recorder at read time. NOT persisted on the Session interface
|
|
4192
|
+
// (sessions.json stays content-free per the project invariant).
|
|
4193
|
+
// The field is OMITTED entirely (not null, not empty object) when
|
|
4194
|
+
// the session has zero rows in the flight recorder so the response
|
|
4195
|
+
// stays compact for fresh sessions.
|
|
4196
|
+
//
|
|
4197
|
+
// Slice 3: include ttlRemainingMs derived from the gateway's
|
|
4198
|
+
// configured TTL policy. Null for non-claude sessions.
|
|
4199
|
+
let cacheState;
|
|
4200
|
+
try {
|
|
4201
|
+
const stats = computeSessionCacheStats(flightRecorder, session.id);
|
|
4202
|
+
if (stats.requestCount > 0) {
|
|
4203
|
+
const ttlRemainingMs = computeTtlRemaining(stats, stats.cli, {
|
|
4204
|
+
anthropicTtlSeconds: cacheAwareness?.anthropicTtlSeconds ?? 300,
|
|
4205
|
+
});
|
|
4206
|
+
cacheState = {
|
|
4207
|
+
cli: stats.cli,
|
|
4208
|
+
prefixDistinct: stats.distinctPrefixCount,
|
|
4209
|
+
totalCacheReadTokens: stats.totalCacheReadTokens,
|
|
4210
|
+
totalCacheCreationTokens: stats.totalCacheCreationTokens,
|
|
4211
|
+
requestCount: stats.requestCount,
|
|
4212
|
+
hitCount: stats.hitCount,
|
|
4213
|
+
hitRate: stats.hitRate,
|
|
4214
|
+
estimatedSavingsUsd: stats.estimatedSavingsUsd,
|
|
4215
|
+
ttlRemainingMs,
|
|
4216
|
+
};
|
|
4217
|
+
}
|
|
4218
|
+
}
|
|
4219
|
+
catch (err) {
|
|
4220
|
+
logger.warn?.(`[session_get] cache-stats lookup failed (non-fatal)`, err);
|
|
4221
|
+
}
|
|
3895
4222
|
return {
|
|
3896
4223
|
content: [
|
|
3897
4224
|
{
|
|
@@ -3901,6 +4228,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
3901
4228
|
session: {
|
|
3902
4229
|
...session,
|
|
3903
4230
|
isActive: activeSession?.id === session.id,
|
|
4231
|
+
...(cacheState ? { cacheState } : {}),
|
|
3904
4232
|
},
|
|
3905
4233
|
}, null, 2),
|
|
3906
4234
|
},
|
|
@@ -3953,7 +4281,7 @@ async function initializeSessionManager() {
|
|
|
3953
4281
|
sessionManager = await createSessionManager(config, undefined, logger);
|
|
3954
4282
|
logger.info("File-based session manager initialized");
|
|
3955
4283
|
}
|
|
3956
|
-
resourceProvider = new ResourceProvider(sessionManager, performanceMetrics);
|
|
4284
|
+
resourceProvider = new ResourceProvider(sessionManager, performanceMetrics, getFlightRecorder(logger), getCacheAwarenessConfig(logger));
|
|
3957
4285
|
}
|
|
3958
4286
|
//──────────────────────────────────────────────────────────────────────────────
|
|
3959
4287
|
// Health Check Resource (only if using PostgreSQL)
|