llm-cli-gateway 1.17.3 → 1.17.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +45 -0
- package/README.md +1 -1
- package/dist/approval-manager.js +0 -8
- package/dist/async-job-manager.d.ts +0 -113
- package/dist/async-job-manager.js +6 -124
- package/dist/cache-stats.d.ts +0 -89
- package/dist/cache-stats.js +0 -62
- package/dist/claude-mcp-config.js +0 -1
- package/dist/cli-updater.d.ts +0 -8
- package/dist/cli-updater.js +0 -12
- package/dist/codex-json-parser.d.ts +0 -20
- package/dist/codex-json-parser.js +0 -21
- package/dist/config.d.ts +0 -31
- package/dist/config.js +2 -72
- package/dist/db.d.ts +0 -18
- package/dist/db.js +0 -22
- package/dist/doctor.d.ts +0 -49
- package/dist/doctor.js +0 -47
- package/dist/endpoint-exposure.js +0 -1
- package/dist/executor.d.ts +0 -19
- package/dist/executor.js +3 -38
- package/dist/flight-recorder.d.ts +0 -26
- package/dist/flight-recorder.js +1 -70
- package/dist/gemini-json-parser.d.ts +0 -25
- package/dist/gemini-json-parser.js +0 -28
- package/dist/health.d.ts +0 -3
- package/dist/health.js +0 -3
- package/dist/index.d.ts +12 -208
- package/dist/index.js +116 -588
- package/dist/job-store.d.ts +0 -74
- package/dist/job-store.js +1 -73
- package/dist/logger.d.ts +0 -7
- package/dist/logger.js +0 -6
- package/dist/migrate-sessions.d.ts +0 -3
- package/dist/migrate-sessions.js +0 -16
- package/dist/migrate.js +1 -18
- package/dist/mistral-meta-json-parser.js +0 -67
- package/dist/model-registry.js +0 -13
- package/dist/pricing.d.ts +0 -46
- package/dist/pricing.js +0 -47
- package/dist/process-monitor.d.ts +0 -15
- package/dist/process-monitor.js +2 -31
- package/dist/prompt-parts.d.ts +6 -31
- package/dist/prompt-parts.js +0 -11
- package/dist/provider-status.d.ts +0 -8
- package/dist/provider-status.js +0 -11
- package/dist/request-helpers.d.ts +4 -316
- package/dist/request-helpers.js +13 -231
- package/dist/resources.d.ts +0 -20
- package/dist/resources.js +1 -34
- package/dist/retry.d.ts +0 -45
- package/dist/retry.js +3 -40
- package/dist/session-manager-pg.d.ts +0 -32
- package/dist/session-manager-pg.js +0 -32
- package/dist/session-manager.d.ts +0 -21
- package/dist/session-manager.js +1 -15
- package/dist/stream-json-parser.d.ts +0 -18
- package/dist/stream-json-parser.js +0 -22
- package/dist/upstream-contracts.d.ts +0 -55
- package/dist/upstream-contracts.js +86 -64
- package/dist/validation-orchestrator.js +0 -3
- package/dist/worktree-manager.d.ts +0 -9
- package/dist/worktree-manager.js +0 -21
- package/package.json +1 -1
package/dist/model-registry.js
CHANGED
|
@@ -14,9 +14,6 @@ const FALLBACK_INFO = {
|
|
|
14
14
|
modelOrder: ["opus", "sonnet", "haiku"],
|
|
15
15
|
},
|
|
16
16
|
codex: {
|
|
17
|
-
// U26: gpt-5.5 is the bundled fallback default. Config/env overrides still
|
|
18
|
-
// win (applyCodexOverrides runs after). Older aliases are retained in the
|
|
19
|
-
// models map so callers that still pass `gpt-5.3-codex` resolve cleanly.
|
|
20
17
|
description: "OpenAI's Codex CLI - best for code execution in sandboxed environments",
|
|
21
18
|
models: {
|
|
22
19
|
"gpt-5.5": "Latest Codex frontier model. Best for: most Codex tasks (default since U26)",
|
|
@@ -36,8 +33,6 @@ const FALLBACK_INFO = {
|
|
|
36
33
|
},
|
|
37
34
|
},
|
|
38
35
|
grok: {
|
|
39
|
-
// No hardcoded `defaultModel`. Let Grok CLI pick its own built-in default
|
|
40
|
-
// unless an explicit value is found via env vars in applyGrokOverrides.
|
|
41
36
|
description: "xAI's Grok Build CLI - best for agentic coding tasks via xAI's Grok models",
|
|
42
37
|
models: {
|
|
43
38
|
"grok-build": "Default Grok model for code/agentic tasks. Best for: most Grok build sessions",
|
|
@@ -45,10 +40,6 @@ const FALLBACK_INFO = {
|
|
|
45
40
|
modelOrder: ["grok-build"],
|
|
46
41
|
},
|
|
47
42
|
mistral: {
|
|
48
|
-
// Mistral Vibe selects the active model via VIBE_ACTIVE_MODEL; there is no
|
|
49
|
-
// `--model` flag. Do not set a bundled default here: Vibe's own default and
|
|
50
|
-
// user config move independently of this gateway. The model list is only a
|
|
51
|
-
// low-confidence recovery set for stale config/model-not-found failures.
|
|
52
43
|
description: "Mistral AI's Vibe CLI - agentic coding via Mistral models (model selection via VIBE_ACTIVE_MODEL env var)",
|
|
53
44
|
models: {
|
|
54
45
|
"mistral-medium-3.5": "Vibe coding model alias observed in Vibe 2.x defaults. Used only when discovery/config requires an explicit VIBE_ACTIVE_MODEL.",
|
|
@@ -99,9 +90,6 @@ export function resolveModelAlias(cli, model, info) {
|
|
|
99
90
|
const normalized = trimmed.toLowerCase();
|
|
100
91
|
const cliInfo = info[cli];
|
|
101
92
|
if (normalized === "default" || normalized === "latest") {
|
|
102
|
-
// If no default is configured, return undefined so the CLI picks its own
|
|
103
|
-
// built-in default. Avoids passing the literal string "default"/"latest"
|
|
104
|
-
// as a model name to the CLI.
|
|
105
93
|
return cliInfo.defaultModel;
|
|
106
94
|
}
|
|
107
95
|
const alias = resolveConfiguredAlias(cliInfo, normalized);
|
|
@@ -393,7 +381,6 @@ function applyMistralOverrides(info) {
|
|
|
393
381
|
addEnvModels(info, "MISTRAL_MODELS");
|
|
394
382
|
addEnvAliases(info, "mistral", "MISTRAL_MODEL_ALIASES");
|
|
395
383
|
addGlobalEnvAliases(info, "mistral");
|
|
396
|
-
// Vibe uses VIBE_ACTIVE_MODEL instead of a CLI flag. Explicit env values win.
|
|
397
384
|
const envDefault = process.env.MISTRAL_DEFAULT_MODEL || process.env.VIBE_ACTIVE_MODEL;
|
|
398
385
|
if (envDefault) {
|
|
399
386
|
const source = process.env.MISTRAL_DEFAULT_MODEL
|
package/dist/pricing.d.ts
CHANGED
|
@@ -1,54 +1,8 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Per-model pricing for cache-savings estimation.
|
|
3
|
-
*
|
|
4
|
-
* `priced_as_of` is the date these numbers were last refreshed. The
|
|
5
|
-
* gateway's doctor surfaces this so operators can see when the table is
|
|
6
|
-
* stale — pricing is an ESTIMATE, not a billing number.
|
|
7
|
-
*
|
|
8
|
-
* Pricing units: USD per 1M tokens.
|
|
9
|
-
*
|
|
10
|
-
* Anthropic source: <https://platform.claude.com/docs/en/about-claude/pricing>
|
|
11
|
-
* - Sonnet 4.x / Sonnet 3.5: $3 input / $15 output.
|
|
12
|
-
* - Opus 4.5+ / Mythos Preview: $15 input / $75 output.
|
|
13
|
-
* - Opus 4 / 4.1 (deprecated): same as 4.5+.
|
|
14
|
-
* - Haiku 4.5: $1 input / $5 output.
|
|
15
|
-
* - Haiku 3.5 (Vertex-only): $0.80 input / $4 output.
|
|
16
|
-
*
|
|
17
|
-
* Cache pricing multipliers (Anthropic):
|
|
18
|
-
* - cache write 5-min TTL: 1.25× base input.
|
|
19
|
-
* - cache write 1-hour TTL: 2× base input.
|
|
20
|
-
* - cache read: 0.10× base input (90% savings).
|
|
21
|
-
*
|
|
22
|
-
* Codex / OpenAI: GPT-5.4 input ~$1.25 / output $10 per 1M (approx; OpenAI
|
|
23
|
-
* does not publish a stable per-CLI table). Cached input ~50% of base.
|
|
24
|
-
*
|
|
25
|
-
* Gemini, Grok, Mistral: pricing varies by model and is not surfaced in
|
|
26
|
-
* gateway today. Returns 0 for unknown.
|
|
27
|
-
*/
|
|
28
1
|
export interface PricePerMillion {
|
|
29
2
|
inputUsd: number;
|
|
30
3
|
outputUsd: number;
|
|
31
|
-
/** Multiplier on inputUsd for a cache HIT (read). Anthropic: 0.10. */
|
|
32
4
|
cacheReadMultiplier: number;
|
|
33
5
|
}
|
|
34
6
|
export declare const PRICING_AS_OF = "2026-05-26";
|
|
35
|
-
/**
|
|
36
|
-
* Look up pricing by (cli, model) name. Best-effort; unknown models return
|
|
37
|
-
* ZEROED pricing so estimated_savings_usd in aggregates falls back to 0
|
|
38
|
-
* rather than throwing OR over-reporting savings on an unpriced model.
|
|
39
|
-
*
|
|
40
|
-
* Recognised model families:
|
|
41
|
-
* - claude: model name contains "sonnet" | "opus" | "haiku".
|
|
42
|
-
* - codex: model name contains "gpt-5" or "o3" (current OpenAI families).
|
|
43
|
-
*
|
|
44
|
-
* Anything outside these explicit matches returns ZERO. This is a
|
|
45
|
-
* deliberate conservative choice — we'd rather under-report savings on
|
|
46
|
-
* an unrecognised model than over-report on one whose actual pricing we
|
|
47
|
-
* don't know. Update this table when a new model family ships.
|
|
48
|
-
*/
|
|
49
7
|
export declare function getPricing(cli: "claude" | "codex" | "gemini" | "grok" | "mistral", model: string): PricePerMillion;
|
|
50
|
-
/**
|
|
51
|
-
* Estimate USD saved by `cacheReadTokens` being served from cache instead
|
|
52
|
-
* of fresh input. Returns 0 for zero cache reads or unknown pricing.
|
|
53
|
-
*/
|
|
54
8
|
export declare function estimateCacheSavingsUsd(cli: "claude" | "codex" | "gemini" | "grok" | "mistral", model: string, cacheReadTokens: number): number;
|
package/dist/pricing.js
CHANGED
|
@@ -1,30 +1,3 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Per-model pricing for cache-savings estimation.
|
|
3
|
-
*
|
|
4
|
-
* `priced_as_of` is the date these numbers were last refreshed. The
|
|
5
|
-
* gateway's doctor surfaces this so operators can see when the table is
|
|
6
|
-
* stale — pricing is an ESTIMATE, not a billing number.
|
|
7
|
-
*
|
|
8
|
-
* Pricing units: USD per 1M tokens.
|
|
9
|
-
*
|
|
10
|
-
* Anthropic source: <https://platform.claude.com/docs/en/about-claude/pricing>
|
|
11
|
-
* - Sonnet 4.x / Sonnet 3.5: $3 input / $15 output.
|
|
12
|
-
* - Opus 4.5+ / Mythos Preview: $15 input / $75 output.
|
|
13
|
-
* - Opus 4 / 4.1 (deprecated): same as 4.5+.
|
|
14
|
-
* - Haiku 4.5: $1 input / $5 output.
|
|
15
|
-
* - Haiku 3.5 (Vertex-only): $0.80 input / $4 output.
|
|
16
|
-
*
|
|
17
|
-
* Cache pricing multipliers (Anthropic):
|
|
18
|
-
* - cache write 5-min TTL: 1.25× base input.
|
|
19
|
-
* - cache write 1-hour TTL: 2× base input.
|
|
20
|
-
* - cache read: 0.10× base input (90% savings).
|
|
21
|
-
*
|
|
22
|
-
* Codex / OpenAI: GPT-5.4 input ~$1.25 / output $10 per 1M (approx; OpenAI
|
|
23
|
-
* does not publish a stable per-CLI table). Cached input ~50% of base.
|
|
24
|
-
*
|
|
25
|
-
* Gemini, Grok, Mistral: pricing varies by model and is not surfaced in
|
|
26
|
-
* gateway today. Returns 0 for unknown.
|
|
27
|
-
*/
|
|
28
1
|
export const PRICING_AS_OF = "2026-05-26";
|
|
29
2
|
const ANTHROPIC_SONNET = {
|
|
30
3
|
inputUsd: 3,
|
|
@@ -44,7 +17,6 @@ const ANTHROPIC_HAIKU = {
|
|
|
44
17
|
const OPENAI_GPT5 = {
|
|
45
18
|
inputUsd: 1.25,
|
|
46
19
|
outputUsd: 10,
|
|
47
|
-
// OpenAI prompt-caching: cached input tokens billed at 50% of base.
|
|
48
20
|
cacheReadMultiplier: 0.5,
|
|
49
21
|
};
|
|
50
22
|
const ZERO = {
|
|
@@ -52,20 +24,6 @@ const ZERO = {
|
|
|
52
24
|
outputUsd: 0,
|
|
53
25
|
cacheReadMultiplier: 0,
|
|
54
26
|
};
|
|
55
|
-
/**
|
|
56
|
-
* Look up pricing by (cli, model) name. Best-effort; unknown models return
|
|
57
|
-
* ZEROED pricing so estimated_savings_usd in aggregates falls back to 0
|
|
58
|
-
* rather than throwing OR over-reporting savings on an unpriced model.
|
|
59
|
-
*
|
|
60
|
-
* Recognised model families:
|
|
61
|
-
* - claude: model name contains "sonnet" | "opus" | "haiku".
|
|
62
|
-
* - codex: model name contains "gpt-5" or "o3" (current OpenAI families).
|
|
63
|
-
*
|
|
64
|
-
* Anything outside these explicit matches returns ZERO. This is a
|
|
65
|
-
* deliberate conservative choice — we'd rather under-report savings on
|
|
66
|
-
* an unrecognised model than over-report on one whose actual pricing we
|
|
67
|
-
* don't know. Update this table when a new model family ships.
|
|
68
|
-
*/
|
|
69
27
|
export function getPricing(cli, model) {
|
|
70
28
|
const lower = model.toLowerCase();
|
|
71
29
|
if (cli === "claude") {
|
|
@@ -84,17 +42,12 @@ export function getPricing(cli, model) {
|
|
|
84
42
|
}
|
|
85
43
|
return ZERO;
|
|
86
44
|
}
|
|
87
|
-
/**
|
|
88
|
-
* Estimate USD saved by `cacheReadTokens` being served from cache instead
|
|
89
|
-
* of fresh input. Returns 0 for zero cache reads or unknown pricing.
|
|
90
|
-
*/
|
|
91
45
|
export function estimateCacheSavingsUsd(cli, model, cacheReadTokens) {
|
|
92
46
|
if (cacheReadTokens <= 0)
|
|
93
47
|
return 0;
|
|
94
48
|
const p = getPricing(cli, model);
|
|
95
49
|
if (p.inputUsd === 0)
|
|
96
50
|
return 0;
|
|
97
|
-
// Savings = (fresh-input-cost) - (cache-read-cost) = inputUsd × (1 - mult)
|
|
98
51
|
const savedPerToken = (p.inputUsd * (1 - p.cacheReadMultiplier)) / 1_000_000;
|
|
99
52
|
return cacheReadTokens * savedPerToken;
|
|
100
53
|
}
|
package/dist/process-monitor.d.ts
CHANGED
|
@@ -1,7 +1,3 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* On-demand process health monitoring via /proc (Linux).
|
|
3
|
-
* Gracefully degrades on non-Linux platforms.
|
|
4
|
-
*/
|
|
5
1
|
import type { Logger } from "./logger.js";
|
|
6
2
|
export interface ProcessHealth {
|
|
7
3
|
pid: number;
|
|
@@ -20,25 +16,15 @@ export interface JobHealth {
|
|
|
20
16
|
isZombie: boolean;
|
|
21
17
|
runningForMs: number;
|
|
22
18
|
}
|
|
23
|
-
/**
|
|
24
|
-
* Parse /proc/[pid]/stat safely.
|
|
25
|
-
* The `comm` field (field 2) is in parentheses and may contain spaces,
|
|
26
|
-
* so we find the LAST ')' and parse remaining fields from there.
|
|
27
|
-
*/
|
|
28
19
|
export declare function parseProcStat(content: string): {
|
|
29
20
|
state: string;
|
|
30
21
|
utime: number;
|
|
31
22
|
stime: number;
|
|
32
23
|
} | null;
|
|
33
|
-
/**
|
|
34
|
-
* Parse VmRSS from /proc/[pid]/status.
|
|
35
|
-
* Returns RSS in kilobytes (already in kB in /proc/[pid]/status).
|
|
36
|
-
*/
|
|
37
24
|
export declare function parseVmRss(content: string): number | null;
|
|
38
25
|
export declare class ProcessMonitor {
|
|
39
26
|
private prevSamples;
|
|
40
27
|
constructor(_logger?: Logger);
|
|
41
|
-
/** Clear all cached CPU samples */
|
|
42
28
|
reset(): void;
|
|
43
29
|
sampleProcess(pid: number): ProcessHealth;
|
|
44
30
|
checkJobHealth(jobs: {
|
|
@@ -48,6 +34,5 @@ export declare class ProcessMonitor {
|
|
|
48
34
|
pid: number | null;
|
|
49
35
|
startedAt: string;
|
|
50
36
|
}[]): JobHealth[];
|
|
51
|
-
/** Clean up stale samples for PIDs that no longer exist */
|
|
52
37
|
cleanupSamples(activePids: Set<number>): void;
|
|
53
38
|
}
|
package/dist/process-monitor.js
CHANGED
|
@@ -1,20 +1,10 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* On-demand process health monitoring via /proc (Linux).
|
|
3
|
-
* Gracefully degrades on non-Linux platforms.
|
|
4
|
-
*/
|
|
5
1
|
import { readFileSync } from "fs";
|
|
6
|
-
/**
|
|
7
|
-
* Parse /proc/[pid]/stat safely.
|
|
8
|
-
* The `comm` field (field 2) is in parentheses and may contain spaces,
|
|
9
|
-
* so we find the LAST ')' and parse remaining fields from there.
|
|
10
|
-
*/
|
|
11
2
|
export function parseProcStat(content) {
|
|
12
3
|
const lastParen = content.lastIndexOf(")");
|
|
13
4
|
if (lastParen === -1)
|
|
14
5
|
return null;
|
|
15
|
-
const afterComm = content.slice(lastParen + 2);
|
|
6
|
+
const afterComm = content.slice(lastParen + 2);
|
|
16
7
|
const fields = afterComm.split(" ");
|
|
17
|
-
// fields[0] = state, fields[11] = utime (14-3), fields[12] = stime (15-3)
|
|
18
8
|
if (fields.length < 13)
|
|
19
9
|
return null;
|
|
20
10
|
const utime = parseInt(fields[11], 10);
|
|
@@ -27,22 +17,14 @@ export function parseProcStat(content) {
|
|
|
27
17
|
stime,
|
|
28
18
|
};
|
|
29
19
|
}
|
|
30
|
-
/**
|
|
31
|
-
* Parse VmRSS from /proc/[pid]/status.
|
|
32
|
-
* Returns RSS in kilobytes (already in kB in /proc/[pid]/status).
|
|
33
|
-
*/
|
|
34
20
|
export function parseVmRss(content) {
|
|
35
21
|
const match = content.match(/^VmRSS:\s+(\d+)\s+kB$/m);
|
|
36
22
|
return match ? parseInt(match[1], 10) : null;
|
|
37
23
|
}
|
|
38
|
-
/**
|
|
39
|
-
* Read total system CPU jiffies from /proc/stat.
|
|
40
|
-
* Used to normalize per-process CPU into a percentage.
|
|
41
|
-
*/
|
|
42
24
|
function getTotalCpuJiffies() {
|
|
43
25
|
try {
|
|
44
26
|
const content = readFileSync("/proc/stat", "utf-8");
|
|
45
|
-
const cpuLine = content.split("\n")[0];
|
|
27
|
+
const cpuLine = content.split("\n")[0];
|
|
46
28
|
const fields = cpuLine.split(/\s+/).slice(1).map(Number);
|
|
47
29
|
return fields.reduce((a, b) => a + b, 0);
|
|
48
30
|
}
|
|
@@ -51,16 +33,13 @@ function getTotalCpuJiffies() {
|
|
|
51
33
|
}
|
|
52
34
|
}
|
|
53
35
|
export class ProcessMonitor {
|
|
54
|
-
// Previous samples for CPU delta calculation
|
|
55
36
|
prevSamples = new Map();
|
|
56
37
|
constructor(_logger) { }
|
|
57
|
-
/** Clear all cached CPU samples */
|
|
58
38
|
reset() {
|
|
59
39
|
this.prevSamples.clear();
|
|
60
40
|
}
|
|
61
41
|
sampleProcess(pid) {
|
|
62
42
|
const now = new Date().toISOString();
|
|
63
|
-
// 1. Existence check
|
|
64
43
|
let alive = false;
|
|
65
44
|
try {
|
|
66
45
|
process.kill(pid, 0);
|
|
@@ -77,12 +56,10 @@ export class ProcessMonitor {
|
|
|
77
56
|
sampledAt: now,
|
|
78
57
|
};
|
|
79
58
|
}
|
|
80
|
-
// EPERM = process exists but we can't signal it
|
|
81
59
|
if (err.code === "EPERM") {
|
|
82
60
|
alive = true;
|
|
83
61
|
}
|
|
84
62
|
}
|
|
85
|
-
// 2. Parse /proc/[pid]/stat for state + CPU ticks
|
|
86
63
|
let state = null;
|
|
87
64
|
let cpuPercent = null;
|
|
88
65
|
try {
|
|
@@ -90,7 +67,6 @@ export class ProcessMonitor {
|
|
|
90
67
|
const parsed = parseProcStat(statContent);
|
|
91
68
|
if (parsed) {
|
|
92
69
|
state = parsed.state;
|
|
93
|
-
// CPU delta calculation
|
|
94
70
|
const totalJiffies = getTotalCpuJiffies();
|
|
95
71
|
const prev = this.prevSamples.get(pid);
|
|
96
72
|
if (prev && totalJiffies !== null) {
|
|
@@ -100,7 +76,6 @@ export class ProcessMonitor {
|
|
|
100
76
|
cpuPercent = (processJiffiesDelta / totalJiffiesDelta) * 100;
|
|
101
77
|
}
|
|
102
78
|
}
|
|
103
|
-
// Store for next delta
|
|
104
79
|
if (totalJiffies !== null) {
|
|
105
80
|
this.prevSamples.set(pid, {
|
|
106
81
|
utime: parsed.utime,
|
|
@@ -112,16 +87,13 @@ export class ProcessMonitor {
|
|
|
112
87
|
}
|
|
113
88
|
}
|
|
114
89
|
catch {
|
|
115
|
-
// /proc not available (non-Linux) — degrade gracefully
|
|
116
90
|
}
|
|
117
|
-
// 3. Parse /proc/[pid]/status for VmRSS
|
|
118
91
|
let memoryRssKb = null;
|
|
119
92
|
try {
|
|
120
93
|
const statusContent = readFileSync(`/proc/${pid}/status`, "utf-8");
|
|
121
94
|
memoryRssKb = parseVmRss(statusContent);
|
|
122
95
|
}
|
|
123
96
|
catch {
|
|
124
|
-
// Non-Linux or process exited between checks
|
|
125
97
|
}
|
|
126
98
|
return { pid, alive, state, cpuPercent, memoryRssKb, sampledAt: now };
|
|
127
99
|
}
|
|
@@ -151,7 +123,6 @@ export class ProcessMonitor {
|
|
|
151
123
|
};
|
|
152
124
|
});
|
|
153
125
|
}
|
|
154
|
-
/** Clean up stale samples for PIDs that no longer exist */
|
|
155
126
|
cleanupSamples(activePids) {
|
|
156
127
|
for (const pid of this.prevSamples.keys()) {
|
|
157
128
|
if (!activePids.has(pid)) {
|
package/dist/prompt-parts.d.ts
CHANGED
|
@@ -9,20 +9,6 @@ export interface PromptParts {
|
|
|
9
9
|
tools?: string;
|
|
10
10
|
context?: string;
|
|
11
11
|
task: string;
|
|
12
|
-
/**
|
|
13
|
-
* Slice κ (Claude only): per-block opt-in to Anthropic `cache_control`
|
|
14
|
-
* breakpoints. Setting `system: true` (or tools/context) marks that
|
|
15
|
-
* block with `cache_control: {type:"ephemeral", ttl:"1h"}` in the
|
|
16
|
-
* stream-json payload the gateway pipes to `claude --input-format
|
|
17
|
-
* stream-json`. The `task` block is NEVER marked (it's the volatile
|
|
18
|
-
* tail). Empty parts are silently skipped even if their flag is true.
|
|
19
|
-
*
|
|
20
|
-
* Constraint: callers MUST also pass `outputFormat:"stream-json"` —
|
|
21
|
-
* mixing cacheControl with text/json output returns an error response.
|
|
22
|
-
* `ttl` is hard-coded to `"1h"` because Claude Code injects its own
|
|
23
|
-
* 1h-marked system blocks ahead of caller content and Anthropic
|
|
24
|
-
* rejects a 1h block after a 5m block.
|
|
25
|
-
*/
|
|
26
12
|
cacheControl?: PromptPartsCacheControl;
|
|
27
13
|
}
|
|
28
14
|
export declare const PromptPartsSchema: z.ZodObject<{
|
|
@@ -35,32 +21,32 @@ export declare const PromptPartsSchema: z.ZodObject<{
|
|
|
35
21
|
tools: z.ZodOptional<z.ZodBoolean>;
|
|
36
22
|
context: z.ZodOptional<z.ZodBoolean>;
|
|
37
23
|
}, "strict", z.ZodTypeAny, {
|
|
38
|
-
system?: boolean | undefined;
|
|
39
24
|
tools?: boolean | undefined;
|
|
25
|
+
system?: boolean | undefined;
|
|
40
26
|
context?: boolean | undefined;
|
|
41
27
|
}, {
|
|
42
|
-
system?: boolean | undefined;
|
|
43
28
|
tools?: boolean | undefined;
|
|
29
|
+
system?: boolean | undefined;
|
|
44
30
|
context?: boolean | undefined;
|
|
45
31
|
}>>;
|
|
46
32
|
}, "strip", z.ZodTypeAny, {
|
|
47
33
|
task: string;
|
|
48
|
-
system?: string | undefined;
|
|
49
34
|
tools?: string | undefined;
|
|
35
|
+
system?: string | undefined;
|
|
50
36
|
context?: string | undefined;
|
|
51
37
|
cacheControl?: {
|
|
52
|
-
system?: boolean | undefined;
|
|
53
38
|
tools?: boolean | undefined;
|
|
39
|
+
system?: boolean | undefined;
|
|
54
40
|
context?: boolean | undefined;
|
|
55
41
|
} | undefined;
|
|
56
42
|
}, {
|
|
57
43
|
task: string;
|
|
58
|
-
system?: string | undefined;
|
|
59
44
|
tools?: string | undefined;
|
|
45
|
+
system?: string | undefined;
|
|
60
46
|
context?: string | undefined;
|
|
61
47
|
cacheControl?: {
|
|
62
|
-
system?: boolean | undefined;
|
|
63
48
|
tools?: boolean | undefined;
|
|
49
|
+
system?: boolean | undefined;
|
|
64
50
|
context?: boolean | undefined;
|
|
65
51
|
} | undefined;
|
|
66
52
|
}>;
|
|
@@ -98,15 +84,4 @@ export interface AssembleClaudeCacheBlocksResult {
|
|
|
98
84
|
payload: ClaudeStreamJsonUserMessage;
|
|
99
85
|
markedBlockCount: number;
|
|
100
86
|
}
|
|
101
|
-
/**
|
|
102
|
-
* Slice κ: build the Claude `--input-format stream-json` payload from
|
|
103
|
-
* a `PromptParts`. Each non-empty part becomes one content block in
|
|
104
|
-
* `system → tools → context → task` order; parts whose name is `true`
|
|
105
|
-
* in `cacheControl` get `cache_control: {type:"ephemeral", ttl:"1h"}`.
|
|
106
|
-
*
|
|
107
|
-
* Empty parts are skipped (no zero-byte blocks) — a true flag on an
|
|
108
|
-
* empty part is silently a no-op and not counted in `markedBlockCount`.
|
|
109
|
-
* The `task` block is never marked, even if a caller accidentally
|
|
110
|
-
* tries (the schema doesn't expose `task` in `cacheControl`).
|
|
111
|
-
*/
|
|
112
87
|
export declare function assembleClaudeCacheBlocks(parts: PromptParts): AssembleClaudeCacheBlocksResult;
|
package/dist/prompt-parts.js
CHANGED
|
@@ -48,17 +48,6 @@ export function resolvePromptInput(input) {
|
|
|
48
48
|
stablePrefixTokens: null,
|
|
49
49
|
};
|
|
50
50
|
}
|
|
51
|
-
/**
|
|
52
|
-
* Slice κ: build the Claude `--input-format stream-json` payload from
|
|
53
|
-
* a `PromptParts`. Each non-empty part becomes one content block in
|
|
54
|
-
* `system → tools → context → task` order; parts whose name is `true`
|
|
55
|
-
* in `cacheControl` get `cache_control: {type:"ephemeral", ttl:"1h"}`.
|
|
56
|
-
*
|
|
57
|
-
* Empty parts are skipped (no zero-byte blocks) — a true flag on an
|
|
58
|
-
* empty part is silently a no-op and not counted in `markedBlockCount`.
|
|
59
|
-
* The `task` block is never marked, even if a caller accidentally
|
|
60
|
-
* tries (the schema doesn't expose `task` in `cacheControl`).
|
|
61
|
-
*/
|
|
62
51
|
export function assembleClaudeCacheBlocks(parts) {
|
|
63
52
|
const blocks = [];
|
|
64
53
|
let markedBlockCount = 0;
|
package/dist/provider-status.d.ts
CHANGED
|
@@ -30,12 +30,4 @@ export interface GeminiAuthStatus {
|
|
|
30
30
|
status: "present" | "not_found";
|
|
31
31
|
methods: GeminiAuthMethods;
|
|
32
32
|
}
|
|
33
|
-
/**
|
|
34
|
-
* U27: Detect Gemini auth across all supported methods.
|
|
35
|
-
* Returns "present" if ANY of:
|
|
36
|
-
* - OAuth credential file present (~/.gemini/oauth_creds.json, etc.)
|
|
37
|
-
* - GEMINI_API_KEY env var set and non-empty
|
|
38
|
-
* - GOOGLE_API_KEY env var set and non-empty
|
|
39
|
-
* - GOOGLE_CLOUD_PROJECT set AND GOOGLE_GENAI_USE_VERTEXAI=true
|
|
40
|
-
*/
|
|
41
33
|
export declare function geminiAuthStatus(env?: NodeJS.ProcessEnv, home?: string): GeminiAuthStatus;
|
package/dist/provider-status.js
CHANGED
|
@@ -12,8 +12,6 @@ const VERSION_ARGS = {
|
|
|
12
12
|
grok: ["--version"],
|
|
13
13
|
mistral: ["--version"],
|
|
14
14
|
};
|
|
15
|
-
// Mistral Vibe ships as the `vibe` binary (PyPI package mistral-vibe); the gateway
|
|
16
|
-
// uses `mistral` as the provider key but invokes `vibe` on the shell.
|
|
17
15
|
export const PROVIDER_COMMANDS = {
|
|
18
16
|
claude: "claude",
|
|
19
17
|
codex: "codex",
|
|
@@ -130,7 +128,6 @@ function inferLoginStatus(provider, exitCode, output) {
|
|
|
130
128
|
return "not_authenticated";
|
|
131
129
|
}
|
|
132
130
|
catch {
|
|
133
|
-
// Fall through to text heuristics.
|
|
134
131
|
}
|
|
135
132
|
}
|
|
136
133
|
if (/not\s+(logged|signed|authenticated)\s*in|unauthenticated|login required|not authorized/i.test(output)) {
|
|
@@ -158,14 +155,6 @@ function loginCheckDetail(provider, status, exitCode) {
|
|
|
158
155
|
return `${provider} login check exited non-zero without exposing credential material.`;
|
|
159
156
|
return `${provider} login check completed, but the output did not clearly indicate login state.`;
|
|
160
157
|
}
|
|
161
|
-
/**
|
|
162
|
-
* U27: Detect Gemini auth across all supported methods.
|
|
163
|
-
* Returns "present" if ANY of:
|
|
164
|
-
* - OAuth credential file present (~/.gemini/oauth_creds.json, etc.)
|
|
165
|
-
* - GEMINI_API_KEY env var set and non-empty
|
|
166
|
-
* - GOOGLE_API_KEY env var set and non-empty
|
|
167
|
-
* - GOOGLE_CLOUD_PROJECT set AND GOOGLE_GENAI_USE_VERTEXAI=true
|
|
168
|
-
*/
|
|
169
158
|
export function geminiAuthStatus(env = process.env, home = homedir()) {
|
|
170
159
|
const candidates = [
|
|
171
160
|
join(home, ".gemini", "oauth_creds.json"),
|