llm-cli-gateway 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/README.md +31 -1
- package/dist/approval-manager.js +7 -4
- package/dist/async-job-manager.js +18 -11
- package/dist/claude-mcp-config.js +7 -4
- package/dist/config.js +15 -9
- package/dist/db.js +4 -4
- package/dist/executor.js +20 -13
- package/dist/flight-recorder.d.ts +48 -0
- package/dist/flight-recorder.js +220 -0
- package/dist/health.js +3 -3
- package/dist/index.d.ts +1 -0
- package/dist/index.js +812 -259
- package/dist/logger.js +1 -1
- package/dist/metrics.js +9 -12
- package/dist/migrate-sessions.js +2 -2
- package/dist/model-registry.js +12 -14
- package/dist/optimizer.js +9 -9
- package/dist/process-monitor.js +24 -8
- package/dist/request-helpers.d.ts +7 -0
- package/dist/request-helpers.js +24 -2
- package/dist/resources.js +32 -32
- package/dist/retry.js +6 -4
- package/dist/review-integrity.d.ts +6 -38
- package/dist/review-integrity.js +41 -275
- package/dist/session-manager-pg.js +6 -4
- package/dist/session-manager.js +7 -4
- package/dist/stream-json-parser.js +8 -6
- package/package.json +6 -3
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
import { chmodSync, existsSync, mkdirSync } from "fs";
|
|
2
|
+
import os from "os";
|
|
3
|
+
import path from "path";
|
|
4
|
+
import { createRequire } from "module";
|
|
5
|
+
// Cap, in UTF-8 bytes, on the combined size of the thinking-block strings
// that truncateThinkingBlocks keeps before it cuts the list short.
const MAX_THINKING_BYTES = 1_000_000;
|
|
6
|
+
/**
 * Decide where the flight-recorder SQLite database lives.
 *
 * Reads the `LLM_GATEWAY_LOGS_DB` environment variable:
 * - unset: `<home>/.llm-cli-gateway/logs.db`;
 * - empty / whitespace-only, or "none" in any letter case: `null`;
 * - anything else: the value with surrounding whitespace removed.
 *
 * @returns {string | null} Database path, or `null` when no path should be used.
 */
export function resolveFlightRecorderDbPath() {
    const raw = process.env.LLM_GATEWAY_LOGS_DB;
    if (raw === undefined) {
        return path.join(os.homedir(), ".llm-cli-gateway", "logs.db");
    }
    const trimmed = raw.trim();
    // Only the comparison is case-insensitive; the returned path keeps its case.
    const isDisabled = trimmed === "" || trimmed.toLowerCase() === "none";
    return isDisabled ? null : trimmed;
}
|
|
17
|
+
// Marker appended to the last stored block when thinking output was cut short.
const TRUNCATION_SUFFIX = "[TRUNCATED]";
const TRUNCATION_SUFFIX_BYTES = Buffer.byteLength(TRUNCATION_SUFFIX, "utf8");
/**
 * Return the longest prefix of `text` whose UTF-8 encoding fits in `maxBytes`.
 *
 * The string is walked by code point, so a surrogate pair is never split.
 *
 * @param {string} text - Text to shorten.
 * @param {number} maxBytes - Maximum UTF-8 size of the returned prefix.
 * @returns {string} A prefix of `text` (possibly empty).
 */
function sliceToUtf8Budget(text, maxBytes) {
    let usedBytes = 0;
    let end = 0;
    for (const char of text) {
        const charBytes = Buffer.byteLength(char, "utf8");
        if (usedBytes + charBytes > maxBytes) {
            break;
        }
        usedBytes += charBytes;
        // `char.length` is 2 for astral code points, keeping `end` a valid UTF-16 index.
        end += char.length;
    }
    return text.slice(0, end);
}
/**
 * Limit a list of thinking blocks to a total UTF-8 byte budget.
 *
 * Blocks are kept whole, in order, while they fit. The first block that does
 * not fit is cut on a code-point boundary and suffixed with
 * `TRUNCATION_SUFFIX`; every later block is dropped.
 *
 * If the space left is smaller than the suffix itself, space is reclaimed from
 * the most recently kept block(s) so the marker still fits. The combined size
 * of the returned strings therefore never exceeds `maxBytes`, provided
 * `maxBytes` is at least the size of the suffix. (With a smaller budget the
 * result is the bare suffix.)
 *
 * @param {string[]} blocks - Thinking blocks in emission order.
 * @param {number} [maxBytes=MAX_THINKING_BYTES] - Total UTF-8 byte budget.
 * @returns {string[]} New array; the input is not modified.
 */
function truncateThinkingBlocks(blocks, maxBytes = MAX_THINKING_BYTES) {
    const result = [];
    let used = 0;
    for (const block of blocks) {
        const bytes = Buffer.byteLength(block, "utf8");
        if (used + bytes <= maxBytes) {
            result.push(block);
            used += bytes;
            continue;
        }
        // Reserve space for the suffix so the total stays within budget.
        let overflowing = block;
        let budget = maxBytes - used - TRUNCATION_SUFFIX_BYTES;
        // Not even the marker fits: give back already-kept blocks, newest
        // first, and truncate the last one given back instead.
        while (budget < 0 && result.length > 0) {
            overflowing = result.pop();
            budget += Buffer.byteLength(overflowing, "utf8");
        }
        const kept = sliceToUtf8Budget(overflowing, Math.max(0, budget));
        result.push(`${kept}${TRUNCATION_SUFFIX}`);
        break;
    }
    return result;
}
|
|
52
|
+
/**
 * SQLite-backed recorder of gateway requests.
 *
 * Each request is stored as one row in `requests` (prompt, response, timing,
 * token counts) plus one row in `gateway_metadata` (retry/approval/cost data
 * and a `status` that starts as 'started'). Writes go through better-sqlite3
 * and are synchronous.
 */
export class FlightRecorder {
    db;
    insertStartTxn;
    updateCompleteTxn;
    /**
     * Open (creating if necessary) the database at `dbPath`, ensure the schema
     * exists, and prepare the statements used by `logStart` / `logComplete`.
     *
     * @param {string} dbPath - Location of the SQLite database file.
     * @throws If better-sqlite3 cannot be loaded, or the directory or database
     *   cannot be created/opened.
     */
    constructor(dbPath) {
        // better-sqlite3 is loaded through require() so a missing or broken
        // install surfaces here, at construction time.
        const require = createRequire(import.meta.url);
        const BetterSqlite3 = require("better-sqlite3");
        const directory = path.dirname(dbPath);
        if (!existsSync(directory)) {
            mkdirSync(directory, { recursive: true });
        }
        this.db = new BetterSqlite3(dbPath);
        this.db.pragma("journal_mode = WAL");
        this.db.pragma("foreign_keys = ON");
        this.db.exec(`
            CREATE TABLE IF NOT EXISTS _migrations (
                version INTEGER PRIMARY KEY,
                applied_at TEXT NOT NULL
            );

            CREATE TABLE IF NOT EXISTS requests (
                id TEXT PRIMARY KEY,
                cli TEXT NOT NULL,
                model TEXT NOT NULL,
                prompt TEXT NOT NULL,
                system TEXT,
                response TEXT,
                session_id TEXT,
                duration_ms INTEGER,
                datetime_utc TEXT NOT NULL,
                input_tokens INTEGER,
                output_tokens INTEGER
            );

            CREATE TABLE IF NOT EXISTS gateway_metadata (
                request_id TEXT PRIMARY KEY REFERENCES requests(id),
                retry_count INTEGER DEFAULT 0,
                circuit_breaker_state TEXT,
                cost_usd REAL,
                approval_decision TEXT,
                optimization_applied INTEGER DEFAULT 0,
                thinking_blocks TEXT,
                exit_code INTEGER,
                error_message TEXT,
                async_job_id TEXT,
                status TEXT NOT NULL DEFAULT 'started'
            );

            CREATE INDEX IF NOT EXISTS idx_requests_datetime ON requests(datetime_utc);
            CREATE INDEX IF NOT EXISTS idx_requests_model ON requests(model);
            CREATE INDEX IF NOT EXISTS idx_requests_cli ON requests(cli);
            CREATE INDEX IF NOT EXISTS idx_requests_session ON requests(session_id);
            CREATE INDEX IF NOT EXISTS idx_metadata_status ON gateway_metadata(status);
        `);
        this.db
            .prepare("INSERT OR IGNORE INTO _migrations(version, applied_at) VALUES(1, ?)")
            .run(new Date().toISOString());
        if (process.platform !== "win32") {
            // The WAL and shared-memory sidecars were created above, before the
            // main file is restricted, so restrict them too: they hold the same
            // prompt/response data until it is checkpointed.
            for (const suffix of ["", "-wal", "-shm"]) {
                try {
                    chmodSync(`${dbPath}${suffix}`, 0o600);
                }
                catch {
                    // Best effort permissions hardening (file may not exist).
                }
            }
        }
        const insertRequest = this.db.prepare(`
            INSERT INTO requests (id, cli, model, prompt, system, session_id, datetime_utc)
            VALUES (@id, @cli, @model, @prompt, @system, @session_id, @datetime_utc)
        `);
        const insertMetadata = this.db.prepare(`
            INSERT INTO gateway_metadata (request_id, async_job_id, status)
            VALUES (@request_id, @async_job_id, 'started')
        `);
        // Both rows are written atomically so a request never exists without
        // its metadata row.
        this.insertStartTxn = this.db.transaction((entry) => {
            insertRequest.run({
                id: entry.correlationId,
                cli: entry.cli,
                model: entry.model,
                prompt: entry.prompt,
                system: entry.system || null,
                session_id: entry.sessionId || null,
                datetime_utc: new Date().toISOString(),
            });
            insertMetadata.run({
                request_id: entry.correlationId,
                async_job_id: entry.asyncJobId || null,
            });
        });
        const updateRequests = this.db.prepare(`
            UPDATE requests
            SET response = @response,
                duration_ms = @duration_ms,
                input_tokens = @input_tokens,
                output_tokens = @output_tokens
            WHERE id = @id
        `);
        const updateMetadata = this.db.prepare(`
            UPDATE gateway_metadata
            SET retry_count = @retry_count,
                circuit_breaker_state = @circuit_breaker_state,
                cost_usd = @cost_usd,
                approval_decision = @approval_decision,
                optimization_applied = @optimization_applied,
                thinking_blocks = @thinking_blocks,
                exit_code = @exit_code,
                error_message = @error_message,
                status = @status
            WHERE request_id = @id AND status = 'started'
        `);
        this.updateCompleteTxn = this.db.transaction((correlationId, result) => {
            const thinkingBlocks = result.thinkingBlocks && result.thinkingBlocks.length > 0
                ? JSON.stringify(truncateThinkingBlocks(result.thinkingBlocks))
                : null;
            // The metadata row is the gatekeeper: it only changes while the
            // request is still 'started'. Run it first and skip the requests
            // update when nothing matched, so a repeated or unknown completion
            // cannot overwrite the recorded response while leaving the
            // metadata untouched.
            const { changes } = updateMetadata.run({
                id: correlationId,
                // `?? ` fallbacks keep omitted optional fields from reaching
                // the driver as `undefined`; they are stored as NULL / 0.
                retry_count: result.retryCount ?? 0,
                circuit_breaker_state: result.circuitBreakerState ?? null,
                cost_usd: result.costUsd ?? null,
                approval_decision: result.approvalDecision ?? null,
                optimization_applied: result.optimizationApplied ? 1 : 0,
                thinking_blocks: thinkingBlocks,
                exit_code: result.exitCode ?? null,
                error_message: result.errorMessage ?? null,
                status: result.status,
            });
            if (changes === 0) {
                return;
            }
            updateRequests.run({
                id: correlationId,
                response: result.response ?? null,
                duration_ms: result.durationMs ?? null,
                input_tokens: result.inputTokens ?? null,
                output_tokens: result.outputTokens ?? null,
            });
        });
    }
    /**
     * Record the start of a request.
     *
     * @param {object} entry - Reads `correlationId`, `cli`, `model`, `prompt`
     *   and the optional `system`, `sessionId`, `asyncJobId`.
     */
    logStart(entry) {
        this.insertStartTxn(entry);
    }
    /**
     * Record the outcome of a request previously passed to `logStart`.
     * Has no effect unless that request is still in the 'started' state.
     *
     * @param {string} correlationId - The `correlationId` used in `logStart`.
     * @param {object} result - Outcome fields (response, timing, tokens,
     *   retry/approval/cost data, exit code, error message, final `status`).
     */
    logComplete(correlationId, result) {
        this.updateCompleteTxn(correlationId, result);
    }
    /** Nothing to flush. */
    flush() {
        // No-op: better-sqlite3 writes synchronously.
    }
    /** Close the underlying database connection. */
    close() {
        this.db.close();
    }
}
|
|
199
|
+
/**
 * Recorder that records nothing.
 *
 * Exposes the same four methods as FlightRecorder (`logStart`, `logComplete`,
 * `flush`, `close`), each of which does nothing and returns `undefined`.
 */
export class NoopFlightRecorder {
    /** Ignore the start of a request. */
    logStart(_entry) {
        // Intentionally empty.
    }

    /** Ignore the outcome of a request. */
    logComplete(_correlationId, _result) {
        // Intentionally empty.
    }

    /** Nothing is buffered, so there is nothing to flush. */
    flush() {
        // Intentionally empty.
    }

    /** No resources are held, so there is nothing to close. */
    close() {
        // Intentionally empty.
    }
}
|
|
205
|
+
/**
 * Build the recorder the gateway should use.
 *
 * Returns a FlightRecorder for the path given by
 * `resolveFlightRecorderDbPath()`. Falls back to a NoopFlightRecorder when no
 * path is resolved, or when constructing the FlightRecorder throws; the
 * failure is reported through `logger.error` and is not rethrown.
 *
 * @param {{ info: Function, error: Function }} logger - Receives one `info`
 *   message describing the outcome, or one `error` call on failure.
 * @returns {FlightRecorder | NoopFlightRecorder}
 */
export function createFlightRecorder(logger) {
    const location = resolveFlightRecorderDbPath();
    if (location) {
        try {
            const active = new FlightRecorder(location);
            logger.info(`Flight recorder enabled at ${location}`);
            return active;
        }
        catch (cause) {
            logger.error("Flight recorder unavailable; continuing without SQLite logging", cause);
            return new NoopFlightRecorder();
        }
    }
    logger.info("Flight recorder disabled (LLM_GATEWAY_LOGS_DB=none)");
    return new NoopFlightRecorder();
}
|
package/dist/health.js
CHANGED
|
@@ -10,13 +10,13 @@ export async function checkHealth(db) {
|
|
|
10
10
|
status: "unhealthy",
|
|
11
11
|
postgres: {
|
|
12
12
|
status: result.postgres.connected ? "up" : "down",
|
|
13
|
-
latency: result.postgres.latency
|
|
13
|
+
latency: result.postgres.latency,
|
|
14
14
|
},
|
|
15
15
|
redis: {
|
|
16
16
|
status: result.redis.connected ? "up" : "down",
|
|
17
|
-
latency: result.redis.latency
|
|
17
|
+
latency: result.redis.latency,
|
|
18
18
|
},
|
|
19
|
-
timestamp: new Date().toISOString()
|
|
19
|
+
timestamp: new Date().toISOString(),
|
|
20
20
|
};
|
|
21
21
|
// Determine overall health status
|
|
22
22
|
if (result.postgres.connected && result.redis.connected) {
|
package/dist/index.d.ts
CHANGED