llm-cli-gateway 1.0.0 → 1.1.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,283 +1,49 @@
1
- /**
2
- * Review Integrity Bypass Detection
3
- *
4
- * Detects when orchestrating agents neuter the multi-LLM review process by:
5
- * - Embedding tool-suppression language in review prompts
6
- * - Inlining full code instead of letting reviewers read files directly
7
- * - Setting allowedTools:[] to strip tool access from reviewers
8
- *
9
- * Two-gate design: violations only emitted when BOTH review context AND
10
- * a restriction are detected. This avoids false positives on non-review
11
- * prompts that happen to contain similar language.
12
- */
13
- // Two-part review context detection: a REVIEW_ACTION verb/phrase + a CODE_ANCHOR
14
- // in the same prompt. This avoids false positives like "Analyze customer feedback"
15
- // (has action but no code anchor) while catching "Analyze the implementation" (has both).
16
- //
17
- // Unambiguous multi-word phrases (code review, security audit, etc.) match on
18
- // their own without needing a separate code anchor.
19
- // Phrases that are unambiguously code-review context on their own:
20
- const UNAMBIGUOUS_REVIEW = /\b(code\s*review|security\s*audit|security\s*review|security\s*(?:vulnerabilit(?:y|ies)|scan|assessment)|bug\s*finding|quality\s*analysis|code\s*quality|code\s*audit|code\s*inspection|static\s*analysis|penetration\s*test(?:ing)?|threat\s*model|owasp|pentest|red[- ]?team|backdoor|exploitab(?:le|ility)|vulnerabilit(?:y|ies)|defects?|flaws?|weakness(?:es)?)\b/i;
21
- // Broad review-action verbs that need a code anchor to confirm context:
22
- const REVIEW_ACTIONS = /\b(review|audit|analyze|inspect|examine|assess|evaluate|verify|validate|triage|hunt|vet(?:ting)?|probe|diagnos(?:e|tics?)|find\s*(?:bugs?|issues?|defects?|flaws?|attack\s*(?:surface|path|vector)s?)|check\s*(?:for\s+)?(?:bugs?|issues?|errors?|problems?|defects?)|look\s*over|scan\s*(?:for|the))\b/i;
23
- // Code-related anchor words that confirm the prompt is about software.
24
- // Excludes ambiguous words (service, session, controller, route) that appear in non-code contexts.
25
- const CODE_ANCHORS = /\b(code|source|implementation|function|method|class|module|component|files?|patch|diff|commit|PR|pull\s*request|API|endpoint|auth|parser|codebase|repositor(?:y|ies)|repo|src|\.ts|\.js|\.py|\.go|\.rs|\.java|error\s*handling|middleware|handler|test\s*suite|retry|database|query|schema|config)\b/i;
26
- /**
27
- * Detect whether the prompt is a review/audit context.
28
- * Uses two-part detection: unambiguous phrases match alone,
29
- * ambiguous verbs (review, analyze, etc.) require a code anchor.
30
- * Normalizes Unicode before matching to prevent confusable bypasses.
31
- */
32
- export function isReviewContext(prompt) {
33
- const normalized = normalizeForMatching(prompt);
34
- if (UNAMBIGUOUS_REVIEW.test(normalized))
35
- return true;
36
- return REVIEW_ACTIONS.test(normalized) && CODE_ANCHORS.test(normalized);
37
- }
38
- // Normalize text for matching: NFKD decomposition to fold compatibility characters AND
39
- // decompose precomposed diacritics, then strip combining marks and confusables.
40
- function normalizeForMatching(text) {
41
- return text
42
- // NFKD: decomposes compatibility chars AND precomposed diacritics (é → e + U+0301)
43
- .normalize("NFKD")
44
- // Strip combining marks (diacritics): é (e + U+0301), n̸ (n + U+0338), etc.
45
- // Must happen AFTER NFKD decomposition so precomposed characters are split first.
46
- .replace(/[\u0300-\u036F]/g, "")
47
- // Strip invisible Unicode format characters (zero-width joiners, soft hyphens, etc.)
48
- .replace(/[\u200B-\u200F\u2028-\u202F\u2060-\u206F\uFEFF\u00AD]/g, "")
49
- .replace(/[\u2018\u2019\u0060\u00B4]/g, "'")
50
- .replace(/[\u201C\u201D]/g, '"')
51
- // Fold common Cyrillic confusables that survive NFKC (visually identical to Latin)
52
- .replace(/\u0430/g, "a") // а → a
53
- .replace(/\u0435/g, "e") // е → e
54
- .replace(/\u043E/g, "o") // о → o
55
- .replace(/\u0440/g, "p") // р → p
56
- .replace(/\u0441/g, "c") // с → c
57
- .replace(/\u0445/g, "x") // х → x
58
- .replace(/\u0456/g, "i") // і → i (Cyrillic i)
59
- .replace(/\u0410/g, "A") // А → A
60
- .replace(/\u0415/g, "E") // Е → E
61
- .replace(/\u041E/g, "O") // О → O
62
- .replace(/\u0420/g, "P") // Р → P
63
- .replace(/\u0421/g, "C") // С → C
64
- .replace(/\u0425/g, "X") // Х → X
65
- // Fold common Greek confusables (visually identical to Latin)
66
- .replace(/\u03BF/g, "o") // ο → o (Greek omicron)
67
- .replace(/\u03C5/g, "u") // υ → u (Greek upsilon)
68
- .replace(/\u03BD/g, "v") // ν → v (Greek nu)
69
- .replace(/\u03B1/g, "a") // α → a (Greek alpha)
70
- .replace(/\u03B5/g, "e") // ε → e (Greek epsilon)
71
- .replace(/\u03B9/g, "i") // ι → i (Greek iota)
72
- .replace(/\u03BA/g, "k") // κ → k (Greek kappa)
73
- .replace(/\u03C1/g, "p") // ρ → p (Greek rho)
74
- .replace(/\u039F/g, "O") // Ο → O (Greek capital omicron)
75
- .replace(/\u0391/g, "A") // Α → A (Greek capital alpha)
76
- .replace(/\u0395/g, "E") // Ε → E (Greek capital epsilon)
77
- .replace(/\u0399/g, "I") // Ι → I (Greek capital iota)
78
- .replace(/\u039A/g, "K") // Κ → K (Greek capital kappa)
79
- // Fold Latin small capitals and modifier letters (used in visual spoofing)
80
- .replace(/\u1D0F/g, "o") // ᴏ → o (Latin small capital O)
81
- .replace(/\u1D20/g, "v") // ᴠ → v (Latin small capital V)
82
- .replace(/\u1D00/g, "a") // ᴀ → a (Latin small capital A)
83
- .replace(/\u1D04/g, "c") // ᴄ → c (Latin small capital C)
84
- .replace(/\u1D07/g, "e") // ᴇ → e (Latin small capital E)
85
- .replace(/\u026A/g, "i") // ɪ → i (Latin small capital I)
86
- .replace(/\u0280/g, "r"); // ʀ → r (Latin small capital R)
1
+ const REVIEW_CONTEXT_PATTERN = /\b(review|audit|analy[sz]e|analysis|inspect|assess|pentest|security|vulnerabilit(?:y|ies)|bug(?:s)?|defect(?:s)?|quality|code\s+review)\b/i;
2
+ const TOOL_SUPPRESSION_PATTERN = /\b(do\s*not|don't|never|without)\b[\s\S]{0,80}\b(tool(?:s)?|shell|bash|command(?:s)?)\b/i;
3
+ const CRITICAL_TOOLS = ["Read", "Grep", "Glob", "Bash"];
4
+ function canonicalizeTools(tools) {
5
+ return tools
6
+ .map(raw => raw.trim())
7
+ .filter(Boolean)
8
+ .map(trimmed => {
9
+ const cut = Math.min(...[trimmed.indexOf("("), trimmed.indexOf(":"), trimmed.length].filter(i => i >= 0));
10
+ return trimmed.slice(0, cut).trim();
11
+ });
87
12
  }
88
- // Patterns that combine negation with tool/command references.
89
- // Each pattern requires a negation word near a tool-related action.
90
- // Tolerates punctuation and intervening clauses between negation and tool noun.
91
- // (?:[\w,]+\s+){0,6} allows up to 6 intervening words/commas for punctuation-separated negations.
92
- const TOOL_SUPPRESSION_PATTERNS = [
93
- /\b(?:do\s+not|don't|never|must\s+not|should\s+not|shouldn't|cannot|can't)\s*,?\s*(?:[\w,]+\s+){0,6}(?:run|use|execute|invoke|call|access)\s+(?:(?:\w+\s+){0,4})(?:tools?|shell\s*commands?|bash|terminal|cli|commands?)\b/i,
94
- /\b(?:do\s+not|don't|never|must\s+not|should\s+not|shouldn't)\s*,?\s*(?:[\w,]+\s+){0,6}(?:read|open|access|consult)\s+(?:(?:\w+\s+){0,4})(?:files?|the\s+file\s*system|disk|repositor(?:y|ies)\s*files?)\b/i,
95
- /\bwithout\s+(?:using|running|executing|accessing)\s+(?:(?:\w+\s+){0,4})(?:tools?|shell\s*commands?|external)\b/i,
96
- /\b(?:respond|answer|analyze|reply)\s+(?:only|solely|exclusively)\s+(?:based\s+on|from|using)\s+(?:the\s+)?(?:code|context|information|text)\s+(?:provided|given|above|below)\b/i,
97
- /\bno\s+(?:tool|shell|file|command|filesystem)\s+(?:access|usage|calls?|execution)\b/i,
98
- // Specific tool-name suppression: "Do not use Read or Grep", "never call Bash"
99
- // Case-sensitive for tool identifiers to avoid false positives like "read replicas"
100
- /\b(?:[Dd]o\s+not|[Dd]on't|[Nn]ever|[Mm]ust\s+not|[Ss]hould\s+not|[Ss]houldn't|[Cc]annot|[Cc]an't)\s+(?:run|use|execute|invoke|call|access)\s+(?:Read|Grep|Glob|Bash|Write|Edit)\b/,
101
- // "avoid/refrain from using tools" or "avoid opening files"
102
- /\b(?:avoid|refrain\s+from)\s+(?:using|running|executing|accessing|calling|opening)\s+(?:(?:\w+\s+){0,4})(?:tools?|shell\s*commands?|bash|terminal|cli|commands?|external|files?|additional\s+files?)\b/i,
103
- // Standalone "no tools" — bare denial of tool access
104
- /\bno\s+tools\b/i,
105
- // "base your answer on this diff/snippet/code only"
106
- /\b(?:base|ground)\s+(?:your\s+)?(?:answer|response|analysis|review|conclusions?)\s+(?:on|upon)\s+(?:this|the)\s+(?:diff|snippet|code|patch|context|excerpt)\s+(?:only|alone|exclusively)\b/i,
107
- // "use reasoning/analysis only" (from context, not tools)
108
- /\buse\s+(?:only\s+)?(?:reasoning|analysis|your\s+judgment)\s+(?:only\s+)?(?:from|based\s+on)\s+(?:the\s+)?(?:snippet|diff|code|context|patch)\b/i,
109
- // "work offline" / "do not call external resources"
110
- /\bwork\s+offline\b/i,
111
- // "self-contained" / "snippet only" / "sole source of truth"
112
- /\b(?:self[- ]contained|snippet[- ]only|sole\s+source\s+of\s+truth)\b/i,
113
- // "keep analysis to/within this snippet/excerpt/diff"
114
- /\b(?:keep|restrict|limit|confine)\s+(?:the\s+)?(?:analysis|review|response|yourself)\s+(?:to|within)\s+(?:this|the)\s+(?:snippet|excerpt|diff|patch|code|context|text)\b/i,
115
- // "tool access is unavailable/disabled/restricted"
116
- /\btool\s+access\s+(?:is\s+)?(?:unavailable|disabled|restricted|not\s+available)\b/i,
117
- // "use only what is shown/provided/pasted"
118
- /\buse\s+only\s+(?:what\s+is\s+)?(?:shown|provided|pasted|given|included)\b/i,
119
- // "no need to execute/run/access"
120
- /\bno\s+need\s+to\s+(?:execute|run|access|open|read)\b/i,
121
- ];
122
- /**
123
- * Detect tool-suppression language in a prompt.
124
- * Returns the matched patterns for diagnostics.
125
- */
126
- export function detectToolSuppression(prompt) {
127
- const normalized = normalizeForMatching(prompt);
128
- const matches = [];
129
- for (const pattern of TOOL_SUPPRESSION_PATTERNS) {
130
- const match = normalized.match(pattern);
131
- if (match) {
132
- matches.push(match[0]);
133
- }
134
- }
135
- return matches;
13
+ export function isReviewContext(prompt) {
14
+ return REVIEW_CONTEXT_PATTERN.test(prompt);
136
15
  }
137
- // Note: <code[^>]*> already matches code inside <pre><code> blocks,
138
- // so a separate <pre><code> pattern is not needed (would double-count).
139
- // Case-insensitive for <CODE>/<PRE> tags. Fence regex uses backreference for matched opener/closer.
140
- const INLINED_CODE_PATTERNS = [
141
- /<code[^>]*>([\s\S]*?)<\/code>/gi,
142
- // Standalone <pre> blocks that don't contain <code> (avoids double-counting <pre><code>)
143
- /<pre[^>]*>(?!\s*<code)([\s\S]*?)<\/pre>/gi,
144
- // Multi-line backtick fences: opener and closer must use same number of backticks
145
- /(`{3,})[^\n]*\r?\n([\s\S]*?)\1/g,
146
- // Multi-line tilde fences
147
- /(~{3,})[^\n]*\r?\n([\s\S]*?)\1/g,
148
- // Single-line backtick fences: ```<content>``` on one line
149
- /`{3,}[^\n`]*`{3,}/g,
150
- ];
151
- // Group index for captured content differs per pattern:
152
- // HTML code: group 1; pre: group 1; backtick/tilde multi-line: group 2; single-line: group 0 (full match)
153
- const INLINED_CODE_CONTENT_GROUPS = [1, 1, 2, 2, 0];
154
- const INLINED_CODE_MIN_LENGTH = 200;
155
- const INLINED_CODE_TOTAL_THRESHOLD = 1000;
156
- // Heuristic for detecting raw code pasted without fences or tags.
157
- // Multi-language token pattern: JS/TS + Rust + Python + Go + Java + C/C++.
158
- // Word-boundary tokens use \b; symbol tokens match without \b.
159
- const RAW_CODE_TOKEN_PATTERN = /(?:\b(?:import|export|from|require|function|const|let|var|class|interface|type|return|if|else|for|while|switch|case|try|catch|throw|async|await|new|this|fn|impl|pub|struct|match|mod|use|crate|mut|enum|trait|unsafe|def|elif|lambda|yield|pass|with|raise|except|func|package|defer|goroutine|chan|select|void|static|final|abstract|extends|implements|override|sizeof|template|namespace|include|typedef|printf|println)\b|=>|===|!==|[{};])/g;
160
- const RAW_CODE_MIN_TOKENS = 15;
161
- const RAW_CODE_DENSITY_THRESHOLD = 1.5; // tokens per 100 chars
162
- /**
163
- * Detect inlined code blocks that look like full file dumps.
164
- * Two detection strategies:
165
- * 1. Any single code block with 200+ chars is flagged.
166
- * 2. Fallback: if total chars across ALL code blocks (even small ones)
167
- * exceeds 1000, flag to catch split-block bypass attempts.
168
- */
169
- export function detectInlinedCode(prompt) {
170
- let count = 0;
171
- let totalChars = 0;
172
- let allBlocksTotal = 0;
173
- let allBlocksCount = 0;
174
- for (let i = 0; i < INLINED_CODE_PATTERNS.length; i++) {
175
- const pattern = INLINED_CODE_PATTERNS[i];
176
- const contentGroup = INLINED_CODE_CONTENT_GROUPS[i];
177
- pattern.lastIndex = 0;
178
- let match;
179
- while ((match = pattern.exec(prompt)) !== null) {
180
- const rawContent = contentGroup === 0 ? match[0] : match[contentGroup];
181
- const content = (rawContent || "").trim();
182
- allBlocksCount++;
183
- allBlocksTotal += content.length;
184
- if (content.length >= INLINED_CODE_MIN_LENGTH) {
185
- count++;
186
- totalChars += content.length;
187
- }
188
- }
189
- }
190
- // Fallback: catch split-block bypass (many small blocks totaling large payload)
191
- if (count === 0 && allBlocksTotal >= INLINED_CODE_TOTAL_THRESHOLD) {
192
- count = allBlocksCount;
193
- totalChars = allBlocksTotal;
194
- }
195
- // Fallback: detect plain-text code dumps (no fences or tags) via code-token density.
196
- // Only triggers when no fenced/tagged blocks were found and the prompt is large enough.
197
- if (count === 0 && prompt.length >= INLINED_CODE_TOTAL_THRESHOLD) {
198
- const codeTokens = prompt.match(RAW_CODE_TOKEN_PATTERN);
199
- const tokenCount = codeTokens ? codeTokens.length : 0;
200
- // Require minimum absolute token count AND density ratio (tokens per 100 chars)
201
- const density = (tokenCount / prompt.length) * 100;
202
- if (tokenCount >= RAW_CODE_MIN_TOKENS && density >= RAW_CODE_DENSITY_THRESHOLD) {
203
- count = 1;
204
- totalChars = prompt.length;
16
+ export function checkReviewIntegrity(input) {
17
+ const violations = [];
18
+ const reviewContext = isReviewContext(input.prompt);
19
+ if (reviewContext && input.allowedTools && input.allowedTools.length === 0) {
20
+ violations.push({
21
+ type: "empty_allowed_tools",
22
+ score: 6,
23
+ detail: "Review request with empty allowedTools limits reviewer capability",
24
+ });
25
+ }
26
+ if (reviewContext && input.disallowedTools && input.disallowedTools.length > 0) {
27
+ const canonical = canonicalizeTools(input.disallowedTools);
28
+ const blockedCritical = CRITICAL_TOOLS.filter(tool => canonical.includes(tool));
29
+ if (blockedCritical.length > 0) {
30
+ violations.push({
31
+ type: "critical_tools_disallowed",
32
+ score: 6,
33
+ detail: `Critical review tools disallowed: ${blockedCritical.join(", ")}`,
34
+ });
205
35
  }
206
36
  }
207
- return { count, totalChars };
208
- }
209
- /**
210
- * Combined review integrity check. Only emits violations when BOTH
211
- * review context is detected AND a restriction is present.
212
- */
213
- // Tools that reviewers need to independently verify code claims.
214
- const CRITICAL_REVIEW_TOOLS = ["Read", "Grep", "Glob", "Bash"];
215
- // Extract base tool name from scoped/pattern forms like "Read(*)", "Bash(git:*)", "Grep"
216
- function canonicalizeToolName(spec) {
217
- const trimmed = spec.trim();
218
- const parenIdx = trimmed.indexOf("(");
219
- const colonIdx = trimmed.indexOf(":");
220
- const cutIdx = parenIdx >= 0 && colonIdx >= 0
221
- ? Math.min(parenIdx, colonIdx)
222
- : parenIdx >= 0 ? parenIdx : colonIdx >= 0 ? colonIdx : -1;
223
- return cutIdx >= 0 ? trimmed.slice(0, cutIdx).trim() : trimmed;
224
- }
225
- export function checkReviewIntegrity(params) {
226
- const reviewContext = isReviewContext(params.prompt);
227
- const result = {
228
- isReviewContext: reviewContext,
229
- violations: [],
230
- totalScore: 0,
231
- };
232
- // Gate: no violations emitted for non-review prompts
233
- if (!reviewContext) {
234
- return result;
235
- }
236
- // Check tool suppression language
237
- const suppressionMatches = detectToolSuppression(params.prompt);
238
- if (suppressionMatches.length > 0) {
239
- const violation = {
37
+ if (reviewContext && TOOL_SUPPRESSION_PATTERN.test(input.prompt)) {
38
+ violations.push({
240
39
  type: "tool_suppression",
241
40
  score: 4,
242
- detail: `Prompt contains tool-suppression language in review context: ${suppressionMatches.join("; ")}`,
243
- };
244
- result.violations.push(violation);
245
- result.totalScore += violation.score;
246
- }
247
- // Check inlined code
248
- const inlined = detectInlinedCode(params.prompt);
249
- if (inlined.count > 0) {
250
- const violation = {
251
- type: "inlined_code",
252
- score: 2,
253
- detail: `Prompt inlines ${inlined.count} code block(s) (${inlined.totalChars} chars) instead of file paths — reviewers should read files directly`,
254
- };
255
- result.violations.push(violation);
256
- result.totalScore += violation.score;
257
- }
258
- // Check empty allowedTools
259
- if (params.allowedTools && params.allowedTools.length === 0) {
260
- const violation = {
261
- type: "empty_allowed_tools",
262
- score: 4,
263
- detail: "allowedTools is empty in review context — reviewers need tool access to read files and verify claims",
264
- };
265
- result.violations.push(violation);
266
- result.totalScore += violation.score;
267
- }
268
- // Check disallowedTools blocking critical review tools (canonicalize to handle scoped forms like "Read(*)")
269
- if (params.disallowedTools && params.disallowedTools.length > 0) {
270
- const canonicalized = params.disallowedTools.map(canonicalizeToolName);
271
- const blocked = CRITICAL_REVIEW_TOOLS.filter(t => canonicalized.includes(t));
272
- if (blocked.length > 0) {
273
- const violation = {
274
- type: "critical_tools_disallowed",
275
- score: 4,
276
- detail: `Critical review tools disallowed: ${blocked.join(", ")} — reviewers need these to verify claims`,
277
- };
278
- result.violations.push(violation);
279
- result.totalScore += violation.score;
280
- }
41
+ detail: "Prompt contains tool-suppression language in review context",
42
+ });
281
43
  }
282
- return result;
44
+ return {
45
+ isReviewContext: reviewContext,
46
+ violations,
47
+ totalScore: violations.reduce((sum, violation) => sum + violation.score, 0),
48
+ };
283
49
  }
@@ -2,7 +2,7 @@ import { randomUUID } from "crypto";
2
2
  const DEFAULT_SESSION_DESCRIPTIONS = {
3
3
  claude: "Claude Session",
4
4
  codex: "Codex Session",
5
- gemini: "Gemini Session"
5
+ gemini: "Gemini Session",
6
6
  };
7
7
  /**
8
8
  * PostgreSQL-backed session manager with Redis caching
@@ -127,7 +127,7 @@ export class PostgreSQLSessionManager {
127
127
  cli,
128
128
  createdAt: now,
129
129
  lastUsedAt: now,
130
- description: sessionDescription
130
+ description: sessionDescription,
131
131
  };
132
132
  // Write-through to cache
133
133
  try {
@@ -207,7 +207,9 @@ export class PostgreSQLSessionManager {
207
207
  : `SELECT id, cli, description, metadata, created_at AS "createdAt", last_used_at AS "lastUsedAt"
208
208
  FROM sessions
209
209
  ORDER BY last_used_at DESC`;
210
- const result = cli ? await this.pool.query(query, [cli]) : await this.pool.query(query);
210
+ const result = cli
211
+ ? await this.pool.query(query, [cli])
212
+ : await this.pool.query(query);
211
213
  const sessions = result.rows;
212
214
  // Cache CLI-specific lists
213
215
  if (cacheKey) {
@@ -369,7 +371,7 @@ export class PostgreSQLSessionManager {
369
371
  await Promise.all([
370
372
  this.redis.del("active_session:claude"),
371
373
  this.redis.del("active_session:codex"),
372
- this.redis.del("active_session:gemini")
374
+ this.redis.del("active_session:gemini"),
373
375
  ]);
374
376
  }
375
377
  catch (error) {
@@ -1,7 +1,7 @@
1
1
  import { randomUUID } from "crypto";
2
2
  import { homedir } from "os";
3
3
  import { join, dirname } from "path";
4
- import { existsSync, mkdirSync, readFileSync, writeFileSync, renameSync, openSync, fsyncSync, closeSync, chmodSync } from "fs";
4
+ import { existsSync, mkdirSync, readFileSync, writeFileSync, renameSync, openSync, fsyncSync, closeSync, chmodSync, } from "fs";
5
5
  import { DEFAULT_SESSION_TTL_SECONDS } from "./config.js";
6
6
  import { noopLogger } from "./logger.js";
7
7
  export const CLI_TYPES = ["claude", "codex", "gemini"];
@@ -9,7 +9,7 @@ const createEmptyActiveSessions = () => Object.fromEntries(CLI_TYPES.map(cli =>
9
9
  const DEFAULT_SESSION_DESCRIPTIONS = {
10
10
  claude: "Claude Session",
11
11
  codex: "Codex Session",
12
- gemini: "Gemini Session"
12
+ gemini: "Gemini Session",
13
13
  };
14
14
  export class FileSessionManager {
15
15
  storagePath;
@@ -65,7 +65,10 @@ export class FileSessionManager {
65
65
  }
66
66
  saveStorage() {
67
67
  const tempPath = `${this.storagePath}.tmp.${process.pid}`;
68
- writeFileSync(tempPath, JSON.stringify(this.storage, null, 2), { encoding: "utf-8", mode: 0o600 });
68
+ writeFileSync(tempPath, JSON.stringify(this.storage, null, 2), {
69
+ encoding: "utf-8",
70
+ mode: 0o600,
71
+ });
69
72
  const fd = openSync(tempPath, "r+");
70
73
  try {
71
74
  fsyncSync(fd);
@@ -85,7 +88,7 @@ export class FileSessionManager {
85
88
  cli,
86
89
  createdAt: new Date().toISOString(),
87
90
  lastUsedAt: new Date().toISOString(),
88
- description: sessionDescription
91
+ description: sessionDescription,
89
92
  };
90
93
  this.storage.sessions[id] = session;
91
94
  // Set as active session if none exists for this CLI
@@ -42,12 +42,14 @@ export function parseStreamJson(stdout) {
42
42
  }
43
43
  // Extract from result event (preferred)
44
44
  if (resultEvent) {
45
- const usage = resultEvent.usage ? {
46
- inputTokens: resultEvent.usage.input_tokens ?? 0,
47
- outputTokens: resultEvent.usage.output_tokens ?? 0,
48
- cacheReadInputTokens: resultEvent.usage.cache_read_input_tokens ?? 0,
49
- cacheCreationInputTokens: resultEvent.usage.cache_creation_input_tokens ?? 0,
50
- } : null;
45
+ const usage = resultEvent.usage
46
+ ? {
47
+ inputTokens: resultEvent.usage.input_tokens ?? 0,
48
+ outputTokens: resultEvent.usage.output_tokens ?? 0,
49
+ cacheReadInputTokens: resultEvent.usage.cache_read_input_tokens ?? 0,
50
+ cacheCreationInputTokens: resultEvent.usage.cache_creation_input_tokens ?? 0,
51
+ }
52
+ : null;
51
53
  return {
52
54
  text: resultEvent.result ?? "",
53
55
  costUsd: resultEvent.total_cost_usd ?? null,
package/package.json CHANGED
@@ -1,6 +1,7 @@
1
1
  {
2
2
  "name": "llm-cli-gateway",
3
- "version": "1.0.0",
3
+ "version": "1.1.0",
4
+ "mcpName": "io.github.verivus-oss/llm-cli-gateway",
4
5
  "description": "MCP server providing unified access to Claude Code, Codex, and Gemini CLIs with session management, retry logic, and async job orchestration.",
5
6
  "license": "MIT",
6
7
  "author": {
@@ -38,7 +39,7 @@
38
39
  "llm-cli-gateway": "./dist/index.js"
39
40
  },
40
41
  "engines": {
41
- "node": ">=18.0.0"
42
+ "node": ">=20.0.0"
42
43
  },
43
44
  "files": [
44
45
  "dist/**/*.js",
@@ -60,7 +61,7 @@
60
61
  "test:unit": "vitest run src/__tests__/executor.test.ts",
61
62
  "test:session": "vitest run src/__tests__/session-manager.test.ts",
62
63
  "test:session-pg": "bash ./scripts/test-pg.sh src/__tests__/session-manager-pg.test.ts",
63
- "test:integration": "vitest run src/__tests__/integration.test.ts",
64
+ "test:integration": "INTEGRATION_TESTS=1 vitest run src/__tests__/integration.test.ts",
64
65
  "test:pg": "bash ./scripts/test-pg.sh",
65
66
  "test:all": "npm run test && npm run test:pg",
66
67
  "lint": "eslint src/**/*.ts",
@@ -71,16 +72,19 @@
71
72
  },
72
73
  "dependencies": {
73
74
  "@modelcontextprotocol/sdk": "^1.0.0",
75
+ "better-sqlite3": "^11.0.0",
74
76
  "ioredis": "^5.4.1",
75
77
  "pg": "^8.12.0",
76
78
  "toml": "^3.0.0",
77
79
  "zod": "^3.23.0"
78
80
  },
79
81
  "devDependencies": {
82
+ "@types/better-sqlite3": "^7.6.0",
80
83
  "@types/node": "^20.19.30",
81
84
  "@types/pg": "^8.11.10",
82
85
  "@typescript-eslint/eslint-plugin": "^6.0.0",
83
86
  "@typescript-eslint/parser": "^6.0.0",
87
+ "@vitest/coverage-v8": "^4.1.2",
84
88
  "eslint": "^8.0.0",
85
89
  "eslint-config-prettier": "^9.0.0",
86
90
  "prettier": "^3.0.0",