consult-llm-mcp 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -354,6 +354,34 @@ claude mcp add consult-llm -e OPENAI_MODE=cli -- npx -y consult-llm-mcp
  > Set reasoning effort with `-e CODEX_REASONING_EFFORT=high`. Options:
  > `none`, `minimal`, `low`, `medium`, `high`, `xhigh` (gpt-5.1-codex-max only).

+ #### Multi-turn conversations
+
+ CLI mode supports multi-turn conversations via the `thread_id` parameter. The
+ first response includes a `[thread_id:xxx]` prefix. Pass that ID in follow-up
+ requests to continue the conversation with full context from prior turns.
+
+ This works with both Gemini CLI and Codex CLI. Gemini uses session IDs, Codex
+ uses thread IDs, but both are passed through the same `thread_id` parameter.
+
+ ```
+ ⏺ consult-llm - consult_llm (MCP)(prompt: "What's your take on winter?",
+ model: "gpt-5.3-codex")
+ ⎿ [thread_id:thread_b1ff711...]
+
+ Winter is high-variance, not universally the worst. ...
+
+ ⏺ consult-llm - consult_llm (MCP)(prompt: "What about rain?",
+ model: "gpt-5.3-codex",
+ thread_id: "thread_b1ff711...")
+ ⎿ [thread_id:thread_b1ff711...]
+
+ Rain has high upside, high annoyance depending on context. ...
+ ```
+
+ See [skills/debate/SKILL.md](skills/debate/SKILL.md) for a skill where the agent
+ debates an opponent LLM through multiple turns, then synthesizes and implements
+ the result.
+
  ### Web mode

  Copies the formatted prompt to clipboard instead of querying an LLM. Paste into
@@ -381,8 +409,8 @@ See the "Using web mode..." example above for a concrete transcript.
  - `DEEPSEEK_API_KEY` - Your DeepSeek API key (required for DeepSeek models)
  - `CONSULT_LLM_DEFAULT_MODEL` - Override the default model (optional)
  - Options: `gpt-5.2` (default), `gemini-2.5-pro`, `gemini-3-pro-preview`,
- `deepseek-reasoner`, `gpt-5.2-codex`, `gpt-5.1-codex-max`, `gpt-5.1-codex`,
- `gpt-5.1-codex-mini`, `gpt-5.1`
+ `deepseek-reasoner`, `gpt-5.3-codex`, `gpt-5.2-codex`, `gpt-5.1-codex-max`,
+ `gpt-5.1-codex`, `gpt-5.1-codex-mini`, `gpt-5.1`
  - `GEMINI_MODE` - Choose between API or CLI mode for Gemini models (optional)
  - Options: `api` (default), `cli`
  - CLI mode uses the system-installed `gemini` CLI tool
@@ -474,8 +502,8 @@ models complex questions.

  - **model** (optional): LLM model to use
  - Options: `gpt-5.2` (default), `gemini-2.5-pro`, `gemini-3-pro-preview`,
- `deepseek-reasoner`, `gpt-5.2-codex`, `gpt-5.1-codex-max`, `gpt-5.1-codex`,
- `gpt-5.1-codex-mini`, `gpt-5.1`
+ `deepseek-reasoner`, `gpt-5.3-codex`, `gpt-5.2-codex`, `gpt-5.1-codex-max`,
+ `gpt-5.1-codex`, `gpt-5.1-codex-mini`, `gpt-5.1`

  - **web_mode** (optional): Copy prompt to clipboard instead of querying LLM
  - Default: `false`
@@ -483,6 +511,11 @@ models complex questions.
  contents) is copied to clipboard for manual pasting into browser-based LLM
  services

+ - **thread_id** (optional): Resume a multi-turn conversation
+ - Works with Codex CLI (`gpt-*`) and Gemini CLI (`gemini-*`) in CLI mode
+ - The first response includes a `[thread_id:xxx]` prefix — pass that ID back
+ as `thread_id` in follow-up requests to maintain conversation context
+
  - **git_diff** (optional): Include git diff output as context
  - **files** (required): Specific files to include in diff
  - **repo_path** (optional): Path to git repository (defaults to current
@@ -497,6 +530,7 @@ models complex questions.
  - **deepseek-reasoner**: DeepSeek's reasoning model ($0.55/$2.19 per million
  tokens)
  - **gpt-5.2**: OpenAI's latest GPT model
+ - **gpt-5.3-codex**: OpenAI's Codex model based on GPT-5.3
  - **gpt-5.2-codex**: OpenAI's Codex model based on GPT-5.2
  - **gpt-5.1-codex-max**: Strongest OpenAI Codex model
  - **gpt-5.1-codex**: OpenAI's Codex model optimized for coding
@@ -586,7 +620,8 @@ need reliability or custom instructions.

  Here's an example [Claude Code skill](https://code.claude.com/docs/en/skills)
  that uses the `consult_llm` MCP tool to create commands like "ask gemini" or
- "ask codex". See [examples/SKILL.md](examples/SKILL.md) for the full content.
+ "ask codex". See [skills/consult/SKILL.md](skills/consult/SKILL.md) for the full
+ content.

  Save it as `~/.claude/skills/consult-llm/SKILL.md` and you can then use it by
  typing "ask gemini about X" or "ask codex about X" in Claude Code.
package/dist/config.d.ts CHANGED
@@ -3,7 +3,7 @@ export declare const SupportedChatModel: z.ZodEnum<{
  [x: string]: string;
  }>;
  export type SupportedChatModel = z.infer<typeof SupportedChatModel>;
- export declare const fallbackModel: "gemini-2.5-pro" | "gemini-3-pro-preview" | "deepseek-reasoner" | "gpt-5.2" | "gpt-5.2-codex" | "gpt-5.1-codex-max" | "gpt-5.1-codex" | "gpt-5.1-codex-mini" | "gpt-5.1";
+ export declare const fallbackModel: "gemini-2.5-pro" | "gemini-3-pro-preview" | "deepseek-reasoner" | "gpt-5.2" | "gpt-5.3-codex" | "gpt-5.2-codex" | "gpt-5.1-codex-max" | "gpt-5.1-codex" | "gpt-5.1-codex-mini" | "gpt-5.1";
  declare const Config: z.ZodObject<{
  openaiApiKey: z.ZodOptional<z.ZodString>;
  geminiApiKey: z.ZodOptional<z.ZodString>;
package/dist/llm-query.d.ts CHANGED
@@ -1,5 +1,6 @@
  import { type SupportedChatModel } from './schema.js';
- export declare function queryLlm(prompt: string, model: SupportedChatModel, filePaths?: string[]): Promise<{
+ export declare function queryLlm(prompt: string, model: SupportedChatModel, filePaths?: string[], threadId?: string): Promise<{
  response: string;
  costInfo: string;
+ threadId?: string;
  }>;
package/dist/llm-query.js CHANGED
@@ -2,13 +2,13 @@ import { getExecutorForModel } from './llm.js';
  import { calculateCost } from './llm-cost.js';
  import { config } from './config.js';
  import { getSystemPrompt } from './system-prompt.js';
- export async function queryLlm(prompt, model, filePaths) {
+ export async function queryLlm(prompt, model, filePaths, threadId) {
  const executor = getExecutorForModel(model);
  // Get system prompt (with CLI suffix if needed)
  const isCliMode = (model.startsWith('gemini-') && config.geminiMode === 'cli') ||
  (model.startsWith('gpt-') && config.openaiMode === 'cli');
  const systemPrompt = getSystemPrompt(isCliMode);
- const { response, usage } = await executor.execute(prompt, model, systemPrompt, filePaths);
+ const { response, usage, threadId: returnedThreadId, } = await executor.execute(prompt, model, systemPrompt, filePaths, threadId);
  if (!response) {
  throw new Error('No response from the model');
  }
@@ -22,5 +22,5 @@ export async function queryLlm(prompt, model, filePaths) {
  // Handle case where usage is not available (from CLI)
  costInfo = 'Cost data not available (using CLI mode)';
  }
- return { response, costInfo };
+ return { response, costInfo, threadId: returnedThreadId };
  }
package/dist/llm.d.ts CHANGED
@@ -1,9 +1,18 @@
  import OpenAI from 'openai';
  import { type SupportedChatModel as SupportedChatModelType } from './schema.js';
  export interface LlmExecutor {
- execute(prompt: string, model: SupportedChatModelType, systemPrompt: string, filePaths?: string[]): Promise<{
+ execute(prompt: string, model: SupportedChatModelType, systemPrompt: string, filePaths?: string[], threadId?: string): Promise<{
  response: string;
  usage: OpenAI.CompletionUsage | null;
+ threadId?: string;
  }>;
  }
+ export declare function parseGeminiJson(output: string): {
+ sessionId: string | undefined;
+ response: string;
+ };
+ export declare function parseCodexJsonl(output: string): {
+ threadId: string | undefined;
+ response: string;
+ };
  export declare const getExecutorForModel: (model: SupportedChatModelType) => LlmExecutor;
package/dist/llm.js CHANGED
@@ -31,10 +31,15 @@ function createApiExecutor(client) {
  },
  };
  }
- /**
- * Creates an executor that delegates to a command-line tool.
- */
- function createCliExecutor(cliConfig) {
+ // --- CLI Executors ---
+ export function parseGeminiJson(output) {
+ const parsed = JSON.parse(output);
+ return {
+ sessionId: parsed.session_id,
+ response: parsed.response ?? '',
+ };
+ }
+ function createGeminiExecutor() {
  const buildFullPrompt = (prompt, systemPrompt, filePaths) => {
  let fullPrompt = `${systemPrompt}\n\n${prompt}`;
  if (filePaths && filePaths.length > 0) {
@@ -46,81 +51,187 @@ function createCliExecutor(cliConfig) {
  return fullPrompt;
  };
  return {
- async execute(prompt, model, systemPrompt, filePaths) {
- const fullPrompt = buildFullPrompt(prompt, systemPrompt, filePaths);
- const args = cliConfig.buildArgs(model, fullPrompt);
- const { cliName } = cliConfig;
+ async execute(prompt, model, systemPrompt, filePaths, threadId) {
+ const message = threadId
+ ? prompt
+ : buildFullPrompt(prompt, systemPrompt, filePaths);
+ const args = ['-m', model, '-o', 'json'];
+ if (threadId) {
+ args.push('-r', threadId);
+ }
+ args.push('-p', message);
  return new Promise((resolve, reject) => {
  try {
- logCliDebug(`Spawning ${cliName} CLI`, {
+ logCliDebug('Spawning gemini CLI', {
  model,
- promptLength: fullPrompt.length,
- filePathsCount: filePaths?.length || 0,
- args: args,
- promptPreview: fullPrompt.slice(0, 300),
+ promptLength: message.length,
+ threadId,
+ args,
  });
- const child = spawn(cliName, args, {
+ const child = spawn('gemini', args, {
  shell: false,
  stdio: ['ignore', 'pipe', 'pipe'],
  });
  let stdout = '';
  let stderr = '';
  const startTime = Date.now();
- child.on('spawn', () => logCliDebug(`${cliName} CLI process spawned successfully`));
+ child.on('spawn', () => logCliDebug('gemini CLI process spawned successfully'));
  child.stdout.on('data', (data) => (stdout += data.toString()));
  child.stderr.on('data', (data) => (stderr += data.toString()));
  child.on('close', (code) => {
  const duration = Date.now() - startTime;
- logCliDebug(`${cliName} CLI process closed`, {
+ logCliDebug('gemini CLI process closed', {
  code,
  duration: `${duration}ms`,
  stdoutLength: stdout.length,
  stderrLength: stderr.length,
  });
  if (code === 0) {
- resolve({ response: stdout.trim(), usage: null });
+ try {
+ const parsed = parseGeminiJson(stdout);
+ if (!parsed.response) {
+ reject(new Error('No response found in Gemini JSON output'));
+ return;
+ }
+ resolve({
+ response: parsed.response,
+ usage: null,
+ threadId: parsed.sessionId,
+ });
+ }
+ catch {
+ reject(new Error(`Failed to parse Gemini JSON output: ${stdout.slice(0, 200)}`));
+ }
  }
  else {
- reject(cliConfig.handleNonZeroExit(code ?? -1, stderr));
+ if (stderr.includes('RESOURCE_EXHAUSTED')) {
+ reject(new Error(`Gemini quota exceeded. Consider using gemini-2.0-flash model. Error: ${stderr.trim()}`));
+ }
+ else {
+ reject(new Error(`Gemini CLI exited with code ${code ?? -1}. Error: ${stderr.trim()}`));
+ }
  }
  });
  child.on('error', (err) => {
- logCliDebug(`Failed to spawn ${cliName} CLI`, {
- error: err.message,
- });
- reject(new Error(`Failed to spawn ${cliName} CLI. Is it installed and in PATH? Error: ${err.message}`));
+ logCliDebug('Failed to spawn gemini CLI', { error: err.message });
+ reject(new Error(`Failed to spawn gemini CLI. Is it installed and in PATH? Error: ${err.message}`));
  });
  }
  catch (err) {
- reject(new Error(`Synchronous error while trying to spawn ${cliName}: ${err instanceof Error ? err.message : String(err)}`));
+ reject(new Error(`Synchronous error while trying to spawn gemini: ${err instanceof Error ? err.message : String(err)}`));
  }
  });
  },
  };
  }
- // --- CLI Configurations ---
- const geminiCliConfig = {
- cliName: 'gemini',
- buildArgs: (model, fullPrompt) => ['-m', model, '-p', fullPrompt],
- handleNonZeroExit: (code, stderr) => {
- if (stderr.includes('RESOURCE_EXHAUSTED')) {
- return new Error(`Gemini quota exceeded. Consider using gemini-2.0-flash model. Error: ${stderr.trim()}`);
+ export function parseCodexJsonl(output) {
+ let threadId;
+ const messages = [];
+ for (const line of output.split('\n')) {
+ const trimmed = line.trim();
+ if (!trimmed)
+ continue;
+ try {
+ const event = JSON.parse(trimmed);
+ if (event.type === 'thread.started' && event.thread_id) {
+ threadId = event.thread_id;
+ }
+ else if (event.type === 'item.completed' &&
+ event.item?.type === 'agent_message' &&
+ event.item?.text) {
+ messages.push(event.item.text);
+ }
  }
- return new Error(`Gemini CLI exited with code ${code}. Error: ${stderr.trim()}`);
- },
- };
- const codexCliConfig = {
- cliName: 'codex',
- buildArgs: (model, fullPrompt) => {
- const args = ['exec', '--skip-git-repo-check', '-m', model];
- if (config.codexReasoningEffort) {
- args.push('-c', `model_reasoning_effort="${config.codexReasoningEffort}"`);
+ catch {
+ // Skip non-JSON lines (e.g. the ERROR log from resume)
  }
- args.push(fullPrompt);
- return args;
- },
- handleNonZeroExit: (code, stderr) => new Error(`Codex CLI exited with code ${code}. Error: ${stderr.trim()}`),
- };
+ }
+ return { threadId, response: messages.join('\n') };
+ }
+ function createCodexExecutor() {
+ const appendFiles = (text, filePaths) => {
+ if (!filePaths || filePaths.length === 0)
+ return text;
+ const fileRefs = filePaths
+ .map((path) => `@${relative(process.cwd(), path)}`)
+ .join(' ');
+ return `${text}\n\nFiles: ${fileRefs}`;
+ };
+ return {
+ async execute(prompt, model, systemPrompt, filePaths, threadId) {
+ const message = appendFiles(prompt, filePaths);
+ const fullPrompt = threadId
+ ? message // On resume, include files but skip system prompt
+ : `${systemPrompt}\n\n${message}`;
+ const args = [];
+ if (threadId) {
+ args.push('exec', 'resume', '--json', '--skip-git-repo-check');
+ if (config.codexReasoningEffort) {
+ args.push('-c', `model_reasoning_effort="${config.codexReasoningEffort}"`);
+ }
+ args.push('-m', model, threadId, fullPrompt);
+ }
+ else {
+ args.push('exec', '--json', '--skip-git-repo-check');
+ if (config.codexReasoningEffort) {
+ args.push('-c', `model_reasoning_effort="${config.codexReasoningEffort}"`);
+ }
+ args.push('-m', model, fullPrompt);
+ }
+ return new Promise((resolve, reject) => {
+ try {
+ logCliDebug('Spawning codex CLI', {
+ model,
+ promptLength: fullPrompt.length,
+ threadId,
+ args,
+ });
+ const child = spawn('codex', args, {
+ shell: false,
+ stdio: ['ignore', 'pipe', 'pipe'],
+ });
+ let stdout = '';
+ let stderr = '';
+ const startTime = Date.now();
+ child.on('spawn', () => logCliDebug('codex CLI process spawned successfully'));
+ child.stdout.on('data', (data) => (stdout += data.toString()));
+ child.stderr.on('data', (data) => (stderr += data.toString()));
+ child.on('close', (code) => {
+ const duration = Date.now() - startTime;
+ logCliDebug('codex CLI process closed', {
+ code,
+ duration: `${duration}ms`,
+ stdoutLength: stdout.length,
+ stderrLength: stderr.length,
+ });
+ if (code === 0) {
+ const parsed = parseCodexJsonl(stdout);
+ if (!parsed.response) {
+ reject(new Error('No agent_message found in Codex JSONL output'));
+ return;
+ }
+ resolve({
+ response: parsed.response,
+ usage: null,
+ threadId: parsed.threadId,
+ });
+ }
+ else {
+ reject(new Error(`Codex CLI exited with code ${code ?? -1}. Error: ${stderr.trim()}`));
+ }
+ });
+ child.on('error', (err) => {
+ logCliDebug('Failed to spawn codex CLI', { error: err.message });
+ reject(new Error(`Failed to spawn codex CLI. Is it installed and in PATH? Error: ${err.message}`));
+ });
+ }
+ catch (err) {
+ reject(new Error(`Synchronous error while trying to spawn codex: ${err instanceof Error ? err.message : String(err)}`));
+ }
+ });
+ },
+ };
+ }
  const createExecutorProvider = () => {
  const executorCache = new Map();
  const clientCache = new Map();
@@ -172,7 +283,7 @@ const createExecutorProvider = () => {
  if (model.startsWith('gpt-')) {
  executor =
  config.openaiMode === 'cli'
- ? createCliExecutor(codexCliConfig)
+ ? createCodexExecutor()
  : createApiExecutor(getOpenAIClient());
  }
  else if (model.startsWith('deepseek-')) {
@@ -181,7 +292,7 @@ const createExecutorProvider = () => {
  else if (model.startsWith('gemini-')) {
  executor =
  config.geminiMode === 'cli'
- ? createCliExecutor(geminiCliConfig)
+ ? createGeminiExecutor()
  : createApiExecutor(getGeminiApiClient());
  }
  else {
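For reference, the JSONL stream that the new `parseCodexJsonl` consumes is a sequence of events, one JSON object per line. The event shapes below are taken from the parser and its tests; the IDs and message text are illustrative:

```ts
import { parseCodexJsonl } from './llm.js';

// Shapes mirror what parseCodexJsonl looks for; the values are made up.
const sample = [
  '{"type":"thread.started","thread_id":"thread_b1ff711"}',
  '{"type":"item.completed","item":{"type":"reasoning","text":"thinking..."}}',
  '{"type":"item.completed","item":{"type":"agent_message","text":"Hello."}}',
].join('\n');

const { threadId, response } = parseCodexJsonl(sample);
// threadId === 'thread_b1ff711'; response === 'Hello.' (reasoning items are skipped)
```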
package/dist/llm.test.js CHANGED
@@ -1,6 +1,6 @@
  import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
  import { EventEmitter } from 'events';
- import { getExecutorForModel } from './llm.js';
+ import { getExecutorForModel, parseCodexJsonl, parseGeminiJson } from './llm.js';
  const createCompletionMock = vi.hoisted(() => vi.fn());
  const spawnMock = vi.hoisted(() => vi.fn());
  const logCliDebugMock = vi.hoisted(() => vi.fn());
@@ -88,11 +88,76 @@ describe('API executor', () => {
  await expect(executor.execute('prompt', 'gpt-5.1', 'system')).rejects.toThrow('No response from the model via API');
  });
  });
- describe('CLI executor', () => {
+ const codexJsonlOutput = (threadId, text) => [
+ JSON.stringify({ type: 'thread.started', thread_id: threadId }),
+ JSON.stringify({
+ type: 'item.completed',
+ item: { type: 'agent_message', text },
+ }),
+ ].join('\n');
+ describe('parseCodexJsonl', () => {
+ it('extracts thread_id and agent_message text', () => {
+ const output = codexJsonlOutput('thread_abc', 'hello world');
+ const result = parseCodexJsonl(output);
+ expect(result.threadId).toBe('thread_abc');
+ expect(result.response).toBe('hello world');
+ });
+ it('concatenates multiple agent_message items', () => {
+ const output = [
+ JSON.stringify({ type: 'thread.started', thread_id: 't1' }),
+ JSON.stringify({
+ type: 'item.completed',
+ item: { type: 'agent_message', text: 'first' },
+ }),
+ JSON.stringify({
+ type: 'item.completed',
+ item: { type: 'agent_message', text: 'second' },
+ }),
+ ].join('\n');
+ const result = parseCodexJsonl(output);
+ expect(result.response).toBe('first\nsecond');
+ });
+ it('skips reasoning items', () => {
+ const output = [
+ JSON.stringify({ type: 'thread.started', thread_id: 't1' }),
+ JSON.stringify({
+ type: 'item.completed',
+ item: { type: 'reasoning', text: 'thinking...' },
+ }),
+ JSON.stringify({
+ type: 'item.completed',
+ item: { type: 'agent_message', text: 'answer' },
+ }),
+ ].join('\n');
+ const result = parseCodexJsonl(output);
+ expect(result.response).toBe('answer');
+ });
+ it('skips non-JSON lines', () => {
+ const output = [
+ 'ERROR: some log line',
+ JSON.stringify({ type: 'thread.started', thread_id: 't1' }),
+ 'another garbage line',
+ JSON.stringify({
+ type: 'item.completed',
+ item: { type: 'agent_message', text: 'result' },
+ }),
+ ].join('\n');
+ const result = parseCodexJsonl(output);
+ expect(result.threadId).toBe('t1');
+ expect(result.response).toBe('result');
+ });
+ it('returns empty response when no agent_message found', () => {
+ const output = JSON.stringify({ type: 'thread.started', thread_id: 't1' });
+ const result = parseCodexJsonl(output);
+ expect(result.threadId).toBe('t1');
+ expect(result.response).toBe('');
+ });
+ });
+ describe('Codex CLI executor', () => {
  const setupSpawn = (child) => {
  spawnMock.mockReturnValue(child);
  };
- it('spawns codex CLI with combined prompt and files', async () => {
+ it('spawns codex CLI with --json and parses JSONL output', async () => {
  mockConfig.openaiMode = 'cli';
  const child = createChildProcess();
  setupSpawn(child);
@@ -100,20 +165,64 @@ describe('CLI executor', () => {
  const promise = executor.execute('user', 'gpt-5.1', 'system', [
  '/absolute/path/to/file.ts',
  ]);
- resolveCliExecution(child, { stdout: 'result', code: 0 });
+ resolveCliExecution(child, {
+ stdout: codexJsonlOutput('thread_123', 'result'),
+ code: 0,
+ });
  const args = spawnMock.mock.calls[0];
  expect(args?.[0]).toBe('codex');
  const cliArgs = args?.[1];
  expect(cliArgs[0]).toBe('exec');
- expect(cliArgs[1]).toBe('--skip-git-repo-check');
- expect(cliArgs[2]).toBe('-m');
- expect(cliArgs[3]).toBe('gpt-5.1');
- expect(cliArgs[4]).toContain('system');
- expect(cliArgs[4]).toContain('user');
- expect(cliArgs[4]).toContain('Files: @');
+ expect(cliArgs[1]).toBe('--json');
+ expect(cliArgs[2]).toBe('--skip-git-repo-check');
+ expect(cliArgs).toContain('-m');
+ expect(cliArgs).toContain('gpt-5.1');
+ // Last arg is the prompt with system + user + files
+ const promptArg = cliArgs[cliArgs.length - 1];
+ expect(promptArg).toContain('system');
+ expect(promptArg).toContain('user');
+ expect(promptArg).toContain('Files: @');
  const result = await promise;
  expect(result.response).toBe('result');
  expect(result.usage).toBeNull();
+ expect(result.threadId).toBe('thread_123');
+ });
+ it('resumes a session with thread_id', async () => {
+ mockConfig.openaiMode = 'cli';
+ const child = createChildProcess();
+ setupSpawn(child);
+ const executor = getExecutorForModel('gpt-5.1');
+ const promise = executor.execute('follow up question', 'gpt-5.1', 'system', undefined, 'thread_abc');
+ resolveCliExecution(child, {
+ stdout: codexJsonlOutput('thread_abc', 'follow up answer'),
+ code: 0,
+ });
+ const args = spawnMock.mock.calls[0];
+ const cliArgs = args?.[1];
+ expect(cliArgs[0]).toBe('exec');
+ expect(cliArgs[1]).toBe('resume');
+ expect(cliArgs[2]).toBe('--json');
+ expect(cliArgs[3]).toBe('--skip-git-repo-check');
+ expect(cliArgs).toContain('thread_abc');
+ // Prompt should NOT contain system prompt on resume
+ const promptArg = cliArgs[cliArgs.length - 1];
+ expect(promptArg).toBe('follow up question');
+ expect(promptArg).not.toContain('system');
+ const result = await promise;
+ expect(result.response).toBe('follow up answer');
+ expect(result.threadId).toBe('thread_abc');
+ });
+ it('rejects when no agent_message in JSONL output', async () => {
+ mockConfig.openaiMode = 'cli';
+ const child = createChildProcess();
+ setupSpawn(child);
+ const executor = getExecutorForModel('gpt-5.1');
+ const promise = executor.execute('user', 'gpt-5.1', 'system');
+ resolveCliExecution(child, {
+ stdout: JSON.stringify({ type: 'thread.started', thread_id: 't1' }),
+ code: 0,
+ });
+ await expect(promise).rejects.toThrow('No agent_message found in Codex JSONL output');
  });
  it('rejects with codex errors on non-zero exit', async () => {
  mockConfig.openaiMode = 'cli';
@@ -131,25 +240,16 @@ describe('CLI executor', () => {
  setupSpawn(child);
  const executor = getExecutorForModel('gpt-5.1');
  const promise = executor.execute('user', 'gpt-5.1', 'system');
- resolveCliExecution(child, { stdout: 'result', code: 0 });
+ resolveCliExecution(child, {
+ stdout: codexJsonlOutput('t1', 'result'),
+ code: 0,
+ });
  const args = spawnMock.mock.calls[0];
  const cliArgs = args?.[1];
  expect(cliArgs).toContain('-c');
  expect(cliArgs).toContain('model_reasoning_effort="xhigh"');
  await promise;
- mockConfig.codexReasoningEffort = undefined; // reset for other tests
- });
- it('wraps gemini quota errors specially', async () => {
- mockConfig.geminiMode = 'cli';
- const child = createChildProcess();
- setupSpawn(child);
- const executor = getExecutorForModel('gemini-2.5-pro');
- const promise = executor.execute('user', 'gemini-2.5-pro', 'system');
- resolveCliExecution(child, {
- stderr: 'RESOURCE_EXHAUSTED: quota exceeded',
- code: 1,
- });
- await expect(promise).rejects.toThrow('Gemini quota exceeded');
+ mockConfig.codexReasoningEffort = undefined;
  });
  it('handles spawn error events with friendly message', async () => {
  mockConfig.openaiMode = 'cli';
@@ -169,6 +269,112 @@ describe('CLI executor', () => {
  await expect(executor.execute('user', 'gpt-5.1', 'system')).rejects.toThrow('Synchronous error while trying to spawn codex: sync failure');
  });
  });
+ const geminiJsonOutput = (sessionId, response) => JSON.stringify({ session_id: sessionId, response, stats: {} });
+ describe('parseGeminiJson', () => {
+ it('extracts session_id and response', () => {
+ const output = geminiJsonOutput('sess_abc', 'hello world');
+ const result = parseGeminiJson(output);
+ expect(result.sessionId).toBe('sess_abc');
+ expect(result.response).toBe('hello world');
+ });
+ it('returns empty response when response is missing', () => {
+ const output = JSON.stringify({ session_id: 's1' });
+ const result = parseGeminiJson(output);
+ expect(result.sessionId).toBe('s1');
+ expect(result.response).toBe('');
+ });
+ });
+ describe('Gemini CLI executor', () => {
+ const setupSpawn = (child) => {
+ spawnMock.mockReturnValue(child);
+ };
+ it('spawns gemini CLI with -o json and parses JSON output', async () => {
+ mockConfig.geminiMode = 'cli';
+ const child = createChildProcess();
+ setupSpawn(child);
+ const executor = getExecutorForModel('gemini-2.5-pro');
+ const promise = executor.execute('user prompt', 'gemini-2.5-pro', 'system');
+ resolveCliExecution(child, {
+ stdout: geminiJsonOutput('sess_123', 'result'),
+ code: 0,
+ });
+ const args = spawnMock.mock.calls[0];
+ expect(args?.[0]).toBe('gemini');
+ const cliArgs = args?.[1];
+ expect(cliArgs).toContain('-m');
+ expect(cliArgs).toContain('gemini-2.5-pro');
+ expect(cliArgs).toContain('-o');
+ expect(cliArgs).toContain('json');
+ expect(cliArgs).toContain('-p');
+ const result = await promise;
+ expect(result.response).toBe('result');
+ expect(result.usage).toBeNull();
+ expect(result.threadId).toBe('sess_123');
+ });
+ it('resumes a session with thread_id', async () => {
+ mockConfig.geminiMode = 'cli';
+ const child = createChildProcess();
+ setupSpawn(child);
+ const executor = getExecutorForModel('gemini-2.5-pro');
+ const promise = executor.execute('follow up', 'gemini-2.5-pro', 'system', undefined, 'sess_abc');
+ resolveCliExecution(child, {
+ stdout: geminiJsonOutput('sess_abc', 'follow up answer'),
+ code: 0,
+ });
+ const args = spawnMock.mock.calls[0];
+ const cliArgs = args?.[1];
+ expect(cliArgs).toContain('-r');
+ expect(cliArgs).toContain('sess_abc');
+ // Prompt should NOT contain system prompt on resume
+ const pIdx = cliArgs.indexOf('-p');
+ expect(cliArgs[pIdx + 1]).toBe('follow up');
+ const result = await promise;
+ expect(result.response).toBe('follow up answer');
+ expect(result.threadId).toBe('sess_abc');
+ });
+ it('rejects when no response in JSON output', async () => {
+ mockConfig.geminiMode = 'cli';
+ const child = createChildProcess();
+ setupSpawn(child);
+ const executor = getExecutorForModel('gemini-2.5-pro');
+ const promise = executor.execute('user', 'gemini-2.5-pro', 'system');
+ resolveCliExecution(child, {
+ stdout: JSON.stringify({ session_id: 's1' }),
+ code: 0,
+ });
+ await expect(promise).rejects.toThrow('No response found in Gemini JSON output');
+ });
+ it('rejects with parse error on invalid JSON', async () => {
+ mockConfig.geminiMode = 'cli';
+ const child = createChildProcess();
+ setupSpawn(child);
+ const executor = getExecutorForModel('gemini-2.5-pro');
+ const promise = executor.execute('user', 'gemini-2.5-pro', 'system');
+ resolveCliExecution(child, { stdout: 'not json', code: 0 });
+ await expect(promise).rejects.toThrow('Failed to parse Gemini JSON output');
+ });
+ it('wraps gemini quota errors specially', async () => {
+ mockConfig.geminiMode = 'cli';
+ const child = createChildProcess();
+ setupSpawn(child);
+ const executor = getExecutorForModel('gemini-2.5-pro');
+ const promise = executor.execute('user', 'gemini-2.5-pro', 'system');
+ resolveCliExecution(child, {
+ stderr: 'RESOURCE_EXHAUSTED: quota exceeded',
+ code: 1,
+ });
+ await expect(promise).rejects.toThrow('Gemini quota exceeded');
+ });
+ it('handles spawn error events with friendly message', async () => {
+ mockConfig.geminiMode = 'cli';
+ const child = createChildProcess();
+ setupSpawn(child);
+ const executor = getExecutorForModel('gemini-2.5-pro');
+ const promise = executor.execute('user', 'gemini-2.5-pro', 'system');
+ child.emit('error', new Error('not found'));
+ await expect(promise).rejects.toThrow('Failed to spawn gemini CLI. Is it installed and in PATH? Error: not found');
+ });
+ });
  describe('executor selection', () => {
  it('uses deepseek API client', async () => {
  createCompletionMock.mockResolvedValue({
package/dist/models.d.ts CHANGED
@@ -1 +1 @@
- export declare const ALL_MODELS: readonly ["gemini-2.5-pro", "gemini-3-pro-preview", "deepseek-reasoner", "gpt-5.2", "gpt-5.2-codex", "gpt-5.1-codex-max", "gpt-5.1-codex", "gpt-5.1-codex-mini", "gpt-5.1"];
+ export declare const ALL_MODELS: readonly ["gemini-2.5-pro", "gemini-3-pro-preview", "deepseek-reasoner", "gpt-5.2", "gpt-5.3-codex", "gpt-5.2-codex", "gpt-5.1-codex-max", "gpt-5.1-codex", "gpt-5.1-codex-mini", "gpt-5.1"];
package/dist/models.js CHANGED
@@ -3,6 +3,7 @@ export const ALL_MODELS = [
  'gemini-3-pro-preview',
  'deepseek-reasoner',
  'gpt-5.2',
+ 'gpt-5.3-codex',
  'gpt-5.2-codex',
  'gpt-5.1-codex-max',
  'gpt-5.1-codex',
package/dist/schema.d.ts CHANGED
@@ -10,6 +10,7 @@ export declare const ConsultLlmArgs: z.ZodObject<{
  [x: string]: string;
  }>>>;
  web_mode: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
+ thread_id: z.ZodOptional<z.ZodString>;
  git_diff: z.ZodOptional<z.ZodObject<{
  repo_path: z.ZodOptional<z.ZodString>;
  files: z.ZodArray<z.ZodString>;
@@ -18,6 +19,6 @@ export declare const ConsultLlmArgs: z.ZodObject<{
  }, z.core.$strip>;
  export declare const toolSchema: {
  readonly name: "consult_llm";
- readonly description: "Ask a more powerful AI for help with complex problems. Provide your question in the prompt field and always include relevant code files as context.\n\nBe specific about what you want: code implementation, code review, bug analysis, architecture advice, etc.\n\nIMPORTANT: Ask neutral, open-ended questions. Avoid suggesting specific solutions or alternatives in your prompt as this can bias the analysis. Instead of \"Should I use X or Y approach?\", ask \"What's the best approach for this problem?\" Let the consultant LLM provide unbiased recommendations.";
+ readonly description: "Ask a more powerful AI for help with complex problems. Provide your question in the prompt field and always include relevant code files as context.\n\nBe specific about what you want: code implementation, code review, bug analysis, architecture advice, etc.\n\nIMPORTANT: Ask neutral, open-ended questions. Avoid suggesting specific solutions or alternatives in your prompt as this can bias the analysis. Instead of \"Should I use X or Y approach?\", ask \"What's the best approach for this problem?\" Let the consultant LLM provide unbiased recommendations.\n\nFor multi-turn conversations with CLI models (Codex and Gemini), the response includes a [thread_id:xxx] prefix. Extract this ID and pass it as the thread_id parameter in follow-up requests to maintain conversation context.";
  readonly inputSchema: z.core.JSONSchema.JSONSchema;
  };
package/dist/schema.js CHANGED
@@ -19,6 +19,10 @@ export const ConsultLlmArgs = z.object({
  .optional()
  .default(false)
  .describe("If true, copy the formatted prompt to the clipboard instead of querying an LLM. When true, the `model` parameter is ignored. Use this to paste the prompt into browser-based LLM services. IMPORTANT: Only use this when the user specifically requests it. When true, wait for the user to provide the external LLM's response before proceeding with any implementation."),
+ thread_id: z
+ .string()
+ .optional()
+ .describe('Thread/session ID for resuming a conversation. Works with Codex CLI (gpt-*) and Gemini CLI (gemini-*) in CLI mode. Returned in the response prefix as [thread_id:xxx].'),
  git_diff: z
  .object({
  repo_path: z
@@ -47,6 +51,8 @@ export const toolSchema = {

  Be specific about what you want: code implementation, code review, bug analysis, architecture advice, etc.

- IMPORTANT: Ask neutral, open-ended questions. Avoid suggesting specific solutions or alternatives in your prompt as this can bias the analysis. Instead of "Should I use X or Y approach?", ask "What's the best approach for this problem?" Let the consultant LLM provide unbiased recommendations.`,
+ IMPORTANT: Ask neutral, open-ended questions. Avoid suggesting specific solutions or alternatives in your prompt as this can bias the analysis. Instead of "Should I use X or Y approach?", ask "What's the best approach for this problem?" Let the consultant LLM provide unbiased recommendations.
+
+ For multi-turn conversations with CLI models (Codex and Gemini), the response includes a [thread_id:xxx] prefix. Extract this ID and pass it as the thread_id parameter in follow-up requests to maintain conversation context.`,
  inputSchema: consultLlmInputSchema,
  };
package/dist/schema.test.js CHANGED
@@ -43,6 +43,15 @@ describe('ConsultLlmArgs', () => {
  expect(parsed.model).toBeDefined();
  expect(ALL_MODELS).toContain(parsed.model);
  });
+ it('accepts optional thread_id as string', () => {
+ const withThread = ConsultLlmArgs.parse({
+ prompt: 'follow up',
+ thread_id: 'thread_abc',
+ });
+ expect(withThread.thread_id).toBe('thread_abc');
+ const withoutThread = ConsultLlmArgs.parse({ prompt: 'no thread' });
+ expect(withoutThread.thread_id).toBeUndefined();
+ });
  it('defaults web_mode to false but honors explicit value', () => {
  const parsedDefault = ConsultLlmArgs.parse({ prompt: 'default case' });
  expect(parsedDefault.web_mode).toBe(false);
package/dist/server.js CHANGED
@@ -47,7 +47,7 @@ export async function handleConsultLlm(args) {
  .join(', ');
  throw new Error(`Invalid request parameters: ${errors}`);
  }
- const { files, prompt: userPrompt, git_diff, web_mode, model: parsedModel, } = parseResult.data;
+ const { files, prompt: userPrompt, git_diff, web_mode, model: parsedModel, thread_id: threadId, } = parseResult.data;
  const providedModel = typeof args === 'object' &&
  args !== null &&
  Object.prototype.hasOwnProperty.call(args, 'model');
@@ -56,6 +56,9 @@ export async function handleConsultLlm(args) {
  : (config.defaultModel ?? parsedModel);
  logToolCall('consult_llm', args);
  const isCliMode = isCliExecution(model);
+ if (threadId && !isCliMode) {
+ throw new Error('thread_id is only supported with CLI mode models (Codex or Gemini CLI)');
+ }
  let prompt;
  let filePaths;
  if (web_mode || !isCliMode) {
@@ -95,10 +98,13 @@ ${prompt}`;
  content: [{ type: 'text', text: responseMessage }],
  };
  }
- const { response, costInfo } = await queryLlm(prompt, model, filePaths);
+ const { response, costInfo, threadId: returnedThreadId, } = await queryLlm(prompt, model, filePaths, threadId);
  await logResponse(model, response, costInfo);
+ const responseText = returnedThreadId
+ ? `[thread_id:${returnedThreadId}]\n\n${response}`
+ : response;
  return {
- content: [{ type: 'text', text: response }],
+ content: [{ type: 'text', text: responseText }],
  };
  }
  server.setRequestHandler(CallToolRequestSchema, async (request) => {
package/dist/server.test.js CHANGED
@@ -88,13 +88,13 @@ describe('handleConsultLlm', () => {
  expect(processFilesMock).toHaveBeenCalledWith(['file1.ts']);
  expect(generateGitDiffMock).toHaveBeenCalledWith(undefined, ['src/index.ts'], 'HEAD');
  expect(buildPromptMock).toHaveBeenCalledWith('help me', expect.any(Array), 'diff output');
- expect(queryLlmMock).toHaveBeenCalledWith('BUILT PROMPT', 'gpt-5.1', undefined);
+ expect(queryLlmMock).toHaveBeenCalledWith('BUILT PROMPT', 'gpt-5.1', undefined, undefined);
  expect(result.content[0]?.text).toBe('ok');
  });
  it('uses explicit model even when config default exists', async () => {
  mockConfig.defaultModel = 'gpt-5.1';
  await handleConsultLlm({ prompt: 'hello', model: 'gpt-5.2' });
- expect(queryLlmMock).toHaveBeenCalledWith('BUILT PROMPT', 'gpt-5.2', undefined);
+ expect(queryLlmMock).toHaveBeenCalledWith('BUILT PROMPT', 'gpt-5.2', undefined, undefined);
  });
  it('builds CLI prompts without file contents', async () => {
  mockConfig.openaiMode = 'cli';
@@ -137,6 +137,55 @@ describe('handleConsultLlm', () => {
  expect(queryLlmMock).not.toHaveBeenCalled();
  expect(result.content[0]?.text).toContain('Prompt copied to clipboard');
  });
+ it('passes thread_id to queryLlm for Codex CLI models', async () => {
+ mockConfig.openaiMode = 'cli';
+ await handleConsultLlm({
+ prompt: 'follow up',
+ model: 'gpt-5.2',
+ thread_id: 'thread_abc',
+ });
+ const callArgs = queryLlmMock.mock.calls[0];
+ expect(callArgs[3]).toBe('thread_abc');
+ });
+ it('prefixes response with thread_id when returned', async () => {
+ mockConfig.openaiMode = 'cli';
+ queryLlmMock.mockResolvedValueOnce({
+ response: 'answer',
+ costInfo: null,
+ threadId: 'thread_xyz',
+ });
+ const result = await handleConsultLlm({
+ prompt: 'question',
+ model: 'gpt-5.2',
+ });
+ expect(result.content[0]?.text).toBe('[thread_id:thread_xyz]\n\nanswer');
+ });
+ it('passes thread_id to queryLlm for Gemini CLI models', async () => {
+ mockConfig.geminiMode = 'cli';
+ await handleConsultLlm({
+ prompt: 'follow up',
+ model: 'gemini-2.5-pro',
+ thread_id: 'sess_abc',
+ });
+ const callArgs = queryLlmMock.mock.calls[0];
+ expect(callArgs[3]).toBe('sess_abc');
+ });
+ it('rejects thread_id with non-CLI model', async () => {
+ mockConfig.openaiMode = 'api';
+ await expect(handleConsultLlm({
+ prompt: 'hello',
+ model: 'gpt-5.2',
+ thread_id: 'thread_abc',
+ })).rejects.toThrow('thread_id is only supported with CLI mode models');
+ });
+ it('rejects thread_id with Gemini API model', async () => {
+ mockConfig.geminiMode = 'api';
+ await expect(handleConsultLlm({
+ prompt: 'hello',
+ model: 'gemini-2.5-pro',
+ thread_id: 'sess_abc',
+ })).rejects.toThrow('thread_id is only supported with CLI mode models');
+ });
  it('propagates query errors', async () => {
  queryLlmMock.mockRejectedValueOnce(new Error('boom'));
  await expect(handleConsultLlm({ prompt: 'oops' })).rejects.toThrow('boom');
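Taken together with the schema change above, a follow-up tool call under 2.1.0 looks roughly like this (a sketch with illustrative values; the thread_id comes from the previous response's prefix and is only accepted for CLI-mode models):

```ts
// Sketch of a follow-up consult_llm call; values are illustrative.
const followUp = {
  prompt: 'What about rain?',
  model: 'gpt-5.3-codex',
  thread_id: 'thread_b1ff711', // from the prior [thread_id:...] prefix
};
// handleConsultLlm(followUp) resumes the Codex session and returns text
// beginning with '[thread_id:thread_b1ff711]\n\n'.
```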
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "consult-llm-mcp",
- "version": "2.0.0",
+ "version": "2.1.0",
  "description": "MCP server for consulting powerful AI models",
  "type": "module",
  "main": "dist/main.js",