consult-llm-mcp 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -354,6 +354,34 @@ claude mcp add consult-llm -e OPENAI_MODE=cli -- npx -y consult-llm-mcp
  > Set reasoning effort with `-e CODEX_REASONING_EFFORT=high`. Options:
  > `none`, `minimal`, `low`, `medium`, `high`, `xhigh` (gpt-5.1-codex-max only).

+ #### Multi-turn conversations
+
+ CLI mode supports multi-turn conversations via the `thread_id` parameter. The
+ first response includes a `[thread_id:xxx]` prefix. Pass that ID in follow-up
+ requests to continue the conversation with full context from prior turns.
+
+ This works with both Gemini CLI and Codex CLI. Gemini uses session IDs, Codex
+ uses thread IDs, but both are passed through the same `thread_id` parameter.
+
+ ```
+ ⏺ consult-llm - consult_llm (MCP)(prompt: "What's your take on winter?",
+ model: "gpt-5.3-codex")
+ ⎿ [thread_id:thread_b1ff711...]
+
+ Winter is high-variance, not universally the worst. ...
+
+ ⏺ consult-llm - consult_llm (MCP)(prompt: "What about rain?",
+ model: "gpt-5.3-codex",
+ thread_id: "thread_b1ff711...")
+ ⎿ [thread_id:thread_b1ff711...]
+
+ Rain has high upside, high annoyance depending on context. ...
+ ```
+
+ See [skills/debate/SKILL.md](skills/debate/SKILL.md) for a skill where the agent
+ debates an opponent LLM through multiple turns, then synthesizes and implements
+ the result.
+
  ### Web mode

  Copies the formatted prompt to clipboard instead of querying an LLM. Paste into
@@ -381,8 +409,8 @@ See the "Using web mode..." example above for a concrete transcript.
  - `DEEPSEEK_API_KEY` - Your DeepSeek API key (required for DeepSeek models)
  - `CONSULT_LLM_DEFAULT_MODEL` - Override the default model (optional)
  - Options: `gpt-5.2` (default), `gemini-2.5-pro`, `gemini-3-pro-preview`,
- `deepseek-reasoner`, `gpt-5.2-codex`, `gpt-5.1-codex-max`, `gpt-5.1-codex`,
- `gpt-5.1-codex-mini`, `gpt-5.1`
+ `deepseek-reasoner`, `gpt-5.3-codex`, `gpt-5.2-codex`, `gpt-5.1-codex-max`,
+ `gpt-5.1-codex`, `gpt-5.1-codex-mini`, `gpt-5.1`
  - `GEMINI_MODE` - Choose between API or CLI mode for Gemini models (optional)
  - Options: `api` (default), `cli`
  - CLI mode uses the system-installed `gemini` CLI tool
@@ -474,8 +502,8 @@ models complex questions.

  - **model** (optional): LLM model to use
  - Options: `gpt-5.2` (default), `gemini-2.5-pro`, `gemini-3-pro-preview`,
- `deepseek-reasoner`, `gpt-5.2-codex`, `gpt-5.1-codex-max`, `gpt-5.1-codex`,
- `gpt-5.1-codex-mini`, `gpt-5.1`
+ `deepseek-reasoner`, `gpt-5.3-codex`, `gpt-5.2-codex`, `gpt-5.1-codex-max`,
+ `gpt-5.1-codex`, `gpt-5.1-codex-mini`, `gpt-5.1`

  - **web_mode** (optional): Copy prompt to clipboard instead of querying LLM
  - Default: `false`
@@ -483,6 +511,11 @@ models complex questions.
  contents) is copied to clipboard for manual pasting into browser-based LLM
  services

+ - **thread_id** (optional): Resume a multi-turn conversation
+ - Works with Codex CLI (`gpt-*`) and Gemini CLI (`gemini-*`) in CLI mode
+ - The first response includes a `[thread_id:xxx]` prefix — pass that ID back
+ as `thread_id` in follow-up requests to maintain conversation context
+
  - **git_diff** (optional): Include git diff output as context
  - **files** (required): Specific files to include in diff
  - **repo_path** (optional): Path to git repository (defaults to current
@@ -497,6 +530,7 @@ models complex questions.
  - **deepseek-reasoner**: DeepSeek's reasoning model ($0.55/$2.19 per million
  tokens)
  - **gpt-5.2**: OpenAI's latest GPT model
+ - **gpt-5.3-codex**: OpenAI's Codex model based on GPT-5.3
  - **gpt-5.2-codex**: OpenAI's Codex model based on GPT-5.2
  - **gpt-5.1-codex-max**: Strongest OpenAI Codex model
  - **gpt-5.1-codex**: OpenAI's Codex model optimized for coding
@@ -586,7 +620,8 @@ need reliability or custom instructions.

  Here's an example [Claude Code skill](https://code.claude.com/docs/en/skills)
  that uses the `consult_llm` MCP tool to create commands like "ask gemini" or
- "ask codex". See [examples/SKILL.md](examples/SKILL.md) for the full content.
+ "ask codex". See [skills/consult/SKILL.md](skills/consult/SKILL.md) for the full
+ content.

  Save it as `~/.claude/skills/consult-llm/SKILL.md` and you can then use it by
  typing "ask gemini about X" or "ask codex about X" in Claude Code.
package/dist/config.d.ts CHANGED
@@ -3,7 +3,7 @@ export declare const SupportedChatModel: z.ZodEnum<{
  [x: string]: string;
  }>;
  export type SupportedChatModel = z.infer<typeof SupportedChatModel>;
- export declare const fallbackModel: "gemini-2.5-pro" | "gemini-3-pro-preview" | "deepseek-reasoner" | "gpt-5.2" | "gpt-5.2-codex" | "gpt-5.1-codex-max" | "gpt-5.1-codex" | "gpt-5.1-codex-mini" | "gpt-5.1";
+ export declare const fallbackModel: "gemini-2.5-pro" | "gemini-3-pro-preview" | "deepseek-reasoner" | "gpt-5.2" | "gpt-5.3-codex" | "gpt-5.2-codex" | "gpt-5.1-codex-max" | "gpt-5.1-codex" | "gpt-5.1-codex-mini" | "gpt-5.1";
  declare const Config: z.ZodObject<{
  openaiApiKey: z.ZodOptional<z.ZodString>;
  geminiApiKey: z.ZodOptional<z.ZodString>;
package/dist/llm-query.d.ts CHANGED
@@ -1,5 +1,6 @@
  import { type SupportedChatModel } from './schema.js';
- export declare function queryLlm(prompt: string, model: SupportedChatModel, filePaths?: string[]): Promise<{
+ export declare function queryLlm(prompt: string, model: SupportedChatModel, filePaths?: string[], threadId?: string): Promise<{
  response: string;
  costInfo: string;
+ threadId?: string;
  }>;
package/dist/llm-query.js CHANGED
@@ -2,13 +2,13 @@ import { getExecutorForModel } from './llm.js';
  import { calculateCost } from './llm-cost.js';
  import { config } from './config.js';
  import { getSystemPrompt } from './system-prompt.js';
- export async function queryLlm(prompt, model, filePaths) {
+ export async function queryLlm(prompt, model, filePaths, threadId) {
  const executor = getExecutorForModel(model);
  // Get system prompt (with CLI suffix if needed)
  const isCliMode = (model.startsWith('gemini-') && config.geminiMode === 'cli') ||
  (model.startsWith('gpt-') && config.openaiMode === 'cli');
  const systemPrompt = getSystemPrompt(isCliMode);
- const { response, usage } = await executor.execute(prompt, model, systemPrompt, filePaths);
+ const { response, usage, threadId: returnedThreadId, } = await executor.execute(prompt, model, systemPrompt, filePaths, threadId);
  if (!response) {
  throw new Error('No response from the model');
  }
@@ -22,5 +22,5 @@ export async function queryLlm(prompt, model, filePaths) {
  // Handle case where usage is not available (from CLI)
  costInfo = 'Cost data not available (using CLI mode)';
  }
- return { response, costInfo };
+ return { response, costInfo, threadId: returnedThreadId };
  }
package/dist/llm.d.ts CHANGED
@@ -1,9 +1,18 @@
  import OpenAI from 'openai';
  import { type SupportedChatModel as SupportedChatModelType } from './schema.js';
  export interface LlmExecutor {
- execute(prompt: string, model: SupportedChatModelType, systemPrompt: string, filePaths?: string[]): Promise<{
+ execute(prompt: string, model: SupportedChatModelType, systemPrompt: string, filePaths?: string[], threadId?: string): Promise<{
  response: string;
  usage: OpenAI.CompletionUsage | null;
+ threadId?: string;
  }>;
  }
+ export declare function parseGeminiJson(output: string): {
+ sessionId: string | undefined;
+ response: string;
+ };
+ export declare function parseCodexJsonl(output: string): {
+ threadId: string | undefined;
+ response: string;
+ };
  export declare const getExecutorForModel: (model: SupportedChatModelType) => LlmExecutor;
package/dist/llm.js CHANGED
@@ -31,10 +31,15 @@ function createApiExecutor(client) {
  },
  };
  }
- /**
- * Creates an executor that delegates to a command-line tool.
- */
- function createCliExecutor(cliConfig) {
+ // --- CLI Executors ---
+ export function parseGeminiJson(output) {
+ const parsed = JSON.parse(output);
+ return {
+ sessionId: parsed.session_id,
+ response: parsed.response ?? '',
+ };
+ }
+ function createGeminiExecutor() {
  const buildFullPrompt = (prompt, systemPrompt, filePaths) => {
  let fullPrompt = `${systemPrompt}\n\n${prompt}`;
  if (filePaths && filePaths.length > 0) {
@@ -46,81 +51,187 @@ function createCliExecutor(cliConfig) {
  return fullPrompt;
  };
  return {
- async execute(prompt, model, systemPrompt, filePaths) {
- const fullPrompt = buildFullPrompt(prompt, systemPrompt, filePaths);
- const args = cliConfig.buildArgs(model, fullPrompt);
- const { cliName } = cliConfig;
+ async execute(prompt, model, systemPrompt, filePaths, threadId) {
+ const message = threadId
+ ? prompt
+ : buildFullPrompt(prompt, systemPrompt, filePaths);
+ const args = ['-m', model, '-o', 'json'];
+ if (threadId) {
+ args.push('-r', threadId);
+ }
+ args.push('-p', message);
  return new Promise((resolve, reject) => {
  try {
- logCliDebug(`Spawning ${cliName} CLI`, {
+ logCliDebug('Spawning gemini CLI', {
  model,
- promptLength: fullPrompt.length,
- filePathsCount: filePaths?.length || 0,
- args: args,
- promptPreview: fullPrompt.slice(0, 300),
+ promptLength: message.length,
+ threadId,
+ args,
  });
- const child = spawn(cliName, args, {
+ const child = spawn('gemini', args, {
  shell: false,
  stdio: ['ignore', 'pipe', 'pipe'],
  });
  let stdout = '';
  let stderr = '';
  const startTime = Date.now();
- child.on('spawn', () => logCliDebug(`${cliName} CLI process spawned successfully`));
+ child.on('spawn', () => logCliDebug('gemini CLI process spawned successfully'));
  child.stdout.on('data', (data) => (stdout += data.toString()));
  child.stderr.on('data', (data) => (stderr += data.toString()));
  child.on('close', (code) => {
  const duration = Date.now() - startTime;
- logCliDebug(`${cliName} CLI process closed`, {
+ logCliDebug('gemini CLI process closed', {
  code,
  duration: `${duration}ms`,
  stdoutLength: stdout.length,
  stderrLength: stderr.length,
  });
  if (code === 0) {
- resolve({ response: stdout.trim(), usage: null });
+ try {
+ const parsed = parseGeminiJson(stdout);
+ if (!parsed.response) {
+ reject(new Error('No response found in Gemini JSON output'));
+ return;
+ }
+ resolve({
+ response: parsed.response,
+ usage: null,
+ threadId: parsed.sessionId,
+ });
+ }
+ catch {
+ reject(new Error(`Failed to parse Gemini JSON output: ${stdout.slice(0, 200)}`));
+ }
  }
  else {
- reject(cliConfig.handleNonZeroExit(code ?? -1, stderr));
+ if (stderr.includes('RESOURCE_EXHAUSTED')) {
+ reject(new Error(`Gemini quota exceeded. Consider using gemini-2.0-flash model. Error: ${stderr.trim()}`));
+ }
+ else {
+ reject(new Error(`Gemini CLI exited with code ${code ?? -1}. Error: ${stderr.trim()}`));
+ }
  }
  });
  child.on('error', (err) => {
- logCliDebug(`Failed to spawn ${cliName} CLI`, {
- error: err.message,
- });
- reject(new Error(`Failed to spawn ${cliName} CLI. Is it installed and in PATH? Error: ${err.message}`));
+ logCliDebug('Failed to spawn gemini CLI', { error: err.message });
+ reject(new Error(`Failed to spawn gemini CLI. Is it installed and in PATH? Error: ${err.message}`));
  });
  }
  catch (err) {
- reject(new Error(`Synchronous error while trying to spawn ${cliName}: ${err instanceof Error ? err.message : String(err)}`));
+ reject(new Error(`Synchronous error while trying to spawn gemini: ${err instanceof Error ? err.message : String(err)}`));
  }
  });
  },
  };
  }
- // --- CLI Configurations ---
- const geminiCliConfig = {
- cliName: 'gemini',
- buildArgs: (model, fullPrompt) => ['-m', model, '-p', fullPrompt],
- handleNonZeroExit: (code, stderr) => {
- if (stderr.includes('RESOURCE_EXHAUSTED')) {
- return new Error(`Gemini quota exceeded. Consider using gemini-2.0-flash model. Error: ${stderr.trim()}`);
+ export function parseCodexJsonl(output) {
+ let threadId;
+ const messages = [];
+ for (const line of output.split('\n')) {
+ const trimmed = line.trim();
+ if (!trimmed)
+ continue;
+ try {
+ const event = JSON.parse(trimmed);
+ if (event.type === 'thread.started' && event.thread_id) {
+ threadId = event.thread_id;
+ }
+ else if (event.type === 'item.completed' &&
+ event.item?.type === 'agent_message' &&
+ event.item?.text) {
+ messages.push(event.item.text);
+ }
  }
- return new Error(`Gemini CLI exited with code ${code}. Error: ${stderr.trim()}`);
- },
- };
- const codexCliConfig = {
- cliName: 'codex',
- buildArgs: (model, fullPrompt) => {
- const args = ['exec', '--skip-git-repo-check', '-m', model];
- if (config.codexReasoningEffort) {
- args.push('-c', `model_reasoning_effort="${config.codexReasoningEffort}"`);
+ catch {
+ // Skip non-JSON lines (e.g. the ERROR log from resume)
  }
- args.push(fullPrompt);
- return args;
- },
- handleNonZeroExit: (code, stderr) => new Error(`Codex CLI exited with code ${code}. Error: ${stderr.trim()}`),
- };
+ }
+ return { threadId, response: messages.join('\n') };
+ }
+ function createCodexExecutor() {
+ const appendFiles = (text, filePaths) => {
+ if (!filePaths || filePaths.length === 0)
+ return text;
+ const fileRefs = filePaths
+ .map((path) => `@${relative(process.cwd(), path)}`)
+ .join(' ');
+ return `${text}\n\nFiles: ${fileRefs}`;
+ };
+ return {
+ async execute(prompt, model, systemPrompt, filePaths, threadId) {
+ const message = appendFiles(prompt, filePaths);
+ const fullPrompt = threadId
+ ? message // On resume, include files but skip system prompt
+ : `${systemPrompt}\n\n${message}`;
+ const args = [];
+ if (threadId) {
+ args.push('exec', 'resume', '--json', '--skip-git-repo-check');
+ if (config.codexReasoningEffort) {
+ args.push('-c', `model_reasoning_effort="${config.codexReasoningEffort}"`);
+ }
+ args.push('-m', model, threadId, fullPrompt);
+ }
+ else {
+ args.push('exec', '--json', '--skip-git-repo-check');
+ if (config.codexReasoningEffort) {
+ args.push('-c', `model_reasoning_effort="${config.codexReasoningEffort}"`);
+ }
+ args.push('-m', model, fullPrompt);
+ }
+ return new Promise((resolve, reject) => {
+ try {
+ logCliDebug('Spawning codex CLI', {
+ model,
+ promptLength: fullPrompt.length,
+ threadId,
+ args,
+ });
+ const child = spawn('codex', args, {
+ shell: false,
+ stdio: ['ignore', 'pipe', 'pipe'],
+ });
+ let stdout = '';
+ let stderr = '';
+ const startTime = Date.now();
+ child.on('spawn', () => logCliDebug('codex CLI process spawned successfully'));
+ child.stdout.on('data', (data) => (stdout += data.toString()));
+ child.stderr.on('data', (data) => (stderr += data.toString()));
+ child.on('close', (code) => {
+ const duration = Date.now() - startTime;
+ logCliDebug('codex CLI process closed', {
+ code,
+ duration: `${duration}ms`,
+ stdoutLength: stdout.length,
+ stderrLength: stderr.length,
+ });
+ if (code === 0) {
+ const parsed = parseCodexJsonl(stdout);
+ if (!parsed.response) {
+ reject(new Error('No agent_message found in Codex JSONL output'));
+ return;
+ }
+ resolve({
+ response: parsed.response,
+ usage: null,
+ threadId: parsed.threadId,
+ });
+ }
+ else {
+ reject(new Error(`Codex CLI exited with code ${code ?? -1}. Error: ${stderr.trim()}`));
+ }
+ });
+ child.on('error', (err) => {
+ logCliDebug('Failed to spawn codex CLI', { error: err.message });
+ reject(new Error(`Failed to spawn codex CLI. Is it installed and in PATH? Error: ${err.message}`));
+ });
+ }
+ catch (err) {
+ reject(new Error(`Synchronous error while trying to spawn codex: ${err instanceof Error ? err.message : String(err)}`));
+ }
+ });
+ },
+ };
+ }
  const createExecutorProvider = () => {
  const executorCache = new Map();
  const clientCache = new Map();
@@ -172,7 +283,7 @@ const createExecutorProvider = () => {
  if (model.startsWith('gpt-')) {
  executor =
  config.openaiMode === 'cli'
- ? createCliExecutor(codexCliConfig)
+ ? createCodexExecutor()
  : createApiExecutor(getOpenAIClient());
  }
  else if (model.startsWith('deepseek-')) {
@@ -181,7 +292,7 @@ const createExecutorProvider = () => {
  else if (model.startsWith('gemini-')) {
  executor =
  config.geminiMode === 'cli'
- ? createCliExecutor(geminiCliConfig)
+ ? createGeminiExecutor()
  : createApiExecutor(getGeminiApiClient());
  }
  else {
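For reference, the JSONL stream that the new `parseCodexJsonl` consumes is a sequence of events, one JSON object per line. The event shapes below are taken from the parser and its tests; the IDs and message text are illustrative:

```ts
import { parseCodexJsonl } from './llm.js';

// Shapes mirror what parseCodexJsonl looks for; the values are made up.
const sample = [
  '{"type":"thread.started","thread_id":"thread_b1ff711"}',
  '{"type":"item.completed","item":{"type":"reasoning","text":"thinking..."}}',
  '{"type":"item.completed","item":{"type":"agent_message","text":"Hello."}}',
].join('\n');

const { threadId, response } = parseCodexJsonl(sample);
// threadId === 'thread_b1ff711'; response === 'Hello.' (reasoning items are skipped)
```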
package/dist/llm.test.js CHANGED
@@ -1,6 +1,6 @@
  import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
  import { EventEmitter } from 'events';
- import { getExecutorForModel } from './llm.js';
+ import { getExecutorForModel, parseCodexJsonl, parseGeminiJson } from './llm.js';
  const createCompletionMock = vi.hoisted(() => vi.fn());
  const spawnMock = vi.hoisted(() => vi.fn());
  const logCliDebugMock = vi.hoisted(() => vi.fn());
@@ -88,11 +88,76 @@ describe('API executor', () => {
  await expect(executor.execute('prompt', 'gpt-5.1', 'system')).rejects.toThrow('No response from the model via API');
  });
  });
- describe('CLI executor', () => {
+ const codexJsonlOutput = (threadId, text) => [
+ JSON.stringify({ type: 'thread.started', thread_id: threadId }),
+ JSON.stringify({
+ type: 'item.completed',
+ item: { type: 'agent_message', text },
+ }),
+ ].join('\n');
+ describe('parseCodexJsonl', () => {
+ it('extracts thread_id and agent_message text', () => {
+ const output = codexJsonlOutput('thread_abc', 'hello world');
+ const result = parseCodexJsonl(output);
+ expect(result.threadId).toBe('thread_abc');
+ expect(result.response).toBe('hello world');
+ });
+ it('concatenates multiple agent_message items', () => {
+ const output = [
+ JSON.stringify({ type: 'thread.started', thread_id: 't1' }),
+ JSON.stringify({
+ type: 'item.completed',
+ item: { type: 'agent_message', text: 'first' },
+ }),
+ JSON.stringify({
+ type: 'item.completed',
+ item: { type: 'agent_message', text: 'second' },
+ }),
+ ].join('\n');
+ const result = parseCodexJsonl(output);
+ expect(result.response).toBe('first\nsecond');
+ });
+ it('skips reasoning items', () => {
+ const output = [
+ JSON.stringify({ type: 'thread.started', thread_id: 't1' }),
+ JSON.stringify({
+ type: 'item.completed',
+ item: { type: 'reasoning', text: 'thinking...' },
+ }),
+ JSON.stringify({
+ type: 'item.completed',
+ item: { type: 'agent_message', text: 'answer' },
+ }),
+ ].join('\n');
+ const result = parseCodexJsonl(output);
+ expect(result.response).toBe('answer');
+ });
+ it('skips non-JSON lines', () => {
+ const output = [
+ 'ERROR: some log line',
+ JSON.stringify({ type: 'thread.started', thread_id: 't1' }),
+ 'another garbage line',
+ JSON.stringify({
+ type: 'item.completed',
+ item: { type: 'agent_message', text: 'result' },
+ }),
+ ].join('\n');
+ const result = parseCodexJsonl(output);
+ expect(result.threadId).toBe('t1');
+ expect(result.response).toBe('result');
+ });
+ it('returns empty response when no agent_message found', () => {
+ const output = JSON.stringify({ type: 'thread.started', thread_id: 't1' });
+ const result = parseCodexJsonl(output);
+ expect(result.threadId).toBe('t1');
+ expect(result.response).toBe('');
+ });
+ });
+ describe('Codex CLI executor', () => {
  const setupSpawn = (child) => {
  spawnMock.mockReturnValue(child);
  };
- it('spawns codex CLI with combined prompt and files', async () => {
+ it('spawns codex CLI with --json and parses JSONL output', async () => {
  mockConfig.openaiMode = 'cli';
  const child = createChildProcess();
  setupSpawn(child);
@@ -100,20 +165,64 @@ describe('CLI executor', () => {
  const promise = executor.execute('user', 'gpt-5.1', 'system', [
  '/absolute/path/to/file.ts',
  ]);
- resolveCliExecution(child, { stdout: 'result', code: 0 });
+ resolveCliExecution(child, {
+ stdout: codexJsonlOutput('thread_123', 'result'),
+ code: 0,
+ });
  const args = spawnMock.mock.calls[0];
  expect(args?.[0]).toBe('codex');
  const cliArgs = args?.[1];
  expect(cliArgs[0]).toBe('exec');
- expect(cliArgs[1]).toBe('--skip-git-repo-check');
- expect(cliArgs[2]).toBe('-m');
- expect(cliArgs[3]).toBe('gpt-5.1');
- expect(cliArgs[4]).toContain('system');
- expect(cliArgs[4]).toContain('user');
- expect(cliArgs[4]).toContain('Files: @');
+ expect(cliArgs[1]).toBe('--json');
+ expect(cliArgs[2]).toBe('--skip-git-repo-check');
+ expect(cliArgs).toContain('-m');
+ expect(cliArgs).toContain('gpt-5.1');
+ // Last arg is the prompt with system + user + files
+ const promptArg = cliArgs[cliArgs.length - 1];
+ expect(promptArg).toContain('system');
+ expect(promptArg).toContain('user');
+ expect(promptArg).toContain('Files: @');
  const result = await promise;
  expect(result.response).toBe('result');
  expect(result.usage).toBeNull();
+ expect(result.threadId).toBe('thread_123');
+ });
+ it('resumes a session with thread_id', async () => {
+ mockConfig.openaiMode = 'cli';
+ const child = createChildProcess();
+ setupSpawn(child);
+ const executor = getExecutorForModel('gpt-5.1');
+ const promise = executor.execute('follow up question', 'gpt-5.1', 'system', undefined, 'thread_abc');
+ resolveCliExecution(child, {
+ stdout: codexJsonlOutput('thread_abc', 'follow up answer'),
+ code: 0,
+ });
+ const args = spawnMock.mock.calls[0];
+ const cliArgs = args?.[1];
+ expect(cliArgs[0]).toBe('exec');
+ expect(cliArgs[1]).toBe('resume');
+ expect(cliArgs[2]).toBe('--json');
+ expect(cliArgs[3]).toBe('--skip-git-repo-check');
+ expect(cliArgs).toContain('thread_abc');
+ // Prompt should NOT contain system prompt on resume
+ const promptArg = cliArgs[cliArgs.length - 1];
+ expect(promptArg).toBe('follow up question');
+ expect(promptArg).not.toContain('system');
+ const result = await promise;
+ expect(result.response).toBe('follow up answer');
+ expect(result.threadId).toBe('thread_abc');
+ });
+ it('rejects when no agent_message in JSONL output', async () => {
+ mockConfig.openaiMode = 'cli';
+ const child = createChildProcess();
+ setupSpawn(child);
+ const executor = getExecutorForModel('gpt-5.1');
+ const promise = executor.execute('user', 'gpt-5.1', 'system');
+ resolveCliExecution(child, {
+ stdout: JSON.stringify({ type: 'thread.started', thread_id: 't1' }),
+ code: 0,
+ });
+ await expect(promise).rejects.toThrow('No agent_message found in Codex JSONL output');
  });
  it('rejects with codex errors on non-zero exit', async () => {
  mockConfig.openaiMode = 'cli';
@@ -131,25 +240,16 @@ describe('CLI executor', () => {
  setupSpawn(child);
  const executor = getExecutorForModel('gpt-5.1');
  const promise = executor.execute('user', 'gpt-5.1', 'system');
- resolveCliExecution(child, { stdout: 'result', code: 0 });
+ resolveCliExecution(child, {
+ stdout: codexJsonlOutput('t1', 'result'),
+ code: 0,
+ });
  const args = spawnMock.mock.calls[0];
  const cliArgs = args?.[1];
  expect(cliArgs).toContain('-c');
  expect(cliArgs).toContain('model_reasoning_effort="xhigh"');
  await promise;
- mockConfig.codexReasoningEffort = undefined; // reset for other tests
- });
- it('wraps gemini quota errors specially', async () => {
- mockConfig.geminiMode = 'cli';
- const child = createChildProcess();
- setupSpawn(child);
- const executor = getExecutorForModel('gemini-2.5-pro');
- const promise = executor.execute('user', 'gemini-2.5-pro', 'system');
- resolveCliExecution(child, {
- stderr: 'RESOURCE_EXHAUSTED: quota exceeded',
- code: 1,
- });
- await expect(promise).rejects.toThrow('Gemini quota exceeded');
+ mockConfig.codexReasoningEffort = undefined;
  });
  it('handles spawn error events with friendly message', async () => {
  mockConfig.openaiMode = 'cli';
@@ -169,6 +269,112 @@ describe('CLI executor', () => {
  await expect(executor.execute('user', 'gpt-5.1', 'system')).rejects.toThrow('Synchronous error while trying to spawn codex: sync failure');
  });
  });
+ const geminiJsonOutput = (sessionId, response) => JSON.stringify({ session_id: sessionId, response, stats: {} });
+ describe('parseGeminiJson', () => {
+ it('extracts session_id and response', () => {
+ const output = geminiJsonOutput('sess_abc', 'hello world');
+ const result = parseGeminiJson(output);
+ expect(result.sessionId).toBe('sess_abc');
+ expect(result.response).toBe('hello world');
+ });
+ it('returns empty response when response is missing', () => {
+ const output = JSON.stringify({ session_id: 's1' });
+ const result = parseGeminiJson(output);
+ expect(result.sessionId).toBe('s1');
+ expect(result.response).toBe('');
+ });
+ });
+ describe('Gemini CLI executor', () => {
+ const setupSpawn = (child) => {
+ spawnMock.mockReturnValue(child);
+ };
+ it('spawns gemini CLI with -o json and parses JSON output', async () => {
+ mockConfig.geminiMode = 'cli';
+ const child = createChildProcess();
+ setupSpawn(child);
+ const executor = getExecutorForModel('gemini-2.5-pro');
+ const promise = executor.execute('user prompt', 'gemini-2.5-pro', 'system');
+ resolveCliExecution(child, {
+ stdout: geminiJsonOutput('sess_123', 'result'),
+ code: 0,
+ });
+ const args = spawnMock.mock.calls[0];
+ expect(args?.[0]).toBe('gemini');
+ const cliArgs = args?.[1];
+ expect(cliArgs).toContain('-m');
+ expect(cliArgs).toContain('gemini-2.5-pro');
+ expect(cliArgs).toContain('-o');
+ expect(cliArgs).toContain('json');
+ expect(cliArgs).toContain('-p');
+ const result = await promise;
+ expect(result.response).toBe('result');
+ expect(result.usage).toBeNull();
+ expect(result.threadId).toBe('sess_123');
+ });
+ it('resumes a session with thread_id', async () => {
+ mockConfig.geminiMode = 'cli';
+ const child = createChildProcess();
+ setupSpawn(child);
+ const executor = getExecutorForModel('gemini-2.5-pro');
+ const promise = executor.execute('follow up', 'gemini-2.5-pro', 'system', undefined, 'sess_abc');
+ resolveCliExecution(child, {
+ stdout: geminiJsonOutput('sess_abc', 'follow up answer'),
+ code: 0,
+ });
+ const args = spawnMock.mock.calls[0];
+ const cliArgs = args?.[1];
+ expect(cliArgs).toContain('-r');
+ expect(cliArgs).toContain('sess_abc');
+ // Prompt should NOT contain system prompt on resume
+ const pIdx = cliArgs.indexOf('-p');
+ expect(cliArgs[pIdx + 1]).toBe('follow up');
+ const result = await promise;
+ expect(result.response).toBe('follow up answer');
+ expect(result.threadId).toBe('sess_abc');
+ });
+ it('rejects when no response in JSON output', async () => {
+ mockConfig.geminiMode = 'cli';
+ const child = createChildProcess();
+ setupSpawn(child);
+ const executor = getExecutorForModel('gemini-2.5-pro');
+ const promise = executor.execute('user', 'gemini-2.5-pro', 'system');
+ resolveCliExecution(child, {
+ stdout: JSON.stringify({ session_id: 's1' }),
+ code: 0,
+ });
+ await expect(promise).rejects.toThrow('No response found in Gemini JSON output');
+ });
+ it('rejects with parse error on invalid JSON', async () => {
+ mockConfig.geminiMode = 'cli';
+ const child = createChildProcess();
+ setupSpawn(child);
+ const executor = getExecutorForModel('gemini-2.5-pro');
+ const promise = executor.execute('user', 'gemini-2.5-pro', 'system');
+ resolveCliExecution(child, { stdout: 'not json', code: 0 });
+ await expect(promise).rejects.toThrow('Failed to parse Gemini JSON output');
+ });
+ it('wraps gemini quota errors specially', async () => {
+ mockConfig.geminiMode = 'cli';
+ const child = createChildProcess();
+ setupSpawn(child);
+ const executor = getExecutorForModel('gemini-2.5-pro');
+ const promise = executor.execute('user', 'gemini-2.5-pro', 'system');
+ resolveCliExecution(child, {
+ stderr: 'RESOURCE_EXHAUSTED: quota exceeded',
+ code: 1,
+ });
+ await expect(promise).rejects.toThrow('Gemini quota exceeded');
+ });
+ it('handles spawn error events with friendly message', async () => {
+ mockConfig.geminiMode = 'cli';
+ const child = createChildProcess();
+ setupSpawn(child);
+ const executor = getExecutorForModel('gemini-2.5-pro');
+ const promise = executor.execute('user', 'gemini-2.5-pro', 'system');
+ child.emit('error', new Error('not found'));
+ await expect(promise).rejects.toThrow('Failed to spawn gemini CLI. Is it installed and in PATH? Error: not found');
+ });
+ });
  describe('executor selection', () => {
  it('uses deepseek API client', async () => {
  createCompletionMock.mockResolvedValue({
package/dist/models.d.ts CHANGED
@@ -1 +1 @@
- export declare const ALL_MODELS: readonly ["gemini-2.5-pro", "gemini-3-pro-preview", "deepseek-reasoner", "gpt-5.2", "gpt-5.2-codex", "gpt-5.1-codex-max", "gpt-5.1-codex", "gpt-5.1-codex-mini", "gpt-5.1"];
+ export declare const ALL_MODELS: readonly ["gemini-2.5-pro", "gemini-3-pro-preview", "deepseek-reasoner", "gpt-5.2", "gpt-5.3-codex", "gpt-5.2-codex", "gpt-5.1-codex-max", "gpt-5.1-codex", "gpt-5.1-codex-mini", "gpt-5.1"];
package/dist/models.js CHANGED
@@ -3,6 +3,7 @@ export const ALL_MODELS = [
  'gemini-3-pro-preview',
  'deepseek-reasoner',
  'gpt-5.2',
+ 'gpt-5.3-codex',
  'gpt-5.2-codex',
  'gpt-5.1-codex-max',
  'gpt-5.1-codex',
package/dist/schema.d.ts CHANGED
@@ -10,6 +10,7 @@ export declare const ConsultLlmArgs: z.ZodObject<{
  [x: string]: string;
  }>>>;
  web_mode: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
+ thread_id: z.ZodOptional<z.ZodString>;
  git_diff: z.ZodOptional<z.ZodObject<{
  repo_path: z.ZodOptional<z.ZodString>;
  files: z.ZodArray<z.ZodString>;
@@ -18,6 +19,6 @@ export declare const ConsultLlmArgs: z.ZodObject<{
  }, z.core.$strip>;
  export declare const toolSchema: {
  readonly name: "consult_llm";
- readonly description: "Ask a more powerful AI for help with complex problems. Provide your question in the prompt field and always include relevant code files as context.\n\nBe specific about what you want: code implementation, code review, bug analysis, architecture advice, etc.\n\nIMPORTANT: Ask neutral, open-ended questions. Avoid suggesting specific solutions or alternatives in your prompt as this can bias the analysis. Instead of \"Should I use X or Y approach?\", ask \"What's the best approach for this problem?\" Let the consultant LLM provide unbiased recommendations.";
+ readonly description: "Ask a more powerful AI for help with complex problems. Provide your question in the prompt field and always include relevant code files as context.\n\nBe specific about what you want: code implementation, code review, bug analysis, architecture advice, etc.\n\nIMPORTANT: Ask neutral, open-ended questions. Avoid suggesting specific solutions or alternatives in your prompt as this can bias the analysis. Instead of \"Should I use X or Y approach?\", ask \"What's the best approach for this problem?\" Let the consultant LLM provide unbiased recommendations.\n\nFor multi-turn conversations with CLI models (Codex and Gemini), the response includes a [thread_id:xxx] prefix. Extract this ID and pass it as the thread_id parameter in follow-up requests to maintain conversation context.";
  readonly inputSchema: z.core.JSONSchema.JSONSchema;
  };
package/dist/schema.js CHANGED
@@ -19,6 +19,10 @@ export const ConsultLlmArgs = z.object({
  .optional()
  .default(false)
  .describe("If true, copy the formatted prompt to the clipboard instead of querying an LLM. When true, the `model` parameter is ignored. Use this to paste the prompt into browser-based LLM services. IMPORTANT: Only use this when the user specifically requests it. When true, wait for the user to provide the external LLM's response before proceeding with any implementation."),
+ thread_id: z
+ .string()
+ .optional()
+ .describe('Thread/session ID for resuming a conversation. Works with Codex CLI (gpt-*) and Gemini CLI (gemini-*) in CLI mode. Returned in the response prefix as [thread_id:xxx].'),
  git_diff: z
  .object({
  repo_path: z
@@ -47,6 +51,8 @@ export const toolSchema = {

  Be specific about what you want: code implementation, code review, bug analysis, architecture advice, etc.

- IMPORTANT: Ask neutral, open-ended questions. Avoid suggesting specific solutions or alternatives in your prompt as this can bias the analysis. Instead of "Should I use X or Y approach?", ask "What's the best approach for this problem?" Let the consultant LLM provide unbiased recommendations.`,
+ IMPORTANT: Ask neutral, open-ended questions. Avoid suggesting specific solutions or alternatives in your prompt as this can bias the analysis. Instead of "Should I use X or Y approach?", ask "What's the best approach for this problem?" Let the consultant LLM provide unbiased recommendations.
+
+ For multi-turn conversations with CLI models (Codex and Gemini), the response includes a [thread_id:xxx] prefix. Extract this ID and pass it as the thread_id parameter in follow-up requests to maintain conversation context.`,
  inputSchema: consultLlmInputSchema,
  };
package/dist/schema.test.js CHANGED
@@ -43,6 +43,15 @@ describe('ConsultLlmArgs', () => {
  expect(parsed.model).toBeDefined();
  expect(ALL_MODELS).toContain(parsed.model);
  });
+ it('accepts optional thread_id as string', () => {
+ const withThread = ConsultLlmArgs.parse({
+ prompt: 'follow up',
+ thread_id: 'thread_abc',
+ });
+ expect(withThread.thread_id).toBe('thread_abc');
+ const withoutThread = ConsultLlmArgs.parse({ prompt: 'no thread' });
+ expect(withoutThread.thread_id).toBeUndefined();
+ });
  it('defaults web_mode to false but honors explicit value', () => {
  const parsedDefault = ConsultLlmArgs.parse({ prompt: 'default case' });
  expect(parsedDefault.web_mode).toBe(false);
package/dist/server.js CHANGED
@@ -47,7 +47,7 @@ export async function handleConsultLlm(args) {
  .join(', ');
  throw new Error(`Invalid request parameters: ${errors}`);
  }
- const { files, prompt: userPrompt, git_diff, web_mode, model: parsedModel, } = parseResult.data;
+ const { files, prompt: userPrompt, git_diff, web_mode, model: parsedModel, thread_id: threadId, } = parseResult.data;
  const providedModel = typeof args === 'object' &&
  args !== null &&
  Object.prototype.hasOwnProperty.call(args, 'model');
@@ -56,6 +56,9 @@ export async function handleConsultLlm(args) {
  : (config.defaultModel ?? parsedModel);
  logToolCall('consult_llm', args);
  const isCliMode = isCliExecution(model);
+ if (threadId && !isCliMode) {
+ throw new Error('thread_id is only supported with CLI mode models (Codex or Gemini CLI)');
+ }
  let prompt;
  let filePaths;
  if (web_mode || !isCliMode) {
@@ -95,10 +98,13 @@ ${prompt}`;
  content: [{ type: 'text', text: responseMessage }],
  };
  }
- const { response, costInfo } = await queryLlm(prompt, model, filePaths);
+ const { response, costInfo, threadId: returnedThreadId, } = await queryLlm(prompt, model, filePaths, threadId);
  await logResponse(model, response, costInfo);
+ const responseText = returnedThreadId
+ ? `[thread_id:${returnedThreadId}]\n\n${response}`
+ : response;
  return {
- content: [{ type: 'text', text: response }],
+ content: [{ type: 'text', text: responseText }],
  };
  }
  server.setRequestHandler(CallToolRequestSchema, async (request) => {
package/dist/server.test.js CHANGED
@@ -88,13 +88,13 @@ describe('handleConsultLlm', () => {
  expect(processFilesMock).toHaveBeenCalledWith(['file1.ts']);
  expect(generateGitDiffMock).toHaveBeenCalledWith(undefined, ['src/index.ts'], 'HEAD');
  expect(buildPromptMock).toHaveBeenCalledWith('help me', expect.any(Array), 'diff output');
- expect(queryLlmMock).toHaveBeenCalledWith('BUILT PROMPT', 'gpt-5.1', undefined);
+ expect(queryLlmMock).toHaveBeenCalledWith('BUILT PROMPT', 'gpt-5.1', undefined, undefined);
  expect(result.content[0]?.text).toBe('ok');
  });
  it('uses explicit model even when config default exists', async () => {
  mockConfig.defaultModel = 'gpt-5.1';
  await handleConsultLlm({ prompt: 'hello', model: 'gpt-5.2' });
- expect(queryLlmMock).toHaveBeenCalledWith('BUILT PROMPT', 'gpt-5.2', undefined);
+ expect(queryLlmMock).toHaveBeenCalledWith('BUILT PROMPT', 'gpt-5.2', undefined, undefined);
  });
  it('builds CLI prompts without file contents', async () => {
  mockConfig.openaiMode = 'cli';
@@ -137,6 +137,55 @@ describe('handleConsultLlm', () => {
  expect(queryLlmMock).not.toHaveBeenCalled();
  expect(result.content[0]?.text).toContain('Prompt copied to clipboard');
  });
+ it('passes thread_id to queryLlm for Codex CLI models', async () => {
+ mockConfig.openaiMode = 'cli';
+ await handleConsultLlm({
+ prompt: 'follow up',
+ model: 'gpt-5.2',
+ thread_id: 'thread_abc',
+ });
+ const callArgs = queryLlmMock.mock.calls[0];
+ expect(callArgs[3]).toBe('thread_abc');
+ });
+ it('prefixes response with thread_id when returned', async () => {
+ mockConfig.openaiMode = 'cli';
+ queryLlmMock.mockResolvedValueOnce({
+ response: 'answer',
+ costInfo: null,
+ threadId: 'thread_xyz',
+ });
+ const result = await handleConsultLlm({
+ prompt: 'question',
+ model: 'gpt-5.2',
+ });
+ expect(result.content[0]?.text).toBe('[thread_id:thread_xyz]\n\nanswer');
+ });
+ it('passes thread_id to queryLlm for Gemini CLI models', async () => {
+ mockConfig.geminiMode = 'cli';
+ await handleConsultLlm({
+ prompt: 'follow up',
+ model: 'gemini-2.5-pro',
+ thread_id: 'sess_abc',
+ });
+ const callArgs = queryLlmMock.mock.calls[0];
+ expect(callArgs[3]).toBe('sess_abc');
+ });
+ it('rejects thread_id with non-CLI model', async () => {
+ mockConfig.openaiMode = 'api';
+ await expect(handleConsultLlm({
+ prompt: 'hello',
+ model: 'gpt-5.2',
+ thread_id: 'thread_abc',
+ })).rejects.toThrow('thread_id is only supported with CLI mode models');
+ });
+ it('rejects thread_id with Gemini API model', async () => {
+ mockConfig.geminiMode = 'api';
+ await expect(handleConsultLlm({
+ prompt: 'hello',
+ model: 'gemini-2.5-pro',
+ thread_id: 'sess_abc',
+ })).rejects.toThrow('thread_id is only supported with CLI mode models');
+ });
  it('propagates query errors', async () => {
  queryLlmMock.mockRejectedValueOnce(new Error('boom'));
  await expect(handleConsultLlm({ prompt: 'oops' })).rejects.toThrow('boom');
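Taken together with the schema change above, a follow-up tool call under 2.1.0 looks roughly like this (a sketch with illustrative values; the thread_id comes from the previous response's prefix and is only accepted for CLI-mode models):

```ts
// Sketch of a follow-up consult_llm call; values are illustrative.
const followUp = {
  prompt: 'What about rain?',
  model: 'gpt-5.3-codex',
  thread_id: 'thread_b1ff711', // from the prior [thread_id:...] prefix
};
// handleConsultLlm(followUp) resumes the Codex session and returns text
// beginning with '[thread_id:thread_b1ff711]\n\n'.
```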
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "consult-llm-mcp",
- "version": "2.0.0",
+ "version": "2.1.0",
  "description": "MCP server for consulting powerful AI models",
  "type": "module",
  "main": "dist/main.js",