consult-llm-mcp 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -5
- package/dist/config.d.ts +1 -1
- package/dist/llm-query.d.ts +2 -1
- package/dist/llm-query.js +3 -3
- package/dist/llm.d.ts +10 -1
- package/dist/llm.js +157 -46
- package/dist/llm.test.js +230 -24
- package/dist/models.d.ts +1 -1
- package/dist/models.js +1 -0
- package/dist/schema.d.ts +2 -1
- package/dist/schema.js +7 -1
- package/dist/schema.test.js +9 -0
- package/dist/server.js +9 -3
- package/dist/server.test.js +51 -2
- package/package.json +1 -1
package/README.md
CHANGED
@@ -354,6 +354,34 @@ claude mcp add consult-llm -e OPENAI_MODE=cli -- npx -y consult-llm-mcp
 > Set reasoning effort with `-e CODEX_REASONING_EFFORT=high`. Options:
 > `none`, `minimal`, `low`, `medium`, `high`, `xhigh` (gpt-5.1-codex-max only).
 
+#### Multi-turn conversations
+
+CLI mode supports multi-turn conversations via the `thread_id` parameter. The
+first response includes a `[thread_id:xxx]` prefix. Pass that ID in follow-up
+requests to continue the conversation with full context from prior turns.
+
+This works with both Gemini CLI and Codex CLI. Gemini uses session IDs, Codex
+uses thread IDs, but both are passed through the same `thread_id` parameter.
+
+```
+⏺ consult-llm - consult_llm (MCP)(prompt: "What's your take on winter?",
+                                  model: "gpt-5.3-codex")
+  ⎿  [thread_id:thread_b1ff711...]
+
+     Winter is high-variance, not universally the worst. ...
+
+⏺ consult-llm - consult_llm (MCP)(prompt: "What about rain?",
+                                  model: "gpt-5.3-codex",
+                                  thread_id: "thread_b1ff711...")
+  ⎿  [thread_id:thread_b1ff711...]
+
+     Rain has high upside, high annoyance depending on context. ...
+```
+
+See [skills/debate/SKILL.md](skills/debate/SKILL.md) for a skill where the agent
+debates an opponent LLM through multiple turns, then synthesizes and implements
+the result.
+
 ### Web mode
 
 Copies the formatted prompt to clipboard instead of querying an LLM. Paste into
@@ -381,8 +409,8 @@ See the "Using web mode..." example above for a concrete transcript.
 - `DEEPSEEK_API_KEY` - Your DeepSeek API key (required for DeepSeek models)
 - `CONSULT_LLM_DEFAULT_MODEL` - Override the default model (optional)
   - Options: `gpt-5.2` (default), `gemini-2.5-pro`, `gemini-3-pro-preview`,
-    `deepseek-reasoner`, `gpt-5.2-codex`, `gpt-5.1-codex-max`, `gpt-5.1-codex`,
-    `gpt-5.1-codex-mini`, `gpt-5.1`
+    `deepseek-reasoner`, `gpt-5.3-codex`, `gpt-5.2-codex`, `gpt-5.1-codex-max`,
+    `gpt-5.1-codex`, `gpt-5.1-codex-mini`, `gpt-5.1`
 - `GEMINI_MODE` - Choose between API or CLI mode for Gemini models (optional)
   - Options: `api` (default), `cli`
   - CLI mode uses the system-installed `gemini` CLI tool
@@ -474,8 +502,8 @@ models complex questions.
 
 - **model** (optional): LLM model to use
   - Options: `gpt-5.2` (default), `gemini-2.5-pro`, `gemini-3-pro-preview`,
-    `deepseek-reasoner`, `gpt-5.2-codex`, `gpt-5.1-codex-max`, `gpt-5.1-codex`,
-    `gpt-5.1-codex-mini`, `gpt-5.1`
+    `deepseek-reasoner`, `gpt-5.3-codex`, `gpt-5.2-codex`, `gpt-5.1-codex-max`,
+    `gpt-5.1-codex`, `gpt-5.1-codex-mini`, `gpt-5.1`
 
 - **web_mode** (optional): Copy prompt to clipboard instead of querying LLM
   - Default: `false`
@@ -483,6 +511,11 @@ models complex questions.
   contents) is copied to clipboard for manual pasting into browser-based LLM
   services
 
+- **thread_id** (optional): Resume a multi-turn conversation
+  - Works with Codex CLI (`gpt-*`) and Gemini CLI (`gemini-*`) in CLI mode
+  - The first response includes a `[thread_id:xxx]` prefix — pass that ID back
+    as `thread_id` in follow-up requests to maintain conversation context
+
 - **git_diff** (optional): Include git diff output as context
   - **files** (required): Specific files to include in diff
   - **repo_path** (optional): Path to git repository (defaults to current
@@ -497,6 +530,7 @@ models complex questions.
 - **deepseek-reasoner**: DeepSeek's reasoning model ($0.55/$2.19 per million
   tokens)
 - **gpt-5.2**: OpenAI's latest GPT model
+- **gpt-5.3-codex**: OpenAI's Codex model based on GPT-5.3
 - **gpt-5.2-codex**: OpenAI's Codex model based on GPT-5.2
 - **gpt-5.1-codex-max**: Strongest OpenAI Codex model
 - **gpt-5.1-codex**: OpenAI's Codex model optimized for coding
@@ -586,7 +620,8 @@ need reliability or custom instructions.
 
 Here's an example [Claude Code skill](https://code.claude.com/docs/en/skills)
 that uses the `consult_llm` MCP tool to create commands like "ask gemini" or
-"ask codex". See [
+"ask codex". See [skills/consult/SKILL.md](skills/consult/SKILL.md) for the full
+content.
 
 Save it as `~/.claude/skills/consult-llm/SKILL.md` and you can then use it by
 typing "ask gemini about X" or "ask codex about X" in Claude Code.
package/dist/config.d.ts
CHANGED
@@ -3,7 +3,7 @@ export declare const SupportedChatModel: z.ZodEnum<{
     [x: string]: string;
 }>;
 export type SupportedChatModel = z.infer<typeof SupportedChatModel>;
-export declare const fallbackModel: "gemini-2.5-pro" | "gemini-3-pro-preview" | "deepseek-reasoner" | "gpt-5.2" | "gpt-5.2-codex" | "gpt-5.1-codex-max" | "gpt-5.1-codex" | "gpt-5.1-codex-mini" | "gpt-5.1";
+export declare const fallbackModel: "gemini-2.5-pro" | "gemini-3-pro-preview" | "deepseek-reasoner" | "gpt-5.2" | "gpt-5.3-codex" | "gpt-5.2-codex" | "gpt-5.1-codex-max" | "gpt-5.1-codex" | "gpt-5.1-codex-mini" | "gpt-5.1";
 declare const Config: z.ZodObject<{
     openaiApiKey: z.ZodOptional<z.ZodString>;
     geminiApiKey: z.ZodOptional<z.ZodString>;
package/dist/llm-query.d.ts
CHANGED
@@ -1,5 +1,6 @@
 import { type SupportedChatModel } from './schema.js';
-export declare function queryLlm(prompt: string, model: SupportedChatModel, filePaths?: string[]): Promise<{
+export declare function queryLlm(prompt: string, model: SupportedChatModel, filePaths?: string[], threadId?: string): Promise<{
     response: string;
     costInfo: string;
+    threadId?: string;
 }>;
package/dist/llm-query.js
CHANGED
@@ -2,13 +2,13 @@ import { getExecutorForModel } from './llm.js';
 import { calculateCost } from './llm-cost.js';
 import { config } from './config.js';
 import { getSystemPrompt } from './system-prompt.js';
-export async function queryLlm(prompt, model, filePaths) {
+export async function queryLlm(prompt, model, filePaths, threadId) {
     const executor = getExecutorForModel(model);
     // Get system prompt (with CLI suffix if needed)
     const isCliMode = (model.startsWith('gemini-') && config.geminiMode === 'cli') ||
         (model.startsWith('gpt-') && config.openaiMode === 'cli');
     const systemPrompt = getSystemPrompt(isCliMode);
-    const { response, usage } = await executor.execute(prompt, model, systemPrompt, filePaths);
+    const { response, usage, threadId: returnedThreadId, } = await executor.execute(prompt, model, systemPrompt, filePaths, threadId);
     if (!response) {
         throw new Error('No response from the model');
     }
@@ -22,5 +22,5 @@ export async function queryLlm(prompt, model, filePaths) {
         // Handle case where usage is not available (from CLI)
         costInfo = 'Cost data not available (using CLI mode)';
     }
-    return { response, costInfo };
+    return { response, costInfo, threadId: returnedThreadId };
 }
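
The extra parameters are appended to the existing signature, so callers that ignore threads keep working unchanged. A minimal sketch of the new call chain, assuming CLI mode is configured and the module is imported from the dist layout shown in this diff:

```js
// Sketch: chaining two queryLlm calls through the returned threadId.
import { queryLlm } from './dist/llm-query.js'; // path assumed from this diff

const first = await queryLlm('Review this design', 'gpt-5.3-codex');
console.log(first.costInfo); // CLI mode reports 'Cost data not available (using CLI mode)'
if (first.threadId) {
  // filePaths (third argument) stays optional; the fourth resumes the thread.
  const followUp = await queryLlm('Now focus on error handling', 'gpt-5.3-codex', undefined, first.threadId);
  console.log(followUp.response);
}
```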
package/dist/llm.d.ts
CHANGED
@@ -1,9 +1,18 @@
 import OpenAI from 'openai';
 import { type SupportedChatModel as SupportedChatModelType } from './schema.js';
 export interface LlmExecutor {
-    execute(prompt: string, model: SupportedChatModelType, systemPrompt: string, filePaths?: string[]): Promise<{
+    execute(prompt: string, model: SupportedChatModelType, systemPrompt: string, filePaths?: string[], threadId?: string): Promise<{
         response: string;
         usage: OpenAI.CompletionUsage | null;
+        threadId?: string;
     }>;
 }
+export declare function parseGeminiJson(output: string): {
+    sessionId: string | undefined;
+    response: string;
+};
+export declare function parseCodexJsonl(output: string): {
+    threadId: string | undefined;
+    response: string;
+};
 export declare const getExecutorForModel: (model: SupportedChatModelType) => LlmExecutor;
package/dist/llm.js
CHANGED
@@ -31,10 +31,15 @@ function createApiExecutor(client) {
         },
     };
 }
-
-
-
-
+// --- CLI Executors ---
+export function parseGeminiJson(output) {
+    const parsed = JSON.parse(output);
+    return {
+        sessionId: parsed.session_id,
+        response: parsed.response ?? '',
+    };
+}
+function createGeminiExecutor() {
     const buildFullPrompt = (prompt, systemPrompt, filePaths) => {
         let fullPrompt = `${systemPrompt}\n\n${prompt}`;
         if (filePaths && filePaths.length > 0) {
@@ -46,81 +51,187 @@ function createCliExecutor(cliConfig) {
         return fullPrompt;
     };
     return {
-        async execute(prompt, model, systemPrompt, filePaths) {
-            const
-
-
+        async execute(prompt, model, systemPrompt, filePaths, threadId) {
+            const message = threadId
+                ? prompt
+                : buildFullPrompt(prompt, systemPrompt, filePaths);
+            const args = ['-m', model, '-o', 'json'];
+            if (threadId) {
+                args.push('-r', threadId);
+            }
+            args.push('-p', message);
             return new Promise((resolve, reject) => {
                 try {
-                    logCliDebug(
+                    logCliDebug('Spawning gemini CLI', {
                         model,
-                        promptLength:
-
-                        args
-                        promptPreview: fullPrompt.slice(0, 300),
+                        promptLength: message.length,
+                        threadId,
+                        args,
                     });
-                    const child = spawn(
+                    const child = spawn('gemini', args, {
                         shell: false,
                         stdio: ['ignore', 'pipe', 'pipe'],
                     });
                     let stdout = '';
                     let stderr = '';
                     const startTime = Date.now();
-                    child.on('spawn', () => logCliDebug(
+                    child.on('spawn', () => logCliDebug('gemini CLI process spawned successfully'));
                     child.stdout.on('data', (data) => (stdout += data.toString()));
                     child.stderr.on('data', (data) => (stderr += data.toString()));
                     child.on('close', (code) => {
                         const duration = Date.now() - startTime;
-                        logCliDebug(
+                        logCliDebug('gemini CLI process closed', {
                             code,
                             duration: `${duration}ms`,
                             stdoutLength: stdout.length,
                             stderrLength: stderr.length,
                         });
                         if (code === 0) {
-
+                            try {
+                                const parsed = parseGeminiJson(stdout);
+                                if (!parsed.response) {
+                                    reject(new Error('No response found in Gemini JSON output'));
+                                    return;
+                                }
+                                resolve({
+                                    response: parsed.response,
+                                    usage: null,
+                                    threadId: parsed.sessionId,
+                                });
+                            }
+                            catch {
+                                reject(new Error(`Failed to parse Gemini JSON output: ${stdout.slice(0, 200)}`));
+                            }
                         }
                         else {
-
+                            if (stderr.includes('RESOURCE_EXHAUSTED')) {
+                                reject(new Error(`Gemini quota exceeded. Consider using gemini-2.0-flash model. Error: ${stderr.trim()}`));
+                            }
+                            else {
+                                reject(new Error(`Gemini CLI exited with code ${code ?? -1}. Error: ${stderr.trim()}`));
+                            }
                         }
                     });
                     child.on('error', (err) => {
-                        logCliDebug(
-
-                        });
-                        reject(new Error(`Failed to spawn ${cliName} CLI. Is it installed and in PATH? Error: ${err.message}`));
+                        logCliDebug('Failed to spawn gemini CLI', { error: err.message });
+                        reject(new Error(`Failed to spawn gemini CLI. Is it installed and in PATH? Error: ${err.message}`));
                     });
                 }
                 catch (err) {
-                    reject(new Error(`Synchronous error while trying to spawn
+                    reject(new Error(`Synchronous error while trying to spawn gemini: ${err instanceof Error ? err.message : String(err)}`));
                 }
             });
         },
     };
 }
-
-
-
-
-
-    if (
-
+export function parseCodexJsonl(output) {
+    let threadId;
+    const messages = [];
+    for (const line of output.split('\n')) {
+        const trimmed = line.trim();
+        if (!trimmed)
+            continue;
+        try {
+            const event = JSON.parse(trimmed);
+            if (event.type === 'thread.started' && event.thread_id) {
+                threadId = event.thread_id;
+            }
+            else if (event.type === 'item.completed' &&
+                event.item?.type === 'agent_message' &&
+                event.item?.text) {
+                messages.push(event.item.text);
+            }
         }
-
-
-};
-const codexCliConfig = {
-    cliName: 'codex',
-    buildArgs: (model, fullPrompt) => {
-        const args = ['exec', '--skip-git-repo-check', '-m', model];
-        if (config.codexReasoningEffort) {
-            args.push('-c', `model_reasoning_effort="${config.codexReasoningEffort}"`);
+        catch {
+            // Skip non-JSON lines (e.g. the ERROR log from resume)
         }
-
-
-
-
-
+    }
+    return { threadId, response: messages.join('\n') };
+}
+function createCodexExecutor() {
+    const appendFiles = (text, filePaths) => {
+        if (!filePaths || filePaths.length === 0)
+            return text;
+        const fileRefs = filePaths
+            .map((path) => `@${relative(process.cwd(), path)}`)
+            .join(' ');
+        return `${text}\n\nFiles: ${fileRefs}`;
+    };
+    return {
+        async execute(prompt, model, systemPrompt, filePaths, threadId) {
+            const message = appendFiles(prompt, filePaths);
+            const fullPrompt = threadId
+                ? message // On resume, include files but skip system prompt
+                : `${systemPrompt}\n\n${message}`;
+            const args = [];
+            if (threadId) {
+                args.push('exec', 'resume', '--json', '--skip-git-repo-check');
+                if (config.codexReasoningEffort) {
+                    args.push('-c', `model_reasoning_effort="${config.codexReasoningEffort}"`);
+                }
+                args.push('-m', model, threadId, fullPrompt);
+            }
+            else {
+                args.push('exec', '--json', '--skip-git-repo-check');
+                if (config.codexReasoningEffort) {
+                    args.push('-c', `model_reasoning_effort="${config.codexReasoningEffort}"`);
+                }
+                args.push('-m', model, fullPrompt);
+            }
+            return new Promise((resolve, reject) => {
+                try {
+                    logCliDebug('Spawning codex CLI', {
+                        model,
+                        promptLength: fullPrompt.length,
+                        threadId,
+                        args,
+                    });
+                    const child = spawn('codex', args, {
+                        shell: false,
+                        stdio: ['ignore', 'pipe', 'pipe'],
+                    });
+                    let stdout = '';
+                    let stderr = '';
+                    const startTime = Date.now();
+                    child.on('spawn', () => logCliDebug('codex CLI process spawned successfully'));
+                    child.stdout.on('data', (data) => (stdout += data.toString()));
+                    child.stderr.on('data', (data) => (stderr += data.toString()));
+                    child.on('close', (code) => {
+                        const duration = Date.now() - startTime;
+                        logCliDebug('codex CLI process closed', {
+                            code,
+                            duration: `${duration}ms`,
+                            stdoutLength: stdout.length,
+                            stderrLength: stderr.length,
+                        });
+                        if (code === 0) {
+                            const parsed = parseCodexJsonl(stdout);
+                            if (!parsed.response) {
+                                reject(new Error('No agent_message found in Codex JSONL output'));
+                                return;
+                            }
+                            resolve({
+                                response: parsed.response,
+                                usage: null,
+                                threadId: parsed.threadId,
+                            });
+                        }
+                        else {
+                            reject(new Error(`Codex CLI exited with code ${code ?? -1}. Error: ${stderr.trim()}`));
+                        }
+                    });
+                    child.on('error', (err) => {
+                        logCliDebug('Failed to spawn codex CLI', { error: err.message });
+                        reject(new Error(`Failed to spawn codex CLI. Is it installed and in PATH? Error: ${err.message}`));
+                    });
+                }
+                catch (err) {
+                    reject(new Error(`Synchronous error while trying to spawn codex: ${err instanceof Error ? err.message : String(err)}`));
+                }
+            });
+        },
+    };
+}
 const createExecutorProvider = () => {
     const executorCache = new Map();
     const clientCache = new Map();
@@ -172,7 +283,7 @@ const createExecutorProvider = () => {
         if (model.startsWith('gpt-')) {
             executor =
                 config.openaiMode === 'cli'
-                    ?
+                    ? createCodexExecutor()
                     : createApiExecutor(getOpenAIClient());
         }
         else if (model.startsWith('deepseek-')) {
@@ -181,7 +292,7 @@ const createExecutorProvider = () => {
         else if (model.startsWith('gemini-')) {
             executor =
                 config.geminiMode === 'cli'
-                    ?
+                    ? createGeminiExecutor()
                     : createApiExecutor(getGeminiApiClient());
         }
         else {
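
For reference, `parseCodexJsonl` consumes the event stream that `codex exec --json` writes to stdout: one JSON event per line, with occasional plain log lines interleaved. A small sketch of its input and output, with event shapes taken from the tests below:

```js
// Sample JSONL stream in the shape parseCodexJsonl expects.
const sample = [
  JSON.stringify({ type: 'thread.started', thread_id: 'thread_b1ff711' }),
  'ERROR: a plain log line (skipped by the catch block above)',
  JSON.stringify({ type: 'item.completed', item: { type: 'reasoning', text: 'thinking...' } }),
  JSON.stringify({ type: 'item.completed', item: { type: 'agent_message', text: 'answer' } }),
].join('\n');

// Only agent_message items contribute to the response; reasoning items are dropped.
// parseCodexJsonl(sample) -> { threadId: 'thread_b1ff711', response: 'answer' }
```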
package/dist/llm.test.js
CHANGED
@@ -1,6 +1,6 @@
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
 import { EventEmitter } from 'events';
-import { getExecutorForModel } from './llm.js';
+import { getExecutorForModel, parseCodexJsonl, parseGeminiJson } from './llm.js';
 const createCompletionMock = vi.hoisted(() => vi.fn());
 const spawnMock = vi.hoisted(() => vi.fn());
 const logCliDebugMock = vi.hoisted(() => vi.fn());
@@ -88,11 +88,76 @@ describe('API executor', () => {
         await expect(executor.execute('prompt', 'gpt-5.1', 'system')).rejects.toThrow('No response from the model via API');
     });
 });
-
+const codexJsonlOutput = (threadId, text) => [
+    JSON.stringify({ type: 'thread.started', thread_id: threadId }),
+    JSON.stringify({
+        type: 'item.completed',
+        item: { type: 'agent_message', text },
+    }),
+].join('\n');
+describe('parseCodexJsonl', () => {
+    it('extracts thread_id and agent_message text', () => {
+        const output = codexJsonlOutput('thread_abc', 'hello world');
+        const result = parseCodexJsonl(output);
+        expect(result.threadId).toBe('thread_abc');
+        expect(result.response).toBe('hello world');
+    });
+    it('concatenates multiple agent_message items', () => {
+        const output = [
+            JSON.stringify({ type: 'thread.started', thread_id: 't1' }),
+            JSON.stringify({
+                type: 'item.completed',
+                item: { type: 'agent_message', text: 'first' },
+            }),
+            JSON.stringify({
+                type: 'item.completed',
+                item: { type: 'agent_message', text: 'second' },
+            }),
+        ].join('\n');
+        const result = parseCodexJsonl(output);
+        expect(result.response).toBe('first\nsecond');
+    });
+    it('skips reasoning items', () => {
+        const output = [
+            JSON.stringify({ type: 'thread.started', thread_id: 't1' }),
+            JSON.stringify({
+                type: 'item.completed',
+                item: { type: 'reasoning', text: 'thinking...' },
+            }),
+            JSON.stringify({
+                type: 'item.completed',
+                item: { type: 'agent_message', text: 'answer' },
+            }),
+        ].join('\n');
+        const result = parseCodexJsonl(output);
+        expect(result.response).toBe('answer');
+    });
+    it('skips non-JSON lines', () => {
+        const output = [
+            'ERROR: some log line',
+            JSON.stringify({ type: 'thread.started', thread_id: 't1' }),
+            'another garbage line',
+            JSON.stringify({
+                type: 'item.completed',
+                item: { type: 'agent_message', text: 'result' },
+            }),
+        ].join('\n');
+        const result = parseCodexJsonl(output);
+        expect(result.threadId).toBe('t1');
+        expect(result.response).toBe('result');
+    });
+    it('returns empty response when no agent_message found', () => {
+        const output = JSON.stringify({ type: 'thread.started', thread_id: 't1' });
+        const result = parseCodexJsonl(output);
+        expect(result.threadId).toBe('t1');
+        expect(result.response).toBe('');
+    });
+});
+describe('Codex CLI executor', () => {
     const setupSpawn = (child) => {
         spawnMock.mockReturnValue(child);
     };
-    it('spawns codex CLI with
+    it('spawns codex CLI with --json and parses JSONL output', async () => {
         mockConfig.openaiMode = 'cli';
         const child = createChildProcess();
         setupSpawn(child);
@@ -100,20 +165,64 @@ describe('CLI executor', () => {
         const promise = executor.execute('user', 'gpt-5.1', 'system', [
             '/absolute/path/to/file.ts',
         ]);
-        resolveCliExecution(child, {
+        resolveCliExecution(child, {
+            stdout: codexJsonlOutput('thread_123', 'result'),
+            code: 0,
+        });
         const args = spawnMock.mock.calls[0];
         expect(args?.[0]).toBe('codex');
         const cliArgs = args?.[1];
         expect(cliArgs[0]).toBe('exec');
-        expect(cliArgs[1]).toBe('--
-        expect(cliArgs[2]).toBe('-
-        expect(cliArgs
-        expect(cliArgs
-
-
+        expect(cliArgs[1]).toBe('--json');
+        expect(cliArgs[2]).toBe('--skip-git-repo-check');
+        expect(cliArgs).toContain('-m');
+        expect(cliArgs).toContain('gpt-5.1');
+        // Last arg is the prompt with system + user + files
+        const promptArg = cliArgs[cliArgs.length - 1];
+        expect(promptArg).toContain('system');
+        expect(promptArg).toContain('user');
+        expect(promptArg).toContain('Files: @');
         const result = await promise;
         expect(result.response).toBe('result');
         expect(result.usage).toBeNull();
+        expect(result.threadId).toBe('thread_123');
+    });
+    it('resumes a session with thread_id', async () => {
+        mockConfig.openaiMode = 'cli';
+        const child = createChildProcess();
+        setupSpawn(child);
+        const executor = getExecutorForModel('gpt-5.1');
+        const promise = executor.execute('follow up question', 'gpt-5.1', 'system', undefined, 'thread_abc');
+        resolveCliExecution(child, {
+            stdout: codexJsonlOutput('thread_abc', 'follow up answer'),
+            code: 0,
+        });
+        const args = spawnMock.mock.calls[0];
+        const cliArgs = args?.[1];
+        expect(cliArgs[0]).toBe('exec');
+        expect(cliArgs[1]).toBe('resume');
+        expect(cliArgs[2]).toBe('--json');
+        expect(cliArgs[3]).toBe('--skip-git-repo-check');
+        expect(cliArgs).toContain('thread_abc');
+        // Prompt should NOT contain system prompt on resume
+        const promptArg = cliArgs[cliArgs.length - 1];
+        expect(promptArg).toBe('follow up question');
+        expect(promptArg).not.toContain('system');
+        const result = await promise;
+        expect(result.response).toBe('follow up answer');
+        expect(result.threadId).toBe('thread_abc');
+    });
+    it('rejects when no agent_message in JSONL output', async () => {
+        mockConfig.openaiMode = 'cli';
+        const child = createChildProcess();
+        setupSpawn(child);
+        const executor = getExecutorForModel('gpt-5.1');
+        const promise = executor.execute('user', 'gpt-5.1', 'system');
+        resolveCliExecution(child, {
+            stdout: JSON.stringify({ type: 'thread.started', thread_id: 't1' }),
+            code: 0,
+        });
+        await expect(promise).rejects.toThrow('No agent_message found in Codex JSONL output');
     });
     it('rejects with codex errors on non-zero exit', async () => {
         mockConfig.openaiMode = 'cli';
@@ -131,25 +240,16 @@ describe('CLI executor', () => {
         setupSpawn(child);
         const executor = getExecutorForModel('gpt-5.1');
         const promise = executor.execute('user', 'gpt-5.1', 'system');
-        resolveCliExecution(child, {
+        resolveCliExecution(child, {
+            stdout: codexJsonlOutput('t1', 'result'),
+            code: 0,
+        });
         const args = spawnMock.mock.calls[0];
         const cliArgs = args?.[1];
         expect(cliArgs).toContain('-c');
         expect(cliArgs).toContain('model_reasoning_effort="xhigh"');
         await promise;
-        mockConfig.codexReasoningEffort = undefined;
-    });
-    it('wraps gemini quota errors specially', async () => {
-        mockConfig.geminiMode = 'cli';
-        const child = createChildProcess();
-        setupSpawn(child);
-        const executor = getExecutorForModel('gemini-2.5-pro');
-        const promise = executor.execute('user', 'gemini-2.5-pro', 'system');
-        resolveCliExecution(child, {
-            stderr: 'RESOURCE_EXHAUSTED: quota exceeded',
-            code: 1,
-        });
-        await expect(promise).rejects.toThrow('Gemini quota exceeded');
+        mockConfig.codexReasoningEffort = undefined;
     });
     it('handles spawn error events with friendly message', async () => {
         mockConfig.openaiMode = 'cli';
@@ -169,6 +269,112 @@ describe('CLI executor', () => {
         await expect(executor.execute('user', 'gpt-5.1', 'system')).rejects.toThrow('Synchronous error while trying to spawn codex: sync failure');
     });
 });
+const geminiJsonOutput = (sessionId, response) => JSON.stringify({ session_id: sessionId, response, stats: {} });
+describe('parseGeminiJson', () => {
+    it('extracts session_id and response', () => {
+        const output = geminiJsonOutput('sess_abc', 'hello world');
+        const result = parseGeminiJson(output);
+        expect(result.sessionId).toBe('sess_abc');
+        expect(result.response).toBe('hello world');
+    });
+    it('returns empty response when response is missing', () => {
+        const output = JSON.stringify({ session_id: 's1' });
+        const result = parseGeminiJson(output);
+        expect(result.sessionId).toBe('s1');
+        expect(result.response).toBe('');
+    });
+});
+describe('Gemini CLI executor', () => {
+    const setupSpawn = (child) => {
+        spawnMock.mockReturnValue(child);
+    };
+    it('spawns gemini CLI with -o json and parses JSON output', async () => {
+        mockConfig.geminiMode = 'cli';
+        const child = createChildProcess();
+        setupSpawn(child);
+        const executor = getExecutorForModel('gemini-2.5-pro');
+        const promise = executor.execute('user prompt', 'gemini-2.5-pro', 'system');
+        resolveCliExecution(child, {
+            stdout: geminiJsonOutput('sess_123', 'result'),
+            code: 0,
+        });
+        const args = spawnMock.mock.calls[0];
+        expect(args?.[0]).toBe('gemini');
+        const cliArgs = args?.[1];
+        expect(cliArgs).toContain('-m');
+        expect(cliArgs).toContain('gemini-2.5-pro');
+        expect(cliArgs).toContain('-o');
+        expect(cliArgs).toContain('json');
+        expect(cliArgs).toContain('-p');
+        const result = await promise;
+        expect(result.response).toBe('result');
+        expect(result.usage).toBeNull();
+        expect(result.threadId).toBe('sess_123');
+    });
+    it('resumes a session with thread_id', async () => {
+        mockConfig.geminiMode = 'cli';
+        const child = createChildProcess();
+        setupSpawn(child);
+        const executor = getExecutorForModel('gemini-2.5-pro');
+        const promise = executor.execute('follow up', 'gemini-2.5-pro', 'system', undefined, 'sess_abc');
+        resolveCliExecution(child, {
+            stdout: geminiJsonOutput('sess_abc', 'follow up answer'),
+            code: 0,
+        });
+        const args = spawnMock.mock.calls[0];
+        const cliArgs = args?.[1];
+        expect(cliArgs).toContain('-r');
+        expect(cliArgs).toContain('sess_abc');
+        // Prompt should NOT contain system prompt on resume
+        const pIdx = cliArgs.indexOf('-p');
+        expect(cliArgs[pIdx + 1]).toBe('follow up');
+        const result = await promise;
+        expect(result.response).toBe('follow up answer');
+        expect(result.threadId).toBe('sess_abc');
+    });
+    it('rejects when no response in JSON output', async () => {
+        mockConfig.geminiMode = 'cli';
+        const child = createChildProcess();
+        setupSpawn(child);
+        const executor = getExecutorForModel('gemini-2.5-pro');
+        const promise = executor.execute('user', 'gemini-2.5-pro', 'system');
+        resolveCliExecution(child, {
+            stdout: JSON.stringify({ session_id: 's1' }),
+            code: 0,
+        });
+        await expect(promise).rejects.toThrow('No response found in Gemini JSON output');
+    });
+    it('rejects with parse error on invalid JSON', async () => {
+        mockConfig.geminiMode = 'cli';
+        const child = createChildProcess();
+        setupSpawn(child);
+        const executor = getExecutorForModel('gemini-2.5-pro');
+        const promise = executor.execute('user', 'gemini-2.5-pro', 'system');
+        resolveCliExecution(child, { stdout: 'not json', code: 0 });
+        await expect(promise).rejects.toThrow('Failed to parse Gemini JSON output');
+    });
+    it('wraps gemini quota errors specially', async () => {
+        mockConfig.geminiMode = 'cli';
+        const child = createChildProcess();
+        setupSpawn(child);
+        const executor = getExecutorForModel('gemini-2.5-pro');
+        const promise = executor.execute('user', 'gemini-2.5-pro', 'system');
+        resolveCliExecution(child, {
+            stderr: 'RESOURCE_EXHAUSTED: quota exceeded',
+            code: 1,
+        });
+        await expect(promise).rejects.toThrow('Gemini quota exceeded');
+    });
+    it('handles spawn error events with friendly message', async () => {
+        mockConfig.geminiMode = 'cli';
+        const child = createChildProcess();
+        setupSpawn(child);
+        const executor = getExecutorForModel('gemini-2.5-pro');
+        const promise = executor.execute('user', 'gemini-2.5-pro', 'system');
+        child.emit('error', new Error('not found'));
+        await expect(promise).rejects.toThrow('Failed to spawn gemini CLI. Is it installed and in PATH? Error: not found');
+    });
+});
 describe('executor selection', () => {
     it('uses deepseek API client', async () => {
         createCompletionMock.mockResolvedValue({
package/dist/models.d.ts
CHANGED
@@ -1 +1 @@
-export declare const ALL_MODELS: readonly ["gemini-2.5-pro", "gemini-3-pro-preview", "deepseek-reasoner", "gpt-5.2", "gpt-5.2-codex", "gpt-5.1-codex-max", "gpt-5.1-codex", "gpt-5.1-codex-mini", "gpt-5.1"];
+export declare const ALL_MODELS: readonly ["gemini-2.5-pro", "gemini-3-pro-preview", "deepseek-reasoner", "gpt-5.2", "gpt-5.3-codex", "gpt-5.2-codex", "gpt-5.1-codex-max", "gpt-5.1-codex", "gpt-5.1-codex-mini", "gpt-5.1"];
package/dist/models.js
CHANGED
package/dist/schema.d.ts
CHANGED
@@ -10,6 +10,7 @@ export declare const ConsultLlmArgs: z.ZodObject<{
         [x: string]: string;
     }>>>;
     web_mode: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
+    thread_id: z.ZodOptional<z.ZodString>;
     git_diff: z.ZodOptional<z.ZodObject<{
         repo_path: z.ZodOptional<z.ZodString>;
         files: z.ZodArray<z.ZodString>;
@@ -18,6 +19,6 @@ export declare const ConsultLlmArgs: z.ZodObject<{
 }, z.core.$strip>;
 export declare const toolSchema: {
     readonly name: "consult_llm";
-    readonly description: "Ask a more powerful AI for help with complex problems. Provide your question in the prompt field and always include relevant code files as context.\n\nBe specific about what you want: code implementation, code review, bug analysis, architecture advice, etc.\n\nIMPORTANT: Ask neutral, open-ended questions. Avoid suggesting specific solutions or alternatives in your prompt as this can bias the analysis. Instead of \"Should I use X or Y approach?\", ask \"What's the best approach for this problem?\" Let the consultant LLM provide unbiased recommendations.";
+    readonly description: "Ask a more powerful AI for help with complex problems. Provide your question in the prompt field and always include relevant code files as context.\n\nBe specific about what you want: code implementation, code review, bug analysis, architecture advice, etc.\n\nIMPORTANT: Ask neutral, open-ended questions. Avoid suggesting specific solutions or alternatives in your prompt as this can bias the analysis. Instead of \"Should I use X or Y approach?\", ask \"What's the best approach for this problem?\" Let the consultant LLM provide unbiased recommendations.\n\nFor multi-turn conversations with CLI models (Codex and Gemini), the response includes a [thread_id:xxx] prefix. Extract this ID and pass it as the thread_id parameter in follow-up requests to maintain conversation context.";
     readonly inputSchema: z.core.JSONSchema.JSONSchema;
 };
package/dist/schema.js
CHANGED
@@ -19,6 +19,10 @@ export const ConsultLlmArgs = z.object({
         .optional()
         .default(false)
         .describe("If true, copy the formatted prompt to the clipboard instead of querying an LLM. When true, the `model` parameter is ignored. Use this to paste the prompt into browser-based LLM services. IMPORTANT: Only use this when the user specifically requests it. When true, wait for the user to provide the external LLM's response before proceeding with any implementation."),
+    thread_id: z
+        .string()
+        .optional()
+        .describe('Thread/session ID for resuming a conversation. Works with Codex CLI (gpt-*) and Gemini CLI (gemini-*) in CLI mode. Returned in the response prefix as [thread_id:xxx].'),
     git_diff: z
         .object({
         repo_path: z
@@ -47,6 +51,8 @@ export const toolSchema = {
 
 Be specific about what you want: code implementation, code review, bug analysis, architecture advice, etc.
 
-IMPORTANT: Ask neutral, open-ended questions. Avoid suggesting specific solutions or alternatives in your prompt as this can bias the analysis. Instead of "Should I use X or Y approach?", ask "What's the best approach for this problem?" Let the consultant LLM provide unbiased recommendations
+IMPORTANT: Ask neutral, open-ended questions. Avoid suggesting specific solutions or alternatives in your prompt as this can bias the analysis. Instead of "Should I use X or Y approach?", ask "What's the best approach for this problem?" Let the consultant LLM provide unbiased recommendations.
+
+For multi-turn conversations with CLI models (Codex and Gemini), the response includes a [thread_id:xxx] prefix. Extract this ID and pass it as the thread_id parameter in follow-up requests to maintain conversation context.`,
     inputSchema: consultLlmInputSchema,
 };
package/dist/schema.test.js
CHANGED
@@ -43,6 +43,15 @@ describe('ConsultLlmArgs', () => {
         expect(parsed.model).toBeDefined();
         expect(ALL_MODELS).toContain(parsed.model);
     });
+    it('accepts optional thread_id as string', () => {
+        const withThread = ConsultLlmArgs.parse({
+            prompt: 'follow up',
+            thread_id: 'thread_abc',
+        });
+        expect(withThread.thread_id).toBe('thread_abc');
+        const withoutThread = ConsultLlmArgs.parse({ prompt: 'no thread' });
+        expect(withoutThread.thread_id).toBeUndefined();
+    });
     it('defaults web_mode to false but honors explicit value', () => {
         const parsedDefault = ConsultLlmArgs.parse({ prompt: 'default case' });
         expect(parsedDefault.web_mode).toBe(false);
CHANGED
|
@@ -47,7 +47,7 @@ export async function handleConsultLlm(args) {
|
|
|
47
47
|
.join(', ');
|
|
48
48
|
throw new Error(`Invalid request parameters: ${errors}`);
|
|
49
49
|
}
|
|
50
|
-
const { files, prompt: userPrompt, git_diff, web_mode, model: parsedModel, } = parseResult.data;
|
|
50
|
+
const { files, prompt: userPrompt, git_diff, web_mode, model: parsedModel, thread_id: threadId, } = parseResult.data;
|
|
51
51
|
const providedModel = typeof args === 'object' &&
|
|
52
52
|
args !== null &&
|
|
53
53
|
Object.prototype.hasOwnProperty.call(args, 'model');
|
|
@@ -56,6 +56,9 @@ export async function handleConsultLlm(args) {
|
|
|
56
56
|
: (config.defaultModel ?? parsedModel);
|
|
57
57
|
logToolCall('consult_llm', args);
|
|
58
58
|
const isCliMode = isCliExecution(model);
|
|
59
|
+
if (threadId && !isCliMode) {
|
|
60
|
+
throw new Error('thread_id is only supported with CLI mode models (Codex or Gemini CLI)');
|
|
61
|
+
}
|
|
59
62
|
let prompt;
|
|
60
63
|
let filePaths;
|
|
61
64
|
if (web_mode || !isCliMode) {
|
|
@@ -95,10 +98,13 @@ ${prompt}`;
|
|
|
95
98
|
content: [{ type: 'text', text: responseMessage }],
|
|
96
99
|
};
|
|
97
100
|
}
|
|
98
|
-
const { response, costInfo } = await queryLlm(prompt, model, filePaths);
|
|
101
|
+
const { response, costInfo, threadId: returnedThreadId, } = await queryLlm(prompt, model, filePaths, threadId);
|
|
99
102
|
await logResponse(model, response, costInfo);
|
|
103
|
+
const responseText = returnedThreadId
|
|
104
|
+
? `[thread_id:${returnedThreadId}]\n\n${response}`
|
|
105
|
+
: response;
|
|
100
106
|
return {
|
|
101
|
-
content: [{ type: 'text', text:
|
|
107
|
+
content: [{ type: 'text', text: responseText }],
|
|
102
108
|
};
|
|
103
109
|
}
|
|
104
110
|
server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
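
The handler's thread handling reduces to two branches; a comment-only sketch, assuming `handleConsultLlm` is invoked directly (as the tests below do) and that the CLI echoes back the same thread ID the tests stub:

```js
// CLI-mode model: the response text is prefixed so the caller can resume.
// const ok = await handleConsultLlm({ prompt: 'q', model: 'gpt-5.3-codex', thread_id: 'thread_b1ff711' });
// ok.content[0].text === '[thread_id:thread_b1ff711]\n\n<response>'

// API-mode model: thread_id is rejected before any LLM call is made.
// await handleConsultLlm({ prompt: 'q', model: 'deepseek-reasoner', thread_id: 'x' })
//   -> throws 'thread_id is only supported with CLI mode models (Codex or Gemini CLI)'
```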
package/dist/server.test.js
CHANGED
@@ -88,13 +88,13 @@ describe('handleConsultLlm', () => {
         expect(processFilesMock).toHaveBeenCalledWith(['file1.ts']);
         expect(generateGitDiffMock).toHaveBeenCalledWith(undefined, ['src/index.ts'], 'HEAD');
         expect(buildPromptMock).toHaveBeenCalledWith('help me', expect.any(Array), 'diff output');
-        expect(queryLlmMock).toHaveBeenCalledWith('BUILT PROMPT', 'gpt-5.1', undefined);
+        expect(queryLlmMock).toHaveBeenCalledWith('BUILT PROMPT', 'gpt-5.1', undefined, undefined);
         expect(result.content[0]?.text).toBe('ok');
     });
     it('uses explicit model even when config default exists', async () => {
         mockConfig.defaultModel = 'gpt-5.1';
         await handleConsultLlm({ prompt: 'hello', model: 'gpt-5.2' });
-        expect(queryLlmMock).toHaveBeenCalledWith('BUILT PROMPT', 'gpt-5.2', undefined);
+        expect(queryLlmMock).toHaveBeenCalledWith('BUILT PROMPT', 'gpt-5.2', undefined, undefined);
     });
     it('builds CLI prompts without file contents', async () => {
         mockConfig.openaiMode = 'cli';
@@ -137,6 +137,55 @@ describe('handleConsultLlm', () => {
         expect(queryLlmMock).not.toHaveBeenCalled();
         expect(result.content[0]?.text).toContain('Prompt copied to clipboard');
     });
+    it('passes thread_id to queryLlm for Codex CLI models', async () => {
+        mockConfig.openaiMode = 'cli';
+        await handleConsultLlm({
+            prompt: 'follow up',
+            model: 'gpt-5.2',
+            thread_id: 'thread_abc',
+        });
+        const callArgs = queryLlmMock.mock.calls[0];
+        expect(callArgs[3]).toBe('thread_abc');
+    });
+    it('prefixes response with thread_id when returned', async () => {
+        mockConfig.openaiMode = 'cli';
+        queryLlmMock.mockResolvedValueOnce({
+            response: 'answer',
+            costInfo: null,
+            threadId: 'thread_xyz',
+        });
+        const result = await handleConsultLlm({
+            prompt: 'question',
+            model: 'gpt-5.2',
+        });
+        expect(result.content[0]?.text).toBe('[thread_id:thread_xyz]\n\nanswer');
+    });
+    it('passes thread_id to queryLlm for Gemini CLI models', async () => {
+        mockConfig.geminiMode = 'cli';
+        await handleConsultLlm({
+            prompt: 'follow up',
+            model: 'gemini-2.5-pro',
+            thread_id: 'sess_abc',
+        });
+        const callArgs = queryLlmMock.mock.calls[0];
+        expect(callArgs[3]).toBe('sess_abc');
+    });
+    it('rejects thread_id with non-CLI model', async () => {
+        mockConfig.openaiMode = 'api';
+        await expect(handleConsultLlm({
+            prompt: 'hello',
+            model: 'gpt-5.2',
+            thread_id: 'thread_abc',
+        })).rejects.toThrow('thread_id is only supported with CLI mode models');
+    });
+    it('rejects thread_id with Gemini API model', async () => {
+        mockConfig.geminiMode = 'api';
+        await expect(handleConsultLlm({
+            prompt: 'hello',
+            model: 'gemini-2.5-pro',
+            thread_id: 'sess_abc',
+        })).rejects.toThrow('thread_id is only supported with CLI mode models');
+    });
     it('propagates query errors', async () => {
         queryLlmMock.mockRejectedValueOnce(new Error('boom'));
         await expect(handleConsultLlm({ prompt: 'oops' })).rejects.toThrow('boom');