consult-llm-mcp 2.1.0 → 2.4.0
- package/README.md +133 -47
- package/dist/config.d.ts +10 -5
- package/dist/config.js +47 -19
- package/dist/config.test.d.ts +1 -0
- package/dist/config.test.js +68 -0
- package/dist/executors/api.d.ts +3 -0
- package/dist/executors/api.js +29 -0
- package/dist/executors/codex-cli.d.ts +6 -0
- package/dist/executors/codex-cli.js +117 -0
- package/dist/executors/cursor-cli.d.ts +6 -0
- package/dist/executors/cursor-cli.js +116 -0
- package/dist/executors/gemini-cli.d.ts +6 -0
- package/dist/executors/gemini-cli.js +102 -0
- package/dist/executors/types.d.ts +15 -0
- package/dist/executors/types.js +1 -0
- package/dist/file.js +2 -2
- package/dist/git.js +1 -1
- package/dist/llm-cost.d.ts +2 -2
- package/dist/llm-query.d.ts +3 -2
- package/dist/llm-query.js +2 -10
- package/dist/llm.d.ts +6 -17
- package/dist/llm.js +27 -243
- package/dist/llm.test.js +252 -41
- package/dist/logger.js +3 -3
- package/dist/main.js +15 -0
- package/dist/models.d.ts +1 -1
- package/dist/models.js +0 -4
- package/dist/prompt.test.js +3 -3
- package/dist/schema.d.ts +37 -2
- package/dist/schema.js +7 -3
- package/dist/schema.test.js +18 -1
- package/dist/server.d.ts +0 -2
- package/dist/server.js +15 -22
- package/dist/server.test.js +77 -39
- package/dist/system-prompt.d.ts +7 -2
- package/dist/system-prompt.js +50 -16
- package/dist/version.d.ts +1 -0
- package/dist/version.js +1 -0
- package/package.json +17 -21
- package/dist/main.logic.test.js +0 -165
package/README.md
CHANGED
@@ -2,7 +2,7 @@
 
 An MCP server that lets Claude Code consult stronger AI models (GPT-5.2, Gemini
 3.0 Pro, DeepSeek Reasoner) when Sonnet has you running in circles and you need
-to bring in the heavy artillery.
+to bring in the heavy artillery. Supports multi-turn conversations.
 
 ```
 > Still getting this error after your fix. Ask gemini
@@ -22,6 +22,9 @@ to bring in the heavy artillery.
 iter_captures() approach now.
 ```
 
+[Quick start](#quick-start) · [Configuration](#configuration) ·
+[Changelog](CHANGELOG.md)
+
 ## Features
 
 - Query powerful AI models (GPT-5.2, Gemini 3.0 Pro, DeepSeek Reasoner) with
@@ -29,9 +32,13 @@ to bring in the heavy artillery.
 - Direct queries with optional file context
 - Include git changes for code review and analysis
 - Comprehensive logging with cost estimation
-- [Gemini CLI
+- [Gemini CLI backend](#gemini-cli): Use the `gemini` CLI to take advantage of
   [free quota](https://developers.google.com/gemini-code-assist/resources/quotas#quotas-for-agent-mode-gemini-cli)
-- [Codex CLI
+- [Codex CLI backend](#codex-cli): Use the `codex` CLI for OpenAI models
+- [Cursor CLI backend](#cursor-cli): Use the `cursor-agent` CLI to route GPT and
+  Gemini models through a single tool
+- [Multi-turn conversations](#multi-turn-conversations): Resume CLI sessions
+  across requests with `thread_id`
 - [Web mode](#web-mode): Copy formatted prompts to clipboard for browser-based
   LLM services
 - Simple: provides just one MCP tool to not clutter the context
@@ -41,20 +48,23 @@ to bring in the heavy artillery.
 1. **Add to Claude Code**:
 
    ```bash
-   claude mcp add consult-llm
+   claude mcp add consult-llm \
+     -e OPENAI_API_KEY=your_key \
+     -e GEMINI_API_KEY=your_key \
+     -- npx -y consult-llm-mcp
    ```
 
    For global availability across projects, add `--scope user`.
 
 <details>
-<summary>Using multiple API keys or CLI
+<summary>Using multiple API keys or CLI backends</summary>
 
 ```bash
 claude mcp add consult-llm \
   -e OPENAI_API_KEY=your_openai_key \
   -e GEMINI_API_KEY=your_gemini_key \
   -e DEEPSEEK_API_KEY=your_deepseek_key \
-  -e
+  -e GEMINI_BACKEND=gemini-cli \
   -- npx -y consult-llm-mcp
 ```
 
@@ -297,24 +307,26 @@ confidence in the approach.
 
 </details>
 
-##
+## Backends
 
-
+Each model is routed to a **backend** — either an API endpoint or a CLI tool.
 
-
-
-| **API**
-| **CLI** | Shells out to
-| **
+| Backend        | Description                      | When to use                                                      |
+| -------------- | -------------------------------- | ---------------------------------------------------------------- |
+| **API**        | Queries LLM APIs directly        | You have API keys and want the simplest setup                     |
+| **Gemini CLI** | Shells out to `gemini` CLI       | Free quota (Gemini), existing subscriptions, or prefer CLI tools  |
+| **Codex CLI**  | Shells out to `codex` CLI        | OpenAI models via Codex subscription                              |
+| **Cursor CLI** | Shells out to `cursor-agent` CLI | Route GPT and Gemini through one tool                             |
+| **Web**        | Copies prompt to clipboard       | You prefer browser UIs or want to review prompts                  |
 
-### API
+### API (default)
 
-The default
+The default backend. Requires API keys configured via environment variables. See
 [Configuration](#configuration) for details.
 
-### CLI
+### CLI backends
 
-Instead of making API calls, shell out to local CLI tools. The CLI
+Instead of making API calls, shell out to local CLI tools. The CLI tools can
 explore the codebase themselves, so you don't need to pass all relevant files as
 context, but it helps.
 
@@ -331,7 +343,7 @@ Use Gemini's local CLI to take advantage of Google's
 **Setup:**
 
 ```bash
-claude mcp add consult-llm -e
+claude mcp add consult-llm -e GEMINI_BACKEND=gemini-cli -- npx -y consult-llm-mcp
 ```
 
 #### Codex CLI
@@ -346,41 +358,76 @@ Use OpenAI's Codex CLI for OpenAI models.
 **Setup:**
 
 ```bash
-claude mcp add consult-llm -e
+claude mcp add consult-llm -e OPENAI_BACKEND=codex-cli -- npx -y consult-llm-mcp
 ```
 
 <!-- prettier-ignore -->
 > [!TIP]
 > Set reasoning effort with `-e CODEX_REASONING_EFFORT=high`. Options:
-> `none`, `minimal`, `low`, `medium`, `high`, `xhigh
+> `none`, `minimal`, `low`, `medium`, `high`, `xhigh`.
+
+#### Cursor CLI
+
+Use Cursor's agent CLI to route GPT and Gemini models through one tool.
+
+**Requirements:**
+
+1. Install the [Cursor agent CLI](https://cursor.com/cli) (`cursor-agent` in
+   PATH)
+
+**Setup:**
+
+```bash
+# Route GPT models through Cursor CLI
+claude mcp add consult-llm -e OPENAI_BACKEND=cursor-cli -- npx -y consult-llm-mcp
+
+# Route Gemini models through Cursor CLI
+claude mcp add consult-llm -e GEMINI_BACKEND=cursor-cli -- npx -y consult-llm-mcp
+
+# Route everything through Cursor CLI
+claude mcp add consult-llm \
+  -e OPENAI_BACKEND=cursor-cli \
+  -e GEMINI_BACKEND=cursor-cli \
+  -- npx -y consult-llm-mcp
+```
 
 #### Multi-turn conversations
 
-CLI
+CLI backends support multi-turn conversations via the `thread_id` parameter. The
 first response includes a `[thread_id:xxx]` prefix. Pass that ID in follow-up
 requests to continue the conversation with full context from prior turns.
 
-This works with
-
+This works with all CLI backends (Gemini CLI, Codex CLI, Cursor CLI). Each uses
+its own session/thread mechanism, but all are passed through the same
+`thread_id` parameter.
 
 ```
-
+> Ask codex what's the best caching strategy for our read-heavy API
+
+⏺ consult-llm - consult_llm (MCP)(prompt: "What's the best caching strategy for a read-heavy API?",
                                   model: "gpt-5.3-codex")
   ⎿  [thread_id:thread_b1ff711...]
 
-
+     Redis with TTL-based invalidation. Set short TTLs for volatile data ...
 
-⏺
+⏺ Codex recommends Redis with TTL-based invalidation — but our payment
+  endpoints need strong consistency. Let me push back on that.
+
+⏺ consult-llm - consult_llm (MCP)(prompt: "What if we need strong consistency guarantees?
+                                  Does that change your recommendation?",
                                   model: "gpt-5.3-codex",
                                   thread_id: "thread_b1ff711...")
   ⎿  [thread_id:thread_b1ff711...]
 
-
+     Yes — with strong consistency, the TTL approach breaks down. Consider
+     write-through caching or cache invalidation on writes instead ...
+
+⏺ Good — Codex revised its recommendation based on the consistency
+  constraint. I'll use write-through caching for the payment endpoints.
 ```
 
-See [skills
-
-the result.
+See the [debate skills](#debate-skills) for multi-LLM workflows that use
+`thread_id` to maintain context across debate rounds.
 
 ### Web mode
 
@@ -409,19 +456,22 @@ See the "Using web mode..." example above for a concrete transcript.
 - `DEEPSEEK_API_KEY` - Your DeepSeek API key (required for DeepSeek models)
 - `CONSULT_LLM_DEFAULT_MODEL` - Override the default model (optional)
   - Options: `gpt-5.2` (default), `gemini-2.5-pro`, `gemini-3-pro-preview`,
-    `deepseek-reasoner`, `gpt-5.3-codex`, `gpt-5.2-codex
-
-- `
-
--
-- `OPENAI_MODE` - Choose between API or CLI mode for OpenAI models (optional)
-  - Options: `api` (default), `cli`
-  - CLI mode uses the system-installed `codex` CLI tool
+    `deepseek-reasoner`, `gpt-5.3-codex`, `gpt-5.2-codex`
+- `GEMINI_BACKEND` - Backend for Gemini models (optional)
+  - Options: `api` (default), `gemini-cli`, `cursor-cli`
+- `OPENAI_BACKEND` - Backend for OpenAI models (optional)
+  - Options: `api` (default), `codex-cli`, `cursor-cli`
 - `CODEX_REASONING_EFFORT` - Configure reasoning effort for Codex CLI (optional)
   - See [Codex CLI](#codex-cli) for details and available options
+- `CONSULT_LLM_EXTRA_MODELS` - Add models not in the built-in list (optional)
+  - Comma-separated list, e.g., `grok-3,kimi-k2.5`
+  - Merged with built-in models and included in the tool schema
+  - Useful for newly released models with a known provider prefix (`gpt-`,
+    `gemini-`, `deepseek-`)
 - `CONSULT_LLM_ALLOWED_MODELS` - List of models to advertise (optional)
   - Comma-separated list, e.g., `gpt-5.2,gemini-3-pro-preview`
   - When set, only these models appear in the tool schema
+  - Filters the combined catalog (built-in + extra models)
   - If `CONSULT_LLM_DEFAULT_MODEL` is set, it must be in this list
   - See [Tips](#controlling-which-models-claude-uses) for usage examples
 - `CONSULT_LLM_SYSTEM_PROMPT_PATH` - Custom path to system prompt file
@@ -442,7 +492,9 @@ This creates a placeholder file with the default system prompt that you can edit
 to customize how the consultant LLM behaves. The custom prompt is read on every
 request, so changes take effect immediately without restarting the server.
 
-
+When a custom prompt file exists, it acts as a full override — `task_mode`
+overlays are not applied on top. To revert to the default prompt with
+`task_mode` support, simply delete the `SYSTEM_PROMPT.md` file.
 
 #### Custom prompt path
 
@@ -502,8 +554,18 @@ models complex questions.
 
 - **model** (optional): LLM model to use
   - Options: `gpt-5.2` (default), `gemini-2.5-pro`, `gemini-3-pro-preview`,
-    `deepseek-reasoner`, `gpt-5.3-codex`, `gpt-5.2-codex
-
+    `deepseek-reasoner`, `gpt-5.3-codex`, `gpt-5.2-codex`
+
+- **task_mode** (optional): Controls the system prompt persona. The calling LLM
+  should choose based on the task:
+  - `general` (default): Neutral base prompt that defers to the user prompt
+  - `review`: Critical code reviewer — bugs, security, performance,
+    anti-patterns
+  - `debug`: Focused troubleshooter — root cause analysis, execution tracing,
+    ignores style issues
+  - `plan`: Constructive architect — trade-offs, alternatives, always includes a
+    final recommendation
+  - `create`: Generative writer — docs, content, polished output
 
 - **web_mode** (optional): Copy prompt to clipboard instead of querying LLM
   - Default: `false`
@@ -512,7 +574,7 @@ models complex questions.
     services
 
 - **thread_id** (optional): Resume a multi-turn conversation
-  - Works with
+  - Works with CLI backends (Codex CLI, Gemini CLI, Cursor CLI)
   - The first response includes a `[thread_id:xxx]` prefix — pass that ID back
     as `thread_id` in follow-up requests to maintain conversation context
 
@@ -532,10 +594,6 @@ models complex questions.
 - **gpt-5.2**: OpenAI's latest GPT model
 - **gpt-5.3-codex**: OpenAI's Codex model based on GPT-5.3
 - **gpt-5.2-codex**: OpenAI's Codex model based on GPT-5.2
-- **gpt-5.1-codex-max**: Strongest OpenAI Codex model
-- **gpt-5.1-codex**: OpenAI's Codex model optimized for coding
-- **gpt-5.1-codex-mini**: Lighter, faster version of gpt-5.1-codex
-- **gpt-5.1**: Broad world knowledge with strong general reasoning
 
 ## Logging
 
@@ -640,6 +698,34 @@ for the full content.
 Save it as `~/.claude/commands/consult.md` and you can then use it by typing
 `/consult ask gemini about X` or `/consult ask codex about X` in Claude Code.
 
+## Debate skills
+
+Two skills that orchestrate structured debates between LLMs to find the best
+implementation approach before writing code. Both use `thread_id` to maintain
+conversation context across rounds, so each LLM remembers the full debate
+history without resending everything.
+
+### debate
+
+**Claude moderates, two LLMs debate.** Gemini and Codex independently propose
+approaches, then critique each other's proposals. Claude synthesizes the best
+ideas and implements. See [skills/debate/SKILL.md](skills/debate/SKILL.md).
+
+```
+> /debate design the multi-tenant isolation strategy
+```
+
+### debate-vs
+
+**Claude participates as a debater** against one opponent LLM (Gemini or Codex)
+through multiple rounds. Claude forms its own position, then debates back and
+forth before synthesizing and implementing. See
+[skills/debate-vs/SKILL.md](skills/debate-vs/SKILL.md).
+
+```
+> /debate-vs --gemini design the multi-tenant isolation strategy
+```
+
 ## Development
 
 To work on the MCP server locally and use your development version:
@@ -647,7 +733,7 @@ To work on the MCP server locally and use your development version:
 1. Clone the repository and install dependencies:
 
    ```bash
-   git clone https://github.com/
+   git clone https://github.com/raine/consult-llm-mcp.git
   cd consult-llm-mcp
   npm install
   ```
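The multi-turn flow documented above hinges on one convention: CLI-backend responses carry a `[thread_id:xxx]` prefix, and the caller echoes that ID back to resume the session. Below is a minimal TypeScript sketch of that round trip; the `extractThreadId` helper is illustrative only and is not exported by the package.

```ts
// Hypothetical helper, not part of consult-llm-mcp: splits the documented
// `[thread_id:xxx]` prefix off a response so the ID can be echoed back.
function extractThreadId(response: string): { threadId?: string; body: string } {
  const match = response.match(/^\[thread_id:([^\]]+)\]\s*/);
  if (!match) return { body: response };
  return { threadId: match[1], body: response.slice(match[0].length) };
}

// First turn returns the ID; follow-ups pass it as the `thread_id` parameter.
const { threadId, body } = extractThreadId(
  '[thread_id:thread_b1ff711] Redis with TTL-based invalidation. ...',
);
// threadId === 'thread_b1ff711' → send as `thread_id` in the next consult_llm call
```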
package/dist/config.d.ts
CHANGED
@@ -1,9 +1,11 @@
 import { z } from 'zod/v4';
+/** Build the final model catalog from built-in + extra + allowlist filtering. */
+export declare function buildModelCatalog(builtinModels: readonly string[], extraModelsRaw?: string, allowedModelsRaw?: string): string[];
 export declare const SupportedChatModel: z.ZodEnum<{
     [x: string]: string;
 }>;
 export type SupportedChatModel = z.infer<typeof SupportedChatModel>;
-export declare const fallbackModel:
+export declare const fallbackModel: string;
 declare const Config: z.ZodObject<{
     openaiApiKey: z.ZodOptional<z.ZodString>;
     geminiApiKey: z.ZodOptional<z.ZodString>;
@@ -11,13 +13,15 @@ declare const Config: z.ZodObject<{
     defaultModel: z.ZodOptional<z.ZodEnum<{
         [x: string]: string;
     }>>;
-
+    geminiBackend: z.ZodDefault<z.ZodEnum<{
         api: "api";
-        cli: "cli";
+        "gemini-cli": "gemini-cli";
+        "cursor-cli": "cursor-cli";
     }>>;
-
+    openaiBackend: z.ZodDefault<z.ZodEnum<{
         api: "api";
-        cli: "cli";
+        "cursor-cli": "cursor-cli";
+        "codex-cli": "codex-cli";
     }>>;
     codexReasoningEffort: z.ZodOptional<z.ZodEnum<{
         none: "none";
@@ -33,5 +37,6 @@ type ParsedConfig = z.infer<typeof Config>;
 export type Config = ParsedConfig & {
     allowedModels: string[];
 };
+export declare function migrateBackendEnv(newVar: string | undefined, oldVar: string | undefined, providerCliValue: string, legacyName: string, newName: string): string | undefined;
 export declare const config: Config;
 export {};
package/dist/config.js
CHANGED
@@ -1,17 +1,33 @@
 import { z } from 'zod/v4';
 import { ALL_MODELS } from './models.js';
-
-
-
-
-
-
-
-
-
+import { logToFile } from './logger.js';
+/** Build the final model catalog from built-in + extra + allowlist filtering. */
+export function buildModelCatalog(builtinModels, extraModelsRaw, allowedModelsRaw) {
+    const extraModels = extraModelsRaw
+        ? extraModelsRaw
+              .split(',')
+              .map((m) => m.trim())
+              .filter((m) => m.length > 0)
+        : [];
+    const allAvailable = [
+        ...builtinModels,
+        ...extraModels.filter((m) => !builtinModels.includes(m)),
+    ];
+    const allowedModels = allowedModelsRaw
+        ? allowedModelsRaw
+              .split(',')
+              .map((m) => m.trim())
+              .filter((m) => m.length > 0)
+        : [];
+    return allowedModels.length > 0
+        ? allAvailable.filter((m) => allowedModels.includes(m))
+        : allAvailable;
+}
+const enabledModels = buildModelCatalog(ALL_MODELS, process.env.CONSULT_LLM_EXTRA_MODELS, process.env.CONSULT_LLM_ALLOWED_MODELS);
 if (enabledModels.length === 0) {
-
-
+    const msg = 'Invalid environment variables:\n  CONSULT_LLM_ALLOWED_MODELS: No valid models enabled.';
+    logToFile(`FATAL ERROR:\n${msg}`);
+    console.error(`❌ ${msg}`);
     process.exit(1);
 }
 // Dynamic Zod enum based on enabled models
@@ -24,28 +40,40 @@ const Config = z.object({
     geminiApiKey: z.string().optional(),
     deepseekApiKey: z.string().optional(),
     defaultModel: SupportedChatModel.optional(),
-
-
+    geminiBackend: z.enum(['api', 'gemini-cli', 'cursor-cli']).default('api'),
+    openaiBackend: z.enum(['api', 'codex-cli', 'cursor-cli']).default('api'),
     codexReasoningEffort: z
         .enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh'])
         .optional(),
     systemPromptPath: z.string().optional(),
 });
+// Migrate legacy GEMINI_MODE / OPENAI_MODE env vars
+export function migrateBackendEnv(newVar, oldVar, providerCliValue, legacyName, newName) {
+    if (newVar)
+        return newVar;
+    if (!oldVar)
+        return undefined;
+    const mapped = oldVar === 'cli' ? providerCliValue : oldVar;
+    logToFile(`DEPRECATED: ${legacyName}=${oldVar} → use ${newName}=${mapped} instead`);
+    return mapped;
+}
 const parsedConfig = Config.safeParse({
     openaiApiKey: process.env.OPENAI_API_KEY,
     geminiApiKey: process.env.GEMINI_API_KEY,
     deepseekApiKey: process.env.DEEPSEEK_API_KEY,
     defaultModel: process.env.CONSULT_LLM_DEFAULT_MODEL,
-
-
+    geminiBackend: migrateBackendEnv(process.env.GEMINI_BACKEND, process.env.GEMINI_MODE, 'gemini-cli', 'GEMINI_MODE', 'GEMINI_BACKEND'),
+    openaiBackend: migrateBackendEnv(process.env.OPENAI_BACKEND, process.env.OPENAI_MODE, 'codex-cli', 'OPENAI_MODE', 'OPENAI_BACKEND'),
    codexReasoningEffort: process.env.CODEX_REASONING_EFFORT,
    systemPromptPath: process.env.CONSULT_LLM_SYSTEM_PROMPT_PATH,
 });
 if (!parsedConfig.success) {
-
-
-
-    }
+    const details = parsedConfig.error.issues
+        .map((issue) => `  ${issue.path.join('.')}: ${issue.message}`)
+        .join('\n');
+    const msg = `Invalid environment variables:\n${details}`;
+    logToFile(`FATAL ERROR:\n${msg}`);
+    console.error(`❌ ${msg}`);
     process.exit(1);
 }
 export const config = {
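Taken together, `CONSULT_LLM_EXTRA_MODELS` and `CONSULT_LLM_ALLOWED_MODELS` feed the new `buildModelCatalog` shown above. A quick sketch of the resulting behavior, using a shortened built-in list purely for illustration (the real `ALL_MODELS` is longer):

```ts
import { buildModelCatalog } from './config.js';

// Shortened built-in list for illustration only.
const builtin = ['gpt-5.2', 'gemini-2.5-pro'];

buildModelCatalog(builtin);
// → ['gpt-5.2', 'gemini-2.5-pro']

buildModelCatalog(builtin, 'grok-3, gpt-5.2');
// → ['gpt-5.2', 'gemini-2.5-pro', 'grok-3']  (overlap with built-ins deduplicated)

buildModelCatalog(builtin, 'grok-3', 'gpt-5.2,grok-3');
// → ['gpt-5.2', 'grok-3']  (allowlist filters the combined catalog, order preserved)
```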
package/dist/config.test.d.ts
ADDED

@@ -0,0 +1 @@
+export {};
package/dist/config.test.js
ADDED

@@ -0,0 +1,68 @@
+import { describe, it, expect, vi } from 'vitest';
+import { migrateBackendEnv, buildModelCatalog } from './config.js';
+import { ALL_MODELS } from './models.js';
+vi.mock('./logger.js', () => ({ logToFile: vi.fn() }));
+describe('migrateBackendEnv', () => {
+    it('returns newVar when set, ignoring oldVar', () => {
+        expect(migrateBackendEnv('cursor-cli', 'cli', 'gemini-cli', 'GEMINI_MODE', 'GEMINI_BACKEND')).toBe('cursor-cli');
+    });
+    it('maps "cli" to provider-specific cli value', () => {
+        expect(migrateBackendEnv(undefined, 'cli', 'gemini-cli', 'GEMINI_MODE', 'GEMINI_BACKEND')).toBe('gemini-cli');
+    });
+    it('passes through non-cli values directly', () => {
+        expect(migrateBackendEnv(undefined, 'api', 'gemini-cli', 'GEMINI_MODE', 'GEMINI_BACKEND')).toBe('api');
+    });
+    it('returns undefined when both vars are missing', () => {
+        expect(migrateBackendEnv(undefined, undefined, 'gemini-cli', 'GEMINI_MODE', 'GEMINI_BACKEND')).toBeUndefined();
+    });
+    it('maps openai cli to codex-cli', () => {
+        expect(migrateBackendEnv(undefined, 'cli', 'codex-cli', 'OPENAI_MODE', 'OPENAI_BACKEND')).toBe('codex-cli');
+    });
+});
+describe('buildModelCatalog', () => {
+    it('returns all built-in models when no env vars are set', () => {
+        const result = buildModelCatalog(ALL_MODELS);
+        expect(result).toEqual([...ALL_MODELS]);
+    });
+    it('appends extra models to the catalog', () => {
+        const result = buildModelCatalog(ALL_MODELS, 'grok-3,kimi-k2.5');
+        expect(result).toContain('grok-3');
+        expect(result).toContain('kimi-k2.5');
+        expect(result.length).toBe(ALL_MODELS.length + 2);
+    });
+    it('deduplicates extra models that overlap with built-ins', () => {
+        const result = buildModelCatalog(ALL_MODELS, 'gpt-5.2,grok-3');
+        expect(result.filter((m) => m === 'gpt-5.2').length).toBe(1);
+        expect(result.length).toBe(ALL_MODELS.length + 1);
+    });
+    it('filters by allowlist from combined catalog', () => {
+        const result = buildModelCatalog(ALL_MODELS, 'grok-3', 'gpt-5.2,grok-3');
+        expect(result).toEqual(['gpt-5.2', 'grok-3']);
+    });
+    it('allowlist can include only extra models', () => {
+        const result = buildModelCatalog(ALL_MODELS, 'grok-3', 'grok-3');
+        expect(result).toEqual(['grok-3']);
+    });
+    it('allowlist filters out models not in catalog', () => {
+        const result = buildModelCatalog(ALL_MODELS, undefined, 'nonexistent');
+        expect(result).toEqual([]);
+    });
+    it('handles whitespace and empty entries in extra models', () => {
+        const result = buildModelCatalog(ALL_MODELS, ' grok-3 , , kimi-k2.5 ');
+        expect(result).toContain('grok-3');
+        expect(result).toContain('kimi-k2.5');
+        expect(result.length).toBe(ALL_MODELS.length + 2);
+    });
+    it('handles whitespace in allowlist', () => {
+        const result = buildModelCatalog(ALL_MODELS, undefined, ' gpt-5.2 , gemini-2.5-pro ');
+        expect(result).toContain('gpt-5.2');
+        expect(result).toContain('gemini-2.5-pro');
+        expect(result.length).toBe(2);
+    });
+    it('preserves built-in model order with extras appended', () => {
+        const result = buildModelCatalog(ALL_MODELS, 'aaa-model,zzz-model');
+        const builtinPart = result.slice(0, ALL_MODELS.length);
+        expect(builtinPart).toEqual([...ALL_MODELS]);
+        expect(result.slice(ALL_MODELS.length)).toEqual(['aaa-model', 'zzz-model']);
+    });
+});
package/dist/executors/api.js
ADDED

@@ -0,0 +1,29 @@
+import { logToFile } from '../logger.js';
+export function createApiExecutor(client) {
+    return {
+        capabilities: {
+            isCli: false,
+            supportsThreads: false,
+            supportsFileRefs: false,
+        },
+        async execute(prompt, model, systemPrompt, filePaths) {
+            if (filePaths && filePaths.length > 0) {
+                const msg = `File paths were provided but are not supported by the API executor for model ${model}. They will be ignored.`;
+                logToFile(`WARNING: ${msg}`);
+                console.warn(`Warning: ${msg}`);
+            }
+            const completion = await client.chat.completions.create({
+                model,
+                messages: [
+                    { role: 'system', content: systemPrompt },
+                    { role: 'user', content: prompt },
+                ],
+            });
+            const response = completion.choices[0]?.message?.content;
+            if (!response) {
+                throw new Error('No response from the model via API');
+            }
+            return { response, usage: completion.usage ?? null };
+        },
+    };
+}
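The API executor above appears to implement a shared executor contract: a `capabilities` descriptor plus an `execute` method that resolves to the response and optional usage. The actual declarations live in `dist/executors/types.d.ts`, which this diff section does not display, so the TypeScript below is an inference from `createApiExecutor` rather than the package's verbatim interface.

```ts
// Inferred shape; names approximate what types.d.ts likely declares.
interface ExecutorCapabilities {
  isCli: boolean;            // true for the gemini-cli / codex-cli / cursor-cli executors
  supportsThreads: boolean;  // CLI executors can resume sessions via thread_id
  supportsFileRefs: boolean; // CLI executors can open referenced files themselves
}

interface ExecutorResult {
  response: string;
  usage: unknown | null; // token usage when the backend reports it
}

interface Executor {
  capabilities: ExecutorCapabilities;
  execute(
    prompt: string,
    model: string,
    systemPrompt: string,
    filePaths?: string[],
  ): Promise<ExecutorResult>;
}
```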