hydramcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +192 -0
- package/dist/index.d.ts +31 -0
- package/dist/index.js +111 -0
- package/dist/orchestrator/cache.d.ts +44 -0
- package/dist/orchestrator/cache.js +80 -0
- package/dist/orchestrator/circuit-breaker.d.ts +34 -0
- package/dist/orchestrator/circuit-breaker.js +90 -0
- package/dist/orchestrator/config.d.ts +15 -0
- package/dist/orchestrator/config.js +50 -0
- package/dist/orchestrator/index.d.ts +37 -0
- package/dist/orchestrator/index.js +143 -0
- package/dist/orchestrator/metrics.d.ts +40 -0
- package/dist/orchestrator/metrics.js +77 -0
- package/dist/providers/anthropic.d.ts +20 -0
- package/dist/providers/anthropic.js +101 -0
- package/dist/providers/google.d.ts +18 -0
- package/dist/providers/google.js +123 -0
- package/dist/providers/multi-provider.d.ts +23 -0
- package/dist/providers/multi-provider.js +71 -0
- package/dist/providers/ollama.d.ts +26 -0
- package/dist/providers/ollama.js +87 -0
- package/dist/providers/openai.d.ts +17 -0
- package/dist/providers/openai.js +91 -0
- package/dist/providers/provider.d.ts +40 -0
- package/dist/providers/provider.js +9 -0
- package/dist/providers/subscription.d.ts +27 -0
- package/dist/providers/subscription.js +193 -0
- package/dist/server.d.ts +12 -0
- package/dist/server.js +238 -0
- package/dist/setup.d.ts +14 -0
- package/dist/setup.js +252 -0
- package/dist/tools/analyze-file.d.ts +40 -0
- package/dist/tools/analyze-file.js +227 -0
- package/dist/tools/ask-model.d.ts +49 -0
- package/dist/tools/ask-model.js +122 -0
- package/dist/tools/compare-models.d.ts +40 -0
- package/dist/tools/compare-models.js +104 -0
- package/dist/tools/consensus.d.ts +50 -0
- package/dist/tools/consensus.js +267 -0
- package/dist/tools/session-recap.d.ts +38 -0
- package/dist/tools/session-recap.js +341 -0
- package/dist/tools/smart-read.d.ts +45 -0
- package/dist/tools/smart-read.js +259 -0
- package/dist/tools/synthesize.d.ts +44 -0
- package/dist/tools/synthesize.js +182 -0
- package/dist/utils/compress.d.ts +27 -0
- package/dist/utils/compress.js +132 -0
- package/dist/utils/env.d.ts +11 -0
- package/dist/utils/env.js +44 -0
- package/dist/utils/logger.d.ts +14 -0
- package/dist/utils/logger.js +27 -0
- package/dist/utils/model-selection.d.ts +23 -0
- package/dist/utils/model-selection.js +54 -0
- package/dist/utils/session-reader.d.ts +67 -0
- package/dist/utils/session-reader.js +383 -0
- package/package.json +56 -0
package/LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 PicklePixel
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
package/README.md
ADDED
@@ -0,0 +1,192 @@
+<p align="center">
+  <img src="assets/HydraMCP.png" width="200" />
+</p>
+
+<h1 align="center">HydraMCP</h1>
+<p align="center">Connect agents to agents.</p>
+
+An MCP server that lets Claude Code query any LLM — compare, vote, and synthesize across GPT, Gemini, Claude, and local models from one terminal.
+
+## Quick Start
+
+```bash
+npx hydramcp setup
+```
+
+That's it. The wizard walks you through everything — API keys, subscriptions, local models. At the end it gives you the one-liner to add to Claude Code.
+
+Or if you already have API keys:
+
+```bash
+claude mcp add hydramcp -e OPENAI_API_KEY=sk-... -- npx hydramcp
+```
+
+## What It Looks Like
+
+Four models, four ecosystems, one prompt. Real output from a live session:
+
+```
+> compare gpt-5-codex, gemini-3, claude-sonnet, and local qwen on this function review
+
+## Model Comparison (4 models, 11637ms total)
+
+| Model                      | Latency         | Tokens |
+|----------------------------|-----------------|--------|
+| gpt-5-codex                | 1630ms fastest  | 194    |
+| gemini-3-pro-preview       | 11636ms         | 1235   |
+| claude-sonnet-4-5-20250929 | 3010ms          | 202    |
+| ollama/qwen2.5-coder:14b   | 8407ms          | 187    |
+```
+
+All four independently found the same async bug. Then each one caught something different the others missed.
+
+And this is consensus with a local judge:
+
+```
+> get consensus from gpt-5, gemini-3, and claude-sonnet. use local qwen as judge.
+
+## Consensus: REACHED
+
+Strategy: majority (needed 2/3)
+Agreement: 3/3 models (100%)
+Judge: ollama/qwen2.5-coder:14b (686ms)
+```
+
+Three cloud models polled, local model judging them. 686ms to evaluate agreement.
+
+## Tools
+
+| Tool | What It Does |
+|------|-------------|
+| **list_models** | See what's available across all providers |
+| **ask_model** | Query any model, optional response distillation |
+| **compare_models** | Same prompt to 2-5 models in parallel |
+| **consensus** | Poll 3-7 models, LLM-as-judge evaluates agreement |
+| **synthesize** | Combine best ideas from multiple models into one answer |
+| **analyze_file** | Offload file analysis to a worker model |
+| **smart_read** | Extract specific code sections without reading the whole file |
+| **session_recap** | Restore context from previous Claude Code sessions |
+
+From inside Claude Code, just say things like:
+- "ask gpt-5 to review this function"
+- "compare gemini and claude on this approach"
+- "get consensus from 3 models on whether this is thread safe"
+- "synthesize responses from all models on how to design this API"
+
+## How It Works
+
+```
+Claude Code
+    |
+HydraMCP (MCP Server)
+    |
+SmartProvider (circuit breaker, cache, metrics)
+    |
+MultiProvider (routes to the right backend)
+    |
+    |-- OpenAI    -> api.openai.com (API key)
+    |-- Google    -> Gemini API (API key)
+    |-- Anthropic -> api.anthropic.com (API key)
+    |-- Sub       -> CLI tools (Gemini CLI, Claude Code, Codex CLI)
+    |-- Ollama    -> local models (your hardware)
+```
+
+## Three Ways to Connect Models
+
+### API Keys (fastest setup)
+
+Set environment variables. HydraMCP auto-detects them.
+
+| Variable | Provider |
+|----------|----------|
+| `OPENAI_API_KEY` | OpenAI (GPT-4o, GPT-5, o3, etc.) |
+| `GOOGLE_API_KEY` | Google Gemini (2.5 Flash, Pro, etc.) |
+| `ANTHROPIC_API_KEY` | Anthropic Claude (Opus, Sonnet, Haiku) |
+
+### Subscriptions (use your monthly plan)
+
+Already paying for ChatGPT Plus, Claude Pro, or Gemini Advanced? HydraMCP wraps the CLI tools those subscriptions include. No API billing.
+
+```bash
+npx hydramcp setup   # auto-installs CLIs and runs auth
+```
+
+The setup wizard detects which CLIs you have, installs missing ones, and walks you through authentication. Each CLI authenticates via browser once — then it's stored forever.
+
+| Subscription | CLI Tool | What You Get |
+|-------------|----------|-------------|
+| Gemini Advanced | `gemini` | Gemini 2.5 Flash, Pro, etc. |
+| Claude Pro/Max | `claude` | Claude Opus, Sonnet, Haiku |
+| ChatGPT Plus/Pro | `codex` | GPT-5, o3, Codex models |
+
+### Local Models
+
+Install [Ollama](https://ollama.com), pull a model, done. Auto-detected.
+
+```bash
+ollama pull qwen2.5-coder:14b
+```
+
+### Mix and Match
+
+All three methods stack. Use API keys for some providers, subscriptions for others, and Ollama for local. They all show up in `list_models` together.
+
+Route explicitly with prefixes:
+- `openai/gpt-5` — force OpenAI API
+- `google/gemini-2.5-flash` — force Google API
+- `sub/gemini-2.5-flash` — force subscription CLI
+- `ollama/qwen2.5-coder:14b` — force local
+- `gpt-5` — auto-detect (tries each provider)
+
+## Setup Details
+
+### Option A: npx (recommended)
+
+```bash
+npx hydramcp setup                        # interactive wizard
+claude mcp add hydramcp -- npx hydramcp   # register with Claude Code
+```
+
+Config is saved to `~/.hydramcp/.env` and persists across npx runs.
+
+### Option B: Clone
+
+```bash
+git clone https://github.com/Pickle-Pixel/HydraMCP.git
+cd HydraMCP
+npm install && npm run build
+claude mcp add hydramcp -- node /path/to/HydraMCP/dist/index.js
+```
+
+### Verify
+
+Restart Claude Code and say "list models". You should see everything you configured.
+
+## Architecture
+
+HydraMCP wraps all providers in a **SmartProvider** layer that adds:
+
+- **Circuit breaker** — per-model failure tracking. After 3 failures, the model is disabled for 60s and auto-recovers.
+- **Response cache** — SHA-256 keyed, 15-minute TTL. Identical queries are served instantly.
+- **Metrics** — per-model query counts, latency, token usage, cache hit rates.
+- **Response distillation** — set `max_response_tokens` on any query and a cheap model compresses the response while preserving code, errors, and specifics.
+
+## Contributing
+
+Want to add a provider? The interface is three methods:
+
+```typescript
+interface Provider {
+  healthCheck(): Promise<boolean>;
+  listModels(): Promise<ModelInfo[]>;
+  query(model: string, prompt: string, options?: QueryOptions): Promise<QueryResponse>;
+}
+```
+
+See `src/providers/ollama.ts` for a working example. Implement it, register in `src/index.ts`, done.
+
+Providers we'd love to see: LM Studio, OpenRouter, Groq, Together AI, or anything that speaks HTTP.
+
+## License
+
+MIT
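The Contributing section above shows the three-method `Provider` interface and names LM Studio as a provider it would welcome. As a rough sense of scale, here is a sketch of what such a provider could look like against LM Studio's OpenAI-compatible local server. The `ModelInfo` and `QueryResponse` shapes below are stand-ins inferred from this diff, not the package's actual types from `provider.d.ts`, and the port is LM Studio's conventional default; treat this as an illustration, not a drop-in file.

```typescript
// Sketch only: type shapes are assumed, not the package's real ones.
interface ModelInfo { name: string; provider: string; }
interface QueryOptions { temperature?: number; }
interface QueryResponse { text: string; model: string; latencyMs: number; }

class LMStudioProvider {
  constructor(private baseUrl = "http://localhost:1234") {}

  async healthCheck(): Promise<boolean> {
    try {
      return (await fetch(`${this.baseUrl}/v1/models`)).ok;
    } catch {
      return false; // server not running
    }
  }

  async listModels(): Promise<ModelInfo[]> {
    const res = await fetch(`${this.baseUrl}/v1/models`);
    const body = (await res.json()) as { data: { id: string }[] };
    return body.data.map((m) => ({ name: m.id, provider: "lmstudio" }));
  }

  async query(model: string, prompt: string, options?: QueryOptions): Promise<QueryResponse> {
    const start = Date.now();
    const res = await fetch(`${this.baseUrl}/v1/chat/completions`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        model,
        messages: [{ role: "user", content: prompt }],
        temperature: options?.temperature,
      }),
    });
    const body = (await res.json()) as { choices: { message: { content: string } }[] };
    return { text: body.choices[0].message.content, model, latencyMs: Date.now() - start };
  }
}
```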
package/dist/index.d.ts
ADDED
@@ -0,0 +1,31 @@
+#!/usr/bin/env node
+/**
+ * HydraMCP — Entry point.
+ *
+ * Auto-detects available providers from environment and installed tools:
+ *
+ * API Keys (direct, fast):
+ *   OPENAI_API_KEY    → OpenAI (GPT-4o, GPT-5, o3, etc.)
+ *   GOOGLE_API_KEY    → Google Gemini (or GEMINI_API_KEY)
+ *   ANTHROPIC_API_KEY → Anthropic Claude
+ *
+ * Subscriptions (via installed CLI tools):
+ *   gemini CLI → Gemini Advanced subscription
+ *   claude CLI → Claude Pro/Max subscription
+ *   codex CLI  → ChatGPT Plus/Pro subscription
+ *
+ * Local models:
+ *   OLLAMA_URL → Ollama local models (auto-detected)
+ *
+ * Set any combination. HydraMCP registers what's available.
+ *
+ * Model routing:
+ *   "openai/gpt-4o"           → OpenAI API key
+ *   "google/gemini-2.5-flash" → Google API key
+ *   "anthropic/claude-..."    → Anthropic API key
+ *   "sub/gemini-2.5-flash"    → Gemini CLI subscription
+ *   "sub/claude-..."          → Claude CLI subscription
+ *   "ollama/llama3"           → local Ollama instance
+ *   "gpt-4o"                  → auto-detect (tries each provider)
+ */
+export {};
package/dist/index.js
ADDED
@@ -0,0 +1,111 @@
+#!/usr/bin/env node
+/**
+ * HydraMCP — Entry point.
+ *
+ * Auto-detects available providers from environment and installed tools:
+ *
+ * API Keys (direct, fast):
+ *   OPENAI_API_KEY    → OpenAI (GPT-4o, GPT-5, o3, etc.)
+ *   GOOGLE_API_KEY    → Google Gemini (or GEMINI_API_KEY)
+ *   ANTHROPIC_API_KEY → Anthropic Claude
+ *
+ * Subscriptions (via installed CLI tools):
+ *   gemini CLI → Gemini Advanced subscription
+ *   claude CLI → Claude Pro/Max subscription
+ *   codex CLI  → ChatGPT Plus/Pro subscription
+ *
+ * Local models:
+ *   OLLAMA_URL → Ollama local models (auto-detected)
+ *
+ * Set any combination. HydraMCP registers what's available.
+ *
+ * Model routing:
+ *   "openai/gpt-4o"           → OpenAI API key
+ *   "google/gemini-2.5-flash" → Google API key
+ *   "anthropic/claude-..."    → Anthropic API key
+ *   "sub/gemini-2.5-flash"    → Gemini CLI subscription
+ *   "sub/claude-..."          → Claude CLI subscription
+ *   "ollama/llama3"           → local Ollama instance
+ *   "gpt-4o"                  → auto-detect (tries each provider)
+ */
+import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
+import { OpenAIProvider } from "./providers/openai.js";
+import { GoogleProvider } from "./providers/google.js";
+import { AnthropicProvider } from "./providers/anthropic.js";
+import { SubscriptionProvider } from "./providers/subscription.js";
+import { OllamaProvider } from "./providers/ollama.js";
+import { MultiProvider } from "./providers/multi-provider.js";
+import { SmartProvider } from "./orchestrator/index.js";
+import { createServer } from "./server.js";
+import { logger } from "./utils/logger.js";
+import { loadEnv } from "./utils/env.js";
+async function main() {
+    // Setup wizard: npx hydramcp setup
+    if (process.argv.includes("setup")) {
+        const { runSetup } = await import("./setup.js");
+        await runSetup();
+        return;
+    }
+    // Load .env before anything reads process.env
+    loadEnv();
+    const multi = new MultiProvider();
+    const active = [];
+    // --- Native API providers (preferred — direct, fast, reliable) ---
+    if (process.env.OPENAI_API_KEY) {
+        multi.register("openai", new OpenAIProvider());
+        active.push("OpenAI");
+    }
+    if (process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY) {
+        multi.register("google", new GoogleProvider());
+        active.push("Google");
+    }
+    if (process.env.ANTHROPIC_API_KEY) {
+        multi.register("anthropic", new AnthropicProvider());
+        active.push("Anthropic");
+    }
+    // --- Subscription providers (CLI-based, uses monthly subscriptions) ---
+    const sub = new SubscriptionProvider();
+    const subCount = await sub.detect();
+    if (subCount > 0) {
+        multi.register("sub", sub);
+        active.push(`Subscriptions (${subCount} CLI tools)`);
+    }
+    // --- Local models ---
+    const ollama = new OllamaProvider();
+    if (await ollama.healthCheck()) {
+        multi.register("ollama", ollama);
+        active.push("Ollama");
+    }
+    // --- Startup summary ---
+    if (active.length === 0) {
+        logger.warn("No providers detected. Set at least one:\n" +
+            "\n" +
+            "  API Keys (direct access):\n" +
+            "    OPENAI_API_KEY — OpenAI (GPT-4o, GPT-5, o3, ...)\n" +
+            "    GOOGLE_API_KEY — Google Gemini\n" +
+            "    ANTHROPIC_API_KEY — Anthropic Claude\n" +
+            "\n" +
+            "  Subscriptions (install CLI tools, auth once):\n" +
+            "    npm i -g @google/gemini-cli → then: gemini auth\n" +
+            "    npm i -g @anthropic-ai/claude-code → then: claude\n" +
+            "    npm i -g @openai/codex → then: codex auth\n" +
+            "\n" +
+            "  Local models:\n" +
+            "    Install Ollama → ollama pull llama3\n" +
+            "\n" +
+            "HydraMCP will start anyway and retry on first request.");
+    }
+    else {
+        logger.info(`Providers: ${active.join(", ")}`);
+    }
+    // Wrap with SmartProvider (orchestrator: circuit breaker, caching, metrics)
+    const provider = new SmartProvider(multi);
+    const server = createServer(provider);
+    const transport = new StdioServerTransport();
+    await server.connect(transport);
+    logger.info(`HydraMCP running — ${active.length} provider(s) active`);
+}
+main().catch((err) => {
+    logger.error(`Fatal: ${err instanceof Error ? err.message : String(err)}`);
+    process.exit(1);
+});
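The routing table in the entry point's docblock ("openai/gpt-4o" hits the OpenAI key, bare "gpt-4o" auto-detects) implies a split-on-first-slash dispatch inside `MultiProvider`, whose source is not part of this excerpt. The following is a speculative sketch of that dispatch under assumed names, not the package's actual implementation:

```typescript
// Speculative sketch: MultiProvider's real code is not shown in this diff.
// `providers` mirrors the register() calls in index.js above.
type QueryFn = (model: string, prompt: string) => Promise<string>;

async function route(
  providers: Map<string, QueryFn>,
  model: string,
  prompt: string,
): Promise<string> {
  const slash = model.indexOf("/");
  if (slash > 0) {
    // Prefixed: "sub/gemini-2.5-flash" → provider "sub", model "gemini-2.5-flash"
    const provider = providers.get(model.slice(0, slash));
    if (!provider) throw new Error(`unknown provider prefix in "${model}"`);
    return provider(model.slice(slash + 1), prompt);
  }
  // Bare name: try each registered provider in registration order.
  let lastError: unknown;
  for (const provider of providers.values()) {
    try {
      return await provider(model, prompt);
    } catch (err) {
      lastError = err;
    }
  }
  throw lastError ?? new Error(`no provider available for "${model}"`);
}
```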
package/dist/orchestrator/cache.d.ts
ADDED
@@ -0,0 +1,44 @@
+/**
+ * Caches for the orchestrator layer.
+ *
+ * ResponseCache — caches query() results by content hash (SHA-256).
+ *   15-min TTL, 100 max entries, LRU eviction.
+ *
+ * ModelListCache — caches listModels() results.
+ *   30-sec TTL, single entry (latest result).
+ *   Deduplicates the 3-4 listModels() calls that happen per tool invocation
+ *   (pickCompressorModel, pickLargeContextModel, etc.)
+ */
+import { QueryResponse, QueryOptions, ModelInfo } from "../providers/provider.js";
+export interface ResponseCacheConfig {
+    ttlMs: number;
+    maxEntries: number;
+}
+export declare class ResponseCache {
+    private cache;
+    private config;
+    constructor(config: ResponseCacheConfig);
+    /** Build a cache key from query parameters. */
+    key(model: string, prompt: string, options?: QueryOptions): string;
+    /** Get a cached response, or null if missing/expired. */
+    get(key: string): QueryResponse | null;
+    /** Store a response in the cache. */
+    set(key: string, response: QueryResponse): void;
+    /** Number of entries in cache. */
+    get size(): number;
+}
+export interface ModelListCacheConfig {
+    ttlMs: number;
+}
+export declare class ModelListCache {
+    private models;
+    private timestamp;
+    private config;
+    constructor(config: ModelListCacheConfig);
+    /** Get cached model list, or null if stale/empty. */
+    get(): ModelInfo[] | null;
+    /** Store model list. */
+    set(models: ModelInfo[]): void;
+    /** Invalidate the cache (e.g., when circuit breaker state changes). */
+    invalidate(): void;
+}
package/dist/orchestrator/cache.js
ADDED
@@ -0,0 +1,80 @@
+/**
+ * Caches for the orchestrator layer.
+ *
+ * ResponseCache — caches query() results by content hash (SHA-256).
+ *   15-min TTL, 100 max entries, LRU eviction.
+ *
+ * ModelListCache — caches listModels() results.
+ *   30-sec TTL, single entry (latest result).
+ *   Deduplicates the 3-4 listModels() calls that happen per tool invocation
+ *   (pickCompressorModel, pickLargeContextModel, etc.)
+ */
+import { createHash } from "node:crypto";
+export class ResponseCache {
+    cache = new Map();
+    config;
+    constructor(config) {
+        this.config = config;
+    }
+    /** Build a cache key from query parameters. */
+    key(model, prompt, options) {
+        const raw = `${model}|${prompt}|${JSON.stringify(options ?? {})}`;
+        return createHash("sha256").update(raw).digest("hex");
+    }
+    /** Get a cached response, or null if missing/expired. */
+    get(key) {
+        const entry = this.cache.get(key);
+        if (!entry)
+            return null;
+        if (Date.now() - entry.timestamp > this.config.ttlMs) {
+            this.cache.delete(key);
+            return null;
+        }
+        // Move to end for LRU (delete + re-add preserves Map insertion order)
+        this.cache.delete(key);
+        this.cache.set(key, entry);
+        return entry.response;
+    }
+    /** Store a response in the cache. */
+    set(key, response) {
+        // LRU eviction: remove oldest entry if at capacity
+        if (this.cache.size >= this.config.maxEntries) {
+            const oldest = this.cache.keys().next().value;
+            if (oldest) {
+                this.cache.delete(oldest);
+            }
+        }
+        this.cache.set(key, { response, timestamp: Date.now() });
+    }
+    /** Number of entries in cache. */
+    get size() {
+        return this.cache.size;
+    }
+}
+export class ModelListCache {
+    models = null;
+    timestamp = 0;
+    config;
+    constructor(config) {
+        this.config = config;
+    }
+    /** Get cached model list, or null if stale/empty. */
+    get() {
+        if (!this.models)
+            return null;
+        if (Date.now() - this.timestamp > this.config.ttlMs) {
+            this.models = null;
+            return null;
+        }
+        return this.models;
+    }
+    /** Store model list. */
+    set(models) {
+        this.models = models;
+        this.timestamp = Date.now();
+    }
+    /** Invalidate the cache (e.g., when circuit breaker state changes). */
+    invalidate() {
+        this.models = null;
+    }
+}
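A quick usage sketch of `ResponseCache`: the key is a SHA-256 digest of the `(model, prompt, options)` triple, so identical queries collide on purpose, and a hit re-inserts the entry so eviction always removes the least recently used key. The deep import path and the response object below are illustrative assumptions; the real `QueryResponse` shape lives in `provider.d.ts`.

```typescript
// Illustrative usage; import path and response shape are assumptions.
import { ResponseCache } from "hydramcp/dist/orchestrator/cache.js";

const cache = new ResponseCache({ ttlMs: 900_000, maxEntries: 100 });

// Identical inputs produce the same 64-char hex key.
const k1 = cache.key("gpt-4o", "Review this function", { temperature: 0 });
const k2 = cache.key("gpt-4o", "Review this function", { temperature: 0 });
console.log(k1 === k2); // true

cache.set(k1, { text: "stub response" }); // stand-in for a QueryResponse
console.log(cache.get(k1));               // hit (and k1 is now most recently used)
console.log(cache.get(cache.key("gpt-4o", "other prompt"))); // null (miss)
```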
package/dist/orchestrator/circuit-breaker.d.ts
ADDED
@@ -0,0 +1,34 @@
+/**
+ * Circuit breaker — per-model failure tracking with automatic recovery.
+ *
+ * States:
+ *   closed    → normal operation, requests flow through
+ *   open      → model disabled after N consecutive failures, requests rejected
+ *   half-open → cooldown expired, allow one retry to test recovery
+ *
+ * On success → reset to closed.
+ * On failure in half-open → back to open with fresh cooldown.
+ */
+export type CircuitState = "closed" | "open" | "half-open";
+export interface CircuitBreakerConfig {
+    maxFailures: number;
+    cooldownMs: number;
+}
+export declare class CircuitBreaker {
+    private circuits;
+    private config;
+    constructor(config: CircuitBreakerConfig);
+    /** Check if the circuit is open (model should not be called). */
+    isOpen(model: string): boolean;
+    /** Record a successful query — reset circuit to closed. */
+    recordSuccess(model: string): void;
+    /** Record a failed query — increment failures, potentially open circuit. */
+    recordFailure(model: string): void;
+    /** Get status of all tracked models. */
+    getStatus(): Map<string, {
+        state: CircuitState;
+        failures: number;
+    }>;
+    /** Get list of models currently in open state. */
+    getOpenModels(): string[];
+}
package/dist/orchestrator/circuit-breaker.js
ADDED
@@ -0,0 +1,90 @@
+/**
+ * Circuit breaker — per-model failure tracking with automatic recovery.
+ *
+ * States:
+ *   closed    → normal operation, requests flow through
+ *   open      → model disabled after N consecutive failures, requests rejected
+ *   half-open → cooldown expired, allow one retry to test recovery
+ *
+ * On success → reset to closed.
+ * On failure in half-open → back to open with fresh cooldown.
+ */
+import { logger } from "../utils/logger.js";
+export class CircuitBreaker {
+    circuits = new Map();
+    config;
+    constructor(config) {
+        this.config = config;
+    }
+    /** Check if the circuit is open (model should not be called). */
+    isOpen(model) {
+        const circuit = this.circuits.get(model);
+        if (!circuit || circuit.state === "closed")
+            return false;
+        if (circuit.state === "open") {
+            // Check if cooldown has expired → transition to half-open
+            if (Date.now() - circuit.lastFailureTime >= this.config.cooldownMs) {
+                circuit.state = "half-open";
+                logger.info(`circuit-breaker: ${model} → half-open (cooldown expired, allowing retry)`);
+                return false; // Allow one retry
+            }
+            return true; // Still in cooldown
+        }
+        // half-open: allow the retry
+        return false;
+    }
+    /** Record a successful query — reset circuit to closed. */
+    recordSuccess(model) {
+        const circuit = this.circuits.get(model);
+        if (!circuit)
+            return;
+        if (circuit.state !== "closed") {
+            logger.info(`circuit-breaker: ${model} → closed (recovered)`);
+        }
+        this.circuits.delete(model); // Clean state = no entry
+    }
+    /** Record a failed query — increment failures, potentially open circuit. */
+    recordFailure(model) {
+        const circuit = this.circuits.get(model) ?? {
+            state: "closed",
+            failures: 0,
+            lastFailureTime: 0,
+        };
+        circuit.failures++;
+        circuit.lastFailureTime = Date.now();
+        if (circuit.state === "half-open") {
+            // Retry failed — back to open
+            circuit.state = "open";
+            logger.warn(`circuit-breaker: ${model} → open (retry failed, ${circuit.failures} total failures)`);
+        }
+        else if (circuit.failures >= this.config.maxFailures) {
+            circuit.state = "open";
+            logger.warn(`circuit-breaker: ${model} → open (${circuit.failures} consecutive failures)`);
+        }
+        this.circuits.set(model, circuit);
+    }
+    /** Get status of all tracked models. */
+    getStatus() {
+        const status = new Map();
+        for (const [model, circuit] of this.circuits) {
+            status.set(model, {
+                state: circuit.state,
+                failures: circuit.failures,
+            });
+        }
+        return status;
+    }
+    /** Get list of models currently in open state. */
+    getOpenModels() {
+        const open = [];
+        for (const [model, circuit] of this.circuits) {
+            if (circuit.state === "open") {
+                // Check if still within cooldown
+                if (Date.now() - circuit.lastFailureTime < this.config.cooldownMs) {
+                    open.push(model);
+                }
+            }
+        }
+        return open;
+    }
+}
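To see the state machine end-to-end, here is a walkthrough using only the methods declared above. With the default config this matches the README's "after 3 failures, disabled for 60s" behavior; the deep import path is an assumption for illustration.

```typescript
// Walkthrough of the circuit breaker's states (import path is assumed).
import { CircuitBreaker } from "hydramcp/dist/orchestrator/circuit-breaker.js";

const breaker = new CircuitBreaker({ maxFailures: 3, cooldownMs: 60_000 });

breaker.recordFailure("openai/gpt-4o"); // closed, 1 failure
breaker.recordFailure("openai/gpt-4o"); // closed, 2 failures
breaker.recordFailure("openai/gpt-4o"); // → open (3 consecutive failures)

console.log(breaker.isOpen("openai/gpt-4o")); // true: rejected during cooldown
console.log(breaker.getOpenModels());         // ["openai/gpt-4o"]

// Once 60s have passed, the next isOpen() call flips the circuit to
// half-open and returns false, letting exactly one probe request through.
// If that probe succeeds, the entry is deleted and the circuit is closed:
breaker.recordSuccess("openai/gpt-4o");
console.log(breaker.isOpen("openai/gpt-4o")); // false
```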
package/dist/orchestrator/config.d.ts
ADDED
@@ -0,0 +1,15 @@
+/**
+ * Orchestrator configuration — sensible defaults with env var overrides.
+ */
+export interface OrchestratorConfig {
+    maxFailures: number;
+    cooldownMs: number;
+    queryCacheTtlMs: number;
+    queryCacheMaxEntries: number;
+    modelListCacheTtlMs: number;
+    enableCache: boolean;
+    enableCircuitBreaker: boolean;
+}
+export declare const DEFAULT_CONFIG: OrchestratorConfig;
+/** Read config from env vars, falling back to defaults. */
+export declare function loadConfig(overrides?: Partial<OrchestratorConfig>): OrchestratorConfig;
package/dist/orchestrator/config.js
ADDED
@@ -0,0 +1,50 @@
+/**
+ * Orchestrator configuration — sensible defaults with env var overrides.
+ */
+export const DEFAULT_CONFIG = {
+    maxFailures: 3,
+    cooldownMs: 60_000, // 1 minute
+    queryCacheTtlMs: 900_000, // 15 minutes
+    queryCacheMaxEntries: 100,
+    modelListCacheTtlMs: 30_000, // 30 seconds
+    enableCache: true,
+    enableCircuitBreaker: true,
+};
+/** Read config from env vars, falling back to defaults. */
+export function loadConfig(overrides) {
+    const env = process.env;
+    return {
+        maxFailures: overrides?.maxFailures ??
+            intEnv(env.HYDRA_CB_MAX_FAILURES) ??
+            DEFAULT_CONFIG.maxFailures,
+        cooldownMs: overrides?.cooldownMs ??
+            intEnv(env.HYDRA_CB_COOLDOWN_MS) ??
+            DEFAULT_CONFIG.cooldownMs,
+        queryCacheTtlMs: overrides?.queryCacheTtlMs ??
+            intEnv(env.HYDRA_CACHE_TTL_MS) ??
+            DEFAULT_CONFIG.queryCacheTtlMs,
+        queryCacheMaxEntries: overrides?.queryCacheMaxEntries ??
+            intEnv(env.HYDRA_CACHE_MAX_ENTRIES) ??
+            DEFAULT_CONFIG.queryCacheMaxEntries,
+        modelListCacheTtlMs: overrides?.modelListCacheTtlMs ??
+            intEnv(env.HYDRA_MODEL_CACHE_TTL_MS) ??
+            DEFAULT_CONFIG.modelListCacheTtlMs,
+        enableCache: overrides?.enableCache ??
+            boolEnv(env.HYDRA_CACHE_ENABLED) ??
+            DEFAULT_CONFIG.enableCache,
+        enableCircuitBreaker: overrides?.enableCircuitBreaker ??
+            boolEnv(env.HYDRA_CB_ENABLED) ??
+            DEFAULT_CONFIG.enableCircuitBreaker,
+    };
+}
+function intEnv(val) {
+    if (val === undefined)
+        return undefined;
+    const n = parseInt(val, 10);
+    return isNaN(n) ? undefined : n;
+}
+function boolEnv(val) {
+    if (val === undefined)
+        return undefined;
+    return val === "true" || val === "1";
+}
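Tying `loadConfig` to the env var names it reads: every knob resolves in the order explicit override → `HYDRA_*` env var → default. A small sketch (values illustrative, import path assumed):

```typescript
// Resolution order demo: override → env var → default (import path assumed).
import { loadConfig, DEFAULT_CONFIG } from "hydramcp/dist/orchestrator/config.js";

process.env.HYDRA_CB_MAX_FAILURES = "5"; // intEnv parses this to 5
process.env.HYDRA_CACHE_ENABLED = "0";   // boolEnv: only "true"/"1" count as true

const config = loadConfig({ cooldownMs: 30_000 }); // explicit override wins

console.log(config.maxFailures);  // 5     (from HYDRA_CB_MAX_FAILURES)
console.log(config.cooldownMs);   // 30000 (from the override, not the 60s default)
console.log(config.enableCache);  // false ("0" parses to false, default ignored)
console.log(DEFAULT_CONFIG.queryCacheTtlMs); // 900000 (defaults stay untouched)
```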