@index9/mcp 5.1.0 → 5.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -2
- package/dist/cli.js +490 -270
- package/manifest.json +10 -2
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# @index9/mcp
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Discover, shortlist, compare, cost-model, and live-test 300+ AI models from your editor
|
|
4
4
|
|
|
5
5
|
## Install
|
|
6
6
|
|
|
@@ -8,7 +8,25 @@ Search, inspect, and benchmark 300+ AI models from your editor
|
|
|
8
8
|
npx -y @index9/mcp@latest
|
|
9
9
|
```
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
## OpenRouter API key
|
|
12
|
+
|
|
13
|
+
Optional: set `OPENROUTER_API_KEY` in your MCP client config for live `test_model` calls. `dryRun=true` works without a key.
|
|
14
|
+
|
|
15
|
+
Add the key as `OPENROUTER_API_KEY` in the MCP server environment for your client. Example:
|
|
16
|
+
|
|
17
|
+
```json
|
|
18
|
+
{
|
|
19
|
+
"mcpServers": {
|
|
20
|
+
"index9": {
|
|
21
|
+
"command": "npx",
|
|
22
|
+
"args": ["-y", "@index9/mcp@latest"],
|
|
23
|
+
"env": {
|
|
24
|
+
"OPENROUTER_API_KEY": "sk-or-..."
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
```
|
|
12
30
|
|
|
13
31
|
**Claude Code:** Run `claude mcp add --transport stdio index9 -- npx -y @index9/mcp` or add the same config to .mcp.json / ~/.claude.json.
|
|
14
32
|
|
|
@@ -16,6 +34,8 @@ Optional: `OPENROUTER_API_KEY` for live test_model calls.
|
|
|
16
34
|
|
|
17
35
|
- **find_models** — Search and paginate AI models by semantic query or filters
|
|
18
36
|
- **get_models** — Get full model metadata by IDs or aliases (batch, up to 100)
|
|
37
|
+
- **compare_models** — Diff 2-10 models across pricing, context, capabilities, and tokenizer
|
|
38
|
+
- **list_facets** — Enumerate available providers, capabilities, modalities, and tokenizers
|
|
19
39
|
- **test_model** — Run live inference or dry-run cost estimation across up to 10 models (live inference requires an OpenRouter API key; dry runs do not)
|
|
20
40
|
|
|
21
41
|
## Response Metadata
|
package/dist/cli.js
CHANGED
|
@@ -4,7 +4,9 @@
|
|
|
4
4
|
var API_PATHS = {
|
|
5
5
|
search: "/api/search",
|
|
6
6
|
model: "/api/model",
|
|
7
|
-
test: "/api/test"
|
|
7
|
+
test: "/api/test",
|
|
8
|
+
compare: "/api/compare",
|
|
9
|
+
facets: "/api/facets"
|
|
8
10
|
};
|
|
9
11
|
var CAPABILITIES = [
|
|
10
12
|
"function_calling",
|
|
@@ -73,26 +75,36 @@ var LIMITS = {
|
|
|
73
75
|
searchMax: 100,
|
|
74
76
|
searchDefault: 20,
|
|
75
77
|
getModelsMax: 100,
|
|
76
|
-
testModelsMax: 10
|
|
78
|
+
testModelsMax: 10,
|
|
79
|
+
compareModelsMax: 10
|
|
77
80
|
};
|
|
78
81
|
var MODEL_COUNT = "300+";
|
|
79
|
-
var WORKFLOW_INSTRUCTIONS = `Index9 provides
|
|
82
|
+
var WORKFLOW_INSTRUCTIONS = `Index9 provides 5 tools for AI model discovery, inspection, comparison, and benchmarking.
|
|
80
83
|
|
|
81
84
|
IMPORTANT \u2014 your model knowledge is stale by default.
|
|
82
|
-
New AI models ship weekly; pricing, aliases, and capabilities change. Treat any specific model ID, "flagship" name, or "good default" you recall from training as potentially out of date. Before naming, recommending, or benchmarking models, call find_models to anchor on what is actually live right now.
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
85
|
+
New AI models ship weekly; pricing, aliases, and capabilities change. Treat any specific model ID, "flagship" name, or "good default" you recall from training as potentially out of date. Before naming, recommending, or benchmarking models, call find_models to anchor on what is actually live right now.
|
|
86
|
+
|
|
87
|
+
When a user asks you to recommend or pick specific models, you MUST call find_models at least once with \`sortBy=created\` (newest first, no q) before committing to a shortlist. Sorting purely by price or using a semantic q alone returns the cheapest-ever cache entries \u2014 which are often superseded by newer cheap models you should have considered. A good pattern:
|
|
88
|
+
1. find_models sortBy=created, limit=10 \u2014 see what has landed recently (last weeks/months).
|
|
89
|
+
2. Optional second call: find_models with the task query or price filter to surface established options.
|
|
90
|
+
3. Cross-reference and pick 2-4 candidates that span recent + established.
|
|
91
|
+
|
|
92
|
+
Skip the anchor call only when the user has typed a specific provider/model-id.
|
|
86
93
|
|
|
87
94
|
Typical workflow:
|
|
88
|
-
1.
|
|
89
|
-
2.
|
|
90
|
-
3.
|
|
95
|
+
1. list_facets \u2014 Optional first call to learn the live filter vocabulary (providers, capabilities, modalities, tokenizers) before constructing find_models filters.
|
|
96
|
+
2. find_models \u2014 Discover models by semantic query or filters. Start here unless the user named a specific model.
|
|
97
|
+
3. get_models \u2014 Full metadata for specific IDs or aliases. Use after search, or directly when the user names a model.
|
|
98
|
+
4. compare_models \u2014 Side-by-side diff of 2-10 candidates with cheapest/largest-context picks. Use when the user is choosing between specific finalists.
|
|
99
|
+
5. test_model \u2014 Run live inference, or set dryRun=true to estimate token usage/cost without running inference.
|
|
91
100
|
|
|
92
101
|
Key rules:
|
|
93
102
|
- find_models requires \`q\` when \`sortBy=relevance\` (the default). Omit \`q\` only with \`sortBy=created\` or \`sortBy=price\`.
|
|
94
|
-
-
|
|
95
|
-
-
|
|
103
|
+
- find_models price-asc tends to be dominated by free preview models \u2014 pass \`excludeFree=true\` when you want a paid SLA.
|
|
104
|
+
- find_models flags \`meta.confidence: "low"\` when no candidate matched on keyword (BM25). When that fires, prefer \`meta.suggestion\` over the returned scores; weak hits are capped at score=30 so they don't masquerade as strong matches.
|
|
105
|
+
- get_models accepts aliases (display names, short names) \u2014 not just full IDs. Unknown ids return in missingIds with \`suggestions\` (token-fuzzy or recency-anchored newest-from-provider). Retry with one of the suggested ids.
|
|
106
|
+
- compare_models accepts the same alias formats as get_models. Use it instead of N parallel get_models calls when the user is comparing finalists.
|
|
107
|
+
- Use test_model with \`dryRun=true\` to estimate cost before live testing. Pass \`expectedPromptTokens\` for capacity planning at sizes you don't want to paste in full.
|
|
96
108
|
- test_model with \`dryRun=false\` (default) requires OPENROUTER_API_KEY and incurs real usage costs.
|
|
97
109
|
- Reasoning-capable models (capabilities includes "reasoning") burn hidden reasoning tokens against \`maxTokens\` before emitting visible text. Leave \`maxTokens\` unset, or set it to at least 2000, when testing reasoning models \u2014 otherwise results may fail with finish_reason=length.
|
|
98
110
|
- Cursors are opaque and tied to query/sort/filters. Reuse the same query/sort/filters when paginating. \`limit\` may change between pages.`;
|
|
@@ -101,25 +113,28 @@ var TOOLS = {
|
|
|
101
113
|
name: "find_models",
|
|
102
114
|
title: "Search AI models",
|
|
103
115
|
summary: "Search and paginate AI models by semantic query or filters",
|
|
104
|
-
description: `
|
|
116
|
+
description: `Filter ${MODEL_COUNT} AI models by structured constraints (capabilities, price, context, modality, provider). The semantic \`q\` is a soft tiebreaker on top of filters \u2014 trust filters first; weak \`q\`-only queries surface as \`meta.confidence: "low"\` with a hint to drop \`q\` and use \`sortBy=created\` or \`sortBy=price\` instead.
|
|
105
117
|
|
|
106
|
-
Call this
|
|
118
|
+
Call this after list_facets (or directly when you already know the filters). Always include one call with sortBy=created (no q) when recommending models to the user \u2014 your training-data list of "good" models is likely stale, and sorting purely by price surfaces old tiny models and misses this month's cheap frontier.
|
|
107
119
|
|
|
108
|
-
Extract filters from user queries.
|
|
120
|
+
Extract filters from user queries. Numeric and categorical constraints MUST go in structured filters; \`q\` is for semantic flavor. Shorthand: 1K=1000, 1M=1000000. Prices are USD per million input tokens.
|
|
109
121
|
|
|
110
122
|
Examples:
|
|
111
123
|
- "1M context under $1" \u2192 q="model", minContext=1000000, maxPrice=1
|
|
112
124
|
- "cheap vision model from openai" \u2192 q="cheap vision model", capabilitiesAll=["vision"], provider="openai"
|
|
113
125
|
- "function calling under $0.50 with 128K" \u2192 q="function calling", capabilitiesAll=["function_calling"], maxPrice=0.5, minContext=128000
|
|
126
|
+
- "cheapest paid model" \u2192 sortBy="price", sortOrder="asc", excludeFree=true
|
|
114
127
|
- "best coding model" \u2192 q="best coding model"
|
|
115
128
|
- "what's new" \u2192 sortBy="created" (no q needed)
|
|
116
129
|
|
|
117
130
|
Valid capabilities: ${CAPABILITIES.join(", ")}.
|
|
118
131
|
|
|
119
|
-
Each result: id, name, description, created (unix seconds), createdAt (ISO 8601), contextLength, maxOutputTokens, pricing.{promptPerMillion, completionPerMillion} (numbers, USD per million tokens), capabilities[], score.
|
|
132
|
+
Each result: id, name, description, created (unix seconds), createdAt (ISO 8601), contextLength, maxOutputTokens, pricing.{promptPerMillion, completionPerMillion} (numbers, USD per million tokens), inputModalities[] / outputModalities[] (e.g. ["text","image"] \u2014 check at a glance to spot text-only vs multimodal models), capabilities[], score.
|
|
120
133
|
|
|
121
134
|
\`score\` is 0-100: the best match per page scores 100; others scale proportionally. Combines semantic similarity and keyword matching. Null when sorting by price or date.
|
|
122
135
|
|
|
136
|
+
\`q\` must be at least 2 characters when provided. \`meta.confidence\` is "low" when no candidate matched on keyword (BM25), meaning the ranker fell back to vector similarity alone \u2014 typo, gibberish, or a query the catalog can't answer. When low, \`meta.suggestion\` carries an actionable hint and \`score\` values are capped at 30 so weak hits don't masquerade as strong ones.
|
|
137
|
+
|
|
123
138
|
Pass result.id to get_models for full specs or to test_model for live testing.`,
|
|
124
139
|
requiresKey: false
|
|
125
140
|
},
|
|
@@ -131,7 +146,7 @@ Pass result.id to get_models for full specs or to test_model for live testing.`,
|
|
|
131
146
|
|
|
132
147
|
Call after find_models to inspect candidates, or directly when the user names a model (format: 'provider/model-name').
|
|
133
148
|
|
|
134
|
-
Response: { results: (Model | null)[], missingIds: string[], resolvedAliases?: Record<alias, canonicalId>, ambiguousAliases?: Record<alias, candidateIds[]> }. Each non-null result has:
|
|
149
|
+
Response: { results: (Model | null)[], missingIds: string[], resolvedAliases?: Record<alias, canonicalId>, ambiguousAliases?: Record<alias, candidateIds[]>, suggestions?: Record<unknownId, candidateIds[]> }. Each non-null result has:
|
|
135
150
|
- id, canonicalSlug, name, description
|
|
136
151
|
- created (unix seconds), createdAt (ISO 8601), knowledgeCutoff (ISO date or null)
|
|
137
152
|
- contextLength (tokens), maxOutputTokens, isModerated
|
|
@@ -140,7 +155,35 @@ Response: { results: (Model | null)[], missingIds: string[], resolvedAliases?: R
|
|
|
140
155
|
- capabilities[]: normalized capability flags (same values as find_models and capabilitiesAll/Any)
|
|
141
156
|
- supportedParameters[]: OpenRouter parameters the model accepts (e.g., "temperature", "tools", "response_format")
|
|
142
157
|
|
|
143
|
-
Entries in results are null when the id is unknown; those ids appear in missingIds. Ambiguous aliases appear in ambiguousAliases with candidate canonical ids \u2014 pass a canonical id to disambiguate.`,
|
|
158
|
+
Entries in results are null when the id is unknown; those ids appear in missingIds. Ambiguous aliases appear in ambiguousAliases with candidate canonical ids \u2014 pass a canonical id to disambiguate. Unknown ids that partially match (e.g. "sonnet" \u2192 all Claude Sonnet variants) appear in suggestions with up to 5 candidate ids. When token-overlap finds nothing but the id is shaped like \`provider/<unknown>\` and the provider exists, suggestions falls back to the 5 newest models from that provider (real created timestamps, no hardcoded "popular" list). Retry with one of the suggested ids.`,
|
|
159
|
+
requiresKey: false
|
|
160
|
+
},
|
|
161
|
+
compare_models: {
|
|
162
|
+
name: "compare_models",
|
|
163
|
+
title: "Compare AI models side-by-side",
|
|
164
|
+
summary: "Diff 2-10 models across pricing, context, capabilities, and tokenizer",
|
|
165
|
+
description: `Compare 2-${LIMITS.compareModelsMax} models side-by-side. Returns each model's full metadata plus a diff matrix highlighting which fields are equal and which differ.
|
|
166
|
+
|
|
167
|
+
Use this when the user asks "which is cheaper / has more context / supports X" across multiple specific models. Faster than calling get_models and diffing yourself.
|
|
168
|
+
|
|
169
|
+
Response: { models: ModelResponse[], diff: { contextLength, maxOutputTokens, promptPricePerMillion, completionPricePerMillion, tokenizer, inputModalities, outputModalities, capabilities, supportedParameters }, cheapestForPromptPerMillion, largestContext, missingIds, resolvedAliases?, ambiguousAliases?, suggestions? }.
|
|
170
|
+
|
|
171
|
+
Each numeric/string diff field has { allEqual: boolean, values: Record<id, value|null> }. Capability/parameter diffs have { commonAll: string[], uniquePerModel: Record<id, string[]> }. cheapestForPromptPerMillion / largestContext are convenience picks across the supplied models \u2014 null when the field is missing on every model.
|
|
172
|
+
|
|
173
|
+
Optional: pass \`expectedPromptTokens\` AND \`expectedCompletionTokens\` to also receive \`workloadCosts\` (per-model totalCostUsd) and \`cheapestForRealisticWorkload\` \u2014 the actual cheapest given the user's expected token mix. This matters when prompt:completion price ratios diverge across models (e.g., a model with cheap prompt but expensive completion can lose against a flatter-priced sibling under heavy completions).
|
|
174
|
+
|
|
175
|
+
Accepts the same alias formats as get_models. Unknown ids are returned in missingIds (with suggestions when partial matches exist).`,
|
|
176
|
+
requiresKey: false
|
|
177
|
+
},
|
|
178
|
+
list_facets: {
|
|
179
|
+
name: "list_facets",
|
|
180
|
+
title: "List filter vocabulary",
|
|
181
|
+
summary: "Enumerate available providers, capabilities, modalities, and tokenizers",
|
|
182
|
+
description: `Return the live vocabulary derived from the current model cache: providers (id-prefixes with model counts), capability flags (with counts), input/output modalities, and tokenizers.
|
|
183
|
+
|
|
184
|
+
Use this once at the start of a session to learn what filter values find_models will accept, instead of trial-and-error against find_models.
|
|
185
|
+
|
|
186
|
+
Response: { providers: { id, modelCount }[], capabilities: { id, modelCount }[], modalities: { input: string[], output: string[] }, tokenizers: string[], totalModels: number, updatedAt: string (ISO 8601) }. No inputs.`,
|
|
144
187
|
requiresKey: false
|
|
145
188
|
},
|
|
146
189
|
test_model: {
|
|
@@ -152,13 +195,14 @@ Entries in results are null when the id is unknown; those ids appear in missingI
|
|
|
152
195
|
When dryRun=true:
|
|
153
196
|
- No OpenRouter API key required
|
|
154
197
|
- No inference call is made
|
|
155
|
-
- prompt is required
|
|
198
|
+
- Either \`prompt\` OR \`expectedPromptTokens\` is required (use the latter for capacity planning at sizes you don't want to paste in full)
|
|
156
199
|
- expectedCompletionTokens defaults to 256 when omitted
|
|
157
200
|
|
|
158
201
|
Parameters:
|
|
159
202
|
- models: 1-${LIMITS.testModelsMax} model IDs to test (all receive identical prompts)
|
|
160
|
-
- prompt: Prompt text (required for dryRun; required for live unless userContent provided)
|
|
203
|
+
- prompt: Prompt text (required for dryRun unless expectedPromptTokens is set; required for live unless userContent provided)
|
|
161
204
|
- dryRun: If true, return cost estimates only
|
|
205
|
+
- expectedPromptTokens: Estimated prompt-token count for dryRun cost estimation; overrides the prompt-string heuristic. Use to model "what would N-token requests cost?" without pasting N tokens.
|
|
162
206
|
- expectedCompletionTokens: Optional completion token estimate used by dryRun
|
|
163
207
|
- maxTokens, systemPrompt, temperature, topP, seed, responseFormat, enforceJson, retries: Live-testing controls (ignored when dryRun=true)
|
|
164
208
|
|
|
@@ -167,13 +211,15 @@ Results (live): each result carries modelId (the id you passed), resolvedModelId
|
|
|
167
211
|
}
|
|
168
212
|
};
|
|
169
213
|
var PARAM_DESCRIPTIONS = {
|
|
170
|
-
q: "Natural language search query describing desired model characteristics (e.g., 'fast cheap coding model'). Uses semantic search with fuzzy matching. Optional
|
|
214
|
+
q: "Natural language search query describing desired model characteristics (e.g., 'fast cheap coding model'). Uses semantic search with fuzzy matching. Must be at least 2 characters when provided. Optional \u2014 omit (along with sortBy=created or sortBy=price) to use filters only.",
|
|
171
215
|
sortBy: `Sort order for results. Options: 'relevance' (best semantic match, default), 'created' (newest models), 'price' (cheapest/most expensive, with sortOrder). Defaults to 'relevance'.`,
|
|
172
216
|
cursor: `Opaque pagination cursor from a previous response's \`nextCursor\` field. IMPORTANT: cursors are bound to the exact query text, filters, and sort order that produced them. Reuse the same query+filters+sort when paginating. \`limit\` may change between pages. To start a new search, omit the cursor.`,
|
|
173
217
|
capabilitiesAll: `Array of capabilities that must ALL be present on the model (AND logic). Valid values: ${CAPABILITIES.join(", ")}. Example: ["function_calling","vision"].`,
|
|
174
218
|
capabilitiesAny: `Array of capabilities where at least ONE must be present (OR logic). Valid values: ${CAPABILITIES.join(", ")}. Example: ["vision","audio_input"].`,
|
|
175
219
|
modality: `Required output modality. Filters on the model's output modalities, not input capabilities. For example, "image" finds image-generation models, while capabilitiesAll=["vision"] finds models that accept image input. Valid values: ${OUTPUT_MODALITIES.join(", ")}.`,
|
|
176
|
-
provider: `Provider prefix filter.
|
|
220
|
+
provider: `Provider prefix filter. Array of provider slugs \u2014 a model matches if its ID starts with any of them (e.g., ['openai'] matches 'openai/gpt-4o'; ['openai','anthropic'] matches both). Pass a single-element array for one provider. Common providers: ${COMMON_PROVIDERS.join(", ")}.`,
|
|
221
|
+
excludeFree: `When true, exclude models with id ending in ':free'. Useful for sortBy=price (which would otherwise be dominated by free-tier preview models) and when you want a paid SLA. Default false.`,
|
|
222
|
+
expectedPromptTokens: `Expected number of prompt tokens for dryRun cost estimation. When set, overrides the heuristic that counts characters from the literal \`prompt\` string \u2014 use this for capacity planning ("what would 6000-token reviews cost?") without pasting filler. If both are omitted, the prompt string is tokenized at ~4 chars/token.`,
|
|
177
223
|
expectedCompletionTokens: `Expected number of completion tokens for cost estimation (default: 256). Typical ranges: 100-500 for quick tests, 1000-2000 for code generation, 4000+ for long-form content. This is a heuristic \u2014 actual billed tokens may differ.`
|
|
178
224
|
};
|
|
179
225
|
var SITE = {
|
|
@@ -184,56 +230,161 @@ var SITE = {
|
|
|
184
230
|
install: "Install",
|
|
185
231
|
faq: "FAQ",
|
|
186
232
|
github: "GitHub",
|
|
187
|
-
githubLabel: "GitHub repository"
|
|
233
|
+
githubLabel: "GitHub repository",
|
|
234
|
+
installCta: "Install"
|
|
188
235
|
},
|
|
189
236
|
hero: {
|
|
190
|
-
titleLine1: "
|
|
191
|
-
titleLine2: "
|
|
192
|
-
subtitle: "
|
|
193
|
-
|
|
194
|
-
audiencePrefix: "Built for",
|
|
195
|
-
audience: ["AI engineers", "Indie developers", "Teams standardizing on models"],
|
|
237
|
+
titleLine1: "Pick the right AI model",
|
|
238
|
+
titleLine2: "without leaving your editor",
|
|
239
|
+
subtitle: "Index9 is an MCP server. Your AI assistant searches, compares, and live-tests 300+ models on your prompt, so picks are measured, not guessed.",
|
|
240
|
+
proof: ["Live OpenRouter data", "300+ models, refreshed every 30 min"],
|
|
196
241
|
pricingNote: "Free. You only pay OpenRouter for live model calls.",
|
|
197
|
-
getStarted: "Add to your editor",
|
|
198
|
-
seeHowItWorks: "See
|
|
199
|
-
updatedBadge: "
|
|
242
|
+
getStarted: "Add index9 to your editor",
|
|
243
|
+
seeHowItWorks: "See a real session",
|
|
244
|
+
updatedBadge: "OpenRouter data \xB7 refreshed "
|
|
200
245
|
},
|
|
201
246
|
howItWorks: {
|
|
202
247
|
label: "How it works",
|
|
203
|
-
heading: "Your assistant
|
|
204
|
-
subtitle: "
|
|
248
|
+
heading: "Your assistant does the model-picking. You stay in the chat.",
|
|
249
|
+
subtitle: "Index9 adds five MCP tools to your editor. When you ask about models, your assistant calls them, and gets live data back.",
|
|
205
250
|
steps: [
|
|
206
251
|
{
|
|
207
252
|
number: "1",
|
|
208
253
|
title: "You ask your assistant",
|
|
209
|
-
body: '"Pick the cheapest model that can summarize this document well."
|
|
254
|
+
body: '"Pick the cheapest model that can summarize this document well." Just chat. No new UI to learn.'
|
|
210
255
|
},
|
|
211
256
|
{
|
|
212
257
|
number: "2",
|
|
213
258
|
title: "Your assistant calls index9",
|
|
214
|
-
body: "It
|
|
259
|
+
body: "It pulls live OpenRouter data: search results, full specs, cost diffs, and test outputs on your prompt."
|
|
215
260
|
},
|
|
216
261
|
{
|
|
217
262
|
number: "3",
|
|
218
263
|
title: "You get a measured recommendation",
|
|
219
|
-
body: "The assistant compares
|
|
264
|
+
body: "The assistant compares actual outputs and recommends the model that fits your constraints. Evidence, not guesswork."
|
|
220
265
|
}
|
|
221
266
|
]
|
|
222
267
|
},
|
|
268
|
+
caseStudy: {
|
|
269
|
+
label: "Case study",
|
|
270
|
+
heading: "A real session, not a mock",
|
|
271
|
+
subheading: "A Claude Code session picking a TypeScript code-review model. Actual tool calls, decisions, and final pick. Captured 2026-04-24.",
|
|
272
|
+
prompt: {
|
|
273
|
+
title: "The prompt",
|
|
274
|
+
body: "Pick 3 models for a TypeScript code-review bot. Test them on a sample PR diff and recommend the best one. Quality matters more than price."
|
|
275
|
+
},
|
|
276
|
+
toolCalls: {
|
|
277
|
+
title: "Selection path",
|
|
278
|
+
subtitle: "in order",
|
|
279
|
+
calls: [
|
|
280
|
+
{
|
|
281
|
+
tool: "find_models",
|
|
282
|
+
params: "sortBy=created, limit=10",
|
|
283
|
+
note: "recent releases"
|
|
284
|
+
},
|
|
285
|
+
{
|
|
286
|
+
tool: "find_models",
|
|
287
|
+
params: 'q="code review reasoning", structured_output',
|
|
288
|
+
note: "task fit"
|
|
289
|
+
},
|
|
290
|
+
{
|
|
291
|
+
tool: "find_models",
|
|
292
|
+
params: 'q="not frontier price", maxPrice=6',
|
|
293
|
+
note: "budget filter"
|
|
294
|
+
},
|
|
295
|
+
{ tool: "get_models", params: "\xD7 12 candidates", note: "metadata lookup" },
|
|
296
|
+
{
|
|
297
|
+
tool: "compare_models",
|
|
298
|
+
params: "ids=[3 finalists], expectedPromptTokens=6000",
|
|
299
|
+
note: "workload-cost flip"
|
|
300
|
+
},
|
|
301
|
+
{ tool: "test_model", params: "dryRun \xD7 2", note: "cost estimate" },
|
|
302
|
+
{
|
|
303
|
+
tool: "test_model",
|
|
304
|
+
params: "live \xD7 2, enforceJson=true",
|
|
305
|
+
note: "real inference"
|
|
306
|
+
}
|
|
307
|
+
]
|
|
308
|
+
},
|
|
309
|
+
consideredTitle: "Every recent model, evaluated",
|
|
310
|
+
consideredSubtitle: "Recent releases were checked with explicit accept, test, or skip decisions.",
|
|
311
|
+
consideredRows: [
|
|
312
|
+
{
|
|
313
|
+
id: "openai/gpt-5.5-pro",
|
|
314
|
+
age: "6h ago",
|
|
315
|
+
decision: "skip",
|
|
316
|
+
reason: "too expensive for every PR"
|
|
317
|
+
},
|
|
318
|
+
{
|
|
319
|
+
id: "openai/gpt-5.5",
|
|
320
|
+
age: "6h ago",
|
|
321
|
+
decision: "skip",
|
|
322
|
+
reason: "frontier-priced vs Codex"
|
|
323
|
+
},
|
|
324
|
+
{
|
|
325
|
+
id: "deepseek/deepseek-v4-pro",
|
|
326
|
+
age: "14h ago",
|
|
327
|
+
decision: "tested",
|
|
328
|
+
reason: "live test hit upstream 429 twice"
|
|
329
|
+
},
|
|
330
|
+
{
|
|
331
|
+
id: "deepseek/deepseek-v4-flash",
|
|
332
|
+
age: "14h ago",
|
|
333
|
+
decision: "skip",
|
|
334
|
+
reason: "cheaper sibling, lower quality expected"
|
|
335
|
+
},
|
|
336
|
+
{
|
|
337
|
+
id: "xiaomi/mimo-v2.5-pro",
|
|
338
|
+
age: "2d ago",
|
|
339
|
+
decision: "shortlisted",
|
|
340
|
+
reason: "recent + reasoning + structured output"
|
|
341
|
+
},
|
|
342
|
+
{
|
|
343
|
+
id: "inclusionai/ling-2.6-1t:free",
|
|
344
|
+
age: "1d ago",
|
|
345
|
+
decision: "skip",
|
|
346
|
+
reason: "no reasoning capability flag"
|
|
347
|
+
},
|
|
348
|
+
{
|
|
349
|
+
id: "arcee-ai/trinity-large-thinking",
|
|
350
|
+
age: "3w ago",
|
|
351
|
+
decision: "skip",
|
|
352
|
+
reason: "MiMo Pro had stronger positioning"
|
|
353
|
+
}
|
|
354
|
+
],
|
|
355
|
+
verdict: {
|
|
356
|
+
title: "The final pick",
|
|
357
|
+
model: "openai/gpt-5.3-codex",
|
|
358
|
+
body: "The only tested model that caught both sample bugs. About $0.015 per PR: higher than budget models, far below frontier rates."
|
|
359
|
+
},
|
|
360
|
+
quote: {
|
|
361
|
+
body: "The cheapest candidate ran 4\xD7 cheaper than Codex, and missed both bugs in the sample diff. The only way to know was a live test.",
|
|
362
|
+
attribution: "index9 session trace, 2026-04-24"
|
|
363
|
+
}
|
|
364
|
+
},
|
|
223
365
|
toolsSection: {
|
|
224
366
|
label: "Tools",
|
|
225
|
-
heading: "
|
|
226
|
-
subheading: "
|
|
367
|
+
heading: "Five MCP tools, composable in any client",
|
|
368
|
+
subheading: "Discover, shortlist, compare, cost-model, and live-test. Your assistant chains them together to make a measured pick.",
|
|
227
369
|
openRouterKey: "OpenRouter API key (live tests only)",
|
|
228
370
|
noKeyRequired: "No API key required",
|
|
229
371
|
requiresLabel: "Requires ",
|
|
230
372
|
cards: [
|
|
373
|
+
{
|
|
374
|
+
name: "list_facets",
|
|
375
|
+
action: "Discover",
|
|
376
|
+
displayName: "list_facets",
|
|
377
|
+
fullName: null,
|
|
378
|
+
description: "List the live filter vocabulary (providers, capabilities, modalities) before constructing a search.",
|
|
379
|
+
badge: null,
|
|
380
|
+
requiresKey: false
|
|
381
|
+
},
|
|
231
382
|
{
|
|
232
383
|
name: "find_models",
|
|
233
|
-
action: "
|
|
384
|
+
action: "Shortlist",
|
|
234
385
|
displayName: "find_models",
|
|
235
386
|
fullName: null,
|
|
236
|
-
description: `
|
|
387
|
+
description: `Filter ${MODEL_COUNT} models by price, context, and capabilities. Natural-language search refines the ranking.`,
|
|
237
388
|
badge: null,
|
|
238
389
|
requiresKey: false
|
|
239
390
|
},
|
|
@@ -242,7 +393,16 @@ var SITE = {
|
|
|
242
393
|
action: "Inspect",
|
|
243
394
|
displayName: "get_models",
|
|
244
395
|
fullName: null,
|
|
245
|
-
description: "
|
|
396
|
+
description: "Inspect current pricing, limits, and capabilities for any model.",
|
|
397
|
+
badge: null,
|
|
398
|
+
requiresKey: false
|
|
399
|
+
},
|
|
400
|
+
{
|
|
401
|
+
name: "compare_models",
|
|
402
|
+
action: "Compare",
|
|
403
|
+
displayName: "compare_models",
|
|
404
|
+
fullName: null,
|
|
405
|
+
description: "Side-by-side spec, capability, and workload-cost diff for 2\u201310 finalists.",
|
|
246
406
|
badge: null,
|
|
247
407
|
requiresKey: false
|
|
248
408
|
},
|
|
@@ -251,7 +411,7 @@ var SITE = {
|
|
|
251
411
|
action: "Run live tests",
|
|
252
412
|
displayName: "test_model",
|
|
253
413
|
fullName: null,
|
|
254
|
-
description: "
|
|
414
|
+
description: "Run one prompt across models and compare output, latency, and cost.",
|
|
255
415
|
badge: "Live Testing",
|
|
256
416
|
requiresKey: true
|
|
257
417
|
}
|
|
@@ -263,35 +423,45 @@ var SITE = {
|
|
|
263
423
|
items: [
|
|
264
424
|
{
|
|
265
425
|
question: "What is MCP?",
|
|
266
|
-
answer: "MCP (Model Context Protocol)
|
|
426
|
+
answer: "MCP (Model Context Protocol) lets AI assistants call external tools. index9 adds five composable tools (list_facets, find_models, get_models, compare_models, test_model) to any MCP-compatible client.",
|
|
267
427
|
link: {
|
|
268
428
|
label: "Learn more about MCP",
|
|
269
429
|
url: "https://modelcontextprotocol.io"
|
|
270
430
|
}
|
|
271
431
|
},
|
|
432
|
+
{
|
|
433
|
+
question: "Who is index9 for?",
|
|
434
|
+
answer: "Developers using AI coding assistants (Claude Code, Cursor, Codex, VS Code) who want their assistant to pick models based on live cost and quality data, not training-data guesses.",
|
|
435
|
+
link: null
|
|
436
|
+
},
|
|
272
437
|
{
|
|
273
438
|
question: "How does live testing work?",
|
|
274
|
-
answer: `
|
|
439
|
+
answer: `test_model sends your prompt to 1\u2013${LIMITS.testModelsMax} models via OpenRouter and returns output, latency, token usage, and cost. Live tests require an OpenRouter key; dryRun=true only estimates cost (pass expectedPromptTokens to model larger workloads without pasting filler).`,
|
|
275
440
|
link: null
|
|
276
441
|
},
|
|
277
442
|
{
|
|
278
443
|
question: "Does index9 recommend which model to use?",
|
|
279
|
-
answer: "index9
|
|
444
|
+
answer: "index9 returns outputs, latency, cost, and specs. Your assistant uses those results to make the recommendation.",
|
|
445
|
+
link: null
|
|
446
|
+
},
|
|
447
|
+
{
|
|
448
|
+
question: "How is compare_models different from calling get_models on each candidate?",
|
|
449
|
+
answer: "compare_models returns a diff matrix (which fields are equal, which differ), plus convenience picks: cheapestForPromptPerMillion, largestContext, and (when you pass expectedPromptTokens + expectedCompletionTokens) cheapestForRealisticWorkload accounting for prompt:completion ratio differences. One call instead of N parallel get_models calls plus manual diffing.",
|
|
280
450
|
link: null
|
|
281
451
|
},
|
|
282
452
|
{
|
|
283
453
|
question: "What models are available?",
|
|
284
|
-
answer: `index9
|
|
454
|
+
answer: `index9 covers ${MODEL_COUNT} OpenRouter models, including OpenAI, Anthropic, Google, Meta, Mistral, and others. Metadata refreshes every 30 minutes.`,
|
|
285
455
|
link: null
|
|
286
456
|
},
|
|
287
457
|
{
|
|
288
458
|
question: "What's the project status?",
|
|
289
|
-
answer: "
|
|
459
|
+
answer: "The hosted API and MCP server are stable and in active use. Issues and feature requests welcome on GitHub.",
|
|
290
460
|
link: null
|
|
291
461
|
},
|
|
292
462
|
{
|
|
293
463
|
question: "Is my data stored?",
|
|
294
|
-
answer: "No. index9 does not store
|
|
464
|
+
answer: "No. index9 does not store prompts, outputs, or API keys. Live tests are proxied to OpenRouter.",
|
|
295
465
|
link: null
|
|
296
466
|
}
|
|
297
467
|
]
|
|
@@ -299,17 +469,31 @@ var SITE = {
|
|
|
299
469
|
install: {
|
|
300
470
|
label: "Setup",
|
|
301
471
|
heading: "Add index9 to your editor",
|
|
302
|
-
subheading: "
|
|
472
|
+
subheading: "Choose your client and copy the config.",
|
|
303
473
|
configs: [
|
|
304
474
|
{
|
|
305
|
-
id: "cursor
|
|
306
|
-
label: "Cursor
|
|
307
|
-
paths: [".cursor/mcp.json"
|
|
475
|
+
id: "cursor",
|
|
476
|
+
label: "Cursor",
|
|
477
|
+
paths: [".cursor/mcp.json"],
|
|
308
478
|
config: `{
|
|
309
479
|
"mcpServers": {
|
|
310
480
|
"index9": {
|
|
311
481
|
"command": "npx",
|
|
312
|
-
"args": ["-y", "@index9/mcp"]
|
|
482
|
+
"args": ["-y", "@index9/mcp@latest"]
|
|
483
|
+
}
|
|
484
|
+
}
|
|
485
|
+
}`
|
|
486
|
+
},
|
|
487
|
+
{
|
|
488
|
+
id: "vscode",
|
|
489
|
+
label: "VS Code",
|
|
490
|
+
paths: [".vscode/mcp.json"],
|
|
491
|
+
config: `{
|
|
492
|
+
"servers": {
|
|
493
|
+
"index9": {
|
|
494
|
+
"type": "stdio",
|
|
495
|
+
"command": "npx",
|
|
496
|
+
"args": ["-y", "@index9/mcp@latest"]
|
|
313
497
|
}
|
|
314
498
|
}
|
|
315
499
|
}`
|
|
@@ -317,141 +501,37 @@ var SITE = {
|
|
|
317
501
|
{
|
|
318
502
|
id: "claude-code",
|
|
319
503
|
label: "Claude Code",
|
|
320
|
-
paths: ["
|
|
321
|
-
config: `claude mcp add --transport stdio index9 -- npx -y @index9/mcp`,
|
|
504
|
+
paths: ["Terminal command"],
|
|
505
|
+
config: `claude mcp add --transport stdio index9 -- npx -y @index9/mcp@latest`,
|
|
322
506
|
copyHint: "# Run in terminal (adds to ~/.claude.json)"
|
|
507
|
+
},
|
|
508
|
+
{
|
|
509
|
+
id: "codex",
|
|
510
|
+
label: "Codex",
|
|
511
|
+
paths: ["Terminal command"],
|
|
512
|
+
config: `codex mcp add index9 -- npx -y @index9/mcp@latest`,
|
|
513
|
+
copyHint: "# Run in terminal (adds to ~/.codex/config.toml)"
|
|
323
514
|
}
|
|
324
515
|
],
|
|
325
|
-
workflowHeading: "Recommended workflow",
|
|
326
|
-
workflowIntro: "Add these to your assistant rules to guide model selection:",
|
|
327
|
-
workflowRulesLabel: ".cursor/rules or AGENTS.md",
|
|
328
|
-
workflowRules: [
|
|
329
|
-
"Use find_models to shortlist candidates based on task requirements (cost, speed, context window, capabilities).",
|
|
330
|
-
"Use get_models to confirm pricing, limits, and capabilities for the shortlist.",
|
|
331
|
-
"Use test_model with dryRun=true to estimate cost, then run live tests with a task-representative prompt. Compare outputs first, then optimize for speed/cost."
|
|
332
|
-
],
|
|
333
516
|
copyButton: "Copy",
|
|
334
517
|
copiedButton: "Copied",
|
|
335
518
|
copyAriaLabel: "Copy configuration",
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
openRouterNoteSuffix: ". Add OPENROUTER_API_KEY to your config env for live tests. dryRun=true does not require a key; keys are passed per-request and never stored or logged."
|
|
340
|
-
},
|
|
341
|
-
comparison: {
|
|
342
|
-
label: "Comparison",
|
|
343
|
-
heading: "Evidence over intuition",
|
|
344
|
-
subheading: "Benchmark on your real prompts \u2014 not someone else's.",
|
|
345
|
-
withoutLabel: "Without index9",
|
|
346
|
-
withLabel: "With index9",
|
|
347
|
-
withoutItems: [
|
|
348
|
-
"Model pricing and specs may be weeks old",
|
|
349
|
-
"Quality based on generic benchmarks, not your task",
|
|
350
|
-
"Testing a model means a throwaway script or manual switching"
|
|
351
|
-
],
|
|
352
|
-
withItems: [
|
|
353
|
-
"Models synced from OpenRouter every 30 minutes",
|
|
354
|
-
"Quality measured on prompts for your specific task",
|
|
355
|
-
"Test and compare in your editor \u2014 no scripts, no switching"
|
|
356
|
-
],
|
|
357
|
-
sampleTableLabel: 'Sample comparison \u2014 "Extract the action items from this meeting transcript"',
|
|
358
|
-
tableHeaders: {
|
|
359
|
-
model: "Model",
|
|
360
|
-
latency: "Latency",
|
|
361
|
-
cost: "Cost",
|
|
362
|
-
notes: "Notes"
|
|
363
|
-
},
|
|
364
|
-
sampleRows: [
|
|
365
|
-
{
|
|
366
|
-
model: "gpt-4.1-nano",
|
|
367
|
-
latency: "310ms",
|
|
368
|
-
tokens: "96",
|
|
369
|
-
cost: "$0.0001",
|
|
370
|
-
note: "Fastest; missed one implicit action item"
|
|
371
|
-
},
|
|
372
|
-
{
|
|
373
|
-
model: "gemini-2.5-flash",
|
|
374
|
-
latency: "560ms",
|
|
375
|
-
tokens: "142",
|
|
376
|
-
cost: "$0.0004",
|
|
377
|
-
note: "Caught all items; good balance"
|
|
378
|
-
},
|
|
379
|
-
{
|
|
380
|
-
model: "claude-sonnet-4.5",
|
|
381
|
-
latency: "1,120ms",
|
|
382
|
-
tokens: "189",
|
|
383
|
-
cost: "$0.0018",
|
|
384
|
-
note: "Most thorough; grouped items by owner"
|
|
385
|
-
}
|
|
386
|
-
],
|
|
387
|
-
tableNote: "The right model depends on the task."
|
|
519
|
+
setupNote: "Need live tests or custom setup?",
|
|
520
|
+
setupLink: "Set up live testing",
|
|
521
|
+
setupUrl: "https://github.com/index9-org/mcp#openrouter-api-key"
|
|
388
522
|
},
|
|
389
523
|
footer: {
|
|
390
524
|
brand: "index9",
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
copyright: "\xA9 2026 index9",
|
|
525
|
+
tagline: "MCP-native model picker. Live data from OpenRouter.",
|
|
526
|
+
copyrightSuffix: "index9",
|
|
394
527
|
github: "GitHub",
|
|
395
528
|
privacy: "Privacy",
|
|
396
529
|
terms: "Terms"
|
|
397
|
-
},
|
|
398
|
-
terminalDemo: {
|
|
399
|
-
sectionLabel: "Example",
|
|
400
|
-
titleBar: "Cursor \u2014 assistant chat",
|
|
401
|
-
userLabel: "You",
|
|
402
|
-
assistantLabel: "Assistant",
|
|
403
|
-
userPrompt: "Find the cheapest, most capable model for my use case.",
|
|
404
|
-
assistantReplyPrefix: "I'll check your ",
|
|
405
|
-
assistantReplyFile: "summarize-ticket.ts",
|
|
406
|
-
assistantReplySuffix: " handler, search for suitable models, then test the top 3.",
|
|
407
|
-
findModelsCall: {
|
|
408
|
-
label: "find_models",
|
|
409
|
-
suffix: " \u2014 search by use case",
|
|
410
|
-
queryLabel: "q:",
|
|
411
|
-
queryContent: '"cheap fast summarization model"',
|
|
412
|
-
sortByLabel: "sortBy:",
|
|
413
|
-
sortByValue: "price"
|
|
414
|
-
},
|
|
415
|
-
testModelsCall: {
|
|
416
|
-
label: "test_model",
|
|
417
|
-
suffix: " \u2014 top 3 from find_models (via OpenRouter)",
|
|
418
|
-
modelsLabel: "models:",
|
|
419
|
-
modelsContent: "gemini-2.5-flash-lite, gpt-4.1-mini, claude-haiku-4.5",
|
|
420
|
-
promptLabel: "prompt:",
|
|
421
|
-
promptContent: '"Checkout fails on iPhone Safari. Payment subdomain SSL cert expired."'
|
|
422
|
-
},
|
|
423
|
-
resultsLabel: "Results",
|
|
424
|
-
conclusionPrefix: "Best fit: ",
|
|
425
|
-
conclusionModel: "gemini-2.5-flash-lite",
|
|
426
|
-
conclusionSuffix: " \u2014 cheapest and fastest, with comparable quality.",
|
|
427
|
-
resultCards: [
|
|
428
|
-
{
|
|
429
|
-
model: "gemini-2.5-flash-lite",
|
|
430
|
-
latency: "480ms",
|
|
431
|
-
cost: "$0.00005",
|
|
432
|
-
tokens: 124,
|
|
433
|
-
output: "Mobile Safari checkout failed \u2014 expired SSL cert on payment subdomain. Renewed; customer confirmed."
|
|
434
|
-
},
|
|
435
|
-
{
|
|
436
|
-
model: "gpt-4.1-mini",
|
|
437
|
-
latency: "720ms",
|
|
438
|
-
cost: "$0.0002",
|
|
439
|
-
tokens: 138,
|
|
440
|
-
output: "Checkout failures on mobile Safari. Root cause: expired SSL cert on payment subdomain. Resolved; fix confirmed."
|
|
441
|
-
},
|
|
442
|
-
{
|
|
443
|
-
model: "claude-haiku-4.5",
|
|
444
|
-
latency: "980ms",
|
|
445
|
-
cost: "$0.0008",
|
|
446
|
-
tokens: 155,
|
|
447
|
-
output: "Repeated checkout failures on iPhone Safari traced to expired payment subdomain SSL cert. Renewed; customer verified."
|
|
448
|
-
}
|
|
449
|
-
]
|
|
450
530
|
}
|
|
451
531
|
};
|
|
452
532
|
var README = {
|
|
453
|
-
tagline: `Landing page, API, and MCP server for
|
|
454
|
-
mcpDescription: `
|
|
533
|
+
tagline: `Landing page, API, and MCP server for discovering, shortlisting, comparing, cost-modeling, and live-testing ${MODEL_COUNT} AI models.`,
|
|
534
|
+
mcpDescription: `Discover, shortlist, compare, cost-model, and live-test ${MODEL_COUNT} AI models from your editor`,
|
|
455
535
|
monorepoLayout: {
|
|
456
536
|
appsWeb: "apps/web \u2014 Next.js 16 app (UI + API routes)",
|
|
457
537
|
packagesCore: "packages/core \u2014 Shared Zod schemas, types, constants (@index9/core)",
|
|
@@ -466,7 +546,7 @@ var README = {
|
|
|
466
546
|
envNote: "Copy apps/web/.env.example to apps/web/.env.local and fill in values for local development.",
|
|
467
547
|
mcpInstall: {
|
|
468
548
|
cli: "npx -y @index9/mcp@latest",
|
|
469
|
-
envNote: "Optional: OPENROUTER_API_KEY for live test_model calls.",
|
|
549
|
+
envNote: "Optional: set OPENROUTER_API_KEY in your MCP client config for live test_model calls. dryRun=true works without a key.",
|
|
470
550
|
claudeCode: "Claude Code: Run `claude mcp add --transport stdio index9 -- npx -y @index9/mcp` or add the same config to .mcp.json / ~/.claude.json."
|
|
471
551
|
},
|
|
472
552
|
release: {
|
|
@@ -496,7 +576,8 @@ var SearchQuerySchema = z2.object({
|
|
|
496
576
|
capabilitiesAll: z2.array(z2.enum(CAPABILITIES)).optional(),
|
|
497
577
|
capabilitiesAny: z2.array(z2.enum(CAPABILITIES)).optional(),
|
|
498
578
|
modality: z2.enum(OUTPUT_MODALITIES).optional(),
|
|
499
|
-
provider: z2.string().min(1).optional()
|
|
579
|
+
provider: z2.array(z2.string().min(1)).optional(),
|
|
580
|
+
excludeFree: z2.boolean().optional()
|
|
500
581
|
}).strict();
|
|
501
582
|
var SearchResultSchema = z2.object({
|
|
502
583
|
id: z2.string(),
|
|
@@ -510,6 +591,8 @@ var SearchResultSchema = z2.object({
|
|
|
510
591
|
promptPerMillion: z2.number().nullable(),
|
|
511
592
|
completionPerMillion: z2.number().nullable()
|
|
512
593
|
}),
|
|
594
|
+
inputModalities: z2.array(z2.string()),
|
|
595
|
+
outputModalities: z2.array(z2.string()),
|
|
513
596
|
capabilities: z2.array(z2.string()),
|
|
514
597
|
score: z2.number().nullable()
|
|
515
598
|
});
|
|
@@ -524,7 +607,9 @@ var SearchResponseSchema = z2.object({
|
|
|
524
607
|
}),
|
|
525
608
|
meta: z2.object({
|
|
526
609
|
queryMode: z2.enum(["semantic", "filter_only"]),
|
|
527
|
-
ranking: z2.literal("hybrid_rrf")
|
|
610
|
+
ranking: z2.literal("hybrid_rrf"),
|
|
611
|
+
confidence: z2.enum(["high", "low"]).optional(),
|
|
612
|
+
suggestion: z2.string().optional()
|
|
528
613
|
})
|
|
529
614
|
});
|
|
530
615
|
var FindModelsToolResultSchema = SearchResponseSchema.extend({
|
|
@@ -569,42 +654,122 @@ var BatchModelLookupResponseSchema = z3.object({
|
|
|
569
654
|
results: z3.array(ModelResponseSchema.nullable()),
|
|
570
655
|
missingIds: z3.array(z3.string()),
|
|
571
656
|
resolvedAliases: z3.record(z3.string(), z3.string()).optional(),
|
|
572
|
-
ambiguousAliases: z3.record(z3.string(), z3.array(z3.string())).optional()
|
|
657
|
+
ambiguousAliases: z3.record(z3.string(), z3.array(z3.string())).optional(),
|
|
658
|
+
suggestions: z3.record(z3.string(), z3.array(z3.string())).optional()
|
|
573
659
|
}).strict();
|
|
574
660
|
var GetModelsToolResultSchema = z3.object({
|
|
575
661
|
results: z3.array(ModelResponseSchema.nullable()),
|
|
576
662
|
missingIds: z3.array(z3.string()),
|
|
577
663
|
resolvedAliases: z3.record(z3.string(), z3.string()).optional(),
|
|
578
664
|
ambiguousAliases: z3.record(z3.string(), z3.array(z3.string())).optional(),
|
|
665
|
+
suggestions: z3.record(z3.string(), z3.array(z3.string())).optional(),
|
|
579
666
|
_index9: Index9MetaSchema
|
|
580
667
|
});
|
|
581
668
|
|
|
582
|
-
// ../core/dist/schemas/
|
|
669
|
+
// ../core/dist/schemas/compare.js
|
|
583
670
|
import { z as z4 } from "zod";
|
|
584
|
-
var
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
671
|
+
var CompareRequestSchema = z4.object({
|
|
672
|
+
ids: z4.array(z4.string().min(1)).min(2, "compare requires at least 2 ids").max(LIMITS.compareModelsMax, `ids must contain between 2 and ${LIMITS.compareModelsMax} model IDs`),
|
|
673
|
+
expectedPromptTokens: z4.number().int().positive().optional(),
|
|
674
|
+
expectedCompletionTokens: z4.number().int().positive().optional()
|
|
675
|
+
}).strict();
|
|
676
|
+
var NumericDiffField = z4.object({
|
|
677
|
+
allEqual: z4.boolean(),
|
|
678
|
+
values: z4.record(z4.string(), z4.number().nullable())
|
|
679
|
+
});
|
|
680
|
+
var StringDiffField = z4.object({
|
|
681
|
+
allEqual: z4.boolean(),
|
|
682
|
+
values: z4.record(z4.string(), z4.string().nullable())
|
|
683
|
+
});
|
|
684
|
+
var StringArrayDiffField = z4.object({
|
|
685
|
+
allEqual: z4.boolean(),
|
|
686
|
+
values: z4.record(z4.string(), z4.array(z4.string()))
|
|
687
|
+
});
|
|
688
|
+
var SetDiffField = z4.object({
|
|
689
|
+
commonAll: z4.array(z4.string()),
|
|
690
|
+
uniquePerModel: z4.record(z4.string(), z4.array(z4.string()))
|
|
691
|
+
});
|
|
692
|
+
var CompareDiffSchema = z4.object({
|
|
693
|
+
contextLength: NumericDiffField,
|
|
694
|
+
maxOutputTokens: NumericDiffField,
|
|
695
|
+
promptPricePerMillion: NumericDiffField,
|
|
696
|
+
completionPricePerMillion: NumericDiffField,
|
|
697
|
+
tokenizer: StringDiffField,
|
|
698
|
+
inputModalities: StringArrayDiffField,
|
|
699
|
+
outputModalities: StringArrayDiffField,
|
|
700
|
+
capabilities: SetDiffField,
|
|
701
|
+
supportedParameters: SetDiffField
|
|
702
|
+
});
|
|
703
|
+
var CompareWorkloadCostSchema = z4.object({
|
|
704
|
+
modelId: z4.string(),
|
|
705
|
+
promptTokens: z4.number().int().nonnegative(),
|
|
706
|
+
completionTokens: z4.number().int().nonnegative(),
|
|
707
|
+
totalCostUsd: z4.number().nullable()
|
|
708
|
+
});
|
|
709
|
+
var CompareResponseSchema = z4.object({
|
|
710
|
+
models: z4.array(ModelResponseSchema),
|
|
711
|
+
diff: CompareDiffSchema,
|
|
712
|
+
cheapestForPromptPerMillion: z4.string().nullable(),
|
|
713
|
+
largestContext: z4.string().nullable(),
|
|
714
|
+
cheapestForRealisticWorkload: z4.string().nullable().optional(),
|
|
715
|
+
workloadCosts: z4.array(CompareWorkloadCostSchema).optional(),
|
|
716
|
+
resolvedAliases: z4.record(z4.string(), z4.string()).optional(),
|
|
717
|
+
missingIds: z4.array(z4.string()),
|
|
718
|
+
suggestions: z4.record(z4.string(), z4.array(z4.string())).optional(),
|
|
719
|
+
ambiguousAliases: z4.record(z4.string(), z4.array(z4.string())).optional()
|
|
720
|
+
}).strict();
|
|
721
|
+
var CompareModelsToolResultSchema = CompareResponseSchema.extend({
|
|
722
|
+
_index9: Index9MetaSchema
|
|
723
|
+
});
|
|
724
|
+
|
|
725
|
+
// ../core/dist/schemas/facets.js
|
|
726
|
+
import { z as z5 } from "zod";
|
|
727
|
+
var FacetCount = z5.object({
|
|
728
|
+
id: z5.string(),
|
|
729
|
+
modelCount: z5.number().int().nonnegative()
|
|
730
|
+
});
|
|
731
|
+
var FacetsResponseSchema = z5.object({
|
|
732
|
+
providers: z5.array(FacetCount),
|
|
733
|
+
capabilities: z5.array(FacetCount),
|
|
734
|
+
modalities: z5.object({
|
|
735
|
+
input: z5.array(z5.string()),
|
|
736
|
+
output: z5.array(z5.string())
|
|
737
|
+
}),
|
|
738
|
+
tokenizers: z5.array(z5.string()),
|
|
739
|
+
totalModels: z5.number().int().nonnegative(),
|
|
740
|
+
updatedAt: z5.string()
|
|
741
|
+
}).strict();
|
|
742
|
+
var ListFacetsToolResultSchema = FacetsResponseSchema.extend({
|
|
743
|
+
_index9: Index9MetaSchema
|
|
744
|
+
});
|
|
745
|
+
|
|
746
|
+
// ../core/dist/schemas/test.js
|
|
747
|
+
import { z as z6 } from "zod";
|
|
748
|
+
var ResponseFormatSchema = z6.object({
|
|
749
|
+
type: z6.string().min(1)
|
|
750
|
+
}).catchall(z6.unknown()).optional();
|
|
751
|
+
var TestRequestSchema = z6.object({
|
|
752
|
+
prompt: z6.string().min(1).optional(),
|
|
753
|
+
userContent: z6.array(UserContentPartSchema).min(1).optional(),
|
|
754
|
+
dryRun: z6.boolean().optional(),
|
|
755
|
+
expectedPromptTokens: z6.number().int().positive().optional(),
|
|
756
|
+
expectedCompletionTokens: z6.number().int().positive().optional(),
|
|
757
|
+
models: z6.array(z6.string().min(1)).min(1, "Models are required").max(LIMITS.testModelsMax, `Models must contain between 1 and ${LIMITS.testModelsMax} model IDs`),
|
|
758
|
+
timeoutMs: z6.number().int().positive().optional(),
|
|
759
|
+
maxTokens: z6.number().int().positive().optional(),
|
|
760
|
+
systemPrompt: z6.string().min(1).optional(),
|
|
761
|
+
temperature: z6.number().min(0).max(2).optional(),
|
|
762
|
+
topP: z6.number().gt(0).lte(1).optional(),
|
|
763
|
+
seed: z6.number().int().optional(),
|
|
599
764
|
responseFormat: ResponseFormatSchema,
|
|
600
|
-
enforceJson:
|
|
601
|
-
retries:
|
|
765
|
+
enforceJson: z6.boolean().optional(),
|
|
766
|
+
retries: z6.number().int().min(0).max(3).optional()
|
|
602
767
|
}).strict().superRefine((data, ctx) => {
|
|
603
768
|
if (data.dryRun === true) {
|
|
604
|
-
if (!data.prompt) {
|
|
769
|
+
if (!data.prompt && data.expectedPromptTokens === void 0) {
|
|
605
770
|
ctx.addIssue({
|
|
606
|
-
code:
|
|
607
|
-
message: "
|
|
771
|
+
code: z6.ZodIssueCode.custom,
|
|
772
|
+
message: "dryRun requires either prompt or expectedPromptTokens",
|
|
608
773
|
path: ["prompt"]
|
|
609
774
|
});
|
|
610
775
|
}
|
|
@@ -612,72 +777,72 @@ var TestRequestSchema = z4.object({
|
|
|
612
777
|
}
|
|
613
778
|
if (!data.prompt && !data.userContent?.length) {
|
|
614
779
|
ctx.addIssue({
|
|
615
|
-
code:
|
|
780
|
+
code: z6.ZodIssueCode.custom,
|
|
616
781
|
message: "Prompt or userContent is required",
|
|
617
782
|
path: ["prompt"]
|
|
618
783
|
});
|
|
619
784
|
}
|
|
620
785
|
});
|
|
621
|
-
var UsageTokensSchema =
|
|
622
|
-
prompt:
|
|
623
|
-
completion:
|
|
786
|
+
var UsageTokensSchema = z6.object({
|
|
787
|
+
prompt: z6.number().min(0),
|
|
788
|
+
completion: z6.number().min(0)
|
|
624
789
|
});
|
|
625
|
-
var TestPricingUsedSchema =
|
|
626
|
-
promptPerToken:
|
|
627
|
-
completionPerToken:
|
|
628
|
-
promptPerMillion:
|
|
629
|
-
completionPerMillion:
|
|
790
|
+
var TestPricingUsedSchema = z6.object({
|
|
791
|
+
promptPerToken: z6.number().nullable().optional(),
|
|
792
|
+
completionPerToken: z6.number().nullable().optional(),
|
|
793
|
+
promptPerMillion: z6.number().nullable().optional(),
|
|
794
|
+
completionPerMillion: z6.number().nullable().optional()
|
|
630
795
|
});
|
|
631
|
-
var TestModelMetadataSchema =
|
|
632
|
-
id:
|
|
633
|
-
name:
|
|
634
|
-
created:
|
|
635
|
-
createdAt:
|
|
796
|
+
var TestModelMetadataSchema = z6.object({
|
|
797
|
+
id: z6.string(),
|
|
798
|
+
name: z6.string(),
|
|
799
|
+
created: z6.number().nullable().optional(),
|
|
800
|
+
createdAt: z6.string().nullable().optional(),
|
|
636
801
|
pricingUsed: TestPricingUsedSchema.optional()
|
|
637
802
|
});
|
|
638
|
-
var TestResultSuccessSchema =
|
|
639
|
-
modelId:
|
|
640
|
-
resolvedModelId:
|
|
641
|
-
ok:
|
|
803
|
+
var TestResultSuccessSchema = z6.object({
|
|
804
|
+
modelId: z6.string(),
|
|
805
|
+
resolvedModelId: z6.string().optional(),
|
|
806
|
+
ok: z6.literal(true),
|
|
642
807
|
model: TestModelMetadataSchema,
|
|
643
|
-
response:
|
|
644
|
-
latencyMs:
|
|
808
|
+
response: z6.string(),
|
|
809
|
+
latencyMs: z6.number().min(0),
|
|
645
810
|
tokens: UsageTokensSchema,
|
|
646
|
-
cost:
|
|
647
|
-
truncated:
|
|
811
|
+
cost: z6.number().nullable().optional(),
|
|
812
|
+
truncated: z6.boolean().optional()
|
|
648
813
|
});
|
|
649
|
-
var TestResultFailureSchema =
|
|
650
|
-
modelId:
|
|
651
|
-
resolvedModelId:
|
|
652
|
-
ok:
|
|
814
|
+
var TestResultFailureSchema = z6.object({
|
|
815
|
+
modelId: z6.string(),
|
|
816
|
+
resolvedModelId: z6.string().optional(),
|
|
817
|
+
ok: z6.literal(false),
|
|
653
818
|
model: TestModelMetadataSchema,
|
|
654
|
-
error:
|
|
655
|
-
latencyMs:
|
|
819
|
+
error: z6.string(),
|
|
820
|
+
latencyMs: z6.number().min(0)
|
|
656
821
|
});
|
|
657
|
-
var TestResultSchema =
|
|
822
|
+
var TestResultSchema = z6.discriminatedUnion("ok", [
|
|
658
823
|
TestResultSuccessSchema,
|
|
659
824
|
TestResultFailureSchema
|
|
660
825
|
]);
|
|
661
|
-
var TestEstimateResultSchema =
|
|
662
|
-
modelId:
|
|
663
|
-
resolvedModelId:
|
|
826
|
+
var TestEstimateResultSchema = z6.object({
|
|
827
|
+
modelId: z6.string(),
|
|
828
|
+
resolvedModelId: z6.string().optional(),
|
|
664
829
|
model: TestModelMetadataSchema,
|
|
665
830
|
tokens: UsageTokensSchema,
|
|
666
|
-
estimatedCost:
|
|
831
|
+
estimatedCost: z6.number().nullable().optional()
|
|
667
832
|
});
|
|
668
|
-
var TestDryRunResponseSchema =
|
|
669
|
-
dryRun:
|
|
670
|
-
results:
|
|
671
|
-
disclaimer:
|
|
833
|
+
var TestDryRunResponseSchema = z6.object({
|
|
834
|
+
dryRun: z6.literal(true),
|
|
835
|
+
results: z6.array(TestEstimateResultSchema),
|
|
836
|
+
disclaimer: z6.string()
|
|
672
837
|
});
|
|
673
|
-
var TestLiveResponseSchema =
|
|
674
|
-
results:
|
|
838
|
+
var TestLiveResponseSchema = z6.object({
|
|
839
|
+
results: z6.array(TestResultSchema)
|
|
675
840
|
});
|
|
676
|
-
var TestResponseSchema =
|
|
841
|
+
var TestResponseSchema = z6.union([TestDryRunResponseSchema, TestLiveResponseSchema]);
|
|
677
842
|
|
|
678
843
|
// src/server.ts
|
|
679
844
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
680
|
-
import { z as
|
|
845
|
+
import { z as z7 } from "zod";
|
|
681
846
|
|
|
682
847
|
// src/config.ts
|
|
683
848
|
var DEFAULT_BASE_URL = "https://index9.dev";
|
|
@@ -859,7 +1024,8 @@ async function handleSearchModels(ctx, args) {
|
|
|
859
1024
|
if (q.capabilitiesAll?.length) params.capabilitiesAll = q.capabilitiesAll.join(",");
|
|
860
1025
|
if (q.capabilitiesAny?.length) params.capabilitiesAny = q.capabilitiesAny.join(",");
|
|
861
1026
|
if (q.modality) params.modality = q.modality;
|
|
862
|
-
if (q.provider) params.provider = q.provider;
|
|
1027
|
+
if (q.provider?.length) params.provider = q.provider.join(",");
|
|
1028
|
+
if (q.excludeFree === true) params.excludeFree = "true";
|
|
863
1029
|
return callApi(
|
|
864
1030
|
ctx,
|
|
865
1031
|
buildUrl(ctx.baseUrl, API_PATHS.search, params),
|
|
@@ -879,6 +1045,26 @@ async function handleGetModels(ctx, args) {
|
|
|
879
1045
|
BatchModelLookupResponseSchema
|
|
880
1046
|
);
|
|
881
1047
|
}
|
|
1048
|
+
async function handleCompareModels(ctx, args) {
|
|
1049
|
+
const parsed = CompareRequestSchema.safeParse(args);
|
|
1050
|
+
if (!parsed.success) {
|
|
1051
|
+
return toResponse({ error: parsed.error.message }, true);
|
|
1052
|
+
}
|
|
1053
|
+
return callApi(
|
|
1054
|
+
ctx,
|
|
1055
|
+
`${ctx.baseUrl}${API_PATHS.compare}`,
|
|
1056
|
+
{ method: "POST", headers: baseHeaders(ctx), body: JSON.stringify(parsed.data) },
|
|
1057
|
+
CompareResponseSchema
|
|
1058
|
+
);
|
|
1059
|
+
}
|
|
1060
|
+
async function handleListFacets(ctx, _args) {
|
|
1061
|
+
return callApi(
|
|
1062
|
+
ctx,
|
|
1063
|
+
`${ctx.baseUrl}${API_PATHS.facets}`,
|
|
1064
|
+
{ method: "GET", headers: baseHeaders(ctx) },
|
|
1065
|
+
FacetsResponseSchema
|
|
1066
|
+
);
|
|
1067
|
+
}
|
|
882
1068
|
async function handleTestModels(ctx, args) {
|
|
883
1069
|
const parsed = TestRequestSchema.safeParse(args);
|
|
884
1070
|
if (!parsed.success) {
|
|
@@ -917,20 +1103,21 @@ async function createServer() {
|
|
|
917
1103
|
title: TOOLS.find_models.title,
|
|
918
1104
|
description: TOOLS.find_models.description,
|
|
919
1105
|
inputSchema: {
|
|
920
|
-
q:
|
|
921
|
-
limit:
|
|
922
|
-
cursor:
|
|
923
|
-
sortBy:
|
|
924
|
-
sortOrder:
|
|
925
|
-
createdAfter:
|
|
926
|
-
createdBefore:
|
|
927
|
-
minPrice:
|
|
928
|
-
maxPrice:
|
|
929
|
-
minContext:
|
|
930
|
-
capabilitiesAll:
|
|
931
|
-
capabilitiesAny:
|
|
932
|
-
modality:
|
|
933
|
-
provider:
|
|
1106
|
+
q: z7.string().min(1).optional().describe(PARAM_DESCRIPTIONS.q),
|
|
1107
|
+
limit: z7.number().int().min(1).max(100).default(20).describe("Page size (1-100, default 20)."),
|
|
1108
|
+
cursor: z7.string().min(1).optional().describe(PARAM_DESCRIPTIONS.cursor),
|
|
1109
|
+
sortBy: z7.enum(["relevance", "created", "price"]).default("relevance").describe(PARAM_DESCRIPTIONS.sortBy),
|
|
1110
|
+
sortOrder: z7.enum(["asc", "desc"]).optional().describe("Sort order. Defaults by sortBy."),
|
|
1111
|
+
createdAfter: z7.string().optional().describe("Lower bound for model created timestamp."),
|
|
1112
|
+
createdBefore: z7.string().optional().describe("Upper bound for model created timestamp."),
|
|
1113
|
+
minPrice: z7.number().min(0).optional().describe("Minimum prompt price in USD per million tokens."),
|
|
1114
|
+
maxPrice: z7.number().min(0).optional().describe("Maximum prompt price in USD per million tokens."),
|
|
1115
|
+
minContext: z7.number().int().min(1).optional().describe("Minimum context window in tokens."),
|
|
1116
|
+
capabilitiesAll: z7.array(z7.enum(CAPABILITIES)).optional().describe(PARAM_DESCRIPTIONS.capabilitiesAll),
|
|
1117
|
+
capabilitiesAny: z7.array(z7.enum(CAPABILITIES)).optional().describe(PARAM_DESCRIPTIONS.capabilitiesAny),
|
|
1118
|
+
modality: z7.enum(OUTPUT_MODALITIES).optional().describe(PARAM_DESCRIPTIONS.modality),
|
|
1119
|
+
provider: z7.array(z7.string().min(1)).optional().describe(PARAM_DESCRIPTIONS.provider),
|
|
1120
|
+
excludeFree: z7.boolean().optional().describe(PARAM_DESCRIPTIONS.excludeFree)
|
|
934
1121
|
},
|
|
935
1122
|
outputSchema: FindModelsToolResultSchema.shape,
|
|
936
1123
|
annotations: { readOnlyHint: true }
|
|
@@ -943,40 +1130,73 @@ async function createServer() {
|
|
|
943
1130
|
title: TOOLS.get_models.title,
|
|
944
1131
|
description: TOOLS.get_models.description,
|
|
945
1132
|
inputSchema: {
|
|
946
|
-
ids:
|
|
947
|
-
maxDescriptionChars:
|
|
1133
|
+
ids: z7.array(z7.string().min(1)).min(1).max(100).describe("Model identifiers or aliases. Up to 100."),
|
|
1134
|
+
maxDescriptionChars: z7.number().int().min(0).max(2e3).optional().describe("Truncate descriptions to this many characters.")
|
|
948
1135
|
},
|
|
949
1136
|
outputSchema: GetModelsToolResultSchema.shape,
|
|
950
1137
|
annotations: { readOnlyHint: true }
|
|
951
1138
|
},
|
|
952
1139
|
async (args) => handleGetModels(ctx, args)
|
|
953
1140
|
);
|
|
1141
|
+
server.registerTool(
|
|
1142
|
+
"compare_models",
|
|
1143
|
+
{
|
|
1144
|
+
title: TOOLS.compare_models.title,
|
|
1145
|
+
description: TOOLS.compare_models.description,
|
|
1146
|
+
inputSchema: {
|
|
1147
|
+
ids: z7.array(z7.string().min(1)).min(2).max(LIMITS.compareModelsMax).describe(
|
|
1148
|
+
`Model identifiers or aliases to compare (2-${LIMITS.compareModelsMax}). Same alias formats as get_models.`
|
|
1149
|
+
),
|
|
1150
|
+
expectedPromptTokens: z7.number().int().min(1).optional().describe(
|
|
1151
|
+
"Optional. When set with expectedCompletionTokens, computes total per-call cost for each model and picks cheapestForRealisticWorkload \u2014 closes the gap where promptPerMillion alone misleads when prompt:completion price ratios diverge."
|
|
1152
|
+
),
|
|
1153
|
+
expectedCompletionTokens: z7.number().int().min(1).optional().describe(
|
|
1154
|
+
"Optional. Pair with expectedPromptTokens to surface workloadCosts and cheapestForRealisticWorkload. Both must be set to enable workload costing."
|
|
1155
|
+
)
|
|
1156
|
+
},
|
|
1157
|
+
outputSchema: CompareModelsToolResultSchema.shape,
|
|
1158
|
+
annotations: { readOnlyHint: true }
|
|
1159
|
+
},
|
|
1160
|
+
async (args) => handleCompareModels(ctx, args)
|
|
1161
|
+
);
|
|
1162
|
+
server.registerTool(
|
|
1163
|
+
"list_facets",
|
|
1164
|
+
{
|
|
1165
|
+
title: TOOLS.list_facets.title,
|
|
1166
|
+
description: TOOLS.list_facets.description,
|
|
1167
|
+
inputSchema: {},
|
|
1168
|
+
outputSchema: ListFacetsToolResultSchema.shape,
|
|
1169
|
+
annotations: { readOnlyHint: true }
|
|
1170
|
+
},
|
|
1171
|
+
async (args) => handleListFacets(ctx, args)
|
|
1172
|
+
);
|
|
954
1173
|
server.registerTool(
|
|
955
1174
|
"test_model",
|
|
956
1175
|
{
|
|
957
1176
|
title: TOOLS.test_model.title,
|
|
958
1177
|
description: TOOLS.test_model.description,
|
|
959
1178
|
inputSchema: {
|
|
960
|
-
prompt:
|
|
961
|
-
userContent:
|
|
962
|
-
dryRun:
|
|
1179
|
+
prompt: z7.string().min(1).optional().describe("Prompt sent to each model."),
|
|
1180
|
+
userContent: z7.array(UserContentPartSchema).min(1).optional().describe("Multimodal user content. At least one of prompt or userContent required."),
|
|
1181
|
+
dryRun: z7.boolean().optional().describe(
|
|
963
1182
|
"When true, returns estimated token usage and cost without calling OpenRouter (no API key required)."
|
|
964
1183
|
),
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
1184
|
+
expectedPromptTokens: z7.number().int().min(1).optional().describe(PARAM_DESCRIPTIONS.expectedPromptTokens),
|
|
1185
|
+
expectedCompletionTokens: z7.number().int().min(1).optional().describe(PARAM_DESCRIPTIONS.expectedCompletionTokens),
|
|
1186
|
+
models: z7.array(z7.string().min(1)).min(1).max(LIMITS.testModelsMax).describe(`Model IDs to evaluate (1-${LIMITS.testModelsMax}).`),
|
|
1187
|
+
timeoutMs: z7.number().int().min(1).optional().describe("Per-model timeout in ms (default 15000, max 60000)."),
|
|
1188
|
+
maxTokens: z7.number().int().min(1).optional().describe(
|
|
969
1189
|
"Completion token cap. For reasoning-capable models, set \u2265 2000 (or omit) \u2014 reasoning tokens count against this before visible output, and too-low caps cause finish_reason=length."
|
|
970
1190
|
),
|
|
971
|
-
systemPrompt:
|
|
972
|
-
temperature:
|
|
973
|
-
topP:
|
|
974
|
-
seed:
|
|
1191
|
+
systemPrompt: z7.string().min(1).optional().describe("System instruction prepended to prompt."),
|
|
1192
|
+
temperature: z7.number().min(0).max(2).optional().describe("Sampling temperature (0-2)."),
|
|
1193
|
+
topP: z7.number().gt(0).max(1).optional().describe("Nucleus sampling (0-1]."),
|
|
1194
|
+
seed: z7.number().int().optional().describe("Seed for repeatable outputs."),
|
|
975
1195
|
responseFormat: ResponseFormatSchema.describe(
|
|
976
1196
|
"Structured output shape request forwarded to OpenRouter (e.g., { type: 'json_object' })."
|
|
977
1197
|
),
|
|
978
|
-
enforceJson:
|
|
979
|
-
retries:
|
|
1198
|
+
enforceJson: z7.boolean().optional().describe("When true, output must parse as JSON."),
|
|
1199
|
+
retries: z7.number().int().min(0).max(3).optional().describe("Retries for transient failures.")
|
|
980
1200
|
},
|
|
981
1201
|
// No outputSchema: test_model returns a z.union of dry-run and live shapes.
|
|
982
1202
|
// The SDK supports only ZodRawShape | AnySchema for outputSchema; a discriminated-union
|
package/manifest.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"manifest_version": "0.3",
|
|
3
3
|
"name": "index9",
|
|
4
|
-
"version": "5.
|
|
5
|
-
"description": "
|
|
4
|
+
"version": "5.2.0",
|
|
5
|
+
"description": "Discover, shortlist, compare, cost-model, and live-test 300+ AI models from your editor",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Index9"
|
|
8
8
|
},
|
|
@@ -28,6 +28,14 @@
|
|
|
28
28
|
"name": "get_models",
|
|
29
29
|
"description": "Get full model metadata by IDs or aliases (batch, up to 100)"
|
|
30
30
|
},
|
|
31
|
+
{
|
|
32
|
+
"name": "compare_models",
|
|
33
|
+
"description": "Diff 2-10 models across pricing, context, capabilities, and tokenizer"
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
"name": "list_facets",
|
|
37
|
+
"description": "Enumerate available providers, capabilities, modalities, and tokenizers"
|
|
38
|
+
},
|
|
31
39
|
{
|
|
32
40
|
"name": "test_model",
|
|
33
41
|
"description": "Run live inference or dry-run cost estimation across up to 10 models"
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@index9/mcp",
|
|
3
|
-
"version": "5.
|
|
3
|
+
"version": "5.3.0",
|
|
4
4
|
"license": "MIT",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -28,7 +28,7 @@
|
|
|
28
28
|
"tsup": "^8.5.1",
|
|
29
29
|
"typescript": "6.0.3",
|
|
30
30
|
"vitest": "^4.1.5",
|
|
31
|
-
"@index9/core": "2.3.
|
|
31
|
+
"@index9/core": "2.3.2"
|
|
32
32
|
},
|
|
33
33
|
"engines": {
|
|
34
34
|
"node": ">=20"
|