@index9/mcp 6.2.0 → 6.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -219,8 +219,14 @@ Parameters:
219
219
  - expectedPromptTokens: Estimated prompt-token count for dryRun cost estimation; overrides the prompt-string heuristic. Use to model "what would N-token requests cost?" without pasting N tokens.
220
220
  - expectedCompletionTokens: Optional completion token estimate used by dryRun
221
221
  - maxTokens, systemPrompt, temperature, topP, seed, responseFormat, enforceJson, retries: Live-testing controls (ignored when dryRun=true)
222
+ - stream: Use OpenRouter's SSE streaming so capacity/refusal errors surface in ~1s instead of waiting the full per-model timeout for an empty 200. Defaults to false.
223
+ - firstTokenTimeoutMs: Streaming-only deadline for the first delta. Defaults to 10s. If the upstream sends no token within this window, the request aborts and returns failureReason="timeout". Ignored when stream=false.
224
+ - providerSort: "throughput" | "price" | "latency" \u2014 opt-in OpenRouter provider routing. Defaults to OpenRouter's load-balanced choice.
225
+ - providerOrder: ordered list of provider slugs (up to 8). Try these providers first before falling back. Useful for steering around an overloaded provider for a single model.
226
+ - fallbackModels: ordered list of model ids (up to 5). OpenRouter automatically retries the request against the next id when the primary is unavailable. Use sparingly \u2014 a benchmark should usually test the model you asked for, not a substitute.
227
+ - debug: When true, each result includes a \`debug\` field with the raw upstream finish_reason, error message, provider name, refusal, and usage. Use to diagnose "missing assistant text" without re-running.
222
228
 
223
- Results (live): each result carries modelId (the id you passed), resolvedModelId (canonical id, present when the input was an alias), ok, response, latencyMs, tokens { prompt, completion }, cost (USD; live from OpenRouter when available, else estimated from cached pricing), and truncated=true when finish_reason is "length". On failure, results include \`error\` (free-form) plus \`failureReason\` ("insufficient_credits" | "model_unavailable" | "rate_limited" | "timeout" | "invalid_request" | "unknown") so callers can pick a retry strategy without parsing the error string.
229
+ Results (live): each result carries modelId (the id you passed), resolvedModelId (canonical id, present when the input was an alias), ok, response, latencyMs, tokens { prompt, completion }, cost (USD; live from OpenRouter when available, else estimated from cached pricing), and truncated=true when finish_reason is "length". On failure, results include \`error\` (free-form) plus \`failureReason\` ("insufficient_credits" | "model_unavailable" | "rate_limited" | "capacity" | "timeout" | "invalid_request" | "unknown") so callers can pick a retry strategy without parsing the error string. \`capacity\` indicates the provider is overloaded \u2014 apply a longer backoff or set \`fallbackModels\` and retry. When \`debug: true\` is set, each result also carries a \`debug\` block with the upstream provider's diagnostic fields.
224
230
 
225
231
  Results (dryRun): each entry carries \`tokenCostUsd\`, \`requestCostUsd\`, \`totalCostUsd\` (matches \`estimatedCost\`, includes per-request fees), and \`estimatedCostBasis\` (same enum as compare_models.workloadCosts). Use find_models or get_models first to identify model ids.
226
232
 
@@ -255,24 +261,28 @@ var SITE = {
255
261
  hero: {
256
262
  titleLine1: "Pick the right AI model",
257
263
  titleLine2: "from chat",
258
- subtitle: "Index9 is an MCP server. Your coding assistant uses it to search, compare, and live-test 300+ models on the task you're working on, so it recommends the best fit.",
259
- proof: ["Live OpenRouter data \xB7 300+ models \xB7 refreshed every 30 min"],
264
+ subtitle: "An MCP server your coding assistant uses to search, compare, and live-test 300+ models for the task you're on.",
260
265
  pricingNote: "Free. You only pay OpenRouter for live model calls.",
261
- getStarted: "Add index9 to your editor",
262
266
  seeHowItWorks: "See a real session",
263
- updatedBadge: "OpenRouter data \xB7 refreshed "
267
+ updatedBadge: "OpenRouter data \xB7 refreshed ",
268
+ panel: {
269
+ signalEyebrow: "Just landed",
270
+ signalTitle: "Newest on OpenRouter",
271
+ liveLabel: "live",
272
+ ctaEyebrow: "How your assistant picks",
273
+ body: "Your assistant compares these against your task and live-tests the finalists."
274
+ }
264
275
  },
265
276
  problem: {
266
277
  label: "Why this exists",
267
278
  heading: "Your assistant's model knowledge is stale",
268
279
  body: [
269
280
  'New models ship every week. Pricing changes. "Use GPT-4" or "use Claude 3.5" is usually months behind reality.',
270
- "Without live data, your assistant defaults to whatever it learned in training \u2014 often a model that's been superseded by something cheaper or better-suited to your task.",
271
- "Index9 gives it the data and the tools to actually compare."
281
+ "Without live data, your assistant defaults to whatever it learned in training. Usually a model superseded by something cheaper or better-suited to your task.",
282
+ "Index9 gives it the data, and the tools to compare."
272
283
  ]
273
284
  },
274
285
  howItWorks: {
275
- label: "How it works",
276
286
  heading: "How it works",
277
287
  subtitle: "Index9 adds 5 tools to your editor. Your assistant calls them when you ask about models.",
278
288
  steps: [
@@ -284,12 +294,12 @@ var SITE = {
284
294
  {
285
295
  number: "2",
286
296
  title: "Your assistant calls index9",
287
- body: "It searches live model data, compares finalists, and runs your prompt against the top candidates."
297
+ body: "It searches live model data, compares finalists, and runs your prompt against the top picks."
288
298
  },
289
299
  {
290
300
  number: "3",
291
301
  title: "You get a measured pick",
292
- body: "Backed by real cost numbers and real outputs \u2014 not training-data memory."
302
+ body: "Backed by real cost numbers and real outputs, not training-data memory."
293
303
  }
294
304
  ]
295
305
  },
@@ -299,7 +309,7 @@ var SITE = {
299
309
  subheading: "A Claude Code session picking a TypeScript code-review model. Real tool calls, real verdict.",
300
310
  prompt: {
301
311
  title: "The prompt",
302
- body: "Pick a model for a TypeScript code-review bot that runs on every PR. I want real quality without paying frontier rates on routine reviews. Test against this sample diff."
312
+ body: "Pick a model for a TypeScript code-review bot that runs on every PR. I want quality without paying frontier rates on routine reviews. Test against this diff."
303
313
  },
304
314
  toolCalls: {
305
315
  title: "What the assistant did",
@@ -327,7 +337,7 @@ var SITE = {
327
337
  ]
328
338
  },
329
339
  consideredTitle: "Recent models, evaluated",
330
- consideredSubtitle: "A trimmed view of the candidates the assistant ruled in and out. Each row pairs a decision with the reason behind it.",
340
+ consideredSubtitle: "Candidates the assistant ruled in and out, with the reason.",
331
341
  consideredRows: [
332
342
  {
333
343
  id: "openai/gpt-5.5",
@@ -358,16 +368,14 @@ var SITE = {
358
368
  title: "The pick",
359
369
  model: "z-ai/glm-5.1",
360
370
  body: "Open-weight, $1.05 per million input tokens. Caught both bugs in the sample diff at roughly $0.005 per PR, about 5\xD7 cheaper than running gpt-5.5 on every commit."
361
- },
362
- quote: {
363
- body: "The frontier model would have caught both bugs, at 5\xD7 the cost. The cheapest candidate missed them entirely. Only the live test surfaced the model that did both.",
364
- attribution: "index9 session trace"
365
371
  }
366
372
  },
367
373
  toolsSection: {
368
374
  label: "Tools",
369
375
  heading: "The 5 tools",
370
376
  subheading: "Your assistant chains these together. You don't call them directly.",
377
+ keyNotePrefix: "Only",
378
+ keyNoteSuffix: "needs an OpenRouter key. The rest work out of the box.",
371
379
  openRouterKey: "OpenRouter API key",
372
380
  noKeyRequired: "No key required",
373
381
  requiresLabel: "Requires ",
@@ -404,7 +412,7 @@ var SITE = {
404
412
  action: "compare_models",
405
413
  displayName: "compare_models",
406
414
  fullName: null,
407
- description: "Diffs 2\u201310 finalists side-by-side. Flags the cheapest pick for your expected token mix.",
415
+ description: "Diffs 2\u201310 finalists side-by-side. Flags the cheapest for your token mix.",
408
416
  badge: null,
409
417
  requiresKey: false
410
418
  },
@@ -413,7 +421,7 @@ var SITE = {
413
421
  action: "test_model",
414
422
  displayName: "test_model",
415
423
  fullName: null,
416
- description: "Runs your prompt across models. Returns output, latency, and real cost. Or dry-run for cost only.",
424
+ description: "Runs your prompt across models. Returns output, latency, cost. Dry-run for cost only.",
417
425
  badge: "Live",
418
426
  requiresKey: true
419
427
  }
@@ -438,7 +446,7 @@ var SITE = {
438
446
  },
439
447
  {
440
448
  question: "Does it pick the model for me?",
441
- answer: "No \u2014 it gives your assistant the data (search results, specs, cost diffs, live test outputs). Your assistant makes the call.",
449
+ answer: "No. It gives your assistant the data: search results, specs, cost diffs, live test outputs. Your assistant makes the call.",
442
450
  link: null
443
451
  },
444
452
  {
@@ -448,7 +456,7 @@ var SITE = {
448
456
  },
449
457
  {
450
458
  question: "Which models?",
451
- answer: `${MODEL_COUNT} from OpenRouter \u2014 OpenAI, Anthropic, Google, Meta, Mistral, DeepSeek, and more. Metadata refreshes every 30 minutes.`,
459
+ answer: `${MODEL_COUNT} from OpenRouter: OpenAI, Anthropic, Google, Meta, Mistral, DeepSeek, and more. Metadata refreshes every 30 minutes.`,
452
460
  link: null
453
461
  },
454
462
  {
@@ -458,7 +466,7 @@ var SITE = {
458
466
  },
459
467
  {
460
468
  question: "What's the project status?",
461
- answer: "Stable and in active use. Issues and feature requests welcome on GitHub.",
469
+ answer: "Stable. Issues and feature requests on GitHub.",
462
470
  link: null
463
471
  }
464
472
  ]
@@ -528,32 +536,9 @@ var SITE = {
528
536
  }
529
537
  };
530
538
  var README = {
531
- tagline: `Landing page, API, and MCP server for discovering, shortlisting, comparing, cost-modeling, and live-testing ${MODEL_COUNT} AI models.`,
532
539
  mcpDescription: `Discover, shortlist, compare, cost-model, and live-test ${MODEL_COUNT} AI models from your editor`,
533
- monorepoLayout: {
534
- appsWeb: "apps/web \u2014 Next.js 16 app (UI + API routes)",
535
- packagesCore: "packages/core \u2014 Shared Zod schemas, types, constants (@index9/core)",
536
- packagesMcp: "packages/mcp \u2014 Thin MCP stdio server calling the hosted API (@index9/mcp)"
537
- },
538
- quickStart: {
539
- install: "pnpm install",
540
- build: "pnpm build",
541
- test: "pnpm test",
542
- dev: "pnpm dev # run web app"
543
- },
544
- envNote: "Copy apps/web/.env.example to apps/web/.env.local and fill in values for local development.",
545
540
  mcpInstall: {
546
- cli: "npx -y @index9/mcp@latest",
547
- envNote: "Optional: set OPENROUTER_API_KEY in your MCP client config for live test_model calls. dryRun=true works without a key.",
548
- claudeCode: "Claude Code: Run `claude mcp add --transport stdio index9 -- npx -y @index9/mcp` or add the same config to .mcp.json / ~/.claude.json."
549
- },
550
- release: {
551
- step1: "Make changes in packages/mcp (core is internal, bundled into mcp)",
552
- step2: "Run pnpm changeset \u2014 add a changeset, select packages, choose bump type",
553
- step3: "Commit and push; open PR to main",
554
- step4: "Merge the PR; CI creates a Version Packages PR when changesets exist",
555
- step5: "Merge the version PR; CI publishes to npm and creates a GitHub Release with the .mcpb artifact attached",
556
- step6: "Users can install via npx @index9/mcp@latest or download .mcpb from Releases"
541
+ envNote: "Optional: set OPENROUTER_API_KEY in your MCP client config for live test_model calls. dryRun=true works without a key."
557
542
  }
558
543
  };
559
544
 
@@ -762,6 +747,7 @@ import { z as z6 } from "zod";
762
747
  var ResponseFormatSchema = z6.object({
763
748
  type: z6.string().min(1)
764
749
  }).catchall(z6.unknown()).optional();
750
+ var ProviderSortSchema = z6.enum(["throughput", "price", "latency"]);
765
751
  var TestRequestSchema = z6.object({
766
752
  prompt: z6.string().min(1).optional(),
767
753
  userContent: z6.array(UserContentPartSchema).min(1).optional(),
@@ -777,7 +763,30 @@ var TestRequestSchema = z6.object({
777
763
  seed: z6.number().int().optional(),
778
764
  responseFormat: ResponseFormatSchema,
779
765
  enforceJson: z6.boolean().optional(),
780
- retries: z6.number().int().min(0).max(3).optional()
766
+ retries: z6.number().int().min(0).max(3).optional(),
767
+ // Use OpenRouter's SSE streaming endpoint so capacity/refusal errors
768
+ // surface in ~1s instead of waiting the full per-model timeout for an
769
+ // empty 200 OK. Cost/tokens are still returned via stream_options.
770
+ stream: z6.boolean().optional(),
771
+ // First-token deadline (streaming only). If the upstream sends no
772
+ // delta within this window, abort the request. Defaults to 10s when
773
+ // streaming. Ignored when stream=false.
774
+ firstTokenTimeoutMs: z6.number().int().positive().optional(),
775
+ // Forwards as `provider.sort` to OpenRouter — opt into routing toward
776
+ // higher-throughput (or cheaper / lower-latency) providers when running benchmarks.
777
+ providerSort: ProviderSortSchema.optional(),
778
+ // Forwards as `provider.order` — try these provider slugs first in the
779
+ // given order before falling back. Capped to stay within reasonable
780
+ // limits and prevent abuse.
781
+ providerOrder: z6.array(z6.string().min(1)).min(1).max(8).optional(),
782
+ // Forwards as the top-level `models` array (NOT `model`). OpenRouter
783
+ // tries each in order if the primary is unavailable. Different intent
784
+ // from providerOrder, which routes within a single model.
785
+ fallbackModels: z6.array(z6.string().min(1)).min(1).max(5).optional(),
786
+ // When true, attach a `debug` field on each result with the raw
787
+ // upstream finish_reason, error message, provider name, refusal, and
788
+ // usage. Used to diagnose "missing assistant text" without re-running.
789
+ debug: z6.boolean().optional()
781
790
  }).strict().superRefine((data, ctx) => {
782
791
  if (data.dryRun === true) {
783
792
  if (!data.prompt && data.expectedPromptTokens === void 0) {
@@ -812,10 +821,27 @@ var TestFailureReasonSchema = z6.enum([
812
821
  "insufficient_credits",
813
822
  "model_unavailable",
814
823
  "rate_limited",
824
+ // Provider is overloaded / "at capacity" / "provisioned throughput
825
+ // required". A distinct reason from rate_limited so callers can apply
826
+ // a longer backoff or route to a fallback model.
827
+ "capacity",
815
828
  "timeout",
816
829
  "invalid_request",
817
830
  "unknown"
818
831
  ]);
832
+ var TestDebugInfoSchema = z6.object({
833
+ upstreamId: z6.string().optional(),
834
+ providerName: z6.string().optional(),
835
+ finishReason: z6.string().optional(),
836
+ upstreamError: z6.string().optional(),
837
+ refusal: z6.string().optional(),
838
+ hasToolCalls: z6.boolean().optional(),
839
+ usage: z6.object({
840
+ promptTokens: z6.number().optional(),
841
+ completionTokens: z6.number().optional(),
842
+ totalTokens: z6.number().optional()
843
+ }).optional()
844
+ });
819
845
  var TestModelMetadataSchema = z6.object({
820
846
  id: z6.string(),
821
847
  name: z6.string(),
@@ -832,7 +858,8 @@ var TestResultSuccessSchema = z6.object({
832
858
  latencyMs: z6.number().min(0),
833
859
  tokens: UsageTokensSchema,
834
860
  cost: z6.number().nullable().optional(),
835
- truncated: z6.boolean().optional()
861
+ truncated: z6.boolean().optional(),
862
+ debug: TestDebugInfoSchema.optional()
836
863
  });
837
864
  var TestResultFailureSchema = z6.object({
838
865
  modelId: z6.string(),
@@ -841,7 +868,8 @@ var TestResultFailureSchema = z6.object({
841
868
  model: TestModelMetadataSchema,
842
869
  error: z6.string(),
843
870
  failureReason: TestFailureReasonSchema.optional(),
844
- latencyMs: z6.number().min(0)
871
+ latencyMs: z6.number().min(0),
872
+ debug: TestDebugInfoSchema.optional()
845
873
  });
846
874
  var TestResultSchema = z6.discriminatedUnion("ok", [
847
875
  TestResultSuccessSchema,
@@ -902,8 +930,8 @@ function loadConfig() {
902
930
  }
903
931
 
904
932
  // src/client.ts
905
- var RETRY_DELAYS_MS = [1e3, 2e3, 4e3];
906
- var ATTEMPT_TIMEOUT_MS = 3e4;
933
+ var DEFAULT_RETRY_DELAYS_MS = [1e3, 2e3, 4e3];
934
+ var DEFAULT_ATTEMPT_TIMEOUT_MS = 3e4;
907
935
  function isRetryable(status) {
908
936
  return status === 429 || status >= 500;
909
937
  }
@@ -919,14 +947,17 @@ function toErrorMessage(error) {
919
947
  if (error instanceof Error && error.message.trim()) return error.message;
920
948
  return "Unknown error";
921
949
  }
922
- async function fetchWithRetry(url, options) {
950
+ async function fetchWithRetry(url, options, retryOptions) {
951
+ const attemptTimeoutMs = retryOptions?.attemptTimeoutMs ?? DEFAULT_ATTEMPT_TIMEOUT_MS;
952
+ const maxRetries = Math.max(0, retryOptions?.maxRetries ?? DEFAULT_RETRY_DELAYS_MS.length);
953
+ const retryDelaysMs = DEFAULT_RETRY_DELAYS_MS.slice(0, maxRetries);
923
954
  let lastResponse = null;
924
955
  let lastError;
925
- for (let i = 0; i <= RETRY_DELAYS_MS.length; i++) {
956
+ for (let i = 0; i <= maxRetries; i++) {
926
957
  const timeoutController = new AbortController();
927
958
  const timeoutId = setTimeout(() => {
928
959
  timeoutController.abort(new DOMException("Request timed out", "AbortError"));
929
- }, ATTEMPT_TIMEOUT_MS);
960
+ }, attemptTimeoutMs);
930
961
  const externalSignal = options.signal;
931
962
  const onAbort = () => {
932
963
  timeoutController.abort(
@@ -951,14 +982,12 @@ async function fetchWithRetry(url, options) {
951
982
  clearTimeout(timeoutId);
952
983
  externalSignal?.removeEventListener("abort", onAbort);
953
984
  }
954
- if (i < RETRY_DELAYS_MS.length) {
955
- await sleep(RETRY_DELAYS_MS[i]);
985
+ if (i < retryDelaysMs.length) {
986
+ await sleep(retryDelaysMs[i]);
956
987
  }
957
988
  }
958
989
  if (lastResponse) return lastResponse;
959
- throw new Error(
960
- `Request failed after ${RETRY_DELAYS_MS.length + 1} attempts: ${toErrorMessage(lastError)}`
961
- );
990
+ throw new Error(`Request failed after ${maxRetries + 1} attempts: ${toErrorMessage(lastError)}`);
962
991
  }
963
992
  function buildUrl(baseUrl, path, params) {
964
993
  const url = new URL(path, baseUrl);
@@ -1037,8 +1066,8 @@ function extractRecoveryFields(body) {
1037
1066
  }
1038
1067
  return out;
1039
1068
  }
1040
- async function callApi(ctx, url, options, responseSchema) {
1041
- const res = await fetchWithRetry(url, options);
1069
+ async function callApi(ctx, url, options, responseSchema, retryOptions) {
1070
+ const res = await fetchWithRetry(url, options, retryOptions);
1042
1071
  let body;
1043
1072
  try {
1044
1073
  body = await res.json();
@@ -1153,7 +1182,12 @@ async function handleTestModels(ctx, args) {
1153
1182
  ctx,
1154
1183
  `${ctx.baseUrl}${API_PATHS.test}`,
1155
1184
  { method: "POST", headers: reqHeaders, body: JSON.stringify(parsed.data) },
1156
- TestResponseSchema
1185
+ TestResponseSchema,
1186
+ // Live inference is non-idempotent and slow: each retry costs real money
1187
+ // and the server-side per-model retry/backoff already handles transient
1188
+ // errors. Give the call enough wall-clock to cover a worst-case 10-model
1189
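+ // batch × 60s per model (NOTE(review): 240s only covers this if the server runs models concurrently; serially it would be 600s, so confirm) and let the server decide on retries.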
1190
+ { attemptTimeoutMs: 24e4, maxRetries: 0 }
1157
1191
  );
1158
1192
  }
1159
1193
 
@@ -1264,7 +1298,21 @@ async function createServer() {
1264
1298
  "Structured output shape request forwarded to OpenRouter (e.g., { type: 'json_object' })."
1265
1299
  ),
1266
1300
  enforceJson: z7.boolean().optional().describe("When true, output must parse as JSON."),
1267
- retries: z7.number().int().min(0).max(3).optional().describe("Retries for transient failures.")
1301
+ retries: z7.number().int().min(0).max(3).optional().describe("Retries for transient failures."),
1302
+ stream: z7.boolean().optional().describe(
1303
+ "Use OpenRouter SSE streaming so capacity/refusal errors surface quickly. Defaults to false."
1304
+ ),
1305
+ firstTokenTimeoutMs: z7.number().int().min(1).optional().describe("Streaming-only first-token deadline in ms. Defaults to 10000."),
1306
+ providerSort: ProviderSortSchema.optional().describe(
1307
+ 'OpenRouter provider routing sort: "throughput", "price", or "latency".'
1308
+ ),
1309
+ providerOrder: z7.array(z7.string().min(1)).min(1).max(8).optional().describe("Provider slugs to try first, in order. Up to 8."),
1310
+ fallbackModels: z7.array(z7.string().min(1)).min(1).max(5).optional().describe(
1311
+ "Fallback model IDs OpenRouter may try if the primary is unavailable. Up to 5."
1312
+ ),
1313
+ debug: z7.boolean().optional().describe(
1314
+ "When true, include upstream finish_reason, provider, error, refusal, and usage."
1315
+ )
1268
1316
  },
1269
1317
  // No outputSchema: test_model returns a z.union of dry-run and live shapes.
1270
1318
  // The SDK supports only ZodRawShape | AnySchema for outputSchema; a discriminated-union
package/manifest.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "manifest_version": "0.3",
3
3
  "name": "index9",
4
- "version": "6.1.0",
4
+ "version": "6.2.0",
5
5
  "description": "Discover, shortlist, compare, cost-model, and live-test 300+ AI models from your editor",
6
6
  "author": {
7
7
  "name": "Index9"
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@index9/mcp",
3
- "version": "6.2.0",
3
+ "version": "6.3.0",
4
4
  "license": "MIT",
5
5
  "repository": {
6
6
  "type": "git",
@@ -24,11 +24,11 @@
24
24
  "zod": "^4.4.3"
25
25
  },
26
26
  "devDependencies": {
27
- "@types/node": "^25.6.2",
27
+ "@types/node": "^25.8.0",
28
28
  "tsup": "^8.5.1",
29
29
  "typescript": "6.0.3",
30
30
  "vitest": "^4.1.6",
31
- "@index9/core": "2.5.0"
31
+ "@index9/core": "2.6.0"
32
32
  },
33
33
  "engines": {
34
34
  "node": ">=20"