npm - @index9/mcp - Versions diffs - 6.2.0 → 6.3.0 - Mend

@index9/mcp 6.2.0 → 6.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/cli.js CHANGED Viewed

@@ -219,8 +219,14 @@ Parameters:
 - expectedPromptTokens: Estimated prompt-token count for dryRun cost estimation; overrides the prompt-string heuristic. Use to model "what would N-token requests cost?" without pasting N tokens.
 - expectedCompletionTokens: Optional completion token estimate used by dryRun
 - maxTokens, systemPrompt, temperature, topP, seed, responseFormat, enforceJson, retries: Live-testing controls (ignored when dryRun=true)
+- stream: Use OpenRouter's SSE streaming so capacity/refusal errors surface in ~1s instead of waiting the full per-model timeout for an empty 200. Defaults to false.
+- firstTokenTimeoutMs: Streaming-only deadline for the first delta. Defaults to 10s. If the upstream sends no token within this window, the request aborts and returns failureReason="timeout". Ignored when stream=false.
+- providerSort: "throughput" | "price" | "latency" \u2014 opt-in OpenRouter provider routing. Defaults to OpenRouter's load-balanced choice.
+- providerOrder: ordered list of provider slugs (up to 8). Try these providers first before falling back. Useful for steering around an overloaded provider for a single model.
+- fallbackModels: ordered list of model ids (up to 5). OpenRouter automatically retries the request against the next id when the primary is unavailable. Use sparingly \u2014 a benchmark should usually test the model you asked for, not a substitute.
+- debug: When true, each result includes a \`debug\` field with the raw upstream finish_reason, error message, provider name, refusal, and usage. Use to diagnose "missing assistant text" without re-running.
-Results (live): each result carries modelId (the id you passed), resolvedModelId (canonical id, present when the input was an alias), ok, response, latencyMs, tokens { prompt, completion }, cost (USD; live from OpenRouter when available, else estimated from cached pricing), and truncated=true when finish_reason is "length". On failure, results include \`error\` (free-form) plus \`failureReason\` ("insufficient_credits" | "model_unavailable" | "rate_limited" | "timeout" | "invalid_request" | "unknown") so callers can pick a retry strategy without parsing the error string.
+Results (live): each result carries modelId (the id you passed), resolvedModelId (canonical id, present when the input was an alias), ok, response, latencyMs, tokens { prompt, completion }, cost (USD; live from OpenRouter when available, else estimated from cached pricing), and truncated=true when finish_reason is "length". On failure, results include \`error\` (free-form) plus \`failureReason\` ("insufficient_credits" | "model_unavailable" | "rate_limited" | "capacity" | "timeout" | "invalid_request" | "unknown") so callers can pick a retry strategy without parsing the error string. \`capacity\` indicates the provider is overloaded \u2014 apply a longer backoff or set \`fallbackModels\` and retry. When \`debug: true\` is set, each result also carries a \`debug\` block with the upstream provider's diagnostic fields.
 Results (dryRun): each entry carries \`tokenCostUsd\`, \`requestCostUsd\`, \`totalCostUsd\` (matches \`estimatedCost\`, includes per-request fees), and \`estimatedCostBasis\` (same enum as compare_models.workloadCosts). Use find_models or get_models first to identify model ids.
@@ -255,24 +261,28 @@ var SITE = {
   hero: {
     titleLine1: "Pick the right AI model",
     titleLine2: "from chat",
-    subtitle: "Index9 is an MCP server. Your coding assistant uses it to search, compare, and live-test 300+ models on the task you're working on, so it recommends the best fit.",
-    proof: ["Live OpenRouter data \xB7 300+ models \xB7 refreshed every 30 min"],
+    subtitle: "An MCP server your coding assistant uses to search, compare, and live-test 300+ models for the task you're on.",
     pricingNote: "Free. You only pay OpenRouter for live model calls.",
-    getStarted: "Add index9 to your editor",
     seeHowItWorks: "See a real session",
-    updatedBadge: "OpenRouter data \xB7 refreshed "
+    updatedBadge: "OpenRouter data \xB7 refreshed ",
+    panel: {
+      signalEyebrow: "Just landed",
+      signalTitle: "Newest on OpenRouter",
+      liveLabel: "live",
+      ctaEyebrow: "How your assistant picks",
+      body: "Your assistant compares these against your task and live-tests the finalists."
+    }
   },
   problem: {
     label: "Why this exists",
     heading: "Your assistant's model knowledge is stale",
     body: [
       'New models ship every week. Pricing changes. "Use GPT-4" or "use Claude 3.5" is usually months behind reality.',
-      "Without live data, your assistant defaults to whatever it learned in training \u2014 often a model that's been superseded by something cheaper or better-suited to your task.",
-      "Index9 gives it the data and the tools to actually compare."
+      "Without live data, your assistant defaults to whatever it learned in training. Usually a model superseded by something cheaper or better-suited to your task.",
+      "Index9 gives it the data, and the tools to compare."
     ]
   },
   howItWorks: {
-    label: "How it works",
     heading: "How it works",
     subtitle: "Index9 adds 5 tools to your editor. Your assistant calls them when you ask about models.",
     steps: [
@@ -284,12 +294,12 @@ var SITE = {
       {
         number: "2",
         title: "Your assistant calls index9",
-        body: "It searches live model data, compares finalists, and runs your prompt against the top candidates."
+        body: "It searches live model data, compares finalists, and runs your prompt against the top picks."
       },
       {
         number: "3",
         title: "You get a measured pick",
-        body: "Backed by real cost numbers and real outputs \u2014 not training-data memory."
+        body: "Backed by real cost numbers and real outputs, not training-data memory."
       }
     ]
   },
@@ -299,7 +309,7 @@ var SITE = {
     subheading: "A Claude Code session picking a TypeScript code-review model. Real tool calls, real verdict.",
     prompt: {
       title: "The prompt",
-      body: "Pick a model for a TypeScript code-review bot that runs on every PR. I want real quality without paying frontier rates on routine reviews. Test against this sample diff."
+      body: "Pick a model for a TypeScript code-review bot that runs on every PR. I want quality without paying frontier rates on routine reviews. Test against this diff."
     },
     toolCalls: {
       title: "What the assistant did",
@@ -327,7 +337,7 @@ var SITE = {
       ]
     },
     consideredTitle: "Recent models, evaluated",
-    consideredSubtitle: "A trimmed view of the candidates the assistant ruled in and out. Each row pairs a decision with the reason behind it.",
+    consideredSubtitle: "Candidates the assistant ruled in and out, with the reason.",
     consideredRows: [
       {
         id: "openai/gpt-5.5",
@@ -358,16 +368,14 @@ var SITE = {
       title: "The pick",
       model: "z-ai/glm-5.1",
       body: "Open-weight, $1.05 per million input tokens. Caught both bugs in the sample diff at roughly $0.005 per PR, about 5\xD7 cheaper than running gpt-5.5 on every commit."
-    },
-    quote: {
-      body: "The frontier model would have caught both bugs, at 5\xD7 the cost. The cheapest candidate missed them entirely. Only the live test surfaced the model that did both.",
-      attribution: "index9 session trace"
     }
   },
   toolsSection: {
     label: "Tools",
     heading: "The 5 tools",
     subheading: "Your assistant chains these together. You don't call them directly.",
+    keyNotePrefix: "Only",
+    keyNoteSuffix: "needs an OpenRouter key. The rest work out of the box.",
     openRouterKey: "OpenRouter API key",
     noKeyRequired: "No key required",
     requiresLabel: "Requires ",
@@ -404,7 +412,7 @@ var SITE = {
         action: "compare_models",
         displayName: "compare_models",
         fullName: null,
-        description: "Diffs 2\u201310 finalists side-by-side. Flags the cheapest pick for your expected token mix.",
+        description: "Diffs 2\u201310 finalists side-by-side. Flags the cheapest for your token mix.",
         badge: null,
         requiresKey: false
       },
@@ -413,7 +421,7 @@ var SITE = {
         action: "test_model",
         displayName: "test_model",
         fullName: null,
-        description: "Runs your prompt across models. Returns output, latency, and real cost. Or dry-run for cost only.",
+        description: "Runs your prompt across models. Returns output, latency, cost. Dry-run for cost only.",
         badge: "Live",
         requiresKey: true
       }
@@ -438,7 +446,7 @@ var SITE = {
       },
       {
         question: "Does it pick the model for me?",
-        answer: "No \u2014 it gives your assistant the data (search results, specs, cost diffs, live test outputs). Your assistant makes the call.",
+        answer: "No. It gives your assistant the data: search results, specs, cost diffs, live test outputs. Your assistant makes the call.",
         link: null
       },
       {
@@ -448,7 +456,7 @@ var SITE = {
       },
       {
         question: "Which models?",
-        answer: `${MODEL_COUNT} from OpenRouter \u2014 OpenAI, Anthropic, Google, Meta, Mistral, DeepSeek, and more. Metadata refreshes every 30 minutes.`,
+        answer: `${MODEL_COUNT} from OpenRouter: OpenAI, Anthropic, Google, Meta, Mistral, DeepSeek, and more. Metadata refreshes every 30 minutes.`,
         link: null
       },
       {
@@ -458,7 +466,7 @@ var SITE = {
       },
       {
         question: "What's the project status?",
-        answer: "Stable and in active use. Issues and feature requests welcome on GitHub.",
+        answer: "Stable. Issues and feature requests on GitHub.",
         link: null
       }
     ]
@@ -528,32 +536,9 @@ var SITE = {
   }
 };
 var README = {
-  tagline: `Landing page, API, and MCP server for discovering, shortlisting, comparing, cost-modeling, and live-testing ${MODEL_COUNT} AI models.`,
   mcpDescription: `Discover, shortlist, compare, cost-model, and live-test ${MODEL_COUNT} AI models from your editor`,
-  monorepoLayout: {
-    appsWeb: "apps/web \u2014 Next.js 16 app (UI + API routes)",
-    packagesCore: "packages/core \u2014 Shared Zod schemas, types, constants (@index9/core)",
-    packagesMcp: "packages/mcp \u2014 Thin MCP stdio server calling the hosted API (@index9/mcp)"
-  },
-  quickStart: {
-    install: "pnpm install",
-    build: "pnpm build",
-    test: "pnpm test",
-    dev: "pnpm dev    # run web app"
-  },
-  envNote: "Copy apps/web/.env.example to apps/web/.env.local and fill in values for local development.",
   mcpInstall: {
-    cli: "npx -y @index9/mcp@latest",
-    envNote: "Optional: set OPENROUTER_API_KEY in your MCP client config for live test_model calls. dryRun=true works without a key.",
-    claudeCode: "Claude Code: Run `claude mcp add --transport stdio index9 -- npx -y @index9/mcp` or add the same config to .mcp.json / ~/.claude.json."
-  },
-  release: {
-    step1: "Make changes in packages/mcp (core is internal, bundled into mcp)",
-    step2: "Run pnpm changeset \u2014 add a changeset, select packages, choose bump type",
-    step3: "Commit and push; open PR to main",
-    step4: "Merge the PR; CI creates a Version Packages PR when changesets exist",
-    step5: "Merge the version PR; CI publishes to npm and creates a GitHub Release with the .mcpb artifact attached",
-    step6: "Users can install via npx @index9/mcp@latest or download .mcpb from Releases"
+    envNote: "Optional: set OPENROUTER_API_KEY in your MCP client config for live test_model calls. dryRun=true works without a key."
   }
 };
@@ -762,6 +747,7 @@ import { z as z6 } from "zod";
 var ResponseFormatSchema = z6.object({
   type: z6.string().min(1)
 }).catchall(z6.unknown()).optional();
+var ProviderSortSchema = z6.enum(["throughput", "price", "latency"]);
 var TestRequestSchema = z6.object({
   prompt: z6.string().min(1).optional(),
   userContent: z6.array(UserContentPartSchema).min(1).optional(),
@@ -777,7 +763,30 @@ var TestRequestSchema = z6.object({
   seed: z6.number().int().optional(),
   responseFormat: ResponseFormatSchema,
   enforceJson: z6.boolean().optional(),
-  retries: z6.number().int().min(0).max(3).optional()
+  retries: z6.number().int().min(0).max(3).optional(),
+  // Use OpenRouter's SSE streaming endpoint so capacity/refusal errors
+  // surface in ~1s instead of waiting the full per-model timeout for an
+  // empty 200 OK. Cost/tokens are still returned via stream_options.
+  stream: z6.boolean().optional(),
+  // First-token deadline (streaming only). If the upstream sends no
+  // delta within this window, abort the request. Defaults to 10s when
+  // streaming. Ignored when stream=false.
+  firstTokenTimeoutMs: z6.number().int().positive().optional(),
+  // Forwards as `provider.sort` to OpenRouter — opt into routing toward
+  // higher-throughput providers when running benchmarks.
+  providerSort: ProviderSortSchema.optional(),
+  // Forwards as `provider.order` — try these provider slugs first in the
+  // given order before falling back. Capped to stay within reasonable
+  // limits and prevent abuse.
+  providerOrder: z6.array(z6.string().min(1)).min(1).max(8).optional(),
+  // Forwards as the top-level `models` array (NOT `model`). OpenRouter
+  // tries each in order if the primary is unavailable. Different intent
+  // from providerOrder, which routes within a single model.
+  fallbackModels: z6.array(z6.string().min(1)).min(1).max(5).optional(),
+  // When true, attach a `debug` field on each result with the raw
+  // upstream finish_reason, error message, provider name, refusal, and
+  // usage. Used to diagnose "missing assistant text" without re-running.
+  debug: z6.boolean().optional()
 }).strict().superRefine((data, ctx) => {
   if (data.dryRun === true) {
     if (!data.prompt && data.expectedPromptTokens === void 0) {
@@ -812,10 +821,27 @@ var TestFailureReasonSchema = z6.enum([
   "insufficient_credits",
   "model_unavailable",
   "rate_limited",
+  // Provider is overloaded / "at capacity" / "provisioned throughput
+  // required". A distinct reason from rate_limited so callers can apply
+  // a longer backoff or route to a fallback model.
+  "capacity",
   "timeout",
   "invalid_request",
   "unknown"
 ]);
+var TestDebugInfoSchema = z6.object({
+  upstreamId: z6.string().optional(),
+  providerName: z6.string().optional(),
+  finishReason: z6.string().optional(),
+  upstreamError: z6.string().optional(),
+  refusal: z6.string().optional(),
+  hasToolCalls: z6.boolean().optional(),
+  usage: z6.object({
+    promptTokens: z6.number().optional(),
+    completionTokens: z6.number().optional(),
+    totalTokens: z6.number().optional()
+  }).optional()
+});
 var TestModelMetadataSchema = z6.object({
   id: z6.string(),
   name: z6.string(),
@@ -832,7 +858,8 @@ var TestResultSuccessSchema = z6.object({
   latencyMs: z6.number().min(0),
   tokens: UsageTokensSchema,
   cost: z6.number().nullable().optional(),
-  truncated: z6.boolean().optional()
+  truncated: z6.boolean().optional(),
+  debug: TestDebugInfoSchema.optional()
 });
 var TestResultFailureSchema = z6.object({
   modelId: z6.string(),
@@ -841,7 +868,8 @@ var TestResultFailureSchema = z6.object({
   model: TestModelMetadataSchema,
   error: z6.string(),
   failureReason: TestFailureReasonSchema.optional(),
-  latencyMs: z6.number().min(0)
+  latencyMs: z6.number().min(0),
+  debug: TestDebugInfoSchema.optional()
 });
 var TestResultSchema = z6.discriminatedUnion("ok", [
   TestResultSuccessSchema,
@@ -902,8 +930,8 @@ function loadConfig() {
 }
 // src/client.ts
-var RETRY_DELAYS_MS = [1e3, 2e3, 4e3];
-var ATTEMPT_TIMEOUT_MS = 3e4;
+var DEFAULT_RETRY_DELAYS_MS = [1e3, 2e3, 4e3];
+var DEFAULT_ATTEMPT_TIMEOUT_MS = 3e4;
 function isRetryable(status) {
   return status === 429 || status >= 500;
 }
@@ -919,14 +947,17 @@ function toErrorMessage(error) {
   if (error instanceof Error && error.message.trim()) return error.message;
   return "Unknown error";
 }
-async function fetchWithRetry(url, options) {
+async function fetchWithRetry(url, options, retryOptions) {
+  const attemptTimeoutMs = retryOptions?.attemptTimeoutMs ?? DEFAULT_ATTEMPT_TIMEOUT_MS;
+  const maxRetries = Math.max(0, retryOptions?.maxRetries ?? DEFAULT_RETRY_DELAYS_MS.length);
+  const retryDelaysMs = DEFAULT_RETRY_DELAYS_MS.slice(0, maxRetries);
   let lastResponse = null;
   let lastError;
-  for (let i = 0; i <= RETRY_DELAYS_MS.length; i++) {
+  for (let i = 0; i <= maxRetries; i++) {
     const timeoutController = new AbortController();
     const timeoutId = setTimeout(() => {
       timeoutController.abort(new DOMException("Request timed out", "AbortError"));
-    }, ATTEMPT_TIMEOUT_MS);
+    }, attemptTimeoutMs);
     const externalSignal = options.signal;
     const onAbort = () => {
       timeoutController.abort(
@@ -951,14 +982,12 @@ async function fetchWithRetry(url, options) {
       clearTimeout(timeoutId);
       externalSignal?.removeEventListener("abort", onAbort);
     }
-    if (i < RETRY_DELAYS_MS.length) {
-      await sleep(RETRY_DELAYS_MS[i]);
+    if (i < retryDelaysMs.length) {
+      await sleep(retryDelaysMs[i]);
     }
   }
   if (lastResponse) return lastResponse;
-  throw new Error(
-    `Request failed after ${RETRY_DELAYS_MS.length + 1} attempts: ${toErrorMessage(lastError)}`
-  );
+  throw new Error(`Request failed after ${maxRetries + 1} attempts: ${toErrorMessage(lastError)}`);
 }
 function buildUrl(baseUrl, path, params) {
   const url = new URL(path, baseUrl);
@@ -1037,8 +1066,8 @@ function extractRecoveryFields(body) {
   }
   return out;
 }
-async function callApi(ctx, url, options, responseSchema) {
-  const res = await fetchWithRetry(url, options);
+async function callApi(ctx, url, options, responseSchema, retryOptions) {
+  const res = await fetchWithRetry(url, options, retryOptions);
   let body;
   try {
     body = await res.json();
@@ -1153,7 +1182,12 @@ async function handleTestModels(ctx, args) {
     ctx,
     `${ctx.baseUrl}${API_PATHS.test}`,
     { method: "POST", headers: reqHeaders, body: JSON.stringify(parsed.data) },
-    TestResponseSchema
+    TestResponseSchema,
+    // Live inference is non-idempotent and slow: each retry costs real money
+    // and the server-side per-model retry/backoff already handles transient
+    // errors. Give the call enough wall-clock to cover a worst-case 10-model
+    // batch × 60s per model and let the server decide on retries.
+    { attemptTimeoutMs: 24e4, maxRetries: 0 }
   );
 }
@@ -1264,7 +1298,21 @@ async function createServer() {
           "Structured output shape request forwarded to OpenRouter (e.g., { type: 'json_object' })."
         ),
         enforceJson: z7.boolean().optional().describe("When true, output must parse as JSON."),
-        retries: z7.number().int().min(0).max(3).optional().describe("Retries for transient failures.")
+        retries: z7.number().int().min(0).max(3).optional().describe("Retries for transient failures."),
+        stream: z7.boolean().optional().describe(
+          "Use OpenRouter SSE streaming so capacity/refusal errors surface quickly. Defaults to false."
+        ),
+        firstTokenTimeoutMs: z7.number().int().min(1).optional().describe("Streaming-only first-token deadline in ms. Defaults to 10000."),
+        providerSort: ProviderSortSchema.optional().describe(
+          'OpenRouter provider routing sort: "throughput", "price", or "latency".'
+        ),
+        providerOrder: z7.array(z7.string().min(1)).min(1).max(8).optional().describe("Provider slugs to try first, in order. Up to 8."),
+        fallbackModels: z7.array(z7.string().min(1)).min(1).max(5).optional().describe(
+          "Fallback model IDs OpenRouter may try if the primary is unavailable. Up to 5."
+        ),
+        debug: z7.boolean().optional().describe(
+          "When true, include upstream finish_reason, provider, error, refusal, and usage."
+        )
       },
       // No outputSchema: test_model returns a z.union of dry-run and live shapes.
       // The SDK supports only ZodRawShape | AnySchema for outputSchema; a discriminated-union

package/manifest.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "manifest_version": "0.3",
   "name": "index9",
-  "version": "6.1.0",
+  "version": "6.2.0",
   "description": "Discover, shortlist, compare, cost-model, and live-test 300+ AI models from your editor",
   "author": {
     "name": "Index9"

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@index9/mcp",
-  "version": "6.2.0",
+  "version": "6.3.0",
   "license": "MIT",
   "repository": {
     "type": "git",
@@ -24,11 +24,11 @@
     "zod": "^4.4.3"
   },
   "devDependencies": {
-    "@types/node": "^25.6.2",
+    "@types/node": "^25.8.0",
     "tsup": "^8.5.1",
     "typescript": "6.0.3",
     "vitest": "^4.1.6",
-    "@index9/core": "2.5.0"
+    "@index9/core": "2.6.0"
   },
   "engines": {
     "node": ">=20"