@index9/mcp 6.0.0 → 6.1.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -43,6 +43,11 @@ var Index9MetaSchema = z.object({
43
43
  retryAfterSeconds: z.number().optional(),
44
44
  rateLimit: RateLimitMetaSchema.optional()
45
45
  });
46
+ var MissingModelDiagnosticSchema = z.object({
47
+ reason: z.enum(["unknown_provider", "no_match", "suggestions_available", "ambiguous_alias"]),
48
+ provider: z.string().optional(),
49
+ message: z.string()
50
+ });
46
51
  var UserContentTextPartSchema = z.strictObject({
47
52
  type: z.literal("text"),
48
53
  text: z.string().trim().min(1)
@@ -101,8 +106,9 @@ Typical workflow:
101
106
  Key rules:
102
107
  - find_models requires \`q\` when \`sortBy=relevance\` (the default). Omit \`q\` only with \`sortBy=created\` or \`sortBy=price\`.
103
108
  - find_models price-asc tends to be dominated by free preview models \u2014 pass \`excludeFree=true\` when you want a paid SLA.
104
- - find_models flags \`meta.confidence: "low"\` when no candidate matched on keyword (BM25). When that fires, prefer \`meta.suggestion\` over the returned scores; weak hits are capped at score=30 so they don't masquerade as strong matches.
105
- - get_models accepts aliases (display names, short names) \u2014 not just full IDs. Unknown ids return in missingIds with \`suggestions\` (token-fuzzy or recency-anchored newest-from-provider). Retry with one of the suggested ids.
109
+ - find_models always emits \`meta.confidence\` ("high" | "low") on semantic queries. Low means no candidate matched on keyword (BM25); \`meta.lowConfidenceReason\` is "no_keyword_matches" or "no_results" and \`meta.suggestion\` carries an actionable hint. Weak hits are capped at score=30 so they don't masquerade as strong matches. Pass \`requireKeywordMatch: true\` to get an empty page instead of weak vector-only neighbors.
110
+ - find_models with sortBy=price exposes \`pricing.effectivePromptPerMillion\` and \`pageInfo.priceSortBasis\` \u2014 sort order may diverge from displayed promptPerMillion for models with per-request fees.
111
+ - get_models accepts aliases (display names, short names) \u2014 not just full IDs. Unknown ids return in missingIds with \`suggestions\` (token-fuzzy or recency-anchored newest-from-provider) and \`missingDiagnostics\` keyed by id with \`reason\` ("unknown_provider" | "no_match" | "suggestions_available" | "ambiguous_alias") so retry strategy is explicit. Retry with one of the suggested ids.
106
112
  - compare_models accepts the same alias formats as get_models. Use it instead of N parallel get_models calls when the user is comparing finalists.
107
113
  - Use test_model with \`dryRun=true\` to estimate cost before live testing. Pass \`expectedPromptTokens\` for capacity planning at sizes you don't want to paste in full.
108
114
  - test_model with \`dryRun=false\` (default) requires OPENROUTER_API_KEY and incurs real usage costs.
@@ -129,11 +135,11 @@ Examples:
129
135
 
130
136
  Valid capabilities: ${CAPABILITIES.join(", ")}.
131
137
 
132
- Each result: id, name, description, created (unix seconds), createdAt (ISO 8601), contextLength, maxOutputTokens, pricing.{promptPerMillion, completionPerMillion} (numbers, USD per million tokens), inputModalities[] / outputModalities[] (e.g. ["text","image"] \u2014 check at a glance to spot text-only vs multimodal models), capabilities[], score.
138
+ Each result: id, name, description, created (unix seconds), createdAt (ISO 8601), contextLength, maxOutputTokens, pricing.{promptPerMillion, completionPerMillion} (rounded display $/M), pricing.{promptPerToken, completionPerToken, requestUsd} (exact, use for cost math), inputModalities[] / outputModalities[], capabilities[], score. With sortBy=price, results also expose pricing.effectivePromptPerMillion and pageInfo.priceSortBasis \u2014 sort order may diverge from displayed promptPerMillion for models with per-request fees.
133
139
 
134
140
  \`score\` is 0-100: the best match per page scores 100; others scale proportionally. Combines semantic similarity and keyword matching. Null when sorting by price or date.
135
141
 
136
- \`q\` must be at least 2 characters when provided. \`meta.confidence\` is "low" when no candidate matched on keyword (BM25), meaning the ranker fell back to vector similarity alone \u2014 typo, gibberish, or a query the catalog can't answer. When low, \`meta.suggestion\` carries an actionable hint and \`score\` values are capped at 30 so weak hits don't masquerade as strong ones.
142
+ \`q\` must be at least 2 characters when provided. For semantic queries, \`meta.confidence\` is always emitted as "high" or "low". Low means no candidate matched on keyword (BM25); \`meta.lowConfidenceReason\` is "no_keyword_matches" or "no_results" and \`meta.suggestion\` carries an actionable hint. Pass \`requireKeywordMatch: true\` to suppress weak hits and get an empty page on low confidence.
137
143
 
138
144
  Pass result.id to get_models for full specs or to test_model for live testing.`,
139
145
  requiresKey: false
@@ -150,12 +156,14 @@ Response: { results: (Model | null)[], missingIds: string[], resolvedAliases?: R
150
156
  - id, canonicalSlug, name, description
151
157
  - created (unix seconds), createdAt (ISO 8601), knowledgeCutoff (ISO date or null)
152
158
  - contextLength (tokens), maxOutputTokens, isModerated
153
- - pricing: { promptPerMillion, completionPerMillion, requestUsd, imageUsd } \u2014 all USD, all numbers. Token prices are per million tokens; request/image are per unit.
159
+ - pricing: { promptPerMillion, completionPerMillion, promptPerToken, completionPerToken, requestUsd, imageUsd } \u2014 *PerMillion is rounded display, *PerToken is exact (use for cost math). request/image are flat per-unit fees.
154
160
  - architecture: { inputModalities[], outputModalities[], tokenizer, instructType }
155
161
  - capabilities[]: normalized capability flags (same values as find_models and capabilitiesAll/Any)
156
162
  - supportedParameters[]: OpenRouter parameters the model accepts (e.g., "temperature", "tools", "response_format")
157
163
 
158
- Entries in results are null when the id is unknown; those ids appear in missingIds. Ambiguous aliases appear in ambiguousAliases with candidate canonical ids \u2014 pass a canonical id to disambiguate. Unknown ids that partially match (e.g. "sonnet" \u2192 all Claude Sonnet variants) appear in suggestions with up to 5 candidate ids. When token-overlap finds nothing but the id is shaped like \`provider/<unknown>\` and the provider exists, suggestions falls back to the 5 newest models from that provider (real created timestamps, no hardcoded "popular" list). Retry with one of the suggested ids.`,
164
+ Entries in results are null when the id is unknown; those ids appear in missingIds. Ambiguous aliases appear in ambiguousAliases with candidate canonical ids \u2014 pass a canonical id to disambiguate. Unknown ids that partially match (e.g. "sonnet" \u2192 all Claude Sonnet variants) appear in suggestions with up to 5 candidate ids. When token-overlap finds nothing but the id is shaped like \`provider/<unknown>\` and the provider exists, suggestions falls back to the 5 newest models from that provider (real created timestamps, no hardcoded "popular" list). Retry with one of the suggested ids.
165
+
166
+ \`missingDiagnostics\` (when present) gives a machine-readable reason per missing id: \`unknown_provider\` (the prefix before / isn't in the catalog \u2014 fix the provider, not the model name), \`ambiguous_alias\`, \`suggestions_available\` (mirrors suggestions[id]), or \`no_match\`.`,
159
167
  requiresKey: false
160
168
  },
161
169
  compare_models: {
@@ -170,9 +178,9 @@ Response: { models: ModelResponse[], diff: { contextLength, maxOutputTokens, pro
170
178
 
171
179
  Each numeric/string diff field has { allEqual: boolean, values: Record<id, value|null> }. Capability/parameter diffs have { commonAll: string[], uniquePerModel: Record<id, string[]> }. cheapestForPromptPerMillion / largestContext are convenience picks across the supplied models \u2014 null when the field is missing on every model.
172
180
 
173
- Optional: pass \`expectedPromptTokens\` AND \`expectedCompletionTokens\` to also receive \`workloadCosts\` (per-model totalCostUsd) and \`cheapestForRealisticWorkload\` \u2014 the actual cheapest given the user's expected token mix. This matters when prompt:completion price ratios diverge across models (e.g., a model with cheap prompt but expensive completion can lose against a flatter-priced sibling under heavy completions).
181
+ Optional: pass \`expectedPromptTokens\` AND \`expectedCompletionTokens\` to also receive \`workloadCosts\` and \`cheapestForRealisticWorkload\` \u2014 the actual cheapest given the user's expected token mix. Each \`workloadCosts[i]\` carries \`tokenCostUsd\` (token-only), \`requestCostUsd\` (per-request fee), \`totalCostUsd\` (sum, includes request fees), and \`pricingBasis\` ("exact_per_token" | "rounded_per_million" | "unavailable"). This matters when prompt:completion price ratios diverge across models, or when a model has a per-request fee.
174
182
 
175
- Accepts the same alias formats as get_models. Unknown ids are returned in missingIds (with suggestions when partial matches exist).`,
183
+ Accepts the same alias formats as get_models. Unknown ids are returned in missingIds (with suggestions when partial matches exist, plus \`missingDiagnostics\` carrying a machine-readable reason per id).`,
176
184
  requiresKey: false
177
185
  },
178
186
  list_facets: {
@@ -206,7 +214,9 @@ Parameters:
206
214
  - expectedCompletionTokens: Optional completion token estimate used by dryRun
207
215
  - maxTokens, systemPrompt, temperature, topP, seed, responseFormat, enforceJson, retries: Live-testing controls (ignored when dryRun=true)
208
216
 
209
- Results (live): each result carries modelId (the id you passed), resolvedModelId (canonical id, present when the input was an alias), ok, response, latencyMs, tokens { prompt, completion }, cost (USD; live from OpenRouter when available, else estimated from cached pricing), and truncated=true when finish_reason is "length". Use find_models or get_models first to identify model ids.`,
217
+ Results (live): each result carries modelId (the id you passed), resolvedModelId (canonical id, present when the input was an alias), ok, response, latencyMs, tokens { prompt, completion }, cost (USD; live from OpenRouter when available, else estimated from cached pricing), and truncated=true when finish_reason is "length". On failure, results include \`error\` (free-form) plus \`failureReason\` ("insufficient_credits" | "model_unavailable" | "rate_limited" | "timeout" | "invalid_request" | "unknown") so callers can pick a retry strategy without parsing the error string.
218
+
219
+ Results (dryRun): each entry carries \`tokenCostUsd\`, \`requestCostUsd\`, \`totalCostUsd\` (matches \`estimatedCost\`, includes per-request fees), and \`estimatedCostBasis\` (same enum as compare_models.workloadCosts). Use find_models or get_models first to identify model ids.`,
210
220
  requiresKey: true
211
221
  }
212
222
  };
@@ -219,6 +229,7 @@ var PARAM_DESCRIPTIONS = {
219
229
  modality: `Required output modality. Filters on the model's output modalities, not input capabilities. For example, "image" finds image-generation models, while capabilitiesAll=["vision"] finds models that accept image input. Valid values: ${OUTPUT_MODALITIES.join(", ")}.`,
220
230
  provider: `Provider prefix filter. Array of provider slugs \u2014 a model matches if its ID starts with any of them (e.g., ['openai'] matches 'openai/gpt-4o'; ['openai','anthropic'] matches both). Pass a single-element array for one provider. Common providers: ${COMMON_PROVIDERS.join(", ")}.`,
221
231
  excludeFree: `When true, exclude models with id ending in ':free'. Useful for sortBy=price (which would otherwise be dominated by free-tier preview models) and when you want a paid SLA. Default false.`,
232
+ requireKeywordMatch: `When true, suppress weak vector-only results from semantic queries. If no candidate has a BM25 keyword hit, returns an empty page with meta.confidence='low' and meta.lowConfidenceReason \u2014 instead of returning misleading nearest-neighbor matches. Filter-only queries (sortBy=created or sortBy=price without q) ignore this flag. Default false.`,
222
233
  expectedPromptTokens: `Expected number of prompt tokens for dryRun cost estimation. When set, overrides the heuristic that counts characters from the literal \`prompt\` string \u2014 use this for capacity planning ("what would 6000-token reviews cost?") without pasting filler. If both are omitted, the prompt string is tokenized at ~4 chars/token.`,
223
234
  expectedCompletionTokens: `Expected number of completion tokens for cost estimation (default: 256). Typical ranges: 100-500 for quick tests, 1000-2000 for code generation, 4000+ for long-form content. This is a heuristic \u2014 actual billed tokens may differ.`
224
235
  };
@@ -556,7 +567,8 @@ var SearchQuerySchema = z2.object({
556
567
  capabilitiesAny: z2.array(z2.enum(CAPABILITIES)).optional(),
557
568
  modality: z2.enum(OUTPUT_MODALITIES).optional(),
558
569
  provider: z2.array(z2.string().min(1)).optional(),
559
- excludeFree: z2.boolean().optional()
570
+ excludeFree: z2.boolean().optional(),
571
+ requireKeywordMatch: z2.boolean().optional()
560
572
  }).strict();
561
573
  var SearchResultSchema = z2.object({
562
574
  id: z2.string(),
@@ -568,7 +580,11 @@ var SearchResultSchema = z2.object({
568
580
  maxOutputTokens: z2.number().nullable(),
569
581
  pricing: z2.object({
570
582
  promptPerMillion: z2.number().nullable(),
571
- completionPerMillion: z2.number().nullable()
583
+ completionPerMillion: z2.number().nullable(),
584
+ promptPerToken: z2.number().nullable().optional(),
585
+ completionPerToken: z2.number().nullable().optional(),
586
+ requestUsd: z2.number().nullable().optional(),
587
+ effectivePromptPerMillion: z2.number().nullable().optional()
572
588
  }),
573
589
  inputModalities: z2.array(z2.string()),
574
590
  outputModalities: z2.array(z2.string()),
@@ -582,13 +598,15 @@ var SearchResponseSchema = z2.object({
582
598
  limit: z2.number(),
583
599
  hasMore: z2.boolean(),
584
600
  sortBy: SearchSortBySchema,
585
- sortOrder: SearchSortOrderSchema
601
+ sortOrder: SearchSortOrderSchema,
602
+ priceSortBasis: z2.literal("effective_prompt_per_million").optional()
586
603
  }),
587
604
  meta: z2.object({
588
605
  queryMode: z2.enum(["semantic", "filter_only"]),
589
606
  ranking: z2.literal("hybrid_rrf"),
590
607
  confidence: z2.enum(["high", "low"]).optional(),
591
- suggestion: z2.string().optional()
608
+ suggestion: z2.string().optional(),
609
+ lowConfidenceReason: z2.enum(["no_keyword_matches", "no_results"]).optional()
592
610
  })
593
611
  });
594
612
  var FindModelsToolResultSchema = SearchResponseSchema.extend({
@@ -604,6 +622,8 @@ var BatchModelLookupRequestSchema = z3.object({
604
622
  var ModelPricingSchema = z3.object({
605
623
  promptPerMillion: z3.number().nullable(),
606
624
  completionPerMillion: z3.number().nullable(),
625
+ promptPerToken: z3.number().nullable().optional(),
626
+ completionPerToken: z3.number().nullable().optional(),
607
627
  requestUsd: z3.number().nullable(),
608
628
  imageUsd: z3.number().nullable()
609
629
  });
@@ -634,7 +654,8 @@ var BatchModelLookupResponseSchema = z3.object({
634
654
  missingIds: z3.array(z3.string()),
635
655
  resolvedAliases: z3.record(z3.string(), z3.string()).optional(),
636
656
  ambiguousAliases: z3.record(z3.string(), z3.array(z3.string())).optional(),
637
- suggestions: z3.record(z3.string(), z3.array(z3.string())).optional()
657
+ suggestions: z3.record(z3.string(), z3.array(z3.string())).optional(),
658
+ missingDiagnostics: z3.record(z3.string(), MissingModelDiagnosticSchema).optional()
638
659
  }).strict();
639
660
  var GetModelsToolResultSchema = z3.object({
640
661
  results: z3.array(ModelResponseSchema.nullable()),
@@ -642,11 +663,13 @@ var GetModelsToolResultSchema = z3.object({
642
663
  resolvedAliases: z3.record(z3.string(), z3.string()).optional(),
643
664
  ambiguousAliases: z3.record(z3.string(), z3.array(z3.string())).optional(),
644
665
  suggestions: z3.record(z3.string(), z3.array(z3.string())).optional(),
666
+ missingDiagnostics: z3.record(z3.string(), MissingModelDiagnosticSchema).optional(),
645
667
  _index9: Index9MetaSchema
646
668
  });
647
669
 
648
670
  // ../core/dist/schemas/compare.js
649
671
  import { z as z4 } from "zod";
672
+ var PricingBasisSchema = z4.enum(["exact_per_token", "rounded_per_million", "unavailable"]);
650
673
  var CompareRequestSchema = z4.object({
651
674
  ids: z4.array(z4.string().min(1)).min(2, "compare requires at least 2 ids").max(LIMITS.compareModelsMax, `ids must contain between 2 and ${LIMITS.compareModelsMax} model IDs`),
652
675
  expectedPromptTokens: z4.number().int().positive().optional(),
@@ -683,7 +706,10 @@ var CompareWorkloadCostSchema = z4.object({
683
706
  modelId: z4.string(),
684
707
  promptTokens: z4.number().int().nonnegative(),
685
708
  completionTokens: z4.number().int().nonnegative(),
686
- totalCostUsd: z4.number().nullable()
709
+ totalCostUsd: z4.number().nullable(),
710
+ tokenCostUsd: z4.number().nullable().optional(),
711
+ requestCostUsd: z4.number().nullable().optional(),
712
+ pricingBasis: PricingBasisSchema.optional()
687
713
  });
688
714
  var CompareResponseSchema = z4.object({
689
715
  models: z4.array(ModelResponseSchema),
@@ -695,7 +721,8 @@ var CompareResponseSchema = z4.object({
695
721
  resolvedAliases: z4.record(z4.string(), z4.string()).optional(),
696
722
  missingIds: z4.array(z4.string()),
697
723
  suggestions: z4.record(z4.string(), z4.array(z4.string())).optional(),
698
- ambiguousAliases: z4.record(z4.string(), z4.array(z4.string())).optional()
724
+ ambiguousAliases: z4.record(z4.string(), z4.array(z4.string())).optional(),
725
+ missingDiagnostics: z4.record(z4.string(), MissingModelDiagnosticSchema).optional()
699
726
  }).strict();
700
727
  var CompareModelsToolResultSchema = CompareResponseSchema.extend({
701
728
  _index9: Index9MetaSchema
@@ -770,8 +797,17 @@ var TestPricingUsedSchema = z6.object({
770
797
  promptPerToken: z6.number().nullable().optional(),
771
798
  completionPerToken: z6.number().nullable().optional(),
772
799
  promptPerMillion: z6.number().nullable().optional(),
773
- completionPerMillion: z6.number().nullable().optional()
800
+ completionPerMillion: z6.number().nullable().optional(),
801
+ requestUsd: z6.number().nullable().optional()
774
802
  });
803
+ var TestFailureReasonSchema = z6.enum([
804
+ "insufficient_credits",
805
+ "model_unavailable",
806
+ "rate_limited",
807
+ "timeout",
808
+ "invalid_request",
809
+ "unknown"
810
+ ]);
775
811
  var TestModelMetadataSchema = z6.object({
776
812
  id: z6.string(),
777
813
  name: z6.string(),
@@ -796,6 +832,7 @@ var TestResultFailureSchema = z6.object({
796
832
  ok: z6.literal(false),
797
833
  model: TestModelMetadataSchema,
798
834
  error: z6.string(),
835
+ failureReason: TestFailureReasonSchema.optional(),
799
836
  latencyMs: z6.number().min(0)
800
837
  });
801
838
  var TestResultSchema = z6.discriminatedUnion("ok", [
@@ -807,7 +844,11 @@ var TestEstimateResultSchema = z6.object({
807
844
  resolvedModelId: z6.string().optional(),
808
845
  model: TestModelMetadataSchema,
809
846
  tokens: UsageTokensSchema,
810
- estimatedCost: z6.number().nullable().optional()
847
+ estimatedCost: z6.number().nullable().optional(),
848
+ tokenCostUsd: z6.number().nullable().optional(),
849
+ requestCostUsd: z6.number().nullable().optional(),
850
+ totalCostUsd: z6.number().nullable().optional(),
851
+ estimatedCostBasis: PricingBasisSchema.optional()
811
852
  });
812
853
  var TestDryRunResponseSchema = z6.object({
813
854
  dryRun: z6.literal(true),
@@ -1005,6 +1046,7 @@ async function handleSearchModels(ctx, args) {
1005
1046
  if (q.modality) params.modality = q.modality;
1006
1047
  if (q.provider?.length) params.provider = q.provider.join(",");
1007
1048
  if (q.excludeFree === true) params.excludeFree = "true";
1049
+ if (q.requireKeywordMatch === true) params.requireKeywordMatch = "true";
1008
1050
  return callApi(
1009
1051
  ctx,
1010
1052
  buildUrl(ctx.baseUrl, API_PATHS.search, params),
@@ -1096,7 +1138,8 @@ async function createServer() {
1096
1138
  capabilitiesAny: z7.array(z7.enum(CAPABILITIES)).optional().describe(PARAM_DESCRIPTIONS.capabilitiesAny),
1097
1139
  modality: z7.enum(OUTPUT_MODALITIES).optional().describe(PARAM_DESCRIPTIONS.modality),
1098
1140
  provider: z7.array(z7.string().min(1)).optional().describe(PARAM_DESCRIPTIONS.provider),
1099
- excludeFree: z7.boolean().optional().describe(PARAM_DESCRIPTIONS.excludeFree)
1141
+ excludeFree: z7.boolean().optional().describe(PARAM_DESCRIPTIONS.excludeFree),
1142
+ requireKeywordMatch: z7.boolean().optional().describe(PARAM_DESCRIPTIONS.requireKeywordMatch)
1100
1143
  },
1101
1144
  outputSchema: FindModelsToolResultSchema.shape,
1102
1145
  annotations: { readOnlyHint: true }
package/manifest.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "manifest_version": "0.3",
3
3
  "name": "index9",
4
- "version": "5.3.0",
4
+ "version": "6.0.0",
5
5
  "description": "Discover, shortlist, compare, cost-model, and live-test 300+ AI models from your editor",
6
6
  "author": {
7
7
  "name": "Index9"
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@index9/mcp",
3
- "version": "6.0.0",
3
+ "version": "6.1.0",
4
4
  "license": "MIT",
5
5
  "repository": {
6
6
  "type": "git",
@@ -28,7 +28,7 @@
28
28
  "tsup": "^8.5.1",
29
29
  "typescript": "6.0.3",
30
30
  "vitest": "^4.1.5",
31
- "@index9/core": "2.3.2"
31
+ "@index9/core": "2.4.0"
32
32
  },
33
33
  "engines": {
34
34
  "node": ">=20"