@heventure/model-provider-x 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/args.js CHANGED
@@ -29,6 +29,12 @@ export function parseCliArgs(argv) {
29
29
  .map((model) => model.trim())
30
30
  .filter(Boolean);
31
31
  break;
32
+ case "--modalities":
33
+ if (!options.modalities) {
34
+ options.modalities = [];
35
+ }
36
+ options.modalities.push(parseModalitiesArg(next()));
37
+ break;
32
38
  case "--name":
33
39
  options.providerName = next();
34
40
  break;
@@ -107,6 +113,8 @@ Options:
107
113
  --proxy Write agent config through the local compatibility proxy.
108
114
  --direct Write agent config directly to the upstream provider.
109
115
  --models <list> Comma-separated model ids. Skips interactive model selection.
116
+ --modalities <spec> Model modalities in format <model>:<input>:<output>.
117
+ Example: --modalities qwen-vl:image,text:text
110
118
  --config <path> OpenCode config path to write when targeting OpenCode.
111
119
  --print Print generated JSON and do not write config.
112
120
  --yes, -y Accept defaults in non-interactive prompts.
@@ -134,3 +142,29 @@ function addModelByOneBasedIndex(selected, models, index) {
134
142
  }
135
143
  selected.add(model);
136
144
  }
145
/**
 * Parses one `--modalities` CLI value of the form `<model>:<input>:<output>`.
 *
 * The two modality lists are read from the right-hand end of the value, so a
 * model id that itself contains colons (for example a tagged id such as
 * `llava:13b`) is kept intact instead of being rejected.
 *
 * @param {string} value - Raw argument text, e.g. `qwen-vl:image,text:text`.
 * @returns {{ modelId: string, modalities: { input: string[], output: string[] } }}
 *   The model id plus its lower-cased, de-duplicated modality lists.
 * @throws {Error} When the value is not a string, has fewer than three
 *   colon-separated sections, has an empty model id, has an empty modality
 *   list, or names a modality outside text/image/audio/video/pdf.
 */
function parseModalitiesArg(value) {
  const allowed = ["text", "image", "audio", "video", "pdf"];
  const formatError = () =>
    new Error(`Invalid --modalities format: ${value}. Expected <model>:<input>:<output>`);

  // A missing argument value would otherwise surface as an opaque TypeError.
  if (typeof value !== "string") {
    throw formatError();
  }

  // Locate the last two separators; everything before them is the model id.
  const outputSep = value.lastIndexOf(":");
  const inputSep = outputSep > 0 ? value.lastIndexOf(":", outputSep - 1) : -1;
  if (inputSep < 0) {
    throw formatError();
  }

  const modelId = value.slice(0, inputSep).trim();
  if (!modelId) {
    throw new Error("Model id is required in --modalities");
  }

  const parseModalityList = (list) => {
    const names = [];
    for (const piece of list.split(",")) {
      const name = piece.trim().toLowerCase();
      if (!name) {
        // Tolerate stray separators such as a trailing comma.
        continue;
      }
      if (!allowed.includes(name)) {
        throw new Error(`Unknown modality: ${name}`);
      }
      if (!names.includes(name)) {
        names.push(name);
      }
    }
    if (names.length === 0) {
      throw formatError();
    }
    return names;
  };

  return {
    modelId,
    modalities: {
      input: parseModalityList(value.slice(inputSep + 1, outputSep)),
      output: parseModalityList(value.slice(outputSep + 1)),
    },
  };
}
package/dist/cli/index.js CHANGED
@@ -6,6 +6,7 @@ import { readProxyStatus, startProxyProcess, stopProxyProcess } from "../core/pr
6
6
  import { createProxyAuthToken, getDefaultToolConfigPath, readToolConfig, upsertProviderProfile, writeToolConfig } from "../core/tool-config.js";
7
7
  import { discoverOpenCodeConfigs, getDefaultConfigPath, writeProviderToConfig } from "../core/config.js";
8
8
  import { buildProviderConfig, detectProviderCapabilities, recommendProxyMode, validateAndFetchModels } from "../core/provider.js";
9
+ import { isKnownModel } from "../core/model-capabilities.js";
9
10
  import { startProxyServer } from "../proxy/server.js";
10
11
  import { defaultClaudeModelMapping, getDefaultClaudeSettingsPath, writeClaudeCodeSettings } from "../targets/claude-code.js";
11
12
  import { getDefaultCodexConfigPath, writeCodexConfig } from "../targets/codex.js";
@@ -76,12 +77,14 @@ export async function runCli(options) {
76
77
  (canUseTui()
77
78
  ? await multiSelectChoices("Select models", createModelChoices(fetched.models))
78
79
  : parseModelSelection(await rl.question(formatModelPrompt(fetched.models)), fetched.models));
80
+ const modelDetails = fetched.modelDetails.filter((m) => selectedModels.includes(m.id));
79
81
  const fragment = buildProviderConfig({
80
82
  providerId,
81
83
  providerName,
82
84
  baseURL: fetched.baseURL,
83
85
  apiKey,
84
86
  models: selectedModels,
87
+ modelDetails,
85
88
  opencodeApiType: options.opencodeApiType ?? "chat"
86
89
  });
87
90
  const provider = fragment.provider[providerId];
@@ -151,6 +154,7 @@ async function collectProviderSelection(rl, command, providerInput) {
151
154
  if (!defaultModel) {
152
155
  throw new Error("Select at least one model");
153
156
  }
157
+ const modelDetails = await resolveModelModalities(rl, command, selectedModels, fetched.modelDetails);
154
158
  const toolConfigPath = getDefaultToolConfigPath();
155
159
  const config = await upsertProviderProfile(toolConfigPath, {
156
160
  id: providerInput.providerId,
@@ -168,6 +172,7 @@ async function collectProviderSelection(rl, command, providerInput) {
168
172
  upstreamBaseURL: fetched.baseURL,
169
173
  apiKey: providerInput.apiKey,
170
174
  selectedModels,
175
+ modelDetails,
171
176
  defaultModel,
172
177
  config,
173
178
  toolConfigPath
@@ -186,6 +191,7 @@ async function writeOpenCodeSetup(rl, command, selection, useProxy, capabilities
186
191
  baseURL,
187
192
  apiKey,
188
193
  models: selection.selectedModels,
194
+ modelDetails: selection.modelDetails,
189
195
  opencodeApiType
190
196
  });
191
197
  const provider = fragment.provider[selection.providerId];
@@ -634,4 +640,73 @@ function slugify(value) {
634
640
  .replace(/[^a-z0-9]+/g, "-")
635
641
  .replace(/^-+|-+$/g, "");
636
642
  }
643
/**
 * Decides which modality information to store for each selected model.
 *
 * Precedence per model:
 *   1. a `--modalities` override from `command.options.modalities`;
 *   2. the fetched detail record, when it already carries `modalities`;
 *   3. the fetched record (or a bare `{ id }`) when `--yes` was given or
 *      `isKnownModel` recognises the id, so no question is asked;
 *   4. otherwise the answer from an interactive `promptModelModalities` call.
 *
 * @param {object} rl - Readline interface forwarded to the prompt helper.
 * @param {{ options: { modalities?: Array<{ modelId: string, modalities: object }>, yes?: boolean } }} command
 *   Parsed CLI command.
 * @param {string[]} selectedModels - Model ids chosen by the user, in order.
 * @param {Array<{ id: string, modalities?: object }>} [fetchedModelDetails]
 *   Detail records from the provider; a missing list is treated as empty.
 * @returns {Promise<Array<{ id: string, modalities?: object }>>}
 *   One entry per selected model, in the same order as `selectedModels`.
 */
async function resolveModelModalities(rl, command, selectedModels, fetchedModelDetails) {
  // Later overrides for the same model id replace earlier ones.
  const overridesById = new Map(
    (command.options.modalities ?? []).map((override) => [override.modelId, override.modalities])
  );

  // Index the fetched records once instead of scanning the list per model.
  // The first record for an id wins, matching a front-to-back search.
  const fetchedById = new Map();
  for (const detail of fetchedModelDetails ?? []) {
    if (!fetchedById.has(detail.id)) {
      fetchedById.set(detail.id, detail);
    }
  }

  const result = [];
  // Models are handled one at a time because the prompt is interactive.
  for (const modelId of selectedModels) {
    const fetched = fetchedById.get(modelId);
    const userOverride = overridesById.get(modelId);
    if (userOverride) {
      result.push({ id: modelId, modalities: userOverride });
      continue;
    }
    if (fetched?.modalities) {
      result.push(fetched);
      continue;
    }
    if (command.options.yes || isKnownModel(modelId)) {
      result.push(fetched ?? { id: modelId });
      continue;
    }
    const modalities = await promptModelModalities(rl, modelId);
    result.push({ id: modelId, modalities });
  }
  return result;
}
671
/**
 * Asks the user which input and output modalities a model supports.
 *
 * When `canUseTui()` is true the answer comes from two multi-select lists;
 * otherwise two comma-separated readline answers are parsed. In both paths an
 * empty answer falls back to `["text"]`, the default advertised by the
 * prompts.
 *
 * @param {{ question: (prompt: string) => Promise<string> }} rl
 *   Readline interface used when the TUI is not available.
 * @param {string} modelId - Model id shown in the heading.
 * @returns {Promise<{ input: string[], output: string[] }>} Chosen modalities.
 * @throws {Error} When a typed answer names a modality outside
 *   text/image/audio/video/pdf.
 */
async function promptModelModalities(rl, modelId) {
  const modalityNames = ["text", "image", "audio", "video", "pdf"];
  output.write(`\nModel: ${modelId} - Capabilities unknown\n`);

  if (canUseTui()) {
    // Each prompt gets its own fresh choice objects.
    const buildChoices = () =>
      modalityNames.map((name) =>
        name === "text" ? { label: name, value: name, hint: "default" } : { label: name, value: name }
      );
    // Confirming with nothing ticked means "use the default".
    const orDefault = (selection) =>
      Array.isArray(selection) && selection.length > 0 ? selection : ["text"];

    const inputModalities = await multiSelectChoices("Input modalities", buildChoices());
    const outputModalities = await multiSelectChoices("Output modalities", buildChoices());
    return {
      input: orDefault(inputModalities),
      output: orDefault(outputModalities),
    };
  }

  const inputAnswer = await rl.question("Input modalities [text]: ");
  const outputAnswer = await rl.question("Output modalities [text]: ");

  const parseModalityList = (answer) => {
    const names = [];
    for (const piece of answer.split(",")) {
      const name = piece.trim().toLowerCase();
      if (!name) {
        // Blank answer or stray comma: nothing to record for this piece.
        continue;
      }
      if (!modalityNames.includes(name)) {
        throw new Error(`Unknown modality: ${name}`);
      }
      if (!names.includes(name)) {
        names.push(name);
      }
    }
    return names.length > 0 ? names : ["text"];
  };

  return {
    input: parseModalityList(inputAnswer),
    output: parseModalityList(outputAnswer),
  };
}
637
712
  void main();
@@ -0,0 +1,174 @@
1
/**
 * Static table of input/output modalities keyed by lower-case model id.
 *
 * Every entry is its own object and the keys are inserted in a fixed order,
 * because code that iterates the table returns the first matching entry.
 */
const MODEL_CAPABILITIES = (() => {
  const table = {};
  // Every listed model accepts and emits text; `extraInputs` adds the rest.
  const register = (extraInputs, ids) => {
    for (const id of ids) {
      table[id] = { input: ["text", ...extraInputs], output: ["text"] };
    }
  };

  register(["image"], [
    "gpt-4o",
    "gpt-4o-mini",
    "gpt-4-turbo",
    "gpt-4-vision-preview",
    "gpt-4.1",
    "gpt-4.1-mini",
    "gpt-4.1-nano",
    "gpt-5",
    "gpt-5-mini",
    "gpt-5-nano",
    "o1",
    "o1-mini",
    "o1-pro",
    "o3",
    "o3-mini",
    "o3-pro",
    "o4-mini",
    "claude-3-opus",
    "claude-3-sonnet",
    "claude-3-haiku",
    "claude-3.5-sonnet",
    "claude-3.5-haiku",
    "claude-4-opus",
    "claude-4-sonnet",
    "claude-4.5-opus",
    "claude-4.5-sonnet",
    "claude-4.5-haiku",
    "gemini-pro-vision",
  ]);
  register(["image", "audio", "video"], [
    "gemini-1.5-pro",
    "gemini-1.5-flash",
    "gemini-2.0-flash",
    "gemini-2.5-pro",
    "gemini-2.5-flash",
  ]);
  register(["image"], [
    "qwen-vl-plus",
    "qwen-vl-max",
    "qwen-vl-chat",
    "qwen2.5-vl-72b-instruct",
    "qwen2.5-vl-7b-instruct",
    "qwen2.5-vl-32b-instruct",
    "qwen3-vl-235b-a22b",
    "qwen3-vl-30b-a3b",
    "llama-3.2-11b-vision-instruct",
    "llama-3.2-90b-vision-instruct",
    "llama-4-scout-17b-16e-instruct",
    "llama-4-maverick-17b-128e-instruct",
    "deepseek-vl",
    "deepseek-vl2",
    "glm-4v",
    "glm-4.5v",
    "glm-4.6v",
    "glm-5v-turbo",
    "pixtral-large-2411",
    "pixtral-large-2502",
    "mistral-small-3.1-24b-instruct",
  ]);
  register(["image", "audio"], ["phi-4-multimodal"]);
  register(["image"], [
    "gemma-3-4b-it",
    "gemma-3-12b-it",
    "gemma-3-27b-it",
    "gemma-4-26b-a4b-it",
    "gemma-4-31b-it",
  ]);

  return table;
})();

/** Substrings of a model id that are taken to indicate image input support. */
const VISION_KEYWORDS = ["vision", "vl", "visual", "multimodal", "pixtral"];

/** Set of every model id that has an explicit entry in MODEL_CAPABILITIES. */
const KNOWN_MODALITY_MODELS = new Set(Object.keys(MODEL_CAPABILITIES));
71
/**
 * Looks up modalities for a model id in the built-in capability table.
 *
 * Resolution order:
 *   1. exact match on the lower-cased, trimmed id, then on the id with any
 *      `vendor/` prefix removed;
 *   2. the longest table key that occurs inside the id, so a dated or
 *      vendor-prefixed variant resolves to its most specific base model;
 *   3. a generic text+image record when the id contains a vision keyword.
 *
 * A table key that merely contains the id is deliberately NOT treated as a
 * match: that rule reported vision support for an empty id and for short
 * ids such as `gpt-4` that are only a prefix of some listed model.
 *
 * @param {string} modelId - Model id as reported by the provider.
 * @returns {{ input: string[], output: string[] } | undefined}
 *   The capability record, or `undefined` when nothing matches.
 */
export function lookupModelCapabilities(modelId) {
  const normalized = modelId.toLowerCase().trim();
  const withoutPrefix = normalized.includes("/")
    ? normalized.split("/").pop()
    : normalized;

  // Own-property check so ids like "constructor" cannot hit inherited members.
  const hasEntry = (key) => Object.prototype.hasOwnProperty.call(MODEL_CAPABILITIES, key);

  for (const candidate of [normalized, withoutPrefix]) {
    if (hasEntry(candidate)) {
      return MODEL_CAPABILITIES[candidate];
    }
  }

  // Prefer the longest contained key so the result does not depend on the
  // order in which the table happens to list related models.
  let bestPattern;
  for (const pattern of Object.keys(MODEL_CAPABILITIES)) {
    const isLonger = bestPattern === undefined || pattern.length > bestPattern.length;
    if (isLonger && normalized.includes(pattern)) {
      bestPattern = pattern;
    }
  }
  if (bestPattern !== undefined) {
    return MODEL_CAPABILITIES[bestPattern];
  }

  if (VISION_KEYWORDS.some((keyword) => normalized.includes(keyword))) {
    return { input: ["text", "image"], output: ["text"] };
  }
  return undefined;
}
99
/**
 * Extracts modality information from one raw `/models` entry.
 *
 * Two shapes are understood:
 *   - `model.modalities.input` / `.output` arrays (only string items kept);
 *   - boolean-like flags on `model.capabilities` (`vision`, `image`,
 *     `multimodal`, `audio`, `speech`), which extend a text-only default.
 *
 * A list that is empty after filtering counts as "not provided", so it
 * neither produces a model with no modalities nor hides the capability
 * flags or a later fallback.
 *
 * @param {object} model - Raw model record returned by the provider.
 * @returns {{ input?: string[], output?: string[] } | undefined}
 *   Parsed modalities, or `undefined` when the record carries nothing usable.
 */
export function parseCapabilitiesFromApi(model) {
  const stringList = (value) => {
    if (!Array.isArray(value)) {
      return undefined;
    }
    const strings = value.filter((item) => typeof item === "string");
    return strings.length > 0 ? strings : undefined;
  };

  if (model.modalities && typeof model.modalities === "object") {
    const input = stringList(model.modalities.input);
    const output = stringList(model.modalities.output);
    if (input || output) {
      // Only include the sides the provider actually described.
      const result = {};
      if (input) {
        result.input = input;
      }
      if (output) {
        result.output = output;
      }
      return result;
    }
  }

  if (model.capabilities && typeof model.capabilities === "object") {
    const caps = model.capabilities;
    const input = ["text"];
    if (caps.vision || caps.image || caps.multimodal) {
      input.push("image");
    }
    if (caps.audio || caps.speech) {
      input.push("audio");
    }
    // Text-only flags carry no extra information, so report nothing.
    if (input.length > 1) {
      return { input, output: ["text"] };
    }
  }
  return undefined;
}
128
/**
 * Picks the capability record to use for a model.
 *
 * The first source that declares an `input` or `output` member wins, checked
 * in the order user overrides, then API-reported capabilities. When neither
 * qualifies the result of `lookupModelCapabilities(modelId)` is returned.
 *
 * @param {string} modelId - Model id used for the table lookup fallback.
 * @param {{ input?: unknown, output?: unknown }} [apiCapabilities]
 * @param {{ input?: unknown, output?: unknown }} [userOverrides]
 * @returns {object | undefined} The chosen record, returned as-is.
 */
export function mergeModelCapabilities(modelId, apiCapabilities, userOverrides) {
  for (const candidate of [userOverrides, apiCapabilities]) {
    const declaresModalities = Boolean(candidate && (candidate.input || candidate.output));
    if (declaresModalities) {
      return candidate;
    }
  }
  return lookupModelCapabilities(modelId);
}
137
/**
 * Reports whether a model id is recognised without asking the user.
 *
 * An id counts as known when its lower-cased, trimmed form, or that form
 * with everything up to the last "/" removed, is in KNOWN_MODALITY_MODELS,
 * or when the lower-cased id contains one of the VISION_KEYWORDS.
 *
 * @param {string} modelId - Model id as reported by the provider.
 * @returns {boolean} `true` when the id is recognised, otherwise `false`.
 */
export function isKnownModel(modelId) {
  const id = modelId.toLowerCase().trim();
  const bareId = id.includes("/") ? id.split("/").pop() : id;
  return (
    KNOWN_MODALITY_MODELS.has(id) ||
    KNOWN_MODALITY_MODELS.has(bareId) ||
    VISION_KEYWORDS.some((keyword) => id.includes(keyword))
  );
}
155
/**
 * Parses a modality spec of the form `<input>:<output>`, where each side is
 * a comma-separated list drawn from text/image/audio/video/pdf.
 *
 * Names are trimmed and lower-cased, blank pieces (such as a trailing comma)
 * are ignored, and repeated names are reported once.
 *
 * @param {string} value - Spec text, e.g. `image,text:text`.
 * @returns {{ input: string[], output: string[] }} The parsed lists.
 * @throws {Error} When the value is not a string, does not have exactly two
 *   colon-separated sections, has an empty list on either side, or names an
 *   unknown modality.
 */
export function parseModalitiesFromString(value) {
  const allowed = ["text", "image", "audio", "video", "pdf"];
  const formatError = () =>
    new Error(`Invalid modalities format: ${value}. Expected <input>:<output>`);

  // Guard first so a missing value reports the format error, not a TypeError.
  const parts = typeof value === "string" ? value.split(":") : [];
  if (parts.length !== 2) {
    throw formatError();
  }
  const [inputStr, outputStr] = parts;

  const parseModalityList = (list) => {
    const names = [];
    for (const piece of list.split(",")) {
      const name = piece.trim().toLowerCase();
      if (!name) {
        continue;
      }
      if (!allowed.includes(name)) {
        throw new Error(`Unknown modality: ${name}`);
      }
      if (!names.includes(name)) {
        names.push(name);
      }
    }
    if (names.length === 0) {
      throw formatError();
    }
    return names;
  };

  return {
    input: parseModalityList(inputStr),
    output: parseModalityList(outputStr),
  };
}
@@ -1,3 +1,4 @@
1
+ import { parseCapabilitiesFromApi, mergeModelCapabilities } from "./model-capabilities.js";
1
2
  export function normalizeBaseUrl(baseURL) {
2
3
  const normalized = baseURL.trim().replace(/\/+$/, "");
3
4
  if (!normalized) {
@@ -26,16 +27,25 @@ export async function validateAndFetchModels(input, fetchImpl = globalThis.fetch
26
27
  if (!isModelListResponse(body)) {
27
28
  throw new Error("Expected /models to return an object with a data array");
28
29
  }
30
+ const compatibleModels = body.data.filter(isOpenCodeCompatibleModel);
29
31
  const models = [
30
- ...new Set(body.data
31
- .filter(isOpenCodeCompatibleModel)
32
+ ...new Set(compatibleModels
32
33
  .map((model) => model.id.trim())
33
34
  .filter(Boolean))
34
35
  ];
35
36
  if (models.length === 0) {
36
37
  throw new Error("Provider returned no OpenCode-compatible model ids");
37
38
  }
38
- return { baseURL, models };
39
+ const modelDetails = models.map((modelId) => {
40
+ const rawModel = compatibleModels.find((m) => m.id.trim() === modelId);
41
+ const apiCapabilities = rawModel ? parseCapabilitiesFromApi(rawModel) : undefined;
42
+ const mergedCapabilities = mergeModelCapabilities(modelId, apiCapabilities);
43
+ return {
44
+ id: modelId,
45
+ modalities: mergedCapabilities
46
+ };
47
+ });
48
+ return { baseURL, models, modelDetails };
39
49
  }
40
50
  export async function detectProviderCapabilities(input, fetchImpl = globalThis.fetch) {
41
51
  const baseURL = normalizeBaseUrl(input.baseURL);
@@ -78,7 +88,10 @@ export function buildProviderConfig(input) {
78
88
  options: {
79
89
  baseURL
80
90
  },
81
- models: Object.fromEntries(input.models.map((model) => [model, { name: model }]))
91
+ models: Object.fromEntries(input.models.map((model) => {
92
+ const modelInfo = input.modelDetails?.find((m) => m.id === model);
93
+ return [model, buildModelConfig(model, modelInfo)];
94
+ }))
82
95
  };
83
96
  if (opencodeApiType !== "messages") {
84
97
  provider.options.setCacheKey = true;
@@ -93,6 +106,13 @@ export function buildProviderConfig(input) {
93
106
  }
94
107
  };
95
108
  }
109
/**
 * Builds the per-model config entry: the model id as `name`, plus a
 * `modalities` member when the supplied detail record carries a truthy one.
 *
 * @param {string} modelId - Model id, used verbatim as the display name.
 * @param {{ modalities?: object }} [modelInfo] - Optional detail record.
 * @returns {{ name: string, modalities?: object }} The config entry.
 */
function buildModelConfig(modelId, modelInfo) {
  const modalities = modelInfo?.modalities;
  return modalities ? { name: modelId, modalities } : { name: modelId };
}
96
116
  export function npmPackageForOpenCodeApiType(apiType) {
97
117
  if (apiType === "responses") {
98
118
  return "@ai-sdk/openai";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@heventure/model-provider-x",
3
- "version": "0.2.3",
3
+ "version": "0.2.4",
4
4
  "description": "TUI configurator and local API proxy for wiring custom model providers into OpenCode and Claude Code.",
5
5
  "private": false,
6
6
  "license": "MIT",