@aws/ml-container-creator 0.9.1 → 0.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE-THIRD-PARTY +9304 -0
- package/bin/cli.js +2 -0
- package/config/bootstrap-e2e-stack.json +341 -0
- package/config/bootstrap-stack.json +40 -3
- package/config/parameter-schema-v2.json +2049 -0
- package/config/tune-catalog.json +1781 -0
- package/infra/ci-harness/buildspec.yml +1 -0
- package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
- package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
- package/infra/ci-harness/lib/ci-harness-stack.ts +837 -7
- package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
- package/package.json +53 -68
- package/servers/base-image-picker/index.js +121 -121
- package/servers/e2e-status/index.js +297 -0
- package/servers/e2e-status/manifest.json +14 -0
- package/servers/e2e-status/package.json +15 -0
- package/servers/endpoint-picker/LICENSE +202 -0
- package/servers/endpoint-picker/index.js +536 -0
- package/servers/endpoint-picker/manifest.json +14 -0
- package/servers/endpoint-picker/package.json +18 -0
- package/servers/hyperpod-cluster-picker/index.js +125 -125
- package/servers/instance-sizer/index.js +138 -138
- package/servers/instance-sizer/lib/instance-ranker.js +76 -76
- package/servers/instance-sizer/lib/model-resolver.js +61 -61
- package/servers/instance-sizer/lib/quota-resolver.js +113 -113
- package/servers/instance-sizer/lib/vram-estimator.js +31 -31
- package/servers/lib/bedrock-client.js +38 -38
- package/servers/lib/catalogs/jumpstart-public.json +101 -16
- package/servers/lib/catalogs/model-servers.json +201 -3
- package/servers/lib/catalogs/models.json +182 -26
- package/servers/lib/custom-validators.js +13 -13
- package/servers/lib/dynamic-resolver.js +4 -4
- package/servers/marketplace-picker/index.js +342 -0
- package/servers/marketplace-picker/manifest.json +14 -0
- package/servers/marketplace-picker/package.json +18 -0
- package/servers/model-picker/index.js +382 -382
- package/servers/region-picker/index.js +56 -56
- package/servers/workload-picker/LICENSE +202 -0
- package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
- package/servers/workload-picker/index.js +171 -0
- package/servers/workload-picker/manifest.json +16 -0
- package/servers/workload-picker/package.json +16 -0
- package/src/app.js +4 -390
- package/src/lib/bootstrap-command-handler.js +710 -1148
- package/src/lib/bootstrap-config.js +36 -0
- package/src/lib/bootstrap-profile-manager.js +641 -0
- package/src/lib/bootstrap-provisioners.js +421 -0
- package/src/lib/ci-register-helpers.js +74 -0
- package/src/lib/config-loader.js +408 -0
- package/src/lib/config-manager.js +66 -1685
- package/src/lib/config-mcp-client.js +118 -0
- package/src/lib/config-validator.js +634 -0
- package/src/lib/cuda-resolver.js +149 -0
- package/src/lib/e2e-catalog-validator.js +251 -3
- package/src/lib/e2e-ci-recorder.js +103 -0
- package/src/lib/generated/cli-options.js +315 -311
- package/src/lib/generated/parameter-matrix.js +671 -0
- package/src/lib/generated/validation-rules.js +71 -71
- package/src/lib/marketplace-flow.js +276 -0
- package/src/lib/mcp-query-runner.js +768 -0
- package/src/lib/parameter-schema-validator.js +62 -18
- package/src/lib/path-prover-brain.js +607 -0
- package/src/lib/prompt-runner.js +41 -1504
- package/src/lib/prompts/feature-prompts.js +172 -0
- package/src/lib/prompts/index.js +48 -0
- package/src/lib/prompts/infrastructure-prompts.js +690 -0
- package/src/lib/prompts/model-prompts.js +552 -0
- package/src/lib/prompts/project-prompts.js +82 -0
- package/src/lib/prompts.js +2 -1446
- package/src/lib/registry-command-handler.js +135 -3
- package/src/lib/secrets-prompt-runner.js +251 -0
- package/src/lib/template-variable-resolver.js +422 -0
- package/src/lib/tune-catalog-validator.js +37 -4
- package/templates/Dockerfile +9 -0
- package/templates/code/adapter_sidecar.py +444 -0
- package/templates/code/serve +6 -0
- package/templates/code/serve.d/vllm.ejs +1 -1
- package/templates/do/.benchmark_writer.py +1476 -0
- package/templates/do/.tune_helper.py +982 -57
- package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
- package/templates/do/adapter +149 -0
- package/templates/do/benchmark +639 -85
- package/templates/do/config +108 -5
- package/templates/do/deploy.d/managed-inference.ejs +192 -11
- package/templates/do/optimize +106 -37
- package/templates/do/register +89 -0
- package/templates/do/test +13 -0
- package/templates/do/tune +378 -59
- package/templates/do/validate +44 -4
- package/config/parameter-schema.json +0 -88
|
@@ -19,29 +19,29 @@
|
|
|
19
19
|
* @returns {object|null} Parsed JSON object, or null if extraction fails
|
|
20
20
|
*/
|
|
21
21
|
export function extractJson(text) {
|
|
22
|
-
if (!text || typeof text !== 'string') return null
|
|
22
|
+
if (!text || typeof text !== 'string') return null;
|
|
23
23
|
|
|
24
24
|
// Try markdown-fenced code block first (```json ... ``` or ``` ... ```)
|
|
25
|
-
const fencedMatch = text.match(/```(?:json)?\s*\n?([\s\S]*?)```/)
|
|
25
|
+
const fencedMatch = text.match(/```(?:json)?\s*\n?([\s\S]*?)```/);
|
|
26
26
|
if (fencedMatch) {
|
|
27
27
|
try {
|
|
28
|
-
return JSON.parse(fencedMatch[1].trim())
|
|
28
|
+
return JSON.parse(fencedMatch[1].trim());
|
|
29
29
|
} catch {
|
|
30
30
|
// Fall through to raw extraction
|
|
31
31
|
}
|
|
32
32
|
}
|
|
33
33
|
|
|
34
34
|
// Try extracting raw JSON object
|
|
35
|
-
const jsonMatch = text.match(/\{[\s\S]*\}/)
|
|
35
|
+
const jsonMatch = text.match(/\{[\s\S]*\}/);
|
|
36
36
|
if (jsonMatch) {
|
|
37
37
|
try {
|
|
38
|
-
return JSON.parse(jsonMatch[0])
|
|
38
|
+
return JSON.parse(jsonMatch[0]);
|
|
39
39
|
} catch {
|
|
40
|
-
return null
|
|
40
|
+
return null;
|
|
41
41
|
}
|
|
42
42
|
}
|
|
43
43
|
|
|
44
|
-
return null
|
|
44
|
+
return null;
|
|
45
45
|
}
|
|
46
46
|
|
|
47
47
|
/**
|
|
@@ -61,22 +61,22 @@ export function extractJson(text) {
|
|
|
61
61
|
* @returns {Promise<{values: object} | null>}
|
|
62
62
|
*/
|
|
63
63
|
export async function queryBedrock(serverConfig, parameters, limit, context) {
|
|
64
|
-
const prefix = `[${serverConfig.serverName}]
|
|
64
|
+
const prefix = `[${serverConfig.serverName}]`;
|
|
65
65
|
|
|
66
66
|
// Dynamic import with 1s timeout
|
|
67
|
-
let BedrockRuntimeClient, InvokeModelCommand
|
|
67
|
+
let BedrockRuntimeClient, InvokeModelCommand;
|
|
68
68
|
try {
|
|
69
69
|
const mod = await Promise.race([
|
|
70
70
|
import('@aws-sdk/client-bedrock-runtime'),
|
|
71
71
|
new Promise((_, reject) =>
|
|
72
72
|
setTimeout(() => reject(new Error('Import timed out')), 1000)
|
|
73
73
|
)
|
|
74
|
-
])
|
|
75
|
-
BedrockRuntimeClient = mod.BedrockRuntimeClient
|
|
76
|
-
InvokeModelCommand = mod.InvokeModelCommand
|
|
74
|
+
]);
|
|
75
|
+
BedrockRuntimeClient = mod.BedrockRuntimeClient;
|
|
76
|
+
InvokeModelCommand = mod.InvokeModelCommand;
|
|
77
77
|
} catch {
|
|
78
|
-
log(prefix, 'Failed to load @aws-sdk/client-bedrock-runtime. Run "npm install" in the servers/lib/ directory')
|
|
79
|
-
return null
|
|
78
|
+
log(prefix, 'Failed to load @aws-sdk/client-bedrock-runtime. Run "npm install" in the servers/lib/ directory');
|
|
79
|
+
return null;
|
|
80
80
|
}
|
|
81
81
|
|
|
82
82
|
const client = new BedrockRuntimeClient({
|
|
@@ -84,20 +84,20 @@ export async function queryBedrock(serverConfig, parameters, limit, context) {
|
|
|
84
84
|
requestHandler: {
|
|
85
85
|
requestTimeout: 10000
|
|
86
86
|
}
|
|
87
|
-
})
|
|
87
|
+
});
|
|
88
88
|
|
|
89
89
|
// Build prompt from template
|
|
90
90
|
const contextStr = context && Object.keys(context).length > 0
|
|
91
91
|
? JSON.stringify(context)
|
|
92
|
-
: 'No specific configuration context provided.'
|
|
92
|
+
: 'No specific configuration context provided.';
|
|
93
93
|
|
|
94
94
|
const prompt = serverConfig.systemPromptTemplate
|
|
95
95
|
.replace('{context}', contextStr)
|
|
96
96
|
.replace('{parameters}', parameters.join(', '))
|
|
97
|
-
.replace('{limit}', String(limit))
|
|
97
|
+
.replace('{limit}', String(limit));
|
|
98
98
|
|
|
99
99
|
try {
|
|
100
|
-
log(prefix, `Querying Bedrock model ${serverConfig.modelId} in ${serverConfig.region}...`)
|
|
100
|
+
log(prefix, `Querying Bedrock model ${serverConfig.modelId} in ${serverConfig.region}...`);
|
|
101
101
|
|
|
102
102
|
const body = JSON.stringify({
|
|
103
103
|
anthropic_version: 'bedrock-2023-05-31',
|
|
@@ -107,48 +107,48 @@ export async function queryBedrock(serverConfig, parameters, limit, context) {
|
|
|
107
107
|
role: 'user',
|
|
108
108
|
content: prompt
|
|
109
109
|
}]
|
|
110
|
-
})
|
|
110
|
+
});
|
|
111
111
|
|
|
112
112
|
const command = new InvokeModelCommand({
|
|
113
113
|
modelId: serverConfig.modelId,
|
|
114
114
|
contentType: 'application/json',
|
|
115
115
|
accept: 'application/json',
|
|
116
116
|
body
|
|
117
|
-
})
|
|
117
|
+
});
|
|
118
118
|
|
|
119
|
-
const response = await client.send(command)
|
|
120
|
-
const responseBody = JSON.parse(new TextDecoder().decode(response.body))
|
|
119
|
+
const response = await client.send(command);
|
|
120
|
+
const responseBody = JSON.parse(new TextDecoder().decode(response.body));
|
|
121
121
|
|
|
122
|
-
const text = responseBody.content?.[0]?.text
|
|
122
|
+
const text = responseBody.content?.[0]?.text;
|
|
123
123
|
if (!text) {
|
|
124
|
-
log(prefix, 'Bedrock response contained no text content')
|
|
125
|
-
return null
|
|
124
|
+
log(prefix, 'Bedrock response contained no text content');
|
|
125
|
+
return null;
|
|
126
126
|
}
|
|
127
127
|
|
|
128
|
-
const parsed = extractJson(text)
|
|
128
|
+
const parsed = extractJson(text);
|
|
129
129
|
if (!parsed) {
|
|
130
|
-
log(prefix, 'Could not extract JSON from Bedrock response')
|
|
131
|
-
return null
|
|
130
|
+
log(prefix, 'Could not extract JSON from Bedrock response');
|
|
131
|
+
return null;
|
|
132
132
|
}
|
|
133
133
|
|
|
134
134
|
if (!parsed.values || typeof parsed.values !== 'object') {
|
|
135
|
-
log(prefix, 'Bedrock response missing "values" object')
|
|
136
|
-
return null
|
|
135
|
+
log(prefix, 'Bedrock response missing "values" object');
|
|
136
|
+
return null;
|
|
137
137
|
}
|
|
138
138
|
|
|
139
|
-
log(prefix, `Bedrock returned recommendations: ${JSON.stringify(parsed.values)}`)
|
|
140
|
-
return parsed
|
|
139
|
+
log(prefix, `Bedrock returned recommendations: ${JSON.stringify(parsed.values)}`);
|
|
140
|
+
return parsed;
|
|
141
141
|
} catch (err) {
|
|
142
142
|
if (err.name === 'AccessDeniedException') {
|
|
143
|
-
log(prefix, `Access denied. Ensure bedrock:InvokeModel permission for arn:aws:bedrock:${serverConfig.region}:*:inference-profile/${serverConfig.modelId}`)
|
|
143
|
+
log(prefix, `Access denied. Ensure bedrock:InvokeModel permission for arn:aws:bedrock:${serverConfig.region}:*:inference-profile/${serverConfig.modelId}`);
|
|
144
144
|
} else if (err.name === 'ResourceNotFoundException') {
|
|
145
|
-
log(prefix, `Model "${serverConfig.modelId}" not found. Set BEDROCK_MODEL env var. Example: BEDROCK_MODEL=global.anthropic.claude-sonnet-4-20250514-v1:0`)
|
|
145
|
+
log(prefix, `Model "${serverConfig.modelId}" not found. Set BEDROCK_MODEL env var. Example: BEDROCK_MODEL=global.anthropic.claude-sonnet-4-20250514-v1:0`);
|
|
146
146
|
} else if (err.name === 'ThrottlingException') {
|
|
147
|
-
log(prefix, 'Bedrock rate limit hit. Falling back to static recommendations')
|
|
147
|
+
log(prefix, 'Bedrock rate limit hit. Falling back to static recommendations');
|
|
148
148
|
} else {
|
|
149
|
-
log(prefix, `Bedrock query failed: ${err.name}: ${err.message}`)
|
|
149
|
+
log(prefix, `Bedrock query failed: ${err.name}: ${err.message}`);
|
|
150
150
|
}
|
|
151
|
-
return null
|
|
151
|
+
return null;
|
|
152
152
|
}
|
|
153
153
|
}
|
|
154
154
|
|
|
@@ -156,5 +156,5 @@ export async function queryBedrock(serverConfig, parameters, limit, context) {
|
|
|
156
156
|
* Log to stderr so it doesn't interfere with MCP stdio protocol on stdout.
|
|
157
157
|
*/
|
|
158
158
|
function log(prefix, message) {
|
|
159
|
-
process.stderr.write(`${prefix} ${message}\n`)
|
|
159
|
+
process.stderr.write(`${prefix} ${message}\n`);
|
|
160
160
|
}
|
|
@@ -4,63 +4,148 @@
|
|
|
4
4
|
"family": "falcon",
|
|
5
5
|
"framework": "huggingface",
|
|
6
6
|
"provider": "jumpstart",
|
|
7
|
-
"tags": [
|
|
8
|
-
|
|
7
|
+
"tags": [
|
|
8
|
+
"text-generation",
|
|
9
|
+
"llm"
|
|
10
|
+
],
|
|
11
|
+
"description": "Falcon 7B via JumpStart",
|
|
12
|
+
"validation_level": "community-validated",
|
|
13
|
+
"framework_compatibility": {
|
|
14
|
+
"vllm": ">=0.3.0"
|
|
15
|
+
},
|
|
16
|
+
"chat_template": "",
|
|
17
|
+
"gated": false,
|
|
18
|
+
"architecture": null
|
|
9
19
|
},
|
|
10
20
|
"jumpstart://huggingface-llm-falcon-40b": {
|
|
11
21
|
"modelId": "jumpstart://huggingface-llm-falcon-40b",
|
|
12
22
|
"family": "falcon",
|
|
13
23
|
"framework": "huggingface",
|
|
14
24
|
"provider": "jumpstart",
|
|
15
|
-
"tags": [
|
|
16
|
-
|
|
25
|
+
"tags": [
|
|
26
|
+
"text-generation",
|
|
27
|
+
"llm"
|
|
28
|
+
],
|
|
29
|
+
"description": "Falcon 40B via JumpStart",
|
|
30
|
+
"validation_level": "community-validated",
|
|
31
|
+
"framework_compatibility": {
|
|
32
|
+
"vllm": ">=0.3.0"
|
|
33
|
+
},
|
|
34
|
+
"chat_template": "",
|
|
35
|
+
"gated": false,
|
|
36
|
+
"architecture": null
|
|
17
37
|
},
|
|
18
38
|
"jumpstart://meta-textgeneration-llama-2-7b": {
|
|
19
39
|
"modelId": "jumpstart://meta-textgeneration-llama-2-7b",
|
|
20
40
|
"family": "llama-2",
|
|
21
41
|
"framework": "huggingface",
|
|
22
42
|
"provider": "jumpstart",
|
|
23
|
-
"tags": [
|
|
24
|
-
|
|
43
|
+
"tags": [
|
|
44
|
+
"text-generation",
|
|
45
|
+
"llm",
|
|
46
|
+
"llama-2"
|
|
47
|
+
],
|
|
48
|
+
"description": "Llama 2 7B via JumpStart",
|
|
49
|
+
"validation_level": "community-validated",
|
|
50
|
+
"framework_compatibility": {
|
|
51
|
+
"vllm": ">=0.3.0"
|
|
52
|
+
},
|
|
53
|
+
"chat_template": "",
|
|
54
|
+
"gated": false,
|
|
55
|
+
"architecture": null
|
|
25
56
|
},
|
|
26
57
|
"jumpstart://meta-textgeneration-llama-2-13b": {
|
|
27
58
|
"modelId": "jumpstart://meta-textgeneration-llama-2-13b",
|
|
28
59
|
"family": "llama-2",
|
|
29
60
|
"framework": "huggingface",
|
|
30
61
|
"provider": "jumpstart",
|
|
31
|
-
"tags": [
|
|
32
|
-
|
|
62
|
+
"tags": [
|
|
63
|
+
"text-generation",
|
|
64
|
+
"llm",
|
|
65
|
+
"llama-2"
|
|
66
|
+
],
|
|
67
|
+
"description": "Llama 2 13B via JumpStart",
|
|
68
|
+
"validation_level": "community-validated",
|
|
69
|
+
"framework_compatibility": {
|
|
70
|
+
"vllm": ">=0.3.0"
|
|
71
|
+
},
|
|
72
|
+
"chat_template": "",
|
|
73
|
+
"gated": false,
|
|
74
|
+
"architecture": null
|
|
33
75
|
},
|
|
34
76
|
"jumpstart://meta-textgeneration-llama-2-70b": {
|
|
35
77
|
"modelId": "jumpstart://meta-textgeneration-llama-2-70b",
|
|
36
78
|
"family": "llama-2",
|
|
37
79
|
"framework": "huggingface",
|
|
38
80
|
"provider": "jumpstart",
|
|
39
|
-
"tags": [
|
|
40
|
-
|
|
81
|
+
"tags": [
|
|
82
|
+
"text-generation",
|
|
83
|
+
"llm",
|
|
84
|
+
"llama-2"
|
|
85
|
+
],
|
|
86
|
+
"description": "Llama 2 70B via JumpStart",
|
|
87
|
+
"validation_level": "community-validated",
|
|
88
|
+
"framework_compatibility": {
|
|
89
|
+
"vllm": ">=0.3.0"
|
|
90
|
+
},
|
|
91
|
+
"chat_template": "",
|
|
92
|
+
"gated": false,
|
|
93
|
+
"architecture": null
|
|
41
94
|
},
|
|
42
95
|
"jumpstart://model-txt2img-stabilityai-stable-diffusion-v2-1-base": {
|
|
43
96
|
"modelId": "jumpstart://model-txt2img-stabilityai-stable-diffusion-v2-1-base",
|
|
44
97
|
"family": "stable-diffusion",
|
|
45
98
|
"framework": "huggingface",
|
|
46
99
|
"provider": "jumpstart",
|
|
47
|
-
"tags": [
|
|
48
|
-
|
|
100
|
+
"tags": [
|
|
101
|
+
"image-generation",
|
|
102
|
+
"diffusion",
|
|
103
|
+
"stable-diffusion"
|
|
104
|
+
],
|
|
105
|
+
"description": "Stable Diffusion v2.1 Base via JumpStart",
|
|
106
|
+
"validation_level": "community-validated",
|
|
107
|
+
"framework_compatibility": {
|
|
108
|
+
"vllm": ">=0.3.0"
|
|
109
|
+
},
|
|
110
|
+
"chat_template": "",
|
|
111
|
+
"gated": false,
|
|
112
|
+
"architecture": null
|
|
49
113
|
},
|
|
50
114
|
"jumpstart://huggingface-text2text-flan-t5-xl": {
|
|
51
115
|
"modelId": "jumpstart://huggingface-text2text-flan-t5-xl",
|
|
52
116
|
"family": "flan-t5",
|
|
53
117
|
"framework": "huggingface",
|
|
54
118
|
"provider": "jumpstart",
|
|
55
|
-
"tags": [
|
|
56
|
-
|
|
119
|
+
"tags": [
|
|
120
|
+
"text-generation",
|
|
121
|
+
"text2text",
|
|
122
|
+
"flan-t5"
|
|
123
|
+
],
|
|
124
|
+
"description": "Flan-T5 XL via JumpStart",
|
|
125
|
+
"validation_level": "community-validated",
|
|
126
|
+
"framework_compatibility": {
|
|
127
|
+
"vllm": ">=0.3.0"
|
|
128
|
+
},
|
|
129
|
+
"chat_template": "",
|
|
130
|
+
"gated": false,
|
|
131
|
+
"architecture": null
|
|
57
132
|
},
|
|
58
133
|
"jumpstart://huggingface-textembedding-gpt-j-6b": {
|
|
59
134
|
"modelId": "jumpstart://huggingface-textembedding-gpt-j-6b",
|
|
60
135
|
"family": "gpt-j",
|
|
61
136
|
"framework": "huggingface",
|
|
62
137
|
"provider": "jumpstart",
|
|
63
|
-
"tags": [
|
|
64
|
-
|
|
138
|
+
"tags": [
|
|
139
|
+
"text-embedding",
|
|
140
|
+
"gpt-j"
|
|
141
|
+
],
|
|
142
|
+
"description": "GPT-J 6B Embedding via JumpStart",
|
|
143
|
+
"validation_level": "community-validated",
|
|
144
|
+
"framework_compatibility": {
|
|
145
|
+
"vllm": ">=0.3.0"
|
|
146
|
+
},
|
|
147
|
+
"chat_template": "",
|
|
148
|
+
"gated": false,
|
|
149
|
+
"architecture": null
|
|
65
150
|
}
|
|
66
151
|
}
|
|
@@ -66,18 +66,216 @@
|
|
|
66
66
|
},
|
|
67
67
|
"notes": "vLLM 0.20.2 adds Gemma 4 support, CUDA 12.9, improved multi-GPU. Requires CUDA compat on drivers < 570.",
|
|
68
68
|
"supportedModelTypes": [
|
|
69
|
+
"afmoe",
|
|
70
|
+
"apertus",
|
|
71
|
+
"arcee",
|
|
72
|
+
"arctic",
|
|
73
|
+
"aria",
|
|
74
|
+
"bagel",
|
|
75
|
+
"baichuan",
|
|
76
|
+
"bailing_moe",
|
|
77
|
+
"bailing_moe_linear",
|
|
78
|
+
"bamba",
|
|
79
|
+
"bee",
|
|
80
|
+
"bert",
|
|
81
|
+
"bert_with_rope",
|
|
82
|
+
"blip2",
|
|
83
|
+
"bloom",
|
|
84
|
+
"chatglm",
|
|
85
|
+
"cheers",
|
|
86
|
+
"clip",
|
|
87
|
+
"colbert",
|
|
88
|
+
"colmodernvbert",
|
|
89
|
+
"colpali",
|
|
90
|
+
"colqwen3",
|
|
91
|
+
"colqwen3_5",
|
|
92
|
+
"commandr",
|
|
93
|
+
"dbrx",
|
|
94
|
+
"deepseek_eagle",
|
|
95
|
+
"deepseek_eagle3",
|
|
96
|
+
"deepseek_mtp",
|
|
97
|
+
"deepseek_ocr",
|
|
98
|
+
"deepseek_ocr2",
|
|
99
|
+
"deepseek_v2",
|
|
100
|
+
"deepseek_v4",
|
|
101
|
+
"deepseek_v4_mtp",
|
|
102
|
+
"deepseek_vl2",
|
|
103
|
+
"dots1",
|
|
104
|
+
"dots_ocr",
|
|
105
|
+
"ernie",
|
|
106
|
+
"ernie45",
|
|
107
|
+
"ernie45_moe",
|
|
108
|
+
"ernie_mtp",
|
|
109
|
+
"exaone",
|
|
110
|
+
"exaone4",
|
|
111
|
+
"exaone4_5_mtp",
|
|
112
|
+
"exaone_moe",
|
|
113
|
+
"exaone_moe_mtp",
|
|
114
|
+
"extract_hidden_states",
|
|
115
|
+
"fairseq2_llama",
|
|
116
|
+
"falcon",
|
|
117
|
+
"falcon_h1",
|
|
118
|
+
"flex_olmo",
|
|
119
|
+
"funasr",
|
|
120
|
+
"fuyu",
|
|
69
121
|
"gemma",
|
|
70
122
|
"gemma2",
|
|
71
123
|
"gemma3",
|
|
124
|
+
"gemma3_mm",
|
|
125
|
+
"gemma3n",
|
|
126
|
+
"gemma4",
|
|
127
|
+
"gemma4_mm",
|
|
128
|
+
"glm",
|
|
129
|
+
"glm4",
|
|
130
|
+
"glm4_1v",
|
|
131
|
+
"glm4_moe",
|
|
132
|
+
"glm4_moe_lite",
|
|
133
|
+
"glm4_moe_lite_mtp",
|
|
134
|
+
"glm4_moe_mtp",
|
|
135
|
+
"glm4v",
|
|
136
|
+
"glm_ocr",
|
|
137
|
+
"glm_ocr_mtp",
|
|
138
|
+
"glmasr",
|
|
139
|
+
"gpt2",
|
|
140
|
+
"gpt_bigcode",
|
|
141
|
+
"gpt_j",
|
|
142
|
+
"gpt_neox",
|
|
143
|
+
"gpt_oss",
|
|
144
|
+
"granite",
|
|
145
|
+
"granitemoe",
|
|
146
|
+
"granitemoehybrid",
|
|
147
|
+
"granitemoeshared",
|
|
148
|
+
"gritlm",
|
|
149
|
+
"grok1",
|
|
150
|
+
"h2ovl",
|
|
151
|
+
"hunyuan_v1",
|
|
152
|
+
"hy_v3",
|
|
153
|
+
"hy_v3_mtp",
|
|
154
|
+
"hyperclovax",
|
|
155
|
+
"hyperclovax_vision",
|
|
156
|
+
"hyperclovax_vision_v2",
|
|
157
|
+
"internlm2",
|
|
158
|
+
"internlm2_ve",
|
|
159
|
+
"internvl",
|
|
160
|
+
"iquest_loopcoder",
|
|
161
|
+
"isaac",
|
|
162
|
+
"jais",
|
|
163
|
+
"jais2",
|
|
164
|
+
"jamba",
|
|
165
|
+
"jina",
|
|
166
|
+
"jina_vl",
|
|
167
|
+
"kanana_v",
|
|
168
|
+
"keye",
|
|
169
|
+
"kimi_audio",
|
|
170
|
+
"kimi_k25",
|
|
171
|
+
"kimi_linear",
|
|
172
|
+
"kimi_vl",
|
|
173
|
+
"lfm2",
|
|
174
|
+
"lfm2_moe",
|
|
175
|
+
"lfm2_vl",
|
|
72
176
|
"llama",
|
|
177
|
+
"llama4",
|
|
178
|
+
"llama4_eagle",
|
|
179
|
+
"llama_eagle",
|
|
180
|
+
"llama_eagle3",
|
|
181
|
+
"llava",
|
|
182
|
+
"longcat_flash",
|
|
183
|
+
"longcat_flash_mtp",
|
|
184
|
+
"mamba",
|
|
185
|
+
"mamba2",
|
|
186
|
+
"medusa",
|
|
187
|
+
"midashenglm",
|
|
188
|
+
"mimo",
|
|
189
|
+
"mimo_mtp",
|
|
190
|
+
"mimo_v2_flash",
|
|
191
|
+
"minicpm",
|
|
192
|
+
"minicpm3",
|
|
193
|
+
"minicpm_eagle",
|
|
194
|
+
"minicpmo",
|
|
195
|
+
"minicpmv",
|
|
196
|
+
"minimax_m2",
|
|
197
|
+
"minimax_text_01",
|
|
73
198
|
"mistral",
|
|
199
|
+
"mistral_large_3",
|
|
74
200
|
"mixtral",
|
|
201
|
+
"mllama4",
|
|
202
|
+
"mlp_speculator",
|
|
203
|
+
"modernbert",
|
|
204
|
+
"molmo",
|
|
205
|
+
"molmo2",
|
|
206
|
+
"mpt",
|
|
207
|
+
"nano_nemotron_vl",
|
|
208
|
+
"nemotron",
|
|
209
|
+
"nemotron_h",
|
|
210
|
+
"nemotron_h_mtp",
|
|
211
|
+
"nemotron_nas",
|
|
212
|
+
"nemotron_vl",
|
|
213
|
+
"nvlm_d",
|
|
214
|
+
"olmo",
|
|
215
|
+
"olmo2",
|
|
216
|
+
"olmo_hybrid",
|
|
217
|
+
"olmoe",
|
|
218
|
+
"opencua",
|
|
219
|
+
"openpangu",
|
|
220
|
+
"openpangu_mtp",
|
|
221
|
+
"opt",
|
|
222
|
+
"orion",
|
|
223
|
+
"ouro",
|
|
224
|
+
"ovis",
|
|
225
|
+
"ovis2_5",
|
|
226
|
+
"param2moe",
|
|
227
|
+
"persimmon",
|
|
228
|
+
"phi",
|
|
229
|
+
"phi3",
|
|
230
|
+
"phi3v",
|
|
231
|
+
"phi4mm",
|
|
232
|
+
"phi4siglip",
|
|
233
|
+
"phimoe",
|
|
234
|
+
"pixtral",
|
|
235
|
+
"plamo2",
|
|
236
|
+
"plamo3",
|
|
237
|
+
"qwen",
|
|
75
238
|
"qwen2",
|
|
239
|
+
"qwen2_moe",
|
|
240
|
+
"qwen2_rm",
|
|
241
|
+
"qwen2_vl",
|
|
76
242
|
"qwen3",
|
|
243
|
+
"qwen3_5",
|
|
244
|
+
"qwen3_5_mtp",
|
|
245
|
+
"qwen3_asr_realtime",
|
|
246
|
+
"qwen3_dflash",
|
|
77
247
|
"qwen3_moe",
|
|
78
|
-
"
|
|
79
|
-
"
|
|
80
|
-
"
|
|
248
|
+
"qwen3_next",
|
|
249
|
+
"qwen3_next_mtp",
|
|
250
|
+
"qwen3_vl",
|
|
251
|
+
"qwen_vl",
|
|
252
|
+
"rnj1",
|
|
253
|
+
"roberta",
|
|
254
|
+
"rvl",
|
|
255
|
+
"sarvam",
|
|
256
|
+
"seed_oss",
|
|
257
|
+
"siglip",
|
|
258
|
+
"skyworkr1v",
|
|
259
|
+
"smolvlm",
|
|
260
|
+
"solar",
|
|
261
|
+
"stablelm",
|
|
262
|
+
"starcoder2",
|
|
263
|
+
"step1",
|
|
264
|
+
"step3_text",
|
|
265
|
+
"step3_vl",
|
|
266
|
+
"step3p5",
|
|
267
|
+
"step3p5_mtp",
|
|
268
|
+
"step_vl",
|
|
269
|
+
"tarsier",
|
|
270
|
+
"telechat2",
|
|
271
|
+
"teleflm",
|
|
272
|
+
"terratorch",
|
|
273
|
+
"transformers",
|
|
274
|
+
"ultravox",
|
|
275
|
+
"voxtral",
|
|
276
|
+
"voxtral_realtime",
|
|
277
|
+
"whisper",
|
|
278
|
+
"zamba2"
|
|
81
279
|
]
|
|
82
280
|
},
|
|
83
281
|
{
|