@aws/ml-container-creator 0.9.1 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/parameter-schema-v2.json +2065 -0
- package/package.json +4 -4
- package/servers/lib/catalogs/jumpstart-public.json +101 -16
- package/servers/lib/catalogs/models.json +182 -26
- package/src/app.js +1 -389
- package/src/lib/bootstrap-command-handler.js +75 -1078
- package/src/lib/bootstrap-profile-manager.js +634 -0
- package/src/lib/bootstrap-provisioners.js +421 -0
- package/src/lib/config-loader.js +405 -0
- package/src/lib/config-manager.js +59 -1685
- package/src/lib/config-mcp-client.js +118 -0
- package/src/lib/config-validator.js +634 -0
- package/src/lib/cuda-resolver.js +140 -0
- package/src/lib/e2e-catalog-validator.js +251 -3
- package/src/lib/e2e-ci-recorder.js +103 -0
- package/src/lib/generated/cli-options.js +8 -4
- package/src/lib/generated/parameter-matrix.js +671 -0
- package/src/lib/generated/validation-rules.js +2 -2
- package/src/lib/marketplace-flow.js +276 -0
- package/src/lib/mcp-query-runner.js +768 -0
- package/src/lib/parameter-schema-validator.js +62 -18
- package/src/lib/prompt-runner.js +41 -1504
- package/src/lib/prompts/feature-prompts.js +172 -0
- package/src/lib/prompts/index.js +48 -0
- package/src/lib/prompts/infrastructure-prompts.js +690 -0
- package/src/lib/prompts/model-prompts.js +552 -0
- package/src/lib/prompts/project-prompts.js +70 -0
- package/src/lib/prompts.js +2 -1446
- package/src/lib/registry-command-handler.js +135 -3
- package/src/lib/secrets-prompt-runner.js +251 -0
- package/src/lib/template-variable-resolver.js +398 -0
- package/config/parameter-schema.json +0 -88
|
@@ -0,0 +1,398 @@
|
|
|
1
|
+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
import fs from 'fs';
|
|
5
|
+
import path from 'path';
|
|
6
|
+
import { fileURLToPath } from 'url';
|
|
7
|
+
import { isTuneSupported } from './tune-catalog-validator.js';
|
|
8
|
+
|
|
9
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
10
|
+
const __dirname = path.dirname(__filename);
|
|
11
|
+
|
|
12
|
+
/**
 * Finds model configuration by exact match or glob-pattern match.
 *
 * Lookup order:
 *   1. Exact key lookup in `registryConfigManager.modelRegistry`.
 *   2. The first registry key containing `*` whose pattern matches the whole
 *      model name (keys are visited in `Object.entries` order).
 *
 * Only `*` acts as a wildcard (any run of characters, including none). Every
 * other character of a registry key is matched literally.
 *
 * @param {string} modelName - Model ID to look up
 * @param {object} registryConfigManager - Registry configuration manager
 * @returns {object|null} Model configuration or null
 */
function _findModelConfig(modelName, registryConfigManager) {
    if (!registryConfigManager?.modelRegistry) return null;

    const { modelRegistry } = registryConfigManager;

    // Exact match first
    const exact = modelRegistry[modelName];
    if (exact) return exact;

    // Pattern matching with glob-style wildcards
    for (const [pattern, config] of Object.entries(modelRegistry)) {
        if (!pattern.includes('*')) continue;

        // Escape regex metacharacters in the literal segments so that a key such
        // as "org/model-1.5-*" does not let "." match any character, and keys
        // containing "+", "(" or "[" neither change meaning nor throw.
        const source = pattern
            .split('*')
            .map(segment => segment.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
            .join('.*');

        if (new RegExp(`^${source}$`).test(modelName)) {
            return config;
        }
    }

    return null;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Merges environment variables from all catalog sources with correct precedence
 * and writes the result to `answers.envVars`.
 *
 * Precedence (lowest → highest):
 *   1. catalog defaults   (resolved framework config `envVars`)
 *   2. framework profile  (framework config `profiles[answers.frameworkProfile].envVars`)
 *   3. model entry        (model catalog entry `envVars`)
 *   4. model profile      (model catalog entry `profiles[answers.modelProfile].envVars`)
 *   5. CLI overrides      (the `answers.envVars` present when this function is called)
 *
 * Framework resolution: the registry is looked up by `answers.framework` first.
 * When that key has no registry entry, `answers.deploymentConfig` is tried, so
 * entries keyed by deployment config are still found after `framework` has
 * been populated from another field. Within the entry, `answers.frameworkVersion`
 * is used when present; otherwise the highest version key (numeric-aware
 * ordering) is used.
 *
 * Does nothing when `registryConfigManager` is falsy.
 *
 * @param {object} answers - Configuration answers (mutated: `envVars` is replaced)
 * @param {object|null} registryConfigManager - Registry configuration manager
 * @returns {Promise<void>}
 */
export async function _mergeEnvVarsWithPrecedence(answers, registryConfigManager) {
    if (!registryConfigManager) return;

    // Capture CLI-provided env vars before merging (highest precedence)
    const cliEnvVars = { ...answers.envVars };

    // Resolve the framework config for the selected framework + version
    const { frameworkRegistry } = registryConfigManager;
    let frameworkConfig = null;

    if (frameworkRegistry) {
        let frameworkVersions = null;
        // Try the framework key first, then the deployment-config key.
        for (const key of [answers.framework, answers.deploymentConfig]) {
            if (key && frameworkRegistry[key]) {
                frameworkVersions = frameworkRegistry[key];
                break;
            }
        }

        if (frameworkVersions) {
            const requestedVersion = answers.frameworkVersion;
            if (requestedVersion && frameworkVersions[requestedVersion]) {
                frameworkConfig = frameworkVersions[requestedVersion];
            } else {
                // No usable explicit version: fall back to the highest version key
                const [latestVersion] = Object.keys(frameworkVersions).sort((a, b) =>
                    b.localeCompare(a, undefined, { numeric: true })
                );
                if (latestVersion !== undefined) {
                    frameworkConfig = frameworkVersions[latestVersion];
                }
            }
        }
    }

    // Resolve the model config (exact match or pattern match)
    let modelConfig = null;
    if (answers.modelName && registryConfigManager.modelRegistry) {
        modelConfig = _findModelConfig(answers.modelName, registryConfigManager);
    }

    // Layer 1: catalog defaults
    const catalogDefaults = frameworkConfig?.envVars || {};

    // Layer 2: framework profile envVars
    const frameworkProfileEnvVars =
        (answers.frameworkProfile && frameworkConfig?.profiles?.[answers.frameworkProfile]?.envVars) || {};

    // Layer 3: model entry envVars
    const modelEntryEnvVars = modelConfig?.envVars || {};

    // Layer 4: model profile envVars
    const modelProfileEnvVars =
        (answers.modelProfile && modelConfig?.profiles?.[answers.modelProfile]?.envVars) || {};

    // Layer 5 is cliEnvVars, captured above.
    // Later spreads win, so each layer overrides the previous one.
    answers.envVars = {
        ...catalogDefaults,
        ...frameworkProfileEnvVars,
        ...modelEntryEnvVars,
        ...modelProfileEnvVars,
        ...cliEnvVars
    };
}
|
|
120
|
+
|
|
121
|
+
/**
 * Validates the selected framework's catalog environment variables using the
 * registry system and prints errors, warnings and the validation strategies
 * used to the console.
 *
 * Framework resolution:
 *   - For `answers.architecture === 'triton'` the registry is looked up by
 *     `answers.deploymentConfig` (e.g. 'triton-fil') and the highest version
 *     key (numeric-aware ordering) is used.
 *   - Otherwise, or when the Triton lookup finds nothing, the registry is looked
 *     up by `answers.framework` and `answers.frameworkVersion`.
 *
 * Returns without output when no framework config with `envVars` is found
 * (including when `registryConfigManager` is null/undefined).
 *
 * @param {object} answers - Configuration answers
 * @param {object} registryConfigManager - Registry configuration manager
 * @returns {Promise<void>}
 * @throws {Error} When the validation result contains at least one error
 */
export async function _validateEnvironmentVariables(answers, registryConfigManager) {
    const frameworkRegistry = registryConfigManager?.frameworkRegistry;

    // Get framework configuration
    // For Triton configs, look up using deploymentConfig key (e.g. 'triton-fil')
    let frameworkConfig;
    if (answers.architecture === 'triton' && answers.deploymentConfig) {
        const tritonEntry = frameworkRegistry?.[answers.deploymentConfig];
        if (tritonEntry) {
            // Pick the highest version rather than whichever key happens to be first.
            const [latestVersion] = Object.keys(tritonEntry).sort((a, b) =>
                b.localeCompare(a, undefined, { numeric: true })
            );
            if (latestVersion !== undefined) {
                frameworkConfig = tritonEntry[latestVersion];
            }
        }
    }
    if (!frameworkConfig) {
        frameworkConfig = frameworkRegistry?.[answers.framework]?.[answers.frameworkVersion];
    }

    if (!frameworkConfig || !frameworkConfig.envVars) {
        return; // No env vars to validate
    }

    console.log('\n🔍 Validating environment variables...');

    // Validate environment variables
    const validationResult = registryConfigManager.validateEnvironmentVariables(
        frameworkConfig.envVars,
        frameworkConfig
    );

    // Treat a missing result or missing lists as "nothing reported".
    const errors = validationResult?.errors || [];
    const warnings = validationResult?.warnings || [];
    const strategiesUsed = validationResult?.strategiesUsed || [];

    // Display validation results
    if (errors.length > 0) {
        console.log('\n❌ Environment Variable Validation Errors:');
        errors.forEach(error => {
            console.log(` • ${error.key}: ${error.message}`);
        });
    }

    if (warnings.length > 0) {
        console.log('\n⚠️ Environment Variable Validation Warnings:');
        warnings.forEach(warning => {
            console.log(` • ${warning.key ? `${warning.key}: ` : ''}${warning.message}`);
        });
    }

    if (strategiesUsed.length > 0) {
        console.log(`\n✅ Validation methods used: ${strategiesUsed.join(', ')}`);
    }

    if (errors.length === 0 && warnings.length === 0) {
        console.log(' ✅ All environment variables validated successfully');
    }

    // Errors are fatal regardless of how the answers were collected.
    if (errors.length > 0) {
        throw new Error('Environment variable validation failed. Please fix the errors and try again.');
    }
}
|
|
187
|
+
|
|
188
|
+
/**
 * Returns the config stored under the highest version key of a framework
 * registry entry. Keys are compared with numeric-aware collation.
 *
 * @param {object} versionMap - Map of version string → framework config
 * @returns {object|null|undefined} The value stored under the highest version
 *   key, or null when the map has no keys
 */
function _latestVersionConfig(versionMap) {
    const [latestVersion] = Object.keys(versionMap).sort((a, b) =>
        b.localeCompare(a, undefined, { numeric: true })
    );
    return latestVersion === undefined ? null : versionMap[latestVersion];
}

/**
 * Ensures all template variables have proper defaults to prevent
 * "undefined" errors in EJS templates. Also enriches answers with
 * registry data (env var merging, HuggingFace data, Triton base image).
 *
 * Steps, in order (all mutate `answers` in place):
 *   1. Fill every key of the defaults table that is `undefined` on `answers`.
 *   2. Populate `framework` / `modelServer` from `architecture` / `backend`
 *      when they are not set.
 *   3. Force `includeTesting = true` and choose default `testTypes` when none are set.
 *   4. Merge catalog env vars via `_mergeEnvVarsWithPrecedence`.
 *   5. Triton only: resolve `baseImage` (plus `inferenceAmiVersion` and
 *      `accelerator`) from the registry entry keyed by `deploymentConfig`,
 *      falling back to a hardcoded Triton image.
 *   6. Transformers only: best-effort enrichment with the HuggingFace chat
 *      template, Model Registry chat-template override and framework metadata.
 *   7. Resolve `baseImage` from the registry entry keyed by `backend` /
 *      `modelServer` when it is still unset.
 *   8. Resolve `icGpuCount` from `gpuCount` or the bundled instances catalog.
 *   9. Resolve `tuneSupported` from the bundled tune catalog.
 *
 * Failures in steps 6, 8 and 9 are swallowed on purpose: the defaults from
 * step 1 (or `tuneSupported = false`) stay in place.
 *
 * @param {object} answers - Answers object to fill defaults into
 * @param {object|null} registryConfigManager - Registry configuration manager (or null)
 * @returns {Promise<void>}
 */
export async function _ensureTemplateVariables(answers, registryConfigManager = null) {
    const defaults = {
        chatTemplate: null,
        chatTemplateSource: null,
        hfToken: null,
        hfTokenArn: null,
        ngcApiKey: null,
        ngcTokenArn: null,
        envVars: {},
        inferenceAmiVersion: null,
        accelerator: null,
        frameworkVersion: null,
        validationLevel: 'unknown',
        configSources: [],
        recommendedInstanceTypes: [],
        roleArn: null,
        deploymentConfig: '',
        architecture: null,
        backend: null,
        engine: null,
        codebuildComputeType: null,
        codebuildProjectName: null,
        modelName: null,
        modelFormat: null,
        includeSampleModel: true,
        includeTesting: true,
        testTypes: [],
        buildTimestamp: new Date().toISOString(),
        buildTarget: 'codebuild',
        deploymentTarget: 'realtime-inference',
        hyperPodCluster: null,
        hyperPodNamespace: 'default',
        hyperPodReplicas: 1,
        fsxVolumeHandle: null,
        baseImage: null,
        modelSource: 'huggingface',
        artifactUri: '',
        modelLoadStrategy: 'runtime',
        existingEndpointName: null,
        enableLora: false,
        maxLoras: 30,
        maxLoraRank: 64
    };

    // Only `undefined` is replaced; explicit null/false/0/'' from the caller are kept.
    for (const [key, value] of Object.entries(defaults)) {
        if (answers[key] === undefined) {
            answers[key] = value;
        }
    }

    // Backward compatibility: populate framework and modelServer from architecture/backend
    if (!answers.framework && answers.architecture) {
        answers.framework = answers.architecture;
    }
    if (!answers.modelServer && answers.backend) {
        answers.modelServer = answers.backend;
    }

    // Always include testing with all available test types
    answers.includeTesting = true;
    if (!answers.testTypes || answers.testTypes.length === 0) {
        if (answers.architecture === 'transformers' || answers.framework === 'transformers') {
            answers.testTypes = ['hosted-model-endpoint'];
        } else {
            answers.testTypes = ['local-model-cli', 'local-model-server', 'hosted-model-endpoint'];
        }
    }

    // Merge catalog env vars into answers.envVars with correct precedence
    await _mergeEnvVarsWithPrecedence(answers, registryConfigManager);

    // For Triton architecture, set default base image fallback
    if (answers.architecture === 'triton' && !answers.baseImage) {
        // Try to look up base image from framework registry using deployment-config key
        const tritonRegistryKey = answers.deploymentConfig;
        if (tritonRegistryKey && registryConfigManager?.frameworkRegistry) {
            const tritonFrameworkConfig = registryConfigManager.frameworkRegistry[tritonRegistryKey];
            // A nullish latest entry is skipped instead of throwing.
            const latestConfig = tritonFrameworkConfig ? _latestVersionConfig(tritonFrameworkConfig) : null;
            if (latestConfig) {
                if (latestConfig.baseImage) {
                    answers.baseImage = latestConfig.baseImage;
                }
                if (latestConfig.inferenceAmiVersion && !answers.inferenceAmiVersion) {
                    answers.inferenceAmiVersion = latestConfig.inferenceAmiVersion;
                }
                if (latestConfig.accelerator) {
                    answers.accelerator = latestConfig.accelerator;
                }
            }
        }
        // Final fallback: hardcoded default Triton base image
        if (!answers.baseImage) {
            answers.baseImage = 'nvcr.io/nvidia/tritonserver:24.08-py3';
        }
    }

    // For transformer models, enrich with HuggingFace data and non-envVar metadata
    if (answers.framework === 'transformers' && answers.modelName && registryConfigManager) {
        // The HuggingFace fetch is isolated so that a failure there does not
        // also skip the registry-based enrichment below, which only reads
        // data already held by the registry manager.
        let hfData = null;
        try {
            hfData = await registryConfigManager._fetchHuggingFaceData(answers.modelName);
        } catch {
            // Best effort - continue without HuggingFace data
        }

        try {
            // Merge chatTemplate if available and not already set
            if (hfData && hfData.chatTemplate && !answers.chatTemplate) {
                answers.chatTemplate = hfData.chatTemplate;
                answers.chatTemplateSource = 'HuggingFace_Hub_API';
            }

            // Check Model Registry for chatTemplate overrides
            if (registryConfigManager.modelRegistry) {
                const modelConfig = _findModelConfig(answers.modelName, registryConfigManager);

                if (modelConfig && modelConfig.chatTemplate) {
                    answers.chatTemplate = modelConfig.chatTemplate;
                    answers.chatTemplateSource = 'Model_Registry';
                }
            }

            // Set framework-level metadata (non-envVar fields)
            if (answers.frameworkVersion && registryConfigManager.frameworkRegistry) {
                const frameworkConfig =
                    registryConfigManager.frameworkRegistry[answers.framework]?.[answers.frameworkVersion];

                if (frameworkConfig) {
                    if (frameworkConfig.inferenceAmiVersion && !answers.inferenceAmiVersion) {
                        answers.inferenceAmiVersion = frameworkConfig.inferenceAmiVersion;
                    }
                    if (frameworkConfig.accelerator) {
                        answers.accelerator = frameworkConfig.accelerator;
                    }
                }
            }
        } catch {
            // Silently continue - defaults are already set
        }
    }

    // Populate baseImage from the catalog when still falsy (covers --skip-prompts and
    // cases where MCP/CLI/config did not provide a base image).
    // Precedence: MCP > CLI > config > catalog default (this block).
    if (!answers.baseImage && registryConfigManager?.frameworkRegistry) {
        const backendKey = answers.backend || answers.modelServer;
        const frameworkVersions = backendKey ? registryConfigManager.frameworkRegistry[backendKey] : null;
        if (frameworkVersions) {
            // Prefer the requested version; otherwise fall back to the latest one.
            const requestedConfig = answers.frameworkVersion && frameworkVersions[answers.frameworkVersion];
            const resolvedConfig = requestedConfig || _latestVersionConfig(frameworkVersions);
            if (resolvedConfig?.baseImage) {
                answers.baseImage = resolvedConfig.baseImage;
            }
        }
    }

    // Populate icGpuCount from instance catalog when not explicitly set.
    // The deploy template uses IC_GPU_COUNT unconditionally for NumberOfAcceleratorDevicesRequired,
    // so it must always have a value for GPU deployments.
    if ((answers.icGpuCount === null || answers.icGpuCount === undefined) && answers.instanceType) {
        if (answers.gpuCount) {
            // Use gpuCount from instance-sizer recommendation if available
            answers.icGpuCount = answers.gpuCount;
        } else {
            // Look up from instances catalog
            try {
                const catalogPath = path.resolve(__dirname, '..', '..', 'servers', 'lib', 'catalogs', 'instances.json');
                const catalogData = JSON.parse(fs.readFileSync(catalogPath, 'utf-8'));
                const instanceInfo = catalogData?.catalog?.[answers.instanceType];
                if (instanceInfo?.gpus && instanceInfo.gpus > 0) {
                    answers.icGpuCount = instanceInfo.gpus;
                }
            } catch {
                // Silently continue — template fallback handles missing value
            }
        }
    }

    // Determine tune support based on model presence in the tune catalog.
    // Used by the do/config template to write TUNE_SUPPORTED=true|false.
    if (answers.tuneSupported === undefined) {
        try {
            const tuneCatalogPath = path.resolve(__dirname, '..', '..', 'config', 'tune-catalog.json');
            const tuneCatalog = JSON.parse(fs.readFileSync(tuneCatalogPath, 'utf-8'));
            const modelId = answers.modelName || '';
            answers.tuneSupported = isTuneSupported(modelId, tuneCatalog);
        } catch {
            // Unreadable or invalid catalog: report tuning as unsupported
            answers.tuneSupported = false;
        }
    }
}
|
|
@@ -1,88 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"schemaVersion": "1.0.0",
|
|
3
|
-
"deploymentTargets": {
|
|
4
|
-
"realtime-inference": {
|
|
5
|
-
"endpoint": {
|
|
6
|
-
"initialInstanceCount": {
|
|
7
|
-
"type": "integer",
|
|
8
|
-
"min": 1,
|
|
9
|
-
"max": 100,
|
|
10
|
-
"default": 1,
|
|
11
|
-
"description": "Number of instances for the endpoint",
|
|
12
|
-
"apiReference": "CreateEndpointConfig.ProductionVariants.InitialInstanceCount"
|
|
13
|
-
},
|
|
14
|
-
"dataCapturePercent": {
|
|
15
|
-
"type": "integer",
|
|
16
|
-
"min": 0,
|
|
17
|
-
"max": 100,
|
|
18
|
-
"default": 0,
|
|
19
|
-
"description": "Percentage of requests to capture",
|
|
20
|
-
"apiReference": "CreateEndpointConfig.DataCaptureConfig.InitialSamplingPercentage"
|
|
21
|
-
},
|
|
22
|
-
"variantName": {
|
|
23
|
-
"type": "string",
|
|
24
|
-
"pattern": "^[a-zA-Z0-9]([\\w-]{0,62}[a-zA-Z0-9])?$",
|
|
25
|
-
"default": "AllTraffic",
|
|
26
|
-
"description": "Name of the production variant",
|
|
27
|
-
"apiReference": "CreateEndpointConfig.ProductionVariants.VariantName"
|
|
28
|
-
},
|
|
29
|
-
"volumeSize": {
|
|
30
|
-
"type": "integer",
|
|
31
|
-
"min": 1,
|
|
32
|
-
"max": 16384,
|
|
33
|
-
"default": null,
|
|
34
|
-
"description": "Size of the ML storage volume in GB",
|
|
35
|
-
"apiReference": "CreateEndpointConfig.ProductionVariants.VolumeSizeInGB"
|
|
36
|
-
}
|
|
37
|
-
},
|
|
38
|
-
"inferenceComponent": {
|
|
39
|
-
"cpuCount": {
|
|
40
|
-
"type": "number",
|
|
41
|
-
"min": 0.25,
|
|
42
|
-
"max": 768,
|
|
43
|
-
"default": null,
|
|
44
|
-
"description": "Number of vCPUs allocated",
|
|
45
|
-
"apiReference": "CreateInferenceComponent.Specification.ComputeResourceRequirements.NumberOfCpuCoresRequired"
|
|
46
|
-
},
|
|
47
|
-
"memorySize": {
|
|
48
|
-
"type": "integer",
|
|
49
|
-
"min": 128,
|
|
50
|
-
"max": 3145728,
|
|
51
|
-
"default": null,
|
|
52
|
-
"description": "Memory allocation in MB",
|
|
53
|
-
"apiReference": "CreateInferenceComponent.Specification.ComputeResourceRequirements.MinMemoryRequiredInMb"
|
|
54
|
-
},
|
|
55
|
-
"gpuCount": {
|
|
56
|
-
"type": "integer",
|
|
57
|
-
"min": 0,
|
|
58
|
-
"max": 8,
|
|
59
|
-
"default": null,
|
|
60
|
-
"description": "Number of GPUs allocated",
|
|
61
|
-
"apiReference": "CreateInferenceComponent.Specification.ComputeResourceRequirements.NumberOfAcceleratorDevicesRequired"
|
|
62
|
-
},
|
|
63
|
-
"copyCount": {
|
|
64
|
-
"type": "integer",
|
|
65
|
-
"min": 0,
|
|
66
|
-
"max": 100,
|
|
67
|
-
"default": 1,
|
|
68
|
-
"description": "Number of inference component copies",
|
|
69
|
-
"apiReference": "CreateInferenceComponent.RuntimeConfig.CopyCount"
|
|
70
|
-
},
|
|
71
|
-
"modelWeight": {
|
|
72
|
-
"type": "number",
|
|
73
|
-
"min": 0,
|
|
74
|
-
"max": 1,
|
|
75
|
-
"default": 1.0,
|
|
76
|
-
"description": "Traffic routing weight for the model",
|
|
77
|
-
"apiReference": "UpdateEndpointWeightsAndCapacities.DesiredWeightsAndCapacities.DesiredWeight"
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
},
|
|
81
|
-
"eks": {},
|
|
82
|
-
"async": {},
|
|
83
|
-
"batch": {}
|
|
84
|
-
},
|
|
85
|
-
"extensionPoints": {
|
|
86
|
-
"engines": {}
|
|
87
|
-
}
|
|
88
|
-
}
|