@aws/ml-container-creator 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +298 -62
- package/bin/cli.js +4 -3
- package/config/parameter-schema.json +1 -1
- package/package.json +1 -1
- package/src/app.js +17 -1
- package/src/lib/auto-prompt-builder.js +172 -0
- package/src/lib/ci-register-helpers.js +1 -1
- package/src/lib/cli-handler.js +1 -1
- package/src/lib/config-manager.js +177 -3
- package/src/lib/parameter-schema-validator.js +10 -10
- package/src/lib/prompt-runner.js +51 -7
- package/src/lib/prompts.js +7 -7
- package/src/lib/template-manager.js +2 -2
- package/templates/do/clean +6 -6
- package/templates/do/config +6 -6
- package/templates/do/deploy +5 -5
- package/templates/do/export +5 -5
- package/templates/do/logs +4 -4
- package/templates/do/register +3 -3
- package/templates/do/test +4 -4
|
@@ -78,6 +78,7 @@ export default class ConfigManager {
|
|
|
78
78
|
this.args = args || [];
|
|
79
79
|
this.config = {};
|
|
80
80
|
this.skipPrompts = false;
|
|
81
|
+
this.autoPrompt = false;
|
|
81
82
|
this.deploymentConfigResolver = new DeploymentConfigResolver();
|
|
82
83
|
this.parameterMatrix = this._getParameterMatrix();
|
|
83
84
|
this.schemaValidator = new ParameterSchemaValidator();
|
|
@@ -106,6 +107,9 @@ export default class ConfigManager {
|
|
|
106
107
|
await this._loadCliArguments();
|
|
107
108
|
await this._loadCliOptions();
|
|
108
109
|
|
|
110
|
+
// Normalize deprecated values to canonical equivalents
|
|
111
|
+
this._normalizeDeprecatedValues();
|
|
112
|
+
|
|
109
113
|
// Query configured MCP servers for unbounded parameter values
|
|
110
114
|
await this._queryMcpServers();
|
|
111
115
|
|
|
@@ -113,6 +117,18 @@ export default class ConfigManager {
|
|
|
113
117
|
this.skipPrompts = this.options['skip-prompts'] ||
|
|
114
118
|
this._hasCompleteConfiguration();
|
|
115
119
|
|
|
120
|
+
// Auto-prompt mode: fill defaults like skip-prompts, but prompt for truly missing values
|
|
121
|
+
this.autoPrompt = this.options['auto-prompt'] === true;
|
|
122
|
+
if (this.autoPrompt) {
|
|
123
|
+
// In auto-prompt mode, we don't skip prompts entirely — we'll selectively prompt
|
|
124
|
+
this.skipPrompts = false;
|
|
125
|
+
|
|
126
|
+
// Pre-fill defaults for required parameters that can be auto-generated.
|
|
127
|
+
// Promote these into explicitConfig so the wizard skips them.
|
|
128
|
+
// This means the wizard only prompts for values that are truly ambiguous.
|
|
129
|
+
this._fillAutoPromptDefaults();
|
|
130
|
+
}
|
|
131
|
+
|
|
116
132
|
return this.config;
|
|
117
133
|
}
|
|
118
134
|
|
|
@@ -182,8 +198,8 @@ export default class ConfigManager {
|
|
|
182
198
|
}
|
|
183
199
|
}
|
|
184
200
|
|
|
185
|
-
// When skipping prompts, provide reasonable defaults for missing required parameters
|
|
186
|
-
if (this.skipPrompts) {
|
|
201
|
+
// When skipping prompts or in auto-prompt mode, provide reasonable defaults for missing required parameters
|
|
202
|
+
if (this.skipPrompts || this.autoPrompt) {
|
|
187
203
|
Object.entries(this.parameterMatrix).forEach(([param, config]) => {
|
|
188
204
|
if (config.required &&
|
|
189
205
|
(finalConfig[param] === null || finalConfig[param] === undefined)) {
|
|
@@ -635,7 +651,7 @@ export default class ConfigManager {
|
|
|
635
651
|
mcp: false,
|
|
636
652
|
promptable: true,
|
|
637
653
|
required: true,
|
|
638
|
-
default: '
|
|
654
|
+
default: 'realtime-inference',
|
|
639
655
|
valueSpace: 'bounded'
|
|
640
656
|
},
|
|
641
657
|
hyperPodCluster: {
|
|
@@ -1373,6 +1389,35 @@ export default class ConfigManager {
|
|
|
1373
1389
|
this._parseEnvVarOptions('server-env', 'serverEnvVars');
|
|
1374
1390
|
}
|
|
1375
1391
|
|
|
1392
|
+
/**
|
|
1393
|
+
* Normalizes deprecated parameter values to their canonical equivalents.
|
|
1394
|
+
* Prints a deprecation warning when a deprecated value is encountered.
|
|
1395
|
+
* @private
|
|
1396
|
+
*/
|
|
1397
|
+
_normalizeDeprecatedValues() {
|
|
1398
|
+
const DEPRECATED_VALUES = {
|
|
1399
|
+
deploymentTarget: {
|
|
1400
|
+
'managed-inference': {
|
|
1401
|
+
canonical: 'realtime-inference',
|
|
1402
|
+
message: '--deployment-target=managed-inference is deprecated, use realtime-inference instead'
|
|
1403
|
+
}
|
|
1404
|
+
}
|
|
1405
|
+
};
|
|
1406
|
+
|
|
1407
|
+
for (const [param, aliases] of Object.entries(DEPRECATED_VALUES)) {
|
|
1408
|
+
const currentValue = this.config[param];
|
|
1409
|
+
if (currentValue && aliases[currentValue]) {
|
|
1410
|
+
const { canonical, message } = aliases[currentValue];
|
|
1411
|
+
console.log(`\n⚠️ Deprecation: ${message}`);
|
|
1412
|
+
this.config[param] = canonical;
|
|
1413
|
+
// Also update explicit config if it was set there
|
|
1414
|
+
if (this.explicitConfig && this.explicitConfig[param] === currentValue) {
|
|
1415
|
+
this.explicitConfig[param] = canonical;
|
|
1416
|
+
}
|
|
1417
|
+
}
|
|
1418
|
+
}
|
|
1419
|
+
}
|
|
1420
|
+
|
|
1376
1421
|
/**
|
|
1377
1422
|
* Parse --model-env or --server-env CLI options into env var collections.
|
|
1378
1423
|
* Supports both array (multiple flags) and single string values.
|
|
@@ -1828,6 +1873,135 @@ export default class ConfigManager {
|
|
|
1828
1873
|
return autoGeneratable.includes(param);
|
|
1829
1874
|
}
|
|
1830
1875
|
|
|
1876
|
+
/**
|
|
1877
|
+
* Fills auto-prompt defaults for parameters that have sensible defaults
|
|
1878
|
+
* or can be inferred from the current config. Promotes these into
|
|
1879
|
+
* explicitConfig so the wizard skips them.
|
|
1880
|
+
*
|
|
1881
|
+
* Only fills parameters that:
|
|
1882
|
+
* - Have a non-null default in the parameter matrix, OR
|
|
1883
|
+
* - Can be auto-generated (instanceType, modelFormat, etc.)
|
|
1884
|
+
*
|
|
1885
|
+
* Does NOT fill parameters that are truly ambiguous and need user input
|
|
1886
|
+
* (e.g., deploymentConfig when not provided).
|
|
1887
|
+
* @private
|
|
1888
|
+
*/
|
|
1889
|
+
_fillAutoPromptDefaults() {
|
|
1890
|
+
if (!this.explicitConfig) {
|
|
1891
|
+
this.explicitConfig = {};
|
|
1892
|
+
}
|
|
1893
|
+
|
|
1894
|
+
// Derive architecture from deploymentConfig if available
|
|
1895
|
+
let architecture = this.config.architecture;
|
|
1896
|
+
if (!architecture && this.config.deploymentConfig) {
|
|
1897
|
+
try {
|
|
1898
|
+
const parts = this.deploymentConfigResolver.decompose(this.config.deploymentConfig);
|
|
1899
|
+
architecture = parts.architecture;
|
|
1900
|
+
this.config.architecture = parts.architecture;
|
|
1901
|
+
this.config.backend = parts.backend;
|
|
1902
|
+
this.config.engine = parts.engine;
|
|
1903
|
+
} catch {
|
|
1904
|
+
// Invalid deploymentConfig — will be caught by validation
|
|
1905
|
+
}
|
|
1906
|
+
}
|
|
1907
|
+
|
|
1908
|
+
Object.entries(this.parameterMatrix).forEach(([param, config]) => {
|
|
1909
|
+
// Skip if already explicitly set
|
|
1910
|
+
if (this.explicitConfig[param] !== undefined && this.explicitConfig[param] !== null) {
|
|
1911
|
+
return;
|
|
1912
|
+
}
|
|
1913
|
+
|
|
1914
|
+
// For optional parameters: mark them as explicit (with null) so the wizard skips them.
|
|
1915
|
+
// The downstream template logic handles defaults for optional params.
|
|
1916
|
+
if (!config.required) {
|
|
1917
|
+
// Don't override if there's already a value in config
|
|
1918
|
+
if (this.config[param] !== undefined && this.config[param] !== null) {
|
|
1919
|
+
this.explicitConfig[param] = this.config[param];
|
|
1920
|
+
} else if (config.default !== null && config.default !== undefined) {
|
|
1921
|
+
this.config[param] = config.default;
|
|
1922
|
+
this.explicitConfig[param] = config.default;
|
|
1923
|
+
}
|
|
1924
|
+
return;
|
|
1925
|
+
}
|
|
1926
|
+
|
|
1927
|
+
// For required parameters: fill auto-generatable values
|
|
1928
|
+
if (this.config[param] === undefined || this.config[param] === null) {
|
|
1929
|
+
if (param === 'instanceType') {
|
|
1930
|
+
const arch = architecture || 'http';
|
|
1931
|
+
this.config[param] = arch === 'http' ? 'ml.m5.large' : 'ml.g5.xlarge';
|
|
1932
|
+
} else if (param === 'modelFormat') {
|
|
1933
|
+
if (architecture === 'transformers' || architecture === 'triton' || architecture === 'diffusors') {
|
|
1934
|
+
return; // Not needed for these architectures
|
|
1935
|
+
}
|
|
1936
|
+
const engine = this.config.engine || 'sklearn';
|
|
1937
|
+
const formatMap = { sklearn: 'pkl', xgboost: 'json', tensorflow: 'keras' };
|
|
1938
|
+
this.config[param] = formatMap[engine] || 'pkl';
|
|
1939
|
+
} else if (param === 'projectName') {
|
|
1940
|
+
this.config[param] = this._generateProjectName(architecture);
|
|
1941
|
+
} else {
|
|
1942
|
+
return; // Can't fill — leave for prompting
|
|
1943
|
+
}
|
|
1944
|
+
}
|
|
1945
|
+
|
|
1946
|
+
// Promote non-null values to explicitConfig so the wizard skips them
|
|
1947
|
+
if (this.config[param] !== undefined && this.config[param] !== null) {
|
|
1948
|
+
if (config.default !== null || this._canAutoGenerate(param)) {
|
|
1949
|
+
this.explicitConfig[param] = this.config[param];
|
|
1950
|
+
}
|
|
1951
|
+
}
|
|
1952
|
+
});
|
|
1953
|
+
}
|
|
1954
|
+
|
|
1955
|
+
/**
|
|
1956
|
+
* Returns whether auto-prompt mode is active
|
|
1957
|
+
* @returns {boolean}
|
|
1958
|
+
*/
|
|
1959
|
+
isAutoPrompt() {
|
|
1960
|
+
return this.autoPrompt;
|
|
1961
|
+
}
|
|
1962
|
+
|
|
1963
|
+
/**
|
|
1964
|
+
* Gets the list of required parameters that are truly missing and cannot be
|
|
1965
|
+
* auto-generated or defaulted. Used by auto-prompt mode to determine which
|
|
1966
|
+
* specific prompts to show.
|
|
1967
|
+
*
|
|
1968
|
+
* @returns {string[]} Array of parameter names that need prompting
|
|
1969
|
+
*/
|
|
1970
|
+
getMissingRequiredParameters() {
|
|
1971
|
+
const missing = [];
|
|
1972
|
+
|
|
1973
|
+
Object.entries(this.parameterMatrix).forEach(([param, config]) => {
|
|
1974
|
+
if (!config.required || !config.promptable) return;
|
|
1975
|
+
|
|
1976
|
+
const value = this.config[param];
|
|
1977
|
+
const hasValue = value !== undefined && value !== null;
|
|
1978
|
+
|
|
1979
|
+
if (hasValue) return;
|
|
1980
|
+
|
|
1981
|
+
// Special case: modelFormat is not required for transformers/triton/diffusors
|
|
1982
|
+
if (param === 'modelFormat') {
|
|
1983
|
+
const architecture = this.config.architecture;
|
|
1984
|
+
if (architecture === 'transformers' || architecture === 'triton' || architecture === 'diffusors') {
|
|
1985
|
+
return;
|
|
1986
|
+
}
|
|
1987
|
+
// Can be inferred from engine
|
|
1988
|
+
if (this.config.engine || this.config.deploymentConfig) {
|
|
1989
|
+
return;
|
|
1990
|
+
}
|
|
1991
|
+
}
|
|
1992
|
+
|
|
1993
|
+
// Skip params that can be auto-generated
|
|
1994
|
+
if (this._canAutoGenerate(param)) return;
|
|
1995
|
+
|
|
1996
|
+
// Skip params that have a non-null default
|
|
1997
|
+
if (config.default !== null && config.default !== undefined) return;
|
|
1998
|
+
|
|
1999
|
+
missing.push(param);
|
|
2000
|
+
});
|
|
2001
|
+
|
|
2002
|
+
return missing;
|
|
2003
|
+
}
|
|
2004
|
+
|
|
1831
2005
|
/**
|
|
1832
2006
|
* Generates a project name based on framework
|
|
1833
2007
|
* @param {string} framework - The ML framework
|
|
@@ -27,15 +27,15 @@ const SUPPORTED_SCHEMA_VERSION = '1.0.0';
|
|
|
27
27
|
* Format: 'deploymentTarget.category.schemaKey'
|
|
28
28
|
*/
|
|
29
29
|
const PARAMETER_NAME_MAP = {
|
|
30
|
-
endpointInitialInstanceCount: '
|
|
31
|
-
endpointDataCapturePercent: '
|
|
32
|
-
endpointVariantName: '
|
|
33
|
-
endpointVolumeSize: '
|
|
34
|
-
icCpuCount: '
|
|
35
|
-
icMemorySize: '
|
|
36
|
-
icGpuCount: '
|
|
37
|
-
icCopyCount: '
|
|
38
|
-
icModelWeight: '
|
|
30
|
+
endpointInitialInstanceCount: 'realtime-inference.endpoint.initialInstanceCount',
|
|
31
|
+
endpointDataCapturePercent: 'realtime-inference.endpoint.dataCapturePercent',
|
|
32
|
+
endpointVariantName: 'realtime-inference.endpoint.variantName',
|
|
33
|
+
endpointVolumeSize: 'realtime-inference.endpoint.volumeSize',
|
|
34
|
+
icCpuCount: 'realtime-inference.inferenceComponent.cpuCount',
|
|
35
|
+
icMemorySize: 'realtime-inference.inferenceComponent.memorySize',
|
|
36
|
+
icGpuCount: 'realtime-inference.inferenceComponent.gpuCount',
|
|
37
|
+
icCopyCount: 'realtime-inference.inferenceComponent.copyCount',
|
|
38
|
+
icModelWeight: 'realtime-inference.inferenceComponent.modelWeight'
|
|
39
39
|
};
|
|
40
40
|
|
|
41
41
|
export default class ParameterSchemaValidator {
|
|
@@ -96,7 +96,7 @@ export default class ParameterSchemaValidator {
|
|
|
96
96
|
/**
|
|
97
97
|
* Resolve a parameter name to its schema constraint object.
|
|
98
98
|
* @param {string} parameterName - ConfigManager key (e.g., 'endpointVolumeSize')
|
|
99
|
-
* @param {string} [deploymentTarget] - Deployment target override (e.g., '
|
|
99
|
+
* @param {string} [deploymentTarget] - Deployment target override (e.g., 'realtime-inference')
|
|
100
100
|
* @returns {Object|null} Constraint object or null if not found
|
|
101
101
|
*/
|
|
102
102
|
_resolveConstraint(parameterName, deploymentTarget) {
|
package/src/lib/prompt-runner.js
CHANGED
|
@@ -81,9 +81,9 @@ export default class PromptRunner {
|
|
|
81
81
|
const regionPreviousAnswers = bootstrapRegion ? { _bootstrapRegion: bootstrapRegion } : {};
|
|
82
82
|
const regionAndTargetAnswers = await this._runPhase(infraRegionAndTargetPrompts, regionPreviousAnswers, explicitConfig, existingConfig);
|
|
83
83
|
|
|
84
|
-
// 1b. Instance type — query MCP and prompt for
|
|
84
|
+
// 1b. Instance type — query MCP and prompt for realtime-inference, async-inference, batch-transform, and hyperpod-eks
|
|
85
85
|
let instanceAnswers = {};
|
|
86
|
-
if (regionAndTargetAnswers.deploymentTarget === '
|
|
86
|
+
if (regionAndTargetAnswers.deploymentTarget === 'realtime-inference' ||
|
|
87
87
|
regionAndTargetAnswers.deploymentTarget === 'async-inference' ||
|
|
88
88
|
regionAndTargetAnswers.deploymentTarget === 'batch-transform' ||
|
|
89
89
|
regionAndTargetAnswers.deploymentTarget === 'hyperpod-eks') {
|
|
@@ -521,7 +521,18 @@ export default class PromptRunner {
|
|
|
521
521
|
// First, add any existing config values to previousAnswers so they're available for defaults
|
|
522
522
|
const allPreviousAnswers = { ...existingConfig, ...previousAnswers };
|
|
523
523
|
|
|
524
|
-
|
|
524
|
+
// Collect explicit values for prompts that will be skipped.
|
|
525
|
+
// When a prompt is skipped because its value is in explicitConfig,
|
|
526
|
+
// the prompt library won't include it in the returned answers.
|
|
527
|
+
// Downstream code expects the value to be present, so we inject it.
|
|
528
|
+
const skippedValues = {};
|
|
529
|
+
for (const prompt of promptablePrompts) {
|
|
530
|
+
if (explicitConfig[prompt.name] !== undefined && explicitConfig[prompt.name] !== null) {
|
|
531
|
+
skippedValues[prompt.name] = explicitConfig[prompt.name];
|
|
532
|
+
}
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
const promptedAnswers = await this._runPrompts(promptablePrompts.map(prompt => ({
|
|
525
536
|
...prompt,
|
|
526
537
|
// Wrap message to inject previousAnswers so prompts can access _mcpInstanceChoices etc.
|
|
527
538
|
message: typeof prompt.message === 'function' ? (answers) => {
|
|
@@ -541,14 +552,34 @@ export default class PromptRunner {
|
|
|
541
552
|
} : (existingConfig[prompt.name] !== undefined && existingConfig[prompt.name] !== null) ?
|
|
542
553
|
existingConfig[prompt.name] : undefined,
|
|
543
554
|
// Skip prompt ONLY if we have explicit config (not defaults)
|
|
555
|
+
// In auto-prompt mode, also skip optional prompts (not required in parameter matrix)
|
|
544
556
|
when: prompt.when ? (answers) => {
|
|
545
557
|
// Skip if we have the value from explicit config (CLI, env vars, config files)
|
|
546
558
|
if (explicitConfig[prompt.name] !== undefined && explicitConfig[prompt.name] !== null) {
|
|
547
559
|
return false;
|
|
548
560
|
}
|
|
561
|
+
// In auto-prompt mode, skip optional/non-matrix parameters entirely
|
|
562
|
+
if (this.configManager?.isAutoPrompt()) {
|
|
563
|
+
const paramConfig = this.configManager.parameterMatrix[prompt.name];
|
|
564
|
+
// Skip if not in matrix (supplementary prompt) or if optional
|
|
565
|
+
if (!paramConfig || !paramConfig.required) {
|
|
566
|
+
return false;
|
|
567
|
+
}
|
|
568
|
+
}
|
|
549
569
|
return prompt.when({...allPreviousAnswers, ...answers});
|
|
550
|
-
} : (
|
|
551
|
-
|
|
570
|
+
} : (_answers) => {
|
|
571
|
+
// No original when condition — skip if explicit or if auto-prompt + optional/non-matrix
|
|
572
|
+
if (explicitConfig[prompt.name] !== undefined && explicitConfig[prompt.name] !== null) {
|
|
573
|
+
return false;
|
|
574
|
+
}
|
|
575
|
+
if (this.configManager?.isAutoPrompt()) {
|
|
576
|
+
const paramConfig = this.configManager.parameterMatrix[prompt.name];
|
|
577
|
+
if (!paramConfig || !paramConfig.required) {
|
|
578
|
+
return false;
|
|
579
|
+
}
|
|
580
|
+
}
|
|
581
|
+
return true;
|
|
582
|
+
},
|
|
552
583
|
// Provide access to previous answers for conditional logic
|
|
553
584
|
// For unbounded parameters, inject MCP-provided choices if available
|
|
554
585
|
choices: prompt.choices ? (answers) => {
|
|
@@ -563,6 +594,9 @@ export default class PromptRunner {
|
|
|
563
594
|
return prompt.choices;
|
|
564
595
|
} : undefined
|
|
565
596
|
})));
|
|
597
|
+
|
|
598
|
+
// Merge skipped explicit values into the answers so downstream code sees them
|
|
599
|
+
return { ...skippedValues, ...promptedAnswers };
|
|
566
600
|
}
|
|
567
601
|
|
|
568
602
|
/**
|
|
@@ -638,7 +672,7 @@ export default class PromptRunner {
|
|
|
638
672
|
|
|
639
673
|
/**
|
|
640
674
|
* Query MCP instance-recommender server after deployment target is known.
|
|
641
|
-
* Only runs when deploymentTarget is
|
|
675
|
+
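* Intended for deployment targets that take an instance type (the call site gates on realtime-inference, async-inference, batch-transform, and hyperpod-eks).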
|
|
642
676
|
* Populates configManager.mcpChoices so _runPhase injects them into list prompts.
|
|
643
677
|
* @private
|
|
644
678
|
*/
|
|
@@ -1314,12 +1348,22 @@ export default class PromptRunner {
|
|
|
1314
1348
|
return inferenceAmiVersion ? { cudaVersion, inferenceAmiVersion } : null;
|
|
1315
1349
|
}
|
|
1316
1350
|
|
|
1317
|
-
// Multiple options — let the user choose
|
|
1351
|
+
// Multiple options — let the user choose (or auto-select in auto-prompt mode)
|
|
1318
1352
|
const defaultVersion = frameworkAccel?.version
|
|
1319
1353
|
&& compatibleVersions.includes(frameworkAccel.version)
|
|
1320
1354
|
? frameworkAccel.version
|
|
1321
1355
|
: instanceInfo.accelerator.default || compatibleVersions[compatibleVersions.length - 1];
|
|
1322
1356
|
|
|
1357
|
+
// In auto-prompt mode, auto-select the default without prompting
|
|
1358
|
+
if (this.configManager?.isAutoPrompt()) {
|
|
1359
|
+
const inferenceAmiVersion = PromptRunner.CUDA_AMI_MAP[defaultVersion];
|
|
1360
|
+
if (inferenceAmiVersion) {
|
|
1361
|
+
console.log(`\n🔧 CUDA ${defaultVersion} auto-selected (auto-prompt mode)`);
|
|
1362
|
+
console.log(` AMI: ${inferenceAmiVersion}`);
|
|
1363
|
+
}
|
|
1364
|
+
return inferenceAmiVersion ? { cudaVersion: defaultVersion, inferenceAmiVersion } : null;
|
|
1365
|
+
}
|
|
1366
|
+
|
|
1323
1367
|
const choices = compatibleVersions.map(v => {
|
|
1324
1368
|
const ami = PromptRunner.CUDA_AMI_MAP[v] || 'unknown';
|
|
1325
1369
|
const isDefault = v === defaultVersion ? ' (recommended)' : '';
|
package/src/lib/prompts.js
CHANGED
|
@@ -649,7 +649,7 @@ const modulePrompts = [
|
|
|
649
649
|
/**
|
|
650
650
|
* Infrastructure prompts split into sub-phases so the prompt runner can
|
|
651
651
|
* interleave MCP queries between them (e.g. query instance-recommender
|
|
652
|
-
* only after we know the deployment target is
|
|
652
|
+
* only after we know the deployment target is realtime-inference).
|
|
653
653
|
*
|
|
654
654
|
* Ordering: Region → Deployment Target → Instance/HyperPod → Build Target → Role
|
|
655
655
|
*/
|
|
@@ -683,21 +683,21 @@ const infraRegionAndTargetPrompts = [
|
|
|
683
683
|
name: 'deploymentTarget',
|
|
684
684
|
message: 'Deployment target?',
|
|
685
685
|
choices: [
|
|
686
|
-
{ name: 'SageMaker
|
|
687
|
-
{ name: 'SageMaker
|
|
688
|
-
{ name: 'SageMaker
|
|
686
|
+
{ name: 'SageMaker Real-Time Inference', value: 'realtime-inference' },
|
|
687
|
+
{ name: 'SageMaker Async Inference', value: 'async-inference' },
|
|
688
|
+
{ name: 'SageMaker Batch Transform', value: 'batch-transform' },
|
|
689
689
|
{ name: 'SageMaker HyperPod - EKS', value: 'hyperpod-eks' }
|
|
690
690
|
],
|
|
691
|
-
default: '
|
|
691
|
+
default: 'realtime-inference'
|
|
692
692
|
}
|
|
693
693
|
];
|
|
694
694
|
|
|
695
|
-
// Sub-phase B: Instance type (only when deploymentTarget === '
|
|
695
|
+
// Sub-phase B: Instance type (when deploymentTarget is 'realtime-inference', 'async-inference', 'batch-transform', or 'hyperpod-eks')
|
|
696
696
|
const infraInstancePrompts = [
|
|
697
697
|
{
|
|
698
698
|
type: 'list',
|
|
699
699
|
name: 'instanceType',
|
|
700
|
-
when: answers => answers.deploymentTarget === '
|
|
700
|
+
when: answers => answers.deploymentTarget === 'realtime-inference' || answers.deploymentTarget === 'async-inference' || answers.deploymentTarget === 'batch-transform' || answers.deploymentTarget === 'hyperpod-eks',
|
|
701
701
|
message: (answers) => {
|
|
702
702
|
const framework = answers.framework || answers.deploymentConfig?.split('-')[0];
|
|
703
703
|
|
|
@@ -64,7 +64,7 @@ export default class TemplateManager {
|
|
|
64
64
|
'diffusors-vllm-omni'
|
|
65
65
|
],
|
|
66
66
|
buildTargets: ['codebuild'],
|
|
67
|
-
deploymentTargets: ['
|
|
67
|
+
deploymentTargets: ['realtime-inference', 'async-inference', 'batch-transform', 'hyperpod-eks'],
|
|
68
68
|
testTypes: ['local-model-cli', 'local-model-server', 'hosted-model-endpoint'],
|
|
69
69
|
awsRegions: [
|
|
70
70
|
'us-east-1', 'us-east-2', 'us-west-1', 'us-west-2',
|
|
@@ -135,7 +135,7 @@ export default class TemplateManager {
|
|
|
135
135
|
// Validate batch transform specific fields
|
|
136
136
|
this._validateBatchTransformConfig();
|
|
137
137
|
|
|
138
|
-
// Validate instance type format (ml.*.*) - only for
|
|
138
|
+
// Validate instance type format (ml.*.*) - only for realtime-inference
|
|
139
139
|
if (this.answers.instanceType && this.answers.instanceType !== 'custom') {
|
|
140
140
|
const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
|
|
141
141
|
if (!instancePattern.test(this.answers.instanceType)) {
|
package/templates/do/clean
CHANGED
|
@@ -24,7 +24,7 @@ done
|
|
|
24
24
|
|
|
25
25
|
# Function to display usage
|
|
26
26
|
show_usage() {
|
|
27
|
-
<% if (deploymentTarget === '
|
|
27
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
28
28
|
echo "Usage: ./do/clean [local|ecr|endpoint|codebuild|all]"
|
|
29
29
|
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
30
30
|
echo "Usage: ./do/clean [local|ecr|endpoint|codebuild|all]"
|
|
@@ -37,7 +37,7 @@ show_usage() {
|
|
|
37
37
|
echo "Cleanup targets:"
|
|
38
38
|
echo " local - Remove local Docker images"
|
|
39
39
|
echo " ecr - Remove images from Amazon ECR"
|
|
40
|
-
<% if (deploymentTarget === '
|
|
40
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
41
41
|
echo " endpoint - Delete SageMaker endpoint, configuration, and model"
|
|
42
42
|
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
43
43
|
echo " endpoint - Delete SageMaker async endpoint, configuration, and inference component"
|
|
@@ -51,7 +51,7 @@ show_usage() {
|
|
|
51
51
|
echo ""
|
|
52
52
|
echo "Examples:"
|
|
53
53
|
echo " ./do/clean local # Remove local Docker images only"
|
|
54
|
-
<% if (deploymentTarget === '
|
|
54
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
55
55
|
echo " ./do/clean endpoint # Delete SageMaker resources only"
|
|
56
56
|
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
57
57
|
echo " ./do/clean endpoint # Delete SageMaker async resources only"
|
|
@@ -198,7 +198,7 @@ clean_ecr() {
|
|
|
198
198
|
fi
|
|
199
199
|
}
|
|
200
200
|
|
|
201
|
-
<% if (deploymentTarget === '
|
|
201
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
202
202
|
# Function to clean SageMaker endpoint and inference components
|
|
203
203
|
clean_endpoint() {
|
|
204
204
|
echo "🧹 Cleaning SageMaker resources"
|
|
@@ -720,7 +720,7 @@ case "${CLEANUP_TARGET}" in
|
|
|
720
720
|
ecr)
|
|
721
721
|
clean_ecr
|
|
722
722
|
;;
|
|
723
|
-
<% if (deploymentTarget === '
|
|
723
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
724
724
|
endpoint)
|
|
725
725
|
clean_endpoint
|
|
726
726
|
;;
|
|
@@ -761,7 +761,7 @@ case "${CLEANUP_TARGET}" in
|
|
|
761
761
|
|
|
762
762
|
echo ""
|
|
763
763
|
|
|
764
|
-
<% if (deploymentTarget === '
|
|
764
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
765
765
|
# Clean SageMaker resources
|
|
766
766
|
if clean_endpoint; then
|
|
767
767
|
CLEANED_ITEMS+=("SageMaker resources")
|
package/templates/do/config
CHANGED
|
@@ -24,8 +24,8 @@ export CODEBUILD_PROJECT_NAME="${PROJECT_NAME}-build-$(date +%Y%m%d)"
|
|
|
24
24
|
# Deployment configuration — WHERE the model runs
|
|
25
25
|
export DEPLOYMENT_TARGET="<%= deploymentTarget %>"
|
|
26
26
|
|
|
27
|
-
<% if (deploymentTarget === '
|
|
28
|
-
# SageMaker
|
|
27
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
28
|
+
# SageMaker Real-Time Inference configuration
|
|
29
29
|
export INSTANCE_TYPE="<%= instanceType %>"
|
|
30
30
|
<% if (inferenceAmiVersion) { %>
|
|
31
31
|
export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
|
|
@@ -33,7 +33,7 @@ export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
|
|
|
33
33
|
<% } %>
|
|
34
34
|
|
|
35
35
|
<% if (deploymentTarget === 'async-inference') { %>
|
|
36
|
-
# SageMaker
|
|
36
|
+
# SageMaker Async Inference configuration
|
|
37
37
|
export INSTANCE_TYPE="<%= instanceType %>"
|
|
38
38
|
<% if (inferenceAmiVersion) { %>
|
|
39
39
|
export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
|
|
@@ -77,7 +77,7 @@ export FSX_VOLUME_HANDLE="<%= fsxVolumeHandle %>"
|
|
|
77
77
|
<% } %>
|
|
78
78
|
|
|
79
79
|
<% if (deploymentTarget === 'batch-transform') { %>
|
|
80
|
-
# SageMaker
|
|
80
|
+
# SageMaker Batch Transform configuration
|
|
81
81
|
export INSTANCE_TYPE="<%= instanceType %>"
|
|
82
82
|
|
|
83
83
|
# Resolve AWS account ID at runtime for default resource names
|
|
@@ -187,7 +187,7 @@ export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage %>}
|
|
|
187
187
|
|
|
188
188
|
# Allow environment variable overrides
|
|
189
189
|
export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
|
|
190
|
-
<% if (deploymentTarget === '
|
|
190
|
+
<% if (deploymentTarget === 'realtime-inference' || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
|
|
191
191
|
export INSTANCE_TYPE=${INSTANCE_TYPE:-<%= instanceType %>}
|
|
192
192
|
<% } %>
|
|
193
193
|
export ECR_REPOSITORY_NAME=${ECR_REPOSITORY_NAME:-ml-container-creator}
|
|
@@ -241,7 +241,7 @@ echo " Model env vars: <%= Object.keys(modelEnvVars).length %>"
|
|
|
241
241
|
<% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
|
|
242
242
|
echo " Server env vars: <%= Object.keys(serverEnvVars).length %>"
|
|
243
243
|
<% } %>
|
|
244
|
-
<% if (deploymentTarget === '
|
|
244
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
245
245
|
echo " Instance: ${INSTANCE_TYPE}"
|
|
246
246
|
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
247
247
|
echo " Instance: ${INSTANCE_TYPE}"
|
package/templates/do/deploy
CHANGED
|
@@ -36,7 +36,7 @@ echo " Deployment config: ${DEPLOYMENT_CONFIG}"
|
|
|
36
36
|
echo " Region: ${AWS_REGION}"
|
|
37
37
|
echo " Build target: ${BUILD_TARGET}"
|
|
38
38
|
echo " Deployment target: ${DEPLOYMENT_TARGET}"
|
|
39
|
-
<% if (deploymentTarget === '
|
|
39
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
40
40
|
echo " Instance type: ${INSTANCE_TYPE}"
|
|
41
41
|
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
42
42
|
echo " Instance type: ${INSTANCE_TYPE}"
|
|
@@ -95,9 +95,9 @@ fi
|
|
|
95
95
|
echo "✅ ECR image found: ${ECR_REPOSITORY}:${PROJECT_NAME}-latest"
|
|
96
96
|
IMAGE_TAG="${PROJECT_NAME}-latest"
|
|
97
97
|
|
|
98
|
-
<% if (deploymentTarget === '
|
|
98
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
99
99
|
# ============================================================
|
|
100
|
-
# SageMaker
|
|
100
|
+
# SageMaker Real-Time Inference Deployment (Inference Components)
|
|
101
101
|
# ============================================================
|
|
102
102
|
|
|
103
103
|
# Validate execution role ARN
|
|
@@ -520,7 +520,7 @@ echo " ./do/clean endpoint"
|
|
|
520
520
|
|
|
521
521
|
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
522
522
|
# ============================================================
|
|
523
|
-
# SageMaker
|
|
523
|
+
# SageMaker Async Inference Deployment (Model-Based)
|
|
524
524
|
# SageMaker async inference does NOT support Inference Components.
|
|
525
525
|
# Flow: create-model → create-endpoint-config (with AsyncInferenceConfig) → create-endpoint
|
|
526
526
|
# ============================================================
|
|
@@ -1151,7 +1151,7 @@ _update_config_var "KUBECONFIG" "${KUBECONFIG_PATH}"
|
|
|
1151
1151
|
|
|
1152
1152
|
<% } else if (deploymentTarget === 'batch-transform') { %>
|
|
1153
1153
|
# ============================================================
|
|
1154
|
-
# SageMaker
|
|
1154
|
+
# SageMaker Batch Transform Deployment
|
|
1155
1155
|
# Flow: create-model → create-transform-job → poll until completion
|
|
1156
1156
|
# ============================================================
|
|
1157
1157
|
|
package/templates/do/export
CHANGED
|
@@ -42,8 +42,8 @@ if [ "${1:-}" = "--json" ]; then
|
|
|
42
42
|
# Deployment target
|
|
43
43
|
JSON="${JSON},\"deploymentTarget\":\"${DEPLOYMENT_TARGET}\""
|
|
44
44
|
|
|
45
|
-
<% if (deploymentTarget === '
|
|
46
|
-
# SageMaker
|
|
45
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
46
|
+
# SageMaker Real-Time Inference
|
|
47
47
|
JSON="${JSON},\"instanceType\":\"${INSTANCE_TYPE}\""
|
|
48
48
|
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
49
49
|
# SageMaker Async Inference
|
|
@@ -206,11 +206,11 @@ fi
|
|
|
206
206
|
# Deployment target
|
|
207
207
|
CMD="${CMD} --deployment-target=${DEPLOYMENT_TARGET}"
|
|
208
208
|
|
|
209
|
-
<% if (deploymentTarget === '
|
|
210
|
-
# SageMaker
|
|
209
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
210
|
+
# SageMaker Real-Time Inference
|
|
211
211
|
CMD="${CMD} --instance-type=${INSTANCE_TYPE}"
|
|
212
212
|
<% } else if (deploymentTarget === 'batch-transform') { %>
|
|
213
|
-
# SageMaker
|
|
213
|
+
# SageMaker Batch Transform
|
|
214
214
|
CMD="${CMD} --instance-type=${INSTANCE_TYPE}"
|
|
215
215
|
CMD="${CMD} --batch-input-path=${BATCH_INPUT_PATH}"
|
|
216
216
|
CMD="${CMD} --batch-output-path=${BATCH_OUTPUT_PATH}"
|
package/templates/do/logs
CHANGED
|
@@ -10,9 +10,9 @@ set -o pipefail
|
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
source "${SCRIPT_DIR}/config"
|
|
12
12
|
|
|
13
|
-
<% if (deploymentTarget === '
|
|
13
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
14
14
|
# ============================================================
|
|
15
|
-
# SageMaker
|
|
15
|
+
# SageMaker Real-Time Inference Logs (CloudWatch)
|
|
16
16
|
# ============================================================
|
|
17
17
|
|
|
18
18
|
# Allow inference component name as argument or from config
|
|
@@ -95,7 +95,7 @@ aws logs tail "${LOG_GROUP}" \
|
|
|
95
95
|
|
|
96
96
|
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
97
97
|
# ============================================================
|
|
98
|
-
# SageMaker
|
|
98
|
+
# SageMaker Async Inference Logs (CloudWatch)
|
|
99
99
|
# ============================================================
|
|
100
100
|
|
|
101
101
|
ENDPOINT="${1:-${ENDPOINT_NAME:-}}"
|
|
@@ -166,7 +166,7 @@ aws logs tail "${LOG_GROUP}" \
|
|
|
166
166
|
|
|
167
167
|
<% } else if (deploymentTarget === 'batch-transform') { %>
|
|
168
168
|
# ============================================================
|
|
169
|
-
# SageMaker
|
|
169
|
+
# SageMaker Batch Transform Logs (CloudWatch)
|
|
170
170
|
# ============================================================
|
|
171
171
|
|
|
172
172
|
# Allow transform job name as argument or from config
|
package/templates/do/register
CHANGED
|
@@ -266,7 +266,7 @@ echo " Backend: ${BACKEND}"
|
|
|
266
266
|
<% if (framework === 'transformers') { %>
|
|
267
267
|
echo " Model name: ${MODEL_NAME:-N/A}"
|
|
268
268
|
<% } %>
|
|
269
|
-
<% if (deploymentTarget === '
|
|
269
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
270
270
|
echo " Instance type: ${INSTANCE_TYPE}"
|
|
271
271
|
<% } else if (deploymentTarget === 'batch-transform') { %>
|
|
272
272
|
echo " Instance: ${INSTANCE_TYPE} x ${BATCH_INSTANCE_COUNT}"
|
|
@@ -498,7 +498,7 @@ DJEOF
|
|
|
498
498
|
if [ "${CI_MODE}" = true ]; then
|
|
499
499
|
echo ""
|
|
500
500
|
echo "⚠️ CI Integration is experimental and currently only tested for"
|
|
501
|
-
echo " SageMaker
|
|
501
|
+
echo " SageMaker Real-Time Inference endpoints."
|
|
502
502
|
echo ""
|
|
503
503
|
|
|
504
504
|
# Compute configId
|
|
@@ -542,7 +542,7 @@ if [ -n "${MODEL_NAME:-}" ]; then
|
|
|
542
542
|
fi
|
|
543
543
|
<% } %>
|
|
544
544
|
|
|
545
|
-
<% if (deploymentTarget === '
|
|
545
|
+
<% if (deploymentTarget === 'realtime-inference' || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
|
|
546
546
|
CMD_ARGS+=("--instance-type" "${INSTANCE_TYPE}")
|
|
547
547
|
<% } %>
|
|
548
548
|
|