@aws/ml-container-creator 0.10.3 → 0.13.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/parameter-schema-v2.json +28 -1
- package/infra/ci-harness/lib/ci-harness-stack.ts +50 -36
- package/package.json +14 -5
- package/servers/instance-sizer/index.js +30 -17
- package/servers/instance-sizer/lib/instance-ranker.js +44 -0
- package/servers/lib/catalogs/instances.json +27 -0
- package/src/app.js +22 -1
- package/src/lib/bootstrap-command-handler.js +32 -3
- package/src/lib/config-validator.js +1 -1
- package/src/lib/generated/cli-options.js +7 -2
- package/src/lib/generated/parameter-matrix.js +16 -5
- package/src/lib/generated/validation-rules.js +7 -3
- package/src/lib/path-prover-brain.js +58 -1
- package/src/lib/prompts/infrastructure-prompts.js +2 -2
- package/src/lib/prompts/model-prompts.js +6 -0
- package/src/lib/prove-pipeline-executor.js +294 -0
- package/src/lib/secrets-prompt-runner.js +4 -0
- package/src/lib/template-manager.js +1 -1
- package/src/lib/template-variable-resolver.js +62 -0
- package/templates/do/README.md +37 -0
- package/templates/do/adapter +8 -0
- package/templates/do/build +8 -0
- package/templates/do/clean.d/async-inference.ejs +8 -0
- package/templates/do/clean.d/batch-transform.ejs +8 -0
- package/templates/do/clean.d/hyperpod-eks.ejs +8 -0
- package/templates/do/clean.d/managed-inference.ejs +8 -0
- package/templates/do/config +12 -45
- package/templates/do/deploy.d/async-inference.ejs +33 -3
- package/templates/do/deploy.d/batch-transform.ejs +32 -3
- package/templates/do/deploy.d/hyperpod-eks.ejs +7 -0
- package/templates/do/deploy.d/managed-inference.ejs +27 -3
- package/templates/do/lib/endpoint-config.sh +1 -1
- package/templates/do/lib/profile.sh +44 -0
- package/templates/do/lib/staged-assets.sh +217 -0
- package/templates/do/push +8 -0
- package/templates/do/register +8 -0
- package/templates/do/stage +569 -0
- package/templates/do/submit +10 -0
- package/templates/do/test +1 -0
- package/templates/do/tune +7 -0
|
@@ -2,7 +2,11 @@
|
|
|
2
2
|
// SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
import { readFileSync } from 'node:fs';
|
|
5
|
-
import { resolve } from 'node:path';
|
|
5
|
+
import { resolve, dirname } from 'node:path';
|
|
6
|
+
import { fileURLToPath } from 'node:url';
|
|
7
|
+
|
|
8
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
9
|
+
const __dirname = dirname(__filename);
|
|
6
10
|
|
|
7
11
|
/**
|
|
8
12
|
* Path Prover Brain
|
|
@@ -364,6 +368,7 @@ export function classifyFailure(errorOutput) {
|
|
|
364
368
|
function detectStage(errorMsg) {
|
|
365
369
|
const stagePatterns = [
|
|
366
370
|
{ pattern: /\b(generate|generation)\b/i, stage: 'generate' },
|
|
371
|
+
{ pattern: /\bdo\/stage\b|model.staging|pre-stage|s3.*stag/i, stage: 'stage' },
|
|
367
372
|
{ pattern: /\b(build|docker)\b/i, stage: 'build' },
|
|
368
373
|
{ pattern: /\b(push|ecr|registry)\b/i, stage: 'push' },
|
|
369
374
|
{ pattern: /\b(deploy|endpoint|CreateEndpoint|InferenceComponent)\b/i, stage: 'deploy' },
|
|
@@ -605,3 +610,55 @@ export function loadPriorityTargets(configPath) {
|
|
|
605
610
|
return null;
|
|
606
611
|
}
|
|
607
612
|
}
|
|
613
|
+
|
|
614
|
+
// ── TP Degree Auto-Resolution at Prove-Time (Task 6.5) ──────────────────────
|
|
615
|
+
|
|
616
|
+
/**
 * Fill in a missing `tp_degree` on a prove config from the instance catalog.
 *
 * When the config names an `instance_type` but carries no `tp_degree`
 * (undefined or null), the catalog entry's `gpus` count is used as the
 * tensor-parallel degree. An explicitly provided `tp_degree` is never
 * overwritten. If the catalog cannot be read or has no positive GPU count
 * for the instance, the config is returned untouched.
 *
 * Requirements: FTP-1 (extension) — task 6.5
 *
 * @param {object} config - Prove config; mutated in place when a value is resolved
 * @param {string} [config.instance_type] - Instance type (e.g. 'ml.g5.48xlarge')
 * @param {number|string|undefined} [config.tp_degree] - TP degree if already set
 * @param {object|null} [catalogOverride] - Pre-loaded catalog data; when truthy the
 *   catalog file is not read (used by tests)
 * @returns {object} The same config reference that was passed in. On resolution it
 *   gains `tp_degree`, `_tpAutoResolved` and `_tpAutoResolvedFrom`.
 */
export function resolveProveTpDegree(config, catalogOverride = null) {
    if (!config) {
        return config;
    }

    // An explicit value always wins; only undefined/null count as "not set".
    const explicitTp = config.tp_degree;
    if (explicitTp !== undefined && explicitTp !== null) {
        return config;
    }

    // Without an instance type there is nothing to look up.
    const instanceType = config.instance_type;
    if (!instanceType) {
        return config;
    }

    let catalogData = catalogOverride;
    if (!catalogData) {
        try {
            const catalogFile = resolve(__dirname, '..', '..', 'servers', 'lib', 'catalogs', 'instances.json');
            catalogData = JSON.parse(readFileSync(catalogFile, 'utf8'));
        } catch {
            // Best effort: an unreadable or malformed catalog leaves the config as-is.
            return config;
        }
    }

    const gpuCount = catalogData?.catalog?.[instanceType]?.gpus;
    if (gpuCount && gpuCount > 0) {
        config.tp_degree = gpuCount;
        // Bookkeeping so downstream output can tell the value was derived.
        config._tpAutoResolved = true;
        config._tpAutoResolvedFrom = instanceType;
    }

    return config;
}
|
|
@@ -252,7 +252,7 @@ const infraInstancePrompts = [
|
|
|
252
252
|
if (!input || input.trim() === '') {
|
|
253
253
|
return 'At least one instance type is required';
|
|
254
254
|
}
|
|
255
|
-
const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
|
|
255
|
+
const instancePattern = /^ml\.[a-z0-9-]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
|
|
256
256
|
const instances = input.split(',').map(s => s.trim()).filter(s => s.length > 0);
|
|
257
257
|
if (instances.length === 0) {
|
|
258
258
|
return 'At least one instance type is required';
|
|
@@ -384,7 +384,7 @@ const infraInstancePrompts = [
|
|
|
384
384
|
if (!input || input.trim() === '') {
|
|
385
385
|
return 'Instance type is required';
|
|
386
386
|
}
|
|
387
|
-
const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
|
|
387
|
+
const instancePattern = /^ml\.[a-z0-9-]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
|
|
388
388
|
if (!instancePattern.test(input.trim())) {
|
|
389
389
|
return 'Invalid instance type format. Expected format: ml.{family}.{size} (e.g., ml.m5.large, ml.g4dn.xlarge)';
|
|
390
390
|
}
|
|
@@ -459,6 +459,12 @@ const hfTokenPrompts = [
|
|
|
459
459
|
return false;
|
|
460
460
|
}
|
|
461
461
|
|
|
462
|
+
// Skip HF token when model name is an S3 URI (no HF download needed)
|
|
463
|
+
const modelName = answers.customModelName || answers.modelName;
|
|
464
|
+
if (modelName && modelName.startsWith('s3://')) {
|
|
465
|
+
return false;
|
|
466
|
+
}
|
|
467
|
+
|
|
462
468
|
// Display security warning before prompting
|
|
463
469
|
console.log('\n🔐 HuggingFace Authentication');
|
|
464
470
|
console.log(' Many models (e.g. Llama, Mistral) are gated and require a token.');
|
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Prove Pipeline Executor
|
|
6
|
+
*
|
|
7
|
+
* Executes lifecycle stages for validation targets in the `mcc prove` workflow.
|
|
8
|
+
* Handles stage-specific logic including idempotency checks, status tracking,
|
|
9
|
+
* and fail-fast behavior.
|
|
10
|
+
*
|
|
11
|
+
* Feature: s3-model-loading
|
|
12
|
+
* Requirements: 5.1, 5.2, 5.3, 5.4, 5.5
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { execFile } from 'node:child_process';
|
|
16
|
+
import { promisify } from 'node:util';
|
|
17
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
18
|
+
import path from 'node:path';
|
|
19
|
+
|
|
20
|
+
const execFileAsync = promisify(execFile);
|
|
21
|
+
|
|
22
|
+
// ── Valid Lifecycle Stages ────────────────────────────────────────────────────
|
|
23
|
+
|
|
24
|
+
/**
 * All lifecycle stage names recognized by the prove pipeline.
 * The "stage" step pre-stages model weights from HuggingFace to S3.
 *
 * Frozen so that no importer can add or remove stages at runtime and
 * thereby change validation results for every other consumer.
 */
export const VALID_LIFECYCLE_STAGES = Object.freeze([
    'generate',
    'stage',
    'build',
    'push',
    'deploy',
    'test',
    'tune',
    'adapter',
    'test-adapter',
    'benchmark',
    'register',
    'clean'
]);
|
|
42
|
+
|
|
43
|
+
/**
 * Possible staging states reported in status output.
 *
 * Frozen so the state identifiers cannot be reassigned by an importer.
 */
export const STAGING_STATES = Object.freeze({
    STAGED: 'staged',
    NOT_STAGED: 'not-staged',
    STAGE_FAILED: 'stage-failed'
});
|
|
51
|
+
|
|
52
|
+
// ── Stage Lifecycle Step ─────────────────────────────────────────────────────
|
|
53
|
+
|
|
54
|
+
/**
 * Report whether `.mlcc/staged-assets.json` in the project records a staged model.
 *
 * The model counts as staged only when the file exists, parses as JSON, and
 * has a truthy `models.default.staged_uri`. A missing, unreadable or
 * malformed file yields `false`.
 *
 * @param {string} projectDir - Path to the generated project directory
 * @returns {boolean} True if a staged URI is recorded for the default model
 */
export function isAlreadyStaged(projectDir) {
    const manifestPath = path.join(projectDir, '.mlcc', 'staged-assets.json');
    if (existsSync(manifestPath) === false) {
        return false;
    }

    let manifest;
    try {
        manifest = JSON.parse(readFileSync(manifestPath, 'utf8'));
    } catch {
        // Unreadable or invalid JSON is treated the same as "not staged".
        return false;
    }

    const stagedUri = manifest?.models?.default?.staged_uri;
    return Boolean(stagedUri);
}
|
|
75
|
+
|
|
76
|
+
/**
 * Determine the staging state of a project.
 *
 * A recorded stage failure in `stepResults` takes precedence over whatever is
 * on disk; otherwise the state reflects whether the project is already staged.
 *
 * @param {string} projectDir - Path to the generated project directory
 * @param {object} [stepResults] - Previous step results; `stepResults.stage.status === 'fail'`
 *   marks the stage as failed
 * @returns {string} One of: 'staged', 'not-staged', 'stage-failed'
 */
export function getStagingState(projectDir, stepResults = null) {
    const stageFailedEarlier = stepResults?.stage?.status === 'fail';
    if (stageFailedEarlier) {
        return STAGING_STATES.STAGE_FAILED;
    }

    return isAlreadyStaged(projectDir)
        ? STAGING_STATES.STAGED
        : STAGING_STATES.NOT_STAGED;
}
|
|
95
|
+
|
|
96
|
+
/**
 * Execute the stage lifecycle step with idempotency support.
 *
 * If the model is already staged (`.mlcc/staged-assets.json` exists with a valid URI),
 * the step is skipped and reported as passed with `skipped: true`.
 *
 * Otherwise `./do/stage` is run through `bash -c` with `projectDir` as the working
 * directory. A zero exit code yields a pass; anything else (non-zero exit, signal,
 * timeout, spawn failure, oversized output) yields a fail with an `error` string.
 * This function reports failures through the returned result rather than throwing.
 *
 * @param {string} projectDir - Path to the generated project directory
 * @param {object} [options] - Execution options
 * @param {number} [options.timeout=1800] - Timeout in seconds (default: 30 minutes).
 *   A value of 0 or less disables the timeout in both modes.
 * @param {boolean} [options.verbose=false] - Stream stdout/stderr in real time
 * @returns {Promise<object>} StepResult with name, status, duration (ms), stagingState,
 *   and optionally skipped/message (skip) or error (failure)
 */
export async function executeStageStep(projectDir, options = {}) {
    const { timeout = 1800, verbose = false } = options;
    const startTime = Date.now();
    const maxBufferBytes = 10 * 1024 * 1024;

    // Every outcome shares one result shape; only status, state and extras vary.
    const makeResult = (status, stagingState, extras = {}) => ({
        name: 'stage',
        status,
        duration: Date.now() - startTime,
        stagingState,
        ...extras
    });
    const passed = (extras) => makeResult('pass', STAGING_STATES.STAGED, extras);
    const failed = (error) => makeResult('fail', STAGING_STATES.STAGE_FAILED, { error });

    // Idempotency check: skip if already staged (Requirement 5.4)
    if (isAlreadyStaged(projectDir)) {
        return passed({
            skipped: true,
            message: '✓ Model already staged — skipping'
        });
    }

    // Execute do/stage and verify exit code 0 (Requirement 5.2)
    const command = './do/stage';

    try {
        if (verbose) {
            // Verbose: stream output in real time
            const { spawn } = await import('node:child_process');
            return await new Promise((settle) => {
                const child = spawn('bash', ['-c', command], {
                    cwd: projectDir,
                    // stdin is ignored rather than piped: nothing ever writes to the
                    // child, so an open pipe would make any read block until timeout.
                    stdio: ['ignore', 'inherit', 'inherit']
                });

                let timedOut = false;
                // Match execFile semantics: only arm the timer for a positive timeout.
                const timer = timeout > 0
                    ? setTimeout(() => {
                        timedOut = true;
                        child.kill('SIGTERM');
                    }, timeout * 1000)
                    : null;

                child.on('close', (code, signal) => {
                    clearTimeout(timer);
                    if (code === 0) {
                        settle(passed());
                        return;
                    }

                    let error;
                    if (timedOut) {
                        error = `Timeout after ${timeout}s`;
                    } else if (code === null && signal) {
                        // Killed by a signal we did not send: there is no exit code to report.
                        error = `do/stage terminated by signal ${signal}`;
                    } else {
                        error = `do/stage exited with code ${code}`;
                    }
                    settle(failed(error));
                });

                child.on('error', (err) => {
                    clearTimeout(timer);
                    settle(failed(String(err?.message ?? err).slice(-500)));
                });
            });
        }

        // Non-verbose: buffer output
        await execFileAsync('bash', ['-c', command], {
            cwd: projectDir,
            timeout: timeout * 1000,
            maxBuffer: maxBufferBytes
        });

        return passed();
    } catch (err) {
        // Mark model as failed if staging fails (Requirement 5.3)
        let error;
        if (err?.code === 'ERR_CHILD_PROCESS_STDIO_MAXBUFFER') {
            // execFile also sets `killed` when it stops the child for exceeding
            // maxBuffer, so this must be checked before the timeout branch.
            error = `do/stage output exceeded ${maxBufferBytes} bytes (maxBuffer)`;
        } else if (err?.killed) {
            error = `Timeout after ${timeout}s`;
        } else {
            // Keep only the tail: the end of stderr usually holds the actual failure.
            error = String(err?.stderr || err?.message || err).slice(-500);
        }

        return failed(error);
    }
}
|
|
210
|
+
|
|
211
|
+
// ── Stage Validation ─────────────────────────────────────────────────────────
|
|
212
|
+
|
|
213
|
+
/**
 * Check whether a stage name is one of the prove pipeline's known lifecycle stages.
 *
 * The comparison is exact (strict equality against each entry of
 * `VALID_LIFECYCLE_STAGES`), so it is case-sensitive.
 *
 * @param {string} stageName - The stage name to validate
 * @returns {boolean} True if the stage is recognized
 */
export function isValidLifecycleStage(stageName) {
    const matchesKnownStage = (knownStage) => knownStage === stageName;
    return VALID_LIFECYCLE_STAGES.some(matchesKnownStage);
}
|
|
222
|
+
|
|
223
|
+
/**
 * Validate the `stages` list of a validation-targets entry.
 *
 * The input must be a non-empty array. Each entry must be a string naming a
 * recognized lifecycle stage; one error message is collected per offending
 * entry, in input order.
 *
 * @param {string[]} stages - Array of stage names
 * @returns {object} Validation result: { valid: boolean, errors: string[] }
 */
export function validateStagesArray(stages) {
    if (!Array.isArray(stages)) {
        return { valid: false, errors: ['stages must be an array'] };
    }
    if (stages.length === 0) {
        return { valid: false, errors: ['stages array must not be empty'] };
    }

    // Returns the error message for one entry, or null when the entry is acceptable.
    const describeProblem = (entry) => {
        const entryType = typeof entry;
        if (entryType !== 'string') {
            return `Invalid stage type: expected string, got ${entryType}`;
        }
        return isValidLifecycleStage(entry)
            ? null
            : `Unrecognized lifecycle stage: "${entry}"`;
    };

    // Spread first so every position is visited the way iteration would visit it.
    const errors = [...stages]
        .map((entry) => describeProblem(entry))
        .filter((problem) => problem !== null);

    return { valid: errors.length === 0, errors };
}
|
|
252
|
+
|
|
253
|
+
// ── Status Output ────────────────────────────────────────────────────────────
|
|
254
|
+
|
|
255
|
+
/**
 * Turn a staging state into the short, symbol-prefixed label shown in status output.
 *
 * @param {string} state - One of the STAGING_STATES values
 * @returns {string} Label for the state, or '? unknown' for any other value
 */
export function formatStagingStatus(state) {
    if (state === STAGING_STATES.STAGED) {
        return '✓ staged';
    }
    if (state === STAGING_STATES.NOT_STAGED) {
        return '○ not-staged';
    }
    if (state === STAGING_STATES.STAGE_FAILED) {
        return '✗ stage-failed';
    }
    return '? unknown';
}
|
|
273
|
+
|
|
274
|
+
/**
 * Assemble the status summary for one prove target, including its staging state.
 *
 * @param {object} target - The validation target
 * @param {string} target.model_name - Model name, copied into the summary
 * @param {string[]} [target.stages] - Stages configured for the target; treated as empty when absent
 * @param {string} projectDir - Path to the project directory
 * @param {object} [stepResults] - Results of executed steps
 * @returns {object} Summary with model_name, stagingState, stagingStatus (display label)
 *   and includesStageStep (whether 'stage' is among the target's stages)
 */
export function buildTargetStatus(target, projectDir, stepResults = null) {
    const currentState = getStagingState(projectDir, stepResults);
    const configuredStages = target.stages || [];

    return {
        model_name: target.model_name,
        stagingState: currentState,
        stagingStatus: formatStagingStatus(currentState),
        includesStageStep: configuredStages.includes('stage')
    };
}
|
|
@@ -70,6 +70,10 @@ export default class SecretsPromptRunner {
|
|
|
70
70
|
const modelSource = answers.modelSource;
|
|
71
71
|
if (modelSource && modelSource !== 'huggingface') return false;
|
|
72
72
|
|
|
73
|
+
// Skip HF token when model name is an S3 URI (no HF download needed)
|
|
74
|
+
const modelName = answers.customModelName || answers.modelName;
|
|
75
|
+
if (modelName && modelName.startsWith('s3://')) return false;
|
|
76
|
+
|
|
73
77
|
return true;
|
|
74
78
|
}
|
|
75
79
|
|
|
@@ -146,7 +146,7 @@ export default class TemplateManager {
|
|
|
146
146
|
|
|
147
147
|
// Validate instance type format (ml.*.*) - only for realtime-inference
|
|
148
148
|
if (this.answers.instanceType && this.answers.instanceType !== 'custom') {
|
|
149
|
-
const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
|
|
149
|
+
const instancePattern = /^ml\.[a-z0-9-]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
|
|
150
150
|
if (!instancePattern.test(this.answers.instanceType)) {
|
|
151
151
|
throw new Error(`⚠️ Invalid instance type format: ${this.answers.instanceType}. Expected format: ml.{family}.{size} (e.g., ml.m5.large, ml.g5.xlarge)`);
|
|
152
152
|
}
|
|
@@ -383,6 +383,68 @@ export async function _ensureTemplateVariables(answers, registryConfigManager =
|
|
|
383
383
|
}
|
|
384
384
|
}
|
|
385
385
|
|
|
386
|
+
// Auto-resolve tensor parallel degree from instance catalog GPU count.
|
|
387
|
+
// Only applies when:
|
|
388
|
+
// 1. The engine supports tensor parallelism (vLLM, SGLang, TensorRT-LLM, LMI)
|
|
389
|
+
// 2. The instance has multiple GPUs (gpus > 1)
|
|
390
|
+
// 3. The user has NOT explicitly set the TP env var via --server-env or --model-env
|
|
391
|
+
// This ensures multi-GPU instances default to full TP utilization without requiring
|
|
392
|
+
// the user to manually specify TENSOR_PARALLEL_SIZE.
|
|
393
|
+
// Requirements: FTP-1 (extension) — task 6.2
|
|
394
|
+
const _TP_ENGINE_MAP = {
|
|
395
|
+
'vllm': 'VLLM_TENSOR_PARALLEL_SIZE',
|
|
396
|
+
'vllm-omni': 'VLLM_OMNI_TENSOR_PARALLEL_SIZE',
|
|
397
|
+
'sglang': 'SGLANG_TENSOR_PARALLEL_SIZE',
|
|
398
|
+
'tensorrt-llm': 'TRTLLM_TENSOR_PARALLEL_SIZE',
|
|
399
|
+
'lmi': 'OPTION_TENSOR_PARALLEL_DEGREE'
|
|
400
|
+
};
|
|
401
|
+
|
|
402
|
+
const tpEngine = answers.backend || answers.modelServer;
|
|
403
|
+
const tpEnvKey = tpEngine ? _TP_ENGINE_MAP[tpEngine] : null;
|
|
404
|
+
|
|
405
|
+
if (tpEnvKey && answers.instanceType) {
|
|
406
|
+
// Check if user explicitly set the TP value via --server-env (un-prefixed key)
|
|
407
|
+
const userServerEnvVars = answers.serverEnvVars || {};
|
|
408
|
+
const userExplicitlySetTP = (
|
|
409
|
+
userServerEnvVars['TENSOR_PARALLEL_SIZE'] !== undefined ||
|
|
410
|
+
userServerEnvVars['TENSOR_PARALLEL_DEGREE'] !== undefined ||
|
|
411
|
+
userServerEnvVars[tpEnvKey] !== undefined
|
|
412
|
+
);
|
|
413
|
+
|
|
414
|
+
if (!userExplicitlySetTP) {
|
|
415
|
+
// Look up GPU count from instance catalog
|
|
416
|
+
let instanceGpuCount = null;
|
|
417
|
+
if (answers.gpuCount) {
|
|
418
|
+
instanceGpuCount = answers.gpuCount;
|
|
419
|
+
} else if (answers.icGpuCount) {
|
|
420
|
+
instanceGpuCount = answers.icGpuCount;
|
|
421
|
+
} else {
|
|
422
|
+
try {
|
|
423
|
+
const catalogPath = path.resolve(__dirname, '..', '..', 'servers', 'lib', 'catalogs', 'instances.json');
|
|
424
|
+
const catalogData = JSON.parse(fs.readFileSync(catalogPath, 'utf-8'));
|
|
425
|
+
const instanceInfo = catalogData?.catalog?.[answers.instanceType];
|
|
426
|
+
if (instanceInfo?.gpus && instanceInfo.gpus > 0) {
|
|
427
|
+
instanceGpuCount = instanceInfo.gpus;
|
|
428
|
+
}
|
|
429
|
+
} catch {
|
|
430
|
+
// Silently continue
|
|
431
|
+
}
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
// Auto-set TP to GPU count when instance has multiple GPUs
|
|
435
|
+
if (instanceGpuCount && instanceGpuCount > 1) {
|
|
436
|
+
if (!answers.envVars) {
|
|
437
|
+
answers.envVars = {};
|
|
438
|
+
}
|
|
439
|
+
answers.envVars[tpEnvKey] = String(instanceGpuCount);
|
|
440
|
+
answers.tensorParallelSize = instanceGpuCount;
|
|
441
|
+
answers._tpAutoResolved = true;
|
|
442
|
+
answers._tpAutoResolvedFrom = answers.instanceType;
|
|
443
|
+
console.log(` ℹ️ TP degree: ${instanceGpuCount} (auto-detected from ${answers.instanceType})`);
|
|
444
|
+
}
|
|
445
|
+
}
|
|
446
|
+
}
|
|
447
|
+
|
|
386
448
|
// Determine tune support based on model presence in the tune catalog.
|
|
387
449
|
// Used by the do/config template to write TUNE_SUPPORTED=true|false.
|
|
388
450
|
if (answers.tuneSupported === undefined) {
|
package/templates/do/README.md
CHANGED
|
@@ -262,6 +262,43 @@ Clean everything:
|
|
|
262
262
|
|
|
263
263
|
---
|
|
264
264
|
|
|
265
|
+
### `./do/stage`
|
|
266
|
+
|
|
267
|
+
Pre-stage model weights from HuggingFace to S3 for faster builds and deploys.
|
|
268
|
+
|
|
269
|
+
**What it does:**
|
|
270
|
+
- Downloads model weights from HuggingFace using `huggingface-cli`
|
|
271
|
+
- Uses `hf_transfer` for accelerated parallel downloads
|
|
272
|
+
- Syncs downloaded weights to S3 (regional, fast access)
|
|
273
|
+
- Records the staged S3 URI in `.mlcc/staged-assets.json`
|
|
274
|
+
- Idempotent: skips if model is already staged (use `--force` to re-stage)
|
|
275
|
+
|
|
276
|
+
**Prerequisites:**
|
|
277
|
+
- AWS credentials configured
|
|
278
|
+
- `huggingface-cli` installed (`pip install "huggingface_hub[cli]" hf_transfer`)
|
|
279
|
+
- Bootstrap profile configured (`ml-container-creator bootstrap`)
|
|
280
|
+
|
|
281
|
+
**Usage:**
|
|
282
|
+
```bash
|
|
283
|
+
# Stage model to S3
|
|
284
|
+
./do/stage
|
|
285
|
+
|
|
286
|
+
# Force re-stage even if already present
|
|
287
|
+
./do/stage --force
|
|
288
|
+
|
|
289
|
+
# Stage and update MODEL_NAME in do/config
|
|
290
|
+
./do/stage --update-config
|
|
291
|
+
|
|
292
|
+
# Submit as SageMaker Processing Job (for models >500GB)
|
|
293
|
+
./do/stage --submit
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
**Output:**
|
|
297
|
+
- Staged model S3 URI
|
|
298
|
+
- Updated `.mlcc/staged-assets.json` tracking file
|
|
299
|
+
|
|
300
|
+
---
|
|
301
|
+
|
|
265
302
|
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
266
303
|
### `./do/benchmark`
|
|
267
304
|
|
package/templates/do/adapter
CHANGED
|
@@ -18,6 +18,14 @@ set -o pipefail
|
|
|
18
18
|
# ── Source project configuration ──────────────────────────────────────────────
|
|
19
19
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
20
20
|
source "${SCRIPT_DIR}/config"
|
|
21
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
22
|
+
|
|
23
|
+
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
24
|
+
# Disable unbound-variable checking for associative array access (bash 3.2 compat)
|
|
25
|
+
set +u
|
|
26
|
+
ADAPTER_S3_BUCKET="${ADAPTER_S3_BUCKET:-mlcc-adapters-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
27
|
+
set -u
|
|
28
|
+
|
|
21
29
|
source "${SCRIPT_DIR}/lib/wait.sh"
|
|
22
30
|
|
|
23
31
|
# ── Usage ─────────────────────────────────────────────────────────────────────
|
package/templates/do/build
CHANGED
|
@@ -9,6 +9,14 @@ set -o pipefail
|
|
|
9
9
|
# Source configuration
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
source "${SCRIPT_DIR}/config"
|
|
12
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
|
+
|
|
14
|
+
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
15
|
+
# Disable unbound-variable checking for associative array access (bash 3.2 compat)
|
|
16
|
+
set +u
|
|
17
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
18
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
19
|
+
set -u
|
|
12
20
|
|
|
13
21
|
echo "🚀 Building Docker image for ${PROJECT_NAME}"
|
|
14
22
|
echo " Deployment config: ${DEPLOYMENT_CONFIG}"
|
|
@@ -9,6 +9,14 @@ set -o pipefail
|
|
|
9
9
|
# Source configuration
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
source "${SCRIPT_DIR}/config"
|
|
12
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
|
+
|
|
14
|
+
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
15
|
+
# Disable unbound-variable checking for associative array access (bash 3.2 compat)
|
|
16
|
+
set +u
|
|
17
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
18
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
19
|
+
set -u
|
|
12
20
|
|
|
13
21
|
# Parse arguments
|
|
14
22
|
CLEANUP_TARGET=""
|
|
@@ -9,6 +9,14 @@ set -o pipefail
|
|
|
9
9
|
# Source configuration
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
source "${SCRIPT_DIR}/config"
|
|
12
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
|
+
|
|
14
|
+
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
15
|
+
# Disable unbound-variable checking for associative array access (bash 3.2 compat)
|
|
16
|
+
set +u
|
|
17
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
18
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
19
|
+
set -u
|
|
12
20
|
|
|
13
21
|
# Parse arguments
|
|
14
22
|
CLEANUP_TARGET=""
|
|
@@ -9,6 +9,14 @@ set -o pipefail
|
|
|
9
9
|
# Source configuration
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
source "${SCRIPT_DIR}/config"
|
|
12
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
|
+
|
|
14
|
+
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
15
|
+
# Disable unbound-variable checking for associative array access (bash 3.2 compat)
|
|
16
|
+
set +u
|
|
17
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
18
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
19
|
+
set -u
|
|
12
20
|
|
|
13
21
|
# Parse arguments
|
|
14
22
|
CLEANUP_TARGET=""
|
|
@@ -9,6 +9,14 @@ set -o pipefail
|
|
|
9
9
|
# Source configuration
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
source "${SCRIPT_DIR}/config"
|
|
12
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
|
+
|
|
14
|
+
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
15
|
+
# Disable unbound-variable checking for associative array access (bash 3.2 compat)
|
|
16
|
+
set +u
|
|
17
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
18
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
19
|
+
set -u
|
|
12
20
|
|
|
13
21
|
# Parse arguments
|
|
14
22
|
CLEANUP_TARGET=""
|