@aws/ml-container-creator 0.13.4 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -5
- package/config/parameter-schema-v2.json +32 -4
- package/infra/ci-harness/lib/ci-harness-stack.ts +13 -5
- package/infra/ci-harness/package-lock.json +122 -116
- package/infra/ci-harness/package.json +1 -1
- package/package.json +5 -3
- package/pyproject.toml +21 -0
- package/requirements.txt +19 -0
- package/servers/instance-sizer/index.js +72 -4
- package/servers/instance-sizer/lib/model-resolver.js +28 -2
- package/src/app.js +17 -0
- package/src/lib/bootstrap-command-handler.js +33 -23
- package/src/lib/config-loader.js +18 -0
- package/src/lib/config-manager.js +6 -1
- package/src/lib/dataset-slug.js +152 -0
- package/src/lib/generated/cli-options.js +9 -3
- package/src/lib/generated/parameter-matrix.js +14 -3
- package/src/lib/generated/validation-rules.js +1 -1
- package/src/lib/mcp-query-runner.js +6 -0
- package/src/lib/prompt-runner.js +5 -0
- package/src/lib/prompts/feature-prompts.js +1 -1
- package/src/lib/template-manager.js +0 -7
- package/src/lib/template-variable-resolver.js +51 -1
- package/src/lib/tune-config-state.js +14 -1
- package/templates/do/.adapter_helper.py +451 -0
- package/templates/do/.benchmark_writer.py +22 -0
- package/templates/do/.register_helper.py +1163 -0
- package/templates/do/.stage_helper.py +419 -0
- package/templates/do/.tune_helper.py +379 -65
- package/templates/do/__pycache__/.adapter_helper.cpython-312.pyc +0 -0
- package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
- package/templates/do/__pycache__/.register_helper.cpython-312.pyc +0 -0
- package/templates/do/__pycache__/.tune_helper.cpython-312.pyc +0 -0
- package/templates/do/adapter +427 -27
- package/templates/do/add-ic +85 -3
- package/templates/do/benchmark +173 -15
- package/templates/do/config +24 -0
- package/templates/do/lib/inference-component.sh +56 -3
- package/templates/do/lib/profile.sh +5 -0
- package/templates/do/register +552 -6
- package/templates/do/stage +91 -272
- package/templates/do/test +12 -2
- package/templates/do/tune +264 -12
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aws/ml-container-creator",
|
|
3
|
-
"version": "0.13.4",
|
|
3
|
+
"version": "0.15.0",
|
|
4
4
|
"description": "Build and deploy custom ML containers on AWS SageMaker with minimal configuration.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -76,7 +76,9 @@
|
|
|
76
76
|
"README.md",
|
|
77
77
|
"LICENSE",
|
|
78
78
|
"LICENSE-THIRD-PARTY",
|
|
79
|
-
"NOTICE"
|
|
79
|
+
"NOTICE",
|
|
80
|
+
"requirements.txt",
|
|
81
|
+
"pyproject.toml"
|
|
80
82
|
],
|
|
81
83
|
"type": "module",
|
|
82
84
|
"license": "Apache-2.0",
|
|
@@ -127,7 +129,7 @@
|
|
|
127
129
|
"lint-staged": "^17.0.7",
|
|
128
130
|
"mocha": "^10.2.0",
|
|
129
131
|
"npm-force-resolutions": "^0.0.10",
|
|
130
|
-
"nyc": "^
|
|
132
|
+
"nyc": "^18.0.0",
|
|
131
133
|
"sbom": "^0.0.0"
|
|
132
134
|
},
|
|
133
135
|
"lint-staged": {
|
package/pyproject.toml
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "ml-container-creator"
|
|
3
|
+
version = "0.13.4"
|
|
4
|
+
description = "Python dependencies for ml-container-creator do/ lifecycle scripts"
|
|
5
|
+
requires-python = ">=3.10"
|
|
6
|
+
dependencies = [
|
|
7
|
+
"boto3>=1.35.0",
|
|
8
|
+
"huggingface-hub>=0.25.0",
|
|
9
|
+
"hf-transfer>=0.1.8",
|
|
10
|
+
"pyarrow>=17.0.0",
|
|
11
|
+
"sagemaker-core>=1.0.0",
|
|
12
|
+
"sagemaker[train]>=3.0.0",
|
|
13
|
+
"sagemaker[serve]>=3.0.0",
|
|
14
|
+
"packaging>=24.0",
|
|
15
|
+
"pyyaml>=6.0",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
[dependency-groups]
|
|
19
|
+
dev = [
|
|
20
|
+
"pytest>=8.0",
|
|
21
|
+
]
|
package/requirements.txt
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Python dependencies for do/ lifecycle scripts
|
|
2
|
+
#
|
|
3
|
+
# Install with uv (recommended):
|
|
4
|
+
# uv pip install -r requirements.txt
|
|
5
|
+
#
|
|
6
|
+
# Or with pip:
|
|
7
|
+
# pip install -r requirements.txt
|
|
8
|
+
#
|
|
9
|
+
# Source of truth: pyproject.toml
|
|
10
|
+
|
|
11
|
+
boto3>=1.35.0
|
|
12
|
+
huggingface_hub>=0.25.0
|
|
13
|
+
hf_transfer>=0.1.8
|
|
14
|
+
pyarrow>=17.0.0
|
|
15
|
+
sagemaker-core>=1.0.0
|
|
16
|
+
sagemaker[train]>=3.0.0
|
|
17
|
+
sagemaker[serve]>=3.0.0
|
|
18
|
+
packaging>=24.0
|
|
19
|
+
PyYAML>=6.0
|
|
@@ -25,8 +25,8 @@ import { readFileSync } from 'node:fs';
|
|
|
25
25
|
import { fileURLToPath } from 'node:url';
|
|
26
26
|
import { resolve, dirname } from 'node:path';
|
|
27
27
|
import { resolveModelMetadata } from './lib/model-resolver.js';
|
|
28
|
-
import { estimateVram } from './lib/vram-estimator.js';
|
|
29
|
-
import { filterAndRankInstances, applyAvailabilityRanking } from './lib/instance-ranker.js';
|
|
28
|
+
import { estimateVram, computeMaxModelLen } from './lib/vram-estimator.js';
|
|
29
|
+
import { filterAndRankInstances, applyAvailabilityRanking, getPerGpuMemoryGb } from './lib/instance-ranker.js';
|
|
30
30
|
import { QuotaResolver } from './lib/quota-resolver.js';
|
|
31
31
|
import { queryBedrock } from '../lib/bedrock-client.js';
|
|
32
32
|
|
|
@@ -393,6 +393,66 @@ async function handleGetInstanceRecommendation(params) {
|
|
|
393
393
|
{ limit }
|
|
394
394
|
);
|
|
395
395
|
|
|
396
|
+
// Step 3-max_model_len: When no instance fits at full context, try capping context length
|
|
397
|
+
// NFR-1 guard: skip this logic for models with recommendedInstances in catalog
|
|
398
|
+
let suggestedMaxModelLen = null;
|
|
399
|
+
let contextLengthCapped = false;
|
|
400
|
+
let originalMaxPositionEmbeddings = null;
|
|
401
|
+
|
|
402
|
+
if (recommendations.length === 0 && !modelMetadata.recommendedInstances && modelMetadata.maxPositionEmbeddings) {
|
|
403
|
+
// Find the largest available GPU instance
|
|
404
|
+
const gpuInstances = Object.entries(effectiveCatalog)
|
|
405
|
+
.filter(([, meta]) => meta.category === 'gpu' && meta.gpus > 0)
|
|
406
|
+
.map(([name, meta]) => {
|
|
407
|
+
const perGpu = getPerGpuMemoryGb(meta);
|
|
408
|
+
return { name, meta, totalVramGb: perGpu ? perGpu * meta.gpus : 0 };
|
|
409
|
+
})
|
|
410
|
+
.filter(i => i.totalVramGb > 0)
|
|
411
|
+
.sort((a, b) => b.totalVramGb - a.totalVramGb);
|
|
412
|
+
|
|
413
|
+
if (gpuInstances.length > 0) {
|
|
414
|
+
const bestInstance = gpuInstances[0];
|
|
415
|
+
|
|
416
|
+
// Compute model weight memory for computeMaxModelLen
|
|
417
|
+
const weightsGb = vramEstimate.breakdown.weightsGb;
|
|
418
|
+
|
|
419
|
+
const safeLen = computeMaxModelLen({
|
|
420
|
+
modelWeightGb: weightsGb,
|
|
421
|
+
totalGpuMemoryGb: bestInstance.meta.gpuMemoryGb || (bestInstance.totalVramGb / bestInstance.meta.gpus),
|
|
422
|
+
gpuCount: bestInstance.meta.gpus,
|
|
423
|
+
numLayers: modelMetadata.numLayers,
|
|
424
|
+
numKvHeads: modelMetadata.numKvHeads,
|
|
425
|
+
headDim: modelMetadata.headDim
|
|
426
|
+
});
|
|
427
|
+
|
|
428
|
+
if (safeLen && safeLen.maxModelLen >= 2048) {
|
|
429
|
+
// Re-estimate VRAM with capped sequence length
|
|
430
|
+
const cappedEstimate = estimateVram({
|
|
431
|
+
parameterCount: modelMetadata.parameterCount,
|
|
432
|
+
dtype: modelMetadata.dtype,
|
|
433
|
+
quantization: quantization || undefined,
|
|
434
|
+
maxSequenceLength: safeLen.maxModelLen,
|
|
435
|
+
batchSize: effectiveBatchSize || undefined
|
|
436
|
+
});
|
|
437
|
+
|
|
438
|
+
// Re-filter instances with the reduced VRAM requirement
|
|
439
|
+
recommendations = filterAndRankInstances(
|
|
440
|
+
cappedEstimate.vramGb,
|
|
441
|
+
effectiveCatalog,
|
|
442
|
+
{ limit }
|
|
443
|
+
);
|
|
444
|
+
|
|
445
|
+
suggestedMaxModelLen = safeLen.maxModelLen;
|
|
446
|
+
contextLengthCapped = true;
|
|
447
|
+
originalMaxPositionEmbeddings = modelMetadata.maxPositionEmbeddings;
|
|
448
|
+
log(`Context capped: ${modelMetadata.maxPositionEmbeddings} → ${safeLen.maxModelLen} for ${modelName}`);
|
|
449
|
+
} else {
|
|
450
|
+
// AC-1.6: safeLen < 2048 or null — recommend larger instance instead
|
|
451
|
+
log(`Model ${modelName} cannot fit 2048 context on ${bestInstance.name}, recommending larger instance`);
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
}
|
|
455
|
+
|
|
396
456
|
// Step 3a: Quota & availability filtering (discover mode only)
|
|
397
457
|
let preQuotaFilterCount = 0;
|
|
398
458
|
let allFilteredByQuota = false;
|
|
@@ -521,7 +581,10 @@ async function handleGetInstanceRecommendation(params) {
|
|
|
521
581
|
content: [{
|
|
522
582
|
type: 'text',
|
|
523
583
|
text: JSON.stringify({
|
|
524
|
-
values: {
|
|
584
|
+
values: {
|
|
585
|
+
instanceType: topRecommendation,
|
|
586
|
+
...(suggestedMaxModelLen ? { maxModelLen: suggestedMaxModelLen } : {})
|
|
587
|
+
},
|
|
525
588
|
choices: { instanceType: rankedList },
|
|
526
589
|
metadata: {
|
|
527
590
|
modelName,
|
|
@@ -533,7 +596,12 @@ async function handleGetInstanceRecommendation(params) {
|
|
|
533
596
|
recommendations: finalRecommendations,
|
|
534
597
|
source: modelMetadata.source,
|
|
535
598
|
smartModeUsed,
|
|
536
|
-
allFilteredByQuota
|
|
599
|
+
allFilteredByQuota,
|
|
600
|
+
...(contextLengthCapped ? {
|
|
601
|
+
suggestedMaxModelLen,
|
|
602
|
+
contextLengthCapped: true,
|
|
603
|
+
originalMaxPositionEmbeddings
|
|
604
|
+
} : {})
|
|
537
605
|
}
|
|
538
606
|
})
|
|
539
607
|
}]
|
|
@@ -142,13 +142,27 @@ export function extractFromHuggingFaceConfig(config) {
|
|
|
142
142
|
const architecture = (config.architectures && config.architectures[0]) || 'unknown';
|
|
143
143
|
const maxPositionEmbeddings = config.max_position_embeddings || 4096;
|
|
144
144
|
|
|
145
|
-
|
|
145
|
+
// Extract architecture params for KV cache computation (computeMaxModelLen)
|
|
146
|
+
const numLayers = config.num_hidden_layers || null;
|
|
147
|
+
const numKvHeads = config.num_key_value_heads || config.num_attention_heads || null;
|
|
148
|
+
const headDim = config.head_dim || (config.hidden_size && config.num_attention_heads
|
|
149
|
+
? Math.floor(config.hidden_size / config.num_attention_heads)
|
|
150
|
+
: null);
|
|
151
|
+
|
|
152
|
+
const result = {
|
|
146
153
|
parameterCount,
|
|
147
154
|
dtype,
|
|
148
155
|
architecture,
|
|
149
156
|
maxPositionEmbeddings,
|
|
150
157
|
source: 'huggingface_api'
|
|
151
158
|
};
|
|
159
|
+
|
|
160
|
+
// Only include architecture params if available (graceful degradation)
|
|
161
|
+
if (numLayers) result.numLayers = numLayers;
|
|
162
|
+
if (numKvHeads) result.numKvHeads = numKvHeads;
|
|
163
|
+
if (headDim) result.headDim = headDim;
|
|
164
|
+
|
|
165
|
+
return result;
|
|
152
166
|
}
|
|
153
167
|
|
|
154
168
|
/**
|
|
@@ -175,13 +189,25 @@ export async function resolveModelMetadata(modelName, options = {}) {
|
|
|
175
189
|
const catalogEntry = catalogLookup(modelName, catalog);
|
|
176
190
|
|
|
177
191
|
if (catalogEntry) {
|
|
178
|
-
|
|
192
|
+
const result = {
|
|
179
193
|
parameterCount: catalogEntry.parameterCount,
|
|
180
194
|
dtype: catalogEntry.defaultDtype || 'float16',
|
|
181
195
|
architecture: catalogEntry.architecture || 'unknown',
|
|
182
196
|
maxPositionEmbeddings: catalogEntry.maxPositionEmbeddings || 4096,
|
|
183
197
|
source: 'catalog'
|
|
184
198
|
};
|
|
199
|
+
|
|
200
|
+
// Pass through recommendedInstances for NFR-1 guard
|
|
201
|
+
if (catalogEntry.recommendedInstances) {
|
|
202
|
+
result.recommendedInstances = catalogEntry.recommendedInstances;
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
// Pass through architecture params if available in catalog
|
|
206
|
+
if (catalogEntry.numLayers) result.numLayers = catalogEntry.numLayers;
|
|
207
|
+
if (catalogEntry.numKvHeads) result.numKvHeads = catalogEntry.numKvHeads;
|
|
208
|
+
if (catalogEntry.headDim) result.headDim = catalogEntry.headDim;
|
|
209
|
+
|
|
210
|
+
return result;
|
|
185
211
|
}
|
|
186
212
|
|
|
187
213
|
// Step 2: If discover mode, try HuggingFace Hub
|
package/src/app.js
CHANGED
|
@@ -400,6 +400,9 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
|
|
|
400
400
|
ignorePatterns.push('**/do/adapters/**');
|
|
401
401
|
ignorePatterns.push('**/do/tune');
|
|
402
402
|
ignorePatterns.push('**/do/.tune_helper.py');
|
|
403
|
+
ignorePatterns.push('**/do/.stage_helper.py');
|
|
404
|
+
ignorePatterns.push('**/do/.adapter_helper.py');
|
|
405
|
+
ignorePatterns.push('**/do/.register_helper.py');
|
|
403
406
|
ignorePatterns.push('**/do/train');
|
|
404
407
|
ignorePatterns.push('**/do/.train_build_request.py');
|
|
405
408
|
ignorePatterns.push('**/do/.train_status_parser.py');
|
|
@@ -576,6 +579,20 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
|
|
|
576
579
|
fs.writeFileSync(gitignorePath, mlccIgnore);
|
|
577
580
|
}
|
|
578
581
|
}
|
|
582
|
+
|
|
583
|
+
// Add __pycache__/ and *.pyc to .gitignore (Python helpers leave bytecode behind)
|
|
584
|
+
{
|
|
585
|
+
const gitignorePath = path.join(destDir, '.gitignore');
|
|
586
|
+
const pycacheIgnore = '# Python bytecode (generated by do/ helper scripts)\n__pycache__/\n*.pyc\n';
|
|
587
|
+
if (fs.existsSync(gitignorePath)) {
|
|
588
|
+
const existing = fs.readFileSync(gitignorePath, 'utf8');
|
|
589
|
+
if (!existing.includes('__pycache__')) {
|
|
590
|
+
fs.appendFileSync(gitignorePath, `\n${pycacheIgnore}`);
|
|
591
|
+
}
|
|
592
|
+
} else {
|
|
593
|
+
fs.writeFileSync(gitignorePath, pycacheIgnore);
|
|
594
|
+
}
|
|
595
|
+
}
|
|
579
596
|
}
|
|
580
597
|
|
|
581
598
|
/**
|
|
@@ -459,39 +459,49 @@ export default class BootstrapCommandHandler {
|
|
|
459
459
|
|
|
460
460
|
// --no-rollback prevents rollback on AlreadyExists errors for IAM roles
|
|
461
461
|
// that may pre-exist from a prior deployment or another region.
|
|
462
|
-
// Check if benchmark bucket already exists
|
|
463
|
-
|
|
462
|
+
// Check if benchmark results bucket already exists.
|
|
463
|
+
// If it does, skip CDK deploy for benchmark infra — just update the profile.
|
|
464
|
+
let benchmarkBucketExists = false;
|
|
464
465
|
if (options.benchmarkInfra) {
|
|
466
|
+
const resultsBucketName = `mlcc-benchmark-results-${profileData.accountId}-${profileData.awsRegion}`;
|
|
465
467
|
try {
|
|
466
468
|
execSync(
|
|
467
|
-
`aws s3api head-bucket --bucket
|
|
469
|
+
`aws s3api head-bucket --bucket ${resultsBucketName}${profileData.awsProfile ? ` --profile ${profileData.awsProfile}` : ''} --region ${profileData.awsRegion}`,
|
|
468
470
|
{ encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }
|
|
469
471
|
);
|
|
470
|
-
|
|
471
|
-
console.log(
|
|
472
|
+
benchmarkBucketExists = true;
|
|
473
|
+
console.log(` ✅ Benchmark results bucket already exists: ${resultsBucketName}`);
|
|
474
|
+
console.log(' Skipping CDK deploy for benchmark infra — updating profile only.');
|
|
475
|
+
profileData.benchmarkInfraProvisioned = true;
|
|
476
|
+
profileData.ciGlueDatabase = profileData.ciGlueDatabase || 'mlcc_ci';
|
|
477
|
+
profileData.ciBenchmarkResultsBucket = resultsBucketName;
|
|
472
478
|
} catch {
|
|
473
479
|
// Bucket doesn't exist — will be created fresh
|
|
474
480
|
}
|
|
475
481
|
}
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
482
|
+
|
|
483
|
+
// Only run CDK deploy if we actually need to create infrastructure
|
|
484
|
+
if (!benchmarkBucketExists || !options.benchmarkInfra) {
|
|
485
|
+
const cdkDeployCmd = options.benchmarkInfra
|
|
486
|
+
? 'npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback --parameters MlccCiHarnessStack:CreateBenchmarkInfra=true'
|
|
487
|
+
: 'npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback';
|
|
488
|
+
execSync(
|
|
489
|
+
cdkDeployCmd,
|
|
490
|
+
{
|
|
491
|
+
cwd: ciHarnessDir,
|
|
492
|
+
encoding: 'utf8',
|
|
493
|
+
stdio: 'inherit',
|
|
494
|
+
env: {
|
|
495
|
+
...process.env,
|
|
496
|
+
AWS_REGION: profileData.awsRegion,
|
|
497
|
+
CDK_DEFAULT_REGION: profileData.awsRegion,
|
|
498
|
+
CDK_DEFAULT_ACCOUNT: profileData.accountId,
|
|
499
|
+
AWS_PROFILE: profileData.awsProfile
|
|
500
|
+
}
|
|
491
501
|
}
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
502
|
+
);
|
|
503
|
+
console.log(' ✅ CI harness stack deployed');
|
|
504
|
+
}
|
|
495
505
|
|
|
496
506
|
profileData.ciInfraProvisioned = true;
|
|
497
507
|
profileData.ciTableName = 'mlcc-ci-table';
|
package/src/lib/config-loader.js
CHANGED
|
@@ -265,6 +265,21 @@ export default class ConfigLoader {
|
|
|
265
265
|
return;
|
|
266
266
|
}
|
|
267
267
|
|
|
268
|
+
// Handle icEnvVars object (deploy-time IC environment variables)
|
|
269
|
+
if (key === 'icEnvVars' && typeof value === 'object' && value !== null) {
|
|
270
|
+
if (!this.manager.config.icEnvVars) {
|
|
271
|
+
this.manager.config.icEnvVars = {};
|
|
272
|
+
}
|
|
273
|
+
const cliIcEnvVars = (this.manager.explicitConfig && this.manager.explicitConfig.icEnvVars) || {};
|
|
274
|
+
Object.entries(value).forEach(([envKey, envValue]) => {
|
|
275
|
+
if (!(envKey in cliIcEnvVars)) {
|
|
276
|
+
this.manager.config.icEnvVars[envKey] = envValue;
|
|
277
|
+
this.manager._recordSource(`icEnvVars.${envKey}`, envValue, 'config-file');
|
|
278
|
+
}
|
|
279
|
+
});
|
|
280
|
+
return;
|
|
281
|
+
}
|
|
282
|
+
|
|
268
283
|
if (this.manager._isSourceSupported(key, 'configFile')) {
|
|
269
284
|
filteredConfig[key] = this.manager._parseValue(key, value);
|
|
270
285
|
this.manager._recordSource(key, this.manager._parseValue(key, value), 'config-file');
|
|
@@ -342,6 +357,9 @@ export default class ConfigLoader {
|
|
|
342
357
|
|
|
343
358
|
// Parse --server-env KEY=VALUE pairs
|
|
344
359
|
this._parseEnvVarOptions('server-env', 'serverEnvVars');
|
|
360
|
+
|
|
361
|
+
// Parse --ic-env KEY=VALUE pairs (deploy-time IC environment variables)
|
|
362
|
+
this._parseEnvVarOptions('ic-env', 'icEnvVars');
|
|
345
363
|
}
|
|
346
364
|
|
|
347
365
|
/**
|
|
@@ -183,6 +183,9 @@ export default class ConfigManager {
|
|
|
183
183
|
if (this.config.serverEnvVars && typeof this.config.serverEnvVars === 'object') {
|
|
184
184
|
finalConfig.serverEnvVars = { ...this.config.serverEnvVars };
|
|
185
185
|
}
|
|
186
|
+
if (this.config.icEnvVars && typeof this.config.icEnvVars === 'object') {
|
|
187
|
+
finalConfig.icEnvVars = { ...this.config.icEnvVars };
|
|
188
|
+
}
|
|
186
189
|
|
|
187
190
|
// Ensure all parameters from the matrix are included in final config
|
|
188
191
|
// This is important for optional parameters that might be null
|
|
@@ -411,7 +414,8 @@ export default class ConfigManager {
|
|
|
411
414
|
...endpointParams,
|
|
412
415
|
...icParams,
|
|
413
416
|
'modelEnvVars',
|
|
414
|
-
'serverEnvVars'
|
|
417
|
+
'serverEnvVars',
|
|
418
|
+
'icEnvVars'
|
|
415
419
|
]);
|
|
416
420
|
const core = {};
|
|
417
421
|
for (const [key, value] of Object.entries(this.config)) {
|
|
@@ -426,6 +430,7 @@ export default class ConfigManager {
|
|
|
426
430
|
icConfig,
|
|
427
431
|
modelEnvVars: { ...(this.config.modelEnvVars || {}) },
|
|
428
432
|
serverEnvVars: { ...(this.config.serverEnvVars || {}) },
|
|
433
|
+
icEnvVars: { ...(this.config.icEnvVars || {}) },
|
|
429
434
|
manifest: [...this._sourceManifest]
|
|
430
435
|
};
|
|
431
436
|
}
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Dataset Slug Derivation
|
|
6
|
+
*
|
|
7
|
+
* Derives a deterministic, short slug from a dataset URI for use in
|
|
8
|
+
* tuning-job-aware adapter naming conventions.
|
|
9
|
+
*
|
|
10
|
+
* Slugification rules:
|
|
11
|
+
* - Lowercase
|
|
12
|
+
* - Strip non-alphanumeric characters (keep hyphens)
|
|
13
|
+
* - Truncate to 20 characters
|
|
14
|
+
* - Replace consecutive hyphens with single hyphen
|
|
15
|
+
* - Strip leading/trailing hyphens
|
|
16
|
+
*
|
|
17
|
+
* Examples:
|
|
18
|
+
* hf://org/name -> "name"
|
|
19
|
+
* hf://tatsu-lab/alpaca -> "alpaca"
|
|
20
|
+
* hf://Open-Orca/OpenOrca -> "openorca"
|
|
21
|
+
* s3://bucket/path/file.jsonl -> "file"
|
|
22
|
+
*
|
|
23
|
+
* Requirements: US-4 (AC-4.2)
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
/**
 * Derive a dataset slug from a dataset URI.
 *
 * The raw name is picked from the URI and then passed through slugify():
 *   - hf://org/name[/split][?file=pattern] -> "name" (second path component,
 *     or the first one when there is no second component)
 *   - s3://bucket/path/file.jsonl          -> "file" (last non-empty path
 *     segment with its extension removed)
 *   - any other string                     -> last non-empty "/"-separated
 *     segment with its extension removed
 *
 * Empty path segments are ignored in the s3 and unknown-format branches, so a
 * trailing slash (e.g. "s3://bucket/prefix/") yields "prefix" instead of an
 * empty slug. The extension is stripped in the unknown-format branch too, so
 * "data/train.jsonl" and "s3://bucket/data/train.jsonl" produce the same slug.
 *
 * @param {string} datasetUri - Dataset URI (s3://... or hf://...)
 * @returns {string} The derived slug, or empty string if extraction fails
 */
export function deriveDatasetSlug(datasetUri) {
    if (!datasetUri || typeof datasetUri !== 'string') {
        return '';
    }

    // Last non-empty "/"-separated segment; '' when there is none.
    const lastSegment = (path) => {
        const segments = path.split('/').filter(Boolean);
        return segments.length > 0 ? segments[segments.length - 1] : '';
    };

    // Drop everything from the last dot onward. A leading dot (index 0) is
    // kept so dotfiles such as ".hidden" are not reduced to an empty name.
    const stripExtension = (name) => {
        const dotIndex = name.lastIndexOf('.');
        return dotIndex > 0 ? name.substring(0, dotIndex) : name;
    };

    if (datasetUri.startsWith('hf://')) {
        // hf://org/name[/split][?file=pattern]: drop the query, then take the
        // dataset name (second component), falling back to the first.
        const [pathOnly] = datasetUri.slice('hf://'.length).split('?');
        const [org, name] = pathOnly.split('/');
        return slugify(name || org || '');
    }

    // s3://bucket/path/file.ext and unknown formats share the same rule:
    // the slug comes from the final path segment without its extension.
    const path = datasetUri.startsWith('s3://')
        ? datasetUri.slice('s3://'.length)
        : datasetUri;

    return slugify(stripExtension(lastSegment(path)));
}
|
|
63
|
+
|
|
64
|
+
/**
 * Apply slugification rules to a raw name.
 *
 * Steps, in order:
 *   1. lowercase
 *   2. remove every character other than a-z, 0-9 and "-"
 *   3. collapse runs of hyphens into a single hyphen
 *   4. strip leading hyphens
 *   5. truncate to `maxLength` characters
 *   6. strip trailing hyphens (so truncation never leaves a dangling "-")
 *
 * @param {string} raw - Raw name to slugify
 * @param {number} [maxLength=20] - Maximum slug length; values that are not
 *   positive integers fall back to 20
 * @returns {string} Slugified string, or empty string for empty or
 *   non-string input
 */
export function slugify(raw, maxLength = 20) {
    // Non-string input cannot be lowercased; treat it like an empty name
    // instead of throwing.
    if (typeof raw !== 'string' || raw === '') {
        return '';
    }

    const limit = Number.isInteger(maxLength) && maxLength > 0 ? maxLength : 20;

    return raw
        .toLowerCase()
        .replace(/[^a-z0-9-]/g, '') // strip non-alphanumeric (keep hyphens)
        .replace(/-{2,}/g, '-') // collapse consecutive hyphens
        .replace(/^-+/, '') // strip leading hyphens
        .slice(0, limit) // truncate
        .replace(/-+$/, ''); // strip trailing hyphens, including one exposed by truncation
}
|
|
89
|
+
|
|
90
|
+
/**
 * Resolve a --from-tune argument to the appropriate config variable name.
 *
 * Resolution rules:
 *   - No arg (empty/null) -> TUNE_OUTPUT_PATH_LATEST
 *   - technique only (e.g., "sft") -> TUNE_ADAPTER_PATH_SFT
 *   - technique-dataset compound (e.g., "sft-alpaca") ->
 *     TUNE_ADAPTER_PATH_SFT_ALPACA when that variable exists; otherwise the
 *     technique-only variable, with the compound name reported in `fallback`
 *
 * The argument is split on its FIRST hyphen: everything before it is the
 * technique, everything after it is the dataset slug (remaining hyphens in
 * the slug become underscores in the variable name). An argument with nothing
 * after the hyphen (e.g., "sft-") is treated as technique-only.
 *
 * @param {string} fromTuneArg - The --from-tune argument value
 * @param {function} configVarExists - Function that checks if a config var
 *   exists; when it is not a function, no compound variable is considered to
 *   exist
 * @returns {{ varName: string, technique: string, slug: string, isCompound: boolean, fallback: string|null }}
 */
export function resolveFromTuneVar(fromTuneArg, configVarExists) {
    if (!fromTuneArg) {
        return {
            varName: 'TUNE_OUTPUT_PATH_LATEST',
            technique: '',
            slug: '',
            isCompound: false,
            fallback: null
        };
    }

    // Without a usable predicate nothing can be confirmed to exist, so the
    // compound lookup degrades to the technique-only fallback.
    const varExists = typeof configVarExists === 'function' ? configVarExists : () => false;

    // A hyphen at index > 0 marks a potential compound key.
    const hyphenIndex = fromTuneArg.indexOf('-');
    const hasHyphen = hyphenIndex > 0;
    const technique = hasHyphen ? fromTuneArg.substring(0, hyphenIndex) : fromTuneArg;
    const slug = hasHyphen ? fromTuneArg.substring(hyphenIndex + 1) : '';
    const techniqueVar = `TUNE_ADAPTER_PATH_${technique.toUpperCase()}`;

    // Technique-only: no hyphen, or nothing after it.
    if (slug === '') {
        return {
            varName: techniqueVar,
            technique,
            slug: '',
            isCompound: false,
            fallback: null
        };
    }

    const compoundVar = `${techniqueVar}_${slug.toUpperCase().replace(/-/g, '_')}`;

    if (varExists(compoundVar)) {
        return {
            varName: compoundVar,
            technique,
            slug,
            isCompound: true,
            fallback: null
        };
    }

    // Compound key doesn't exist — fall back to technique-only and report
    // the compound variable that was tried.
    return {
        varName: techniqueVar,
        technique,
        slug,
        isCompound: false,
        fallback: compoundVar
    };
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
// AUTO-GENERATED by scripts/codegen-cli.js — DO NOT EDIT
|
|
2
2
|
// Source: config/parameter-schema-v2.json
|
|
3
|
-
// Generated: 2026-06-
|
|
3
|
+
// Generated: 2026-06-22T13:49:00.815Z
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
6
|
* CLI option definitions derived from parameter-schema-v2.json.
|
|
@@ -70,7 +70,7 @@ export const cliOptions = [
|
|
|
70
70
|
{
|
|
71
71
|
'flag': '--enable-lora',
|
|
72
72
|
'description': 'Enable LoRA adapter serving',
|
|
73
|
-
'defaultValue':
|
|
73
|
+
'defaultValue': true
|
|
74
74
|
},
|
|
75
75
|
{
|
|
76
76
|
'flag': '--max-loras <n>',
|
|
@@ -85,7 +85,7 @@ export const cliOptions = [
|
|
|
85
85
|
{
|
|
86
86
|
'flag': '--include-benchmark',
|
|
87
87
|
'description': 'Include SageMaker AI Benchmarking scripts (do/benchmark, do/optimize). Workload configuration is specified at runtime via --workload flag.',
|
|
88
|
-
'defaultValue':
|
|
88
|
+
'defaultValue': true
|
|
89
89
|
},
|
|
90
90
|
{
|
|
91
91
|
'flag': '--benchmark-concurrency <n>',
|
|
@@ -353,6 +353,11 @@ export const cliOptions = [
|
|
|
353
353
|
'description': 'Server env var, repeatable (e.g. SGLANG_MEM_FRACTION=0.9)',
|
|
354
354
|
'repeatable': true
|
|
355
355
|
},
|
|
356
|
+
{
|
|
357
|
+
'flag': '--ic-env <KEY=VALUE>',
|
|
358
|
+
'description': 'Deploy-time environment variable for inference components (IC_ENV_* prefix), repeatable (e.g. VLLM_MAX_MODEL_LEN=8192)',
|
|
359
|
+
'repeatable': true
|
|
360
|
+
},
|
|
356
361
|
{
|
|
357
362
|
'flag': '--include-sample',
|
|
358
363
|
'description': 'Include sample model code',
|
|
@@ -464,6 +469,7 @@ export const helpGroups = {
|
|
|
464
469
|
'--fsx-volume-handle': 'hyperpod',
|
|
465
470
|
'--model-env': 'env',
|
|
466
471
|
'--server-env': 'env',
|
|
472
|
+
'--ic-env': 'ic',
|
|
467
473
|
'--include-sample': 'features',
|
|
468
474
|
'--include-testing': 'features',
|
|
469
475
|
'--test-types': 'features',
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
// AUTO-GENERATED by scripts/codegen-parameter-matrix.js — DO NOT EDIT
|
|
2
2
|
// Source: config/parameter-schema-v2.json
|
|
3
|
-
// Generated: 2026-06-
|
|
3
|
+
// Generated: 2026-06-22T13:49:00.924Z
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
6
|
* Parameter matrix defining how each parameter is loaded from various sources.
|
|
@@ -106,7 +106,7 @@ export const parameterMatrix = {
|
|
|
106
106
|
'mcp': false,
|
|
107
107
|
'promptable': true,
|
|
108
108
|
'required': false,
|
|
109
|
-
'default':
|
|
109
|
+
'default': true,
|
|
110
110
|
'valueSpace': 'bounded'
|
|
111
111
|
},
|
|
112
112
|
'maxLoras': {
|
|
@@ -139,7 +139,7 @@ export const parameterMatrix = {
|
|
|
139
139
|
'mcp': false,
|
|
140
140
|
'promptable': true,
|
|
141
141
|
'required': false,
|
|
142
|
-
'default':
|
|
142
|
+
'default': true,
|
|
143
143
|
'valueSpace': 'bounded'
|
|
144
144
|
},
|
|
145
145
|
'benchmarkConcurrency': {
|
|
@@ -569,6 +569,17 @@ export const parameterMatrix = {
|
|
|
569
569
|
'default': null,
|
|
570
570
|
'valueSpace': 'unbounded'
|
|
571
571
|
},
|
|
572
|
+
'icEnv': {
|
|
573
|
+
'cliOption': 'ic-env',
|
|
574
|
+
'envVar': null,
|
|
575
|
+
'configFile': true,
|
|
576
|
+
'packageJson': false,
|
|
577
|
+
'mcp': false,
|
|
578
|
+
'promptable': false,
|
|
579
|
+
'required': false,
|
|
580
|
+
'default': [],
|
|
581
|
+
'valueSpace': 'unbounded'
|
|
582
|
+
},
|
|
572
583
|
'includeSampleModel': {
|
|
573
584
|
'cliOption': 'include-sample',
|
|
574
585
|
'envVar': 'ML_INCLUDE_SAMPLE',
|
|
@@ -216,6 +216,12 @@ export default class McpQueryRunner {
|
|
|
216
216
|
if (parsed.choices?.instanceType?.length > 0) {
|
|
217
217
|
this.runner._instanceSizerMetadata = parsed.metadata || null;
|
|
218
218
|
|
|
219
|
+
// Store maxModelLen from sizer if context was capped (AC-1.7)
|
|
220
|
+
if (parsed.values?.maxModelLen) {
|
|
221
|
+
this.runner._sizerMaxModelLen = parsed.values.maxModelLen;
|
|
222
|
+
console.log(` ✓ Context length capped: max_model_len=${parsed.values.maxModelLen}`);
|
|
223
|
+
}
|
|
224
|
+
|
|
219
225
|
// Build display labels with VRAM estimate and utilization percentage
|
|
220
226
|
const recommendations = parsed.metadata?.recommendations || [];
|
|
221
227
|
const estimatedVramGb = parsed.metadata?.estimatedVramGb;
|