@aws/ml-container-creator 0.13.5 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/parameter-schema-v2.json +32 -4
- package/infra/ci-harness/lib/ci-harness-stack.ts +13 -5
- package/infra/ci-harness/package-lock.json +121 -111
- package/infra/ci-harness/package.json +1 -1
- package/package.json +2 -2
- package/servers/instance-sizer/index.js +72 -4
- package/servers/instance-sizer/lib/model-resolver.js +28 -2
- package/src/app.js +15 -0
- package/src/lib/config-loader.js +18 -0
- package/src/lib/config-manager.js +6 -1
- package/src/lib/dataset-slug.js +152 -0
- package/src/lib/generated/cli-options.js +9 -3
- package/src/lib/generated/parameter-matrix.js +14 -3
- package/src/lib/generated/validation-rules.js +1 -1
- package/src/lib/mcp-query-runner.js +6 -0
- package/src/lib/prompt-runner.js +5 -0
- package/src/lib/prompts/feature-prompts.js +1 -1
- package/src/lib/template-manager.js +0 -7
- package/src/lib/template-variable-resolver.js +51 -1
- package/src/lib/tune-config-state.js +14 -1
- package/templates/do/.benchmark_writer.py +9 -0
- package/templates/do/.register_helper.py +1163 -0
- package/templates/do/.tune_helper.py +168 -2
- package/templates/do/__pycache__/.adapter_helper.cpython-312.pyc +0 -0
- package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
- package/templates/do/__pycache__/.register_helper.cpython-312.pyc +0 -0
- package/templates/do/__pycache__/.tune_helper.cpython-312.pyc +0 -0
- package/templates/do/adapter +319 -27
- package/templates/do/add-ic +85 -3
- package/templates/do/benchmark +28 -8
- package/templates/do/config +20 -0
- package/templates/do/lib/inference-component.sh +56 -3
- package/templates/do/register +552 -6
- package/templates/do/test +12 -2
- package/templates/do/tune +201 -6
|
@@ -25,8 +25,8 @@ import { readFileSync } from 'node:fs';
|
|
|
25
25
|
import { fileURLToPath } from 'node:url';
|
|
26
26
|
import { resolve, dirname } from 'node:path';
|
|
27
27
|
import { resolveModelMetadata } from './lib/model-resolver.js';
|
|
28
|
-
import { estimateVram } from './lib/vram-estimator.js';
|
|
29
|
-
import { filterAndRankInstances, applyAvailabilityRanking } from './lib/instance-ranker.js';
|
|
28
|
+
import { estimateVram, computeMaxModelLen } from './lib/vram-estimator.js';
|
|
29
|
+
import { filterAndRankInstances, applyAvailabilityRanking, getPerGpuMemoryGb } from './lib/instance-ranker.js';
|
|
30
30
|
import { QuotaResolver } from './lib/quota-resolver.js';
|
|
31
31
|
import { queryBedrock } from '../lib/bedrock-client.js';
|
|
32
32
|
|
|
@@ -393,6 +393,66 @@ async function handleGetInstanceRecommendation(params) {
|
|
|
393
393
|
{ limit }
|
|
394
394
|
);
|
|
395
395
|
|
|
396
|
+
// Step 3-max_model_len: When no instance fits at full context, try capping context length
|
|
397
|
+
// NFR-1 guard: skip this logic for models with recommendedInstances in catalog
|
|
398
|
+
let suggestedMaxModelLen = null;
|
|
399
|
+
let contextLengthCapped = false;
|
|
400
|
+
let originalMaxPositionEmbeddings = null;
|
|
401
|
+
|
|
402
|
+
if (recommendations.length === 0 && !modelMetadata.recommendedInstances && modelMetadata.maxPositionEmbeddings) {
|
|
403
|
+
// Find the largest available GPU instance
|
|
404
|
+
const gpuInstances = Object.entries(effectiveCatalog)
|
|
405
|
+
.filter(([, meta]) => meta.category === 'gpu' && meta.gpus > 0)
|
|
406
|
+
.map(([name, meta]) => {
|
|
407
|
+
const perGpu = getPerGpuMemoryGb(meta);
|
|
408
|
+
return { name, meta, totalVramGb: perGpu ? perGpu * meta.gpus : 0 };
|
|
409
|
+
})
|
|
410
|
+
.filter(i => i.totalVramGb > 0)
|
|
411
|
+
.sort((a, b) => b.totalVramGb - a.totalVramGb);
|
|
412
|
+
|
|
413
|
+
if (gpuInstances.length > 0) {
|
|
414
|
+
const bestInstance = gpuInstances[0];
|
|
415
|
+
|
|
416
|
+
// Compute model weight memory for computeMaxModelLen
|
|
417
|
+
const weightsGb = vramEstimate.breakdown.weightsGb;
|
|
418
|
+
|
|
419
|
+
const safeLen = computeMaxModelLen({
|
|
420
|
+
modelWeightGb: weightsGb,
|
|
421
|
+
totalGpuMemoryGb: bestInstance.meta.gpuMemoryGb || (bestInstance.totalVramGb / bestInstance.meta.gpus),
|
|
422
|
+
gpuCount: bestInstance.meta.gpus,
|
|
423
|
+
numLayers: modelMetadata.numLayers,
|
|
424
|
+
numKvHeads: modelMetadata.numKvHeads,
|
|
425
|
+
headDim: modelMetadata.headDim
|
|
426
|
+
});
|
|
427
|
+
|
|
428
|
+
if (safeLen && safeLen.maxModelLen >= 2048) {
|
|
429
|
+
// Re-estimate VRAM with capped sequence length
|
|
430
|
+
const cappedEstimate = estimateVram({
|
|
431
|
+
parameterCount: modelMetadata.parameterCount,
|
|
432
|
+
dtype: modelMetadata.dtype,
|
|
433
|
+
quantization: quantization || undefined,
|
|
434
|
+
maxSequenceLength: safeLen.maxModelLen,
|
|
435
|
+
batchSize: effectiveBatchSize || undefined
|
|
436
|
+
});
|
|
437
|
+
|
|
438
|
+
// Re-filter instances with the reduced VRAM requirement
|
|
439
|
+
recommendations = filterAndRankInstances(
|
|
440
|
+
cappedEstimate.vramGb,
|
|
441
|
+
effectiveCatalog,
|
|
442
|
+
{ limit }
|
|
443
|
+
);
|
|
444
|
+
|
|
445
|
+
suggestedMaxModelLen = safeLen.maxModelLen;
|
|
446
|
+
contextLengthCapped = true;
|
|
447
|
+
originalMaxPositionEmbeddings = modelMetadata.maxPositionEmbeddings;
|
|
448
|
+
log(`Context capped: ${modelMetadata.maxPositionEmbeddings} → ${safeLen.maxModelLen} for ${modelName}`);
|
|
449
|
+
} else {
|
|
450
|
+
// AC-1.6: safeLen < 2048 or null — recommend larger instance instead
|
|
451
|
+
log(`Model ${modelName} cannot fit 2048 context on ${bestInstance.name}, recommending larger instance`);
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
}
|
|
455
|
+
|
|
396
456
|
// Step 3a: Quota & availability filtering (discover mode only)
|
|
397
457
|
let preQuotaFilterCount = 0;
|
|
398
458
|
let allFilteredByQuota = false;
|
|
@@ -521,7 +581,10 @@ async function handleGetInstanceRecommendation(params) {
|
|
|
521
581
|
content: [{
|
|
522
582
|
type: 'text',
|
|
523
583
|
text: JSON.stringify({
|
|
524
|
-
values: {
|
|
584
|
+
values: {
|
|
585
|
+
instanceType: topRecommendation,
|
|
586
|
+
...(suggestedMaxModelLen ? { maxModelLen: suggestedMaxModelLen } : {})
|
|
587
|
+
},
|
|
525
588
|
choices: { instanceType: rankedList },
|
|
526
589
|
metadata: {
|
|
527
590
|
modelName,
|
|
@@ -533,7 +596,12 @@ async function handleGetInstanceRecommendation(params) {
|
|
|
533
596
|
recommendations: finalRecommendations,
|
|
534
597
|
source: modelMetadata.source,
|
|
535
598
|
smartModeUsed,
|
|
536
|
-
allFilteredByQuota
|
|
599
|
+
allFilteredByQuota,
|
|
600
|
+
...(contextLengthCapped ? {
|
|
601
|
+
suggestedMaxModelLen,
|
|
602
|
+
contextLengthCapped: true,
|
|
603
|
+
originalMaxPositionEmbeddings
|
|
604
|
+
} : {})
|
|
537
605
|
}
|
|
538
606
|
})
|
|
539
607
|
}]
|
|
@@ -142,13 +142,27 @@ export function extractFromHuggingFaceConfig(config) {
|
|
|
142
142
|
const architecture = (config.architectures && config.architectures[0]) || 'unknown';
|
|
143
143
|
const maxPositionEmbeddings = config.max_position_embeddings || 4096;
|
|
144
144
|
|
|
145
|
-
|
|
145
|
+
// Extract architecture params for KV cache computation (computeMaxModelLen)
|
|
146
|
+
const numLayers = config.num_hidden_layers || null;
|
|
147
|
+
const numKvHeads = config.num_key_value_heads || config.num_attention_heads || null;
|
|
148
|
+
const headDim = config.head_dim || (config.hidden_size && config.num_attention_heads
|
|
149
|
+
? Math.floor(config.hidden_size / config.num_attention_heads)
|
|
150
|
+
: null);
|
|
151
|
+
|
|
152
|
+
const result = {
|
|
146
153
|
parameterCount,
|
|
147
154
|
dtype,
|
|
148
155
|
architecture,
|
|
149
156
|
maxPositionEmbeddings,
|
|
150
157
|
source: 'huggingface_api'
|
|
151
158
|
};
|
|
159
|
+
|
|
160
|
+
// Only include architecture params if available (graceful degradation)
|
|
161
|
+
if (numLayers) result.numLayers = numLayers;
|
|
162
|
+
if (numKvHeads) result.numKvHeads = numKvHeads;
|
|
163
|
+
if (headDim) result.headDim = headDim;
|
|
164
|
+
|
|
165
|
+
return result;
|
|
152
166
|
}
|
|
153
167
|
|
|
154
168
|
/**
|
|
@@ -175,13 +189,25 @@ export async function resolveModelMetadata(modelName, options = {}) {
|
|
|
175
189
|
const catalogEntry = catalogLookup(modelName, catalog);
|
|
176
190
|
|
|
177
191
|
if (catalogEntry) {
|
|
178
|
-
|
|
192
|
+
const result = {
|
|
179
193
|
parameterCount: catalogEntry.parameterCount,
|
|
180
194
|
dtype: catalogEntry.defaultDtype || 'float16',
|
|
181
195
|
architecture: catalogEntry.architecture || 'unknown',
|
|
182
196
|
maxPositionEmbeddings: catalogEntry.maxPositionEmbeddings || 4096,
|
|
183
197
|
source: 'catalog'
|
|
184
198
|
};
|
|
199
|
+
|
|
200
|
+
// Pass through recommendedInstances for NFR-1 guard
|
|
201
|
+
if (catalogEntry.recommendedInstances) {
|
|
202
|
+
result.recommendedInstances = catalogEntry.recommendedInstances;
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
// Pass through architecture params if available in catalog
|
|
206
|
+
if (catalogEntry.numLayers) result.numLayers = catalogEntry.numLayers;
|
|
207
|
+
if (catalogEntry.numKvHeads) result.numKvHeads = catalogEntry.numKvHeads;
|
|
208
|
+
if (catalogEntry.headDim) result.headDim = catalogEntry.headDim;
|
|
209
|
+
|
|
210
|
+
return result;
|
|
185
211
|
}
|
|
186
212
|
|
|
187
213
|
// Step 2: If discover mode, try HuggingFace Hub
|
package/src/app.js
CHANGED
|
@@ -402,6 +402,7 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
|
|
|
402
402
|
ignorePatterns.push('**/do/.tune_helper.py');
|
|
403
403
|
ignorePatterns.push('**/do/.stage_helper.py');
|
|
404
404
|
ignorePatterns.push('**/do/.adapter_helper.py');
|
|
405
|
+
ignorePatterns.push('**/do/.register_helper.py');
|
|
405
406
|
ignorePatterns.push('**/do/train');
|
|
406
407
|
ignorePatterns.push('**/do/.train_build_request.py');
|
|
407
408
|
ignorePatterns.push('**/do/.train_status_parser.py');
|
|
@@ -578,6 +579,20 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
|
|
|
578
579
|
fs.writeFileSync(gitignorePath, mlccIgnore);
|
|
579
580
|
}
|
|
580
581
|
}
|
|
582
|
+
|
|
583
|
+
// Add __pycache__/ and *.pyc to .gitignore (Python helpers leave bytecode behind)
|
|
584
|
+
{
|
|
585
|
+
const gitignorePath = path.join(destDir, '.gitignore');
|
|
586
|
+
const pycacheIgnore = '# Python bytecode (generated by do/ helper scripts)\n__pycache__/\n*.pyc\n';
|
|
587
|
+
if (fs.existsSync(gitignorePath)) {
|
|
588
|
+
const existing = fs.readFileSync(gitignorePath, 'utf8');
|
|
589
|
+
if (!existing.includes('__pycache__')) {
|
|
590
|
+
fs.appendFileSync(gitignorePath, `\n${pycacheIgnore}`);
|
|
591
|
+
}
|
|
592
|
+
} else {
|
|
593
|
+
fs.writeFileSync(gitignorePath, pycacheIgnore);
|
|
594
|
+
}
|
|
595
|
+
}
|
|
581
596
|
}
|
|
582
597
|
|
|
583
598
|
/**
|
package/src/lib/config-loader.js
CHANGED
|
@@ -265,6 +265,21 @@ export default class ConfigLoader {
|
|
|
265
265
|
return;
|
|
266
266
|
}
|
|
267
267
|
|
|
268
|
+
// Handle icEnvVars object (deploy-time IC environment variables)
|
|
269
|
+
if (key === 'icEnvVars' && typeof value === 'object' && value !== null) {
|
|
270
|
+
if (!this.manager.config.icEnvVars) {
|
|
271
|
+
this.manager.config.icEnvVars = {};
|
|
272
|
+
}
|
|
273
|
+
const cliIcEnvVars = (this.manager.explicitConfig && this.manager.explicitConfig.icEnvVars) || {};
|
|
274
|
+
Object.entries(value).forEach(([envKey, envValue]) => {
|
|
275
|
+
if (!(envKey in cliIcEnvVars)) {
|
|
276
|
+
this.manager.config.icEnvVars[envKey] = envValue;
|
|
277
|
+
this.manager._recordSource(`icEnvVars.${envKey}`, envValue, 'config-file');
|
|
278
|
+
}
|
|
279
|
+
});
|
|
280
|
+
return;
|
|
281
|
+
}
|
|
282
|
+
|
|
268
283
|
if (this.manager._isSourceSupported(key, 'configFile')) {
|
|
269
284
|
filteredConfig[key] = this.manager._parseValue(key, value);
|
|
270
285
|
this.manager._recordSource(key, this.manager._parseValue(key, value), 'config-file');
|
|
@@ -342,6 +357,9 @@ export default class ConfigLoader {
|
|
|
342
357
|
|
|
343
358
|
// Parse --server-env KEY=VALUE pairs
|
|
344
359
|
this._parseEnvVarOptions('server-env', 'serverEnvVars');
|
|
360
|
+
|
|
361
|
+
// Parse --ic-env KEY=VALUE pairs (deploy-time IC environment variables)
|
|
362
|
+
this._parseEnvVarOptions('ic-env', 'icEnvVars');
|
|
345
363
|
}
|
|
346
364
|
|
|
347
365
|
/**
|
|
@@ -183,6 +183,9 @@ export default class ConfigManager {
|
|
|
183
183
|
if (this.config.serverEnvVars && typeof this.config.serverEnvVars === 'object') {
|
|
184
184
|
finalConfig.serverEnvVars = { ...this.config.serverEnvVars };
|
|
185
185
|
}
|
|
186
|
+
if (this.config.icEnvVars && typeof this.config.icEnvVars === 'object') {
|
|
187
|
+
finalConfig.icEnvVars = { ...this.config.icEnvVars };
|
|
188
|
+
}
|
|
186
189
|
|
|
187
190
|
// Ensure all parameters from the matrix are included in final config
|
|
188
191
|
// This is important for optional parameters that might be null
|
|
@@ -411,7 +414,8 @@ export default class ConfigManager {
|
|
|
411
414
|
...endpointParams,
|
|
412
415
|
...icParams,
|
|
413
416
|
'modelEnvVars',
|
|
414
|
-
'serverEnvVars'
|
|
417
|
+
'serverEnvVars',
|
|
418
|
+
'icEnvVars'
|
|
415
419
|
]);
|
|
416
420
|
const core = {};
|
|
417
421
|
for (const [key, value] of Object.entries(this.config)) {
|
|
@@ -426,6 +430,7 @@ export default class ConfigManager {
|
|
|
426
430
|
icConfig,
|
|
427
431
|
modelEnvVars: { ...(this.config.modelEnvVars || {}) },
|
|
428
432
|
serverEnvVars: { ...(this.config.serverEnvVars || {}) },
|
|
433
|
+
icEnvVars: { ...(this.config.icEnvVars || {}) },
|
|
429
434
|
manifest: [...this._sourceManifest]
|
|
430
435
|
};
|
|
431
436
|
}
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Dataset Slug Derivation
|
|
6
|
+
*
|
|
7
|
+
* Derives a deterministic, short slug from a dataset URI for use in
|
|
8
|
+
* tuning-job-aware adapter naming conventions.
|
|
9
|
+
*
|
|
10
|
+
* Slugification rules:
|
|
11
|
+
* - Lowercase
|
|
12
|
+
* - Strip non-alphanumeric characters (keep hyphens)
|
|
13
|
+
* - Truncate to 20 characters
|
|
14
|
+
* - Replace consecutive hyphens with single hyphen
|
|
15
|
+
* - Strip leading/trailing hyphens
|
|
16
|
+
*
|
|
17
|
+
* Examples:
|
|
18
|
+
* hf://org/name -> "name"
|
|
19
|
+
* hf://tatsu-lab/alpaca -> "alpaca"
|
|
20
|
+
* hf://Open-Orca/OpenOrca -> "openorca"
|
|
21
|
+
* s3://bucket/path/file.jsonl -> "file"
|
|
22
|
+
*
|
|
23
|
+
* Requirements: US-4 (AC-4.2)
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
/**
 * Derive a short slug for a dataset from its URI.
 *
 * How the raw name is picked depends on the URI scheme:
 *   - `hf://org/name[/split][?file=pattern]` uses the second path component
 *     (the dataset name), or the first one when there is no second.
 *   - `s3://bucket/path/file.ext` uses the last path component with its final
 *     extension removed.
 *   - Anything else uses the text after the last `/` unchanged.
 *
 * The raw name is then normalised by `slugify`.
 *
 * @param {string} datasetUri - Dataset URI (s3://... or hf://...)
 * @returns {string} The derived slug, or empty string if extraction fails
 */
export function deriveDatasetSlug(datasetUri) {
    if (typeof datasetUri !== 'string' || datasetUri === '') {
        return '';
    }

    if (datasetUri.startsWith('hf://')) {
        // Drop the scheme and any "?file=..." suffix before splitting the path.
        const [pathOnly] = datasetUri.slice('hf://'.length).split('?');
        const [org, name] = pathOnly.split('/');
        return slugify(name || org || '');
    }

    if (datasetUri.startsWith('s3://')) {
        const filename = datasetUri.slice('s3://'.length).split('/').pop() || '';
        // A dot at index 0 is not treated as an extension separator.
        const extensionStart = filename.lastIndexOf('.');
        const stem = extensionStart > 0 ? filename.substring(0, extensionStart) : filename;
        return slugify(stem);
    }

    // Unknown scheme: fall back to whatever follows the last slash.
    const lastSlash = datasetUri.lastIndexOf('/');
    return slugify(datasetUri.slice(lastSlash + 1));
}
|
|
63
|
+
|
|
64
|
+
/**
 * Apply slugification rules to a raw name.
 *
 * Steps, in order:
 *   1. Lowercase.
 *   2. Remove every character that is not `a-z`, `0-9` or `-`.
 *   3. Collapse runs of hyphens into a single hyphen.
 *   4. Strip leading and trailing hyphens.
 *   5. Truncate to 20 characters, then strip any hyphen left at the end.
 *
 * Non-string input (including null/undefined) yields an empty string rather
 * than throwing, matching the input guard in `deriveDatasetSlug`.
 *
 * @param {string} raw - Raw name to slugify
 * @returns {string} Slugified string, or empty string for empty/non-string input
 */
export function slugify(raw) {
    // Guard against truthy non-strings (numbers, objects), which have no toLowerCase().
    if (typeof raw !== 'string' || raw === '') return '';

    let slug = raw
        .toLowerCase()
        .replace(/[^a-z0-9-]/g, '') // strip non-alphanumeric (keep hyphens)
        .replace(/-{2,}/g, '-')     // collapse consecutive hyphens
        .replace(/^-+/, '')         // strip leading hyphens
        .replace(/-+$/, '');        // strip trailing hyphens

    // Truncate to 20 chars
    if (slug.length > 20) {
        // Truncation can expose a hyphen at the new end; remove it.
        slug = slug.substring(0, 20).replace(/-+$/, '');
    }

    return slug;
}
|
|
89
|
+
|
|
90
|
+
/**
 * Resolve a --from-tune argument to the appropriate config variable name.
 *
 * Resolution rules:
 *   - No arg (empty/null) -> TUNE_OUTPUT_PATH_LATEST
 *   - technique only (e.g., "sft") -> TUNE_ADAPTER_PATH_SFT
 *   - technique-dataset compound (e.g., "sft-alpaca") -> TUNE_ADAPTER_PATH_SFT_ALPACA
 *     when that variable exists; otherwise the technique-only variable is
 *     returned and the compound name that was tried is reported in `fallback`.
 *
 * The argument is split at its FIRST hyphen; hyphens inside the slug part are
 * mapped to underscores in the variable name. An argument whose slug part is
 * empty (e.g. "sft-") is resolved as technique-only, so no malformed
 * `TUNE_ADAPTER_PATH_SFT_` name is ever built or probed.
 *
 * @param {string} fromTuneArg - The --from-tune argument value
 * @param {function} configVarExists - Function that checks if a config var exists;
 *   only called for compound arguments with a non-empty slug
 * @returns {{ varName: string, technique: string, slug: string, isCompound: boolean, fallback: string|null }}
 */
export function resolveFromTuneVar(fromTuneArg, configVarExists) {
    if (!fromTuneArg) {
        return {
            varName: 'TUNE_OUTPUT_PATH_LATEST',
            technique: '',
            slug: '',
            isCompound: false,
            fallback: null
        };
    }

    // A hyphen at index > 0 marks a potential compound key.
    const hyphenIndex = fromTuneArg.indexOf('-');
    const hasSeparator = hyphenIndex > 0;
    const slug = hasSeparator ? fromTuneArg.substring(hyphenIndex + 1) : '';

    if (hasSeparator && slug !== '') {
        const technique = fromTuneArg.substring(0, hyphenIndex);
        const techniqueUpper = technique.toUpperCase();
        const slugUpper = slug.toUpperCase().replace(/-/g, '_');
        const compoundVar = `TUNE_ADAPTER_PATH_${techniqueUpper}_${slugUpper}`;

        if (configVarExists(compoundVar)) {
            return {
                varName: compoundVar,
                technique,
                slug,
                isCompound: true,
                fallback: null
            };
        }

        // Compound key doesn't exist — fall back to technique-only
        return {
            varName: `TUNE_ADAPTER_PATH_${techniqueUpper}`,
            technique,
            slug,
            isCompound: false,
            fallback: compoundVar // the compound var that was tried but didn't exist
        };
    }

    // Technique-only: either no separator, or a separator followed by nothing.
    const technique = hasSeparator ? fromTuneArg.substring(0, hyphenIndex) : fromTuneArg;
    return {
        varName: `TUNE_ADAPTER_PATH_${technique.toUpperCase()}`,
        technique,
        slug: '',
        isCompound: false,
        fallback: null
    };
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
// AUTO-GENERATED by scripts/codegen-cli.js — DO NOT EDIT
|
|
2
2
|
// Source: config/parameter-schema-v2.json
|
|
3
|
-
// Generated: 2026-06-
|
|
3
|
+
// Generated: 2026-06-22T13:49:00.815Z
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
6
|
* CLI option definitions derived from parameter-schema-v2.json.
|
|
@@ -70,7 +70,7 @@ export const cliOptions = [
|
|
|
70
70
|
{
|
|
71
71
|
'flag': '--enable-lora',
|
|
72
72
|
'description': 'Enable LoRA adapter serving',
|
|
73
|
-
'defaultValue':
|
|
73
|
+
'defaultValue': true
|
|
74
74
|
},
|
|
75
75
|
{
|
|
76
76
|
'flag': '--max-loras <n>',
|
|
@@ -85,7 +85,7 @@ export const cliOptions = [
|
|
|
85
85
|
{
|
|
86
86
|
'flag': '--include-benchmark',
|
|
87
87
|
'description': 'Include SageMaker AI Benchmarking scripts (do/benchmark, do/optimize). Workload configuration is specified at runtime via --workload flag.',
|
|
88
|
-
'defaultValue':
|
|
88
|
+
'defaultValue': true
|
|
89
89
|
},
|
|
90
90
|
{
|
|
91
91
|
'flag': '--benchmark-concurrency <n>',
|
|
@@ -353,6 +353,11 @@ export const cliOptions = [
|
|
|
353
353
|
'description': 'Server env var, repeatable (e.g. SGLANG_MEM_FRACTION=0.9)',
|
|
354
354
|
'repeatable': true
|
|
355
355
|
},
|
|
356
|
+
{
|
|
357
|
+
'flag': '--ic-env <KEY=VALUE>',
|
|
358
|
+
'description': 'Deploy-time environment variable for inference components (IC_ENV_* prefix), repeatable (e.g. VLLM_MAX_MODEL_LEN=8192)',
|
|
359
|
+
'repeatable': true
|
|
360
|
+
},
|
|
356
361
|
{
|
|
357
362
|
'flag': '--include-sample',
|
|
358
363
|
'description': 'Include sample model code',
|
|
@@ -464,6 +469,7 @@ export const helpGroups = {
|
|
|
464
469
|
'--fsx-volume-handle': 'hyperpod',
|
|
465
470
|
'--model-env': 'env',
|
|
466
471
|
'--server-env': 'env',
|
|
472
|
+
'--ic-env': 'ic',
|
|
467
473
|
'--include-sample': 'features',
|
|
468
474
|
'--include-testing': 'features',
|
|
469
475
|
'--test-types': 'features',
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
// AUTO-GENERATED by scripts/codegen-parameter-matrix.js — DO NOT EDIT
|
|
2
2
|
// Source: config/parameter-schema-v2.json
|
|
3
|
-
// Generated: 2026-06-
|
|
3
|
+
// Generated: 2026-06-22T13:49:00.924Z
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
6
|
* Parameter matrix defining how each parameter is loaded from various sources.
|
|
@@ -106,7 +106,7 @@ export const parameterMatrix = {
|
|
|
106
106
|
'mcp': false,
|
|
107
107
|
'promptable': true,
|
|
108
108
|
'required': false,
|
|
109
|
-
'default':
|
|
109
|
+
'default': true,
|
|
110
110
|
'valueSpace': 'bounded'
|
|
111
111
|
},
|
|
112
112
|
'maxLoras': {
|
|
@@ -139,7 +139,7 @@ export const parameterMatrix = {
|
|
|
139
139
|
'mcp': false,
|
|
140
140
|
'promptable': true,
|
|
141
141
|
'required': false,
|
|
142
|
-
'default':
|
|
142
|
+
'default': true,
|
|
143
143
|
'valueSpace': 'bounded'
|
|
144
144
|
},
|
|
145
145
|
'benchmarkConcurrency': {
|
|
@@ -569,6 +569,17 @@ export const parameterMatrix = {
|
|
|
569
569
|
'default': null,
|
|
570
570
|
'valueSpace': 'unbounded'
|
|
571
571
|
},
|
|
572
|
+
'icEnv': {
|
|
573
|
+
'cliOption': 'ic-env',
|
|
574
|
+
'envVar': null,
|
|
575
|
+
'configFile': true,
|
|
576
|
+
'packageJson': false,
|
|
577
|
+
'mcp': false,
|
|
578
|
+
'promptable': false,
|
|
579
|
+
'required': false,
|
|
580
|
+
'default': [],
|
|
581
|
+
'valueSpace': 'unbounded'
|
|
582
|
+
},
|
|
572
583
|
'includeSampleModel': {
|
|
573
584
|
'cliOption': 'include-sample',
|
|
574
585
|
'envVar': 'ML_INCLUDE_SAMPLE',
|
|
@@ -216,6 +216,12 @@ export default class McpQueryRunner {
|
|
|
216
216
|
if (parsed.choices?.instanceType?.length > 0) {
|
|
217
217
|
this.runner._instanceSizerMetadata = parsed.metadata || null;
|
|
218
218
|
|
|
219
|
+
// Store maxModelLen from sizer if context was capped (AC-1.7)
|
|
220
|
+
if (parsed.values?.maxModelLen) {
|
|
221
|
+
this.runner._sizerMaxModelLen = parsed.values.maxModelLen;
|
|
222
|
+
console.log(` ✓ Context length capped: max_model_len=${parsed.values.maxModelLen}`);
|
|
223
|
+
}
|
|
224
|
+
|
|
219
225
|
// Build display labels with VRAM estimate and utilization percentage
|
|
220
226
|
const recommendations = parsed.metadata?.recommendations || [];
|
|
221
227
|
const estimatedVramGb = parsed.metadata?.estimatedVramGb;
|
package/src/lib/prompt-runner.js
CHANGED
|
@@ -710,6 +710,11 @@ export default class PromptRunner {
|
|
|
710
710
|
delete combinedAnswers.customHyperPodCluster;
|
|
711
711
|
}
|
|
712
712
|
|
|
713
|
+
// Propagate max_model_len from instance-sizer context capping (AC-1.7)
|
|
714
|
+
if (this._sizerMaxModelLen) {
|
|
715
|
+
combinedAnswers.sizerMaxModelLen = this._sizerMaxModelLen;
|
|
716
|
+
}
|
|
717
|
+
|
|
713
718
|
// Apply CUDA version selection → inference AMI override
|
|
714
719
|
if (combinedAnswers._resolvedInferenceAmiVersion) {
|
|
715
720
|
combinedAnswers.inferenceAmiVersion = combinedAnswers._resolvedInferenceAmiVersion;
|
|
@@ -90,7 +90,7 @@ const loraPrompts = [
|
|
|
90
90
|
type: 'confirm',
|
|
91
91
|
name: 'enableLora',
|
|
92
92
|
message: 'Enable LoRA adapter serving?',
|
|
93
|
-
default:
|
|
93
|
+
default: true,
|
|
94
94
|
when: (answers) => {
|
|
95
95
|
const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
|
|
96
96
|
const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
|
|
@@ -314,13 +314,6 @@ export default class TemplateManager {
|
|
|
314
314
|
_validateBenchmarkConfig() {
|
|
315
315
|
if (!this.answers.includeBenchmark) return;
|
|
316
316
|
|
|
317
|
-
// Gate to supported architectures
|
|
318
|
-
const dc = this.answers.deploymentConfig;
|
|
319
|
-
const arch = dc ? dc.split('-')[0] : this.answers.architecture;
|
|
320
|
-
if (arch !== 'transformers' && arch !== 'diffusors') {
|
|
321
|
-
throw new Error('⚠️ Benchmarking is only supported with transformers and diffusors architectures.');
|
|
322
|
-
}
|
|
323
|
-
|
|
324
317
|
// Gate to supported deployment targets
|
|
325
318
|
if (this.answers.deploymentTarget === 'hyperpod-eks') {
|
|
326
319
|
throw new Error('⚠️ Benchmarking is only supported with managed-inference, async-inference, and batch-transform deployment targets');
|
|
@@ -232,7 +232,7 @@ export async function _ensureTemplateVariables(answers, registryConfigManager =
|
|
|
232
232
|
artifactUri: '',
|
|
233
233
|
modelLoadStrategy: 'runtime',
|
|
234
234
|
existingEndpointName: null,
|
|
235
|
-
enableLora:
|
|
235
|
+
enableLora: true,
|
|
236
236
|
maxLoras: 30,
|
|
237
237
|
maxLoraRank: 64
|
|
238
238
|
};
|
|
@@ -261,6 +261,20 @@ export async function _ensureTemplateVariables(answers, registryConfigManager =
|
|
|
261
261
|
}
|
|
262
262
|
}
|
|
263
263
|
|
|
264
|
+
// Always include benchmarking by default (AC-2.3 — enabled for all architectures).
|
|
265
|
+
// Only set when not explicitly provided by user (AC-2.4, AC-2.7 — respect explicit opt-out).
|
|
266
|
+
if (answers.includeBenchmark === undefined) {
|
|
267
|
+
answers.includeBenchmark = true;
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
// Enforce enableLora scoping: only LoRA-capable servers get enableLora=true
|
|
271
|
+
// (AC-2.1, NFR-2). All incompatible backends are forced to false.
|
|
272
|
+
const loraCapableServers = ['vllm', 'sglang', 'djl-lmi', 'lmi', 'djl'];
|
|
273
|
+
const resolvedBackend = answers.backend || answers.modelServer;
|
|
274
|
+
if (!loraCapableServers.includes(resolvedBackend)) {
|
|
275
|
+
answers.enableLora = false;
|
|
276
|
+
}
|
|
277
|
+
|
|
264
278
|
// Merge catalog env vars into answers.envVars with correct precedence
|
|
265
279
|
await _mergeEnvVarsWithPrecedence(answers, registryConfigManager);
|
|
266
280
|
|
|
@@ -445,6 +459,35 @@ export async function _ensureTemplateVariables(answers, registryConfigManager =
|
|
|
445
459
|
}
|
|
446
460
|
}
|
|
447
461
|
|
|
462
|
+
// Propagate max_model_len from instance-sizer context capping to env vars (AC-1.7).
|
|
463
|
+
// The instance-sizer sets sizerMaxModelLen when the model's full context doesn't fit
|
|
464
|
+
// on the recommended instance. Write as VLLM_MAX_MODEL_LEN or SGLANG_MAX_MODEL_LEN.
|
|
465
|
+
const _MAX_MODEL_LEN_ENGINE_MAP = {
|
|
466
|
+
'vllm': 'VLLM_MAX_MODEL_LEN',
|
|
467
|
+
'vllm-omni': 'VLLM_MAX_MODEL_LEN',
|
|
468
|
+
'sglang': 'SGLANG_MAX_MODEL_LEN'
|
|
469
|
+
};
|
|
470
|
+
|
|
471
|
+
if (answers.sizerMaxModelLen) {
|
|
472
|
+
const maxLenEngine = answers.backend || answers.modelServer;
|
|
473
|
+
const maxLenEnvKey = maxLenEngine ? _MAX_MODEL_LEN_ENGINE_MAP[maxLenEngine] : null;
|
|
474
|
+
if (maxLenEnvKey) {
|
|
475
|
+
// Only set if user hasn't explicitly provided this env var
|
|
476
|
+
const userServerEnvVars = answers.serverEnvVars || {};
|
|
477
|
+
const userExplicitlySetMaxLen = (
|
|
478
|
+
userServerEnvVars['MAX_MODEL_LEN'] !== undefined ||
|
|
479
|
+
userServerEnvVars[maxLenEnvKey] !== undefined
|
|
480
|
+
);
|
|
481
|
+
if (!userExplicitlySetMaxLen && (!answers.envVars || !answers.envVars[maxLenEnvKey])) {
|
|
482
|
+
if (!answers.envVars) {
|
|
483
|
+
answers.envVars = {};
|
|
484
|
+
}
|
|
485
|
+
answers.envVars[maxLenEnvKey] = String(answers.sizerMaxModelLen);
|
|
486
|
+
console.log(` ℹ️ max_model_len: ${answers.sizerMaxModelLen} (context capped by instance-sizer)`);
|
|
487
|
+
}
|
|
488
|
+
}
|
|
489
|
+
}
|
|
490
|
+
|
|
448
491
|
// Determine tune support based on model presence in the tune catalog.
|
|
449
492
|
// Used by the do/config template to write TUNE_SUPPORTED=true|false.
|
|
450
493
|
if (answers.tuneSupported === undefined) {
|
|
@@ -481,4 +524,11 @@ export async function _ensureTemplateVariables(answers, registryConfigManager =
|
|
|
481
524
|
answers.tuneModelId = null;
|
|
482
525
|
}
|
|
483
526
|
}
|
|
527
|
+
|
|
528
|
+
// Propagate --ic-env KEY=VALUE pairs to icEnvVars for do/config template rendering.
|
|
529
|
+
// These are rendered as IC_ENV_* exports in do/config, which inference-component.sh
|
|
530
|
+
// reads at deploy time and passes as the Environment field in InferenceComponent.create().
|
|
531
|
+
if (!answers.icEnvVars) {
|
|
532
|
+
answers.icEnvVars = {};
|
|
533
|
+
}
|
|
484
534
|
}
|
|
@@ -74,22 +74,35 @@ export function persistSubmissionState(configPath, { technique, trainingType, da
|
|
|
74
74
|
* Simulate the config writes that happen after a job completes successfully.
|
|
75
75
|
* This mirrors the behavior in do/tune's _handle_completion() function.
|
|
76
76
|
*
|
|
77
|
+
* Writes three levels of tracking (AC-4.1, AC-4.2):
|
|
78
|
+
* - Level 1: TUNE_OUTPUT_PATH_LATEST (always the last run, any technique)
|
|
79
|
+
* - Level 2: TUNE_ADAPTER_PATH_<TECHNIQUE> (last run per technique)
|
|
80
|
+
* - Level 3: TUNE_ADAPTER_PATH_<TECHNIQUE>_<SLUG> (per technique + dataset slug)
|
|
81
|
+
*
|
|
77
82
|
* @param {string} configPath - Path to the config file
|
|
78
83
|
* @param {object} params - Completion parameters
|
|
79
84
|
* @param {string} params.technique - Technique (sft, dpo, rlaif, rlvr)
|
|
80
85
|
* @param {string} params.trainingType - Training type (lora, full-rank)
|
|
81
86
|
* @param {string} params.artifactPath - S3 path to the output artifact
|
|
82
87
|
* @param {string} params.outputType - Output type (adapter, full-model)
|
|
88
|
+
* @param {string} [params.datasetSlug] - Optional dataset slug for per-technique-per-dataset tracking
|
|
83
89
|
*/
|
|
84
|
-
export function persistCompletionState(configPath, { technique, trainingType, artifactPath, outputType }) {
|
|
90
|
+
export function persistCompletionState(configPath, { technique, trainingType, artifactPath, outputType, datasetSlug }) {
    // Every write targets the same config file; bind it once.
    const write = (name, value) => updateConfigVar(configPath, name, value);
    const techniqueKey = technique.toUpperCase();

    switch (trainingType) {
        case 'lora': {
            // Level 2: last run for this technique.
            write(`TUNE_ADAPTER_PATH_${techniqueKey}`, artifactPath);
            // Level 3: last run for this technique + dataset, only when a slug was supplied.
            if (datasetSlug) {
                const slugKey = datasetSlug.toUpperCase().replace(/-/g, '_');
                write(`TUNE_ADAPTER_PATH_${techniqueKey}_${slugKey}`, artifactPath);
            }
            break;
        }
        case 'full-rank':
            write(`TUNE_MODEL_PATH_${techniqueKey}`, artifactPath);
            break;
        default:
            // Other training types record only the "latest" entries below.
            break;
    }

    // Level 1: always record the most recent output, whatever the technique.
    write('TUNE_OUTPUT_PATH_LATEST', artifactPath);
    write('TUNE_OUTPUT_TYPE_LATEST', outputType);
}
|