@aws/ml-container-creator 0.13.4 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/README.md +23 -5
  2. package/config/parameter-schema-v2.json +32 -4
  3. package/infra/ci-harness/lib/ci-harness-stack.ts +13 -5
  4. package/infra/ci-harness/package-lock.json +122 -116
  5. package/infra/ci-harness/package.json +1 -1
  6. package/package.json +5 -3
  7. package/pyproject.toml +21 -0
  8. package/requirements.txt +19 -0
  9. package/servers/instance-sizer/index.js +72 -4
  10. package/servers/instance-sizer/lib/model-resolver.js +28 -2
  11. package/src/app.js +17 -0
  12. package/src/lib/bootstrap-command-handler.js +33 -23
  13. package/src/lib/config-loader.js +18 -0
  14. package/src/lib/config-manager.js +6 -1
  15. package/src/lib/dataset-slug.js +152 -0
  16. package/src/lib/generated/cli-options.js +9 -3
  17. package/src/lib/generated/parameter-matrix.js +14 -3
  18. package/src/lib/generated/validation-rules.js +1 -1
  19. package/src/lib/mcp-query-runner.js +6 -0
  20. package/src/lib/prompt-runner.js +5 -0
  21. package/src/lib/prompts/feature-prompts.js +1 -1
  22. package/src/lib/template-manager.js +0 -7
  23. package/src/lib/template-variable-resolver.js +51 -1
  24. package/src/lib/tune-config-state.js +14 -1
  25. package/templates/do/.adapter_helper.py +451 -0
  26. package/templates/do/.benchmark_writer.py +22 -0
  27. package/templates/do/.register_helper.py +1163 -0
  28. package/templates/do/.stage_helper.py +419 -0
  29. package/templates/do/.tune_helper.py +379 -65
  30. package/templates/do/__pycache__/.adapter_helper.cpython-312.pyc +0 -0
  31. package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
  32. package/templates/do/__pycache__/.register_helper.cpython-312.pyc +0 -0
  33. package/templates/do/__pycache__/.tune_helper.cpython-312.pyc +0 -0
  34. package/templates/do/adapter +427 -27
  35. package/templates/do/add-ic +85 -3
  36. package/templates/do/benchmark +173 -15
  37. package/templates/do/config +24 -0
  38. package/templates/do/lib/inference-component.sh +56 -3
  39. package/templates/do/lib/profile.sh +5 -0
  40. package/templates/do/register +552 -6
  41. package/templates/do/stage +91 -272
  42. package/templates/do/test +12 -2
  43. package/templates/do/tune +264 -12
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aws/ml-container-creator",
3
- "version": "0.13.4",
3
+ "version": "0.15.0",
4
4
  "description": "Build and deploy custom ML containers on AWS SageMaker with minimal configuration.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -76,7 +76,9 @@
76
76
  "README.md",
77
77
  "LICENSE",
78
78
  "LICENSE-THIRD-PARTY",
79
- "NOTICE"
79
+ "NOTICE",
80
+ "requirements.txt",
81
+ "pyproject.toml"
80
82
  ],
81
83
  "type": "module",
82
84
  "license": "Apache-2.0",
@@ -127,7 +129,7 @@
127
129
  "lint-staged": "^17.0.7",
128
130
  "mocha": "^10.2.0",
129
131
  "npm-force-resolutions": "^0.0.10",
130
- "nyc": "^15.1.0",
132
+ "nyc": "^18.0.0",
131
133
  "sbom": "^0.0.0"
132
134
  },
133
135
  "lint-staged": {
package/pyproject.toml ADDED
@@ -0,0 +1,21 @@
1
+ [project]
2
+ name = "ml-container-creator"
3
+ version = "0.13.4"
4
+ description = "Python dependencies for ml-container-creator do/ lifecycle scripts"
5
+ requires-python = ">=3.10"
6
+ dependencies = [
7
+ "boto3>=1.35.0",
8
+ "huggingface-hub>=0.25.0",
9
+ "hf-transfer>=0.1.8",
10
+ "pyarrow>=17.0.0",
11
+ "sagemaker-core>=1.0.0",
12
+ "sagemaker[train]>=3.0.0",
13
+ "sagemaker[serve]>=3.0.0",
14
+ "packaging>=24.0",
15
+ "pyyaml>=6.0",
16
+ ]
17
+
18
+ [dependency-groups]
19
+ dev = [
20
+ "pytest>=8.0",
21
+ ]
package/requirements.txt ADDED
@@ -0,0 +1,19 @@
1
+ # Python dependencies for do/ lifecycle scripts
2
+ #
3
+ # Install with uv (recommended):
4
+ # uv pip install -r requirements.txt
5
+ #
6
+ # Or with pip:
7
+ # pip install -r requirements.txt
8
+ #
9
+ # Source of truth: pyproject.toml
10
+
11
+ boto3>=1.35.0
12
+ huggingface_hub>=0.25.0
13
+ hf_transfer>=0.1.8
14
+ pyarrow>=17.0.0
15
+ sagemaker-core>=1.0.0
16
+ sagemaker[train]>=3.0.0
17
+ sagemaker[serve]>=3.0.0
18
+ packaging>=24.0
19
+ PyYAML>=6.0
package/servers/instance-sizer/index.js CHANGED
@@ -25,8 +25,8 @@ import { readFileSync } from 'node:fs';
25
25
  import { fileURLToPath } from 'node:url';
26
26
  import { resolve, dirname } from 'node:path';
27
27
  import { resolveModelMetadata } from './lib/model-resolver.js';
28
- import { estimateVram } from './lib/vram-estimator.js';
29
- import { filterAndRankInstances, applyAvailabilityRanking } from './lib/instance-ranker.js';
28
+ import { estimateVram, computeMaxModelLen } from './lib/vram-estimator.js';
29
+ import { filterAndRankInstances, applyAvailabilityRanking, getPerGpuMemoryGb } from './lib/instance-ranker.js';
30
30
  import { QuotaResolver } from './lib/quota-resolver.js';
31
31
  import { queryBedrock } from '../lib/bedrock-client.js';
32
32
 
@@ -393,6 +393,66 @@ async function handleGetInstanceRecommendation(params) {
393
393
  { limit }
394
394
  );
395
395
 
396
+ // Step 3-max_model_len: When no instance fits at full context, try capping context length
397
+ // NFR-1 guard: skip this logic for models with recommendedInstances in catalog
398
+ let suggestedMaxModelLen = null;
399
+ let contextLengthCapped = false;
400
+ let originalMaxPositionEmbeddings = null;
401
+
402
+ if (recommendations.length === 0 && !modelMetadata.recommendedInstances && modelMetadata.maxPositionEmbeddings) {
403
+ // Find the largest available GPU instance
404
+ const gpuInstances = Object.entries(effectiveCatalog)
405
+ .filter(([, meta]) => meta.category === 'gpu' && meta.gpus > 0)
406
+ .map(([name, meta]) => {
407
+ const perGpu = getPerGpuMemoryGb(meta);
408
+ return { name, meta, totalVramGb: perGpu ? perGpu * meta.gpus : 0 };
409
+ })
410
+ .filter(i => i.totalVramGb > 0)
411
+ .sort((a, b) => b.totalVramGb - a.totalVramGb);
412
+
413
+ if (gpuInstances.length > 0) {
414
+ const bestInstance = gpuInstances[0];
415
+
416
+ // Compute model weight memory for computeMaxModelLen
417
+ const weightsGb = vramEstimate.breakdown.weightsGb;
418
+
419
+ const safeLen = computeMaxModelLen({
420
+ modelWeightGb: weightsGb,
421
+ totalGpuMemoryGb: bestInstance.meta.gpuMemoryGb || (bestInstance.totalVramGb / bestInstance.meta.gpus),
422
+ gpuCount: bestInstance.meta.gpus,
423
+ numLayers: modelMetadata.numLayers,
424
+ numKvHeads: modelMetadata.numKvHeads,
425
+ headDim: modelMetadata.headDim
426
+ });
427
+
428
+ if (safeLen && safeLen.maxModelLen >= 2048) {
429
+ // Re-estimate VRAM with capped sequence length
430
+ const cappedEstimate = estimateVram({
431
+ parameterCount: modelMetadata.parameterCount,
432
+ dtype: modelMetadata.dtype,
433
+ quantization: quantization || undefined,
434
+ maxSequenceLength: safeLen.maxModelLen,
435
+ batchSize: effectiveBatchSize || undefined
436
+ });
437
+
438
+ // Re-filter instances with the reduced VRAM requirement
439
+ recommendations = filterAndRankInstances(
440
+ cappedEstimate.vramGb,
441
+ effectiveCatalog,
442
+ { limit }
443
+ );
444
+
445
+ suggestedMaxModelLen = safeLen.maxModelLen;
446
+ contextLengthCapped = true;
447
+ originalMaxPositionEmbeddings = modelMetadata.maxPositionEmbeddings;
448
+ log(`Context capped: ${modelMetadata.maxPositionEmbeddings} → ${safeLen.maxModelLen} for ${modelName}`);
449
+ } else {
450
+ // AC-1.6: safeLen < 2048 or null — recommend larger instance instead
451
+ log(`Model ${modelName} cannot fit 2048 context on ${bestInstance.name}, recommending larger instance`);
452
+ }
453
+ }
454
+ }
455
+
396
456
  // Step 3a: Quota & availability filtering (discover mode only)
397
457
  let preQuotaFilterCount = 0;
398
458
  let allFilteredByQuota = false;
@@ -521,7 +581,10 @@ async function handleGetInstanceRecommendation(params) {
521
581
  content: [{
522
582
  type: 'text',
523
583
  text: JSON.stringify({
524
- values: { instanceType: topRecommendation },
584
+ values: {
585
+ instanceType: topRecommendation,
586
+ ...(suggestedMaxModelLen ? { maxModelLen: suggestedMaxModelLen } : {})
587
+ },
525
588
  choices: { instanceType: rankedList },
526
589
  metadata: {
527
590
  modelName,
@@ -533,7 +596,12 @@ async function handleGetInstanceRecommendation(params) {
533
596
  recommendations: finalRecommendations,
534
597
  source: modelMetadata.source,
535
598
  smartModeUsed,
536
- allFilteredByQuota
599
+ allFilteredByQuota,
600
+ ...(contextLengthCapped ? {
601
+ suggestedMaxModelLen,
602
+ contextLengthCapped: true,
603
+ originalMaxPositionEmbeddings
604
+ } : {})
537
605
  }
538
606
  })
539
607
  }]
package/servers/instance-sizer/lib/model-resolver.js CHANGED
@@ -142,13 +142,27 @@ export function extractFromHuggingFaceConfig(config) {
142
142
  const architecture = (config.architectures && config.architectures[0]) || 'unknown';
143
143
  const maxPositionEmbeddings = config.max_position_embeddings || 4096;
144
144
 
145
- return {
145
+ // Extract architecture params for KV cache computation (computeMaxModelLen)
146
+ const numLayers = config.num_hidden_layers || null;
147
+ const numKvHeads = config.num_key_value_heads || config.num_attention_heads || null;
148
+ const headDim = config.head_dim || (config.hidden_size && config.num_attention_heads
149
+ ? Math.floor(config.hidden_size / config.num_attention_heads)
150
+ : null);
151
+
152
+ const result = {
146
153
  parameterCount,
147
154
  dtype,
148
155
  architecture,
149
156
  maxPositionEmbeddings,
150
157
  source: 'huggingface_api'
151
158
  };
159
+
160
+ // Only include architecture params if available (graceful degradation)
161
+ if (numLayers) result.numLayers = numLayers;
162
+ if (numKvHeads) result.numKvHeads = numKvHeads;
163
+ if (headDim) result.headDim = headDim;
164
+
165
+ return result;
152
166
  }
153
167
 
154
168
  /**
@@ -175,13 +189,25 @@ export async function resolveModelMetadata(modelName, options = {}) {
175
189
  const catalogEntry = catalogLookup(modelName, catalog);
176
190
 
177
191
  if (catalogEntry) {
178
- return {
192
+ const result = {
179
193
  parameterCount: catalogEntry.parameterCount,
180
194
  dtype: catalogEntry.defaultDtype || 'float16',
181
195
  architecture: catalogEntry.architecture || 'unknown',
182
196
  maxPositionEmbeddings: catalogEntry.maxPositionEmbeddings || 4096,
183
197
  source: 'catalog'
184
198
  };
199
+
200
+ // Pass through recommendedInstances for NFR-1 guard
201
+ if (catalogEntry.recommendedInstances) {
202
+ result.recommendedInstances = catalogEntry.recommendedInstances;
203
+ }
204
+
205
+ // Pass through architecture params if available in catalog
206
+ if (catalogEntry.numLayers) result.numLayers = catalogEntry.numLayers;
207
+ if (catalogEntry.numKvHeads) result.numKvHeads = catalogEntry.numKvHeads;
208
+ if (catalogEntry.headDim) result.headDim = catalogEntry.headDim;
209
+
210
+ return result;
185
211
  }
186
212
 
187
213
  // Step 2: If discover mode, try HuggingFace Hub
package/src/app.js CHANGED
@@ -400,6 +400,9 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
400
400
  ignorePatterns.push('**/do/adapters/**');
401
401
  ignorePatterns.push('**/do/tune');
402
402
  ignorePatterns.push('**/do/.tune_helper.py');
403
+ ignorePatterns.push('**/do/.stage_helper.py');
404
+ ignorePatterns.push('**/do/.adapter_helper.py');
405
+ ignorePatterns.push('**/do/.register_helper.py');
403
406
  ignorePatterns.push('**/do/train');
404
407
  ignorePatterns.push('**/do/.train_build_request.py');
405
408
  ignorePatterns.push('**/do/.train_status_parser.py');
@@ -576,6 +579,20 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
576
579
  fs.writeFileSync(gitignorePath, mlccIgnore);
577
580
  }
578
581
  }
582
+
583
+ // Add __pycache__/ and *.pyc to .gitignore (Python helpers leave bytecode behind)
584
+ {
585
+ const gitignorePath = path.join(destDir, '.gitignore');
586
+ const pycacheIgnore = '# Python bytecode (generated by do/ helper scripts)\n__pycache__/\n*.pyc\n';
587
+ if (fs.existsSync(gitignorePath)) {
588
+ const existing = fs.readFileSync(gitignorePath, 'utf8');
589
+ if (!existing.includes('__pycache__')) {
590
+ fs.appendFileSync(gitignorePath, `\n${pycacheIgnore}`);
591
+ }
592
+ } else {
593
+ fs.writeFileSync(gitignorePath, pycacheIgnore);
594
+ }
595
+ }
579
596
  }
580
597
 
581
598
  /**
package/src/lib/bootstrap-command-handler.js CHANGED
@@ -459,39 +459,49 @@ export default class BootstrapCommandHandler {
459
459
 
460
460
  // --no-rollback prevents rollback on AlreadyExists errors for IAM roles
461
461
  // that may pre-exist from a prior deployment or another region.
462
- // Check if benchmark bucket already exists (from a prior torn-down stack with RETAIN policy)
463
- let importBucketCtx = '';
462
+ // Check if benchmark results bucket already exists.
463
+ // If it does, skip CDK deploy for benchmark infra — just update the profile.
464
+ let benchmarkBucketExists = false;
464
465
  if (options.benchmarkInfra) {
466
+ const resultsBucketName = `mlcc-benchmark-results-${profileData.accountId}-${profileData.awsRegion}`;
465
467
  try {
466
468
  execSync(
467
- `aws s3api head-bucket --bucket mlcc-benchmark-results-${profileData.accountId}-${profileData.awsRegion}${profileData.awsProfile ? ` --profile ${profileData.awsProfile}` : ''} --region ${profileData.awsRegion}`,
469
+ `aws s3api head-bucket --bucket ${resultsBucketName}${profileData.awsProfile ? ` --profile ${profileData.awsProfile}` : ''} --region ${profileData.awsRegion}`,
468
470
  { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }
469
471
  );
470
- importBucketCtx = ' -c importExistingBenchmarkBucket=true';
471
- console.log(' ℹ️ Benchmark results bucket already exists — importing into stack');
472
+ benchmarkBucketExists = true;
473
+ console.log(` Benchmark results bucket already exists: ${resultsBucketName}`);
474
+ console.log(' Skipping CDK deploy for benchmark infra — updating profile only.');
475
+ profileData.benchmarkInfraProvisioned = true;
476
+ profileData.ciGlueDatabase = profileData.ciGlueDatabase || 'mlcc_ci';
477
+ profileData.ciBenchmarkResultsBucket = resultsBucketName;
472
478
  } catch {
473
479
  // Bucket doesn't exist — will be created fresh
474
480
  }
475
481
  }
476
- const cdkDeployCmd = options.benchmarkInfra
477
- ? `npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback --parameters MlccCiHarnessStack:CreateBenchmarkInfra=true${importBucketCtx}`
478
- : 'npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback';
479
- execSync(
480
- cdkDeployCmd,
481
- {
482
- cwd: ciHarnessDir,
483
- encoding: 'utf8',
484
- stdio: 'inherit',
485
- env: {
486
- ...process.env,
487
- AWS_REGION: profileData.awsRegion,
488
- CDK_DEFAULT_REGION: profileData.awsRegion,
489
- CDK_DEFAULT_ACCOUNT: profileData.accountId,
490
- AWS_PROFILE: profileData.awsProfile
482
+
483
+ // Only run CDK deploy if we actually need to create infrastructure
484
+ if (!benchmarkBucketExists || !options.benchmarkInfra) {
485
+ const cdkDeployCmd = options.benchmarkInfra
486
+ ? 'npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback --parameters MlccCiHarnessStack:CreateBenchmarkInfra=true'
487
+ : 'npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback';
488
+ execSync(
489
+ cdkDeployCmd,
490
+ {
491
+ cwd: ciHarnessDir,
492
+ encoding: 'utf8',
493
+ stdio: 'inherit',
494
+ env: {
495
+ ...process.env,
496
+ AWS_REGION: profileData.awsRegion,
497
+ CDK_DEFAULT_REGION: profileData.awsRegion,
498
+ CDK_DEFAULT_ACCOUNT: profileData.accountId,
499
+ AWS_PROFILE: profileData.awsProfile
500
+ }
491
501
  }
492
- }
493
- );
494
- console.log(' ✅ CI harness stack deployed');
502
+ );
503
+ console.log(' ✅ CI harness stack deployed');
504
+ }
495
505
 
496
506
  profileData.ciInfraProvisioned = true;
497
507
  profileData.ciTableName = 'mlcc-ci-table';
package/src/lib/config-loader.js CHANGED
@@ -265,6 +265,21 @@ export default class ConfigLoader {
265
265
  return;
266
266
  }
267
267
 
268
+ // Handle icEnvVars object (deploy-time IC environment variables)
269
+ if (key === 'icEnvVars' && typeof value === 'object' && value !== null) {
270
+ if (!this.manager.config.icEnvVars) {
271
+ this.manager.config.icEnvVars = {};
272
+ }
273
+ const cliIcEnvVars = (this.manager.explicitConfig && this.manager.explicitConfig.icEnvVars) || {};
274
+ Object.entries(value).forEach(([envKey, envValue]) => {
275
+ if (!(envKey in cliIcEnvVars)) {
276
+ this.manager.config.icEnvVars[envKey] = envValue;
277
+ this.manager._recordSource(`icEnvVars.${envKey}`, envValue, 'config-file');
278
+ }
279
+ });
280
+ return;
281
+ }
282
+
268
283
  if (this.manager._isSourceSupported(key, 'configFile')) {
269
284
  filteredConfig[key] = this.manager._parseValue(key, value);
270
285
  this.manager._recordSource(key, this.manager._parseValue(key, value), 'config-file');
@@ -342,6 +357,9 @@ export default class ConfigLoader {
342
357
 
343
358
  // Parse --server-env KEY=VALUE pairs
344
359
  this._parseEnvVarOptions('server-env', 'serverEnvVars');
360
+
361
+ // Parse --ic-env KEY=VALUE pairs (deploy-time IC environment variables)
362
+ this._parseEnvVarOptions('ic-env', 'icEnvVars');
345
363
  }
346
364
 
347
365
  /**
package/src/lib/config-manager.js CHANGED
@@ -183,6 +183,9 @@ export default class ConfigManager {
183
183
  if (this.config.serverEnvVars && typeof this.config.serverEnvVars === 'object') {
184
184
  finalConfig.serverEnvVars = { ...this.config.serverEnvVars };
185
185
  }
186
+ if (this.config.icEnvVars && typeof this.config.icEnvVars === 'object') {
187
+ finalConfig.icEnvVars = { ...this.config.icEnvVars };
188
+ }
186
189
 
187
190
  // Ensure all parameters from the matrix are included in final config
188
191
  // This is important for optional parameters that might be null
@@ -411,7 +414,8 @@ export default class ConfigManager {
411
414
  ...endpointParams,
412
415
  ...icParams,
413
416
  'modelEnvVars',
414
- 'serverEnvVars'
417
+ 'serverEnvVars',
418
+ 'icEnvVars'
415
419
  ]);
416
420
  const core = {};
417
421
  for (const [key, value] of Object.entries(this.config)) {
@@ -426,6 +430,7 @@ export default class ConfigManager {
426
430
  icConfig,
427
431
  modelEnvVars: { ...(this.config.modelEnvVars || {}) },
428
432
  serverEnvVars: { ...(this.config.serverEnvVars || {}) },
433
+ icEnvVars: { ...(this.config.icEnvVars || {}) },
429
434
  manifest: [...this._sourceManifest]
430
435
  };
431
436
  }
package/src/lib/dataset-slug.js ADDED
@@ -0,0 +1,152 @@
1
+ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ // SPDX-License-Identifier: Apache-2.0
3
+
4
+ /**
5
+ * Dataset Slug Derivation
6
+ *
7
+ * Derives a deterministic, short slug from a dataset URI for use in
8
+ * tuning-job-aware adapter naming conventions.
9
+ *
10
+ * Slugification rules:
11
+ * - Lowercase
12
+ * - Strip non-alphanumeric characters (keep hyphens)
13
+ * - Truncate to 20 characters
14
+ * - Replace consecutive hyphens with single hyphen
15
+ * - Strip leading/trailing hyphens
16
+ *
17
+ * Examples:
18
+ * hf://org/name -> "name"
19
+ * hf://tatsu-lab/alpaca -> "alpaca"
20
+ * hf://Open-Orca/OpenOrca -> "openorca"
21
+ * s3://bucket/path/file.jsonl -> "file"
22
+ *
23
+ * Requirements: US-4 (AC-4.2)
24
+ */
25
+
26
+ /**
27
+ * Derive a dataset slug from a dataset URI.
28
+ *
29
+ * @param {string} datasetUri - Dataset URI (s3://... or hf://...)
30
+ * @returns {string} The derived slug, or empty string if extraction fails
31
+ */
32
+ export function deriveDatasetSlug(datasetUri) {
33
+ if (!datasetUri || typeof datasetUri !== 'string') {
34
+ return '';
35
+ }
36
+
37
+ let rawName = '';
38
+
39
+ if (datasetUri.startsWith('hf://')) {
40
+ // hf://org/name[/split][?file=pattern]
41
+ // Extract the dataset name (second path component)
42
+ const hfPath = datasetUri.slice(5); // remove "hf://"
43
+ const withoutQuery = hfPath.split('?')[0]; // remove ?file=...
44
+ const parts = withoutQuery.split('/');
45
+ // parts[0] = org, parts[1] = name, parts[2+] = split
46
+ rawName = parts[1] || parts[0] || '';
47
+ } else if (datasetUri.startsWith('s3://')) {
48
+ // s3://bucket/path/file.jsonl -> slug from filename (without extension)
49
+ const s3Path = datasetUri.slice(5); // remove "s3://"
50
+ const parts = s3Path.split('/');
51
+ const filename = parts[parts.length - 1] || '';
52
+ // Remove file extension
53
+ const dotIndex = filename.lastIndexOf('.');
54
+ rawName = dotIndex > 0 ? filename.substring(0, dotIndex) : filename;
55
+ } else {
56
+ // Unknown format — try to extract last path component
57
+ const parts = datasetUri.split('/');
58
+ rawName = parts[parts.length - 1] || '';
59
+ }
60
+
61
+ return slugify(rawName);
62
+ }
63
+
64
+ /**
65
+ * Apply slugification rules to a raw name.
66
+ *
67
+ * @param {string} raw - Raw name to slugify
68
+ * @returns {string} Slugified string
69
+ */
70
+ export function slugify(raw) {
71
+ if (!raw) return '';
72
+
73
+ let slug = raw
74
+ .toLowerCase() // lowercase
75
+ .replace(/[^a-z0-9-]/g, '') // strip non-alphanumeric (keep hyphens)
76
+ .replace(/-{2,}/g, '-') // replace consecutive hyphens
77
+ .replace(/^-+/, '') // strip leading hyphens
78
+ .replace(/-+$/, ''); // strip trailing hyphens
79
+
80
+ // Truncate to 20 chars
81
+ if (slug.length > 20) {
82
+ slug = slug.substring(0, 20);
83
+ // Don't end on a hyphen after truncation
84
+ slug = slug.replace(/-+$/, '');
85
+ }
86
+
87
+ return slug;
88
+ }
89
+
90
+ /**
91
+ * Resolve a --from-tune argument to the appropriate config variable name.
92
+ *
93
+ * Resolution rules:
94
+ * - No arg (empty/null) -> TUNE_OUTPUT_PATH_LATEST
95
+ * - technique only (e.g., "sft") -> TUNE_ADAPTER_PATH_SFT
96
+ * - technique-dataset compound (e.g., "sft-alpaca") -> TUNE_ADAPTER_PATH_SFT_ALPACA
97
+ *
98
+ * @param {string} fromTuneArg - The --from-tune argument value
99
+ * @param {function} configVarExists - Function that checks if a config var exists
100
+ * @returns {{ varName: string, technique: string, slug: string, isCompound: boolean, fallback: string|null }}
101
+ */
102
+ export function resolveFromTuneVar(fromTuneArg, configVarExists) {
103
+ if (!fromTuneArg) {
104
+ return {
105
+ varName: 'TUNE_OUTPUT_PATH_LATEST',
106
+ technique: '',
107
+ slug: '',
108
+ isCompound: false,
109
+ fallback: null
110
+ };
111
+ }
112
+
113
+ const upper = fromTuneArg.toUpperCase();
114
+
115
+ // Check if argument contains a hyphen — potential compound key
116
+ const hyphenIndex = fromTuneArg.indexOf('-');
117
+ if (hyphenIndex > 0) {
118
+ const technique = fromTuneArg.substring(0, hyphenIndex);
119
+ const slug = fromTuneArg.substring(hyphenIndex + 1);
120
+ const techniqueUpper = technique.toUpperCase();
121
+ const slugUpper = slug.toUpperCase().replace(/-/g, '_');
122
+ const compoundVar = `TUNE_ADAPTER_PATH_${techniqueUpper}_${slugUpper}`;
123
+
124
+ if (configVarExists(compoundVar)) {
125
+ return {
126
+ varName: compoundVar,
127
+ technique,
128
+ slug,
129
+ isCompound: true,
130
+ fallback: null
131
+ };
132
+ }
133
+
134
+ // Compound key doesn't exist — fallback to technique-only
135
+ return {
136
+ varName: `TUNE_ADAPTER_PATH_${techniqueUpper}`,
137
+ technique,
138
+ slug,
139
+ isCompound: false,
140
+ fallback: compoundVar // the compound var that was tried but didn't exist
141
+ };
142
+ }
143
+
144
+ // No hyphen — technique-only
145
+ return {
146
+ varName: `TUNE_ADAPTER_PATH_${upper}`,
147
+ technique: fromTuneArg,
148
+ slug: '',
149
+ isCompound: false,
150
+ fallback: null
151
+ };
152
+ }
package/src/lib/generated/cli-options.js CHANGED
@@ -1,6 +1,6 @@
1
1
  // AUTO-GENERATED by scripts/codegen-cli.js — DO NOT EDIT
2
2
  // Source: config/parameter-schema-v2.json
3
- // Generated: 2026-06-15T20:16:03.840Z
3
+ // Generated: 2026-06-22T13:49:00.815Z
4
4
 
5
5
  /**
6
6
  * CLI option definitions derived from parameter-schema-v2.json.
@@ -70,7 +70,7 @@ export const cliOptions = [
70
70
  {
71
71
  'flag': '--enable-lora',
72
72
  'description': 'Enable LoRA adapter serving',
73
- 'defaultValue': false
73
+ 'defaultValue': true
74
74
  },
75
75
  {
76
76
  'flag': '--max-loras <n>',
@@ -85,7 +85,7 @@ export const cliOptions = [
85
85
  {
86
86
  'flag': '--include-benchmark',
87
87
  'description': 'Include SageMaker AI Benchmarking scripts (do/benchmark, do/optimize). Workload configuration is specified at runtime via --workload flag.',
88
- 'defaultValue': false
88
+ 'defaultValue': true
89
89
  },
90
90
  {
91
91
  'flag': '--benchmark-concurrency <n>',
@@ -353,6 +353,11 @@ export const cliOptions = [
353
353
  'description': 'Server env var, repeatable (e.g. SGLANG_MEM_FRACTION=0.9)',
354
354
  'repeatable': true
355
355
  },
356
+ {
357
+ 'flag': '--ic-env <KEY=VALUE>',
358
+ 'description': 'Deploy-time environment variable for inference components (IC_ENV_* prefix), repeatable (e.g. VLLM_MAX_MODEL_LEN=8192)',
359
+ 'repeatable': true
360
+ },
356
361
  {
357
362
  'flag': '--include-sample',
358
363
  'description': 'Include sample model code',
@@ -464,6 +469,7 @@ export const helpGroups = {
464
469
  '--fsx-volume-handle': 'hyperpod',
465
470
  '--model-env': 'env',
466
471
  '--server-env': 'env',
472
+ '--ic-env': 'ic',
467
473
  '--include-sample': 'features',
468
474
  '--include-testing': 'features',
469
475
  '--test-types': 'features',
package/src/lib/generated/parameter-matrix.js CHANGED
@@ -1,6 +1,6 @@
1
1
  // AUTO-GENERATED by scripts/codegen-parameter-matrix.js — DO NOT EDIT
2
2
  // Source: config/parameter-schema-v2.json
3
- // Generated: 2026-06-15T20:16:03.952Z
3
+ // Generated: 2026-06-22T13:49:00.924Z
4
4
 
5
5
  /**
6
6
  * Parameter matrix defining how each parameter is loaded from various sources.
@@ -106,7 +106,7 @@ export const parameterMatrix = {
106
106
  'mcp': false,
107
107
  'promptable': true,
108
108
  'required': false,
109
- 'default': false,
109
+ 'default': true,
110
110
  'valueSpace': 'bounded'
111
111
  },
112
112
  'maxLoras': {
@@ -139,7 +139,7 @@ export const parameterMatrix = {
139
139
  'mcp': false,
140
140
  'promptable': true,
141
141
  'required': false,
142
- 'default': false,
142
+ 'default': true,
143
143
  'valueSpace': 'bounded'
144
144
  },
145
145
  'benchmarkConcurrency': {
@@ -569,6 +569,17 @@ export const parameterMatrix = {
569
569
  'default': null,
570
570
  'valueSpace': 'unbounded'
571
571
  },
572
+ 'icEnv': {
573
+ 'cliOption': 'ic-env',
574
+ 'envVar': null,
575
+ 'configFile': true,
576
+ 'packageJson': false,
577
+ 'mcp': false,
578
+ 'promptable': false,
579
+ 'required': false,
580
+ 'default': [],
581
+ 'valueSpace': 'unbounded'
582
+ },
572
583
  'includeSampleModel': {
573
584
  'cliOption': 'include-sample',
574
585
  'envVar': 'ML_INCLUDE_SAMPLE',
package/src/lib/generated/validation-rules.js CHANGED
@@ -1,6 +1,6 @@
1
1
  // AUTO-GENERATED by scripts/codegen-validator.js — DO NOT EDIT
2
2
  // Source: config/parameter-schema-v2.json
3
- // Generated: 2026-06-15T20:16:03.877Z
3
+ // Generated: 2026-06-22T13:49:00.849Z
4
4
 
5
5
  /**
6
6
  * Validation rules derived from parameter-schema-v2.json.
package/src/lib/mcp-query-runner.js CHANGED
@@ -216,6 +216,12 @@ export default class McpQueryRunner {
216
216
  if (parsed.choices?.instanceType?.length > 0) {
217
217
  this.runner._instanceSizerMetadata = parsed.metadata || null;
218
218
 
219
+ // Store maxModelLen from sizer if context was capped (AC-1.7)
220
+ if (parsed.values?.maxModelLen) {
221
+ this.runner._sizerMaxModelLen = parsed.values.maxModelLen;
222
+ console.log(` ✓ Context length capped: max_model_len=${parsed.values.maxModelLen}`);
223
+ }
224
+
219
225
  // Build display labels with VRAM estimate and utilization percentage
220
226
  const recommendations = parsed.metadata?.recommendations || [];
221
227
  const estimatedVramGb = parsed.metadata?.estimatedVramGb;