@aws/ml-container-creator 0.9.1 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/LICENSE-THIRD-PARTY +9304 -0
  2. package/bin/cli.js +2 -0
  3. package/config/bootstrap-e2e-stack.json +341 -0
  4. package/config/bootstrap-stack.json +40 -3
  5. package/config/parameter-schema-v2.json +2049 -0
  6. package/config/tune-catalog.json +1781 -0
  7. package/infra/ci-harness/buildspec.yml +1 -0
  8. package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
  9. package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
  10. package/infra/ci-harness/lib/ci-harness-stack.ts +837 -7
  11. package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
  12. package/package.json +53 -68
  13. package/servers/base-image-picker/index.js +121 -121
  14. package/servers/e2e-status/index.js +297 -0
  15. package/servers/e2e-status/manifest.json +14 -0
  16. package/servers/e2e-status/package.json +15 -0
  17. package/servers/endpoint-picker/LICENSE +202 -0
  18. package/servers/endpoint-picker/index.js +536 -0
  19. package/servers/endpoint-picker/manifest.json +14 -0
  20. package/servers/endpoint-picker/package.json +18 -0
  21. package/servers/hyperpod-cluster-picker/index.js +125 -125
  22. package/servers/instance-sizer/index.js +138 -138
  23. package/servers/instance-sizer/lib/instance-ranker.js +76 -76
  24. package/servers/instance-sizer/lib/model-resolver.js +61 -61
  25. package/servers/instance-sizer/lib/quota-resolver.js +113 -113
  26. package/servers/instance-sizer/lib/vram-estimator.js +31 -31
  27. package/servers/lib/bedrock-client.js +38 -38
  28. package/servers/lib/catalogs/jumpstart-public.json +101 -16
  29. package/servers/lib/catalogs/model-servers.json +201 -3
  30. package/servers/lib/catalogs/models.json +182 -26
  31. package/servers/lib/custom-validators.js +13 -13
  32. package/servers/lib/dynamic-resolver.js +4 -4
  33. package/servers/marketplace-picker/index.js +342 -0
  34. package/servers/marketplace-picker/manifest.json +14 -0
  35. package/servers/marketplace-picker/package.json +18 -0
  36. package/servers/model-picker/index.js +382 -382
  37. package/servers/region-picker/index.js +56 -56
  38. package/servers/workload-picker/LICENSE +202 -0
  39. package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
  40. package/servers/workload-picker/index.js +171 -0
  41. package/servers/workload-picker/manifest.json +16 -0
  42. package/servers/workload-picker/package.json +16 -0
  43. package/src/app.js +4 -390
  44. package/src/lib/bootstrap-command-handler.js +710 -1148
  45. package/src/lib/bootstrap-config.js +36 -0
  46. package/src/lib/bootstrap-profile-manager.js +641 -0
  47. package/src/lib/bootstrap-provisioners.js +421 -0
  48. package/src/lib/ci-register-helpers.js +74 -0
  49. package/src/lib/config-loader.js +408 -0
  50. package/src/lib/config-manager.js +66 -1685
  51. package/src/lib/config-mcp-client.js +118 -0
  52. package/src/lib/config-validator.js +634 -0
  53. package/src/lib/cuda-resolver.js +149 -0
  54. package/src/lib/e2e-catalog-validator.js +251 -3
  55. package/src/lib/e2e-ci-recorder.js +103 -0
  56. package/src/lib/generated/cli-options.js +315 -311
  57. package/src/lib/generated/parameter-matrix.js +671 -0
  58. package/src/lib/generated/validation-rules.js +71 -71
  59. package/src/lib/marketplace-flow.js +276 -0
  60. package/src/lib/mcp-query-runner.js +768 -0
  61. package/src/lib/parameter-schema-validator.js +62 -18
  62. package/src/lib/path-prover-brain.js +607 -0
  63. package/src/lib/prompt-runner.js +41 -1504
  64. package/src/lib/prompts/feature-prompts.js +172 -0
  65. package/src/lib/prompts/index.js +48 -0
  66. package/src/lib/prompts/infrastructure-prompts.js +690 -0
  67. package/src/lib/prompts/model-prompts.js +552 -0
  68. package/src/lib/prompts/project-prompts.js +82 -0
  69. package/src/lib/prompts.js +2 -1446
  70. package/src/lib/registry-command-handler.js +135 -3
  71. package/src/lib/secrets-prompt-runner.js +251 -0
  72. package/src/lib/template-variable-resolver.js +422 -0
  73. package/src/lib/tune-catalog-validator.js +37 -4
  74. package/templates/Dockerfile +9 -0
  75. package/templates/code/adapter_sidecar.py +444 -0
  76. package/templates/code/serve +6 -0
  77. package/templates/code/serve.d/vllm.ejs +1 -1
  78. package/templates/do/.benchmark_writer.py +1476 -0
  79. package/templates/do/.tune_helper.py +982 -57
  80. package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
  81. package/templates/do/adapter +149 -0
  82. package/templates/do/benchmark +639 -85
  83. package/templates/do/config +108 -5
  84. package/templates/do/deploy.d/managed-inference.ejs +192 -11
  85. package/templates/do/optimize +106 -37
  86. package/templates/do/register +89 -0
  87. package/templates/do/test +13 -0
  88. package/templates/do/tune +378 -59
  89. package/templates/do/validate +44 -4
  90. package/config/parameter-schema.json +0 -88
@@ -0,0 +1,690 @@
1
+ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ // SPDX-License-Identifier: Apache-2.0
3
+
4
+ /**
5
+ * Infrastructure prompt definitions.
6
+ * Covers: all infra* prompts, base image prompts, and utilities
7
+ * (formatImageChoices, filterByCudaGeneration, getInstanceCudaGeneration, instanceCatalogRaw).
8
+ */
9
+
10
+ import Table from 'cli-table3';
11
+ import chalk from 'chalk';
12
+ import { readFileSync } from 'node:fs';
13
+ import { resolve, dirname } from 'node:path';
14
+ import { fileURLToPath } from 'node:url';
15
+
16
// ES modules have no __filename/__dirname, so derive this module's location from import.meta.url.
const __promptsFilename = fileURLToPath(import.meta.url);
const __promptsDir = dirname(__promptsFilename);

// Absolute path of the instance catalog, resolved relative to this module's directory.
const instancesCatalogPath = resolve(
    __promptsDir,
    '../../../servers/lib/catalogs/instances.json'
);
19
+
20
/**
 * Read instances.json and reshape every catalog entry into the row format the
 * instance prompts display: { type, vcpus, memory, accelerator, useCase, category }.
 *
 * Missing fields fall back to neutral values (0 vCPUs, '0 GB', 'None', '', 'cpu').
 * Any read/parse/shape failure is reported with console.warn and yields an empty registry.
 *
 * @returns {Object<string, {type: string, vcpus: number, memory: string, accelerator: string, useCase: string, category: string}>}
 *   Display rows keyed by instance type.
 */
function loadInstanceTypeRegistry() {
    try {
        const catalog = JSON.parse(readFileSync(instancesCatalogPath, 'utf8'));

        const toRegistryPair = ([instanceType, entry]) => {
            // An entry only counts as accelerated when `hardware` is set and is not the literal 'None'.
            const isAccelerated = entry.hardware && entry.hardware !== 'None';
            const row = {
                type: instanceType,
                vcpus: entry.vcpus || 0,
                memory: entry.memGb ? `${entry.memGb} GB` : '0 GB',
                accelerator: isAccelerated ? entry.accelerator || entry.hardware : 'None',
                useCase: entry.notes || entry.tags?.join(', ') || '',
                category: entry.category || 'cpu'
            };
            return [instanceType, row];
        };

        return Object.fromEntries(Object.entries(catalog?.catalog || {}).map(toRegistryPair));
    } catch (error) {
        console.warn(`Failed to load instance type registry from catalog: ${error.message}`);
        return {};
    }
}

// Built once at module load; the prompts below read from this snapshot.
const instanceTypeRegistry = loadInstanceTypeRegistry();
50
+
51
/**
 * Load the raw instance catalog for GPU/CUDA generation lookups.
 *
 * Unlike the display registry, entries are returned untouched so callers can read
 * fields such as acceleratorType, gpuArchitecture, gpus and gpuMemoryGb.
 *
 * A read or parse failure is not fatal: it is reported with console.warn (rather than
 * being swallowed silently) and an empty catalog is returned, which means CUDA
 * generation lookups return null and GPU annotations are omitted.
 *
 * @returns {Object<string, object>} Catalog entries keyed by instance type ({} on failure).
 */
function loadInstanceCatalogRaw() {
    try {
        const raw = readFileSync(instancesCatalogPath, 'utf8');
        const catalog = JSON.parse(raw);
        return catalog?.catalog || {};
    } catch (error) {
        console.warn(
            `Failed to load raw instance catalog (CUDA generation filtering and GPU details will be unavailable): ${error.message}`
        );
        return {};
    }
}

// Loaded once at module load and shared by the CUDA helpers and the instance prompts.
const instanceCatalogRaw = loadInstanceCatalogRaw();
66
+
67
/**
 * Look up which CUDA generation an instance type belongs to.
 * The catalog's gpuArchitecture value (e.g., "Turing", "Ampere", "Hopper") is used
 * as the generation key; instances in the same generation share AMI compatibility.
 * @param {string} instanceType - e.g., "ml.g5.xlarge"
 * @returns {string|null} Generation key, or null when the type is unknown,
 *   is not a CUDA instance, or has no gpuArchitecture recorded
 */
function getInstanceCudaGeneration(instanceType) {
    const catalogEntry = instanceCatalogRaw[instanceType];
    const isCudaInstance = Boolean(catalogEntry) && catalogEntry.acceleratorType === 'cuda';
    return isCudaInstance ? catalogEntry.gpuArchitecture || null : null;
}
80
+
81
/**
 * Restrict a prioritized instance list to the CUDA generation of its first entry.
 *
 * The first (highest-priority) instance decides the generation. Instances whose
 * generation lookup returns null (not in the catalog, or not CUDA) are always kept;
 * only instances with a different, known generation are removed.
 *
 * @param {string[]} instanceTypes - Array of instance type strings
 * @returns {{ filtered: string[], generation: string|null, removed: string[] }}
 */
function filterByCudaGeneration(instanceTypes) {
    if (!instanceTypes || instanceTypes.length === 0) {
        return { filtered: [], generation: null, removed: [] };
    }

    const generation = getInstanceCudaGeneration(instanceTypes[0]);
    if (!generation) {
        // Without a reference generation there is nothing to compare against — pass the list through.
        return { filtered: instanceTypes, generation: null, removed: [] };
    }

    const filtered = [];
    const removed = [];
    for (const candidate of instanceTypes) {
        const candidateGeneration = getInstanceCudaGeneration(candidate);
        const isCompatible = candidateGeneration === null || candidateGeneration === generation;
        (isCompatible ? filtered : removed).push(candidate);
    }

    return { filtered, generation, removed };
}
113
+
114
+ /**
115
+ * Infrastructure prompts split into sub-phases so the prompt runner can
116
+ * interleave MCP queries between them (e.g. query instance-recommender
117
+ * only after we know the deployment target is realtime-inference).
118
+ *
119
+ * Ordering: Region → Deployment Target → Instance/HyperPod → Build Target → Role
120
+ */
121
+
122
// Sub-phase A: Region + Deployment Target (always asked first)

// Shape of an AWS region code, e.g. us-west-2, ap-southeast-1, us-gov-west-1.
const AWS_REGION_PATTERN = /^[a-z]{2,}(?:-[a-z]+)+-\d+$/;

const infraRegionAndTargetPrompts = [
    {
        type: 'list',
        name: 'awsRegion',
        message: 'Target AWS region?',
        choices: (answers) => {
            // us-east-1 is always offered; a different region coming from a bootstrap
            // profile is listed ahead of it, and 'Custom...' is always last.
            const bootstrapRegion = answers._bootstrapRegion;
            const choices = ['us-east-1'];
            if (bootstrapRegion && bootstrapRegion !== 'us-east-1') {
                choices.unshift({ name: `${bootstrapRegion} (from bootstrap profile)`, value: bootstrapRegion });
            }
            choices.push({ name: 'Custom...', value: 'custom' });
            return choices;
        },
        default: (answers) => answers._bootstrapRegion || 'us-east-1'
    },
    {
        type: 'input',
        name: 'customAwsRegion',
        message: 'Enter AWS region (e.g., us-west-2, eu-west-1):',
        // Reject empty or malformed input here instead of letting an unusable region
        // flow into the generated project. Surrounding whitespace is tolerated, in line
        // with the custom instance type validator.
        validate: (input) => {
            const region = String(input ?? '').trim();
            if (region === '') {
                return 'AWS region is required';
            }
            if (!AWS_REGION_PATTERN.test(region)) {
                return 'Invalid AWS region format. Expected format like us-west-2 or eu-west-1';
            }
            return true;
        },
        when: answers => answers.awsRegion === 'custom'
    },
    {
        type: 'list',
        name: 'deploymentTarget',
        message: 'Deployment target?',
        choices: [
            { name: 'SageMaker Real-Time Inference', value: 'realtime-inference' },
            { name: 'SageMaker Async Inference', value: 'async-inference' },
            { name: 'SageMaker Batch Transform', value: 'batch-transform' },
            { name: 'SageMaker HyperPod - EKS', value: 'hyperpod-eks' }
        ],
        default: 'realtime-inference'
    }
];
159
+
160
// Sub-phase A2: Existing endpoint prompts.
// The yes/no question is asked only for realtime-inference; the follow-ups are asked
// only after the user answered 'yes'.

// Shared gate for the two follow-up questions.
const wantsExistingEndpoint = (answers) => answers.useExistingEndpoint === 'yes';

const infraExistingEndpointPrompts = [
    {
        type: 'list',
        name: 'useExistingEndpoint',
        message: 'Deploy to an existing endpoint? (attach IC to running endpoint)',
        choices: [
            { name: 'No — create a new endpoint', value: 'no' },
            { name: 'Yes — attach to an existing endpoint', value: 'yes' }
        ],
        default: 'no',
        when: (answers) => answers.deploymentTarget === 'realtime-inference'
    },
    {
        type: 'list',
        name: 'existingEndpointName',
        message: 'Select endpoint:',
        // Offer endpoints supplied via _mcpEndpointChoices plus a manual option;
        // with none supplied, manual entry is the only choice.
        choices: (answers) => {
            const discovered = answers._mcpEndpointChoices || [];
            return discovered.length > 0
                ? [...discovered, { name: 'Custom (enter manually)', value: 'custom' }]
                : [{ name: 'Enter endpoint name manually', value: 'custom' }];
        },
        when: wantsExistingEndpoint
    },
    {
        type: 'input',
        name: 'customExistingEndpointName',
        message: 'Enter existing endpoint name:',
        validate: (input) => {
            const isBlank = !input || input.trim() === '';
            return isBlank ? 'Endpoint name is required' : true;
        },
        when: (answers) => wantsExistingEndpoint(answers) && answers.existingEndpointName === 'custom'
    }
];
199
+
200
// Sub-phase B: Instance type.
// The multi-select (instance pools) prompt is realtime-inference only; the single-select
// prompt covers every deployment target whenever the multi-select was not shown.

// Accepted shape for a SageMaker instance type: ml.{family}.{size}
const INSTANCE_TYPE_PATTERN = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;

// Deployment targets for which an instance type is requested.
const INSTANCE_TYPE_TARGETS = ['realtime-inference', 'async-inference', 'batch-transform', 'hyperpod-eks'];

/** True when the multi-select prompt applies: realtime target with more than one MCP suggestion. */
function usesInstanceMultiSelect(answers) {
    const mcpChoices = answers._mcpInstanceChoices;
    return answers.deploymentTarget === 'realtime-inference' &&
        Boolean(mcpChoices && mcpChoices.length > 1);
}

/** Framework from the explicit answer, else the first '-' segment of deploymentConfig. */
function resolveInstanceFramework(answers) {
    return answers.framework || answers.deploymentConfig?.split('-')[0];
}

/**
 * GPU annotation for a raw catalog entry, e.g. "4 GPUs, 96GB".
 * Returns '' when there is no entry or the entry reports no GPUs, so CPU-only or
 * incomplete entries are not rendered as "undefined GPU, ?GB".
 */
function formatGpuSummary(entry) {
    if (!entry || !(entry.gpus > 0)) {
        return '';
    }
    return `${entry.gpus} GPU${entry.gpus > 1 ? 's' : ''}, ${entry.gpuMemoryGb || '?'}GB`;
}

/**
 * Registry rows offered by the single-select prompt: GPU-only for the transformers
 * framework, then narrowed to the MCP suggestions when any were provided.
 */
function listSelectableInstances(answers) {
    const instances = Object.values(instanceTypeRegistry);
    const candidates = resolveInstanceFramework(answers) === 'transformers'
        ? instances.filter(i => i.category === 'gpu')
        : instances;

    const mcpChoices = answers._mcpInstanceChoices;
    if (!(mcpChoices && mcpChoices.length > 0)) {
        return candidates;
    }
    const mcpSet = new Set(mcpChoices);
    return candidates.filter(i => mcpSet.has(i.type));
}

const infraInstancePrompts = [
    // Multi-select prompt: shown when MCP sizer has choices AND deployment target is realtime-inference
    // User can select 1-5 instances; selection count determines single-type vs instance-pools behavior
    // Requirements: 6.4
    {
        type: 'checkbox',
        name: 'instanceTypeSelections',
        when: usesInstanceMultiSelect,
        message: 'Select instance type(s) — select multiple for instance pools (priority = selection order, max 5):',
        choices: (answers) => {
            const mcpChoices = answers._mcpInstanceChoices || [];
            // All MCP suggestions are listed regardless of CUDA generation so users can
            // see every option; per the original design note, incompatible generations
            // are filtered after selection (filterByCudaGeneration, used by prompt-runner.js).
            const choices = mcpChoices.map(instanceType => {
                const gpuInfo = formatGpuSummary(instanceCatalogRaw[instanceType]);
                return {
                    name: gpuInfo ? `${instanceType} (${gpuInfo})` : instanceType,
                    value: instanceType,
                    short: instanceType
                };
            });
            // Always include a "Custom Input" option at the end
            choices.push({
                name: 'Custom Input (enter one or comma-separated list)',
                value: '__custom_input__',
                short: 'Custom'
            });
            return choices;
        },
        validate: (input) => {
            if (!input || input.length === 0) {
                return 'Select at least one instance type';
            }
            if (input.length > 5) {
                return 'Maximum 5 instance types allowed (API limit). Please deselect some.';
            }
            return true;
        }
    },
    // Custom input prompt for multi-select: shown when user selects "Custom Input" in instanceTypeSelections
    {
        type: 'input',
        name: 'customInstanceTypeSelections',
        message: 'Enter instance type(s) — single for homogeneous, comma-separated for heterogeneous (e.g., ml.g5.xlarge or ml.g5.xlarge,ml.g5.2xlarge):',
        when: answers => Array.isArray(answers.instanceTypeSelections) &&
            answers.instanceTypeSelections.includes('__custom_input__'),
        validate: (input) => {
            // Blank input and input made only of commas/whitespace both reduce to an empty list.
            const instances = (input || '').split(',').map(s => s.trim()).filter(s => s.length > 0);
            if (instances.length === 0) {
                return 'At least one instance type is required';
            }
            if (instances.length > 5) {
                return 'Maximum 5 instance types allowed (API limit).';
            }
            const invalid = instances.find(inst => !INSTANCE_TYPE_PATTERN.test(inst));
            if (invalid !== undefined) {
                return `Invalid instance type format: "${invalid}". Expected format: ml.{family}.{size} (e.g., ml.g5.xlarge)`;
            }
            return true;
        }
    },
    // Single-select prompt: shown when no MCP choices, or for non-realtime targets, or only 1 MCP choice
    {
        type: 'list',
        name: 'instanceType',
        when: answers => {
            // Skip if multi-select was shown (realtime with multiple MCP choices)
            if (usesInstanceMultiSelect(answers)) {
                return false;
            }
            return INSTANCE_TYPE_TARGETS.includes(answers.deploymentTarget);
        },
        message: (answers) => {
            // Skip table when MCP sizer already displayed annotated results
            if (answers._mcpInstanceChoices && answers._mcpInstanceChoices.length > 0) {
                return 'Select instance type:';
            }

            // From here on there are no MCP suggestions, so the table lists the
            // (framework-filtered) registry as-is.
            const table = new Table({
                head: [
                    chalk.cyan('Instance Type'),
                    chalk.cyan('vCPUs'),
                    chalk.cyan('Memory'),
                    chalk.cyan('Accelerator'),
                    chalk.cyan('Use Case')
                ],
                colWidths: [20, 8, 12, 20, 25]
            });

            for (const instance of listSelectableInstances(answers)) {
                table.push([
                    instance.type,
                    instance.vcpus.toString(),
                    instance.memory,
                    instance.accelerator,
                    instance.useCase
                ]);
            }

            table.push([
                chalk.yellow('Custom...'),
                '-',
                '-',
                '-',
                'Specify your own'
            ]);

            console.log(`\n${chalk.bold('Available Instance Types:')}`);
            console.log(table.toString());
            console.log('');

            return 'Select instance type:';
        },
        choices: (answers) => {
            const choices = listSelectableInstances(answers).map(instance => ({
                name: instance.type,
                value: instance.type
            }));

            choices.push({
                name: 'Custom...',
                value: 'custom'
            });

            return choices;
        },
        default: (answers) => {
            const framework = resolveInstanceFramework(answers);
            const modelServer = answers.modelServer || answers.deploymentConfig?.split('-')[1];

            if (framework === 'transformers') {
                return modelServer === 'tensorrt-llm' ? 'ml.g5.12xlarge' : 'ml.g5.2xlarge';
            }
            return 'ml.m5.xlarge';
        }
    },
    {
        type: 'input',
        name: 'customInstanceType',
        message: 'Enter AWS SageMaker instance type (e.g., ml.t3.medium, ml.g4dn.xlarge):',
        validate: (input) => {
            if (!input || input.trim() === '') {
                return 'Instance type is required';
            }
            if (!INSTANCE_TYPE_PATTERN.test(input.trim())) {
                return 'Invalid instance type format. Expected format: ml.{family}.{size} (e.g., ml.m5.large, ml.g4dn.xlarge)';
            }
            return true;
        },
        when: answers => answers.instanceType === 'custom'
    }
];
396
+
397
// Sub-phase C: HyperPod EKS-specific prompts (only when deploymentTarget === 'hyperpod-eks')

// Gate shared by every question in this sub-phase.
const isHyperPodTarget = (answers) => answers.deploymentTarget === 'hyperpod-eks';

const infraHyperPodPrompts = [
    {
        type: 'list',
        name: 'hyperPodCluster',
        message: 'Select HyperPod EKS cluster:',
        // Clusters supplied via _mcpHyperPodChoices come first with a manual option appended;
        // with none supplied, manual entry is the only option.
        choices: (answers) => {
            const discovered = answers._mcpHyperPodChoices || [];
            return discovered.length > 0
                ? [...discovered, { name: 'Custom (enter manually)', value: 'custom' }]
                : [{ name: 'Enter cluster name manually', value: 'custom' }];
        },
        when: isHyperPodTarget
    },
    {
        type: 'input',
        name: 'customHyperPodCluster',
        message: 'Enter HyperPod EKS cluster name:',
        validate: (input) => {
            const isBlank = !input || input.trim() === '';
            return isBlank ? 'Cluster name is required' : true;
        },
        when: (answers) => isHyperPodTarget(answers) && answers.hyperPodCluster === 'custom'
    },
    {
        type: 'input',
        name: 'hyperPodNamespace',
        message: 'Kubernetes namespace?',
        default: 'default',
        when: isHyperPodTarget
    },
    {
        type: 'number',
        name: 'hyperPodReplicas',
        message: 'Number of pod replicas?',
        default: 1,
        when: isHyperPodTarget
    },
    {
        type: 'input',
        name: 'fsxVolumeHandle',
        message: 'FSx for Lustre volume handle (optional, press Enter to skip):',
        when: isHyperPodTarget
    }
];
446
+
447
// Sub-phase D: Build target + role ARN (always asked last)

// IAM role ARN: arn:{partition}:iam::{12-digit account}:role/{optional/path/}{RoleName}
// - the partition may be aws, aws-cn, aws-us-gov, ...
// - '/' is allowed after "role/" so roles with a path are accepted, e.g. the
//   console-created "role/service-role/AmazonSageMaker-ExecutionRole-..." form.
const IAM_ROLE_ARN_PATTERN = /^arn:aws(?:-[a-z]+)*:iam::\d{12}:role\/[\w+=,.@\/-]+$/;

const infraBuildPrompts = [
    {
        type: 'list',
        name: 'buildTarget',
        message: 'Build target?',
        choices: [
            { name: 'CodeBuild (recommended)', value: 'codebuild' }
        ],
        default: 'codebuild'
    },
    {
        type: 'list',
        name: 'codebuildComputeType',
        message: 'CodeBuild compute type?',
        choices: [
            'BUILD_GENERAL1_SMALL',
            'BUILD_GENERAL1_MEDIUM',
            'BUILD_GENERAL1_LARGE'
        ],
        default: 'BUILD_GENERAL1_MEDIUM',
        when: answers => answers.buildTarget === 'codebuild'
    },
    {
        type: 'input',
        name: 'awsRoleArn',
        message: 'AWS IAM Role ARN for SageMaker execution (optional)?',
        validate: (input) => {
            // The role is optional: blank input is accepted as "not provided".
            if (!input || input.trim() === '') {
                return true;
            }
            if (!IAM_ROLE_ARN_PATTERN.test(input)) {
                return 'Invalid ARN format. Expected: arn:aws:iam::123456789012:role/RoleName';
            }
            return true;
        }
    }
];
486
+
487
/**
 * Sub-phase: Async-specific prompts.
 * Every question here is gated on deploymentTarget === 'async-inference'.
 * Requirements: 2.1, 2.2, 2.3, 2.4
 */
const isAsyncInferenceTarget = (answers) => answers.deploymentTarget === 'async-inference';

// Attach the shared async gate to a question definition.
const asAsyncQuestion = (question) => ({ ...question, when: isAsyncInferenceTarget });

const infraAsyncPrompts = [
    asAsyncQuestion({
        type: 'input',
        name: 'asyncS3OutputPath',
        message: 'S3 output path for async results (leave empty for default: s3://ml-container-creator-async-{region}-{account-id}/{project-name}/output/):'
    }),
    asAsyncQuestion({
        type: 'input',
        name: 'asyncSnsSuccessTopic',
        message: 'SNS success topic ARN (leave empty for auto-created per-project topic):'
    }),
    asAsyncQuestion({
        type: 'input',
        name: 'asyncSnsErrorTopic',
        message: 'SNS error topic ARN (leave empty for auto-created per-project topic):'
    }),
    asAsyncQuestion({
        type: 'number',
        name: 'asyncMaxConcurrentInvocations',
        message: 'Max concurrent invocations per instance?',
        default: 1
    })
];
518
+
519
/**
 * Sub-phase: Batch transform-specific prompts.
 * Every question here is gated on deploymentTarget === 'batch-transform'.
 * Requirements: 2.1, 2.2, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9
 */
const isBatchTransformTarget = (answers) => answers.deploymentTarget === 'batch-transform';

const infraBatchTransformPrompts = [
    {
        type: 'input',
        name: 'batchInputPath',
        message: 'S3 input path for batch transform data (leave empty for default: s3://ml-container-creator-batch-{region}-{account-id}/{project-name}/input/):',
        when: isBatchTransformTarget
    },
    {
        type: 'input',
        name: 'batchOutputPath',
        message: 'S3 output path for batch transform results (leave empty for default: s3://ml-container-creator-batch-{region}-{account-id}/{project-name}/output/):',
        when: isBatchTransformTarget
    },
    {
        type: 'number',
        name: 'batchInstanceCount',
        message: 'How many instances should run the batch job in parallel?',
        default: 1,
        when: isBatchTransformTarget
    },
    {
        type: 'list',
        name: 'batchSplitType',
        message: 'Input file format — how should SageMaker read your input files?',
        choices: [
            { name: 'Line — one record per line (JSON lines, CSV)', value: 'Line' },
            { name: 'RecordIO — Amazon RecordIO format', value: 'RecordIO' },
            { name: 'None — send each file as a single request', value: 'None' }
        ],
        default: 'Line',
        when: isBatchTransformTarget
    },
    {
        type: 'list',
        name: 'batchStrategy',
        message: 'How many records should be sent per inference request?',
        choices: [
            { name: 'MultiRecord — batch multiple records per request (higher throughput)', value: 'MultiRecord' },
            { name: 'SingleRecord — one record per request (simpler, more predictable)', value: 'SingleRecord' }
        ],
        default: 'MultiRecord',
        when: isBatchTransformTarget
    },
    {
        type: 'list',
        name: 'batchJoinSource',
        message: 'Include original input data alongside predictions in the output?',
        choices: [
            { name: 'No — output predictions only', value: 'None' },
            { name: 'Yes — merge input with predictions (useful for traceability)', value: 'Input' }
        ],
        default: 'None',
        when: isBatchTransformTarget
    },
    {
        type: 'number',
        name: 'batchMaxConcurrentTransforms',
        message: 'Max concurrent inference requests per instance?',
        default: 1,
        when: isBatchTransformTarget
    },
    {
        type: 'number',
        name: 'batchMaxPayloadInMB',
        message: 'Max request payload size in MB (0-100)?',
        default: 6,
        // The prompt advertises a 0-100 range; enforce it here so an out-of-range or
        // non-numeric answer is corrected at the prompt rather than carried forward.
        validate: (input) => {
            if (!Number.isInteger(input) || input < 0 || input > 100) {
                return 'Max payload size must be a whole number between 0 and 100';
            }
            return true;
        },
        when: isBatchTransformTarget
    }
];
592
+
593
// Combined view for tests and backward compatibility.
// Concatenates, in order: region/target, instance, HyperPod and build prompts.
// The existing-endpoint, async and batch-transform sub-phases are not part of this list.
const infrastructurePrompts = infraRegionAndTargetPrompts.concat(
    infraInstancePrompts,
    infraHyperPodPrompts,
    infraBuildPrompts
);
600
+
601
/**
 * Turn ImageEntry[] into Inquirer list choices whose labels line up as table columns.
 *
 * Columns, each space-padded to a fixed width and separated by one space:
 * repository (30), tag (16), architecture (7), CUDA (6, transformer only),
 * Python (8), then the first 10 characters of `created`.
 * Missing cuda_version / python_version labels are shown as '-'.
 *
 * @param {ImageEntry[]} entries - Image entries from the resolver
 * @param {boolean} isTransformer - Whether to show CUDA column
 * @returns {Array<{name: string, value: string}>} Inquirer choices; each also carries
 *   `_meta` with the entry's labels and accelerator
 */
function formatImageChoices(entries, isTransformer) {
    return entries.map(({ repository, tag, architecture, labels, created, image, accelerator }) => {
        const columns = [repository.padEnd(30), tag.padEnd(16), architecture.padEnd(7)];
        if (isTransformer) {
            columns.push((labels.cuda_version || '-').padEnd(6));
        }
        columns.push((labels.python_version || '-').padEnd(8), created.slice(0, 10));

        return { name: columns.join(' '), value: image, _meta: { labels, accelerator } };
    });
}
621
+
622
/**
 * Base image search prompt.
 * Asked for every architecture except 'transformers' and 'triton'; the architecture
 * comes from answers.architecture or, failing that, the first '-' segment of deploymentConfig.
 * Requirements: 5.2, 5.4
 */
const baseImageSearchPrompts = [
    {
        type: 'input',
        name: 'baseImageSearch',
        message: '🔌 Search for a Python base image (e.g. "3.11", "3.10", or leave empty for all):',
        default: '',
        when: (answers) => {
            const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
            // transformers uses model-server images and triton uses NGC images, so neither searches here
            return !['transformers', 'triton'].includes(architecture);
        }
    }
];
639
+
640
/**
 * Base image selection prompts.
 * The list is shown only when _mcpBaseImageChoices is non-empty; picking 'custom'
 * leads to a free-text question validated as [registry/]repository[:tag].
 * Requirements: 5.2, 5.4, 10.1, 10.2, 10.3
 */
const baseImagePrompts = [
    {
        type: 'list',
        name: 'baseImage',
        message: 'Select base container image:',
        choices: (answers) => {
            const offered = answers._mcpBaseImageChoices || [];
            const customChoice = { name: 'Custom (enter your own)', value: 'custom' };
            return [...offered, customChoice];
        },
        when: (answers) => {
            const offered = answers._mcpBaseImageChoices;
            return offered && offered.length > 0;
        }
    },
    {
        type: 'input',
        name: 'customBaseImage',
        message: 'Enter custom base container image (e.g. myrepo/myimage:v1):',
        validate: (input) => {
            const isBlank = !input || input.trim() === '';
            if (isBlank) {
                return 'Base image is required';
            }
            // Alphanumeric first character, then name characters, then an optional ':tag'
            const imageShape = /^[a-zA-Z0-9][a-zA-Z0-9._\-/]*(:[a-zA-Z0-9._-]+)?$/;
            return imageShape.test(input.trim())
                ? true
                : 'Invalid image format. Expected: [registry/]repository[:tag]';
        },
        when: (answers) => answers.baseImage === 'custom'
    }
];
674
+
675
+ export {
676
+ infrastructurePrompts,
677
+ infraRegionAndTargetPrompts,
678
+ infraExistingEndpointPrompts,
679
+ infraInstancePrompts,
680
+ infraAsyncPrompts,
681
+ infraBatchTransformPrompts,
682
+ infraHyperPodPrompts,
683
+ infraBuildPrompts,
684
+ baseImageSearchPrompts,
685
+ baseImagePrompts,
686
+ formatImageChoices,
687
+ filterByCudaGeneration,
688
+ getInstanceCudaGeneration,
689
+ instanceCatalogRaw
690
+ };