@aws/ml-container-creator 0.9.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32):
  1. package/config/parameter-schema-v2.json +2065 -0
  2. package/package.json +4 -4
  3. package/servers/lib/catalogs/jumpstart-public.json +101 -16
  4. package/servers/lib/catalogs/models.json +182 -26
  5. package/src/app.js +1 -389
  6. package/src/lib/bootstrap-command-handler.js +75 -1078
  7. package/src/lib/bootstrap-profile-manager.js +634 -0
  8. package/src/lib/bootstrap-provisioners.js +421 -0
  9. package/src/lib/config-loader.js +405 -0
  10. package/src/lib/config-manager.js +59 -1685
  11. package/src/lib/config-mcp-client.js +118 -0
  12. package/src/lib/config-validator.js +634 -0
  13. package/src/lib/cuda-resolver.js +140 -0
  14. package/src/lib/e2e-catalog-validator.js +251 -3
  15. package/src/lib/e2e-ci-recorder.js +103 -0
  16. package/src/lib/generated/cli-options.js +8 -4
  17. package/src/lib/generated/parameter-matrix.js +671 -0
  18. package/src/lib/generated/validation-rules.js +2 -2
  19. package/src/lib/marketplace-flow.js +276 -0
  20. package/src/lib/mcp-query-runner.js +768 -0
  21. package/src/lib/parameter-schema-validator.js +62 -18
  22. package/src/lib/prompt-runner.js +41 -1504
  23. package/src/lib/prompts/feature-prompts.js +172 -0
  24. package/src/lib/prompts/index.js +48 -0
  25. package/src/lib/prompts/infrastructure-prompts.js +690 -0
  26. package/src/lib/prompts/model-prompts.js +552 -0
  27. package/src/lib/prompts/project-prompts.js +70 -0
  28. package/src/lib/prompts.js +2 -1446
  29. package/src/lib/registry-command-handler.js +135 -3
  30. package/src/lib/secrets-prompt-runner.js +251 -0
  31. package/src/lib/template-variable-resolver.js +398 -0
  32. package/config/parameter-schema.json +0 -88
@@ -2,1451 +2,7 @@
2
2
  // SPDX-License-Identifier: Apache-2.0
3
3
 
4
4
  /**
5
- * Prompt definitions organized by phase for better maintainability.
6
- * Each phase handles a specific aspect of project configuration.
5
+ * Re-export all prompts from the prompts/ directory for backward compatibility.
7
6
  */
8
7
 
9
- import Table from 'cli-table3';
10
- import chalk from 'chalk';
11
- import { readFileSync } from 'node:fs';
12
- import { resolve, dirname } from 'node:path';
13
- import { fileURLToPath } from 'node:url';
14
-
15
- const __promptsFilename = fileURLToPath(import.meta.url);
16
- const __promptsDir = dirname(__promptsFilename);
17
- const instancesCatalogPath = resolve(__promptsDir, '../../servers/lib/catalogs/instances.json');
18
-
19
- /**
20
- * Load instance types from the instances.json catalog and transform
21
- * into the display shape expected by prompts (type, vcpus, memory, accelerator, useCase, category).
22
- */
23
- function loadInstanceTypeRegistry() {
24
- try {
25
- const raw = readFileSync(instancesCatalogPath, 'utf8');
26
- const catalog = JSON.parse(raw);
27
- const entries = catalog?.catalog || {};
28
- const registry = {};
29
- for (const [instanceType, entry] of Object.entries(entries)) {
30
- registry[instanceType] = {
31
- type: instanceType,
32
- vcpus: entry.vcpus || 0,
33
- memory: entry.memGb ? `${entry.memGb} GB` : '0 GB',
34
- accelerator: entry.hardware && entry.hardware !== 'None'
35
- ? entry.accelerator || entry.hardware
36
- : 'None',
37
- useCase: entry.notes || entry.tags?.join(', ') || '',
38
- category: entry.category || 'cpu'
39
- };
40
- }
41
- return registry;
42
- } catch (error) {
43
- console.warn(`Failed to load instance type registry from catalog: ${error.message}`);
44
- return {};
45
- }
46
- }
47
-
48
- const instanceTypeRegistry = loadInstanceTypeRegistry();
49
-
50
- /**
51
- * Load the raw instance catalog for GPU/CUDA generation lookups.
52
- * Returns the full catalog entries keyed by instance type.
53
- */
54
- function loadInstanceCatalogRaw() {
55
- try {
56
- const raw = readFileSync(instancesCatalogPath, 'utf8');
57
- const catalog = JSON.parse(raw);
58
- return catalog?.catalog || {};
59
- } catch (error) {
60
- return {};
61
- }
62
- }
63
-
64
- const instanceCatalogRaw = loadInstanceCatalogRaw();
65
-
66
- /**
67
- * Get the CUDA generation key for an instance type.
68
- * Uses gpuArchitecture as the generation grouping (e.g., "Turing", "Ampere", "Hopper").
69
- * Instances in the same generation share AMI compatibility.
70
- * @param {string} instanceType - e.g., "ml.g5.xlarge"
71
- * @returns {string|null} Generation key or null if not found/not GPU
72
- */
73
- function getInstanceCudaGeneration(instanceType) {
74
- const entry = instanceCatalogRaw[instanceType];
75
- if (!entry) return null;
76
- if (entry.acceleratorType !== 'cuda') return null;
77
- return entry.gpuArchitecture || null;
78
- }
79
-
80
- /**
81
- * Filter instance choices to only include instances from the same CUDA generation
82
- * as the first (highest-priority) instance in the list.
83
- * @param {string[]} instanceTypes - Array of instance type strings
84
- * @returns {{ filtered: string[], generation: string|null, removed: string[] }}
85
- */
86
- function filterByCudaGeneration(instanceTypes) {
87
- if (!instanceTypes || instanceTypes.length === 0) {
88
- return { filtered: [], generation: null, removed: [] };
89
- }
90
-
91
- // Find the generation of the first instance
92
- const firstGen = getInstanceCudaGeneration(instanceTypes[0]);
93
- if (!firstGen) {
94
- // First instance not in catalog or not CUDA — return all (can't filter)
95
- return { filtered: instanceTypes, generation: null, removed: [] };
96
- }
97
-
98
- const filtered = [];
99
- const removed = [];
100
- for (const it of instanceTypes) {
101
- const gen = getInstanceCudaGeneration(it);
102
- // Keep if same generation, or if not in catalog (don't block unknown types)
103
- if (gen === firstGen || gen === null) {
104
- filtered.push(it);
105
- } else {
106
- removed.push(it);
107
- }
108
- }
109
-
110
- return { filtered, generation: firstGen, removed };
111
- }
112
-
113
- /**
114
- * Generate pseudo-randomized project name based on framework
115
- * @param {string} framework - The ML framework
116
- * @returns {string} Generated project name
117
- */
118
- function generateProjectName(framework) {
119
- const adjectives = [
120
- 'smart', 'fast', 'clever', 'bright', 'swift', 'agile', 'sharp', 'quick',
121
- 'wise', 'keen', 'bold', 'sleek', 'neat', 'cool', 'fresh', 'prime'
122
- ];
123
-
124
- const frameworkNames = {
125
- 'sklearn': ['sklearn', 'scikit', 'sk'],
126
- 'xgboost': ['xgb', 'xgboost', 'boost'],
127
- 'tensorflow': ['tf', 'tensorflow', 'tensor'],
128
- 'transformers': ['llm', 'transformer', 'gpt', 'bert', 'ai']
129
- };
130
-
131
- const suffixes = [
132
- 'model', 'predictor', 'classifier', 'engine', 'service', 'api',
133
- 'container', 'deployment', 'inference', 'ml', 'ai', 'bot'
134
- ];
135
-
136
- // Get random elements
137
- const adjective = adjectives[Math.floor(Math.random() * adjectives.length)];
138
- const frameworkName = frameworkNames[framework] ?
139
- frameworkNames[framework][Math.floor(Math.random() * frameworkNames[framework].length)] :
140
- 'ml';
141
- const suffix = suffixes[Math.floor(Math.random() * suffixes.length)];
142
-
143
- return `${adjective}-${frameworkName}-${suffix}`;
144
- }
145
-
146
- /**
147
- * Phase 1: Core ML configuration (moved to first)
148
- * Flattened deployment configuration combining architecture + backend
149
- * Requirements: 3.1, 3.2, 16.1, 16.2, 16.3, 16.4, 16.8, 16.9
150
- */
151
- const deploymentConfigPrompts = [
152
- {
153
- type: 'list',
154
- name: 'deploymentConfig',
155
- message: 'Select deployment configuration:',
156
- choices: [
157
- { type: 'separator', separator: '── Large Language Models ──' },
158
- {
159
- name: 'Transformers with vLLM',
160
- value: 'transformers-vllm',
161
- short: 'transformers-vllm'
162
- },
163
- {
164
- name: 'Transformers with SGLang',
165
- value: 'transformers-sglang',
166
- short: 'transformers-sglang'
167
- },
168
- {
169
- name: 'Transformers with TensorRT-LLM',
170
- value: 'transformers-tensorrt-llm',
171
- short: 'transformers-tensorrt-llm'
172
- },
173
- {
174
- name: 'Transformers with LMI (Large Model Inference)',
175
- value: 'transformers-lmi',
176
- short: 'transformers-lmi'
177
- },
178
- {
179
- name: 'Transformers with DJL (Deep Java Library)',
180
- value: 'transformers-djl',
181
- short: 'transformers-djl'
182
- },
183
- { type: 'separator', separator: '── HTTP Serving ──' },
184
- {
185
- name: 'HTTP with Flask',
186
- value: 'http-flask',
187
- short: 'http-flask'
188
- },
189
- {
190
- name: 'HTTP with FastAPI',
191
- value: 'http-fastapi',
192
- short: 'http-fastapi'
193
- },
194
- { type: 'separator', separator: '── NVIDIA Triton Inference Server ──' },
195
- {
196
- name: 'Triton FIL (XGBoost, LightGBM)',
197
- value: 'triton-fil',
198
- short: 'triton-fil'
199
- },
200
- {
201
- name: 'Triton ONNX Runtime',
202
- value: 'triton-onnxruntime',
203
- short: 'triton-onnxruntime'
204
- },
205
- {
206
- name: 'Triton TensorFlow',
207
- value: 'triton-tensorflow',
208
- short: 'triton-tensorflow'
209
- },
210
- {
211
- name: 'Triton PyTorch',
212
- value: 'triton-pytorch',
213
- short: 'triton-pytorch'
214
- },
215
- {
216
- name: 'Triton vLLM',
217
- value: 'triton-vllm',
218
- short: 'triton-vllm'
219
- },
220
- {
221
- name: 'Triton TensorRT-LLM',
222
- value: 'triton-tensorrtllm',
223
- short: 'triton-tensorrtllm'
224
- },
225
- {
226
- name: 'Triton Python Backend',
227
- value: 'triton-python',
228
- short: 'triton-python'
229
- },
230
- { type: 'separator', separator: '── Diffusion Models ──' },
231
- {
232
- name: 'Diffusors with vLLM Omni',
233
- value: 'diffusors-vllm-omni',
234
- short: 'diffusors-vllm-omni'
235
- },
236
- { type: 'separator', separator: '── AWS Marketplace ──' },
237
- {
238
- name: 'Marketplace Model Package',
239
- value: 'marketplace',
240
- short: 'marketplace'
241
- }
242
- ]
243
- }
244
- ];
245
-
246
- // Keep legacy frameworkPrompts for backward compatibility (deprecated)
247
- const frameworkPrompts = deploymentConfigPrompts;
248
-
249
- /**
250
- * Engine selection prompt for http architecture
251
- * Requirements: 3.7
252
- */
253
- const enginePrompts = [
254
- {
255
- type: 'list',
256
- name: 'engine',
257
- message: 'Select ML engine:',
258
- choices: [
259
- { name: 'scikit-learn', value: 'sklearn' },
260
- { name: 'XGBoost', value: 'xgboost' },
261
- { name: 'TensorFlow', value: 'tensorflow' }
262
- ],
263
- when: (answers) => {
264
- const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
265
- return architecture === 'http';
266
- }
267
- }
268
- ];
269
-
270
- /**
271
- * Framework version selection prompts (for registry system)
272
- * Requirements: 2.1, 2.6, 8.2, 8.3
273
- */
274
- const frameworkVersionPrompts = [
275
- {
276
- type: 'list',
277
- name: 'frameworkVersion',
278
- message: (answers) => `Which version of ${answers.framework} are you using?`,
279
- choices: (answers) => {
280
- // Choices will be populated by PromptRunner with registry data
281
- return answers._frameworkVersionChoices || [];
282
- },
283
- when: (answers) => {
284
- // Only show if we have version choices available
285
- return answers._frameworkVersionChoices && answers._frameworkVersionChoices.length > 0;
286
- }
287
- }
288
- ];
289
-
290
- /**
291
- * Framework profile selection prompts (for registry system)
292
- * Requirements: 12.1, 12.2, 12.3, 12.4, 12.5, 12.10
293
- */
294
- const frameworkProfilePrompts = [
295
- {
296
- type: 'list',
297
- name: 'frameworkProfile',
298
- message: 'Select a framework configuration profile:',
299
- choices: (answers) => {
300
- // Choices will be populated by PromptRunner with registry data
301
- return answers._frameworkProfileChoices || [];
302
- },
303
- when: (answers) => {
304
- // Only show if we have profile choices available
305
- return answers._frameworkProfileChoices && answers._frameworkProfileChoices.length > 0;
306
- }
307
- }
308
- ];
309
-
310
- const modelFormatPrompts = [
311
- {
312
- type: 'list',
313
- name: 'modelFormat',
314
- message: 'In which format is your model serialized?',
315
- choices: (answers) => {
316
- // Derive architecture from deploymentConfig
317
- const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
318
- const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
319
-
320
- // For http architecture, use engine to determine formats
321
- if (architecture === 'http') {
322
- const engine = answers.engine;
323
- const formatMap = {
324
- 'xgboost': ['json', 'model', 'ubj'],
325
- 'sklearn': ['pkl', 'joblib'],
326
- 'tensorflow': ['keras', 'h5', 'SavedModel']
327
- };
328
- return formatMap[engine] || [];
329
- }
330
-
331
- // For triton architecture, use backend-specific formats
332
- if (architecture === 'triton') {
333
- // FIL backend has multiple format choices
334
- if (backend === 'fil') {
335
- return ['xgboost_json', 'xgboost_ubj', 'lightgbm_txt'];
336
- }
337
- // Python backend has multiple format choices
338
- if (backend === 'python') {
339
- return ['pkl', 'joblib', 'custom'];
340
- }
341
- // Other Triton backends have auto-set formats (handled in when clause)
342
- return [];
343
- }
344
-
345
- // Legacy support for old format (should not be reached with new configs)
346
- const framework = answers.framework || architecture;
347
- const formatMap = {
348
- 'xgboost': ['json', 'model', 'ubj'],
349
- 'sklearn': ['pkl', 'joblib'],
350
- 'tensorflow': ['keras', 'h5', 'SavedModel']
351
- };
352
- return formatMap[framework] || [];
353
- },
354
- when: answers => {
355
- const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
356
- const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
357
-
358
- // Skip for transformers (they use HF Hub)
359
- if (architecture === 'transformers') {
360
- return false;
361
- }
362
-
363
- // Skip for diffusors (they use HF Hub)
364
- if (architecture === 'diffusors') {
365
- return false;
366
- }
367
-
368
- // For http architecture, always show
369
- if (architecture === 'http') {
370
- return true;
371
- }
372
-
373
- // For triton architecture, only show for backends with multiple format choices
374
- if (architecture === 'triton') {
375
- // FIL and Python backends have multiple format choices
376
- if (backend === 'fil' || backend === 'python') {
377
- return true;
378
- }
379
- // Other backends have auto-set formats
380
- return false;
381
- }
382
-
383
- // Legacy support
384
- const framework = answers.framework || architecture;
385
- return framework !== 'transformers';
386
- }
387
- },
388
- {
389
- type: 'list',
390
- name: 'modelName',
391
- message: 'Which model do you want to use?',
392
- choices: (answers) => {
393
- // Use MCP model-picker choices when available
394
- if (answers._mcpModelChoices && answers._mcpModelChoices.length > 0) {
395
- return [...answers._mcpModelChoices, 'Custom (enter manually)'];
396
- }
397
- // Fallback to hardcoded defaults based on architecture
398
- const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
399
- if (architecture === 'diffusors') {
400
- return [
401
- 'stabilityai/stable-diffusion-3.5-medium',
402
- 'black-forest-labs/FLUX.1-schnell',
403
- 'black-forest-labs/FLUX.1-dev',
404
- 'Custom (enter manually)'
405
- ];
406
- }
407
- return [
408
- { type: 'separator', separator: '── Meta Llama ──' },
409
- 'meta-llama/Llama-3.2-1B-Instruct',
410
- 'meta-llama/Llama-3.2-3B-Instruct',
411
- 'meta-llama/Llama-3.1-8B-Instruct',
412
- 'meta-llama/Llama-3.3-70B-Instruct',
413
- { type: 'separator', separator: '── Qwen (Alibaba) ──' },
414
- 'Qwen/Qwen3-0.6B',
415
- 'Qwen/Qwen3-1.7B',
416
- 'Qwen/Qwen3-4B',
417
- 'Qwen/Qwen3-8B',
418
- 'Qwen/Qwen3-14B',
419
- 'Qwen/Qwen3-32B',
420
- 'Qwen/Qwen2.5-7B-Instruct',
421
- 'Qwen/Qwen2.5-14B-Instruct',
422
- 'Qwen/Qwen2.5-32B-Instruct',
423
- 'Qwen/Qwen2.5-72B-Instruct',
424
- { type: 'separator', separator: '── DeepSeek ──' },
425
- 'deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B',
426
- 'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B',
427
- 'deepseek-ai/DeepSeek-R1-Distill-Qwen-14B',
428
- 'deepseek-ai/DeepSeek-R1-Distill-Qwen-32B',
429
- 'deepseek-ai/DeepSeek-R1-Distill-Llama-8B',
430
- 'deepseek-ai/DeepSeek-R1-Distill-Llama-70B',
431
- { type: 'separator', separator: '── OpenAI ──' },
432
- 'openai/gpt-oss-20b',
433
- 'openai/gpt-oss-120b',
434
- { type: 'separator', separator: '──────────────' },
435
- 'Custom (enter manually)'
436
- ];
437
- },
438
- default: (answers) => {
439
- if (answers._mcpModelChoices && answers._mcpModelChoices.length > 0) {
440
- return answers._mcpModelChoices[0];
441
- }
442
- const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
443
- if (architecture === 'diffusors') {
444
- return 'stabilityai/stable-diffusion-3.5-medium';
445
- }
446
- return 'meta-llama/Llama-3.1-8B-Instruct';
447
- },
448
- when: answers => {
449
- const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
450
- const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
451
-
452
- // Show for transformers architecture
453
- if (architecture === 'transformers') {
454
- return true;
455
- }
456
-
457
- // Show for diffusors architecture (reuse HuggingFace model selection)
458
- if (architecture === 'diffusors') {
459
- return true;
460
- }
461
-
462
- // Show for Triton LLM backends (vllm, tensorrtllm)
463
- if (architecture === 'triton' && (backend === 'vllm' || backend === 'tensorrtllm')) {
464
- return true;
465
- }
466
-
467
- return false;
468
- }
469
- },
470
- {
471
- type: 'input',
472
- name: 'customModelName',
473
- message: 'Enter the model path:',
474
- validate: (input) => {
475
- if (!input || input.trim() === '') {
476
- return 'Model name is required';
477
- }
478
- // Basic validation - must contain a slash (org/model, s3://path, etc.)
479
- if (!input.includes('/')) {
480
- return 'Please use the full model path (e.g., microsoft/DialoGPT-medium, s3://bucket/model, registry://my-package)';
481
- }
482
- return true;
483
- },
484
- when: answers => {
485
- const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
486
- const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
487
-
488
- // Show for transformers with custom model selection
489
- if (architecture === 'transformers' && answers.modelName === 'Custom (enter manually)') {
490
- return true;
491
- }
492
-
493
- // Show for diffusors with custom model selection
494
- if (architecture === 'diffusors' && answers.modelName === 'Custom (enter manually)') {
495
- return true;
496
- }
497
-
498
- // Show for Triton LLM backends with custom model selection
499
- if (architecture === 'triton' && (backend === 'vllm' || backend === 'tensorrtllm') && answers.modelName === 'Custom (enter manually)') {
500
- return true;
501
- }
502
-
503
- return false;
504
- }
505
- }
506
- ];
507
-
508
- // Model server prompts are now deprecated - modelServer is derived from deploymentConfig
509
- const modelServerPrompts = [];
510
-
511
- /**
512
- * Model loading strategy prompt
513
- * Asks user whether to bake model into image at build time or download at container startup.
514
- * Requirements: 13.1, 13.2, 13.3, 13.4, 13.5
515
- */
516
- const modelLoadStrategyPrompts = [
517
- {
518
- type: 'list',
519
- name: 'modelLoadStrategy',
520
- message: 'How should the model be loaded?\n'
521
- + ' Build-time: Bakes model into image (larger image, faster startup)\n'
522
- + ' Runtime: Downloads at container startup (smaller image, slower startup)',
523
- choices: [
524
- { name: 'Runtime (download at startup)', value: 'runtime' },
525
- { name: 'Build-time (bake into image) [EXPERIMENTAL]', value: 'build-time' }
526
- ],
527
- default: 'runtime',
528
- when: (answers) => {
529
- const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
530
- return architecture === 'transformers' || architecture === 'diffusors';
531
- }
532
- }
533
- ];
534
-
535
- /**
536
- * Model profile selection prompts (for registry system)
537
- * Requirements: 12.1, 12.2, 12.3, 12.4, 12.5, 12.10
538
- */
539
- const modelProfilePrompts = [
540
- {
541
- type: 'list',
542
- name: 'modelProfile',
543
- message: 'Select a model configuration profile:',
544
- choices: (answers) => {
545
- // Choices will be populated by PromptRunner with registry data
546
- return answers._modelProfileChoices || [];
547
- },
548
- when: (answers) => {
549
- // Only show if we have profile choices available
550
- return answers._modelProfileChoices && answers._modelProfileChoices.length > 0;
551
- }
552
- }
553
- ];
554
-
555
- /**
556
- * List of example model IDs that don't require HF_TOKEN prompts
557
- * These are public models that don't need authentication
558
- */
559
- // eslint-disable-next-line no-unused-vars -- reference list for future use
560
- const EXAMPLE_MODEL_IDS = [
561
- 'meta-llama/Llama-3.1-8B-Instruct',
562
- 'meta-llama/Llama-3.2-3B-Instruct',
563
- 'Qwen/Qwen3-8B',
564
- 'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B',
565
- 'openai/gpt-oss-20b'
566
- ];
567
-
568
- const hfTokenPrompts = [
569
- {
570
- type: 'input',
571
- name: 'hfToken',
572
- message: 'HuggingFace token (enter token, "$HF_TOKEN" for env var, or leave empty):',
573
- when: (answers) => {
574
- const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
575
- const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
576
-
577
- // Prompt for transformers architecture
578
- const isTransformers = architecture === 'transformers';
579
-
580
- // Prompt for diffusors architecture (uses HuggingFace Hub)
581
- const isDiffusors = architecture === 'diffusors';
582
-
583
- // Prompt for Triton LLM backends (vllm, tensorrtllm)
584
- // Requirements: 9.1, 9.2
585
- const isTritonLlm = architecture === 'triton' && (backend === 'vllm' || backend === 'tensorrtllm');
586
-
587
- if (!isTransformers && !isDiffusors && !isTritonLlm) {
588
- return false;
589
- }
590
-
591
- // Skip HF token prompt for non-HuggingFace model sources
592
- // (S3, Registry models don't need HF auth)
593
- const modelSource = answers.modelSource;
594
- if (modelSource && modelSource !== 'huggingface') {
595
- return false;
596
- }
597
-
598
- // Display security warning before prompting
599
- console.log('\n🔐 HuggingFace Authentication');
600
- console.log(' Many models (e.g. Llama, Mistral) are gated and require a token.');
601
- console.log('⚠️ Security Note: The token will be baked into the Docker image.');
602
- console.log(' Anyone with access to the image can extract the token using \'docker inspect\'.');
603
- console.log(' For CI/CD pipelines, use "$HF_TOKEN" to reference an environment variable.');
604
- console.log(' This keeps the token out of the image and allows rotation without rebuilding.\n');
605
-
606
- return true;
607
- },
608
- validate: (input) => {
609
- // Empty is valid (not all models require auth)
610
- if (!input || input.trim() === '') {
611
- return true;
612
- }
613
-
614
- // $HF_TOKEN reference is valid
615
- if (input.trim() === '$HF_TOKEN') {
616
- return true;
617
- }
618
-
619
- // Direct token should start with hf_ (warning only, not blocking)
620
- if (!input.startsWith('hf_')) {
621
- console.warn('\n⚠️ Warning: HuggingFace tokens typically start with "hf_"');
622
- console.warn(' If this is intentional, you can ignore this warning.');
623
- }
624
-
625
- return true; // Always return true (non-blocking validation)
626
- }
627
- }
628
- ];
629
-
630
- const ngcApiKeyPrompts = [
631
- {
632
- type: 'input',
633
- name: 'ngcApiKey',
634
- message: 'NVIDIA NGC API key (enter key, "$NGC_API_KEY" for env var, or leave empty):',
635
- when: (answers) => {
636
- const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
637
- const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
638
-
639
- // Never prompt for NGC key for Triton configs (public images)
640
- // Requirements: 9.2
641
- if (architecture === 'triton') {
642
- return false;
643
- }
644
-
645
- // Never prompt for NGC key for diffusors configs (public Docker Hub images)
646
- if (architecture === 'diffusors') {
647
- return false;
648
- }
649
-
650
- // Only prompt for transformers-tensorrt-llm
651
- if (architecture === 'transformers' && backend === 'tensorrt-llm') {
652
- console.log('\n🔐 NVIDIA NGC Authentication');
653
- console.log(' TensorRT-LLM base images are hosted on NVIDIA NGC and require an API key.');
654
- console.log(' 1. Create account at: https://ngc.nvidia.com/');
655
- console.log(' 2. Generate API key in account settings');
656
- console.log(' For CI/CD pipelines, use "$NGC_API_KEY" to reference an environment variable.\n');
657
- return true;
658
- }
659
-
660
- return false;
661
- },
662
- validate: (input) => {
663
- if (!input || input.trim() === '') {
664
- return true;
665
- }
666
-
667
- if (input.trim() === '$NGC_API_KEY') {
668
- return true;
669
- }
670
-
671
- return true;
672
- }
673
- }
674
- ];
675
-
676
- const modulePrompts = [
677
- {
678
- type: 'confirm',
679
- name: 'includeSampleModel',
680
- message: 'Include sample Abalone classifier?',
681
- default: true,
682
- when: (answers) => {
683
- const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
684
- const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
685
-
686
- // Never for transformers
687
- if (architecture === 'transformers') {
688
- return false;
689
- }
690
-
691
- // Never for diffusors (diffusion models cannot be trained inline)
692
- if (architecture === 'diffusors') {
693
- return false;
694
- }
695
-
696
- // For Triton, check if backend supports sample model
697
- if (architecture === 'triton') {
698
- // Triton LLM backends don't support sample model
699
- if (backend === 'vllm' || backend === 'tensorrtllm' || backend === 'pytorch') {
700
- return false;
701
- }
702
- // Other Triton backends support sample model
703
- return true;
704
- }
705
-
706
- // For http architecture, always show
707
- return true;
708
- }
709
- },
710
- {
711
- type: 'checkbox',
712
- name: 'testTypes',
713
- message: 'Test type?',
714
- choices: (answers) => {
715
- const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
716
- const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
717
-
718
- // Transformers and Triton LLM backends only support hosted endpoint tests
719
- if (architecture === 'transformers') {
720
- return ['hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'];
721
- }
722
- if (architecture === 'diffusors') {
723
- return ['hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'];
724
- }
725
- if (architecture === 'triton' && (backend === 'vllm' || backend === 'tensorrtllm')) {
726
- return ['hosted-model-endpoint'];
727
- }
728
-
729
- return ['local-model-cli', 'local-model-server', 'hosted-model-endpoint'];
730
- },
731
- default: (answers) => {
732
- const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
733
- const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
734
-
735
- if (architecture === 'transformers') {
736
- return ['hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'];
737
- }
738
- if (architecture === 'diffusors') {
739
- return ['hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'];
740
- }
741
- if (architecture === 'triton' && (backend === 'vllm' || backend === 'tensorrtllm')) {
742
- return ['hosted-model-endpoint'];
743
- }
744
-
745
- return ['local-model-cli', 'local-model-server', 'hosted-model-endpoint'];
746
- }
747
- }
748
- ];
749
-
750
- /**
751
- * Infrastructure prompts split into sub-phases so the prompt runner can
752
- * interleave MCP queries between them (e.g. query instance-recommender
753
- * only after we know the deployment target is realtime-inference).
754
- *
755
- * Ordering: Region → Deployment Target → Instance/HyperPod → Build Target → Role
756
- */
757
-
758
- // Sub-phase A: Region + Deployment Target (always asked first)
759
- const infraRegionAndTargetPrompts = [
760
- {
761
- type: 'list',
762
- name: 'awsRegion',
763
- message: 'Target AWS region?',
764
- choices: (answers) => {
765
- // If a bootstrap profile set a region, include it in choices
766
- const bootstrapRegion = answers._bootstrapRegion;
767
- const choices = ['us-east-1'];
768
- if (bootstrapRegion && bootstrapRegion !== 'us-east-1') {
769
- choices.unshift({ name: `${bootstrapRegion} (from bootstrap profile)`, value: bootstrapRegion });
770
- }
771
- choices.push({ name: 'Custom...', value: 'custom' });
772
- return choices;
773
- },
774
- default: (answers) => answers._bootstrapRegion || 'us-east-1'
775
- },
776
- {
777
- type: 'input',
778
- name: 'customAwsRegion',
779
- message: 'Enter AWS region (e.g., us-west-2, eu-west-1):',
780
- when: answers => answers.awsRegion === 'custom'
781
- },
782
- {
783
- type: 'list',
784
- name: 'deploymentTarget',
785
- message: 'Deployment target?',
786
- choices: [
787
- { name: 'SageMaker Real-Time Inference', value: 'realtime-inference' },
788
- { name: 'SageMaker Async Inference', value: 'async-inference' },
789
- { name: 'SageMaker Batch Transform', value: 'batch-transform' },
790
- { name: 'SageMaker HyperPod - EKS', value: 'hyperpod-eks' }
791
- ],
792
- default: 'realtime-inference'
793
- }
794
- ];
795
-
796
- // Sub-phase A2: Existing endpoint prompt (only when deploymentTarget === 'realtime-inference')
797
- const infraExistingEndpointPrompts = [
798
- {
799
- type: 'list',
800
- name: 'useExistingEndpoint',
801
- message: 'Deploy to an existing endpoint? (attach IC to running endpoint)',
802
- choices: [
803
- { name: 'No — create a new endpoint', value: 'no' },
804
- { name: 'Yes — attach to an existing endpoint', value: 'yes' }
805
- ],
806
- default: 'no',
807
- when: answers => answers.deploymentTarget === 'realtime-inference'
808
- },
809
- {
810
- type: 'list',
811
- name: 'existingEndpointName',
812
- message: 'Select endpoint:',
813
- choices: (answers) => {
814
- const mcpChoices = answers._mcpEndpointChoices || [];
815
- if (mcpChoices.length > 0) {
816
- return [...mcpChoices, { name: 'Custom (enter manually)', value: 'custom' }];
817
- }
818
- return [{ name: 'Enter endpoint name manually', value: 'custom' }];
819
- },
820
- when: answers => answers.useExistingEndpoint === 'yes'
821
- },
822
- {
823
- type: 'input',
824
- name: 'customExistingEndpointName',
825
- message: 'Enter existing endpoint name:',
826
- validate: (input) => {
827
- if (!input || input.trim() === '') {
828
- return 'Endpoint name is required';
829
- }
830
- return true;
831
- },
832
- when: answers => answers.useExistingEndpoint === 'yes' && answers.existingEndpointName === 'custom'
833
- }
834
- ];
835
-
836
// Sub-phase B: Instance type.
// The multi-select prompt is realtime-inference only; the single-select prompt is
// asked for realtime-inference, async-inference, batch-transform and hyperpod-eks.
const infraInstancePrompts = [
    // Multi-select prompt: shown when MCP sizer has choices AND deployment target is realtime-inference
    // User can select 1-5 instances; selection count determines single-type vs instance-pools behavior
    // Requirements: 6.4
    {
        type: 'checkbox',
        name: 'instanceTypeSelections',
        when: answers => answers.deploymentTarget === 'realtime-inference' &&
            answers._mcpInstanceChoices && answers._mcpInstanceChoices.length > 1,
        message: 'Select instance type(s) — select multiple for instance pools (priority = selection order, max 5):',
        choices: (answers) => {
            const mcpChoices = answers._mcpInstanceChoices || [];
            // Show all compatible instances — CUDA generation filtering happens
            // after selection to allow users to see all options and make informed choices.
            // If they select instances from different generations, the post-selection
            // filter (filterByCudaGeneration in prompt-runner.js) will warn and remove incompatible ones.
            const choices = mcpChoices.map(instanceType => {
                const entry = instanceCatalogRaw[instanceType];
                const gpuInfo = entry ? `${entry.gpus} GPU${entry.gpus > 1 ? 's' : ''}, ${entry.gpuMemoryGb || '?'}GB` : '';
                return {
                    name: gpuInfo ? `${instanceType} (${gpuInfo})` : instanceType,
                    value: instanceType,
                    short: instanceType
                };
            });
            // Always include a "Custom Input" option at the end
            choices.push({
                name: 'Custom Input (enter one or comma-separated list)',
                value: '__custom_input__',
                short: 'Custom'
            });
            return choices;
        },
        validate: (input) => {
            if (!input || input.length === 0) {
                return 'Select at least one instance type';
            }
            if (input.length > 5) {
                return 'Maximum 5 instance types allowed (API limit). Please deselect some.';
            }
            return true;
        }
    },
    // Custom input prompt for multi-select: shown when user selects "Custom Input" in instanceTypeSelections
    {
        type: 'input',
        name: 'customInstanceTypeSelections',
        message: 'Enter instance type(s) — single for homogeneous, comma-separated for heterogeneous (e.g., ml.g5.xlarge or ml.g5.xlarge,ml.g5.2xlarge):',
        when: answers => Array.isArray(answers.instanceTypeSelections) &&
            answers.instanceTypeSelections.includes('__custom_input__'),
        validate: (input) => {
            if (!input || input.trim() === '') {
                return 'At least one instance type is required';
            }
            const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
            // Tolerate stray whitespace and empty segments ("a, ,b").
            const instances = input.split(',').map(s => s.trim()).filter(s => s.length > 0);
            if (instances.length === 0) {
                return 'At least one instance type is required';
            }
            if (instances.length > 5) {
                return 'Maximum 5 instance types allowed (API limit).';
            }
            for (const inst of instances) {
                if (!instancePattern.test(inst)) {
                    return `Invalid instance type format: "${inst}". Expected format: ml.{family}.{size} (e.g., ml.g5.xlarge)`;
                }
            }
            return true;
        }
    },
    // Single-select prompt: shown when no MCP choices, or for non-realtime targets, or only 1 MCP choice
    {
        type: 'list',
        name: 'instanceType',
        when: answers => {
            // Skip if multi-select was shown (realtime with multiple MCP choices)
            if (answers.deploymentTarget === 'realtime-inference' &&
                answers._mcpInstanceChoices && answers._mcpInstanceChoices.length > 1) {
                return false;
            }
            return ['realtime-inference', 'async-inference', 'batch-transform', 'hyperpod-eks']
                .includes(answers.deploymentTarget);
        },
        // Side effect: prints the instance table to stdout before returning the prompt text.
        message: (answers) => {
            // Skip table when MCP sizer already displayed annotated results
            if (answers._mcpInstanceChoices && answers._mcpInstanceChoices.length > 0) {
                return 'Select instance type:';
            }

            // From here on there are no MCP choices, so no MCP filtering is needed.
            const framework = answers.framework || answers.deploymentConfig?.split('-')[0];

            const table = new Table({
                head: [
                    chalk.cyan('Instance Type'),
                    chalk.cyan('vCPUs'),
                    chalk.cyan('Memory'),
                    chalk.cyan('Accelerator'),
                    chalk.cyan('Use Case')
                ],
                colWidths: [20, 8, 12, 20, 25]
            });

            const instances = Object.values(instanceTypeRegistry);
            const filteredInstances = framework === 'transformers'
                ? instances.filter(i => i.category === 'gpu')
                : instances;

            filteredInstances.forEach(instance => {
                table.push([
                    instance.type,
                    instance.vcpus.toString(),
                    instance.memory,
                    instance.accelerator,
                    instance.useCase
                ]);
            });

            table.push([
                chalk.yellow('Custom...'),
                '-',
                '-',
                '-',
                'Specify your own'
            ]);

            console.log(`\n${ chalk.bold('Available Instance Types:')}`);
            console.log(table.toString());
            console.log('');

            return 'Select instance type:';
        },
        choices: (answers) => {
            const framework = answers.framework || answers.deploymentConfig?.split('-')[0];

            const instances = Object.values(instanceTypeRegistry);
            let filteredInstances = framework === 'transformers'
                ? instances.filter(i => i.category === 'gpu')
                : instances;

            // Narrow the registry to what the MCP sizer recommended, when it recommended anything.
            const mcpChoices = answers._mcpInstanceChoices;
            if (mcpChoices && mcpChoices.length > 0) {
                const mcpSet = new Set(mcpChoices);
                filteredInstances = filteredInstances.filter(i => mcpSet.has(i.type));
            }

            const choices = filteredInstances.map(instance => ({
                name: instance.type,
                value: instance.type
            }));

            choices.push({
                name: 'Custom...',
                value: 'custom'
            });

            return choices;
        },
        default: (answers) => {
            const framework = answers.framework || answers.deploymentConfig?.split('-')[0];
            // The server name itself may contain '-' (e.g. 'tensorrt-llm'), so take
            // everything after the framework segment rather than only segment [1].
            const modelServer = answers.modelServer || answers.deploymentConfig?.split('-').slice(1).join('-');

            if (framework === 'transformers') {
                if (modelServer === 'tensorrt-llm') {
                    return 'ml.g5.12xlarge';
                }
                return 'ml.g5.2xlarge';
            }
            return 'ml.m5.xlarge';
        }
    },
    {
        type: 'input',
        name: 'customInstanceType',
        message: 'Enter AWS SageMaker instance type (e.g., ml.t3.medium, ml.g4dn.xlarge):',
        validate: (input) => {
            if (!input || input.trim() === '') {
                return 'Instance type is required';
            }
            const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
            if (!instancePattern.test(input.trim())) {
                return 'Invalid instance type format. Expected format: ml.{family}.{size} (e.g., ml.m5.large, ml.g4dn.xlarge)';
            }
            return true;
        },
        when: answers => answers.instanceType === 'custom'
    }
];
1032
-
1033
// Sub-phase C: HyperPod EKS prompts.
// Every question here is gated on deploymentTarget === 'hyperpod-eks'.
const infraHyperPodPrompts = [
    {
        type: 'list',
        name: 'hyperPodCluster',
        message: 'Select HyperPod EKS cluster:',
        choices: ({ _mcpHyperPodChoices }) => {
            const discovered = _mcpHyperPodChoices || [];
            // With no MCP results, manual entry is the only option offered.
            return discovered.length > 0
                ? [...discovered, { name: 'Custom (enter manually)', value: 'custom' }]
                : [{ name: 'Enter cluster name manually', value: 'custom' }];
        },
        when: ({ deploymentTarget }) => deploymentTarget === 'hyperpod-eks'
    },
    {
        type: 'input',
        name: 'customHyperPodCluster',
        message: 'Enter HyperPod EKS cluster name:',
        // Reject empty / whitespace-only names.
        validate: (value) => {
            const isBlank = !value || value.trim() === '';
            return isBlank ? 'Cluster name is required' : true;
        },
        when: ({ deploymentTarget, hyperPodCluster }) =>
            deploymentTarget === 'hyperpod-eks' && hyperPodCluster === 'custom'
    },
    {
        type: 'input',
        name: 'hyperPodNamespace',
        message: 'Kubernetes namespace?',
        default: 'default',
        when: ({ deploymentTarget }) => deploymentTarget === 'hyperpod-eks'
    },
    {
        type: 'number',
        name: 'hyperPodReplicas',
        message: 'Number of pod replicas?',
        default: 1,
        when: ({ deploymentTarget }) => deploymentTarget === 'hyperpod-eks'
    },
    {
        type: 'input',
        name: 'fsxVolumeHandle',
        message: 'FSx for Lustre volume handle (optional, press Enter to skip):',
        when: ({ deploymentTarget }) => deploymentTarget === 'hyperpod-eks'
    }
];
1082
-
1083
// Sub-phase D: Build target + role ARN (always asked last)
const infraBuildPrompts = [
    {
        type: 'list',
        name: 'buildTarget',
        message: 'Build target?',
        choices: [
            { name: 'CodeBuild (recommended)', value: 'codebuild' }
        ],
        default: 'codebuild'
    },
    {
        type: 'list',
        name: 'codebuildComputeType',
        message: 'CodeBuild compute type?',
        choices: [
            'BUILD_GENERAL1_SMALL',
            'BUILD_GENERAL1_MEDIUM',
            'BUILD_GENERAL1_LARGE'
        ],
        default: 'BUILD_GENERAL1_MEDIUM',
        when: answers => answers.buildTarget === 'codebuild'
    },
    {
        type: 'input',
        name: 'awsRoleArn',
        message: 'AWS IAM Role ARN for SageMaker execution (optional)?',
        /**
         * Accepts an empty answer (the role is optional) or a syntactically valid IAM role ARN.
         * @param {string} input - Raw answer typed by the user.
         * @returns {true|string} `true` when acceptable, otherwise an error message.
         */
        validate: (input) => {
            if (!input || input.trim() === '') {
                return true;
            }
            // Partition may be aws, aws-cn, aws-us-gov, ...; the role name may be
            // preceded by an IAM path such as "service-role/", which is what
            // console-created SageMaker execution roles use.
            const arnPattern = /^arn:aws(?:-[a-z]+)*:iam::\d{12}:role\/(?:[\w+=,.@-]+\/)*[\w+=,.@-]+$/;
            if (!arnPattern.test(input)) {
                return 'Invalid ARN format. Expected: arn:aws:iam::123456789012:role/RoleName';
            }
            return true;
        }
    }
];
1122
-
1123
/**
 * Sub-phase: async-inference prompts.
 * Each question below is asked only when deploymentTarget === 'async-inference';
 * that gate is attached once, after the list, instead of per question.
 * Requirements: 2.1, 2.2, 2.3, 2.4
 */
const infraAsyncPrompts = [
    {
        type: 'input',
        name: 'asyncS3OutputPath',
        message: 'S3 output path for async results (leave empty for default: s3://ml-container-creator-async-{region}-{account-id}/{project-name}/output/):'
    },
    {
        type: 'input',
        name: 'asyncSnsSuccessTopic',
        message: 'SNS success topic ARN (leave empty for auto-created per-project topic):'
    },
    {
        type: 'input',
        name: 'asyncSnsErrorTopic',
        message: 'SNS error topic ARN (leave empty for auto-created per-project topic):'
    },
    {
        type: 'number',
        name: 'asyncMaxConcurrentInvocations',
        message: 'Max concurrent invocations per instance?',
        default: 1
    }
].map(question => ({
    ...question,
    when: answers => answers.deploymentTarget === 'async-inference'
}));
1154
-
1155
/**
 * Sub-phase: batch-transform prompts.
 * Each question below is asked only when deploymentTarget === 'batch-transform';
 * that gate is attached once, after the list, instead of per question.
 * Requirements: 2.1, 2.2, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9
 */
const infraBatchTransformPrompts = [
    {
        type: 'input',
        name: 'batchInputPath',
        message: 'S3 input path for batch transform data (leave empty for default: s3://ml-container-creator-batch-{region}-{account-id}/{project-name}/input/):'
    },
    {
        type: 'input',
        name: 'batchOutputPath',
        message: 'S3 output path for batch transform results (leave empty for default: s3://ml-container-creator-batch-{region}-{account-id}/{project-name}/output/):'
    },
    {
        type: 'number',
        name: 'batchInstanceCount',
        message: 'How many instances should run the batch job in parallel?',
        default: 1
    },
    {
        type: 'list',
        name: 'batchSplitType',
        message: 'Input file format — how should SageMaker read your input files?',
        choices: [
            { name: 'Line — one record per line (JSON lines, CSV)', value: 'Line' },
            { name: 'RecordIO — Amazon RecordIO format', value: 'RecordIO' },
            { name: 'None — send each file as a single request', value: 'None' }
        ],
        default: 'Line'
    },
    {
        type: 'list',
        name: 'batchStrategy',
        message: 'How many records should be sent per inference request?',
        choices: [
            { name: 'MultiRecord — batch multiple records per request (higher throughput)', value: 'MultiRecord' },
            { name: 'SingleRecord — one record per request (simpler, more predictable)', value: 'SingleRecord' }
        ],
        default: 'MultiRecord'
    },
    {
        type: 'list',
        name: 'batchJoinSource',
        message: 'Include original input data alongside predictions in the output?',
        choices: [
            { name: 'No — output predictions only', value: 'None' },
            { name: 'Yes — merge input with predictions (useful for traceability)', value: 'Input' }
        ],
        default: 'None'
    },
    {
        type: 'number',
        name: 'batchMaxConcurrentTransforms',
        message: 'Max concurrent inference requests per instance?',
        default: 1
    },
    {
        type: 'number',
        name: 'batchMaxPayloadInMB',
        message: 'Max request payload size in MB (0-100)?',
        default: 6
    }
].map(question => ({
    ...question,
    when: answers => answers.deploymentTarget === 'batch-transform'
}));
1228
-
1229
// Combined view for tests and backward compatibility.
// Flat list, in order: region/target, instance, HyperPod, build prompts.
const infrastructurePrompts = [].concat(
    infraRegionAndTargetPrompts,
    infraInstancePrompts,
    infraHyperPodPrompts,
    infraBuildPrompts
);
1236
-
1237
// Project name prompt. The suggested default is produced by generateProjectName().
const projectPrompts = [
    {
        type: 'input',
        name: 'projectName',
        message: 'What is the Project Name?',
        // Use the explicit framework answer when present; otherwise fall back to
        // the first '-'-separated segment of deploymentConfig.
        default: ({ framework, deploymentConfig }) =>
            generateProjectName(framework || deploymentConfig?.split('-')[0])
    }
];
1249
-
1250
// Output directory prompt. The default is "./<projectName>-<timestamp>", where the
// timestamp is the current ISO-8601 time cut to seconds with ':' replaced by '-'.
const destinationPrompts = [
    {
        type: 'input',
        name: 'destinationDir',
        message: 'Where will the output directory be?',
        default: ({ projectName }) => {
            const isoNow = new Date().toISOString();
            const stamp = isoNow.replace(/[:.]/g, '-').slice(0, 19);
            return `./${projectName}-${stamp}`;
        }
    }
];
1261
-
1262
/**
 * Format ImageEntry[] into Inquirer list choices with tabular display.
 *
 * Columns are space-padded: repository (30), tag (16), architecture (7),
 * CUDA (6, transformer only), Python (8), then the creation date (YYYY-MM-DD).
 * Missing fields are rendered as '-' instead of throwing, so an entry without
 * labels or a creation timestamp still yields a usable choice.
 *
 * @param {ImageEntry[]} entries - Image entries from the resolver (null/undefined is treated as empty)
 * @param {boolean} isTransformer - Whether to show CUDA column
 * @returns {Array<{name: string, value: string, _meta: {labels: Object, accelerator: *}}>} Inquirer choices
 */
function formatImageChoices(entries, isTransformer) {
    // Render any missing/non-string cell as '-' so padEnd/slice never throw.
    const cell = (value) => (value === undefined || value === null || value === '' ? '-' : String(value));

    return (entries ?? []).map(entry => {
        const labels = entry.labels ?? {};
        const cuda = cell(labels.cuda_version);
        const python = cell(labels.python_version);
        const date = entry.created ? String(entry.created).slice(0, 10) : '-';

        const repository = cell(entry.repository).padEnd(30);
        const tag = cell(entry.tag).padEnd(16);
        const architecture = cell(entry.architecture).padEnd(7);

        const name = isTransformer
            ? `${repository} ${tag} ${architecture} ${cuda.padEnd(6)} ${python.padEnd(8)} ${date}`
            : `${repository} ${tag} ${architecture} ${python.padEnd(8)} ${date}`;

        return { name, value: entry.image, _meta: { labels, accelerator: entry.accelerator } };
    });
}
1282
-
1283
/**
 * Free-text search prompt used to narrow the Python base image list.
 * Asked for every architecture except 'transformers' and 'triton'.
 * Requirements: 5.2, 5.4
 */
const baseImageSearchPrompts = [
    {
        type: 'input',
        name: 'baseImageSearch',
        message: '🔌 Search for a Python base image (e.g. "3.11", "3.10", or leave empty for all):',
        default: '',
        when: ({ architecture, deploymentConfig }) => {
            const resolved = architecture || deploymentConfig?.split('-')[0];
            // Skip for transformers (uses model-server images) and triton (uses NGC images)
            return !['transformers', 'triton'].includes(resolved);
        }
    }
];
1300
-
1301
/**
 * Base image selection prompts.
 * The list is shown only when answers._mcpBaseImageChoices is non-empty; the
 * free-text follow-up is shown only when the user picks 'custom' from that list.
 * Requirements: 5.2, 5.4, 10.1, 10.2, 10.3
 */
const baseImagePrompts = [
    {
        type: 'list',
        name: 'baseImage',
        message: 'Select base container image:',
        choices: ({ _mcpBaseImageChoices }) => [
            ...(_mcpBaseImageChoices || []),
            { name: 'Custom (enter your own)', value: 'custom' }
        ],
        when: ({ _mcpBaseImageChoices }) =>
            _mcpBaseImageChoices && _mcpBaseImageChoices.length > 0
    },
    {
        type: 'input',
        name: 'customBaseImage',
        message: 'Enter custom base container image (e.g. myrepo/myimage:v1):',
        validate: (value) => {
            if (!value || value.trim() === '') {
                return 'Base image is required';
            }
            // [registry/]repository[:tag], checked against the trimmed answer.
            const imageRef = /^[a-zA-Z0-9][a-zA-Z0-9._\-/]*(:[a-zA-Z0-9._-]+)?$/;
            return imageRef.test(value.trim())
                ? true
                : 'Invalid image format. Expected: [registry/]repository[:tag]';
        },
        when: ({ baseImage }) => baseImage === 'custom'
    }
];
1335
-
1336
/**
 * LoRA adapter prompts for multi-adapter serving configuration.
 * The opt-in question is shown only when the architecture is 'transformers' AND the
 * backend is one of: vllm, sglang, djl-lmi, lmi, djl. The tuning questions follow
 * only when the user enables LoRA.
 * Requirements: 1.1, 1.2, 1.4
 */
const loraPrompts = [
    {
        type: 'confirm',
        name: 'enableLora',
        message: 'Enable LoRA adapter serving?',
        default: false,
        when: ({ architecture, backend, deploymentConfig }) => {
            // deploymentConfig is "<architecture>-<backend>"; the backend part may itself contain '-'.
            const resolvedArchitecture = architecture || deploymentConfig?.split('-')[0];
            const resolvedBackend = backend || deploymentConfig?.split('-').slice(1).join('-');
            const loraCapableServers = ['vllm', 'sglang', 'djl-lmi', 'lmi', 'djl'];
            return resolvedArchitecture === 'transformers' && loraCapableServers.includes(resolvedBackend);
        }
    },
    {
        type: 'number',
        name: 'maxLoras',
        message: 'Maximum concurrent LoRA adapters in GPU memory:',
        default: 30,
        when: ({ enableLora }) => enableLora === true
    },
    {
        type: 'number',
        name: 'maxLoraRank',
        message: 'Maximum LoRA rank:',
        default: 64,
        when: ({ enableLora }) => enableLora === true
    }
];
1370
-
1371
/**
 * Benchmark prompts for SageMaker AI Benchmarking (NVIDIA AIPerf).
 * Each question below is asked only when answers.includeBenchmark === true;
 * that gate is attached once, after the list, instead of per question.
 * Requirements: 2.1, 2.2, 2.3, 2.4, 2.5
 */
const benchmarkPrompts = [
    {
        type: 'number',
        name: 'benchmarkConcurrency',
        message: 'Concurrent requests for benchmark:',
        default: 10
    },
    {
        type: 'number',
        name: 'benchmarkInputTokensMean',
        message: 'Mean input tokens per request:',
        default: 550
    },
    {
        type: 'number',
        name: 'benchmarkOutputTokensMean',
        message: 'Mean output tokens per request:',
        default: 150
    },
    {
        type: 'confirm',
        name: 'benchmarkStreaming',
        message: 'Enable streaming for benchmark?',
        default: true
    },
    {
        type: 'input',
        name: 'benchmarkRequestCount',
        message: 'Total request count (leave empty for service default):',
        default: ''
    },
    {
        type: 'input',
        name: 'benchmarkS3OutputPath',
        message: 'Benchmark results S3 path (leave empty for auto-created bucket):',
        default: ''
    }
].map(question => ({
    ...question,
    when: (answers) => answers.includeBenchmark === true
}));
1420
-
1421
- export {
1422
- deploymentConfigPrompts,
1423
- frameworkPrompts, // Deprecated: kept for backward compatibility
1424
- enginePrompts,
1425
- frameworkVersionPrompts,
1426
- frameworkProfilePrompts,
1427
- modelFormatPrompts,
1428
- modelServerPrompts, // Deprecated: now empty, modelServer derived from deploymentConfig
1429
- modelLoadStrategyPrompts,
1430
- modelProfilePrompts,
1431
- hfTokenPrompts,
1432
- ngcApiKeyPrompts,
1433
- modulePrompts,
1434
- loraPrompts,
1435
- benchmarkPrompts,
1436
- infrastructurePrompts,
1437
- infraRegionAndTargetPrompts,
1438
- infraExistingEndpointPrompts,
1439
- infraInstancePrompts,
1440
- infraAsyncPrompts,
1441
- infraBatchTransformPrompts,
1442
- infraHyperPodPrompts,
1443
- infraBuildPrompts,
1444
- projectPrompts,
1445
- destinationPrompts,
1446
- baseImageSearchPrompts,
1447
- baseImagePrompts,
1448
- formatImageChoices,
1449
- filterByCudaGeneration,
1450
- getInstanceCudaGeneration,
1451
- instanceCatalogRaw
1452
- };
8
+ export * from './prompts/index.js';