@aws/ml-container-creator 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/bin/cli.js +31 -137
  2. package/config/parameter-schema-v2.json +2065 -0
  3. package/package.json +6 -3
  4. package/servers/lib/catalogs/jumpstart-public.json +101 -16
  5. package/servers/lib/catalogs/models.json +182 -26
  6. package/src/app.js +6 -389
  7. package/src/lib/bootstrap-command-handler.js +75 -1078
  8. package/src/lib/bootstrap-profile-manager.js +634 -0
  9. package/src/lib/bootstrap-provisioners.js +421 -0
  10. package/src/lib/config-loader.js +405 -0
  11. package/src/lib/config-manager.js +59 -1668
  12. package/src/lib/config-mcp-client.js +118 -0
  13. package/src/lib/config-validator.js +634 -0
  14. package/src/lib/cuda-resolver.js +140 -0
  15. package/src/lib/e2e-catalog-validator.js +251 -3
  16. package/src/lib/e2e-ci-recorder.js +103 -0
  17. package/src/lib/generated/cli-options.js +471 -0
  18. package/src/lib/generated/parameter-matrix.js +671 -0
  19. package/src/lib/generated/validation-rules.js +202 -0
  20. package/src/lib/marketplace-flow.js +276 -0
  21. package/src/lib/mcp-query-runner.js +768 -0
  22. package/src/lib/parameter-schema-validator.js +62 -18
  23. package/src/lib/prompt-runner.js +41 -1504
  24. package/src/lib/prompts/feature-prompts.js +172 -0
  25. package/src/lib/prompts/index.js +48 -0
  26. package/src/lib/prompts/infrastructure-prompts.js +690 -0
  27. package/src/lib/prompts/model-prompts.js +552 -0
  28. package/src/lib/prompts/project-prompts.js +70 -0
  29. package/src/lib/prompts.js +2 -1446
  30. package/src/lib/registry-command-handler.js +135 -3
  31. package/src/lib/secrets-prompt-runner.js +251 -0
  32. package/src/lib/template-variable-resolver.js +398 -0
  33. package/templates/code/serve +5 -134
  34. package/templates/code/serve.d/lmi.ejs +19 -0
  35. package/templates/code/serve.d/sglang.ejs +47 -0
  36. package/templates/code/serve.d/tensorrt-llm.ejs +53 -0
  37. package/templates/code/serve.d/vllm.ejs +48 -0
  38. package/templates/do/clean +1 -1387
  39. package/templates/do/clean.d/async-inference.ejs +508 -0
  40. package/templates/do/clean.d/batch-transform.ejs +512 -0
  41. package/templates/do/clean.d/hyperpod-eks.ejs +481 -0
  42. package/templates/do/clean.d/managed-inference.ejs +1043 -0
  43. package/templates/do/deploy +1 -1766
  44. package/templates/do/deploy.d/async-inference.ejs +501 -0
  45. package/templates/do/deploy.d/batch-transform.ejs +529 -0
  46. package/templates/do/deploy.d/hyperpod-eks.ejs +339 -0
  47. package/templates/do/deploy.d/managed-inference.ejs +726 -0
  48. package/config/parameter-schema.json +0 -88
package/src/app.js CHANGED
@@ -16,7 +16,7 @@ import CommentGenerator from './lib/comment-generator.js';
16
16
  import ConfigurationManager from './lib/configuration-manager.js';
17
17
  import RegistryLoader from './lib/registry-loader.js';
18
18
  import { resolvePrefixedEnvVars } from './lib/engine-prefix-resolver.js';
19
- import { isTuneSupported } from './lib/tune-catalog-validator.js';
19
+ import { _ensureTemplateVariables, _validateEnvironmentVariables, _mergeEnvVarsWithPrecedence } from './lib/template-variable-resolver.js';
20
20
  import ejs from 'ejs';
21
21
 
22
22
  const __filename = fileURLToPath(import.meta.url);
@@ -296,6 +296,11 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
296
296
  // Build ignore patterns
297
297
  const ignorePatterns = [];
298
298
 
299
+ // EJS partials — included by templates at render time, not copied to output
300
+ ignorePatterns.push('**/serve.d/**');
301
+ ignorePatterns.push('**/deploy.d/**');
302
+ ignorePatterns.push('**/clean.d/**');
303
+
299
304
  if (answers.deploymentTarget !== 'hyperpod-eks') {
300
305
  ignorePatterns.push('**/hyperpod/**');
301
306
  }
@@ -629,218 +634,6 @@ function _createGeneratorAdapter(projectName, options) {
629
634
  return adapter;
630
635
  }
631
636
 
632
- /**
633
- * Ensures all template variables have proper defaults to prevent
634
- * "undefined" errors in EJS templates. Also enriches answers with
635
- * registry data (env var merging, HuggingFace data, Triton base image).
636
- *
637
- * @param {object} answers - Answers object to fill defaults into
638
- * @param {object|null} registryConfigManager - Registry configuration manager (or null)
639
- */
640
- async function _ensureTemplateVariables(answers, registryConfigManager = null) {
641
- const defaults = {
642
- chatTemplate: null,
643
- chatTemplateSource: null,
644
- hfToken: null,
645
- hfTokenArn: null,
646
- ngcApiKey: null,
647
- ngcTokenArn: null,
648
- envVars: {},
649
- inferenceAmiVersion: null,
650
- accelerator: null,
651
- frameworkVersion: null,
652
- validationLevel: 'unknown',
653
- configSources: [],
654
- recommendedInstanceTypes: [],
655
- roleArn: null,
656
- deploymentConfig: '',
657
- architecture: null,
658
- backend: null,
659
- engine: null,
660
- codebuildComputeType: null,
661
- codebuildProjectName: null,
662
- modelName: null,
663
- modelFormat: null,
664
- includeSampleModel: true,
665
- includeTesting: true,
666
- testTypes: [],
667
- buildTimestamp: new Date().toISOString(),
668
- buildTarget: 'codebuild',
669
- deploymentTarget: 'realtime-inference',
670
- hyperPodCluster: null,
671
- hyperPodNamespace: 'default',
672
- hyperPodReplicas: 1,
673
- fsxVolumeHandle: null,
674
- baseImage: null,
675
- modelSource: 'huggingface',
676
- artifactUri: '',
677
- modelLoadStrategy: 'runtime',
678
- existingEndpointName: null,
679
- enableLora: false,
680
- maxLoras: 30,
681
- maxLoraRank: 64
682
- };
683
-
684
- Object.entries(defaults).forEach(([key, value]) => {
685
- if (answers[key] === undefined) {
686
- answers[key] = value;
687
- }
688
- });
689
-
690
- // Backward compatibility: populate framework and modelServer from architecture/backend
691
- if (!answers.framework && answers.architecture) {
692
- answers.framework = answers.architecture;
693
- }
694
- if (!answers.modelServer && answers.backend) {
695
- answers.modelServer = answers.backend;
696
- }
697
-
698
- // Always include testing with all available test types
699
- answers.includeTesting = true;
700
- if (!answers.testTypes || answers.testTypes.length === 0) {
701
- if (answers.architecture === 'transformers' || answers.framework === 'transformers') {
702
- answers.testTypes = ['hosted-model-endpoint'];
703
- } else {
704
- answers.testTypes = ['local-model-cli', 'local-model-server', 'hosted-model-endpoint'];
705
- }
706
- }
707
-
708
- // Merge catalog env vars into answers.envVars with correct precedence
709
- await _mergeEnvVarsWithPrecedence(answers, registryConfigManager);
710
-
711
- // For Triton architecture, set default base image fallback
712
- if (answers.architecture === 'triton' && !answers.baseImage) {
713
- // Try to look up base image from framework registry using deployment-config key
714
- const tritonRegistryKey = answers.deploymentConfig;
715
- if (tritonRegistryKey && registryConfigManager?.frameworkRegistry) {
716
- const tritonFrameworkConfig = registryConfigManager.frameworkRegistry[tritonRegistryKey];
717
- if (tritonFrameworkConfig) {
718
- const versions = Object.keys(tritonFrameworkConfig).sort((a, b) =>
719
- b.localeCompare(a, undefined, { numeric: true })
720
- );
721
- if (versions.length > 0) {
722
- const latestConfig = tritonFrameworkConfig[versions[0]];
723
- if (latestConfig.baseImage) {
724
- answers.baseImage = latestConfig.baseImage;
725
- }
726
- if (latestConfig.inferenceAmiVersion && !answers.inferenceAmiVersion) {
727
- answers.inferenceAmiVersion = latestConfig.inferenceAmiVersion;
728
- }
729
- if (latestConfig.accelerator) {
730
- answers.accelerator = latestConfig.accelerator;
731
- }
732
- }
733
- }
734
- }
735
- // Final fallback: hardcoded default Triton base image
736
- if (!answers.baseImage) {
737
- answers.baseImage = 'nvcr.io/nvidia/tritonserver:24.08-py3';
738
- }
739
- }
740
-
741
- // For transformer models, enrich with HuggingFace data and non-envVar metadata
742
- if (answers.framework === 'transformers' && answers.modelName && registryConfigManager) {
743
- try {
744
- // Fetch HuggingFace data for model-specific info
745
- const hfData = await registryConfigManager._fetchHuggingFaceData(answers.modelName);
746
-
747
- // Merge chatTemplate if available and not already set
748
- if (hfData && hfData.chatTemplate && !answers.chatTemplate) {
749
- answers.chatTemplate = hfData.chatTemplate;
750
- answers.chatTemplateSource = 'HuggingFace_Hub_API';
751
- }
752
-
753
- // Check Model Registry for chatTemplate overrides
754
- if (registryConfigManager.modelRegistry) {
755
- const modelConfig = _findModelConfig(answers.modelName, registryConfigManager);
756
-
757
- if (modelConfig && modelConfig.chatTemplate) {
758
- answers.chatTemplate = modelConfig.chatTemplate;
759
- answers.chatTemplateSource = 'Model_Registry';
760
- }
761
- }
762
-
763
- // Set framework-level metadata (non-envVar fields)
764
- if (answers.frameworkVersion && registryConfigManager.frameworkRegistry) {
765
- const frameworkConfig = registryConfigManager.frameworkRegistry[answers.framework]?.[answers.frameworkVersion];
766
-
767
- if (frameworkConfig) {
768
- if (frameworkConfig.inferenceAmiVersion && !answers.inferenceAmiVersion) {
769
- answers.inferenceAmiVersion = frameworkConfig.inferenceAmiVersion;
770
- }
771
- if (frameworkConfig.accelerator) {
772
- answers.accelerator = frameworkConfig.accelerator;
773
- }
774
- }
775
- }
776
- } catch (error) {
777
- // Silently continue - defaults are already set
778
- }
779
- }
780
-
781
- // Populate baseImage from the catalog when still falsy (covers --skip-prompts and
782
- // cases where MCP/CLI/config did not provide a base image).
783
- // Precedence: MCP > CLI > config > catalog default (this block).
784
- if (!answers.baseImage && registryConfigManager?.frameworkRegistry) {
785
- const backendKey = answers.backend || answers.modelServer;
786
- if (backendKey) {
787
- const frameworkVersions = registryConfigManager.frameworkRegistry[backendKey];
788
- if (frameworkVersions) {
789
- let resolvedConfig = null;
790
- if (answers.frameworkVersion && frameworkVersions[answers.frameworkVersion]) {
791
- resolvedConfig = frameworkVersions[answers.frameworkVersion];
792
- } else {
793
- // Fall back to latest version
794
- const versions = Object.keys(frameworkVersions).sort((a, b) =>
795
- b.localeCompare(a, undefined, { numeric: true })
796
- );
797
- if (versions.length > 0) {
798
- resolvedConfig = frameworkVersions[versions[0]];
799
- }
800
- }
801
- if (resolvedConfig?.baseImage) {
802
- answers.baseImage = resolvedConfig.baseImage;
803
- }
804
- }
805
- }
806
- }
807
-
808
- // Populate icGpuCount from instance catalog when not explicitly set.
809
- // The deploy template uses IC_GPU_COUNT unconditionally for NumberOfAcceleratorDevicesRequired,
810
- // so it must always have a value for GPU deployments.
811
- if (answers.icGpuCount == null && answers.instanceType) {
812
- // Use gpuCount from instance-sizer recommendation if available
813
- if (answers.gpuCount) {
814
- answers.icGpuCount = answers.gpuCount;
815
- } else {
816
- // Look up from instances catalog
817
- try {
818
- const catalogPath = path.resolve(__dirname, '..', 'servers', 'lib', 'catalogs', 'instances.json');
819
- const catalogData = JSON.parse(fs.readFileSync(catalogPath, 'utf-8'));
820
- const instanceInfo = catalogData?.catalog?.[answers.instanceType];
821
- if (instanceInfo?.gpus && instanceInfo.gpus > 0) {
822
- answers.icGpuCount = instanceInfo.gpus;
823
- }
824
- } catch {
825
- // Silently continue — template fallback handles missing value
826
- }
827
- }
828
- }
829
-
830
- // Determine tune support based on model presence in the tune catalog.
831
- // Used by the do/config template to write TUNE_SUPPORTED=true|false.
832
- if (answers.tuneSupported === undefined) {
833
- try {
834
- const tuneCatalogPath = path.resolve(__dirname, '..', 'config', 'tune-catalog.json');
835
- const tuneCatalog = JSON.parse(fs.readFileSync(tuneCatalogPath, 'utf-8'));
836
- const modelId = answers.modelName || '';
837
- answers.tuneSupported = isTuneSupported(modelId, tuneCatalog);
838
- } catch {
839
- answers.tuneSupported = false;
840
- }
841
- }
842
- }
843
-
844
637
  /**
845
638
  * Orders environment variables by priority category for template rendering.
846
639
  *
@@ -889,182 +682,6 @@ function _getOrderedEnvVars(envVars) {
889
682
  return sorted.map(([key, value]) => ({ key, value }));
890
683
  }
891
684
 
892
- /**
893
- * Validates environment variables using the registry system.
894
- * Displays errors and warnings to the user.
895
- *
896
- * @param {object} answers - Configuration answers
897
- * @param {object} registryConfigManager - Registry configuration manager
898
- */
899
- async function _validateEnvironmentVariables(answers, registryConfigManager) {
900
- // Get framework configuration
901
- // For Triton configs, look up using deploymentConfig key (e.g. 'triton-fil')
902
- let frameworkConfig;
903
- if (answers.architecture === 'triton' && answers.deploymentConfig) {
904
- const tritonEntry = registryConfigManager.frameworkRegistry?.[answers.deploymentConfig];
905
- if (tritonEntry) {
906
- const versions = Object.keys(tritonEntry);
907
- if (versions.length > 0) {
908
- frameworkConfig = tritonEntry[versions[0]];
909
- }
910
- }
911
- }
912
- if (!frameworkConfig) {
913
- frameworkConfig = registryConfigManager.frameworkRegistry?.[answers.framework]?.[answers.frameworkVersion];
914
- }
915
-
916
- if (!frameworkConfig || !frameworkConfig.envVars) {
917
- return; // No env vars to validate
918
- }
919
-
920
- console.log('\n🔍 Validating environment variables...');
921
-
922
- // Validate environment variables
923
- const validationResult = registryConfigManager.validateEnvironmentVariables(
924
- frameworkConfig.envVars,
925
- frameworkConfig
926
- );
927
-
928
- // Display validation results
929
- if (validationResult.errors && validationResult.errors.length > 0) {
930
- console.log('\n❌ Environment Variable Validation Errors:');
931
- validationResult.errors.forEach(error => {
932
- console.log(` • ${error.key}: ${error.message}`);
933
- });
934
- }
935
-
936
- if (validationResult.warnings && validationResult.warnings.length > 0) {
937
- console.log('\n⚠️ Environment Variable Validation Warnings:');
938
- validationResult.warnings.forEach(warning => {
939
- console.log(` • ${warning.key ? `${warning.key}: ` : ''}${warning.message}`);
940
- });
941
- }
942
-
943
- if (validationResult.strategiesUsed && validationResult.strategiesUsed.length > 0) {
944
- console.log(`\n✅ Validation methods used: ${validationResult.strategiesUsed.join(', ')}`);
945
- }
946
-
947
- if (!validationResult.errors || validationResult.errors.length === 0) {
948
- if (!validationResult.warnings || validationResult.warnings.length === 0) {
949
- console.log(' ✅ All environment variables validated successfully');
950
- }
951
- }
952
-
953
- // In non-interactive mode (skip-prompts), throw on errors
954
- if (validationResult.errors && validationResult.errors.length > 0) {
955
- throw new Error('Environment variable validation failed. Please fix the errors and try again.');
956
- }
957
- }
958
-
959
- /**
960
- * Merges environment variables from all catalog sources with correct precedence.
961
- * Precedence (lowest → highest):
962
- * 1. catalog defaults (Image_Entry defaults.envVars)
963
- * 2. framework profile (Image_Entry profiles[selectedProfile].envVars)
964
- * 3. model entry (model catalog entry envVars)
965
- * 4. model profile (model catalog entry profiles[selectedProfile].envVars)
966
- * 5. CLI overrides (existing answers.envVars from user CLI input)
967
- *
968
- * @param {object} answers - Configuration answers
969
- * @param {object|null} registryConfigManager - Registry configuration manager
970
- */
971
- async function _mergeEnvVarsWithPrecedence(answers, registryConfigManager) {
972
- if (!registryConfigManager) return;
973
-
974
- // Capture CLI-provided env vars before merging (highest precedence)
975
- const cliEnvVars = { ...answers.envVars };
976
-
977
- // Resolve the framework config for the selected framework + version
978
- const frameworkName = answers.framework || answers.deploymentConfig;
979
- const frameworkVersion = answers.frameworkVersion;
980
- let frameworkConfig = null;
981
-
982
- if (frameworkName && registryConfigManager.frameworkRegistry) {
983
- const frameworkVersions = registryConfigManager.frameworkRegistry[frameworkName];
984
- if (frameworkVersions) {
985
- if (frameworkVersion && frameworkVersions[frameworkVersion]) {
986
- frameworkConfig = frameworkVersions[frameworkVersion];
987
- } else {
988
- // Fall back to latest version for Triton and other non-versioned lookups
989
- const versions = Object.keys(frameworkVersions).sort((a, b) =>
990
- b.localeCompare(a, undefined, { numeric: true })
991
- );
992
- if (versions.length > 0) {
993
- frameworkConfig = frameworkVersions[versions[0]];
994
- }
995
- }
996
- }
997
- }
998
-
999
- // Resolve the model config (exact match or pattern match)
1000
- let modelConfig = null;
1001
- if (answers.modelName && registryConfigManager.modelRegistry) {
1002
- modelConfig = _findModelConfig(answers.modelName, registryConfigManager);
1003
- }
1004
-
1005
- // Layer 1: catalog defaults (Image_Entry defaults.envVars)
1006
- const catalogDefaults = frameworkConfig?.envVars || {};
1007
-
1008
- // Layer 2: framework profile envVars
1009
- let frameworkProfileEnvVars = {};
1010
- if (answers.frameworkProfile && frameworkConfig?.profiles) {
1011
- const profile = frameworkConfig.profiles[answers.frameworkProfile];
1012
- if (profile?.envVars) {
1013
- frameworkProfileEnvVars = profile.envVars;
1014
- }
1015
- }
1016
-
1017
- // Layer 3: model entry envVars
1018
- const modelEntryEnvVars = modelConfig?.envVars || {};
1019
-
1020
- // Layer 4: model profile envVars
1021
- let modelProfileEnvVars = {};
1022
- if (answers.modelProfile && modelConfig?.profiles) {
1023
- const profile = modelConfig.profiles[answers.modelProfile];
1024
- if (profile?.envVars) {
1025
- modelProfileEnvVars = profile.envVars;
1026
- }
1027
- }
1028
-
1029
- // Layer 5: CLI overrides (captured above)
1030
-
1031
- // Merge in precedence order: each layer overrides the previous
1032
- answers.envVars = {
1033
- ...catalogDefaults,
1034
- ...frameworkProfileEnvVars,
1035
- ...modelEntryEnvVars,
1036
- ...modelProfileEnvVars,
1037
- ...cliEnvVars
1038
- };
1039
- }
1040
-
1041
- /**
1042
- * Finds model configuration by exact match or glob-pattern match.
1043
- *
1044
- * @param {string} modelName - Model ID to look up
1045
- * @param {object} registryConfigManager - Registry configuration manager
1046
- * @returns {object|null} Model configuration or null
1047
- */
1048
- function _findModelConfig(modelName, registryConfigManager) {
1049
- if (!registryConfigManager?.modelRegistry) return null;
1050
-
1051
- // Exact match first
1052
- const exact = registryConfigManager.modelRegistry[modelName];
1053
- if (exact) return exact;
1054
-
1055
- // Pattern matching with glob-style wildcards
1056
- for (const [pattern, config] of Object.entries(registryConfigManager.modelRegistry)) {
1057
- if (pattern.includes('*')) {
1058
- const regex = new RegExp(`^${pattern.replace(/\*/g, '.*')}$`);
1059
- if (regex.test(modelName)) {
1060
- return config;
1061
- }
1062
- }
1063
- }
1064
-
1065
- return null;
1066
- }
1067
-
1068
685
  /**
1069
686
  * Generates Triton-specific files (Dockerfile, model repository structure).
1070
687
  *