@aws/ml-container-creator 0.10.0 → 0.12.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (90)
  1. package/LICENSE-THIRD-PARTY +9304 -0
  2. package/bin/cli.js +2 -0
  3. package/config/bootstrap-e2e-stack.json +341 -0
  4. package/config/bootstrap-stack.json +40 -3
  5. package/config/parameter-schema-v2.json +33 -22
  6. package/config/tune-catalog.json +1781 -0
  7. package/infra/ci-harness/buildspec.yml +1 -0
  8. package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
  9. package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
  10. package/infra/ci-harness/lib/ci-harness-stack.ts +851 -7
  11. package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
  12. package/package.json +53 -67
  13. package/servers/base-image-picker/index.js +121 -121
  14. package/servers/e2e-status/index.js +297 -0
  15. package/servers/e2e-status/manifest.json +14 -0
  16. package/servers/e2e-status/package.json +15 -0
  17. package/servers/endpoint-picker/LICENSE +202 -0
  18. package/servers/endpoint-picker/index.js +536 -0
  19. package/servers/endpoint-picker/manifest.json +14 -0
  20. package/servers/endpoint-picker/package.json +18 -0
  21. package/servers/hyperpod-cluster-picker/index.js +125 -125
  22. package/servers/instance-sizer/index.js +166 -153
  23. package/servers/instance-sizer/lib/instance-ranker.js +120 -76
  24. package/servers/instance-sizer/lib/model-resolver.js +61 -61
  25. package/servers/instance-sizer/lib/quota-resolver.js +113 -113
  26. package/servers/instance-sizer/lib/vram-estimator.js +31 -31
  27. package/servers/lib/bedrock-client.js +38 -38
  28. package/servers/lib/catalogs/instances.json +27 -0
  29. package/servers/lib/catalogs/model-servers.json +201 -3
  30. package/servers/lib/custom-validators.js +13 -13
  31. package/servers/lib/dynamic-resolver.js +4 -4
  32. package/servers/marketplace-picker/index.js +342 -0
  33. package/servers/marketplace-picker/manifest.json +14 -0
  34. package/servers/marketplace-picker/package.json +18 -0
  35. package/servers/model-picker/index.js +382 -382
  36. package/servers/region-picker/index.js +56 -56
  37. package/servers/workload-picker/LICENSE +202 -0
  38. package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
  39. package/servers/workload-picker/index.js +171 -0
  40. package/servers/workload-picker/manifest.json +16 -0
  41. package/servers/workload-picker/package.json +16 -0
  42. package/src/app.js +12 -3
  43. package/src/lib/bootstrap-command-handler.js +609 -15
  44. package/src/lib/bootstrap-config.js +36 -0
  45. package/src/lib/bootstrap-profile-manager.js +48 -41
  46. package/src/lib/ci-register-helpers.js +74 -0
  47. package/src/lib/config-loader.js +3 -0
  48. package/src/lib/config-manager.js +7 -0
  49. package/src/lib/config-validator.js +1 -1
  50. package/src/lib/cuda-resolver.js +17 -8
  51. package/src/lib/generated/cli-options.js +319 -314
  52. package/src/lib/generated/parameter-matrix.js +672 -661
  53. package/src/lib/generated/validation-rules.js +76 -72
  54. package/src/lib/path-prover-brain.js +664 -0
  55. package/src/lib/prompts/infrastructure-prompts.js +2 -2
  56. package/src/lib/prompts/model-prompts.js +6 -0
  57. package/src/lib/prompts/project-prompts.js +12 -0
  58. package/src/lib/secrets-prompt-runner.js +4 -0
  59. package/src/lib/template-manager.js +1 -1
  60. package/src/lib/template-variable-resolver.js +87 -1
  61. package/src/lib/tune-catalog-validator.js +37 -4
  62. package/templates/Dockerfile +9 -0
  63. package/templates/code/adapter_sidecar.py +444 -0
  64. package/templates/code/serve +6 -0
  65. package/templates/code/serve.d/vllm.ejs +1 -1
  66. package/templates/do/.benchmark_writer.py +1476 -0
  67. package/templates/do/.tune_helper.py +982 -57
  68. package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
  69. package/templates/do/adapter +154 -0
  70. package/templates/do/benchmark +639 -85
  71. package/templates/do/build +5 -0
  72. package/templates/do/clean.d/async-inference.ejs +5 -0
  73. package/templates/do/clean.d/batch-transform.ejs +5 -0
  74. package/templates/do/clean.d/hyperpod-eks.ejs +5 -0
  75. package/templates/do/clean.d/managed-inference.ejs +5 -0
  76. package/templates/do/config +115 -45
  77. package/templates/do/deploy.d/async-inference.ejs +30 -3
  78. package/templates/do/deploy.d/batch-transform.ejs +29 -3
  79. package/templates/do/deploy.d/hyperpod-eks.ejs +4 -0
  80. package/templates/do/deploy.d/managed-inference.ejs +216 -14
  81. package/templates/do/lib/endpoint-config.sh +1 -1
  82. package/templates/do/lib/profile.sh +44 -0
  83. package/templates/do/optimize +106 -37
  84. package/templates/do/push +5 -0
  85. package/templates/do/register +94 -0
  86. package/templates/do/stage +567 -0
  87. package/templates/do/submit +7 -0
  88. package/templates/do/test +14 -0
  89. package/templates/do/tune +382 -59
  90. package/templates/do/validate +44 -4
@@ -1,202 +1,206 @@
1
1
  // AUTO-GENERATED by scripts/codegen-validator.js — DO NOT EDIT
2
2
  // Source: config/parameter-schema-v2.json
3
- // Generated: 2026-05-23T12:02:19.548Z
3
+ // Generated: 2026-06-12T22:03:00.468Z
4
4
 
5
5
  /**
6
6
  * Validation rules derived from parameter-schema-v2.json.
7
7
  * Each key maps to a function that returns null (valid) or an error string.
8
8
  */
9
9
  export const validationRules = {
10
- "projectName": (value) => {
11
- if (value.length < 2) return `projectName must be at least 2 characters`;
12
- if (value.length > 63) return `projectName must be at most 63 characters`;
13
- if (!new RegExp("^[a-z0-9][a-z0-9-]*[a-z0-9]$").test(value)) return `projectName does not match required pattern`;
10
+ 'projectName': (value) => {
11
+ if (value.length < 2) return 'projectName must be at least 2 characters';
12
+ if (value.length > 63) return 'projectName must be at most 63 characters';
13
+ if (!new RegExp('^[a-z0-9][a-z0-9-]*[a-z0-9]$').test(value)) return 'projectName does not match required pattern';
14
14
  return null;
15
15
  },
16
- "deploymentConfig": (value) => {
17
- if (!["http-flask","http-fastapi","transformers-vllm","transformers-sglang","transformers-tensorrt-llm","transformers-lmi","transformers-djl","triton-fil","triton-onnxruntime","triton-tensorflow","triton-pytorch","triton-vllm","triton-tensorrtllm","triton-python","diffusors-vllm-omni","marketplace"].includes(value)) return `Invalid value "${value}" for deploymentConfig. Valid: http-flask, http-fastapi, transformers-vllm, transformers-sglang, transformers-tensorrt-llm, transformers-lmi, transformers-djl, triton-fil, triton-onnxruntime, triton-tensorflow, triton-pytorch, triton-vllm, triton-tensorrtllm, triton-python, diffusors-vllm-omni, marketplace`;
16
+ 'deploymentConfig': (value) => {
17
+ if (!['http-flask','http-fastapi','transformers-vllm','transformers-sglang','transformers-tensorrt-llm','transformers-lmi','transformers-djl','triton-fil','triton-onnxruntime','triton-tensorflow','triton-pytorch','triton-vllm','triton-tensorrtllm','triton-python','diffusors-vllm-omni','marketplace'].includes(value)) return `Invalid value "${value}" for deploymentConfig. Valid: http-flask, http-fastapi, transformers-vllm, transformers-sglang, transformers-tensorrt-llm, transformers-lmi, transformers-djl, triton-fil, triton-onnxruntime, triton-tensorflow, triton-pytorch, triton-vllm, triton-tensorrtllm, triton-python, diffusors-vllm-omni, marketplace`;
18
18
  return null;
19
19
  },
20
- "modelName": (value) => {
21
- if (value.length < 1) return `modelName must be at least 1 characters`;
20
+ 'modelName': (value) => {
21
+ if (value.length < 1) return 'modelName must be at least 1 characters';
22
22
  return null;
23
23
  },
24
- "deploymentTarget": (value) => {
25
- if (!["managed-inference","realtime-inference","async-inference","batch-transform","hyperpod-eks"].includes(value)) return `Invalid value "${value}" for deploymentTarget. Valid: managed-inference, realtime-inference, async-inference, batch-transform, hyperpod-eks`;
24
+ 'deploymentTarget': (value) => {
25
+ if (!['managed-inference','realtime-inference','async-inference','batch-transform','hyperpod-eks'].includes(value)) return `Invalid value "${value}" for deploymentTarget. Valid: managed-inference, realtime-inference, async-inference, batch-transform, hyperpod-eks`;
26
26
  return null;
27
27
  },
28
- "instanceType": (value) => {
29
- if (!new RegExp("^ml\\.[a-z0-9]+\\.[a-z0-9]+$").test(value)) return `instanceType does not match required pattern`;
28
+ 'instanceType': (value) => {
29
+ if (!new RegExp('^ml\\.[a-z0-9-]+\\.[a-z0-9]+$').test(value)) return 'instanceType does not match required pattern';
30
30
  return null;
31
31
  },
32
- "icGpuCount": (value) => {
32
+ 'icGpuCount': (value) => {
33
33
  if (value < 0) return `icGpuCount must be >= 0, got ${value}`;
34
34
  if (value > 8) return `icGpuCount must be <= 8, got ${value}`;
35
35
  return null;
36
36
  },
37
- "icCopyCount": (value) => {
37
+ 'icCopyCount': (value) => {
38
38
  if (value < 0) return `icCopyCount must be >= 0, got ${value}`;
39
39
  if (value > 100) return `icCopyCount must be <= 100, got ${value}`;
40
40
  return null;
41
41
  },
42
- "icMemorySize": (value) => {
42
+ 'icMemorySize': (value) => {
43
43
  if (value < 128) return `icMemorySize must be >= 128, got ${value}`;
44
44
  if (value > 3145728) return `icMemorySize must be <= 3145728, got ${value}`;
45
45
  return null;
46
46
  },
47
- "maxLoras": (value) => {
47
+ 'maxLoras': (value) => {
48
48
  if (value < 1) return `maxLoras must be >= 1, got ${value}`;
49
49
  if (value > 256) return `maxLoras must be <= 256, got ${value}`;
50
50
  return null;
51
51
  },
52
- "maxLoraRank": (value) => {
52
+ 'maxLoraRank': (value) => {
53
53
  if (value < 8) return `maxLoraRank must be >= 8, got ${value}`;
54
54
  if (value > 512) return `maxLoraRank must be <= 512, got ${value}`;
55
55
  return null;
56
56
  },
57
- "benchmarkConcurrency": (value) => {
57
+ 'benchmarkConcurrency': (value) => {
58
58
  if (value < 1) return `benchmarkConcurrency must be >= 1, got ${value}`;
59
59
  if (value > 1000) return `benchmarkConcurrency must be <= 1000, got ${value}`;
60
60
  return null;
61
61
  },
62
- "benchmarkInputTokens": (value) => {
62
+ 'benchmarkInputTokens': (value) => {
63
63
  if (value < 1) return `benchmarkInputTokens must be >= 1, got ${value}`;
64
64
  if (value > 128000) return `benchmarkInputTokens must be <= 128000, got ${value}`;
65
65
  return null;
66
66
  },
67
- "benchmarkOutputTokens": (value) => {
67
+ 'benchmarkOutputTokens': (value) => {
68
68
  if (value < 1) return `benchmarkOutputTokens must be >= 1, got ${value}`;
69
69
  if (value > 128000) return `benchmarkOutputTokens must be <= 128000, got ${value}`;
70
70
  return null;
71
71
  },
72
- "benchmarkRequestCount": (value) => {
72
+ 'benchmarkRequestCount': (value) => {
73
73
  if (value < 1) return `benchmarkRequestCount must be >= 1, got ${value}`;
74
74
  return null;
75
75
  },
76
- "benchmarkS3OutputPath": (value) => {
77
- if (!new RegExp("^s3://").test(value)) return `benchmarkS3OutputPath does not match required pattern`;
76
+ 'benchmarkS3OutputPath': (value) => {
77
+ if (!new RegExp('^s3://').test(value)) return 'benchmarkS3OutputPath does not match required pattern';
78
78
  return null;
79
79
  },
80
- "framework": (value) => {
81
- if (!["sklearn","xgboost","tensorflow","transformers"].includes(value)) return `Invalid value "${value}" for framework. Valid: sklearn, xgboost, tensorflow, transformers`;
80
+ 'framework': (value) => {
81
+ if (!['sklearn','xgboost','tensorflow','transformers'].includes(value)) return `Invalid value "${value}" for framework. Valid: sklearn, xgboost, tensorflow, transformers`;
82
82
  return null;
83
83
  },
84
- "modelFormat": (value) => {
85
- if (!["pkl","joblib","json","model","ubj","keras","h5","SavedModel"].includes(value)) return `Invalid value "${value}" for modelFormat. Valid: pkl, joblib, json, model, ubj, keras, h5, SavedModel`;
84
+ 'modelFormat': (value) => {
85
+ if (!['pkl','joblib','json','model','ubj','keras','h5','SavedModel'].includes(value)) return `Invalid value "${value}" for modelFormat. Valid: pkl, joblib, json, model, ubj, keras, h5, SavedModel`;
86
86
  return null;
87
87
  },
88
- "modelServer": (value) => {
89
- if (!["flask","fastapi","vllm","sglang"].includes(value)) return `Invalid value "${value}" for modelServer. Valid: flask, fastapi, vllm, sglang`;
88
+ 'modelServer': (value) => {
89
+ if (!['flask','fastapi','vllm','sglang'].includes(value)) return `Invalid value "${value}" for modelServer. Valid: flask, fastapi, vllm, sglang`;
90
90
  return null;
91
91
  },
92
- "region": (value) => {
93
- if (!new RegExp("^[a-z]{2}-[a-z]+-\\d+$").test(value)) return `region does not match required pattern`;
92
+ 'region': (value) => {
93
+ if (!new RegExp('^[a-z]{2}-[a-z]+-\\d+$').test(value)) return 'region does not match required pattern';
94
94
  return null;
95
95
  },
96
- "roleArn": (value) => {
97
- if (!new RegExp("^arn:aws:iam::").test(value)) return `roleArn does not match required pattern`;
96
+ 'roleArn': (value) => {
97
+ if (!new RegExp('^arn:aws:iam::').test(value)) return 'roleArn does not match required pattern';
98
98
  return null;
99
99
  },
100
- "buildTarget": (value) => {
101
- if (!["codebuild"].includes(value)) return `Invalid value "${value}" for buildTarget. Valid: codebuild`;
100
+ 'buildTarget': (value) => {
101
+ if (!['codebuild'].includes(value)) return `Invalid value "${value}" for buildTarget. Valid: codebuild`;
102
102
  return null;
103
103
  },
104
- "codebuildComputeType": (value) => {
105
- if (!["SMALL","MEDIUM","LARGE","BUILD_GENERAL1_SMALL","BUILD_GENERAL1_MEDIUM","BUILD_GENERAL1_LARGE","BUILD_GENERAL1_2XLARGE"].includes(value)) return `Invalid value "${value}" for codebuildComputeType. Valid: SMALL, MEDIUM, LARGE, BUILD_GENERAL1_SMALL, BUILD_GENERAL1_MEDIUM, BUILD_GENERAL1_LARGE, BUILD_GENERAL1_2XLARGE`;
104
+ 'codebuildComputeType': (value) => {
105
+ if (!['SMALL','MEDIUM','LARGE','BUILD_GENERAL1_SMALL','BUILD_GENERAL1_MEDIUM','BUILD_GENERAL1_LARGE','BUILD_GENERAL1_2XLARGE'].includes(value)) return `Invalid value "${value}" for codebuildComputeType. Valid: SMALL, MEDIUM, LARGE, BUILD_GENERAL1_SMALL, BUILD_GENERAL1_MEDIUM, BUILD_GENERAL1_LARGE, BUILD_GENERAL1_2XLARGE`;
106
106
  return null;
107
107
  },
108
- "hfTokenArn": (value) => {
109
- if (!new RegExp("^arn:aws:secretsmanager:").test(value)) return `hfTokenArn does not match required pattern`;
108
+ 'hfTokenArn': (value) => {
109
+ if (!new RegExp('^arn:aws:secretsmanager:').test(value)) return 'hfTokenArn does not match required pattern';
110
110
  return null;
111
111
  },
112
- "ngcTokenArn": (value) => {
113
- if (!new RegExp("^arn:aws:secretsmanager:").test(value)) return `ngcTokenArn does not match required pattern`;
112
+ 'ngcTokenArn': (value) => {
113
+ if (!new RegExp('^arn:aws:secretsmanager:').test(value)) return 'ngcTokenArn does not match required pattern';
114
114
  return null;
115
115
  },
116
- "endpointInitialInstanceCount": (value) => {
116
+ 'endpointInitialInstanceCount': (value) => {
117
117
  if (value < 1) return `endpointInitialInstanceCount must be >= 1, got ${value}`;
118
118
  if (value > 100) return `endpointInitialInstanceCount must be <= 100, got ${value}`;
119
119
  return null;
120
120
  },
121
- "endpointDataCapturePercent": (value) => {
121
+ 'endpointDataCapturePercent': (value) => {
122
122
  if (value < 0) return `endpointDataCapturePercent must be >= 0, got ${value}`;
123
123
  if (value > 100) return `endpointDataCapturePercent must be <= 100, got ${value}`;
124
124
  return null;
125
125
  },
126
- "endpointVariantName": (value) => {
127
- if (!new RegExp("^[a-zA-Z0-9]([\\w-]{0,62}[a-zA-Z0-9])?$").test(value)) return `endpointVariantName does not match required pattern`;
126
+ 'endpointVariantName': (value) => {
127
+ if (!new RegExp('^[a-zA-Z0-9]([\\w-]{0,62}[a-zA-Z0-9])?$').test(value)) return 'endpointVariantName does not match required pattern';
128
128
  return null;
129
129
  },
130
- "endpointVolumeSize": (value) => {
130
+ 'endpointVolumeSize': (value) => {
131
131
  if (value < 1) return `endpointVolumeSize must be >= 1, got ${value}`;
132
132
  if (value > 16384) return `endpointVolumeSize must be <= 16384, got ${value}`;
133
133
  return null;
134
134
  },
135
- "icCpuCount": (value) => {
135
+ 'capacityReservationArn': (value) => {
136
+ if (!new RegExp('^arn:aws:sagemaker:').test(value)) return 'capacityReservationArn does not match required pattern';
137
+ return null;
138
+ },
139
+ 'icCpuCount': (value) => {
136
140
  if (value < 0.25) return `icCpuCount must be >= 0.25, got ${value}`;
137
141
  if (value > 768) return `icCpuCount must be <= 768, got ${value}`;
138
142
  return null;
139
143
  },
140
- "icModelWeight": (value) => {
144
+ 'icModelWeight': (value) => {
141
145
  if (value < 0) return `icModelWeight must be >= 0, got ${value}`;
142
146
  if (value > 1) return `icModelWeight must be <= 1, got ${value}`;
143
147
  return null;
144
148
  },
145
- "asyncS3OutputPath": (value) => {
146
- if (!new RegExp("^s3://").test(value)) return `asyncS3OutputPath does not match required pattern`;
149
+ 'asyncS3OutputPath': (value) => {
150
+ if (!new RegExp('^s3://').test(value)) return 'asyncS3OutputPath does not match required pattern';
147
151
  return null;
148
152
  },
149
- "asyncSnsSuccessTopic": (value) => {
150
- if (!new RegExp("^arn:aws:sns:").test(value)) return `asyncSnsSuccessTopic does not match required pattern`;
153
+ 'asyncSnsSuccessTopic': (value) => {
154
+ if (!new RegExp('^arn:aws:sns:').test(value)) return 'asyncSnsSuccessTopic does not match required pattern';
151
155
  return null;
152
156
  },
153
- "asyncSnsErrorTopic": (value) => {
154
- if (!new RegExp("^arn:aws:sns:").test(value)) return `asyncSnsErrorTopic does not match required pattern`;
157
+ 'asyncSnsErrorTopic': (value) => {
158
+ if (!new RegExp('^arn:aws:sns:').test(value)) return 'asyncSnsErrorTopic does not match required pattern';
155
159
  return null;
156
160
  },
157
- "asyncMaxConcurrent": (value) => {
161
+ 'asyncMaxConcurrent': (value) => {
158
162
  if (value < 1) return `asyncMaxConcurrent must be >= 1, got ${value}`;
159
163
  if (value > 100) return `asyncMaxConcurrent must be <= 100, got ${value}`;
160
164
  return null;
161
165
  },
162
- "batchInputPath": (value) => {
163
- if (!new RegExp("^s3://").test(value)) return `batchInputPath does not match required pattern`;
166
+ 'batchInputPath': (value) => {
167
+ if (!new RegExp('^s3://').test(value)) return 'batchInputPath does not match required pattern';
164
168
  return null;
165
169
  },
166
- "batchOutputPath": (value) => {
167
- if (!new RegExp("^s3://").test(value)) return `batchOutputPath does not match required pattern`;
170
+ 'batchOutputPath': (value) => {
171
+ if (!new RegExp('^s3://').test(value)) return 'batchOutputPath does not match required pattern';
168
172
  return null;
169
173
  },
170
- "batchInstanceCount": (value) => {
174
+ 'batchInstanceCount': (value) => {
171
175
  if (value < 1) return `batchInstanceCount must be >= 1, got ${value}`;
172
176
  if (value > 100) return `batchInstanceCount must be <= 100, got ${value}`;
173
177
  return null;
174
178
  },
175
- "batchSplitType": (value) => {
176
- if (!["Line","RecordIO","None"].includes(value)) return `Invalid value "${value}" for batchSplitType. Valid: Line, RecordIO, None`;
179
+ 'batchSplitType': (value) => {
180
+ if (!['Line','RecordIO','None'].includes(value)) return `Invalid value "${value}" for batchSplitType. Valid: Line, RecordIO, None`;
177
181
  return null;
178
182
  },
179
- "batchStrategy": (value) => {
180
- if (!["MultiRecord","SingleRecord"].includes(value)) return `Invalid value "${value}" for batchStrategy. Valid: MultiRecord, SingleRecord`;
183
+ 'batchStrategy': (value) => {
184
+ if (!['MultiRecord','SingleRecord'].includes(value)) return `Invalid value "${value}" for batchStrategy. Valid: MultiRecord, SingleRecord`;
181
185
  return null;
182
186
  },
183
- "batchJoinSource": (value) => {
184
- if (!["Input","None"].includes(value)) return `Invalid value "${value}" for batchJoinSource. Valid: Input, None`;
187
+ 'batchJoinSource': (value) => {
188
+ if (!['Input','None'].includes(value)) return `Invalid value "${value}" for batchJoinSource. Valid: Input, None`;
185
189
  return null;
186
190
  },
187
- "batchMaxConcurrent": (value) => {
191
+ 'batchMaxConcurrent': (value) => {
188
192
  if (value < 1) return `batchMaxConcurrent must be >= 1, got ${value}`;
189
193
  return null;
190
194
  },
191
- "batchMaxPayload": (value) => {
195
+ 'batchMaxPayload': (value) => {
192
196
  if (value < 0) return `batchMaxPayload must be >= 0, got ${value}`;
193
197
  if (value > 100) return `batchMaxPayload must be <= 100, got ${value}`;
194
198
  return null;
195
199
  },
196
- "hyperpodReplicas": (value) => {
200
+ 'hyperpodReplicas': (value) => {
197
201
  if (value < 1) return `hyperpodReplicas must be >= 1, got ${value}`;
198
202
  return null;
199
- },
203
+ }
200
204
  };
201
205
 
202
- // 43 parameters have validation rules
206
+ // 44 parameters have validation rules