@aws/ml-container-creator 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. package/LICENSE +202 -0
  2. package/LICENSE-THIRD-PARTY +68620 -0
  3. package/NOTICE +2 -0
  4. package/README.md +106 -0
  5. package/bin/cli.js +365 -0
  6. package/config/defaults.json +32 -0
  7. package/config/presets/transformers-djl.json +26 -0
  8. package/config/presets/transformers-gpu.json +24 -0
  9. package/config/presets/transformers-lmi.json +27 -0
  10. package/package.json +129 -0
  11. package/servers/README.md +419 -0
  12. package/servers/base-image-picker/catalogs/model-servers.json +1191 -0
  13. package/servers/base-image-picker/catalogs/python-slim.json +38 -0
  14. package/servers/base-image-picker/catalogs/triton-backends.json +51 -0
  15. package/servers/base-image-picker/catalogs/triton.json +38 -0
  16. package/servers/base-image-picker/index.js +495 -0
  17. package/servers/base-image-picker/manifest.json +17 -0
  18. package/servers/base-image-picker/package.json +15 -0
  19. package/servers/hyperpod-cluster-picker/LICENSE +202 -0
  20. package/servers/hyperpod-cluster-picker/index.js +424 -0
  21. package/servers/hyperpod-cluster-picker/manifest.json +14 -0
  22. package/servers/hyperpod-cluster-picker/package.json +17 -0
  23. package/servers/instance-recommender/LICENSE +202 -0
  24. package/servers/instance-recommender/catalogs/instances.json +852 -0
  25. package/servers/instance-recommender/index.js +284 -0
  26. package/servers/instance-recommender/manifest.json +16 -0
  27. package/servers/instance-recommender/package.json +15 -0
  28. package/servers/lib/LICENSE +202 -0
  29. package/servers/lib/bedrock-client.js +160 -0
  30. package/servers/lib/custom-validators.js +46 -0
  31. package/servers/lib/dynamic-resolver.js +36 -0
  32. package/servers/lib/package.json +11 -0
  33. package/servers/lib/schemas/image-catalog.schema.json +185 -0
  34. package/servers/lib/schemas/instances.schema.json +124 -0
  35. package/servers/lib/schemas/manifest.schema.json +64 -0
  36. package/servers/lib/schemas/model-catalog.schema.json +91 -0
  37. package/servers/lib/schemas/regions.schema.json +26 -0
  38. package/servers/lib/schemas/triton-backends.schema.json +51 -0
  39. package/servers/model-picker/catalogs/jumpstart-public.json +66 -0
  40. package/servers/model-picker/catalogs/popular-diffusors.json +88 -0
  41. package/servers/model-picker/catalogs/popular-transformers.json +226 -0
  42. package/servers/model-picker/index.js +1693 -0
  43. package/servers/model-picker/manifest.json +18 -0
  44. package/servers/model-picker/package.json +20 -0
  45. package/servers/region-picker/LICENSE +202 -0
  46. package/servers/region-picker/catalogs/regions.json +263 -0
  47. package/servers/region-picker/index.js +230 -0
  48. package/servers/region-picker/manifest.json +16 -0
  49. package/servers/region-picker/package.json +15 -0
  50. package/src/app.js +1007 -0
  51. package/src/copy-tpl.js +77 -0
  52. package/src/lib/accelerator-validator.js +39 -0
  53. package/src/lib/asset-manager.js +385 -0
  54. package/src/lib/aws-profile-parser.js +181 -0
  55. package/src/lib/bootstrap-command-handler.js +1647 -0
  56. package/src/lib/bootstrap-config.js +238 -0
  57. package/src/lib/ci-register-helpers.js +124 -0
  58. package/src/lib/ci-report-helpers.js +158 -0
  59. package/src/lib/ci-stage-helpers.js +268 -0
  60. package/src/lib/cli-handler.js +529 -0
  61. package/src/lib/comment-generator.js +544 -0
  62. package/src/lib/community-reports-validator.js +91 -0
  63. package/src/lib/config-manager.js +2106 -0
  64. package/src/lib/configuration-exporter.js +204 -0
  65. package/src/lib/configuration-manager.js +695 -0
  66. package/src/lib/configuration-matcher.js +221 -0
  67. package/src/lib/cpu-validator.js +36 -0
  68. package/src/lib/cuda-validator.js +57 -0
  69. package/src/lib/deployment-config-resolver.js +103 -0
  70. package/src/lib/deployment-entry-schema.js +125 -0
  71. package/src/lib/deployment-registry.js +598 -0
  72. package/src/lib/docker-introspection-validator.js +51 -0
  73. package/src/lib/engine-prefix-resolver.js +60 -0
  74. package/src/lib/huggingface-client.js +172 -0
  75. package/src/lib/key-value-parser.js +37 -0
  76. package/src/lib/known-flags-validator.js +200 -0
  77. package/src/lib/manifest-cli.js +280 -0
  78. package/src/lib/mcp-client.js +303 -0
  79. package/src/lib/mcp-command-handler.js +532 -0
  80. package/src/lib/neuron-validator.js +80 -0
  81. package/src/lib/parameter-schema-validator.js +284 -0
  82. package/src/lib/prompt-runner.js +1349 -0
  83. package/src/lib/prompts.js +1138 -0
  84. package/src/lib/registry-command-handler.js +519 -0
  85. package/src/lib/registry-loader.js +198 -0
  86. package/src/lib/rocm-validator.js +80 -0
  87. package/src/lib/schema-validator.js +157 -0
  88. package/src/lib/sensitive-redactor.js +59 -0
  89. package/src/lib/template-engine.js +156 -0
  90. package/src/lib/template-manager.js +341 -0
  91. package/src/lib/validation-engine.js +314 -0
  92. package/src/prompt-adapter.js +63 -0
  93. package/templates/Dockerfile +300 -0
  94. package/templates/IAM_PERMISSIONS.md +84 -0
  95. package/templates/MIGRATION.md +488 -0
  96. package/templates/PROJECT_README.md +439 -0
  97. package/templates/TEMPLATE_SYSTEM.md +243 -0
  98. package/templates/buildspec.yml +64 -0
  99. package/templates/code/chat_template.jinja +1 -0
  100. package/templates/code/flask/gunicorn_config.py +35 -0
  101. package/templates/code/flask/wsgi.py +10 -0
  102. package/templates/code/model_handler.py +387 -0
  103. package/templates/code/serve +300 -0
  104. package/templates/code/serve.py +175 -0
  105. package/templates/code/serving.properties +105 -0
  106. package/templates/code/start_server.py +39 -0
  107. package/templates/code/start_server.sh +39 -0
  108. package/templates/diffusors/Dockerfile +72 -0
  109. package/templates/diffusors/patch_image_api.py +35 -0
  110. package/templates/diffusors/serve +115 -0
  111. package/templates/diffusors/start_server.sh +114 -0
  112. package/templates/do/.gitkeep +1 -0
  113. package/templates/do/README.md +541 -0
  114. package/templates/do/build +83 -0
  115. package/templates/do/ci +681 -0
  116. package/templates/do/clean +811 -0
  117. package/templates/do/config +260 -0
  118. package/templates/do/deploy +1560 -0
  119. package/templates/do/export +306 -0
  120. package/templates/do/logs +319 -0
  121. package/templates/do/manifest +12 -0
  122. package/templates/do/push +119 -0
  123. package/templates/do/register +580 -0
  124. package/templates/do/run +113 -0
  125. package/templates/do/submit +417 -0
  126. package/templates/do/test +1147 -0
  127. package/templates/hyperpod/configmap.yaml +24 -0
  128. package/templates/hyperpod/deployment.yaml +71 -0
  129. package/templates/hyperpod/pvc.yaml +42 -0
  130. package/templates/hyperpod/service.yaml +17 -0
  131. package/templates/nginx-diffusors.conf +74 -0
  132. package/templates/nginx-predictors.conf +47 -0
  133. package/templates/nginx-tensorrt.conf +74 -0
  134. package/templates/requirements.txt +61 -0
  135. package/templates/sample_model/test_inference.py +123 -0
  136. package/templates/sample_model/train_abalone.py +252 -0
  137. package/templates/test/test_endpoint.sh +79 -0
  138. package/templates/test/test_local_image.sh +80 -0
  139. package/templates/test/test_model_handler.py +180 -0
  140. package/templates/triton/Dockerfile +128 -0
  141. package/templates/triton/config.pbtxt +163 -0
  142. package/templates/triton/model.py +130 -0
  143. package/templates/triton/requirements.txt +11 -0
@@ -0,0 +1,341 @@
1
+ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ // SPDX-License-Identifier: Apache-2.0
3
+
4
+ /**
5
+ * Template Manager - Handles configuration validation
6
+ *
7
+ * This module validates user configuration choices to ensure they are
8
+ * supported by the generator. With do-framework integration, conditional
9
+ * file exclusion logic has been removed - all template files are now
10
+ * generated unconditionally, and runtime scripts handle conditional logic.
11
+ *
12
+ * Requirements: 7.1, 7.2, 7.3, 7.4, 2.1
13
+ */
14
+
15
+ /**
16
+ * GPU-requiring Triton backends that must use GPU instance types
17
+ */
18
+ const GPU_REQUIRING_BACKENDS = ['triton-vllm', 'triton-tensorrtllm', 'diffusors-vllm-omni']
19
+
20
+ /**
21
+ * CPU-only instance type families (patterns that indicate non-GPU instances)
22
+ */
23
+ const CPU_ONLY_INSTANCE_PATTERNS = [
24
+ /^ml\.m[0-9]+\./, // ml.m4.*, ml.m5.*, ml.m6i.*, etc.
25
+ /^ml\.c[0-9]+\./, // ml.c4.*, ml.c5.*, ml.c6i.*, etc.
26
+ /^ml\.t[0-9]+\./, // ml.t2.*, ml.t3.*, etc.
27
+ /^ml\.r[0-9]+\./, // ml.r5.*, ml.r6i.*, etc.
28
+ ]
29
+
30
+ /**
31
+ * Check if an instance type is CPU-only (no GPU)
32
+ * @param {string} instanceType - e.g. 'ml.m5.large', 'ml.g5.xlarge'
33
+ * @returns {boolean} true if CPU-only, false if GPU-capable
34
+ */
35
+ function isCpuOnlyInstance(instanceType) {
36
+ if (!instanceType || instanceType === 'custom') {
37
+ return false
38
+ }
39
+ return CPU_ONLY_INSTANCE_PATTERNS.some(pattern => pattern.test(instanceType))
40
+ }
41
+
export default class TemplateManager {
    /**
     * @param {Object} answers - User configuration choices to validate
     */
    constructor(answers) {
        this.answers = answers
    }

    /**
     * Validates that the configuration is supported
     * @throws {Error} If unsupported configuration detected
     */
    validate() {
        const supportedOptions = {
            // 15 canonical deployment-config values (2 http, 5 transformers, 7 triton, 1 diffusors)
            deploymentConfigs: [
                // HTTP architecture (2)
                'http-flask', 'http-fastapi',
                // Transformers architecture (5)
                'transformers-vllm', 'transformers-sglang',
                'transformers-tensorrt-llm', 'transformers-lmi', 'transformers-djl',
                // Triton architecture (7)
                'triton-fil', 'triton-onnxruntime', 'triton-tensorflow',
                'triton-pytorch', 'triton-vllm', 'triton-tensorrtllm', 'triton-python',
                // Diffusors architecture (1)
                'diffusors-vllm-omni'
            ],
            buildTargets: ['codebuild'],
            deploymentTargets: ['managed-inference', 'async-inference', 'batch-transform', 'hyperpod-eks'],
            testTypes: ['local-model-cli', 'local-model-server', 'hosted-model-endpoint'],
            awsRegions: [
                'us-east-1', 'us-east-2', 'us-west-1', 'us-west-2',
                'eu-west-1', 'eu-west-2', 'eu-central-1', 'eu-north-1',
                'ap-southeast-1', 'ap-southeast-2', 'ap-northeast-1',
                'ca-central-1', 'sa-east-1'
            ]
        }

        // Validate deployment configuration if present
        if (this.answers.deploymentConfig) {
            this._validateChoice('deploymentConfig', supportedOptions.deploymentConfigs)

            // GPU instance type enforcement for GPU-requiring backends
            this._validateGpuRequirement()
        } else {
            // Fallback: validate architecture and backend separately (new canonical format)
            const architectures = ['http', 'transformers', 'triton', 'diffusors']
            const backends = [
                // http backends
                'flask', 'fastapi',
                // transformers backends
                'vllm', 'sglang', 'tensorrt-llm', 'lmi', 'djl',
                // triton backends
                'fil', 'onnxruntime', 'tensorflow', 'pytorch', 'tensorrtllm', 'python',
                // diffusors backends
                'vllm-omni'
            ]

            this._validateChoice('architecture', architectures)
            this._validateChoice('backend', backends)

            // Validate tensorrt-llm is only used with transformers architecture
            if (this.answers.backend === 'tensorrt-llm' && this.answers.architecture !== 'transformers') {
                throw new Error('⚠️ TensorRT-LLM is only supported with the transformers architecture. Please select "transformers" as your architecture or choose a different backend.')
            }

            // GPU instance type enforcement for GPU-requiring backends (fallback path)
            const deploymentConfig = this.answers.architecture && this.answers.backend
                ? `${this.answers.architecture}-${this.answers.backend}`
                : null
            if (deploymentConfig && GPU_REQUIRING_BACKENDS.includes(deploymentConfig)) {
                this._validateGpuRequirementForConfig(deploymentConfig)
            }
        }

        // Validate buildTarget (replaces deployTarget)
        if (this.answers.buildTarget) {
            this._validateChoice('buildTarget', supportedOptions.buildTargets)
        } else if (this.answers.deployTarget) {
            // Backward compatibility: validate deployTarget against buildTargets
            this._validateChoice('deployTarget', supportedOptions.buildTargets)
        }

        // Validate deploymentTarget
        if (this.answers.deploymentTarget) {
            this._validateChoice('deploymentTarget', supportedOptions.deploymentTargets)
        }

        // Validate HyperPod EKS specific fields
        if (this.answers.deploymentTarget === 'hyperpod-eks') {
            this._validateHyperPodConfig()
        }

        // Validate async inference specific fields (no-op for other targets)
        this._validateAsyncConfig()

        // Validate batch transform specific fields (no-op for other targets)
        this._validateBatchTransformConfig()

        // Validate instance type format (ml.{family}.{size}) whenever one is
        // supplied, regardless of deployment target. The family may contain
        // hyphen-separated parts (e.g. ml.p6-b200.48xlarge).
        if (this.answers.instanceType && this.answers.instanceType !== 'custom') {
            const instancePattern = /^ml\.[a-z0-9]+(-[a-z0-9]+)*\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/
            if (!instancePattern.test(this.answers.instanceType)) {
                throw new Error(`⚠️ Invalid instance type format: ${this.answers.instanceType}. Expected format: ml.{family}.{size} (e.g., ml.m5.large, ml.g5.xlarge)`)
            }
        }

        this._validateChoice('awsRegion', supportedOptions.awsRegions)

        // Validate test types if testing is enabled
        if (this.answers.includeTesting && this.answers.testTypes) {
            for (const testType of this.answers.testTypes) {
                this._validateChoice('testType', supportedOptions.testTypes, testType)
            }
        }
    }

    /**
     * Validates HyperPod EKS specific configuration
     * @private
     * @throws {Error} If HyperPod configuration is invalid
     */
    _validateHyperPodConfig() {
        // Validate hyperPodCluster is a non-empty string. The typeof guard
        // turns a non-string value into the validation error below instead
        // of a TypeError from calling .trim() on it.
        const cluster = this.answers.hyperPodCluster
        if (typeof cluster !== 'string' || cluster.trim() === '') {
            throw new Error('⚠️ hyperPodCluster is required when deploymentTarget is "hyperpod-eks". Please provide a valid HyperPod cluster name.')
        }

        // Validate hyperPodNamespace conforms to RFC 1123 DNS label format
        if (this.answers.hyperPodNamespace) {
            if (!this._isValidRfc1123DnsLabel(this.answers.hyperPodNamespace)) {
                throw new Error(`⚠️ Invalid hyperPodNamespace: "${this.answers.hyperPodNamespace}". Namespace must conform to RFC 1123 DNS label format: lowercase alphanumeric characters or hyphens, must start and end with an alphanumeric character, and be at most 63 characters.`)
            }
        }

        // Validate hyperPodReplicas is an integer >= 1
        if (this.answers.hyperPodReplicas !== undefined) {
            const replicas = this.answers.hyperPodReplicas
            if (!Number.isInteger(replicas) || replicas < 1) {
                throw new Error(`⚠️ Invalid hyperPodReplicas: "${replicas}". Replicas must be an integer greater than or equal to 1.`)
            }
        }
    }

    /**
     * Validates a string conforms to RFC 1123 DNS label format
     * @private
     * @param {string} value - The value to validate
     * @returns {boolean} True if valid RFC 1123 DNS label
     */
    _isValidRfc1123DnsLabel(value) {
        if (!value || typeof value !== 'string') {
            return false
        }
        // RFC 1123 DNS label: lowercase alphanumeric, hyphens allowed (not at start/end), max 63 chars
        const rfc1123Pattern = /^[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?$/
        return value.length <= 63 && rfc1123Pattern.test(value)
    }

    /**
     * Tells whether an optional answer was actually supplied.
     * Falsy values and whitespace-only strings count as "not provided".
     * @private
     * @param {*} value - Raw answer value
     * @returns {boolean} True if the value should be validated
     */
    _isProvided(value) {
        if (!value) {
            return false
        }
        return typeof value !== 'string' || value.trim() !== ''
    }

    /**
     * Validates that an optional answer, when provided, is an S3 URI.
     * @private
     * @param {string} field - Name of the answer holding the S3 path
     * @param {string} exampleSuffix - Key prefix shown in the error example (e.g. 'output')
     * @throws {Error} If the value is provided but is not a string starting with "s3://"
     */
    _validateS3Path(field, exampleSuffix) {
        const value = this.answers[field]
        if (!this._isProvided(value)) return

        if (typeof value !== 'string' || !value.startsWith('s3://')) {
            throw new Error(`⚠️ ${field} must start with "s3://". Example: s3://my-bucket/${exampleSuffix}/`)
        }
    }

    /**
     * Validates that an optional answer, when provided, is an SNS topic ARN.
     * @private
     * @param {string} field - Name of the answer holding the topic ARN
     * @throws {Error} If the value is provided but does not match the SNS ARN format
     */
    _validateSnsTopicArn(field) {
        const value = this.answers[field]
        if (!this._isProvided(value)) return

        const snsArnPattern = /^arn:aws:sns:[a-z0-9-]+:\d{12}:.+$/
        if (typeof value !== 'string' || !snsArnPattern.test(value)) {
            throw new Error(`⚠️ ${field} must be a valid SNS ARN. Format: arn:aws:sns:<region>:<account-id>:<topic-name>`)
        }
    }

    /**
     * Validates async inference specific configuration
     * @private
     * @throws {Error} If async configuration is invalid
     */
    _validateAsyncConfig() {
        if (this.answers.deploymentTarget !== 'async-inference') return

        // Validate S3 output path format if explicitly provided
        this._validateS3Path('asyncS3OutputPath', 'output')

        // Validate SNS topic ARN format if explicitly provided
        this._validateSnsTopicArn('asyncSnsSuccessTopic')
        this._validateSnsTopicArn('asyncSnsErrorTopic')

        // Validate max concurrent invocations
        if (this.answers.asyncMaxConcurrentInvocations !== undefined) {
            const val = this.answers.asyncMaxConcurrentInvocations
            if (!Number.isInteger(val) || val < 1) {
                throw new Error('⚠️ asyncMaxConcurrentInvocations must be an integer >= 1')
            }
        }
    }

    /**
     * Validates batch transform specific configuration
     * @private
     * @throws {Error} If batch transform configuration is invalid
     */
    _validateBatchTransformConfig() {
        if (this.answers.deploymentTarget !== 'batch-transform') return

        // Validate S3 input/output path format if provided
        this._validateS3Path('batchInputPath', 'input')
        this._validateS3Path('batchOutputPath', 'output')

        // Validate instance count
        if (this.answers.batchInstanceCount !== undefined) {
            const val = this.answers.batchInstanceCount
            if (!Number.isInteger(val) || val < 1) {
                throw new Error('⚠️ batchInstanceCount must be an integer >= 1')
            }
        }

        // Validate split type
        const validSplitTypes = ['Line', 'RecordIO', 'None']
        if (this.answers.batchSplitType && !validSplitTypes.includes(this.answers.batchSplitType)) {
            throw new Error(`⚠️ batchSplitType must be one of: ${validSplitTypes.join(', ')}`)
        }

        // Validate batch strategy
        const validStrategies = ['MultiRecord', 'SingleRecord']
        if (this.answers.batchStrategy && !validStrategies.includes(this.answers.batchStrategy)) {
            throw new Error(`⚠️ batchStrategy must be one of: ${validStrategies.join(', ')}`)
        }

        // Validate join source
        const validJoinSources = ['Input', 'None']
        if (this.answers.batchJoinSource && !validJoinSources.includes(this.answers.batchJoinSource)) {
            throw new Error(`⚠️ batchJoinSource must be one of: ${validJoinSources.join(', ')}`)
        }

        // Validate max concurrent transforms
        if (this.answers.batchMaxConcurrentTransforms !== undefined) {
            const val = this.answers.batchMaxConcurrentTransforms
            if (!Number.isInteger(val) || val < 0) {
                throw new Error('⚠️ batchMaxConcurrentTransforms must be an integer >= 0')
            }
        }

        // Validate max payload in MB
        if (this.answers.batchMaxPayloadInMB !== undefined) {
            const val = this.answers.batchMaxPayloadInMB
            if (!Number.isInteger(val) || val < 0 || val > 100) {
                throw new Error('⚠️ batchMaxPayloadInMB must be an integer between 0 and 100')
            }
        }
    }

    /**
     * Validates GPU instance type requirement for GPU-requiring backends.
     * Called when deploymentConfig is present.
     * @private
     * @throws {Error} If a GPU-requiring backend is paired with a CPU-only instance
     */
    _validateGpuRequirement() {
        const dc = this.answers.deploymentConfig
        if (GPU_REQUIRING_BACKENDS.includes(dc)) {
            this._validateGpuRequirementForConfig(dc)
        }
    }

    /**
     * Validates that a GPU-requiring deployment config is not paired with a CPU-only instance.
     * @private
     * @param {string} deploymentConfig - The deployment config string
     * @throws {Error} If instance type is CPU-only
     */
    _validateGpuRequirementForConfig(deploymentConfig) {
        const instanceType = this.answers.instanceType
        if (isCpuOnlyInstance(instanceType)) {
            throw new Error(
                `⚠️ ${deploymentConfig} requires a GPU instance type. ` +
                `Selected: ${instanceType}. ` +
                `Recommended: ml.g5.xlarge, ml.g5.2xlarge`
            )
        }
    }

    /**
     * Validates a single configuration choice
     * @private
     * @param {string} field - Answer name, used for lookup and in the error message
     * @param {Array<string>} supportedValues - Values accepted for this field
     * @param {string|null} [value=null] - Explicit value to check; when null or
     *   undefined, the value is read from this.answers[field]
     * @throws {Error} If a non-empty value is not in supportedValues
     */
    _validateChoice(field, supportedValues, value = null) {
        // Only fall back to the stored answer when no explicit value was passed.
        const actualValue = value ?? this.answers[field]
        if (actualValue && !supportedValues.includes(actualValue)) {
            throw new Error(`⚠️ ${actualValue} not implemented yet for ${field}.`)
        }
    }
}
341
+
@@ -0,0 +1,314 @@
1
+ import CudaValidator from './cuda-validator.js';
2
+ import NeuronValidator from './neuron-validator.js';
3
+ import CpuValidator from './cpu-validator.js';
4
+ import RocmValidator from './rocm-validator.js';
5
+
/**
 * Validation engine for framework and instance compatibility.
 * Orchestrates accelerator validation using pluggable validators.
 *
 * Requirements: 4.7, 4.8, 4.10, 4.11, 4.12, 4.13, 4.14, 4.15, 4.16, 4.17, 4.18, 4.20, 4.21
 */
export default class ValidationEngine {
    constructor() {
        // Initialize accelerator validators registry
        this.acceleratorValidators = new Map();

        // Register default validators
        this.registerAcceleratorValidator('cuda', new CudaValidator());
        this.registerAcceleratorValidator('neuron', new NeuronValidator());
        this.registerAcceleratorValidator('cpu', new CpuValidator());
        this.registerAcceleratorValidator('rocm', new RocmValidator());
    }

    /**
     * Register a custom accelerator validator.
     * Enables extensibility for new accelerator types.
     *
     * @param {string} acceleratorType - Accelerator type (cuda, neuron, cpu, rocm, etc.)
     * @param {AcceleratorValidator} validator - Validator instance
     *
     * Requirements: 4.10, 4.21
     */
    registerAcceleratorValidator(acceleratorType, validator) {
        this.acceleratorValidators.set(acceleratorType, validator);
    }

    /**
     * Validate accelerator compatibility between framework and instance.
     * Delegates to appropriate accelerator-specific validator.
     *
     * @param {Object} frameworkConfig - Framework accelerator requirements
     * @param {Object} frameworkConfig.accelerator - Accelerator configuration
     * @param {string} frameworkConfig.accelerator.type - Accelerator type
     * @param {string} frameworkConfig.accelerator.version - Required version
     * @param {Object} instanceConfig - Instance accelerator capabilities
     * @param {Object} instanceConfig.accelerator - Accelerator configuration
     * @param {string} instanceConfig.accelerator.type - Accelerator type
     * @param {Array<string>} instanceConfig.accelerator.versions - Supported versions
     * @returns {Object} ValidationResult
     * @returns {boolean} ValidationResult.compatible - Whether configuration is compatible
     * @returns {string} [ValidationResult.error] - Error message if incompatible
     * @returns {string} [ValidationResult.warning] - Warning message if issues detected
     * @returns {string} [ValidationResult.info] - Informational message
     *
     * Requirements: 4.11, 4.12, 4.13, 4.14, 4.15, 4.16, 4.17, 4.18
     */
    validateAcceleratorCompatibility(frameworkConfig, instanceConfig) {
        const required = frameworkConfig?.accelerator;
        const provided = instanceConfig?.accelerator;

        // A config without accelerator details cannot be matched. Report it as
        // incompatible instead of throwing, so that one malformed mapping entry
        // does not abort a whole recommendation pass.
        if (!required || !provided) {
            return {
                compatible: false,
                error: 'Accelerator configuration is missing from the framework or instance configuration.'
            };
        }

        // Check if accelerator types match
        if (required.type !== provided.type) {
            return {
                compatible: false,
                error: `Framework requires ${required.type} accelerator, ` +
                    `but instance provides ${provided.type}. ` +
                    `Please select an instance type with ${required.type} support.`
            };
        }

        // Get validator for accelerator type
        const validator = this.acceleratorValidators.get(required.type);

        if (!validator) {
            return {
                compatible: true,
                warning: `No validator available for ${required.type} accelerator. ` +
                    'Proceeding without version validation.'
            };
        }

        // Delegate to accelerator-specific validator
        return validator.validate(frameworkConfig, instanceConfig);
    }

    /**
     * Get recommended instance types for a framework configuration.
     *
     * @param {Object} frameworkConfig - Framework configuration
     * @param {Object} instanceAcceleratorMapping - Instance accelerator mapping registry
     * @returns {Array<Object>} Recommended instance types with compatibility info
     *
     * Requirements: 4.2, 4.3, 4.7, 4.8
     */
    getRecommendedInstanceTypes(frameworkConfig, instanceAcceleratorMapping) {
        const recommendations = [];

        // Iterate through all instance types in mapping
        for (const [instanceType, instanceConfig] of Object.entries(instanceAcceleratorMapping)) {
            // Validate compatibility
            const validation = this.validateAcceleratorCompatibility(frameworkConfig, instanceConfig);

            if (validation.compatible) {
                recommendations.push({
                    instanceType,
                    acceleratorType: instanceConfig.accelerator.type,
                    acceleratorVersions: instanceConfig.accelerator.versions,
                    compatible: true,
                    info: validation.info
                });
            }
        }

        return recommendations;
    }

    /**
     * Validate environment variables against framework specifications.
     * Uses multiple validation strategies based on configuration.
     *
     * @param {Object} environmentVariables - Environment variables to validate
     * @param {Object} frameworkConfig - Framework configuration with known flags
     * @param {Object} options - Validation options
     * @param {boolean} options.enabled - Enable/disable validation completely (default: true)
     * @param {boolean} options.useKnownFlags - Use known flags registry (default: true)
     * @param {boolean} options.useCommunityReports - Use community reports (default: true)
     * @param {boolean} options.useDockerIntrospection - Use Docker introspection (default: false)
     * @returns {Object} ValidationResult
     * @returns {Array<Object>} ValidationResult.errors - Validation errors
     * @returns {Array<Object>} ValidationResult.warnings - Validation warnings
     * @returns {Array<string>} ValidationResult.strategiesUsed - Validation strategies used
     *
     * Requirements: 13.9, 13.10, 13.11, 13.13, 13.14, 13.15, 13.16, 13.17, 13.31
     */
    validateEnvironmentVariables(environmentVariables, frameworkConfig, options = {}) {
        const {
            enabled = true,
            useKnownFlags = true,
            useCommunityReports = true,
            useDockerIntrospection = false
        } = options;

        // If validation is completely disabled, return empty result
        if (!enabled) {
            return {
                errors: [],
                warnings: [],
                strategiesUsed: []
            };
        }

        // Treat a missing variable map or framework config as empty rather than throwing.
        const variables = environmentVariables ?? {};
        const { knownFlags, communityReports } = frameworkConfig ?? {};

        const errors = [];
        const warnings = [];
        const strategiesUsed = [];

        // Known flags validation
        if (useKnownFlags && knownFlags) {
            strategiesUsed.push('known-flags-registry');
            const knownFlagsResult = this._validateWithKnownFlags(variables, knownFlags);
            errors.push(...knownFlagsResult.errors);
            warnings.push(...knownFlagsResult.warnings);
        }

        // Community reports validation
        if (useCommunityReports && communityReports) {
            strategiesUsed.push('community-reports');
            const communityResult = this._validateWithCommunityReports(variables, communityReports);
            errors.push(...communityResult.errors);
            warnings.push(...communityResult.warnings);
        }

        // Docker introspection validation (opt-in, experimental)
        if (useDockerIntrospection) {
            strategiesUsed.push('docker-introspection');
            warnings.push({
                variable: null,
                message: 'Docker introspection validation is experimental and not tested in CI/CD'
            });
        }

        return {
            errors,
            warnings,
            strategiesUsed
        };
    }

    /**
     * Look up a key on a registry object, ignoring inherited members.
     * Prevents variable names such as "constructor" or "__proto__" from
     * resolving to Object.prototype members.
     *
     * @param {Object} registry - Plain object keyed by variable name
     * @param {string} key - Variable name to look up
     * @returns {*} The registry's own value for key, or undefined
     * @private
     */
    _lookupOwn(registry, key) {
        if (registry === null || typeof registry !== 'object') {
            return undefined;
        }
        return Object.prototype.hasOwnProperty.call(registry, key) ? registry[key] : undefined;
    }

    /**
     * Validate environment variables using known flags registry.
     *
     * @param {Object} environmentVariables - Environment variables to validate
     * @param {Object} knownFlags - Known flags registry
     * @returns {Object} Validation result with errors and warnings
     *
     * Requirements: 13.13, 13.14, 13.15, 13.16, 13.17
     * @private
     */
    _validateWithKnownFlags(environmentVariables, knownFlags) {
        const errors = [];
        const warnings = [];

        for (const [varName, varValue] of Object.entries(environmentVariables)) {
            const flagSpec = this._lookupOwn(knownFlags, varName);

            if (!flagSpec) {
                // Unknown flag - not an error, just informational
                continue;
            }

            // Check if deprecated
            if (flagSpec.deprecated) {
                warnings.push({
                    variable: varName,
                    message: `${varName} is deprecated. ${flagSpec.deprecationMessage || ''}`,
                    replacement: flagSpec.replacement
                });
            }

            // Validate type
            if (flagSpec.type) {
                const typeValid = this._validateType(varValue, flagSpec.type);
                if (!typeValid) {
                    errors.push({
                        variable: varName,
                        message: `${varName} must be of type ${flagSpec.type}, got ${typeof varValue}`
                    });
                }
            }

            // Validate range constraints
            if (flagSpec.min !== undefined || flagSpec.max !== undefined) {
                const numValue = Number(varValue);
                if (flagSpec.min !== undefined && numValue < flagSpec.min) {
                    errors.push({
                        variable: varName,
                        message: `${varName} must be >= ${flagSpec.min}, got ${numValue}`
                    });
                }
                if (flagSpec.max !== undefined && numValue > flagSpec.max) {
                    errors.push({
                        variable: varName,
                        message: `${varName} must be <= ${flagSpec.max}, got ${numValue}`
                    });
                }
            }
        }

        return { errors, warnings };
    }

    /**
     * Validate environment variables using community reports.
     *
     * @param {Object} environmentVariables - Environment variables to validate
     * @param {Object} communityReports - Community validation reports
     * @returns {Object} Validation result with errors and warnings
     *
     * Requirements: 13.13, 13.14, 13.15
     * @private
     */
    _validateWithCommunityReports(environmentVariables, communityReports) {
        const errors = [];
        const warnings = [];

        for (const varName of Object.keys(environmentVariables)) {
            const reports = this._lookupOwn(communityReports, varName);

            // Skip variables without a usable (non-empty array) report list
            if (!Array.isArray(reports) || reports.length === 0) {
                continue;
            }

            // Check for reported issues
            const issueReports = reports.filter(r => r.status === 'invalid' || r.status === 'deprecated');
            if (issueReports.length > 0) {
                warnings.push({
                    variable: varName,
                    message: `Community reports indicate potential issues with ${varName}`,
                    reports: issueReports
                });
            }
        }

        return { errors, warnings };
    }

    /**
     * Convert a value to a number for type checking.
     * Only numbers, bigints and non-blank strings are converted. Everything
     * else (blank strings, null, booleans, arrays, objects) yields NaN, because
     * Number() would silently coerce several of those to 0 or 1.
     *
     * @param {*} value - Value to convert
     * @returns {number} Parsed number, or NaN when the value is not numeric
     * @private
     */
    _toNumeric(value) {
        if (typeof value === 'number') {
            return value;
        }
        if (typeof value === 'bigint') {
            return Number(value);
        }
        if (typeof value === 'string' && value.trim() !== '') {
            return Number(value);
        }
        return Number.NaN;
    }

    /**
     * Validate value type.
     *
     * @param {*} value - Value to validate
     * @param {string} expectedType - Expected type (integer, float, string, boolean)
     * @returns {boolean} True if type is valid; unrecognised expected types always pass
     * @private
     */
    _validateType(value, expectedType) {
        switch (expectedType) {
            case 'integer':
                return Number.isInteger(this._toNumeric(value));
            case 'float':
                return !Number.isNaN(this._toNumeric(value));
            case 'string':
                return typeof value === 'string';
            case 'boolean':
                return value === 'true' || value === 'false' ||
                    value === true || value === false ||
                    value === '1' || value === '0' ||
                    value === 1 || value === 0;
            default:
                return true;
        }
    }
}