@aws/ml-container-creator 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. package/LICENSE +202 -0
  2. package/LICENSE-THIRD-PARTY +68620 -0
  3. package/NOTICE +2 -0
  4. package/README.md +106 -0
  5. package/bin/cli.js +365 -0
  6. package/config/defaults.json +32 -0
  7. package/config/presets/transformers-djl.json +26 -0
  8. package/config/presets/transformers-gpu.json +24 -0
  9. package/config/presets/transformers-lmi.json +27 -0
  10. package/package.json +129 -0
  11. package/servers/README.md +419 -0
  12. package/servers/base-image-picker/catalogs/model-servers.json +1191 -0
  13. package/servers/base-image-picker/catalogs/python-slim.json +38 -0
  14. package/servers/base-image-picker/catalogs/triton-backends.json +51 -0
  15. package/servers/base-image-picker/catalogs/triton.json +38 -0
  16. package/servers/base-image-picker/index.js +495 -0
  17. package/servers/base-image-picker/manifest.json +17 -0
  18. package/servers/base-image-picker/package.json +15 -0
  19. package/servers/hyperpod-cluster-picker/LICENSE +202 -0
  20. package/servers/hyperpod-cluster-picker/index.js +424 -0
  21. package/servers/hyperpod-cluster-picker/manifest.json +14 -0
  22. package/servers/hyperpod-cluster-picker/package.json +17 -0
  23. package/servers/instance-recommender/LICENSE +202 -0
  24. package/servers/instance-recommender/catalogs/instances.json +852 -0
  25. package/servers/instance-recommender/index.js +284 -0
  26. package/servers/instance-recommender/manifest.json +16 -0
  27. package/servers/instance-recommender/package.json +15 -0
  28. package/servers/lib/LICENSE +202 -0
  29. package/servers/lib/bedrock-client.js +160 -0
  30. package/servers/lib/custom-validators.js +46 -0
  31. package/servers/lib/dynamic-resolver.js +36 -0
  32. package/servers/lib/package.json +11 -0
  33. package/servers/lib/schemas/image-catalog.schema.json +185 -0
  34. package/servers/lib/schemas/instances.schema.json +124 -0
  35. package/servers/lib/schemas/manifest.schema.json +64 -0
  36. package/servers/lib/schemas/model-catalog.schema.json +91 -0
  37. package/servers/lib/schemas/regions.schema.json +26 -0
  38. package/servers/lib/schemas/triton-backends.schema.json +51 -0
  39. package/servers/model-picker/catalogs/jumpstart-public.json +66 -0
  40. package/servers/model-picker/catalogs/popular-diffusors.json +88 -0
  41. package/servers/model-picker/catalogs/popular-transformers.json +226 -0
  42. package/servers/model-picker/index.js +1693 -0
  43. package/servers/model-picker/manifest.json +18 -0
  44. package/servers/model-picker/package.json +20 -0
  45. package/servers/region-picker/LICENSE +202 -0
  46. package/servers/region-picker/catalogs/regions.json +263 -0
  47. package/servers/region-picker/index.js +230 -0
  48. package/servers/region-picker/manifest.json +16 -0
  49. package/servers/region-picker/package.json +15 -0
  50. package/src/app.js +1007 -0
  51. package/src/copy-tpl.js +77 -0
  52. package/src/lib/accelerator-validator.js +39 -0
  53. package/src/lib/asset-manager.js +385 -0
  54. package/src/lib/aws-profile-parser.js +181 -0
  55. package/src/lib/bootstrap-command-handler.js +1647 -0
  56. package/src/lib/bootstrap-config.js +238 -0
  57. package/src/lib/ci-register-helpers.js +124 -0
  58. package/src/lib/ci-report-helpers.js +158 -0
  59. package/src/lib/ci-stage-helpers.js +268 -0
  60. package/src/lib/cli-handler.js +529 -0
  61. package/src/lib/comment-generator.js +544 -0
  62. package/src/lib/community-reports-validator.js +91 -0
  63. package/src/lib/config-manager.js +2106 -0
  64. package/src/lib/configuration-exporter.js +204 -0
  65. package/src/lib/configuration-manager.js +695 -0
  66. package/src/lib/configuration-matcher.js +221 -0
  67. package/src/lib/cpu-validator.js +36 -0
  68. package/src/lib/cuda-validator.js +57 -0
  69. package/src/lib/deployment-config-resolver.js +103 -0
  70. package/src/lib/deployment-entry-schema.js +125 -0
  71. package/src/lib/deployment-registry.js +598 -0
  72. package/src/lib/docker-introspection-validator.js +51 -0
  73. package/src/lib/engine-prefix-resolver.js +60 -0
  74. package/src/lib/huggingface-client.js +172 -0
  75. package/src/lib/key-value-parser.js +37 -0
  76. package/src/lib/known-flags-validator.js +200 -0
  77. package/src/lib/manifest-cli.js +280 -0
  78. package/src/lib/mcp-client.js +303 -0
  79. package/src/lib/mcp-command-handler.js +532 -0
  80. package/src/lib/neuron-validator.js +80 -0
  81. package/src/lib/parameter-schema-validator.js +284 -0
  82. package/src/lib/prompt-runner.js +1349 -0
  83. package/src/lib/prompts.js +1138 -0
  84. package/src/lib/registry-command-handler.js +519 -0
  85. package/src/lib/registry-loader.js +198 -0
  86. package/src/lib/rocm-validator.js +80 -0
  87. package/src/lib/schema-validator.js +157 -0
  88. package/src/lib/sensitive-redactor.js +59 -0
  89. package/src/lib/template-engine.js +156 -0
  90. package/src/lib/template-manager.js +341 -0
  91. package/src/lib/validation-engine.js +314 -0
  92. package/src/prompt-adapter.js +63 -0
  93. package/templates/Dockerfile +300 -0
  94. package/templates/IAM_PERMISSIONS.md +84 -0
  95. package/templates/MIGRATION.md +488 -0
  96. package/templates/PROJECT_README.md +439 -0
  97. package/templates/TEMPLATE_SYSTEM.md +243 -0
  98. package/templates/buildspec.yml +64 -0
  99. package/templates/code/chat_template.jinja +1 -0
  100. package/templates/code/flask/gunicorn_config.py +35 -0
  101. package/templates/code/flask/wsgi.py +10 -0
  102. package/templates/code/model_handler.py +387 -0
  103. package/templates/code/serve +300 -0
  104. package/templates/code/serve.py +175 -0
  105. package/templates/code/serving.properties +105 -0
  106. package/templates/code/start_server.py +39 -0
  107. package/templates/code/start_server.sh +39 -0
  108. package/templates/diffusors/Dockerfile +72 -0
  109. package/templates/diffusors/patch_image_api.py +35 -0
  110. package/templates/diffusors/serve +115 -0
  111. package/templates/diffusors/start_server.sh +114 -0
  112. package/templates/do/.gitkeep +1 -0
  113. package/templates/do/README.md +541 -0
  114. package/templates/do/build +83 -0
  115. package/templates/do/ci +681 -0
  116. package/templates/do/clean +811 -0
  117. package/templates/do/config +260 -0
  118. package/templates/do/deploy +1560 -0
  119. package/templates/do/export +306 -0
  120. package/templates/do/logs +319 -0
  121. package/templates/do/manifest +12 -0
  122. package/templates/do/push +119 -0
  123. package/templates/do/register +580 -0
  124. package/templates/do/run +113 -0
  125. package/templates/do/submit +417 -0
  126. package/templates/do/test +1147 -0
  127. package/templates/hyperpod/configmap.yaml +24 -0
  128. package/templates/hyperpod/deployment.yaml +71 -0
  129. package/templates/hyperpod/pvc.yaml +42 -0
  130. package/templates/hyperpod/service.yaml +17 -0
  131. package/templates/nginx-diffusors.conf +74 -0
  132. package/templates/nginx-predictors.conf +47 -0
  133. package/templates/nginx-tensorrt.conf +74 -0
  134. package/templates/requirements.txt +61 -0
  135. package/templates/sample_model/test_inference.py +123 -0
  136. package/templates/sample_model/train_abalone.py +252 -0
  137. package/templates/test/test_endpoint.sh +79 -0
  138. package/templates/test/test_local_image.sh +80 -0
  139. package/templates/test/test_model_handler.py +180 -0
  140. package/templates/triton/Dockerfile +128 -0
  141. package/templates/triton/config.pbtxt +163 -0
  142. package/templates/triton/model.py +130 -0
  143. package/templates/triton/requirements.txt +11 -0
@@ -0,0 +1,544 @@
1
/**
 * CommentGenerator - Generates documentation comments for templates
 *
 * Builds the `#`-prefixed comment blocks that are embedded in generated
 * Dockerfiles and deployment scripts: configuration sources, validation
 * information, and troubleshooting tips.
 *
 * Every generator takes a configuration profile object and returns plain
 * strings (or an object of strings); nothing here performs I/O.
 */
export default class CommentGenerator {
    /**
     * Generate comprehensive Dockerfile comments
     * @param {Object} config - Configuration profile
     * @returns {Object} Comment sections for Dockerfile, keyed by
     *   `acceleratorInfo`, `envVarExplanations`, `validationInfo`,
     *   `troubleshooting` and `chatTemplate`
     */
    generateDockerfileComments(config) {
        return {
            acceleratorInfo: this.generateAcceleratorComment(config),
            envVarExplanations: this.generateEnvVarComments(config),
            validationInfo: this.generateValidationComment(config),
            troubleshooting: this.generateTroubleshootingTips(config),
            chatTemplate: this.generateChatTemplateComment(config)
        };
    }

    /**
     * Generate deployment script comments
     * @param {Object} config - Configuration profile
     * @returns {Object} Comment sections for deployment scripts, keyed by
     *   `header`, `amiVersion`, `instanceType` and `configSource`
     */
    generateDeploymentComments(config) {
        return {
            header: this.generateDeploymentHeader(config),
            amiVersion: this.generateAmiVersionComment(config),
            instanceType: this.generateInstanceTypeComment(config),
            configSource: this.generateConfigSourceComment(config)
        };
    }

    /**
     * Generate accelerator compatibility comment
     * @param {Object} config - Configuration profile
     * @returns {string} Accelerator compatibility information, or a single
     *   placeholder line when `config.accelerator` is not set
     */
    generateAcceleratorComment(config) {
        if (!config.accelerator) {
            return '# No accelerator requirements specified';
        }

        const lines = [
            '# Accelerator Compatibility Information',
            `# Framework: ${this._describeFramework(config)}`,
            // A missing accelerator type is reported as "unknown" rather than
            // leaking the literal text "undefined" into the generated file.
            `# Required Accelerator: ${config.accelerator.type ?? 'unknown'} ${config.accelerator.version || 'any'}`
        ];

        if (config.instanceType) {
            lines.push(`# Instance Type: ${config.instanceType}`);
        }

        if (config.instanceHardware) {
            lines.push(`# Hardware: ${config.instanceHardware}`);
        }

        if (config.inferenceAmiVersion) {
            lines.push(`# SageMaker AMI: ${config.inferenceAmiVersion}`);
        }

        if (config.validationResults?.accelerator) {
            const result = config.validationResults.accelerator;
            const status = result.compatible ? '✓ Compatible' : '⚠ Issues detected';
            lines.push(`# Validation: ${status}`);

            if (result.info) {
                lines.push(`# Info: ${result.info}`);
            }

            if (result.warning) {
                lines.push(`# Warning: ${result.warning}`);
            }

            if (result.error) {
                lines.push(`# Error: ${result.error}`);
            }
        }

        if (config.validationLevel) {
            lines.push(`# Validation Level: ${config.validationLevel}`);
        }

        // Falls back to today's date (YYYY-MM-DD) when the profile carries no timestamp.
        const timestamp = config.generatedAt || new Date().toISOString().split('T')[0];
        lines.push(`# Generated: ${timestamp}`);

        return lines.join('\n');
    }

    /**
     * Generate environment variable comments
     * @param {Object} config - Configuration profile
     * @returns {Object} Environment variable explanations grouped by category;
     *   `{ general: ... }` when no environment variables are configured
     */
    generateEnvVarComments(config) {
        const comments = {};

        if (!config.envVars || Object.keys(config.envVars).length === 0) {
            return { general: '# No environment variables configured' };
        }

        // Group environment variables by category
        const frameworkLabel = config.backend || config.framework;
        const groups = this._groupEnvVars(config.envVars, frameworkLabel);

        for (const [category, vars] of Object.entries(groups)) {
            const categoryComments = [
                `# ${category} Configuration`,
                `# Source: ${this._getEnvVarSource(config, category)}`
            ];

            // Add warnings for specific variable types
            const warnings = this._getEnvVarWarnings(vars, frameworkLabel);
            if (warnings.length > 0) {
                categoryComments.push('# Warnings:');
                warnings.forEach(warning => {
                    categoryComments.push(`# - ${warning}`);
                });
            }

            // Add documentation links if available
            const docLink = this._getDocumentationLink(frameworkLabel, category);
            if (docLink) {
                categoryComments.push(`# Documentation: ${docLink}`);
            }

            comments[category] = categoryComments.join('\n');
        }

        return comments;
    }

    /**
     * Generate validation information comment
     * @param {Object} config - Configuration profile
     * @returns {string} Validation information
     */
    generateValidationComment(config) {
        const lines = [
            '# Configuration Validation Information'
        ];

        // Add configuration sources
        if (config.configSources && config.configSources.length > 0) {
            lines.push('# Configuration Sources:');
            config.configSources.forEach(source => {
                lines.push(`# - ${source}`);
            });
        }

        // Add validation level
        if (config.validationLevel) {
            lines.push(`# Validation Level: ${config.validationLevel}`);
            lines.push(`# ${this._getValidationLevelExplanation(config.validationLevel)}`);
        }

        // Add validation results
        if (config.validationResults) {
            const envResult = config.validationResults.envVars;
            if (envResult && envResult.validated) {
                lines.push('# Environment Variables: Validated');
                if (envResult.methods && envResult.methods.length > 0) {
                    lines.push(`# Methods: ${envResult.methods.join(', ')}`);
                }
            }

            const instResult = config.validationResults.instanceType;
            if (instResult && instResult.validated) {
                lines.push('# Instance Type: Validated');
            }
        }

        return lines.join('\n');
    }

    /**
     * Generate troubleshooting tips
     * @param {Object} config - Configuration profile
     * @returns {string} Troubleshooting tips
     */
    generateTroubleshootingTips(config) {
        const lines = [
            '# Troubleshooting Tips'
        ];

        // Framework-specific tips
        const frameworkLabel = config.backend || config.framework;
        const frameworkTips = this._getFrameworkTroubleshootingTips(frameworkLabel);
        if (frameworkTips.length > 0) {
            lines.push(`# ${frameworkLabel} Common Issues:`);
            frameworkTips.forEach(tip => {
                lines.push(`# - ${tip}`);
            });
        }

        // Accelerator-specific tips
        if (config.accelerator) {
            const acceleratorTips = this._getAcceleratorTroubleshootingTips(config.accelerator.type);
            // Tips are only non-empty for a known string type, so toUpperCase() is safe here.
            if (acceleratorTips.length > 0) {
                lines.push(`# ${config.accelerator.type.toUpperCase()} Issues:`);
                acceleratorTips.forEach(tip => {
                    lines.push(`# - ${tip}`);
                });
            }
        }

        // General tips
        lines.push('# General Tips:');
        lines.push('# - Check CloudWatch logs for detailed error messages');
        lines.push('# - Verify model artifacts are in /opt/ml/model/');
        lines.push('# - Test locally with docker run before deploying');
        lines.push('# - Ensure IAM role has necessary permissions');

        return lines.join('\n');
    }

    /**
     * Generate chat template comment
     * @param {Object} config - Configuration profile
     * @returns {string} Chat template information
     */
    generateChatTemplateComment(config) {
        if (!config.chatTemplate) {
            return [
                '# Chat Template: Not configured',
                '# Note: Chat endpoints may not work without a chat template.',
                '# You may need to configure this manually for your model.'
            ].join('\n');
        }

        const lines = [
            '# Chat Template Configuration',
            `# Source: ${config.chatTemplateSource || 'Unknown'}`,
            '# This template formats chat messages for the model.',
            '# It is automatically applied by the serving framework.'
        ];

        if (config.chatTemplateSource === 'HuggingFace_Hub_API') {
            lines.push('# Template was fetched from HuggingFace Hub.');
        } else if (config.chatTemplateSource === 'Model_Registry') {
            lines.push('# Template was provided by Model Registry.');
        }

        return lines.join('\n');
    }

    /**
     * Generate deployment script header
     * @param {Object} config - Configuration profile
     * @returns {string} Deployment header comment (starts with the shebang line)
     */
    generateDeploymentHeader(config) {
        const lines = [
            '#!/bin/bash',
            '#',
            '# SageMaker Deployment Script',
            `# Framework: ${this._describeFramework(config)}`,
            `# Generated: ${config.generatedAt || new Date().toISOString().split('T')[0]}`,
            '#'
        ];

        if (config.validationLevel) {
            lines.push(`# Validation Level: ${config.validationLevel}`);
        }

        return lines.join('\n');
    }

    /**
     * Generate AMI version comment
     * @param {Object} config - Configuration profile
     * @returns {string} AMI version explanation
     */
    generateAmiVersionComment(config) {
        if (!config.inferenceAmiVersion) {
            return '# AMI Version: Using default SageMaker AMI';
        }

        const lines = [
            `# AMI Version: ${config.inferenceAmiVersion}`,
            '# This AMI provides the required accelerator drivers and runtime.'
        ];

        if (config.accelerator) {
            // Join only the parts that are present so a missing version does not
            // leave a trailing space (and a missing type does not print "undefined").
            const supported = [config.accelerator.type ?? 'unknown', config.accelerator.version]
                .filter(Boolean)
                .join(' ');
            lines.push(`# Supports: ${supported}`);
        }

        if (config.configSources && config.configSources.includes('Framework_Registry')) {
            lines.push('# Source: Framework Registry');
        }

        return lines.join('\n');
    }

    /**
     * Generate instance type comment
     * @param {Object} config - Configuration profile
     * @returns {string} Instance type explanation
     */
    generateInstanceTypeComment(config) {
        if (!config.instanceType) {
            return '# Instance Type: Not specified';
        }

        const lines = [
            `# Instance Type: ${config.instanceType}`
        ];

        if (config.instanceHardware) {
            lines.push(`# Hardware: ${config.instanceHardware}`);
        }

        if (config.recommendedInstanceTypes && config.recommendedInstanceTypes.length > 0) {
            lines.push('# Recommended alternatives:');
            // Only the first three alternatives are listed.
            config.recommendedInstanceTypes.slice(0, 3).forEach(type => {
                lines.push(`# - ${type}`);
            });
        }

        return lines.join('\n');
    }

    /**
     * Generate configuration source comment
     * @param {Object} config - Configuration profile
     * @returns {string} Configuration source information
     */
    generateConfigSourceComment(config) {
        const lines = [
            '# Configuration Sources:'
        ];

        if (config.configSources && config.configSources.length > 0) {
            config.configSources.forEach(source => {
                lines.push(`# - ${source}`);
            });
        } else {
            lines.push('# - Default configuration');
        }

        return lines.join('\n');
    }

    // Private helper methods

    /**
     * Build the "<framework> <version>" text used in headers.
     *
     * The label is `config.backend`, falling back to `config.framework`.
     * Missing parts are omitted instead of being rendered as "undefined";
     * when nothing is available the result is "unknown".
     * @private
     * @param {Object} config - Configuration profile
     * @returns {string} Human-readable framework description
     */
    _describeFramework(config) {
        const label = config.backend || config.framework;
        const parts = [label, config.version].filter(
            part => part !== undefined && part !== null && part !== ''
        );
        return parts.length > 0 ? parts.join(' ') : 'unknown';
    }

    /**
     * Case-insensitive lookup restricted to the table's own keys, so labels such
     * as "constructor" never resolve to members inherited from Object.prototype.
     * @private
     * @param {Object} table - Lookup table with lower-case keys
     * @param {*} key - Caller-supplied label
     * @returns {*} The table entry, or undefined when absent or key is not a string
     */
    _lookupByLabel(table, key) {
        if (typeof key !== 'string') {
            return undefined;
        }
        const normalized = key.toLowerCase();
        return Object.prototype.hasOwnProperty.call(table, normalized)
            ? table[normalized]
            : undefined;
    }

    /**
     * Group environment variables by category
     *
     * Checks run in order CUDA, Memory, Performance, Framework, Other and the
     * first match wins. The substring tests on the fixed keywords are
     * case-sensitive; the framework-name test is case-insensitive.
     * @private
     * @param {Object} envVars - Environment variables (name -> value)
     * @param {string} [framework] - Framework label used for the Framework group
     * @returns {Object} Non-empty groups, each an array of `{ key, value }`
     */
    _groupEnvVars(envVars, framework) {
        const groups = {
            'Framework': [],
            'Memory': [],
            'Performance': [],
            'CUDA': [],
            'Other': []
        };

        for (const [key, value] of Object.entries(envVars)) {
            if (key.includes('CUDA') || key.includes('NVIDIA')) {
                groups['CUDA'].push({ key, value });
            } else if (key.includes('MEMORY') || key.includes('MEM')) {
                groups['Memory'].push({ key, value });
            } else if (key.includes('BATCH') || key.includes('WORKER') || key.includes('THREAD')) {
                groups['Performance'].push({ key, value });
            } else if (framework && key.toLowerCase().includes(framework.toLowerCase())) {
                groups['Framework'].push({ key, value });
            } else {
                groups['Other'].push({ key, value });
            }
        }

        // Remove empty groups
        return Object.fromEntries(
            Object.entries(groups).filter(([_, vars]) => vars.length > 0)
        );
    }

    /**
     * Get environment variable source
     *
     * Reports the first source present in `config.configSources`, checked in the
     * order Model_Registry, HuggingFace_Hub_API, Framework_Registry.
     * @private
     * @param {Object} config - Configuration profile
     * @param {string} _category - Unused
     * @returns {string} Source description
     */
    _getEnvVarSource(config, _category) {
        if (config.configSources) {
            if (config.configSources.includes('Model_Registry')) {
                return 'Model Registry (highest priority)';
            } else if (config.configSources.includes('HuggingFace_Hub_API')) {
                return 'HuggingFace Hub API';
            } else if (config.configSources.includes('Framework_Registry')) {
                return 'Framework Registry';
            }
        }
        return 'Default configuration';
    }

    /**
     * Get warnings for environment variables
     * @private
     * @param {Array<{key: string, value: *}>} vars - Variables of one category
     * @param {string} _framework - Unused
     * @returns {string[]} Warning messages (may be empty)
     */
    _getEnvVarWarnings(vars, _framework) {
        const warnings = [];

        vars.forEach(({ key }) => {
            if (key.includes('MEMORY') && key.includes('FRACTION')) {
                warnings.push(`${key}: Adjust based on model size and available GPU memory`);
            }
            if (key.includes('CUDA_VISIBLE_DEVICES')) {
                warnings.push(`${key}: Ensure this matches your instance GPU count`);
            }
            if (key.includes('MAX_BATCH_SIZE')) {
                warnings.push(`${key}: May need tuning based on model size and latency requirements`);
            }
        });

        return warnings;
    }

    /**
     * Get documentation link for framework and category
     * @private
     * @param {string} framework - Framework or backend label (case-insensitive)
     * @param {string} _category - Unused
     * @returns {string|null} Documentation URL, or null for unknown labels
     */
    _getDocumentationLink(framework, _category) {
        const pythonBackendDocs = 'https://github.com/triton-inference-server/python_backend';
        const links = {
            'vllm': 'https://docs.vllm.ai/en/latest/serving/env_vars.html',
            'tensorrt-llm': 'https://nvidia.github.io/TensorRT-LLM/',
            'sglang': 'https://sgl-project.github.io/',
            'transformers': 'https://huggingface.co/docs/transformers/',
            'fil': 'https://github.com/triton-inference-server/fil_backend',
            'onnxruntime': 'https://onnxruntime.ai/docs/',
            'tensorrtllm': 'https://nvidia.github.io/TensorRT-LLM/',
            'triton-python': pythonBackendDocs,
            // Same backend under the label used by the troubleshooting tips table.
            'python': pythonBackendDocs
        };

        return this._lookupByLabel(links, framework) || null;
    }

    /**
     * Get validation level explanation
     * @private
     * @param {string} level - Validation level (case-sensitive)
     * @returns {string} Explanation sentence; a generic one for unknown levels
     */
    _getValidationLevelExplanation(level) {
        const explanations = {
            'tested': 'This configuration has been tested on AWS SageMaker and verified to work.',
            'community-validated': 'This configuration has been validated by community members.',
            'experimental': 'This configuration is experimental and may require adjustments.',
            'unknown': 'This configuration has not been tested. Proceed with caution.'
        };

        // Own-key check keeps inherited members (e.g. "toString") out of the output.
        const known = Object.prototype.hasOwnProperty.call(explanations, level);
        return (known && explanations[level]) || 'Validation level unknown.';
    }

    /**
     * Get framework-specific troubleshooting tips
     * @private
     * @param {string} framework - Framework or backend label (case-insensitive)
     * @returns {string[]} Tips, or an empty array for unknown labels
     */
    _getFrameworkTroubleshootingTips(framework) {
        const pythonBackendTips = [
            'Verify model.py implements initialize(), execute(), finalize()',
            'Check that dependencies are listed in requirements.txt',
            'Ensure model artifacts are in the correct version directory'
        ];
        const tips = {
            'vllm': [
                'If OOM errors occur, reduce MAX_MODEL_LEN or GPU_MEMORY_UTILIZATION',
                'For slow startup, check model download from HuggingFace Hub',
                'Tensor parallelism requires multiple GPUs on same instance'
            ],
            'tensorrt-llm': [
                'Ensure CUDA version matches TensorRT-LLM requirements',
                'Model must be converted to TensorRT format before deployment',
                'Check UCX settings if using multi-GPU configurations'
            ],
            'sglang': [
                'Verify model is compatible with SGLang runtime',
                'Check memory settings if experiencing OOM errors',
                'Ensure correct chat template is configured'
            ],
            'transformers': [
                'Verify model files are present in /opt/ml/model/',
                'Check tokenizer configuration for chat models',
                'Ensure sufficient memory for model loading'
            ],
            'fil': [
                'Verify model format matches config.pbtxt backend setting',
                'Check that model file is in the correct version directory',
                'Ensure input tensor dimensions match your model features'
            ],
            'onnxruntime': [
                'Verify ONNX model was exported correctly',
                'Check input/output tensor names match config.pbtxt',
                'Ensure ONNX opset version is supported by the runtime'
            ],
            'tensorrtllm': [
                'Ensure CUDA version matches TensorRT-LLM requirements',
                'Model must be converted to TensorRT format before deployment',
                'Check GPU memory is sufficient for the model'
            ],
            'python': pythonBackendTips,
            // Same backend under the label used by the documentation link table.
            'triton-python': pythonBackendTips
        };

        return this._lookupByLabel(tips, framework) || [];
    }

    /**
     * Get accelerator-specific troubleshooting tips
     * @private
     * @param {string} acceleratorType - Accelerator type (case-insensitive)
     * @returns {string[]} Tips, or an empty array for unknown types
     */
    _getAcceleratorTroubleshootingTips(acceleratorType) {
        const tips = {
            'cuda': [
                'Verify CUDA version with nvidia-smi in container',
                'Check GPU memory usage with nvidia-smi',
                'Ensure CUDA libraries are in LD_LIBRARY_PATH'
            ],
            'neuron': [
                'Verify Neuron SDK version with neuron-ls',
                'Check Neuron device availability',
                'Review Neuron compiler logs for optimization issues'
            ],
            'rocm': [
                'Verify ROCm installation with rocm-smi',
                'Check GPU visibility with HIP_VISIBLE_DEVICES',
                'Ensure ROCm libraries are properly loaded'
            ],
            'cpu': [
                'Monitor CPU usage and memory consumption',
                'Consider using optimized CPU inference libraries',
                'Adjust thread count based on vCPU count'
            ]
        };

        return this._lookupByLabel(tips, acceleratorType) || [];
    }
}
@@ -0,0 +1,91 @@
1
/**
 * Community Reports Validator Strategy
 *
 * Validates environment variables against community-contributed reports.
 * Provides warnings for flags reported as problematic by the community.
 *
 * Requirements: 13.10, 13.16, 13.17
 */
export default class CommunityReportsValidator {
    /**
     * Create a new CommunityReportsValidator.
     *
     * @param {Object} communityReports - Community reports registry, indexed as
     *   `registry[framework][version]` with an optional `registry[framework].all`
     *   fallback; each entry is an array of report objects
     */
    constructor(communityReports = {}) {
        this.communityReports = communityReports;
        this.name = 'community-reports';
    }

    /**
     * Validate environment variables against community reports.
     *
     * A report applies to a variable when `report.variable` equals the variable
     * name or `report.pattern` (a regular expression source) matches it. Reports
     * with `severity === 'error'` go to `errors`; all others go to `warnings`.
     *
     * @param {string} framework - Framework name
     * @param {string} version - Framework version
     * @param {Object} envVars - Environment variables to validate; null or
     *   undefined is treated as "no variables"
     * @returns {Promise<{warnings: Array<Object>, errors: Array<Object>}>}
     *   Validation result; each entry is `{ key, message }`
     */
    async validate(framework, version, envVars) {
        const warnings = [];
        const errors = [];

        // Get community reports for this framework
        const reports = this.getCommunityReports(framework, version);

        if (!Array.isArray(reports) || reports.length === 0) {
            // No (usable) community reports available
            return { warnings, errors };
        }

        // Compile each pattern once up front instead of once per variable, and
        // drop entries that are not objects so one bad record cannot crash the run.
        const candidates = reports
            .filter(report => report !== null && typeof report === 'object')
            .map(report => ({ report, regex: this._compilePattern(report.pattern) }));

        // Check each environment variable against community reports
        for (const key of Object.keys(envVars ?? {})) {
            for (const { report, regex } of candidates) {
                if (report.variable !== key && !this._matches(regex, key)) {
                    continue;
                }

                const entry = {
                    key,
                    message: `Community report: ${report.message} (reported by ${report.reporter || 'community'})`
                };

                if (report.severity === 'error') {
                    errors.push(entry);
                } else {
                    warnings.push(entry);
                }
            }
        }

        return { warnings, errors };
    }

    /**
     * Get community reports for a framework version.
     *
     * An entry for the exact version wins; otherwise the framework's `all`
     * entry is used. Only the registry's own keys are consulted.
     *
     * @param {string} framework - Framework name
     * @param {string} version - Framework version
     * @returns {Array|null} Community reports, or null when none are registered
     */
    getCommunityReports(framework, version) {
        const hasOwn = (obj, prop) => Object.prototype.hasOwnProperty.call(obj, prop);

        const registry = this.communityReports;
        if (registry === null || registry === undefined || !hasOwn(registry, framework)) {
            return null;
        }

        const byVersion = registry[framework];
        if (!byVersion) {
            return null;
        }

        // Try exact version match first
        if (hasOwn(byVersion, version) && byVersion[version]) {
            return byVersion[version];
        }

        // Try to find reports for all versions
        if (hasOwn(byVersion, 'all') && byVersion.all) {
            return byVersion.all;
        }

        return null;
    }

    /**
     * Compile a report pattern, tolerating malformed community data.
     *
     * A syntactically invalid pattern is treated as "matches nothing" so a
     * single bad report cannot abort validation of every other variable; the
     * report's exact `variable` match, if any, still applies.
     *
     * @private
     * @param {*} pattern - Regular expression source from a report
     * @returns {RegExp|null} Compiled expression, or null when absent or invalid
     */
    _compilePattern(pattern) {
        if (!pattern) {
            return null;
        }
        try {
            return new RegExp(pattern);
        } catch (err) {
            if (err instanceof SyntaxError) {
                return null;
            }
            throw err;
        }
    }

    /**
     * Test a compiled pattern against a variable name.
     *
     * @private
     * @param {RegExp|null} regex - Compiled pattern, or null for "no pattern"
     * @param {string} key - Environment variable name
     * @returns {boolean} True when the pattern exists and matches
     */
    _matches(regex, key) {
        if (regex === null) {
            return false;
        }
        // The expression is reused across keys; reset state in case the pattern
        // was supplied as a RegExp carrying the global or sticky flag.
        regex.lastIndex = 0;
        return regex.test(key);
    }
}