@aws/ml-container-creator 0.2.6 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/bin/cli.js +38 -2
  2. package/config/bootstrap-stack.json +94 -1
  3. package/config/defaults.json +1 -1
  4. package/infra/ci-harness/package-lock.json +22 -9
  5. package/package.json +3 -1
  6. package/servers/instance-sizer/index.js +45 -8
  7. package/servers/instance-sizer/lib/instance-ranker.js +140 -11
  8. package/servers/instance-sizer/lib/model-resolver.js +10 -6
  9. package/servers/instance-sizer/lib/quota-resolver.js +368 -0
  10. package/servers/instance-sizer/package.json +2 -0
  11. package/servers/lib/catalogs/instances.json +527 -12
  12. package/servers/lib/catalogs/model-servers.json +298 -20
  13. package/servers/lib/catalogs/model-sizes.json +27 -0
  14. package/servers/lib/catalogs/models.json +101 -0
  15. package/servers/lib/schemas/image-catalog.schema.json +15 -1
  16. package/servers/model-picker/index.js +2 -1
  17. package/src/app.js +96 -2
  18. package/src/lib/architecture-sync.js +171 -0
  19. package/src/lib/arn-detection.js +22 -0
  20. package/src/lib/bootstrap-command-handler.js +178 -3
  21. package/src/lib/cli-handler.js +2 -2
  22. package/src/lib/config-manager.js +121 -1
  23. package/src/lib/cross-cutting-checker.js +119 -0
  24. package/src/lib/deployment-entry-schema.js +1 -2
  25. package/src/lib/prompt-runner.js +514 -20
  26. package/src/lib/prompts.js +67 -5
  27. package/src/lib/registry-command-handler.js +236 -0
  28. package/src/lib/schema-sync.js +31 -0
  29. package/src/lib/secret-classification.js +56 -0
  30. package/src/lib/secrets-command-handler.js +550 -0
  31. package/src/lib/template-manager.js +49 -1
  32. package/src/lib/validate-runner.js +174 -2
  33. package/src/lib/validation-report.js +8 -1
  34. package/src/prompt-adapter.js +3 -2
  35. package/templates/Dockerfile +10 -2
  36. package/templates/code/cuda_compat.sh +22 -0
  37. package/templates/code/serve +3 -0
  38. package/templates/code/start_server.sh +3 -0
  39. package/templates/diffusors/Dockerfile +2 -1
  40. package/templates/diffusors/serve +3 -0
  41. package/templates/do/README.md +33 -0
  42. package/templates/do/benchmark +646 -0
  43. package/templates/do/build +22 -0
  44. package/templates/do/clean +86 -0
  45. package/templates/do/config +41 -6
  46. package/templates/do/deploy +66 -6
  47. package/templates/do/logs +18 -3
  48. package/templates/do/register +8 -1
  49. package/templates/do/run +10 -0
  50. package/templates/triton/Dockerfile +5 -0
@@ -15,10 +15,16 @@
15
15
 
16
16
  import { existsSync, readFileSync, readdirSync } from 'node:fs';
17
17
  import path from 'node:path';
18
+ import { fileURLToPath } from 'node:url';
18
19
  import PayloadBuilder from './payload-builder.js';
19
20
  import SchemaValidationEngine from './schema-validation-engine.js';
20
21
  import ServiceModelParser from './service-model-parser.js';
21
- import { getRegistryPath, loadManifest } from './schema-sync.js';
22
+ import CrossCuttingChecker from './cross-cutting-checker.js';
23
+ import HuggingFaceClient from './huggingface-client.js';
24
+ import { getRegistryPath, loadManifest, hasBenchmarkShape } from './schema-sync.js';
25
+
26
+ const __filename = fileURLToPath(import.meta.url);
27
+ const __dirname = path.dirname(__filename);
22
28
 
23
29
  /**
24
30
  * Parse a do/config shell file into a key-value object.
@@ -46,6 +52,115 @@ export function parseDoConfig(configPath) {
46
52
  return config;
47
53
  }
48
54
 
55
/**
 * Report whether a do/config entry carries a usable value.
 * null, undefined and the empty string all count as "not set".
 *
 * @param {*} value - Raw value read from the parsed config
 * @returns {boolean} true when the value should be validated
 */
function isBenchmarkValueProvided(value) {
    return value !== null && value !== undefined && value !== '';
}

/**
 * Report whether a config value contains a shell expansion (`$(...)` or `${...}`).
 * Such values are only known once the shell evaluates them, so they cannot be
 * validated statically.
 *
 * @param {string} text - Config value, already coerced to a string
 * @returns {boolean} true when the value contains `$(` or `${`
 */
function hasBenchmarkShellExpansion(text) {
    return text.includes('$(') || text.includes('${');
}

/**
 * Report whether a config value is a plain decimal integer >= 1.
 * Number() alone is too lenient here: it also accepts "1e3", "0x10" and "+5",
 * which would be handed to the shell verbatim rather than as the number they coerce to.
 *
 * @param {*} value - Raw value read from the parsed config
 * @returns {boolean} true for decimal digit strings (surrounding whitespace allowed) whose value is >= 1
 */
function isBenchmarkPositiveInteger(value) {
    const text = String(value).trim();
    return /^[0-9]+$/.test(text) && Number(text) >= 1;
}

/**
 * Validate benchmark parameters against service model constraints.
 * Called when the CreateAIBenchmarkJob shape is available in the synced schema.
 *
 * Validates:
 * - Concurrency: decimal integer, min 1
 * - S3OutputLocation: string, starts with s3://
 * - AIBenchmarkJobName: pattern ^[a-zA-Z0-9](-*[a-zA-Z0-9])*$, max 63 chars
 * - Input/output token means: decimal integer, min 1
 *
 * Unset values (null, undefined, '') are skipped. String values containing a
 * shell expansion (`$(` or `${`) are skipped because their final value is only
 * known at run time.
 *
 * Requirements: 8.1, 8.2, 8.3
 *
 * @param {Object} config - Parsed do/config values
 * @returns {Array<Object>} Array of validation findings (empty when everything is valid)
 */
export function validateBenchmarkParams(config) {
    const values = config ?? {};
    const findings = [];

    // Shared check for the three "integer >= 1" settings.
    const checkPositiveInteger = (key, operation, fieldPath) => {
        const raw = values[key];
        if (!isBenchmarkValueProvided(raw) || isBenchmarkPositiveInteger(raw)) {
            return;
        }
        findings.push({
            severity: 'error',
            operation,
            fieldPath,
            constraint: 'integer >= 1',
            invalidValue: raw,
            remediationHint: `${key} must be a positive integer (>= 1)`
        });
    };

    // Validate Concurrency (integer, min 1)
    checkPositiveInteger('BENCHMARK_CONCURRENCY', 'CreateAIBenchmarkJob', 'Concurrency');

    // Validate S3OutputLocation (string, starts with s3://)
    const rawS3Path = values.BENCHMARK_S3_OUTPUT_PATH;
    if (isBenchmarkValueProvided(rawS3Path)) {
        // Coerce so a non-string value yields a finding instead of a TypeError.
        const s3Path = String(rawS3Path);
        if (!hasBenchmarkShellExpansion(s3Path) && !s3Path.startsWith('s3://')) {
            findings.push({
                severity: 'error',
                operation: 'CreateAIBenchmarkJob',
                fieldPath: 'OutputConfig.S3OutputLocation',
                constraint: 'must start with s3://',
                invalidValue: rawS3Path,
                remediationHint: 'BENCHMARK_S3_OUTPUT_PATH must start with "s3://". Example: s3://my-bucket/benchmark-results/'
            });
        }
    }

    // Validate AIBenchmarkJobName pattern (^[a-zA-Z0-9](-*[a-zA-Z0-9])*$, max 63 chars)
    const rawJobName = values.BENCHMARK_JOB_NAME;
    if (isBenchmarkValueProvided(rawJobName)) {
        const jobName = String(rawJobName);
        if (!hasBenchmarkShellExpansion(jobName)) {
            const namePattern = /^[a-zA-Z0-9](-*[a-zA-Z0-9])*$/;
            // Length is reported first; the pattern is only checked for names that fit.
            if (jobName.length > 63) {
                findings.push({
                    severity: 'error',
                    operation: 'CreateAIBenchmarkJob',
                    fieldPath: 'AIBenchmarkJobName',
                    constraint: 'max 63 characters',
                    invalidValue: rawJobName,
                    remediationHint: 'AIBenchmarkJobName must be at most 63 characters'
                });
            } else if (!namePattern.test(jobName)) {
                findings.push({
                    severity: 'error',
                    operation: 'CreateAIBenchmarkJob',
                    fieldPath: 'AIBenchmarkJobName',
                    constraint: 'pattern: ^[a-zA-Z0-9](-*[a-zA-Z0-9])*',
                    invalidValue: rawJobName,
                    remediationHint: 'AIBenchmarkJobName must start with alphanumeric and contain only alphanumeric characters and hyphens'
                });
            }
        }
    }

    // Validate input tokens mean (integer, min 1)
    checkPositiveInteger(
        'BENCHMARK_INPUT_TOKENS_MEAN',
        'CreateAIWorkloadConfig',
        'WorkloadSpec.parameters.prompt_input_tokens_mean'
    );

    // Validate output tokens mean (integer, min 1)
    checkPositiveInteger(
        'BENCHMARK_OUTPUT_TOKENS_MEAN',
        'CreateAIWorkloadConfig',
        'WorkloadSpec.parameters.output_tokens_mean'
    );

    return findings;
}
163
+
49
164
  /**
50
165
  * Run the full validation pipeline.
51
166
  *
@@ -122,6 +237,63 @@ export async function run(options = {}) {
122
237
  });
123
238
 
124
239
  const report = await engine.validate(context);
240
+
241
+ // Run model architecture compatibility check (Requirement 5.1-5.2)
242
+ if (config.MODEL_NAME) {
243
+ try {
244
+ const catalogPath = path.resolve(__dirname, '../../servers/lib/catalogs/model-servers.json');
245
+ if (existsSync(catalogPath)) {
246
+ const modelServersCatalog = JSON.parse(readFileSync(catalogPath, 'utf8'));
247
+
248
+ // Fetch model's config.json from HuggingFace to get model_type
249
+ const hfClient = new HuggingFaceClient({ timeout: 10000 });
250
+ const modelConfig = await hfClient.fetchModelConfig(config.MODEL_NAME);
251
+ const modelType = modelConfig?.model_type || null;
252
+
253
+ if (modelType) {
254
+ // Extract baseImageVersion from BASE_IMAGE (e.g., "vllm/vllm-openai:v0.10.1" → "v0.10.1")
255
+ const baseImage = config.BASE_IMAGE || '';
256
+ const baseImageVersion = baseImage.includes(':') ? baseImage.split(':').pop() : '';
257
+ // Strip leading 'v' to match catalog's framework_version format (e.g., "v0.10.1" → "0.10.1")
258
+ const frameworkVersion = baseImageVersion.replace(/^v/, '');
259
+
260
+ const modelServer = config.MODEL_SERVER || '';
261
+
262
+ // Build context fields for the architecture checker
263
+ const archContext = {
264
+ config: {
265
+ modelType,
266
+ modelServer,
267
+ baseImageVersion: frameworkVersion
268
+ }
269
+ };
270
+
271
+ const checker = new CrossCuttingChecker();
272
+ const archFindings = checker.checkModelArchitectureCompatibility(archContext, modelServersCatalog);
273
+ for (const finding of archFindings) {
274
+ report.addFinding(finding);
275
+ }
276
+ }
277
+ }
278
+ } catch {
279
+ // Graceful degradation: if architecture check fails, continue without it
280
+ }
281
+ }
282
+
283
+ // Run benchmark parameter validation (Requirements 8.1, 8.2, 8.3)
284
+ if (config.BENCHMARK_CONCURRENCY || config.BENCHMARK_INPUT_TOKENS_MEAN ||
285
+ config.BENCHMARK_OUTPUT_TOKENS_MEAN || config.BENCHMARK_S3_OUTPUT_PATH) {
286
+ const benchmarkCheck = hasBenchmarkShape(registryPath);
287
+ if (benchmarkCheck.available) {
288
+ const benchmarkFindings = validateBenchmarkParams(config);
289
+ for (const finding of benchmarkFindings) {
290
+ report.addFinding(finding);
291
+ }
292
+ } else {
293
+ console.log('⚠️ Benchmark validation skipped: service model does not include AI Benchmark operations. Run `bootstrap sync-schemas` to update.');
294
+ }
295
+ }
296
+
125
297
  const summary = report.getSummary();
126
298
 
127
299
  // Load manifest for version info
@@ -164,4 +336,4 @@ export async function run(options = {}) {
164
336
  return exitCode;
165
337
  }
166
338
 
167
- export default { run, parseDoConfig };
339
+ export default { run, parseDoConfig, validateBenchmarkParams };
@@ -22,7 +22,14 @@ export default class ValidationReport {
22
22
  const source = finding.source || '';
23
23
 
24
24
  if (source === 'cross-cutting') {
25
- this.crossCuttingErrors.push(finding);
25
+ // Cross-cutting findings with medium/low confidence are advisory, not errors
26
+ if (finding.confidence === 'medium' || finding.confidence === 'low') {
27
+ this.advisoryFindings.push(finding);
28
+ } else if (finding.severity === 'warning') {
29
+ this.warnings.push(finding);
30
+ } else {
31
+ this.crossCuttingErrors.push(finding);
32
+ }
26
33
  } else if (source === 'smart-mode' || source.startsWith('smart:')) {
27
34
  // Smart-mode findings are advisory UNLESS confidence is definitive AND severity is error
28
35
  if (finding.confidence === 'definitive' && finding.severity === 'error') {
@@ -1,12 +1,12 @@
1
1
  // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
2
  // SPDX-License-Identifier: Apache-2.0
3
3
 
4
- import { select, input, confirm, checkbox, number, Separator } from '@inquirer/prompts';
4
+ import { select, input, confirm, checkbox, number, password, Separator } from '@inquirer/prompts';
5
5
 
6
6
  /**
7
7
  * Maps Yeoman prompt type names to @inquirer/prompts runner functions.
8
8
  */
9
- const runners = { list: select, select, input, confirm, checkbox, number };
9
+ const runners = { list: select, select, input, confirm, checkbox, number, password };
10
10
 
11
11
  /**
12
12
  * Runs a sequence of Yeoman-style prompt definitions using @inquirer/prompts.
@@ -55,6 +55,7 @@ export async function runPrompts(prompts, previousAnswers = {}, options = {}) {
55
55
  if (mappedChoices !== undefined) config.choices = mappedChoices;
56
56
  if (defaultVal !== undefined) config.default = defaultVal;
57
57
  if (prompt.validate) config.validate = prompt.validate;
58
+ if (prompt.mask !== undefined) config.mask = prompt.mask;
58
59
 
59
60
  answers[prompt.name] = await runner(config);
60
61
  }
@@ -12,6 +12,9 @@
12
12
  <% if (framework !== 'transformers') { %>
13
13
  FROM <%= baseImage || 'public.ecr.aws/docker/library/python:3.12-slim' %>
14
14
 
15
+ # Ensure Python output is unbuffered so SageMaker can capture logs in CloudWatch
16
+ ENV PYTHONUNBUFFERED=1
17
+
15
18
  # Set a docker label to name this project, postpended with the build time
16
19
  LABEL project.name="<%= projectName %>-<%= buildTimestamp %>" \
17
20
  project.base-name="<%= projectName %>" \
@@ -143,6 +146,9 @@ ARG BASE_IMAGE=<%= baseImage || 'deepjavalibrary/djl-serving:0.36.0-pytorch-gpu'
143
146
 
144
147
  FROM ${BASE_IMAGE}
145
148
 
149
+ # Ensure Python output is unbuffered so SageMaker can capture logs in CloudWatch
150
+ ENV PYTHONUNBUFFERED=1
151
+
146
152
  <% if (comments && comments.chatTemplate) { %>
147
153
  <%= comments.chatTemplate %>
148
154
  <% } %>
@@ -271,8 +277,9 @@ COPY code/serve /usr/bin/serve_trtllm
271
277
  RUN chmod +x /usr/bin/serve_trtllm
272
278
 
273
279
  # Copy startup script
280
+ COPY code/cuda_compat.sh /usr/bin/cuda_compat.sh
274
281
  COPY code/start_server.sh /usr/bin/start_server.sh
275
- RUN chmod +x /usr/bin/start_server.sh
282
+ RUN chmod +x /usr/bin/start_server.sh /usr/bin/cuda_compat.sh
276
283
 
277
284
  ENTRYPOINT [ "/usr/bin/start_server.sh" ]
278
285
  <% } else if (modelServer === 'lmi' || modelServer === 'djl') { %>
@@ -287,8 +294,9 @@ COPY code/serving.properties /opt/ml/model/serving.properties
287
294
  # LMI/DJL containers use their own entrypoint
288
295
  # The container will automatically start DJL Serving with the configuration
289
296
  <% } else { %>
297
+ COPY code/cuda_compat.sh /usr/bin/cuda_compat.sh
290
298
  COPY code/serve /usr/bin/serve
291
- RUN chmod 777 /usr/bin/serve
299
+ RUN chmod 777 /usr/bin/serve /usr/bin/cuda_compat.sh
292
300
 
293
301
  <% if (comments && comments.troubleshooting) { %>
294
302
  <%= comments.troubleshooting %>
@@ -0,0 +1,22 @@
1
#!/bin/bash
# CUDA Compatibility Setup
# Required for SageMaker inference AMIs using NVIDIA Container Toolkit 1.17.4+
# (al2-ami-sagemaker-inference-gpu-2-1, al2-ami-sagemaker-inference-gpu-3-1,
# al2023-ami-sagemaker-inference-gpu-4-1)
#
# These AMIs no longer auto-mount CUDA compat libraries. This script detects
# whether the host NVIDIA driver is older than what the container's CUDA toolkit
# requires, and adds the compat libraries to LD_LIBRARY_PATH if needed.
#
# The script exports LD_LIBRARY_PATH, so it only has an effect on the caller
# when it is sourced rather than executed.

# _verlt A B
# Succeeds (status 0) only when version A sorts strictly before version B
# according to `sort -V`; equal versions return status 1.
_verlt() {
    if [ "$1" = "$2" ]; then
        return 1
    fi
    # printf instead of `echo -e`: the latter is not interpreted the same way by every shell.
    [ "$1" = "$(printf '%s\n%s\n' "$1" "$2" | sort -V | head -n1)" ]
}

if [ -f /usr/local/cuda/compat/libcuda.so.1 ]; then
    # Version is taken from the symlink target, dropping its first two dot-separated
    # fields (assumes a target named like libcuda.so.<version> - TODO confirm).
    CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink /usr/local/cuda/compat/libcuda.so.1 | cut -d'.' -f 3-)
    # Host driver version as reported on the NVRM line of the kernel module info;
    # empty when the file is missing or has no matching line.
    NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true)
    if [ -n "$NVIDIA_DRIVER_VERSION" ] && [ -n "$CUDA_COMPAT_MAX_DRIVER_VERSION" ] \
        && _verlt "$NVIDIA_DRIVER_VERSION" "$CUDA_COMPAT_MAX_DRIVER_VERSION"; then
        echo "CUDA compat: driver ${NVIDIA_DRIVER_VERSION} < ${CUDA_COMPAT_MAX_DRIVER_VERSION}, adding compat libs"
        # Append the previous value only when it is non-empty: a trailing ':' would add
        # an empty entry, which the dynamic loader treats as the current directory.
        export LD_LIBRARY_PATH="/usr/local/cuda/compat${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
    fi
fi
@@ -2,6 +2,9 @@
2
2
  # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
+ # CUDA compatibility setup (required for newer SageMaker inference AMIs)
6
+ source /usr/bin/cuda_compat.sh 2>/dev/null || true
7
+
5
8
  <% if (modelServer === 'vllm') { %>
6
9
  echo "Starting vLLM server"
7
10
  <% } else if (modelServer === 'sglang') { %>
@@ -2,6 +2,9 @@
2
2
  # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
+ # CUDA compatibility setup (required for newer SageMaker inference AMIs)
6
+ source /usr/bin/cuda_compat.sh 2>/dev/null || true
7
+
5
8
  set -e
6
9
 
7
10
  echo "Starting TensorRT-LLM server on port 8081..."
@@ -59,8 +59,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends nginx \
59
59
  COPY nginx-diffusors.conf /etc/nginx/nginx.conf
60
60
 
61
61
  # Copy serve entrypoint and startup scripts
62
+ COPY code/cuda_compat.sh /usr/bin/cuda_compat.sh
62
63
  COPY code/serve /usr/bin/serve
63
- RUN chmod 777 /usr/bin/serve
64
+ RUN chmod 777 /usr/bin/serve /usr/bin/cuda_compat.sh
64
65
 
65
66
  COPY code/start_server.sh /usr/bin/start_server.sh
66
67
  RUN chmod +x /usr/bin/start_server.sh
@@ -2,6 +2,9 @@
2
2
  # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
+ # CUDA compatibility setup (required for newer SageMaker inference AMIs)
6
+ source /usr/bin/cuda_compat.sh 2>/dev/null || true
7
+
5
8
  echo "Starting vLLM-Omni server (diffusion model serving)"
6
9
 
7
10
  # Resolve model URI prefixes that engines cannot handle natively.
@@ -262,6 +262,39 @@ Clean everything:
262
262
 
263
263
  ---
264
264
 
265
+ <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
266
+ ### `./do/benchmark`
267
+
268
+ Run a SageMaker AI Benchmark against the deployed endpoint.
269
+
270
+ **What it does:**
271
+ - Verifies endpoint is InService
272
+ - Ensures S3 output bucket exists
273
+ - Creates AI workload configuration
274
+ - Creates and monitors AI benchmark job
275
+ - Displays performance results (throughput, latency P50/P90/P99, TTFT, ITL)
276
+
277
+ **Prerequisites:**
278
+ - Endpoint deployed and InService (`./do/deploy`)
279
+ - AWS credentials configured
280
+
281
+ **Usage:**
282
+ ```bash
283
+ ./do/benchmark
284
+ ```
285
+
286
+ **Clean up benchmark resources:**
287
+ ```bash
288
+ ./do/benchmark --clean
289
+ ```
290
+
291
+ **Output:**
292
+ - Benchmark results summary table
293
+ - Detailed results in S3
294
+
295
+ ---
296
+
297
+ <% } %>
265
298
  <% if (buildTarget === 'codebuild') { %>
266
299
  ### `./do/submit`
267
300