@aws/ml-container-creator 0.2.5 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/bin/cli.js +45 -4
  2. package/config/bootstrap-stack.json +14 -0
  3. package/infra/ci-harness/package-lock.json +22 -9
  4. package/package.json +7 -8
  5. package/servers/base-image-picker/index.js +3 -3
  6. package/servers/base-image-picker/manifest.json +4 -2
  7. package/servers/instance-sizer/index.js +564 -0
  8. package/servers/instance-sizer/lib/instance-ranker.js +270 -0
  9. package/servers/instance-sizer/lib/model-resolver.js +269 -0
  10. package/servers/instance-sizer/lib/vram-estimator.js +177 -0
  11. package/servers/instance-sizer/manifest.json +17 -0
  12. package/servers/instance-sizer/package.json +15 -0
  13. package/servers/{instance-recommender → lib}/catalogs/instances.json +136 -34
  14. package/servers/{base-image-picker → lib}/catalogs/model-servers.json +302 -254
  15. package/servers/lib/catalogs/model-sizes.json +131 -0
  16. package/servers/lib/catalogs/models.json +632 -0
  17. package/servers/{model-picker → lib}/catalogs/popular-diffusors.json +32 -10
  18. package/servers/{model-picker → lib}/catalogs/popular-transformers.json +59 -26
  19. package/servers/{base-image-picker → lib}/catalogs/python-slim.json +12 -12
  20. package/servers/lib/schemas/image-catalog.schema.json +6 -12
  21. package/servers/lib/schemas/instances.schema.json +29 -0
  22. package/servers/lib/schemas/model-catalog.schema.json +12 -10
  23. package/servers/lib/schemas/unified-model-catalog.schema.json +129 -0
  24. package/servers/model-picker/index.js +4 -4
  25. package/servers/model-picker/manifest.json +2 -3
  26. package/servers/region-picker/index.js +1 -1
  27. package/servers/region-picker/manifest.json +1 -1
  28. package/src/app.js +36 -0
  29. package/src/lib/architecture-sync.js +171 -0
  30. package/src/lib/arn-detection.js +22 -0
  31. package/src/lib/bootstrap-command-handler.js +120 -0
  32. package/src/lib/cli-handler.js +3 -3
  33. package/src/lib/config-manager.js +47 -1
  34. package/src/lib/configuration-manager.js +2 -2
  35. package/src/lib/cross-cutting-checker.js +460 -0
  36. package/src/lib/deployment-entry-schema.js +1 -2
  37. package/src/lib/dry-run-validator.js +78 -0
  38. package/src/lib/generation-validator.js +102 -0
  39. package/src/lib/mcp-validator-config.js +89 -0
  40. package/src/lib/payload-builder.js +153 -0
  41. package/src/lib/prompt-runner.js +866 -149
  42. package/src/lib/prompts.js +2 -2
  43. package/src/lib/registry-command-handler.js +236 -0
  44. package/src/lib/registry-loader.js +5 -5
  45. package/src/lib/schema-sync.js +203 -0
  46. package/src/lib/schema-validation-engine.js +195 -0
  47. package/src/lib/secret-classification.js +56 -0
  48. package/src/lib/secrets-command-handler.js +550 -0
  49. package/src/lib/service-model-parser.js +102 -0
  50. package/src/lib/validate-runner.js +216 -0
  51. package/src/lib/validation-report.js +140 -0
  52. package/src/lib/validators/base-validator.js +36 -0
  53. package/src/lib/validators/catalog-validator.js +177 -0
  54. package/src/lib/validators/enum-validator.js +120 -0
  55. package/src/lib/validators/required-field-validator.js +150 -0
  56. package/src/lib/validators/type-validator.js +313 -0
  57. package/src/prompt-adapter.js +3 -2
  58. package/templates/Dockerfile +1 -1
  59. package/templates/do/build +37 -5
  60. package/templates/do/config +15 -3
  61. package/templates/do/deploy +60 -5
  62. package/templates/do/logs +18 -3
  63. package/templates/do/run +15 -1
  64. package/templates/do/validate +61 -0
  65. package/servers/instance-recommender/LICENSE +0 -202
  66. package/servers/instance-recommender/index.js +0 -284
  67. package/servers/instance-recommender/manifest.json +0 -16
  68. package/servers/instance-recommender/package.json +0 -15
  69. /package/servers/{model-picker → lib}/catalogs/jumpstart-public.json +0 -0
  70. /package/servers/{region-picker → lib}/catalogs/regions.json +0 -0
  71. /package/servers/{base-image-picker → lib}/catalogs/triton-backends.json +0 -0
  72. /package/servers/{base-image-picker → lib}/catalogs/triton.json +0 -0
@@ -0,0 +1,460 @@
1
+ /* eslint-disable eqeqeq */
2
/** Model servers for which the tensor-parallelism check applies (lower-cased). */
const TENSOR_PARALLEL_SERVERS = new Set(['vllm', 'sglang']);

/** Model sources that require MODEL_ARTIFACT_URI to be set. */
const SOURCES_REQUIRING_ARTIFACT = ['s3', 'jumpstart', 'jumpstart-hub', 'registry'];

/**
 * IAM role ARN pattern. The partition segment accepts `aws` as well as
 * suffixed partitions such as `aws-cn` and `aws-us-gov`, so roles outside the
 * commercial partition are not flagged as malformed.
 */
const IAM_ROLE_ARN_PATTERN = /^arn:aws[a-z-]*:iam::\d{12}:role\/.+$/;

/** Bytes in one GiB, used to convert byte estimates to GB figures. */
const BYTES_PER_GB = 1024 ** 3;

/**
 * Treat null, undefined and blank strings as "not configured".
 * @param {*} value - Raw configuration value
 * @returns {boolean} True when the value should be considered unset
 */
function isUnset(value) {
    return value == null || (typeof value === 'string' && value.trim() === '');
}

/**
 * Resolve the instance map from either a wrapped catalog (`{ catalog: {...} }`)
 * or a bare map of instance type → instance info.
 * @param {Object} instanceCatalog - Instance catalog in either shape
 * @returns {Object} Instance map (empty object when nothing usable was given)
 */
function resolveInstanceMap(instanceCatalog) {
    return instanceCatalog?.catalog || instanceCatalog || {};
}

/**
 * Look up one instance type in the catalog.
 * Only own properties are considered so that names such as "constructor"
 * never resolve to something inherited from Object.prototype.
 * @param {Object} instanceCatalog - Instance catalog in either shape
 * @param {string} instanceType - Instance type key, e.g. "ml.g5.12xlarge"
 * @returns {Object|null} Instance info, or null when unknown
 */
function lookupInstance(instanceCatalog, instanceType) {
    if (!instanceType) return null;
    const instances = resolveInstanceMap(instanceCatalog);
    if (!Object.prototype.hasOwnProperty.call(instances, instanceType)) return null;
    return instances[instanceType] || null;
}

/**
 * Build a Finding with the fields shared by every cross-cutting check.
 * @param {Object} details - fieldPath, invalidValue, constraint, remediationHint,
 *   plus optional severity (default 'error') and confidence (default 'high')
 * @returns {Object} Finding object
 */
function createFinding({
    fieldPath,
    invalidValue,
    constraint,
    remediationHint,
    severity = 'error',
    confidence = 'high'
}) {
    return {
        service: 'cross-cutting',
        operation: 'configuration',
        fieldPath,
        invalidValue,
        constraint,
        severity,
        confidence,
        source: 'cross-cutting',
        remediationHint
    };
}

/**
 * Search a payload (at any depth) for a truthy HubAccessConfig.HubContentArn.
 * @param {*} node - Payload or nested value
 * @param {Set<Object>} seen - Objects already visited (guards against cycles)
 * @returns {boolean} True when a HubContentArn was found
 */
function containsHubContentArn(node, seen) {
    if (node === null || typeof node !== 'object' || seen.has(node)) return false;
    seen.add(node);
    if (node.HubAccessConfig?.HubContentArn) return true;
    return Object.values(node).some((child) => containsHubContentArn(child, seen));
}

/**
 * Validates consistency rules across multiple payloads and configuration sources.
 * Checks GPU counts, tensor parallelism, model source requirements, role ARN format,
 * CUDA compatibility, model type / instance alignment, and estimated VRAM fit.
 *
 * Every check returns an array of Finding objects (empty when the rule holds
 * or when there is not enough information to evaluate it).
 *
 * Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7
 */
export default class CrossCuttingChecker {
    /**
     * Run all cross-cutting consistency checks that only need the instance catalog.
     * checkModelArchitectureCompatibility is not part of this run because it
     * needs the model-servers catalog; call it separately.
     * @param {Object} context - ValidationContext from PayloadBuilder
     * @param {Object} instanceCatalog - Instance catalog (from servers/lib/catalogs/instances.json)
     * @returns {Array} Array of Finding objects
     */
    check(context, instanceCatalog) {
        return [
            ...this.checkGpuConsistency(context, instanceCatalog),
            ...this.checkTensorParallelism(context, instanceCatalog),
            ...this.checkModelSourceRequirements(context),
            ...this.checkRoleArnFormat(context),
            ...this.checkCudaCompatibility(context, instanceCatalog),
            ...this.checkModelTypeInstanceAlignment(context, instanceCatalog),
            ...this.checkKvCacheMemoryFit(context, instanceCatalog)
        ];
    }

    /**
     * Verify GPU count consistency: instance type ↔ IC spec.
     * Skipped when the instance is unknown, has no GPUs, or IC_GPU_COUNT is unset
     * (null, undefined or blank).
     * @param {Object} context - ValidationContext
     * @param {Object} instanceCatalog - Instance catalog
     * @returns {Array} Findings
     */
    checkGpuConsistency(context, instanceCatalog) {
        const config = context?.config || {};
        const instanceType = config.INSTANCE_TYPE;

        const instanceInfo = lookupInstance(instanceCatalog, instanceType);
        if (!instanceInfo) return [];

        const instanceGpuCount = instanceInfo.gpus;
        if (instanceGpuCount == null || instanceGpuCount === 0) return [];

        const icGpuCount = config.IC_GPU_COUNT;
        // A blank value would coerce to 0 and report a bogus mismatch.
        if (isUnset(icGpuCount)) return [];

        if (Number(icGpuCount) === Number(instanceGpuCount)) return [];

        return [createFinding({
            fieldPath: 'NumberOfAcceleratorDevicesRequired',
            invalidValue: icGpuCount,
            constraint: {
                type: 'gpu-consistency',
                expected: instanceGpuCount,
                instanceType
            },
            remediationHint: `NumberOfAcceleratorDevicesRequired (${icGpuCount}) does not match GPU count (${instanceGpuCount}) for instance type ${instanceType}. Set IC_GPU_COUNT to ${instanceGpuCount}.`
        })];
    }

    /**
     * Verify tensor parallelism three-way check:
     * VLLM_TENSOR_PARALLEL_SIZE == NumberOfAcceleratorDevicesRequired == instance GPU count.
     * Only applies when model server is vLLM or SGLang.
     * @param {Object} context - ValidationContext
     * @param {Object} instanceCatalog - Instance catalog
     * @returns {Array} Findings
     */
    checkTensorParallelism(context, instanceCatalog) {
        const findings = [];
        const config = context?.config || {};

        const modelServer = config.MODEL_SERVER || config.modelServer || '';
        // String() keeps a non-string config value from throwing here.
        if (!TENSOR_PARALLEL_SERVERS.has(String(modelServer).toLowerCase())) {
            return findings;
        }

        const tpSize = config.VLLM_TENSOR_PARALLEL_SIZE;
        if (isUnset(tpSize)) return findings;

        const instanceType = config.INSTANCE_TYPE;
        const instanceGpuCount = lookupInstance(instanceCatalog, instanceType)?.gpus;
        const icGpuCount = config.IC_GPU_COUNT;

        // Check TP size vs IC GPU count
        if (!isUnset(icGpuCount) && Number(tpSize) !== Number(icGpuCount)) {
            findings.push(createFinding({
                fieldPath: 'VLLM_TENSOR_PARALLEL_SIZE',
                invalidValue: tpSize,
                constraint: {
                    type: 'tensor-parallelism',
                    expected: icGpuCount,
                    field: 'NumberOfAcceleratorDevicesRequired'
                },
                remediationHint: `VLLM_TENSOR_PARALLEL_SIZE (${tpSize}) must equal NumberOfAcceleratorDevicesRequired (${icGpuCount}) for ${modelServer}.`
            }));
        }

        // Check TP size vs instance GPU count
        if (instanceGpuCount != null && Number(tpSize) !== Number(instanceGpuCount)) {
            findings.push(createFinding({
                fieldPath: 'VLLM_TENSOR_PARALLEL_SIZE',
                invalidValue: tpSize,
                constraint: {
                    type: 'tensor-parallelism',
                    expected: instanceGpuCount,
                    field: 'instanceGpuCount'
                },
                remediationHint: `VLLM_TENSOR_PARALLEL_SIZE (${tpSize}) must equal instance GPU count (${instanceGpuCount}) for ${instanceType}.`
            }));
        }

        return findings;
    }

    /**
     * Verify model source requirements (artifact URI, hub access config).
     * @param {Object} context - ValidationContext
     * @returns {Array} Findings
     */
    checkModelSourceRequirements(context) {
        const findings = [];
        const config = context?.config || {};

        const modelSource = config.modelSource || config.MODEL_SOURCE || '';

        // When modelSource is 'jumpstart-hub', verify HubAccessConfig.HubContentArn is present.
        // The search is deep because HubAccessConfig is nested inside the model
        // data source of a container definition rather than sitting at the payload root.
        if (modelSource === 'jumpstart-hub') {
            const seen = new Set();
            const hubContentArnFound = Object.values(context?.payloads || {})
                .some((payload) => containsHubContentArn(payload, seen));

            if (!hubContentArnFound && !config.HUB_CONTENT_ARN) {
                findings.push(createFinding({
                    fieldPath: 'HubAccessConfig.HubContentArn',
                    invalidValue: null,
                    constraint: {
                        type: 'conditional-required',
                        condition: 'modelSource === jumpstart-hub'
                    },
                    remediationHint: 'When modelSource is "jumpstart-hub", HubAccessConfig.HubContentArn must be present in the payload.'
                }));
            }
        }

        // When modelSource in {s3, jumpstart, jumpstart-hub, registry}, verify MODEL_ARTIFACT_URI is non-empty
        if (SOURCES_REQUIRING_ARTIFACT.includes(modelSource)) {
            const artifactUri = config.MODEL_ARTIFACT_URI || '';
            if (String(artifactUri).trim() === '') {
                findings.push(createFinding({
                    fieldPath: 'MODEL_ARTIFACT_URI',
                    invalidValue: artifactUri || null,
                    constraint: {
                        type: 'conditional-required',
                        condition: `modelSource === ${modelSource}`
                    },
                    remediationHint: `When modelSource is "${modelSource}", MODEL_ARTIFACT_URI must be set and non-empty.`
                }));
            }
        }

        return findings;
    }

    /**
     * Verify role ARN format for realtime-inference.
     * Skipped for other deployment targets and when ROLE_ARN is unset or empty.
     * @param {Object} context - ValidationContext
     * @returns {Array} Findings
     */
    checkRoleArnFormat(context) {
        const config = context?.config || {};
        const deploymentTarget = context?.deploymentTarget || '';

        if (deploymentTarget !== 'realtime-inference') return [];

        const roleArn = config.ROLE_ARN;
        if (roleArn == null || roleArn === '') return [];

        if (IAM_ROLE_ARN_PATTERN.test(roleArn)) return [];

        return [createFinding({
            fieldPath: 'ROLE_ARN',
            invalidValue: roleArn,
            constraint: {
                type: 'pattern',
                pattern: 'arn:aws[a-z-]*:iam::\\d{12}:role/.+'
            },
            remediationHint: `ROLE_ARN "${roleArn}" does not match IAM role ARN pattern. Expected format: arn:aws:iam::<12-digit-account-id>:role/<role-name>.`
        })];
    }

    /**
     * Verify CUDA version compatibility: base image CUDA ∩ instance CUDA versions is non-empty.
     * Versions are compared by major component only (e.g. "12" matches "12.1", "12.2").
     * @param {Object} context - ValidationContext
     * @param {Object} instanceCatalog - Instance catalog
     * @returns {Array} Findings
     */
    checkCudaCompatibility(context, instanceCatalog) {
        const config = context?.config || {};
        const instanceType = config.INSTANCE_TYPE;

        const instanceInfo = lookupInstance(instanceCatalog, instanceType);
        if (!instanceInfo) return [];

        const instanceCudaVersions = instanceInfo.cudaVersions;
        if (!Array.isArray(instanceCudaVersions) || instanceCudaVersions.length === 0) {
            return [];
        }

        // Extract base image CUDA requirement from config
        const cudaRequirement = config.acceleratorVersion || config.CUDA_VERSION || '';
        if (!cudaRequirement) return [];

        const majorOf = (version) => String(version).split('.')[0];
        const requiredMajor = majorOf(cudaRequirement);

        const hasCompatible = instanceCudaVersions.some((v) => majorOf(v) === requiredMajor);
        if (hasCompatible) return [];

        return [createFinding({
            fieldPath: 'acceleratorVersion',
            invalidValue: cudaRequirement,
            constraint: {
                type: 'cuda-compatibility',
                instanceCudaVersions,
                instanceType
            },
            remediationHint: `Base image requires CUDA ${cudaRequirement} but instance ${instanceType} supports CUDA versions [${instanceCudaVersions.join(', ')}]. No compatible CUDA version found.`
        })];
    }

    /**
     * Verify model architecture compatibility with the selected server version.
     * Checks model_type against the server's supportedModelTypes from the catalog.
     * Skips silently when the catalog is missing, the server/version is not
     * listed, or supportedModelTypes is empty (sync not run).
     *
     * @param {Object} context - ValidationContext
     * @param {Object} modelServersCatalog - Model servers catalog (from servers/lib/catalogs/model-servers.json)
     * @returns {Array} Findings
     */
    checkModelArchitectureCompatibility(context, modelServersCatalog) {
        const config = context?.config || {};

        const modelType = config.modelType;
        const serverVersion = config.baseImageVersion;
        const server = config.modelServer;

        if (!modelType || !server || !serverVersion) return [];

        // Guard against an absent catalog or a non-list entry instead of throwing.
        const entries = modelServersCatalog?.[server];
        if (!Array.isArray(entries)) return [];

        const entry = entries.find((e) => e?.labels?.framework_version === serverVersion);
        if (!entry?.supportedModelTypes?.length) return [];

        if (entry.supportedModelTypes.includes(String(modelType).toLowerCase())) return [];

        return [createFinding({
            fieldPath: 'MODEL_NAME',
            invalidValue: modelType,
            constraint: { type: 'architecture-compatibility', server, version: serverVersion },
            severity: 'warning',
            confidence: 'medium',
            remediationHint: `Model architecture "${modelType}" may not be supported by ${server} ${serverVersion}. Consider a newer server version.`
        })];
    }

    /**
     * Verify predictor models are not assigned GPU instances.
     * @param {Object} context - ValidationContext
     * @param {Object} instanceCatalog - Instance catalog
     * @returns {Array} Findings
     */
    checkModelTypeInstanceAlignment(context, instanceCatalog) {
        const config = context?.config || {};

        const modelType = config.modelType || config.MODEL_TYPE || '';
        if (modelType !== 'predictor') return [];

        const instanceType = config.INSTANCE_TYPE;
        const instanceInfo = lookupInstance(instanceCatalog, instanceType);
        if (!instanceInfo) return [];

        const isGpuInstance = instanceInfo.gpus > 0 || instanceInfo.category === 'gpu';
        if (!isGpuInstance) return [];

        return [createFinding({
            fieldPath: 'INSTANCE_TYPE',
            invalidValue: instanceType,
            constraint: {
                type: 'model-type-alignment',
                modelType: 'predictor',
                instanceCategory: instanceInfo.category
            },
            severity: 'warning',
            remediationHint: `Model type "predictor" typically does not require GPU acceleration. Consider using a CPU instance (e.g., ml.m5.xlarge) instead of ${instanceType}.`
        })];
    }

    /**
     * Verify that the model's estimated VRAM (weights + KV cache at configured max_model_len)
     * fits in the instance's available GPU memory.
     *
     * Estimation: total = weights + KV cache + 10% of weights as overhead, where
     * weights use 4 bytes/param for float32, 1 for int8 and 2 otherwise.
     * NOTE(review): the original comment states this mirrors the instance-sizer's
     * vram-estimator; that file is not visible here — confirm they stay in sync.
     *
     * @param {Object} context - ValidationContext
     * @param {Object} instanceCatalog - Instance catalog
     * @returns {Array} Findings
     */
    checkKvCacheMemoryFit(context, instanceCatalog) {
        const config = context?.config || {};

        const instanceType = config.INSTANCE_TYPE;
        const instanceInfo = lookupInstance(instanceCatalog, instanceType);
        if (!instanceInfo || !instanceInfo.gpus || instanceInfo.gpus <= 0) return [];

        // Need parameter count to estimate weights
        const parameterCount = config._parameterCount || config.parameterCount;
        if (!parameterCount) return [];

        // Resolve max sequence length: explicit env var > model's max_position_embeddings > skip
        const maxModelLen = Number.parseInt(config.VLLM_MAX_MODEL_LEN || config.SGLANG_MAX_MODEL_LEN || '0', 10);
        const maxPosEmbed = Number.parseInt(config._maxPositionEmbeddings || '0', 10);
        const seqLen = maxModelLen || maxPosEmbed;
        // Also rejects NaN and negative lengths, which would skew the estimate.
        if (!(seqLen > 0)) return [];

        // Per-GPU VRAM: explicit catalog field first, otherwise parsed from the
        // accelerator description (e.g. "<n>x ... <size>GB").
        let perGpuVramGb = instanceInfo.gpuMemoryGb;
        if (!perGpuVramGb && typeof instanceInfo.accelerator === 'string') {
            const sizeMatch = instanceInfo.accelerator.match(/(\d+)GB/);
            if (sizeMatch) {
                const parsedGb = Number.parseInt(sizeMatch[1], 10);
                // With a leading "<n>x " multiplier the size is treated as the
                // total across all GPUs and divided back to a per-GPU figure.
                const hasMultiplier = /^(\d+)x\s/.test(instanceInfo.accelerator);
                perGpuVramGb = hasMultiplier ? parsedGb / instanceInfo.gpus : parsedGb;
            }
        }
        if (!perGpuVramGb) return [];

        const totalVramGb = perGpuVramGb * instanceInfo.gpus;

        const dtype = config._dtype || 'float16';
        let bytesPerParam = 2.0;
        if (dtype === 'float32') bytesPerParam = 4.0;
        else if (dtype === 'int8') bytesPerParam = 1.0;

        const weightsGb = (parameterCount * bytesPerParam) / BYTES_PER_GB;
        const kvCacheGb = (parameterCount * (seqLen / 4096) * 0.05) / BYTES_PER_GB;
        const overheadGb = weightsGb * 0.1;
        const estimatedTotalGb = weightsGb + kvCacheGb + overheadGb;

        // A NaN estimate (non-numeric parameter count) compares false and is skipped.
        if (!(estimatedTotalGb > totalVramGb)) return [];

        const roundToTenth = (value) => Math.round(value * 10) / 10;

        return [createFinding({
            fieldPath: 'INSTANCE_TYPE',
            invalidValue: instanceType,
            constraint: {
                type: 'kv-cache-memory-fit',
                estimatedVramGb: roundToTenth(estimatedTotalGb),
                weightsGb: roundToTenth(weightsGb),
                kvCacheGb: roundToTenth(kvCacheGb),
                totalVramGb,
                maxModelLen: seqLen,
                instanceType
            },
            severity: 'warning',
            confidence: 'medium',
            remediationHint: `Estimated VRAM needed: ${estimatedTotalGb.toFixed(1)}GB (weights: ${weightsGb.toFixed(1)}GB + KV cache: ${kvCacheGb.toFixed(1)}GB at seq_len=${seqLen}) exceeds instance capacity (${totalVramGb}GB). Reduce VLLM_MAX_MODEL_LEN, use quantization, or select a larger instance.`
        })];
    }
}
@@ -65,8 +65,7 @@ export default {
65
65
  required: ['modelName'],
66
66
  properties: {
67
67
  modelName: {
68
- type: 'string',
69
- minLength: 1
68
+ type: ['string', 'null']
70
69
  },
71
70
  modelFormat: {
72
71
  type: ['string', 'null']
@@ -0,0 +1,78 @@
1
+ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ // SPDX-License-Identifier: Apache-2.0
3
+
4
+ /**
5
+ * Dry-run validator — a module that can be called during do/deploy --dry-run.
6
+ * Runs schema validation and blocks deployment if schema errors are found.
7
+ *
8
+ * Returns { passed: boolean, report: ValidationReport }
9
+ *
10
+ * Requirements: 9.1
11
+ */
12
+
13
+ import { existsSync, readFileSync, readdirSync } from 'node:fs';
14
+ import path from 'node:path';
15
+ import PayloadBuilder from './payload-builder.js';
16
+ import SchemaValidationEngine from './schema-validation-engine.js';
17
+ import ServiceModelParser from './service-model-parser.js';
18
+ import { getRegistryPath } from './schema-sync.js';
19
+
20
/**
 * Schema validation entry point for dry-run mode.
 *
 * The run is reported as skipped (and passing) when the schema registry or its
 * manifest.json is absent, or when the service models cannot be read/parsed.
 * Otherwise the run passes only if the report summary counts zero errors.
 *
 * @param {Object} config - Configuration values from do/config
 * @param {string} deploymentTarget - 'realtime-inference' | 'async-inference' | 'batch-transform'
 * @param {Object} [options]
 * @param {boolean} [options.smart] - Enable smart-mode validators
 * @param {string} [options.registryPath] - Override schema registry path
 * @returns {Promise<{ passed: boolean, report: Object|null, skipped: boolean }>}
 */
export async function validateDryRun(config, deploymentTarget, options = {}) {
    const { smart: smartOption, registryPath: registryOverride } = options;
    const smartMode = smartOption || false;
    const registryPath = registryOverride || getRegistryPath();

    // Fresh object per call so callers never share a result instance.
    const skippedResult = () => ({ passed: true, report: null, skipped: true });

    // Without a registry and its manifest there is nothing to validate against.
    const registryAvailable =
        existsSync(registryPath) && existsSync(path.join(registryPath, 'manifest.json'));
    if (!registryAvailable) {
        return skippedResult();
    }

    // Payloads are built before the service models are loaded.
    const payloadBuilder = new PayloadBuilder();
    const context = payloadBuilder.build(config, deploymentTarget);

    // One service model per registry sub-directory that contains a service-2.json.
    const modelParser = new ServiceModelParser();
    let serviceModels;
    try {
        serviceModels = readdirSync(registryPath, { withFileTypes: true })
            .filter((dirent) => dirent.isDirectory())
            .map((dirent) => path.join(registryPath, dirent.name, 'service-2.json'))
            .filter((modelFile) => existsSync(modelFile))
            .map((modelFile) => modelParser.parse(JSON.parse(readFileSync(modelFile, 'utf8'))));
    } catch {
        // Unreadable or unparsable registry content: treat as a skipped run.
        return skippedResult();
    }

    const validationEngine = new SchemaValidationEngine({
        registryPath,
        smartMode,
        serviceModels
    });

    const report = await validationEngine.validate(context);
    const { errors } = report.getSummary();

    // Any error blocks deployment.
    return { passed: errors === 0, report, skipped: false };
}

export default { validateDryRun };
@@ -0,0 +1,102 @@
1
+ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ // SPDX-License-Identifier: Apache-2.0
3
+
4
+ /**
5
+ * Generation-time validation helper.
6
+ * Called after deploy scripts are generated to validate payloads against service models.
7
+ * Prints errors as warnings (non-blocking) with a summary line.
8
+ *
9
+ * - Skips silently if schema registry is not present
10
+ * - Skips entirely if --no-validate flag is passed (check via options parameter)
11
+ *
12
+ * This is a standalone module — does NOT modify the main generator file.
13
+ *
14
+ * Requirements: 8.1, 8.2, 8.3, 8.4, 8.5
15
+ */
16
+
17
+ import { existsSync, readFileSync, readdirSync } from 'node:fs';
18
+ import path from 'node:path';
19
+ import PayloadBuilder from './payload-builder.js';
20
+ import SchemaValidationEngine from './schema-validation-engine.js';
21
+ import ServiceModelParser from './service-model-parser.js';
22
+ import { getRegistryPath } from './schema-sync.js';
23
+
24
/**
 * Render a finding's invalid value for console output.
 * Only null/undefined become an empty string, so legitimate falsy values such
 * as 0 or false stay visible; objects are rendered as JSON rather than
 * "[object Object]".
 * @param {*} value - The finding's invalidValue
 * @returns {string} Printable representation
 */
function formatInvalidValue(value) {
    if (value == null) return '';
    if (typeof value === 'object') {
        try {
            return JSON.stringify(value) ?? String(value);
        } catch {
            // Cyclic structures (or BigInt members) cannot be serialised.
            return String(value);
        }
    }
    return String(value);
}

/**
 * Load and parse every `<registry>/<service>/service-2.json` file.
 * @param {string} registryPath - Schema registry directory
 * @param {Object} parser - Object exposing parse(rawModel)
 * @returns {Array} Parsed service models
 * @throws When the directory cannot be read or a model file is not valid JSON
 */
function loadServiceModels(registryPath, parser) {
    const serviceModels = [];
    for (const entry of readdirSync(registryPath, { withFileTypes: true })) {
        if (!entry.isDirectory()) continue;
        const modelPath = path.join(registryPath, entry.name, 'service-2.json');
        if (!existsSync(modelPath)) continue;
        const rawModel = JSON.parse(readFileSync(modelPath, 'utf8'));
        serviceModels.push(parser.parse(rawModel));
    }
    return serviceModels;
}

/**
 * Run schema validation at generation time (non-blocking).
 *
 * Findings are printed to the console as warnings followed by a summary line;
 * nothing is thrown for validation failures. The run is skipped when
 * `options.noValidate` is set, when the schema registry (or its manifest.json)
 * is absent, or when the service models cannot be loaded.
 *
 * @param {Object} config - Configuration values (from generator answers or do/config)
 * @param {string} deploymentTarget - 'realtime-inference' | 'async-inference' | 'batch-transform'
 * @param {Object} [options]
 * @param {boolean} [options.noValidate] - If true, skip validation entirely
 * @param {string} [options.registryPath] - Override schema registry path
 * @returns {Promise<{ skipped: boolean, report: Object|null }>}
 */
export async function runGenerationValidation(config, deploymentTarget, options = {}) {
    // Skip entirely if --no-validate flag is passed
    if (options.noValidate) {
        return { skipped: true, report: null };
    }

    const registryPath = options.registryPath || getRegistryPath();

    // Skip silently if schema registry is not present
    if (!existsSync(registryPath) || !existsSync(path.join(registryPath, 'manifest.json'))) {
        return { skipped: true, report: null };
    }

    // Construct payloads
    const builder = new PayloadBuilder();
    const context = builder.build(config, deploymentTarget);

    // Load and parse service models from registry
    let serviceModels;
    try {
        serviceModels = loadServiceModels(registryPath, new ServiceModelParser());
    } catch {
        // Deliberate best-effort: generation must not fail because the
        // registry is unreadable, so validation is skipped silently.
        return { skipped: true, report: null };
    }

    // Run validation
    const engine = new SchemaValidationEngine({
        registryPath,
        ignoreStaleness: true,
        serviceModels
    });

    const report = await engine.validate(context);
    const summary = report.getSummary();

    // Print errors as warnings (non-blocking)
    if (summary.errors > 0) {
        const locate = (finding) => [finding.operation, finding.fieldPath].filter(Boolean).join(' → ');

        console.log('');
        console.log('\x1b[33m⚠️ Schema validation found issues:\x1b[0m');

        for (const error of report.schemaErrors ?? []) {
            console.log(` \x1b[33m⚠\x1b[0m ${locate(error)}: ${formatInvalidValue(error.invalidValue)} ${error.remediationHint || ''}`);
        }

        for (const error of report.crossCuttingErrors ?? []) {
            console.log(` \x1b[33m⚠\x1b[0m ${locate(error)}: ${error.remediationHint || ''}`);
        }

        console.log('');
        console.log(` ${summary.errors} issue(s) found. Run \x1b[36mdo/validate\x1b[0m before deployment.`);
    }

    return { skipped: false, report };
}

export default { runGenerationValidation };