@aws/ml-container-creator 0.2.5 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +45 -4
- package/config/bootstrap-stack.json +14 -0
- package/infra/ci-harness/package-lock.json +22 -9
- package/package.json +7 -8
- package/servers/base-image-picker/index.js +3 -3
- package/servers/base-image-picker/manifest.json +4 -2
- package/servers/instance-sizer/index.js +564 -0
- package/servers/instance-sizer/lib/instance-ranker.js +270 -0
- package/servers/instance-sizer/lib/model-resolver.js +269 -0
- package/servers/instance-sizer/lib/vram-estimator.js +177 -0
- package/servers/instance-sizer/manifest.json +17 -0
- package/servers/instance-sizer/package.json +15 -0
- package/servers/{instance-recommender → lib}/catalogs/instances.json +136 -34
- package/servers/{base-image-picker → lib}/catalogs/model-servers.json +302 -254
- package/servers/lib/catalogs/model-sizes.json +131 -0
- package/servers/lib/catalogs/models.json +632 -0
- package/servers/{model-picker → lib}/catalogs/popular-diffusors.json +32 -10
- package/servers/{model-picker → lib}/catalogs/popular-transformers.json +59 -26
- package/servers/{base-image-picker → lib}/catalogs/python-slim.json +12 -12
- package/servers/lib/schemas/image-catalog.schema.json +6 -12
- package/servers/lib/schemas/instances.schema.json +29 -0
- package/servers/lib/schemas/model-catalog.schema.json +12 -10
- package/servers/lib/schemas/unified-model-catalog.schema.json +129 -0
- package/servers/model-picker/index.js +4 -4
- package/servers/model-picker/manifest.json +2 -3
- package/servers/region-picker/index.js +1 -1
- package/servers/region-picker/manifest.json +1 -1
- package/src/app.js +36 -0
- package/src/lib/architecture-sync.js +171 -0
- package/src/lib/arn-detection.js +22 -0
- package/src/lib/bootstrap-command-handler.js +120 -0
- package/src/lib/cli-handler.js +3 -3
- package/src/lib/config-manager.js +47 -1
- package/src/lib/configuration-manager.js +2 -2
- package/src/lib/cross-cutting-checker.js +460 -0
- package/src/lib/deployment-entry-schema.js +1 -2
- package/src/lib/dry-run-validator.js +78 -0
- package/src/lib/generation-validator.js +102 -0
- package/src/lib/mcp-validator-config.js +89 -0
- package/src/lib/payload-builder.js +153 -0
- package/src/lib/prompt-runner.js +866 -149
- package/src/lib/prompts.js +2 -2
- package/src/lib/registry-command-handler.js +236 -0
- package/src/lib/registry-loader.js +5 -5
- package/src/lib/schema-sync.js +203 -0
- package/src/lib/schema-validation-engine.js +195 -0
- package/src/lib/secret-classification.js +56 -0
- package/src/lib/secrets-command-handler.js +550 -0
- package/src/lib/service-model-parser.js +102 -0
- package/src/lib/validate-runner.js +216 -0
- package/src/lib/validation-report.js +140 -0
- package/src/lib/validators/base-validator.js +36 -0
- package/src/lib/validators/catalog-validator.js +177 -0
- package/src/lib/validators/enum-validator.js +120 -0
- package/src/lib/validators/required-field-validator.js +150 -0
- package/src/lib/validators/type-validator.js +313 -0
- package/src/prompt-adapter.js +3 -2
- package/templates/Dockerfile +1 -1
- package/templates/do/build +37 -5
- package/templates/do/config +15 -3
- package/templates/do/deploy +60 -5
- package/templates/do/logs +18 -3
- package/templates/do/run +15 -1
- package/templates/do/validate +61 -0
- package/servers/instance-recommender/LICENSE +0 -202
- package/servers/instance-recommender/index.js +0 -284
- package/servers/instance-recommender/manifest.json +0 -16
- package/servers/instance-recommender/package.json +0 -15
- /package/servers/{model-picker → lib}/catalogs/jumpstart-public.json +0 -0
- /package/servers/{region-picker → lib}/catalogs/regions.json +0 -0
- /package/servers/{base-image-picker → lib}/catalogs/triton-backends.json +0 -0
- /package/servers/{base-image-picker → lib}/catalogs/triton.json +0 -0
|
@@ -0,0 +1,460 @@
|
|
|
1
|
+
/* eslint-disable eqeqeq */
|
|
2
|
+
/**
 * Validates consistency rules across multiple payloads and configuration sources.
 * Checks GPU counts, tensor parallelism, model source requirements, role ARN format,
 * CUDA compatibility, model type / instance alignment, and estimated VRAM fit.
 *
 * Every check returns an array of Finding objects (possibly empty) and never mutates
 * its inputs. A check silently returns no findings when the data it needs is absent.
 *
 * Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7
 */
export default class CrossCuttingChecker {
    /**
     * Run the cross-cutting consistency checks that only need the instance catalog.
     *
     * checkModelArchitectureCompatibility is not part of this aggregate: it takes the
     * model-servers catalog rather than the instance catalog and must be called separately.
     *
     * @param {Object} context - ValidationContext from PayloadBuilder
     * @param {Object} instanceCatalog - Instance catalog (from servers/lib/catalogs/instances.json)
     * @returns {Array} Array of Finding objects
     */
    check(context, instanceCatalog) {
        return [
            ...this.checkGpuConsistency(context, instanceCatalog),
            ...this.checkTensorParallelism(context, instanceCatalog),
            ...this.checkModelSourceRequirements(context),
            ...this.checkRoleArnFormat(context),
            ...this.checkCudaCompatibility(context, instanceCatalog),
            ...this.checkModelTypeInstanceAlignment(context, instanceCatalog),
            ...this.checkKvCacheMemoryFit(context, instanceCatalog)
        ];
    }

    /**
     * Verify GPU count consistency: instance type ↔ IC spec.
     * Skipped when the instance is unknown, has no GPUs, or IC_GPU_COUNT is unset.
     *
     * @param {Object} context - ValidationContext
     * @param {Object} instanceCatalog - Instance catalog
     * @returns {Array} Findings
     */
    checkGpuConsistency(context, instanceCatalog) {
        const config = context.config || {};
        const instanceType = config.INSTANCE_TYPE;
        const instanceInfo = lookupInstance(instanceCatalog, instanceType);
        if (!instanceInfo) return [];

        const instanceGpuCount = instanceInfo.gpus;
        if (instanceGpuCount == null || instanceGpuCount === 0) return [];

        const icGpuCount = config.IC_GPU_COUNT;
        if (icGpuCount == null) return [];

        // Config values may arrive as strings, so compare numerically.
        if (Number(icGpuCount) === Number(instanceGpuCount)) return [];

        return [
            createFinding({
                fieldPath: 'NumberOfAcceleratorDevicesRequired',
                invalidValue: icGpuCount,
                constraint: {
                    type: 'gpu-consistency',
                    expected: instanceGpuCount,
                    instanceType
                },
                remediationHint: `NumberOfAcceleratorDevicesRequired (${icGpuCount}) does not match GPU count (${instanceGpuCount}) for instance type ${instanceType}. Set IC_GPU_COUNT to ${instanceGpuCount}.`
            })
        ];
    }

    /**
     * Verify tensor parallelism three-way check:
     * VLLM_TENSOR_PARALLEL_SIZE == NumberOfAcceleratorDevicesRequired == instance GPU count.
     * Only applies when model server is vLLM or SGLang.
     *
     * @param {Object} context - ValidationContext
     * @param {Object} instanceCatalog - Instance catalog
     * @returns {Array} Findings
     */
    checkTensorParallelism(context, instanceCatalog) {
        const findings = [];
        const config = context.config || {};

        const modelServer = config.MODEL_SERVER || config.modelServer || '';
        // String() keeps a non-string server value from throwing on toLowerCase().
        const normalizedServer = String(modelServer).toLowerCase();
        if (normalizedServer !== 'vllm' && normalizedServer !== 'sglang') {
            return findings;
        }

        const tpSize = config.VLLM_TENSOR_PARALLEL_SIZE;
        if (tpSize == null) return findings;

        const instanceType = config.INSTANCE_TYPE;
        const instanceGpuCount = lookupInstance(instanceCatalog, instanceType)?.gpus;
        const icGpuCount = config.IC_GPU_COUNT;

        // Check TP size vs IC GPU count
        if (icGpuCount != null && Number(tpSize) !== Number(icGpuCount)) {
            findings.push(createFinding({
                fieldPath: 'VLLM_TENSOR_PARALLEL_SIZE',
                invalidValue: tpSize,
                constraint: {
                    type: 'tensor-parallelism',
                    expected: icGpuCount,
                    field: 'NumberOfAcceleratorDevicesRequired'
                },
                remediationHint: `VLLM_TENSOR_PARALLEL_SIZE (${tpSize}) must equal NumberOfAcceleratorDevicesRequired (${icGpuCount}) for ${modelServer}.`
            }));
        }

        // Check TP size vs instance GPU count
        if (instanceGpuCount != null && Number(tpSize) !== Number(instanceGpuCount)) {
            findings.push(createFinding({
                fieldPath: 'VLLM_TENSOR_PARALLEL_SIZE',
                invalidValue: tpSize,
                constraint: {
                    type: 'tensor-parallelism',
                    expected: instanceGpuCount,
                    field: 'instanceGpuCount'
                },
                remediationHint: `VLLM_TENSOR_PARALLEL_SIZE (${tpSize}) must equal instance GPU count (${instanceGpuCount}) for ${instanceType}.`
            }));
        }

        return findings;
    }

    /**
     * Verify model source requirements (artifact URI, hub access config).
     *
     * @param {Object} context - ValidationContext
     * @returns {Array} Findings
     */
    checkModelSourceRequirements(context) {
        const findings = [];
        const config = context.config || {};
        const modelSource = config.modelSource || config.MODEL_SOURCE || '';

        // When modelSource is 'jumpstart-hub', verify HubAccessConfig.HubContentArn is present
        if (modelSource === 'jumpstart-hub') {
            // Search each payload at any depth: the SageMaker API nests HubAccessConfig
            // under ModelDataSource.S3DataSource, so a top-level lookup alone would miss it.
            const payloads = context.payloads || {};
            const hubContentArnFound = Object.values(payloads)
                .some(payload => containsHubContentArn(payload));

            if (!hubContentArnFound && !config.HUB_CONTENT_ARN) {
                findings.push(createFinding({
                    fieldPath: 'HubAccessConfig.HubContentArn',
                    invalidValue: null,
                    constraint: {
                        type: 'conditional-required',
                        condition: 'modelSource === jumpstart-hub'
                    },
                    remediationHint: 'When modelSource is "jumpstart-hub", HubAccessConfig.HubContentArn must be present in the payload.'
                }));
            }
        }

        // When modelSource in {s3, jumpstart, jumpstart-hub, registry}, verify MODEL_ARTIFACT_URI is non-empty
        const sourcesRequiringArtifact = ['s3', 'jumpstart', 'jumpstart-hub', 'registry'];
        if (sourcesRequiringArtifact.includes(modelSource)) {
            const artifactUri = config.MODEL_ARTIFACT_URI || '';
            // String() keeps a non-string URI value from throwing on trim().
            if (String(artifactUri).trim() === '') {
                findings.push(createFinding({
                    fieldPath: 'MODEL_ARTIFACT_URI',
                    invalidValue: artifactUri || null,
                    constraint: {
                        type: 'conditional-required',
                        condition: `modelSource === ${modelSource}`
                    },
                    remediationHint: `When modelSource is "${modelSource}", MODEL_ARTIFACT_URI must be set and non-empty.`
                }));
            }
        }

        return findings;
    }

    /**
     * Verify role ARN format for realtime-inference.
     * Accepts any AWS partition (aws, aws-cn, aws-us-gov, ...), not just "aws".
     * An unset or empty ROLE_ARN is not reported here.
     *
     * @param {Object} context - ValidationContext
     * @returns {Array} Findings
     */
    checkRoleArnFormat(context) {
        const config = context.config || {};
        const deploymentTarget = context.deploymentTarget || '';
        if (deploymentTarget !== 'realtime-inference') return [];

        const roleArn = config.ROLE_ARN;
        if (roleArn == null || roleArn === '') return [];

        const arnPattern = /^arn:aws(?:-[a-z]+)*:iam::\d{12}:role\/.+$/;
        if (arnPattern.test(roleArn)) return [];

        return [
            createFinding({
                fieldPath: 'ROLE_ARN',
                invalidValue: roleArn,
                constraint: {
                    type: 'pattern',
                    pattern: 'arn:aws(-[a-z]+)*:iam::\\d{12}:role/.+'
                },
                remediationHint: `ROLE_ARN "${roleArn}" does not match IAM role ARN pattern. Expected format: arn:aws:iam::<12-digit-account-id>:role/<role-name> (the partition may also be aws-cn, aws-us-gov, etc.).`
            })
        ];
    }

    /**
     * Verify CUDA version compatibility: base image CUDA ∩ instance CUDA versions is non-empty.
     * Versions are compared by major version only (e.g., "12" matches "12.1", "12.2").
     *
     * @param {Object} context - ValidationContext
     * @param {Object} instanceCatalog - Instance catalog
     * @returns {Array} Findings
     */
    checkCudaCompatibility(context, instanceCatalog) {
        const config = context.config || {};
        const instanceType = config.INSTANCE_TYPE;
        const instanceInfo = lookupInstance(instanceCatalog, instanceType);
        if (!instanceInfo) return [];

        const instanceCudaVersions = instanceInfo.cudaVersions;
        if (!Array.isArray(instanceCudaVersions) || instanceCudaVersions.length === 0) {
            return [];
        }

        // Extract base image CUDA requirement from config
        const cudaRequirement = config.acceleratorVersion || config.CUDA_VERSION || '';
        if (!cudaRequirement) return [];

        const majorOf = version => String(version).split('.')[0];
        const requiredMajor = majorOf(cudaRequirement);
        if (instanceCudaVersions.some(version => majorOf(version) === requiredMajor)) {
            return [];
        }

        return [
            createFinding({
                fieldPath: 'acceleratorVersion',
                invalidValue: cudaRequirement,
                constraint: {
                    type: 'cuda-compatibility',
                    instanceCudaVersions,
                    instanceType
                },
                remediationHint: `Base image requires CUDA ${cudaRequirement} but instance ${instanceType} supports CUDA versions [${instanceCudaVersions.join(', ')}]. No compatible CUDA version found.`
            })
        ];
    }

    /**
     * Verify model architecture compatibility with the selected server version.
     * Checks model_type against the server's supportedModelTypes from the catalog.
     * Skips silently when the catalog, the server entry, or supportedModelTypes is
     * missing or empty (sync not run).
     *
     * @param {Object} context - ValidationContext
     * @param {Object} modelServersCatalog - Model servers catalog (from servers/lib/catalogs/model-servers.json)
     * @returns {Array} Findings
     */
    checkModelArchitectureCompatibility(context, modelServersCatalog) {
        const config = context.config || {};
        const modelType = config.modelType;
        const serverVersion = config.baseImageVersion;
        const server = config.modelServer;
        if (!modelType || !server || !serverVersion) return [];

        // A missing catalog or a non-array server entry means there is nothing to compare against.
        const entries = modelServersCatalog?.[server];
        if (!Array.isArray(entries)) return [];

        const entry = entries.find(candidate => candidate?.labels?.framework_version === serverVersion);
        const supportedModelTypes = entry?.supportedModelTypes;
        if (!supportedModelTypes?.length) return [];

        if (supportedModelTypes.includes(String(modelType).toLowerCase())) return [];

        return [
            createFinding({
                fieldPath: 'MODEL_NAME',
                invalidValue: modelType,
                constraint: { type: 'architecture-compatibility', server, version: serverVersion },
                severity: 'warning',
                confidence: 'medium',
                remediationHint: `Model architecture "${modelType}" may not be supported by ${server} ${serverVersion}. Consider a newer server version.`
            })
        ];
    }

    /**
     * Verify predictor models are not assigned GPU instances.
     *
     * @param {Object} context - ValidationContext
     * @param {Object} instanceCatalog - Instance catalog
     * @returns {Array} Findings
     */
    checkModelTypeInstanceAlignment(context, instanceCatalog) {
        const config = context.config || {};
        const modelType = config.modelType || config.MODEL_TYPE || '';
        if (modelType !== 'predictor') return [];

        const instanceType = config.INSTANCE_TYPE;
        const instanceInfo = lookupInstance(instanceCatalog, instanceType);
        if (!instanceInfo) return [];

        const isGpuInstance = instanceInfo.gpus > 0 || instanceInfo.category === 'gpu';
        if (!isGpuInstance) return [];

        return [
            createFinding({
                fieldPath: 'INSTANCE_TYPE',
                invalidValue: instanceType,
                constraint: {
                    type: 'model-type-alignment',
                    modelType: 'predictor',
                    instanceCategory: instanceInfo.category
                },
                severity: 'warning',
                remediationHint: `Model type "predictor" typically does not require GPU acceleration. Consider using a CPU instance (e.g., ml.m5.xlarge) instead of ${instanceType}.`
            })
        ];
    }

    /**
     * Verify that the model's estimated VRAM (weights + KV cache at configured max_model_len)
     * fits in the instance's available GPU memory.
     *
     * Uses the same estimation formula as the instance-sizer's vram-estimator:
     *   total = weights + KV cache + 10% overhead
     *
     * Skipped when the instance has no GPUs, or when the parameter count, the sequence
     * length, or the per-GPU memory cannot be determined.
     *
     * @param {Object} context - ValidationContext
     * @param {Object} instanceCatalog - Instance catalog
     * @returns {Array} Findings
     */
    checkKvCacheMemoryFit(context, instanceCatalog) {
        const config = context.config || {};
        const instanceType = config.INSTANCE_TYPE;
        const instanceInfo = lookupInstance(instanceCatalog, instanceType);
        if (!instanceInfo || !instanceInfo.gpus || instanceInfo.gpus <= 0) return [];

        // Need parameter count to estimate weights
        const parameterCount = config._parameterCount || config.parameterCount;
        if (!parameterCount) return [];

        // Resolve max sequence length: explicit env var > model's max_position_embeddings > skip.
        // A non-numeric setting parses to NaN, which is falsy and so falls through.
        const maxModelLen = Number.parseInt(config.VLLM_MAX_MODEL_LEN || config.SGLANG_MAX_MODEL_LEN || '0', 10);
        const maxPosEmbed = Number.parseInt(config._maxPositionEmbeddings || '0', 10);
        const seqLen = maxModelLen || maxPosEmbed;
        if (!seqLen) return [];

        const perGpuVramGb = resolvePerGpuVramGb(instanceInfo);
        if (!perGpuVramGb) return [];

        const totalVramGb = perGpuVramGb * instanceInfo.gpus;

        // Estimate VRAM needed (same formula as vram-estimator.js)
        const dtype = config._dtype || 'float16';
        let bytesPerParam = 2.0;
        if (dtype === 'float32') {
            bytesPerParam = 4.0;
        } else if (dtype === 'int8') {
            bytesPerParam = 1.0;
        }
        const bytesPerGb = 1024 ** 3;
        const weightsGb = (parameterCount * bytesPerParam) / bytesPerGb;
        const kvCacheGb = (parameterCount * (seqLen / 4096) * 0.05) / bytesPerGb;
        const overheadGb = weightsGb * 0.1;
        const estimatedTotalGb = weightsGb + kvCacheGb + overheadGb;

        // A NaN estimate (e.g. non-numeric parameter count) compares false and yields no finding.
        if (!(estimatedTotalGb > totalVramGb)) return [];

        const roundToTenth = value => Math.round(value * 10) / 10;
        return [
            createFinding({
                fieldPath: 'INSTANCE_TYPE',
                invalidValue: instanceType,
                constraint: {
                    type: 'kv-cache-memory-fit',
                    estimatedVramGb: roundToTenth(estimatedTotalGb),
                    weightsGb: roundToTenth(weightsGb),
                    kvCacheGb: roundToTenth(kvCacheGb),
                    totalVramGb,
                    maxModelLen: seqLen,
                    instanceType
                },
                severity: 'warning',
                confidence: 'medium',
                remediationHint: `Estimated VRAM needed: ${estimatedTotalGb.toFixed(1)}GB (weights: ${weightsGb.toFixed(1)}GB + KV cache: ${kvCacheGb.toFixed(1)}GB at seq_len=${seqLen}) exceeds instance capacity (${totalVramGb}GB). Reduce VLLM_MAX_MODEL_LEN, use quantization, or select a larger instance.`
            })
        ];
    }
}

/**
 * Build a Finding with the fields shared by every cross-cutting check.
 * Severity defaults to 'error' and confidence to 'high'.
 *
 * @param {Object} details - fieldPath, invalidValue, constraint, remediationHint and optional severity / confidence
 * @returns {Object} Finding
 */
function createFinding({ fieldPath, invalidValue, constraint, severity = 'error', confidence = 'high', remediationHint }) {
    return {
        service: 'cross-cutting',
        operation: 'configuration',
        fieldPath,
        invalidValue,
        constraint,
        severity,
        confidence,
        source: 'cross-cutting',
        remediationHint
    };
}

/**
 * Look up an instance type in the catalog, accepting either the bare map or an
 * object that wraps it under a `catalog` key. Only own properties are matched so
 * names such as "constructor" never resolve to inherited members.
 *
 * @param {Object} instanceCatalog - Instance catalog (wrapped or bare)
 * @param {string} instanceType - Instance type key
 * @returns {Object|null} Catalog entry, or null when absent
 */
function lookupInstance(instanceCatalog, instanceType) {
    if (!instanceType) return null;
    const catalog = instanceCatalog?.catalog || instanceCatalog || {};
    if (!Object.prototype.hasOwnProperty.call(catalog, instanceType)) return null;
    return catalog[instanceType] || null;
}

/**
 * Report whether a payload contains HubAccessConfig.HubContentArn at any depth.
 *
 * @param {*} node - Payload or nested value
 * @param {Set} [visited] - Objects already inspected (guards against cycles)
 * @returns {boolean} True when a non-empty HubContentArn is found
 */
function containsHubContentArn(node, visited = new Set()) {
    if (node === null || typeof node !== 'object' || visited.has(node)) return false;
    visited.add(node);
    if (node.HubAccessConfig?.HubContentArn) return true;
    return Object.values(node).some(child => containsHubContentArn(child, visited));
}

/**
 * Determine per-GPU memory in GB from a catalog entry. Prefers gpuMemoryGb; otherwise
 * parses the first "<n>GB" figure out of the accelerator description. When the
 * description starts with an "<n>x " multiplier the figure is treated as the total
 * across all GPUs and divided by the GPU count.
 *
 * @param {Object} instanceInfo - Catalog entry with gpus and gpuMemoryGb / accelerator
 * @returns {number|undefined} Per-GPU memory in GB, or undefined when unknown
 */
function resolvePerGpuVramGb(instanceInfo) {
    if (instanceInfo.gpuMemoryGb) return instanceInfo.gpuMemoryGb;

    const accelerator = instanceInfo.accelerator;
    if (typeof accelerator !== 'string') return undefined;

    const sizeMatch = accelerator.match(/(\d+)GB/);
    if (!sizeMatch) return undefined;

    const advertisedGb = Number.parseInt(sizeMatch[1], 10);
    return /^\d+x\s/.test(accelerator) ? advertisedGb / instanceInfo.gpus : advertisedGb;
}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Dry-run validator — a module that can be called during do/deploy --dry-run.
|
|
6
|
+
* Runs schema validation and blocks deployment if schema errors are found.
|
|
7
|
+
*
|
|
8
|
+
* Returns { passed: boolean, report: ValidationReport }
|
|
9
|
+
*
|
|
10
|
+
* Requirements: 9.1
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { existsSync, readFileSync, readdirSync } from 'node:fs';
|
|
14
|
+
import path from 'node:path';
|
|
15
|
+
import PayloadBuilder from './payload-builder.js';
|
|
16
|
+
import SchemaValidationEngine from './schema-validation-engine.js';
|
|
17
|
+
import ServiceModelParser from './service-model-parser.js';
|
|
18
|
+
import { getRegistryPath } from './schema-sync.js';
|
|
19
|
+
|
|
20
|
+
/**
 * Schema validation entry point for dry-run mode.
 * The result reports `passed: false` whenever the validation summary contains
 * errors, which callers use to block deployment.
 *
 * Validation is skipped (and reported as passed) when the schema registry or its
 * manifest.json is missing, or when the service models cannot be read or parsed.
 *
 * @param {Object} config - Configuration values from do/config
 * @param {string} deploymentTarget - 'realtime-inference' | 'async-inference' | 'batch-transform'
 * @param {Object} [options]
 * @param {boolean} [options.smart] - Enable smart-mode validators
 * @param {string} [options.registryPath] - Override schema registry path
 * @returns {Promise<{ passed: boolean, report: Object|null, skipped: boolean }>}
 */
export async function validateDryRun(config, deploymentTarget, options = {}) {
    const smartMode = options.smart || false;
    const registryPath = options.registryPath || getRegistryPath();

    // Without a registry and its manifest there is nothing to validate against.
    const registryAvailable = existsSync(registryPath)
        && existsSync(path.join(registryPath, 'manifest.json'));
    if (!registryAvailable) {
        return { passed: true, report: null, skipped: true };
    }

    // Build the payloads to validate
    const context = new PayloadBuilder().build(config, deploymentTarget);

    // Each registry sub-directory may hold one service model (service-2.json)
    const modelParser = new ServiceModelParser();
    const serviceModels = [];
    try {
        for (const dirent of readdirSync(registryPath, { withFileTypes: true })) {
            if (!dirent.isDirectory()) continue;

            const serviceModelFile = path.join(registryPath, dirent.name, 'service-2.json');
            if (!existsSync(serviceModelFile)) continue;

            const modelJson = readFileSync(serviceModelFile, 'utf8');
            serviceModels.push(modelParser.parse(JSON.parse(modelJson)));
        }
    } catch {
        // Unreadable or unparsable models: treat as skipped rather than failing the dry run
        return { passed: true, report: null, skipped: true };
    }

    // Validate the payloads against the loaded service models
    const engine = new SchemaValidationEngine({
        registryPath,
        smartMode,
        serviceModels
    });
    const report = await engine.validate(context);
    const { errors } = report.getSummary();

    // Any error blocks deployment
    return { passed: errors === 0, report, skipped: false };
}

export default { validateDryRun };
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Generation-time validation helper.
|
|
6
|
+
* Called after deploy scripts are generated to validate payloads against service models.
|
|
7
|
+
* Prints errors as warnings (non-blocking) with a summary line.
|
|
8
|
+
*
|
|
9
|
+
* - Skips silently if schema registry is not present
|
|
10
|
+
* - Skips entirely if --no-validate flag is passed (check via options parameter)
|
|
11
|
+
*
|
|
12
|
+
* This is a standalone module — does NOT modify the main generator file.
|
|
13
|
+
*
|
|
14
|
+
* Requirements: 8.1, 8.2, 8.3, 8.4, 8.5
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { existsSync, readFileSync, readdirSync } from 'node:fs';
|
|
18
|
+
import path from 'node:path';
|
|
19
|
+
import PayloadBuilder from './payload-builder.js';
|
|
20
|
+
import SchemaValidationEngine from './schema-validation-engine.js';
|
|
21
|
+
import ServiceModelParser from './service-model-parser.js';
|
|
22
|
+
import { getRegistryPath } from './schema-sync.js';
|
|
23
|
+
|
|
24
|
+
/**
 * Run schema validation at generation time (non-blocking).
 *
 * Findings are printed to the console as warnings and never throw or block
 * generation. Validation is skipped when options.noValidate is set, when the
 * schema registry or its manifest.json is missing, or when the service models
 * cannot be read or parsed.
 *
 * @param {Object} config - Configuration values (from generator answers or do/config)
 * @param {string} deploymentTarget - 'realtime-inference' | 'async-inference' | 'batch-transform'
 * @param {Object} [options]
 * @param {boolean} [options.noValidate] - If true, skip validation entirely
 * @param {string} [options.registryPath] - Override schema registry path
 * @returns {Promise<{ skipped: boolean, report: Object|null }>}
 */
export async function runGenerationValidation(config, deploymentTarget, options = {}) {
    // Skip entirely if --no-validate flag is passed
    if (options.noValidate) {
        return { skipped: true, report: null };
    }

    const registryPath = options.registryPath || getRegistryPath();

    // Skip silently if schema registry is not present
    if (!existsSync(registryPath) || !existsSync(path.join(registryPath, 'manifest.json'))) {
        return { skipped: true, report: null };
    }

    // Construct payloads
    const builder = new PayloadBuilder();
    const context = builder.build(config, deploymentTarget);

    // Load and parse service models from registry (one service-2.json per sub-directory)
    const parser = new ServiceModelParser();
    const serviceModels = [];
    try {
        for (const entry of readdirSync(registryPath, { withFileTypes: true })) {
            if (!entry.isDirectory()) continue;

            const modelPath = path.join(registryPath, entry.name, 'service-2.json');
            if (!existsSync(modelPath)) continue;

            const rawModel = JSON.parse(readFileSync(modelPath, 'utf8'));
            serviceModels.push(parser.parse(rawModel));
        }
    } catch {
        // If we can't load models, skip validation silently
        return { skipped: true, report: null };
    }

    // Run validation
    const engine = new SchemaValidationEngine({
        registryPath,
        ignoreStaleness: true,
        serviceModels
    });

    const report = await engine.validate(context);
    const summary = report.getSummary();

    // Print errors as warnings (non-blocking)
    if (summary.errors > 0) {
        console.log('');
        console.log('\x1b[33m⚠️ Schema validation found issues:\x1b[0m');

        for (const error of report.schemaErrors) {
            // Show the offending value even when it is falsy (0, false); skip only absent values.
            const detail = [describeInvalidValue(error.invalidValue), error.remediationHint || '']
                .filter(part => part !== '')
                .join(' ');
            console.log(` \x1b[33m⚠\x1b[0m ${describeLocation(error)}: ${detail}`);
        }

        for (const error of report.crossCuttingErrors) {
            console.log(` \x1b[33m⚠\x1b[0m ${describeLocation(error)}: ${error.remediationHint || ''}`);
        }

        console.log('');
        console.log(` ${summary.errors} issue(s) found. Run \x1b[36mdo/validate\x1b[0m before deployment.`);
    }

    return { skipped: false, report };
}

/**
 * Join a finding's operation and field path into a display location.
 *
 * @param {Object} finding - Finding with optional operation and fieldPath
 * @returns {string} "operation → fieldPath", or whichever part is present
 */
function describeLocation(finding) {
    return [finding.operation, finding.fieldPath].filter(Boolean).join(' → ');
}

/**
 * Render an invalid value for console output.
 * Absent values (null, undefined, empty string) render as ''; objects are JSON-encoded
 * so they do not print as "[object Object]"; everything else is stringified as-is.
 *
 * @param {*} value - The offending value from a finding
 * @returns {string} Printable representation
 */
function describeInvalidValue(value) {
    if (value === null || value === undefined || value === '') return '';
    if (typeof value === 'object') {
        try {
            return JSON.stringify(value) ?? String(value);
        } catch {
            // Circular structures cannot be JSON-encoded; fall back to the default string form.
            return String(value);
        }
    }
    return String(value);
}

export default { runGenerationValidation };
|