@aws/ml-container-creator 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -67,6 +67,37 @@
67
67
  ],
68
68
  "Resource": "*"
69
69
  },
70
+ {
71
+ "Sid": "SageMakerBenchmarking",
72
+ "Effect": "Allow",
73
+ "Action": [
74
+ "sagemaker:CreateAIBenchmarkJob",
75
+ "sagemaker:DescribeAIBenchmarkJob",
76
+ "sagemaker:ListAIBenchmarkJobs",
77
+ "sagemaker:StopAIBenchmarkJob",
78
+ "sagemaker:DeleteAIBenchmarkJob",
79
+ "sagemaker:CreateAIWorkloadConfig",
80
+ "sagemaker:DescribeAIWorkloadConfig",
81
+ "sagemaker:ListAIWorkloadConfigs",
82
+ "sagemaker:DeleteAIWorkloadConfig",
83
+ "sagemaker:CreateTrainingJob",
84
+ "sagemaker:DescribeTrainingJob",
85
+ "sagemaker:StopTrainingJob",
86
+ "sagemaker:AddTags"
87
+ ],
88
+ "Resource": "*"
89
+ },
90
+ {
91
+ "Sid": "PassRoleToSageMaker",
92
+ "Effect": "Allow",
93
+ "Action": "iam:PassRole",
94
+ "Resource": { "Fn::Sub": "arn:aws:iam::${AWS::AccountId}:role/mlcc-sagemaker-execution-role" },
95
+ "Condition": {
96
+ "StringEquals": {
97
+ "iam:PassedToService": "sagemaker.amazonaws.com"
98
+ }
99
+ }
100
+ },
70
101
  {
71
102
  "Sid": "ECRPull",
72
103
  "Effect": "Allow",
@@ -76,7 +107,7 @@
76
107
  "ecr:GetDownloadUrlForLayer",
77
108
  "ecr:BatchGetImage"
78
109
  ],
79
- "Resource": { "Fn::Sub": "arn:aws:ecr:*:${AWS::AccountId}:repository/ml-container-creator" }
110
+ "Resource": "*"
80
111
  },
81
112
  {
82
113
  "Sid": "ECRAuth",
@@ -99,6 +130,7 @@
99
130
  "Effect": "Allow",
100
131
  "Action": [
101
132
  "s3:GetObject",
133
+ "s3:PutObject",
102
134
  "s3:ListBucket"
103
135
  ],
104
136
  "Resource": [
@@ -113,12 +145,36 @@
113
145
  "secretsmanager:GetSecretValue",
114
146
  "secretsmanager:DescribeSecret"
115
147
  ],
116
- "Resource": "arn:aws:secretsmanager:*:*:secret:mlcc/*",
117
- "Condition": {
118
- "StringEquals": {
119
- "aws:ResourceTag/mlcc:managed-by": "ml-container-creator"
120
- }
121
- }
148
+ "Resource": [
149
+ "arn:aws:secretsmanager:*:*:secret:mlcc/*",
150
+ "arn:aws:secretsmanager:*:*:secret:ml-container-creator/*"
151
+ ]
152
+ },
153
+ {
154
+ "Sid": "SecretsManagerWrite",
155
+ "Effect": "Allow",
156
+ "Action": [
157
+ "secretsmanager:CreateSecret",
158
+ "secretsmanager:PutSecretValue",
159
+ "secretsmanager:TagResource"
160
+ ],
161
+ "Resource": [
162
+ "arn:aws:secretsmanager:*:*:secret:mlcc/*",
163
+ "arn:aws:secretsmanager:*:*:secret:ml-container-creator/*"
164
+ ]
165
+ },
166
+ {
167
+ "Sid": "QuotaAndAvailability",
168
+ "Effect": "Allow",
169
+ "Action": [
170
+ "service-quotas:GetServiceQuota",
171
+ "service-quotas:ListServiceQuotas",
172
+ "ec2:DescribeCapacityReservations",
173
+ "sagemaker:ListTrainingPlans",
174
+ "sagemaker:DescribeTrainingPlan",
175
+ "sagemaker:ListEndpoints"
176
+ ],
177
+ "Resource": "*"
122
178
  }
123
179
  ]
124
180
  }
@@ -185,6 +241,25 @@
185
241
  { "Key": "mlcc:created-by", "Value": "bootstrap" }
186
242
  ]
187
243
  }
244
+ },
245
+
246
+ "BenchmarkS3Bucket": {
247
+ "Type": "AWS::S3::Bucket",
248
+ "DeletionPolicy": "Retain",
249
+ "UpdateReplacePolicy": "Retain",
250
+ "Properties": {
251
+ "BucketName": { "Fn::Sub": "ml-container-creator-benchmark-${AWS::Region}-${AWS::AccountId}" },
252
+ "VersioningConfiguration": { "Status": "Enabled" },
253
+ "BucketEncryption": {
254
+ "ServerSideEncryptionConfiguration": [
255
+ { "ServerSideEncryptionByDefault": { "SSEAlgorithm": "AES256" } }
256
+ ]
257
+ },
258
+ "Tags": [
259
+ { "Key": "mlcc:managed-by", "Value": "ml-container-creator" },
260
+ { "Key": "mlcc:created-by", "Value": "bootstrap" }
261
+ ]
262
+ }
188
263
  }
189
264
  },
190
265
 
@@ -217,6 +292,10 @@
217
292
  "Description": "S3 bucket for batch transform I/O",
218
293
  "Value": { "Ref": "BatchS3Bucket" }
219
294
  },
295
+ "BenchmarkS3BucketName": {
296
+ "Description": "S3 bucket for benchmark results output",
297
+ "Value": { "Ref": "BenchmarkS3Bucket" }
298
+ },
220
299
  "StackVersion": {
221
300
  "Description": "Bootstrap stack template version for forward compatibility tracking",
222
301
  "Value": "2026-05-04"
@@ -12,7 +12,7 @@
12
12
  "awsRegion": "us-east-1",
13
13
  "includeTesting": true,
14
14
  "testTypes": ["local-model-cli", "local-model-server", "hosted-model-endpoint"],
15
- "includeSampleModel": false,
15
+ "includeSampleModel": true,
16
16
  "skipPrompts": false
17
17
  },
18
18
  "validation": {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aws/ml-container-creator",
3
- "version": "0.3.0",
3
+ "version": "0.4.0",
4
4
  "description": "Generator for SageMaker AI BYOC paradigm for predictive inference use-cases.",
5
5
  "type": "module",
6
6
  "main": "src/app.js",
@@ -111,6 +111,8 @@
111
111
  "tinyglobby": "^0.2.16"
112
112
  },
113
113
  "devDependencies": {
114
+ "@aws-sdk/client-sagemaker": "^3.700.0",
115
+ "@aws-sdk/client-service-quotas": "^3.700.0",
114
116
  "@microsoft/eslint-formatter-sarif": "^3.1.0",
115
117
  "eslint": "^8.57.0",
116
118
  "fast-check": "^4.5.2",
@@ -26,7 +26,8 @@ import { fileURLToPath } from 'node:url'
26
26
  import { resolve, dirname } from 'node:path'
27
27
  import { resolveModelMetadata } from './lib/model-resolver.js'
28
28
  import { estimateVram } from './lib/vram-estimator.js'
29
- import { filterAndRankInstances } from './lib/instance-ranker.js'
29
+ import { filterAndRankInstances, applyAvailabilityRanking } from './lib/instance-ranker.js'
30
+ import { QuotaResolver } from './lib/quota-resolver.js'
30
31
  import { queryBedrock } from '../lib/bedrock-client.js'
31
32
 
32
33
  // ── Path setup ───────────────────────────────────────────────────────────────
@@ -379,6 +380,38 @@ async function handleGetInstanceRecommendation(params) {
379
380
  { limit }
380
381
  )
381
382
 
383
+ // Step 3a: Quota & availability filtering (discover mode only)
384
+ let preQuotaFilterCount = 0
385
+ let allFilteredByQuota = false
386
+ if (DISCOVER_MODE && recommendations.length > 0) {
387
+ try {
388
+ const region = process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION || BEDROCK_REGION
389
+ const quotaResolver = new QuotaResolver(region)
390
+
391
+ const instanceTypes = recommendations.map(r => r.instanceType)
392
+ const [quotas, reservations, ftps] = await Promise.allSettled([
393
+ quotaResolver.getQuotaHeadroom(instanceTypes),
394
+ quotaResolver.getCapacityReservations(),
395
+ quotaResolver.getTrainingPlans()
396
+ ])
397
+
398
+ preQuotaFilterCount = recommendations.length
399
+ recommendations = applyAvailabilityRanking(
400
+ recommendations,
401
+ quotas.status === 'fulfilled' ? quotas.value : null,
402
+ reservations.status === 'fulfilled' ? reservations.value : null,
403
+ ftps.status === 'fulfilled' ? ftps.value : null
404
+ )
405
+ if (recommendations.length === 0 && preQuotaFilterCount > 0) {
406
+ allFilteredByQuota = true
407
+ }
408
+ } catch (err) {
409
+ // Graceful degradation: if credentials are missing or any unexpected
410
+ // error occurs, skip quota filtering and continue with unfiltered results
411
+ log(`Quota resolution skipped: ${err.message}`)
412
+ }
413
+ }
414
+
382
415
  // Step 3b: If instanceSearch is also provided, further filter by tags
383
416
  if (instanceSearch && recommendations.length > 0) {
384
417
  const searchMatches = new Set(searchInstancesByTag(instanceSearch, effectiveCatalog, { limit: 100 }))
@@ -480,7 +513,8 @@ async function handleGetInstanceRecommendation(params) {
480
513
  vramBreakdown: vramEstimate.breakdown,
481
514
  recommendations: finalRecommendations,
482
515
  source: modelMetadata.source,
483
- smartModeUsed
516
+ smartModeUsed,
517
+ allFilteredByQuota
484
518
  }
485
519
  })
486
520
  }]
@@ -31,14 +31,20 @@ const GPU_MEMORY_MAP = {
31
31
  */
32
32
  const COST_TIER_MAP = {
33
33
  'g4dn': 'low',
34
+ 'g4ad': 'low',
34
35
  'inf2': 'low',
35
36
  'g5': 'medium',
36
37
  'g6': 'medium',
38
+ 'g6e': 'medium',
39
+ 'g7e': 'medium',
37
40
  'trn1': 'medium',
38
41
  'p3': 'high',
39
42
  'p4d': 'high',
40
43
  'p4de': 'high',
41
- 'p5': 'high'
44
+ 'p5': 'high',
45
+ 'p5e': 'high',
46
+ 'p5en': 'high',
47
+ 'p6': 'high'
42
48
  }
43
49
 
44
50
  /**
@@ -56,15 +62,21 @@ const COST_TIER_WEIGHT = {
56
62
  * Lower is newer (sorted first). Newer generations offer better perf/$.
57
63
  */
58
64
  const GENERATION_WEIGHT = {
59
- 'g6': 1,
60
- 'p5': 1,
61
- 'trn1': 2,
62
- 'inf2': 2,
63
- 'g5': 3,
64
- 'p4de': 4,
65
- 'p4d': 4,
66
- 'p3': 5,
67
- 'g4dn': 6
65
+ 'g7e': 1,
66
+ 'p6': 1,
67
+ 'g6e': 2,
68
+ 'p5e': 2,
69
+ 'p5en': 2,
70
+ 'g6': 3,
71
+ 'p5': 3,
72
+ 'trn1': 3,
73
+ 'inf2': 3,
74
+ 'g5': 4,
75
+ 'p4de': 5,
76
+ 'p4d': 5,
77
+ 'p3': 6,
78
+ 'g4dn': 7,
79
+ 'g4ad': 7
68
80
  }
69
81
 
70
82
  /**
@@ -257,8 +269,99 @@ const filterAndRankInstances = (vramRequired, instanceCatalog, options = {}) =>
257
269
  return candidates.slice(0, limit)
258
270
  }
259
271
 
272
+ // ── Availability Ranking ─────────────────────────────────────────────────────
273
+
274
/**
 * Priority weights for capacity types used in availability ranking.
 * Lower value = higher priority (sorted first).
 */
const CAPACITY_TYPE_PRIORITY = {
    reserved: 0,
    ftp: 1,
    'on-demand': 2
}

/**
 * Annotate, filter, and re-rank instance recommendations based on
 * quota headroom, capacity reservations, and Flexible Training Plans.
 *
 * Each recommendation object is annotated IN PLACE (the caller's objects are
 * mutated) with:
 *   - capacityType: 'reserved' | 'ftp' | 'on-demand'
 *   - quotaStatus: 'available' | 'limited' | 'zero-quota'
 *   - reservationInfo / reservationType: set when capacityType is 'reserved'
 *   - ftpInfo: set when capacityType is 'ftp'
 *   - quotaHeadroom / quotaDeployed / quotaLimit: copied from the quota entry
 *     when one exists for the instance type
 *
 * quotaStatus rules (only when a quota entry exists for the instance type):
 *   - numeric headroom <= 0  → 'zero-quota' (a negative headroom means more is
 *     deployed than the quota allows, so nothing further can be launched)
 *   - headroom < 2           → 'limited'
 *   - otherwise              → 'available'
 *
 * Instances with quotaStatus === 'zero-quota' are filtered out unless they are
 * backed by a reservation or an FTP. The returned array is a new array sorted
 * reserved → FTP → on-demand, preserving the incoming order within each tier.
 *
 * When an input signal is null (API failure) that signal is skipped. When all
 * three signals are null the original array is returned untouched and
 * unannotated.
 *
 * @param {object[]} recommendations - Ranked instance recommendations from filterAndRankInstances
 * @param {Map|null} quotas - Map: instanceType → { quota, deployed, headroom }, or null
 * @param {Map|null} reservations - Map: instanceType → { reservationId, count, expiresAt }, or null
 * @param {Map|null} ftps - Map: instanceType → { planName, remainingCapacity, expiresAt }, or null
 * @returns {object[]} Filtered and re-ranked recommendations
 */
const applyAvailabilityRanking = (recommendations, quotas, reservations, ftps) => {
    if (!recommendations || recommendations.length === 0) {
        return []
    }

    // If all signals are null (all API calls failed), return unmodified
    if (!quotas && !reservations && !ftps) {
        return recommendations
    }

    for (const rec of recommendations) {
        rec.capacityType = 'on-demand'
        rec.quotaStatus = 'available'

        // A reservation outranks an FTP when both exist for the same type.
        if (reservations?.has(rec.instanceType)) {
            rec.capacityType = 'reserved'
            rec.reservationInfo = reservations.get(rec.instanceType)
            // NOTE(review): value kept as-is from the original; confirm that
            // 'training-plan' is the intended label for reserved capacity.
            rec.reservationType = 'training-plan'
        } else if (ftps?.has(rec.instanceType)) {
            rec.capacityType = 'ftp'
            rec.ftpInfo = ftps.get(rec.instanceType)
        }

        // quotaStatus applies to all instances regardless of capacityType
        const q = quotas?.get(rec.instanceType)
        if (q) {
            const { headroom } = q
            // `<= 0` rather than `=== 0`: an over-deployed account reports a
            // negative headroom and must not be presented as merely 'limited'.
            if (typeof headroom === 'number' && headroom <= 0) {
                rec.quotaStatus = 'zero-quota'
            } else if (headroom < 2) {
                rec.quotaStatus = 'limited'
            }
            rec.quotaHeadroom = q.headroom
            rec.quotaDeployed = q.deployed
            rec.quotaLimit = q.quota
        }
    }

    // Filter out zero-quota instances (but never filter reserved/FTP — you have the capacity)
    const filtered = recommendations.filter(r =>
        r.quotaStatus !== 'zero-quota' || r.capacityType === 'reserved' || r.capacityType === 'ftp'
    )

    // Unknown capacity types rank with on-demand. Array.prototype.sort is
    // stable, so the existing order inside each tier is preserved.
    const priorityOf = r =>
        CAPACITY_TYPE_PRIORITY[r.capacityType] ?? CAPACITY_TYPE_PRIORITY['on-demand']
    filtered.sort((a, b) => priorityOf(a) - priorityOf(b))

    return filtered
}
361
+
260
362
  export {
261
363
  filterAndRankInstances,
364
+ applyAvailabilityRanking,
262
365
  getPerGpuMemoryGb,
263
366
  getCostTier,
264
367
  effectiveVram,
@@ -266,5 +369,6 @@ export {
266
369
  COST_TIER_MAP,
267
370
  COST_TIER_WEIGHT,
268
371
  GENERATION_WEIGHT,
372
+ CAPACITY_TYPE_PRIORITY,
269
373
  TP_OVERHEAD_PER_GPU
270
374
  }