@aws/ml-container-creator 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/bin/cli.js +5 -2
  2. package/config/bootstrap-stack.json +86 -7
  3. package/config/defaults.json +1 -1
  4. package/infra/ci-harness/buildspec.yml +60 -0
  5. package/package.json +3 -1
  6. package/servers/README.md +41 -1
  7. package/servers/instance-sizer/index.js +42 -2
  8. package/servers/instance-sizer/lib/instance-ranker.js +114 -10
  9. package/servers/instance-sizer/lib/quota-resolver.js +368 -0
  10. package/servers/instance-sizer/package.json +2 -0
  11. package/servers/lib/catalogs/instances.json +527 -12
  12. package/servers/lib/catalogs/model-servers.json +15 -15
  13. package/servers/lib/catalogs/model-sizes.json +27 -0
  14. package/servers/lib/catalogs/models.json +71 -0
  15. package/servers/lib/schemas/image-catalog.schema.json +9 -1
  16. package/src/app.js +109 -3
  17. package/src/lib/bootstrap-command-handler.js +96 -3
  18. package/src/lib/cli-handler.js +2 -2
  19. package/src/lib/config-manager.js +117 -1
  20. package/src/lib/deployment-entry-schema.js +16 -0
  21. package/src/lib/prompt-runner.js +270 -12
  22. package/src/lib/prompts.js +288 -6
  23. package/src/lib/registry-command-handler.js +12 -0
  24. package/src/lib/schema-sync.js +31 -0
  25. package/src/lib/template-manager.js +49 -1
  26. package/src/lib/validate-runner.js +125 -2
  27. package/templates/Dockerfile +22 -2
  28. package/templates/code/cuda_compat.sh +22 -0
  29. package/templates/code/serve +3 -0
  30. package/templates/code/serving.properties +14 -0
  31. package/templates/code/start_server.sh +3 -0
  32. package/templates/diffusors/Dockerfile +2 -1
  33. package/templates/diffusors/serve +3 -0
  34. package/templates/do/README.md +33 -0
  35. package/templates/do/adapter +1214 -0
  36. package/templates/do/adapters/.gitkeep +2 -0
  37. package/templates/do/add-ic +130 -0
  38. package/templates/do/benchmark +718 -0
  39. package/templates/do/clean +593 -17
  40. package/templates/do/config +49 -4
  41. package/templates/do/deploy +513 -362
  42. package/templates/do/ic/default.conf +32 -0
  43. package/templates/do/lib/endpoint-config.sh +216 -0
  44. package/templates/do/lib/inference-component.sh +167 -0
  45. package/templates/do/lib/secrets.sh +44 -0
  46. package/templates/do/lib/wait.sh +131 -0
  47. package/templates/do/logs +107 -27
  48. package/templates/do/optimize +528 -0
  49. package/templates/do/register +119 -2
  50. package/templates/do/status +337 -0
  51. package/templates/do/test +80 -28
  52. package/templates/triton/Dockerfile +5 -0
@@ -0,0 +1,368 @@
1
+ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ // SPDX-License-Identifier: Apache-2.0
3
+
4
+ /**
5
+ * Quota Resolver
6
+ *
7
+ * Queries AWS APIs to determine account-level quota headroom, capacity
8
+ * reservations, and Flexible Training Plans for SageMaker instance types.
9
+ * Used in discover mode to filter and prioritize instance recommendations.
10
+ *
11
+ * All methods degrade gracefully — API failures return null and log to stderr.
12
+ */
13
+
14
+ import { ServiceQuotasClient, ListServiceQuotasCommand } from '@aws-sdk/client-service-quotas'
15
+ import { SageMakerClient, ListEndpointsCommand, ListTrainingPlansCommand } from '@aws-sdk/client-sagemaker'
16
+
17
+ // ── Constants ────────────────────────────────────────────────────────────────
18
+
// Service code string passed as `ServiceCode` when listing service quotas.
const SAGEMAKER_SERVICE_CODE = 'sagemaker'

// Default per-request timeout, in milliseconds (5 seconds).
const DEFAULT_TIMEOUT_MS = 5 * 1000

// Default lifetime of a cache entry, in milliseconds (5 minutes).
const DEFAULT_CACHE_TTL_MS = 5 * 60 * 1000

// Matches quota names of the form "ml.<family>.<size> for endpoint usage";
// capture group 1 is the instance type.
const QUOTA_NAME_PATTERN = /^(ml\.[a-z0-9]+\.[a-z0-9]+) for endpoint usage$/
23
+
24
+ // ── Logging ──────────────────────────────────────────────────────────────────
25
+
/**
 * Write a diagnostic line to stderr, tagged with this module's name.
 *
 * @param {string} message - Text to emit; a trailing newline is appended
 */
function log(message) {
  const line = `[quota-resolver] ${message}\n`
  process.stderr.write(line)
}
29
+
30
+ // ── QuotaResolver Class ──────────────────────────────────────────────────────
31
+
/**
 * Resolves account-level capacity signals for SageMaker instance types in one
 * region: endpoint quota headroom and active training plans.
 *
 * Every public method returns `null` (after writing a line to stderr) instead
 * of throwing when the underlying AWS call fails.
 */
class QuotaResolver {
  /**
   * @param {string} region - AWS region to query
   * @param {object} [options={}]
   * @param {number} [options.timeout=5000] - Timeout per API call in ms
   * @param {number} [options.cacheTtl=300000] - Cache TTL in ms (default 5 min)
   */
  constructor(region, options = {}) {
    this.region = region
    // Falsy values (including 0) fall back to the defaults; kept as `||` so
    // existing callers see no change in behavior.
    this.timeout = options.timeout || DEFAULT_TIMEOUT_MS
    this.cacheTtl = options.cacheTtl || DEFAULT_CACHE_TTL_MS
    this.cache = new Map()

    const clientConfig = {
      region: this.region,
      requestHandler: {
        requestTimeout: this.timeout
      }
    }

    this.quotasClient = new ServiceQuotasClient(clientConfig)
    this.sagemakerClient = new SageMakerClient(clientConfig)
  }

  /**
   * Look up a cache entry, evicting it if it is older than the TTL.
   *
   * @param {string} key - Cache key
   * @returns {*|null} Cached value, or null when absent or expired
   */
  _getCached(key) {
    const entry = this.cache.get(key)
    if (!entry) return null
    if (Date.now() - entry.timestamp > this.cacheTtl) {
      this.cache.delete(key)
      return null
    }
    return entry.value
  }

  /**
   * Store a value in the cache, stamped with the current time.
   *
   * @param {string} key - Cache key
   * @param {*} value - Value to cache
   */
  _setCache(key, value) {
    this.cache.set(key, { value, timestamp: Date.now() })
  }

  /**
   * Extract the instance type from a SageMaker quota name.
   * Expected pattern: "ml.<family>.<size> for endpoint usage"
   *
   * @param {string} quotaName - Quota name from the Service Quotas API
   * @returns {string|null} Instance type, or null if the name is not a string
   *   or does not match the pattern
   */
  _parseQuotaName(quotaName) {
    if (typeof quotaName !== 'string') return null
    const match = quotaName.match(QUOTA_NAME_PATTERN)
    return match ? match[1] : null
  }

  /**
   * Log one stderr line explaining why an AWS call failed. The error is
   * classified by its `name` or `Code` property.
   *
   * @param {*} err - The thrown value or rejection reason
   * @param {object} messages
   * @param {string} messages.denied - Line for AccessDeniedException
   * @param {string} [messages.unavailable] - Line for ValidationException;
   *   when omitted, ValidationException falls through to the generic line
   * @param {string} messages.throttled - Line for ThrottlingException
   * @param {string} messages.fallbackPrefix - Prefix for any other error
   */
  _logFailure(err, messages) {
    const codes = [err?.name, err?.Code]
    if (codes.includes('AccessDeniedException')) {
      log(messages.denied)
    } else if (messages.unavailable && codes.includes('ValidationException')) {
      log(messages.unavailable)
    } else if (codes.includes('ThrottlingException')) {
      log(messages.throttled)
    } else {
      log(`${messages.fallbackPrefix}: ${err?.message ?? err}`)
    }
  }

  /**
   * Get quota headroom for a list of instance types.
   *
   * Fetches the endpoint quota per instance type and the deployed instance
   * count per type, then reports headroom = quota - deployed for every
   * requested type that has a quota entry. Types without one are omitted.
   *
   * The raw quota/deployed maps are what gets cached, so calls with different
   * `instanceTypes` lists inside the TTL each get a correct answer from a
   * single round of API calls.
   *
   * If only the deployed-count lookup fails, counts are treated as 0 and the
   * failure is logged; if the quota lookup fails the method returns null.
   *
   * @param {string[]} instanceTypes - Instance types to check (e.g., ['ml.g5.xlarge'])
   * @returns {Promise<Map|null>} Map: instanceType → { quota, deployed, headroom }, or null on failure
   */
  async getQuotaHeadroom(instanceTypes) {
    const cacheKey = 'quotaSnapshot'
    const failureMessages = {
      denied: `AccessDenied: insufficient permissions for quota queries — skipping`,
      throttled: `Throttled: Service Quotas API rate limit hit — skipping`,
      fallbackPrefix: 'Failed to get quota headroom'
    }

    try {
      let snapshot = this._getCached(cacheKey)

      if (snapshot === null) {
        // allSettled never rejects, so each outcome has to be inspected here;
        // a surrounding catch alone would never see these failures.
        const [quotaOutcome, deployedOutcome] = await Promise.allSettled([
          this._fetchServiceQuotas(),
          this._fetchDeployedCounts()
        ])

        if (quotaOutcome.status !== 'fulfilled') {
          this._logFailure(quotaOutcome.reason, failureMessages)
          return null
        }
        if (!quotaOutcome.value) {
          return null
        }

        let deployed = new Map()
        if (deployedOutcome.status === 'fulfilled') {
          deployed = deployedOutcome.value || deployed
        } else {
          const reason = deployedOutcome.reason
          log(`Could not count deployed instances: ${reason?.message ?? reason} — assuming none deployed`)
        }

        snapshot = { quotas: quotaOutcome.value, deployed }
        this._setCache(cacheKey, snapshot)
      }

      const result = new Map()
      for (const instanceType of instanceTypes) {
        const quota = snapshot.quotas.get(instanceType)
        if (quota == null) continue

        const deployedCount = snapshot.deployed.get(instanceType) || 0
        result.set(instanceType, {
          quota,
          deployed: deployedCount,
          headroom: quota - deployedCount
        })
      }
      return result
    } catch (err) {
      // Reached for unexpected errors, e.g. a non-iterable `instanceTypes`.
      this._logFailure(err, failureMessages)
      return null
    }
  }

  /**
   * Fetch all SageMaker service quotas whose name matches the endpoint-usage
   * pattern, following NextToken until the listing is exhausted.
   *
   * @returns {Promise<Map>} Map: instanceType → quota limit (number)
   */
  async _fetchServiceQuotas() {
    const quotaMap = new Map()
    let nextToken = undefined

    do {
      const command = new ListServiceQuotasCommand({
        ServiceCode: SAGEMAKER_SERVICE_CODE,
        ...(nextToken && { NextToken: nextToken })
      })

      const response = await this.quotasClient.send(command)

      for (const quota of (response.Quotas || [])) {
        const instanceType = this._parseQuotaName(quota.QuotaName || '')
        if (instanceType && quota.Value != null) {
          quotaMap.set(instanceType, quota.Value)
        }
      }

      nextToken = response.NextToken
    } while (nextToken)

    return quotaMap
  }

  /**
   * Count deployed instances per instance type across InService endpoints,
   * following NextToken until the listing is exhausted.
   *
   * Only endpoints whose list entry carries `ProductionVariants` contribute.
   * NOTE(review): ListEndpoints summaries may not include ProductionVariants,
   * in which case this map is empty and headroom equals the raw quota —
   * confirm against the API; DescribeEndpoint per endpoint would be needed
   * for exact counts.
   *
   * @returns {Promise<Map>} Map: instanceType → deployed count
   */
  async _fetchDeployedCounts() {
    const deployedMap = new Map()
    let nextToken = undefined

    do {
      const command = new ListEndpointsCommand({
        StatusEquals: 'InService',
        ...(nextToken && { NextToken: nextToken })
      })

      const response = await this.sagemakerClient.send(command)

      for (const endpoint of (response.Endpoints || [])) {
        for (const variant of (endpoint.ProductionVariants || [])) {
          if (!variant.InstanceType) continue

          const current = deployedMap.get(variant.InstanceType) || 0
          // A missing count is assumed to be one instance, but an explicit 0
          // (variant scaled to zero) must stay 0 — hence `??` rather than `||`.
          const count = variant.CurrentInstanceCount ?? 1
          deployedMap.set(variant.InstanceType, current + count)
        }
      }

      nextToken = response.NextToken
    } while (nextToken)

    return deployedMap
  }

  /**
   * List every training plan summary returned for the 'Active' status
   * filter, following NextToken until the listing is exhausted.
   *
   * @returns {Promise<object[]>} Training plan summaries, in API order
   */
  async _listActiveTrainingPlans() {
    const plans = []
    let nextToken = undefined

    do {
      const command = new ListTrainingPlansCommand({
        StatusEquals: 'Active',
        ...(nextToken && { NextToken: nextToken })
      })

      const response = await this.sagemakerClient.send(command)
      plans.push(...(response.TrainingPlanSummaries || []))

      nextToken = response.NextToken
    } while (nextToken)

    return plans
  }

  /**
   * Determine the instance type a training plan summary refers to.
   *
   * Checks the top-level fields first, then the first entry of
   * `ReservedCapacitySummaries` that names an instance type.
   * NOTE(review): the nested location is where the SageMaker API reference
   * documents the instance type for a plan summary — confirm field names.
   *
   * @param {object} plan - Training plan summary
   * @returns {string|null} Instance type, or null if none is present
   */
  _planInstanceType(plan) {
    const direct = plan.InstanceType || plan.ReservedCapacityInstanceType
    if (direct) return direct

    const summaries = Array.isArray(plan.ReservedCapacitySummaries)
      ? plan.ReservedCapacitySummaries
      : []
    const named = summaries.find((summary) => summary && summary.InstanceType)
    return named ? named.InstanceType : null
  }

  /**
   * Pick the first non-nullish capacity figure from a plan summary.
   *
   * @param {object} plan - Training plan summary
   * @returns {number} Remaining capacity, or 0 when no field is present
   */
  _planRemainingCapacity(plan) {
    return plan.AvailableInstanceCount
      ?? plan.RemainingCapacity
      ?? plan.TotalInstanceCount
      ?? 0
  }

  /**
   * Get active Training Plan reservations for inference endpoints.
   *
   * Keeps only plans whose TargetResources include 'endpoint', that are
   * inside their start/end window right now, and that have capacity left.
   * When several plans share an instance type, the last one listed wins.
   *
   * ⚠️ EXPERIMENTAL: Training Plans for inference is a newer feature.
   *
   * @returns {Promise<Map|null>} Map: instanceType →
   *   { planName, planArn, type: 'training-plan', count, startDate, endDate },
   *   or null on failure
   */
  async getCapacityReservations() {
    const cacheKey = 'capacityReservations'
    const cached = this._getCached(cacheKey)
    if (cached) return cached

    try {
      const result = new Map()
      const plans = await this._listActiveTrainingPlans()
      const now = new Date()
      // Dates may arrive as Date objects or as strings; normalise Dates to ISO.
      const toIso = (value) => (value instanceof Date ? value.toISOString() : value)

      for (const plan of plans) {
        // Only include plans targeting inference endpoints
        const targetResources = plan.TargetResources || []
        if (!targetResources.includes('endpoint')) continue

        const instanceType = this._planInstanceType(plan)
        if (!instanceType) continue

        const startDate = plan.StartTime || null
        const endDate = plan.EndTime || plan.ExpirationTime || null

        // Skip plans outside their time window
        if (startDate && new Date(startDate) > now) continue
        if (endDate && new Date(endDate) < now) continue

        // Only include if there's remaining capacity
        const remainingCapacity = this._planRemainingCapacity(plan)
        if (remainingCapacity <= 0) continue

        result.set(instanceType, {
          planName: plan.TrainingPlanName || 'unknown',
          planArn: plan.TrainingPlanArn,
          type: 'training-plan',
          count: remainingCapacity,
          startDate: startDate ? toIso(startDate) : null,
          endDate: endDate ? toIso(endDate) : null
        })
      }

      this._setCache(cacheKey, result)
      return result
    } catch (err) {
      this._logFailure(err, {
        denied: `AccessDenied: insufficient permissions for training plan queries — skipping`,
        unavailable: `ListTrainingPlans not available in region ${this.region} — skipping`,
        throttled: `Throttled: ListTrainingPlans rate limit hit — skipping`,
        fallbackPrefix: 'Failed to get capacity reservations'
      })
      return null
    }
  }

  /**
   * Get active Flexible Training Plans with remaining capacity.
   *
   * Unlike getCapacityReservations, this does not filter on TargetResources
   * or on the plan's time window. When several plans share an instance type,
   * the last one listed wins.
   *
   * @returns {Promise<Map|null>} Map: instanceType → { planName, remainingCapacity, expiresAt }, or null on failure
   */
  async getTrainingPlans() {
    const cacheKey = 'trainingPlans'
    const cached = this._getCached(cacheKey)
    if (cached) return cached

    try {
      const result = new Map()
      const plans = await this._listActiveTrainingPlans()

      for (const plan of plans) {
        const instanceType = this._planInstanceType(plan)
        const remainingCapacity = this._planRemainingCapacity(plan)
        if (!instanceType || !(remainingCapacity > 0)) continue

        result.set(instanceType, {
          planName: plan.TrainingPlanName || plan.TrainingPlanArn || 'unknown',
          remainingCapacity,
          expiresAt: plan.EndTime || plan.ExpirationTime || null
        })
      }

      this._setCache(cacheKey, result)
      return result
    } catch (err) {
      this._logFailure(err, {
        denied: `AccessDenied: insufficient permissions for training plan queries — skipping`,
        unavailable: `ListTrainingPlans not available in region ${this.region} — skipping`,
        throttled: `Throttled: ListTrainingPlans rate limit hit — skipping`,
        fallbackPrefix: 'Failed to get training plans'
      })
      return null
    }
  }
}
367
+
368
+ export { QuotaResolver, QUOTA_NAME_PATTERN, SAGEMAKER_SERVICE_CODE, DEFAULT_TIMEOUT_MS, DEFAULT_CACHE_TTL_MS }
@@ -10,6 +10,8 @@
10
10
  "test": "node test.js"
11
11
  },
12
12
  "dependencies": {
13
+ "@aws-sdk/client-sagemaker": "^3.700.0",
14
+ "@aws-sdk/client-service-quotas": "^3.700.0",
13
15
  "@modelcontextprotocol/sdk": "^1.0.0"
14
16
  }
15
17
  }