npm - @aws/ml-container-creator - Version diff - 0.3.0 → 0.4.0 - Mend

@aws/ml-container-creator 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34)

package/config/bootstrap-stack.json +86 -7
package/config/defaults.json +1 -1
package/package.json +3 -1
package/servers/instance-sizer/index.js +36 -2
package/servers/instance-sizer/lib/instance-ranker.js +114 -10
package/servers/instance-sizer/lib/quota-resolver.js +368 -0
package/servers/instance-sizer/package.json +2 -0
package/servers/lib/catalogs/instances.json +527 -12
package/servers/lib/catalogs/model-servers.json +15 -15
package/servers/lib/catalogs/model-sizes.json +27 -0
package/servers/lib/catalogs/models.json +71 -0
package/servers/lib/schemas/image-catalog.schema.json +9 -1
package/src/app.js +77 -2
package/src/lib/bootstrap-command-handler.js +96 -3
package/src/lib/cli-handler.js +2 -2
package/src/lib/config-manager.js +78 -1
package/src/lib/prompt-runner.js +96 -9
package/src/lib/prompts.js +66 -4
package/src/lib/schema-sync.js +31 -0
package/src/lib/template-manager.js +49 -1
package/src/lib/validate-runner.js +125 -2
package/templates/Dockerfile +10 -2
package/templates/code/cuda_compat.sh +22 -0
package/templates/code/serve +3 -0
package/templates/code/start_server.sh +3 -0
package/templates/diffusors/Dockerfile +2 -1
package/templates/diffusors/serve +3 -0
package/templates/do/README.md +33 -0
package/templates/do/benchmark +646 -0
package/templates/do/clean +86 -0
package/templates/do/config +26 -3
package/templates/do/deploy +6 -1
package/templates/do/register +8 -1
package/templates/triton/Dockerfile +5 -0

package/config/bootstrap-stack.json CHANGED Viewed

@@ -67,6 +67,37 @@
                   ],
                   "Resource": "*"
                 },
+                {
+                  "Sid": "SageMakerBenchmarking",
+                  "Effect": "Allow",
+                  "Action": [
+                    "sagemaker:CreateAIBenchmarkJob",
+                    "sagemaker:DescribeAIBenchmarkJob",
+                    "sagemaker:ListAIBenchmarkJobs",
+                    "sagemaker:StopAIBenchmarkJob",
+                    "sagemaker:DeleteAIBenchmarkJob",
+                    "sagemaker:CreateAIWorkloadConfig",
+                    "sagemaker:DescribeAIWorkloadConfig",
+                    "sagemaker:ListAIWorkloadConfigs",
+                    "sagemaker:DeleteAIWorkloadConfig",
+                    "sagemaker:CreateTrainingJob",
+                    "sagemaker:DescribeTrainingJob",
+                    "sagemaker:StopTrainingJob",
+                    "sagemaker:AddTags"
+                  ],
+                  "Resource": "*"
+                },
+                {
+                  "Sid": "PassRoleToSageMaker",
+                  "Effect": "Allow",
+                  "Action": "iam:PassRole",
+                  "Resource": { "Fn::Sub": "arn:aws:iam::${AWS::AccountId}:role/mlcc-sagemaker-execution-role" },
+                  "Condition": {
+                    "StringEquals": {
+                      "iam:PassedToService": "sagemaker.amazonaws.com"
+                    }
+                  }
+                },
                 {
                   "Sid": "ECRPull",
                   "Effect": "Allow",
@@ -76,7 +107,7 @@
                     "ecr:GetDownloadUrlForLayer",
                     "ecr:BatchGetImage"
                   ],
-                  "Resource": { "Fn::Sub": "arn:aws:ecr:*:${AWS::AccountId}:repository/ml-container-creator" }
+                  "Resource": "*"
                 },
                 {
                   "Sid": "ECRAuth",
@@ -99,6 +130,7 @@
                   "Effect": "Allow",
                   "Action": [
                     "s3:GetObject",
+                    "s3:PutObject",
                     "s3:ListBucket"
                   ],
                   "Resource": [
@@ -113,12 +145,36 @@
                     "secretsmanager:GetSecretValue",
                     "secretsmanager:DescribeSecret"
                   ],
-                  "Resource": "arn:aws:secretsmanager:*:*:secret:mlcc/*",
-                  "Condition": {
-                    "StringEquals": {
-                      "aws:ResourceTag/mlcc:managed-by": "ml-container-creator"
-                    }
-                  }
+                  "Resource": [
+                    "arn:aws:secretsmanager:*:*:secret:mlcc/*",
+                    "arn:aws:secretsmanager:*:*:secret:ml-container-creator/*"
+                  ]
+                },
+                {
+                  "Sid": "SecretsManagerWrite",
+                  "Effect": "Allow",
+                  "Action": [
+                    "secretsmanager:CreateSecret",
+                    "secretsmanager:PutSecretValue",
+                    "secretsmanager:TagResource"
+                  ],
+                  "Resource": [
+                    "arn:aws:secretsmanager:*:*:secret:mlcc/*",
+                    "arn:aws:secretsmanager:*:*:secret:ml-container-creator/*"
+                  ]
+                },
+                {
+                  "Sid": "QuotaAndAvailability",
+                  "Effect": "Allow",
+                  "Action": [
+                    "service-quotas:GetServiceQuota",
+                    "service-quotas:ListServiceQuotas",
+                    "ec2:DescribeCapacityReservations",
+                    "sagemaker:ListTrainingPlans",
+                    "sagemaker:DescribeTrainingPlan",
+                    "sagemaker:ListEndpoints"
+                  ],
+                  "Resource": "*"
                 }
               ]
             }
@@ -185,6 +241,25 @@
           { "Key": "mlcc:created-by", "Value": "bootstrap" }
         ]
       }
+    },
+    "BenchmarkS3Bucket": {
+      "Type": "AWS::S3::Bucket",
+      "DeletionPolicy": "Retain",
+      "UpdateReplacePolicy": "Retain",
+      "Properties": {
+        "BucketName": { "Fn::Sub": "ml-container-creator-benchmark-${AWS::Region}-${AWS::AccountId}" },
+        "VersioningConfiguration": { "Status": "Enabled" },
+        "BucketEncryption": {
+          "ServerSideEncryptionConfiguration": [
+            { "ServerSideEncryptionByDefault": { "SSEAlgorithm": "AES256" } }
+          ]
+        },
+        "Tags": [
+          { "Key": "mlcc:managed-by", "Value": "ml-container-creator" },
+          { "Key": "mlcc:created-by", "Value": "bootstrap" }
+        ]
+      }
     }
   },
@@ -217,6 +292,10 @@
       "Description": "S3 bucket for batch transform I/O",
       "Value": { "Ref": "BatchS3Bucket" }
     },
+    "BenchmarkS3BucketName": {
+      "Description": "S3 bucket for benchmark results output",
+      "Value": { "Ref": "BenchmarkS3Bucket" }
+    },
     "StackVersion": {
       "Description": "Bootstrap stack template version for forward compatibility tracking",
       "Value": "2026-05-04"

package/config/defaults.json CHANGED Viewed

@@ -12,7 +12,7 @@
     "awsRegion": "us-east-1",
     "includeTesting": true,
     "testTypes": ["local-model-cli", "local-model-server", "hosted-model-endpoint"],
-    "includeSampleModel": false,
+    "includeSampleModel": true,
     "skipPrompts": false
   },
   "validation": {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@aws/ml-container-creator",
-  "version": "0.3.0",
+  "version": "0.4.0",
   "description": "Generator for SageMaker AI BYOC paradigm for predictive inference use-cases.",
   "type": "module",
   "main": "src/app.js",
@@ -111,6 +111,8 @@
     "tinyglobby": "^0.2.16"
   },
   "devDependencies": {
+    "@aws-sdk/client-sagemaker": "^3.700.0",
+    "@aws-sdk/client-service-quotas": "^3.700.0",
     "@microsoft/eslint-formatter-sarif": "^3.1.0",
     "eslint": "^8.57.0",
     "fast-check": "^4.5.2",

package/servers/instance-sizer/index.js CHANGED Viewed

@@ -26,7 +26,8 @@ import { fileURLToPath } from 'node:url'
 import { resolve, dirname } from 'node:path'
 import { resolveModelMetadata } from './lib/model-resolver.js'
 import { estimateVram } from './lib/vram-estimator.js'
-import { filterAndRankInstances } from './lib/instance-ranker.js'
+import { filterAndRankInstances, applyAvailabilityRanking } from './lib/instance-ranker.js'
+import { QuotaResolver } from './lib/quota-resolver.js'
 import { queryBedrock } from '../lib/bedrock-client.js'
 // ── Path setup ───────────────────────────────────────────────────────────────
@@ -379,6 +380,38 @@ async function handleGetInstanceRecommendation(params) {
         { limit }
     )
+    // Step 3a: Quota & availability filtering (discover mode only)
+    let preQuotaFilterCount = 0
+    let allFilteredByQuota = false
+    if (DISCOVER_MODE && recommendations.length > 0) {
+        try {
+            const region = process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION || BEDROCK_REGION
+            const quotaResolver = new QuotaResolver(region)
+            const instanceTypes = recommendations.map(r => r.instanceType)
+            const [quotas, reservations, ftps] = await Promise.allSettled([
+                quotaResolver.getQuotaHeadroom(instanceTypes),
+                quotaResolver.getCapacityReservations(),
+                quotaResolver.getTrainingPlans()
+            ])
+            preQuotaFilterCount = recommendations.length
+            recommendations = applyAvailabilityRanking(
+                recommendations,
+                quotas.status === 'fulfilled' ? quotas.value : null,
+                reservations.status === 'fulfilled' ? reservations.value : null,
+                ftps.status === 'fulfilled' ? ftps.value : null
+            )
+            if (recommendations.length === 0 && preQuotaFilterCount > 0) {
+                allFilteredByQuota = true
+            }
+        } catch (err) {
+            // Graceful degradation: if credentials are missing or any unexpected
+            // error occurs, skip quota filtering and continue with unfiltered results
+            log(`Quota resolution skipped: ${err.message}`)
+        }
+    }
     // Step 3b: If instanceSearch is also provided, further filter by tags
     if (instanceSearch && recommendations.length > 0) {
         const searchMatches = new Set(searchInstancesByTag(instanceSearch, effectiveCatalog, { limit: 100 }))
@@ -480,7 +513,8 @@ async function handleGetInstanceRecommendation(params) {
                     vramBreakdown: vramEstimate.breakdown,
                     recommendations: finalRecommendations,
                     source: modelMetadata.source,
-                    smartModeUsed
+                    smartModeUsed,
+                    allFilteredByQuota
                 }
             })
         }]

package/servers/instance-sizer/lib/instance-ranker.js CHANGED Viewed

@@ -31,14 +31,20 @@ const GPU_MEMORY_MAP = {
  */
 const COST_TIER_MAP = {
     'g4dn': 'low',
+    'g4ad': 'low',
     'inf2': 'low',
     'g5': 'medium',
     'g6': 'medium',
+    'g6e': 'medium',
+    'g7e': 'medium',
     'trn1': 'medium',
     'p3': 'high',
     'p4d': 'high',
     'p4de': 'high',
-    'p5': 'high'
+    'p5': 'high',
+    'p5e': 'high',
+    'p5en': 'high',
+    'p6': 'high'
 }
 /**
@@ -56,15 +62,21 @@ const COST_TIER_WEIGHT = {
  * Lower is newer (sorted first). Newer generations offer better perf/$.
  */
 const GENERATION_WEIGHT = {
-    'g6': 1,
-    'p5': 1,
-    'trn1': 2,
-    'inf2': 2,
-    'g5': 3,
-    'p4de': 4,
-    'p4d': 4,
-    'p3': 5,
-    'g4dn': 6
+    'g7e': 1,
+    'p6': 1,
+    'g6e': 2,
+    'p5e': 2,
+    'p5en': 2,
+    'g6': 3,
+    'p5': 3,
+    'trn1': 3,
+    'inf2': 3,
+    'g5': 4,
+    'p4de': 5,
+    'p4d': 5,
+    'p3': 6,
+    'g4dn': 7,
+    'g4ad': 7
 }
 /**
@@ -257,8 +269,99 @@ const filterAndRankInstances = (vramRequired, instanceCatalog, options = {}) =>
     return candidates.slice(0, limit)
 }
+// ── Availability Ranking ─────────────────────────────────────────────────────
+/**
+ * Priority weights for capacity types used in availability ranking.
+ * Lower value = higher priority (sorted first).
+ */
+const CAPACITY_TYPE_PRIORITY = {
+    reserved: 0,
+    ftp: 1,
+    'on-demand': 2
+}
+/**
+ * Annotate, filter, and re-rank instance recommendations based on
+ * quota headroom, capacity reservations, and Flexible Training Plans.
+ *
+ * Each recommendation is annotated with:
+ * - capacityType: 'reserved' | 'ftp' | 'on-demand'
+ * - quotaStatus: 'available' | 'limited' | 'zero-quota'
+ * - reservationInfo: object (when capacityType is 'reserved')
+ * - ftpInfo: object (when capacityType is 'ftp')
+ *
+ * Instances with quotaStatus === 'zero-quota' are filtered out.
+ * Sort order: reserved → FTP → on-demand, preserving existing order within tiers.
+ *
+ * When any input signal is null (API failure), that signal is skipped
+ * and the function degrades gracefully.
+ *
+ * @param {object[]} recommendations - Ranked instance recommendations from filterAndRankInstances
+ * @param {Map|null} quotas - Map: instanceType → { quota, deployed, headroom }, or null
+ * @param {Map|null} reservations - Map: instanceType → { reservationId, count, expiresAt }, or null
+ * @param {Map|null} ftps - Map: instanceType → { planName, remainingCapacity, expiresAt }, or null
+ * @returns {object[]} Filtered and re-ranked recommendations
+ */
+const applyAvailabilityRanking = (recommendations, quotas, reservations, ftps) => {
+    if (!recommendations || recommendations.length === 0) {
+        return []
+    }
+    // If all signals are null (all API calls failed), return unmodified
+    if (!quotas && !reservations && !ftps) {
+        return recommendations
+    }
+    // Annotate each recommendation with capacityType and quotaStatus
+    for (const rec of recommendations) {
+        rec.capacityType = 'on-demand'
+        rec.quotaStatus = 'available'
+        if (reservations?.has(rec.instanceType)) {
+            rec.capacityType = 'reserved'
+            rec.reservationInfo = reservations.get(rec.instanceType)
+            rec.reservationType = 'training-plan'
+        } else if (ftps?.has(rec.instanceType)) {
+            rec.capacityType = 'ftp'
+            rec.ftpInfo = ftps.get(rec.instanceType)
+        }
+        // quotaStatus applies to all instances regardless of capacityType
+        if (quotas) {
+            const q = quotas.get(rec.instanceType)
+            if (q && q.headroom === 0) {
+                rec.quotaStatus = 'zero-quota'
+            } else if (q && q.headroom < 2) {
+                rec.quotaStatus = 'limited'
+            }
+            if (q) {
+                rec.quotaHeadroom = q.headroom
+                rec.quotaDeployed = q.deployed
+                rec.quotaLimit = q.quota
+            }
+        }
+    }
+    // Filter out zero-quota instances (but never filter reserved/FTP — you have the capacity)
+    const filtered = recommendations.filter(r =>
+        r.quotaStatus !== 'zero-quota' || r.capacityType === 'reserved' || r.capacityType === 'ftp'
+    )
+    // Sort: reserved first, then FTP, then on-demand (preserve existing order within tier)
+    filtered.sort((a, b) => {
+        const pa = CAPACITY_TYPE_PRIORITY[a.capacityType] ?? 2
+        const pb = CAPACITY_TYPE_PRIORITY[b.capacityType] ?? 2
+        if (pa !== pb) return pa - pb
+        return 0
+    })
+    return filtered
+}
 export {
     filterAndRankInstances,
+    applyAvailabilityRanking,
     getPerGpuMemoryGb,
     getCostTier,
     effectiveVram,
@@ -266,5 +369,6 @@ export {
     COST_TIER_MAP,
     COST_TIER_WEIGHT,
     GENERATION_WEIGHT,
+    CAPACITY_TYPE_PRIORITY,
     TP_OVERHEAD_PER_GPU
 }