npm - @aws/ml-container-creator - Versions diffs - 0.4.0 → 0.6.0 - Mend

@aws/ml-container-creator 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

package/bin/cli.js +5 -2
package/config/bootstrap-stack.json +40 -9
package/infra/ci-harness/buildspec.yml +60 -0
package/infra/ci-harness/package-lock.json +5 -1
package/package.json +1 -1
package/servers/README.md +41 -1
package/servers/instance-sizer/index.js +10 -4
package/servers/instance-sizer/lib/model-resolver.js +1 -1
package/servers/lib/catalogs/model-sizes.json +135 -90
package/servers/lib/catalogs/models.json +483 -411
package/src/app.js +33 -2
package/src/lib/bootstrap-command-handler.js +6 -0
package/src/lib/cli-handler.js +1 -1
package/src/lib/config-manager.js +41 -2
package/src/lib/deployment-entry-schema.js +16 -0
package/src/lib/mcp-client.js +3 -3
package/src/lib/prompt-runner.js +179 -8
package/src/lib/prompts.js +253 -7
package/src/lib/registry-command-handler.js +12 -0
package/templates/Dockerfile +12 -0
package/templates/code/serving.properties +14 -0
package/templates/do/adapter +1230 -0
package/templates/do/adapters/.gitkeep +2 -0
package/templates/do/add-ic +130 -0
package/templates/do/benchmark +81 -9
package/templates/do/clean +507 -17
package/templates/do/config +28 -5
package/templates/do/deploy +513 -367
package/templates/do/ic/default.conf +32 -0
package/templates/do/lib/endpoint-config.sh +216 -0
package/templates/do/lib/inference-component.sh +167 -0
package/templates/do/lib/secrets.sh +44 -0
package/templates/do/lib/wait.sh +131 -0
package/templates/do/logs +107 -27
package/templates/do/optimize +528 -0
package/templates/do/register +111 -1
package/templates/do/status +337 -0
package/templates/do/test +80 -28

package/bin/cli.js CHANGED Viewed

@@ -98,6 +98,9 @@ program
     .addOption(new Option('--include-sample', 'Include sample model code'))
     .addOption(new Option('--include-testing', 'Include test suite'))
     .addOption(new Option('--test-types <types>', 'Comma-separated test types'))
+    .addOption(new Option('--enable-lora', 'Enable LoRA adapter serving (transformers with vllm/sglang/djl-lmi only)'))
+    .addOption(new Option('--max-loras <n>', 'Maximum concurrent LoRA adapters in GPU memory (default: 30)'))
+    .addOption(new Option('--max-lora-rank <n>', 'Maximum LoRA rank (default: 64)'))
     // --- MCP & Discovery ---
     .addOption(new Option('--smart', 'Enable Bedrock-powered smart mode on MCP servers'))
@@ -190,7 +193,7 @@ program.configureHelp({
                 groups.env.push(opt);
             } else if (['--hf-token', '--hf-token-arn', '--ngc-token', '--ngc-token-arn'].includes(long)) {
                 groups.auth.push(opt);
-            } else if (['--include-sample', '--include-testing', '--test-types'].includes(long)) {
+            } else if (['--include-sample', '--include-testing', '--test-types', '--enable-lora', '--max-loras', '--max-lora-rank'].includes(long)) {
                 groups.features.push(opt);
             } else if (['--smart', '--discover'].includes(long)) {
                 groups.mcp.push(opt);
@@ -307,7 +310,6 @@ program
 program
     .command('registry')
     .description('Registry operations (list, get, remove, replay, export, import, search) — experimental, may be reconciled with do/register')
-    .passThroughOptions()
     .argument('<action>', 'Registry action (log, list, get, remove, replay, export, import, search)')
     .argument('[args...]', 'Additional arguments')
     .option('--backend <backend>', 'Filter by backend')
@@ -328,6 +330,7 @@ program
     .option('--notes <text>', 'Deployment notes')
     .option('--project', 'Use project-level registry')
     .option('--parameters <json>', 'Parameters JSON string')
+    .option('--ic-list <json>', 'IC list JSON string')
     .option('--generator-version <version>', 'Generator version')
     // Options used by `registry list-architectures`
     .option('--server <name>', 'Filter by server name (for list-architectures)')

package/config/bootstrap-stack.json CHANGED Viewed

@@ -7,7 +7,7 @@
       "Type": "String",
       "Default": "false",
       "AllowedValues": ["true", "false"],
-      "Description": "Whether to create S3 buckets for async inference and batch transform"
+      "Description": "Whether to create S3 buckets for async inference, batch transform, adapters, and benchmarks"
     },
     "UseExistingRoleArn": {
       "Type": "String",
@@ -134,8 +134,8 @@
                     "s3:ListBucket"
                   ],
                   "Resource": [
-                    "arn:aws:s3:::ml-container-creator-*",
-                    "arn:aws:s3:::ml-container-creator-*/*"
+                    "arn:aws:s3:::mlcc-*",
+                    "arn:aws:s3:::mlcc-*/*"
                   ]
                 },
                 {
@@ -209,7 +209,7 @@
       "DeletionPolicy": "Retain",
       "UpdateReplacePolicy": "Retain",
       "Properties": {
-        "BucketName": { "Fn::Sub": "${AWS::AccountId}-${AWS::Region}-ml-container-creator-async" },
+        "BucketName": { "Fn::Sub": "mlcc-async-${AWS::AccountId}-${AWS::Region}" },
         "VersioningConfiguration": { "Status": "Enabled" },
         "BucketEncryption": {
           "ServerSideEncryptionConfiguration": [
@@ -218,7 +218,8 @@
         },
         "Tags": [
           { "Key": "mlcc:managed-by", "Value": "ml-container-creator" },
-          { "Key": "mlcc:created-by", "Value": "bootstrap" }
+          { "Key": "mlcc:created-by", "Value": "bootstrap" },
+          { "Key": "mlcc:purpose", "Value": "async-inference-output" }
         ]
       }
     },
@@ -229,7 +230,7 @@
       "DeletionPolicy": "Retain",
       "UpdateReplacePolicy": "Retain",
       "Properties": {
-        "BucketName": { "Fn::Sub": "${AWS::AccountId}-${AWS::Region}-ml-container-creator-batch" },
+        "BucketName": { "Fn::Sub": "mlcc-batch-${AWS::AccountId}-${AWS::Region}" },
         "VersioningConfiguration": { "Status": "Enabled" },
         "BucketEncryption": {
           "ServerSideEncryptionConfiguration": [
@@ -238,17 +239,40 @@
         },
         "Tags": [
           { "Key": "mlcc:managed-by", "Value": "ml-container-creator" },
-          { "Key": "mlcc:created-by", "Value": "bootstrap" }
+          { "Key": "mlcc:created-by", "Value": "bootstrap" },
+          { "Key": "mlcc:purpose", "Value": "batch-transform-io" }
+        ]
+      }
+    },
+    "AdapterS3Bucket": {
+      "Type": "AWS::S3::Bucket",
+      "Condition": "ShouldCreateS3Buckets",
+      "DeletionPolicy": "Retain",
+      "UpdateReplacePolicy": "Retain",
+      "Properties": {
+        "BucketName": { "Fn::Sub": "mlcc-adapters-${AWS::AccountId}-${AWS::Region}" },
+        "VersioningConfiguration": { "Status": "Enabled" },
+        "BucketEncryption": {
+          "ServerSideEncryptionConfiguration": [
+            { "ServerSideEncryptionByDefault": { "SSEAlgorithm": "AES256" } }
+          ]
+        },
+        "Tags": [
+          { "Key": "mlcc:managed-by", "Value": "ml-container-creator" },
+          { "Key": "mlcc:created-by", "Value": "bootstrap" },
+          { "Key": "mlcc:purpose", "Value": "lora-adapter-storage" }
         ]
       }
     },
     "BenchmarkS3Bucket": {
       "Type": "AWS::S3::Bucket",
+      "Condition": "ShouldCreateS3Buckets",
       "DeletionPolicy": "Retain",
       "UpdateReplacePolicy": "Retain",
       "Properties": {
-        "BucketName": { "Fn::Sub": "ml-container-creator-benchmark-${AWS::Region}-${AWS::AccountId}" },
+        "BucketName": { "Fn::Sub": "mlcc-benchmark-${AWS::AccountId}-${AWS::Region}" },
         "VersioningConfiguration": { "Status": "Enabled" },
         "BucketEncryption": {
           "ServerSideEncryptionConfiguration": [
@@ -257,7 +281,8 @@
         },
         "Tags": [
           { "Key": "mlcc:managed-by", "Value": "ml-container-creator" },
-          { "Key": "mlcc:created-by", "Value": "bootstrap" }
+          { "Key": "mlcc:created-by", "Value": "bootstrap" },
+          { "Key": "mlcc:purpose", "Value": "benchmark-results" }
         ]
       }
     }
@@ -292,7 +317,13 @@
       "Description": "S3 bucket for batch transform I/O",
       "Value": { "Ref": "BatchS3Bucket" }
     },
+    "AdapterS3BucketName": {
+      "Condition": "ShouldCreateS3Buckets",
+      "Description": "S3 bucket for LoRA adapter storage",
+      "Value": { "Ref": "AdapterS3Bucket" }
+    },
     "BenchmarkS3BucketName": {
+      "Condition": "ShouldCreateS3Buckets",
       "Description": "S3 bucket for benchmark results output",
       "Value": { "Ref": "BenchmarkS3Bucket" }
     },

package/infra/ci-harness/buildspec.yml CHANGED Viewed

@@ -40,6 +40,10 @@ phases:
       - REGISTER_DURATION=0
       - REGISTER_LOG_POINTER=""
       - REGISTER_ERROR_SUMMARY=""
+      - ADAPTER_TEST_STATUS="skip"
+      - ADAPTER_TEST_DURATION=0
+      - ADAPTER_TEST_LOG_POINTER=""
+      - ADAPTER_TEST_ERROR_SUMMARY=""
       - TEARDOWN_STATUS="skip"
       - TEARDOWN_DURATION=0
       - TEARDOWN_LOG_POINTER=""
@@ -182,6 +186,54 @@ phases:
         fi
       - rm -f "$STAGE_STDERR_FILE"
+      # --- Stage: Adapter_Test (only if do/adapters/ has .conf files) ---
+      - echo "=== Stage: Adapter_Test ==="
+      - STAGE_START=$(date +%s)
+      - ADAPTER_TEST_LOG_POINTER="$LOG_POINTER_PREFIX"
+      - STAGE_STDERR_FILE=$(mktemp)
+      - |
+        if [ -n "$FIRST_FAILURE" ]; then
+          echo "Skipping Adapter_Test stage due to prior failure in $FIRST_FAILURE"
+          ADAPTER_TEST_STATUS="skip"
+          ADAPTER_TEST_DURATION=0
+        else
+          cd /tmp/ci-project
+          ADAPTER_CONFS=$(find do/adapters -name '*.conf' 2>/dev/null | grep -v '.gitkeep' || true)
+          if [ -z "$ADAPTER_CONFS" ]; then
+            echo "No adapter configs found in do/adapters/ — skipping"
+            ADAPTER_TEST_STATUS="skip"
+            ADAPTER_TEST_DURATION=0
+          else
+            (
+              set -e
+              cd /tmp/ci-project
+              for conf in do/adapters/*.conf; do
+                [ -f "$conf" ] || continue
+                [[ "$(basename "$conf")" == ".gitkeep" ]] && continue
+                ADAPTER_NAME=$(basename "$conf" .conf)
+                echo "Testing adapter: ${ADAPTER_NAME}"
+                # Source to get weights URI
+                source "$conf"
+                ./do/adapter add "${ADAPTER_NAME}" --weights "${ADAPTER_WEIGHTS_URI}"
+                ./do/test --ic "${ADAPTER_NAME}"
+                ./do/adapter remove "${ADAPTER_NAME}"
+              done
+            ) 2>"$STAGE_STDERR_FILE"; STAGE_EXIT=$?
+            STAGE_END=$(date +%s)
+            ADAPTER_TEST_DURATION=$((STAGE_END - STAGE_START))
+            if [ "$STAGE_EXIT" -eq 0 ]; then
+              ADAPTER_TEST_STATUS="pass"
+              echo "Adapter_Test stage passed in ${ADAPTER_TEST_DURATION}s"
+            else
+              ADAPTER_TEST_STATUS="fail"
+              ADAPTER_TEST_ERROR_SUMMARY=$(tail -c 500 "$STAGE_STDERR_FILE" | tr -d '\000' | tr '"' "'" | tr '\n' ' ')
+              FIRST_FAILURE="adapter_test"
+              echo "Adapter_Test stage FAILED (exit code $STAGE_EXIT) in ${ADAPTER_TEST_DURATION}s"
+            fi
+          fi
+        fi
+      - rm -f "$STAGE_STDERR_FILE"
       # --- Stage: Register (placeholder) ---
       - echo "=== Stage: Register ==="
       - STAGE_START=$(date +%s)
@@ -260,6 +312,7 @@ phases:
             validate)    FINAL_ERROR_MESSAGE="$VALIDATE_ERROR_SUMMARY" ;;
             build)       FINAL_ERROR_MESSAGE="$BUILD_ERROR_SUMMARY" ;;
             deploy_test) FINAL_ERROR_MESSAGE="$DEPLOY_TEST_ERROR_SUMMARY" ;;
+            adapter_test) FINAL_ERROR_MESSAGE="$ADAPTER_TEST_ERROR_SUMMARY" ;;
             register)    FINAL_ERROR_MESSAGE="$REGISTER_ERROR_SUMMARY" ;;
             *)           FINAL_ERROR_MESSAGE="Unknown failure stage" ;;
           esac
@@ -272,6 +325,7 @@ phases:
         ESCAPED_VALIDATE_ERROR=$(printf '%s' "$VALIDATE_ERROR_SUMMARY" | sed 's/\\/\\\\/g; s/"/\\"/g')
         ESCAPED_BUILD_ERROR=$(printf '%s' "$BUILD_ERROR_SUMMARY" | sed 's/\\/\\\\/g; s/"/\\"/g')
         ESCAPED_DEPLOY_TEST_ERROR=$(printf '%s' "$DEPLOY_TEST_ERROR_SUMMARY" | sed 's/\\/\\\\/g; s/"/\\"/g')
+        ESCAPED_ADAPTER_TEST_ERROR=$(printf '%s' "$ADAPTER_TEST_ERROR_SUMMARY" | sed 's/\\/\\\\/g; s/"/\\"/g')
         ESCAPED_REGISTER_ERROR=$(printf '%s' "$REGISTER_ERROR_SUMMARY" | sed 's/\\/\\\\/g; s/"/\\"/g')
         ESCAPED_TEARDOWN_ERROR=$(printf '%s' "$TEARDOWN_ERROR_SUMMARY" | sed 's/\\/\\\\/g; s/"/\\"/g')
         ESCAPED_FINAL_ERROR=$(printf '%s' "$FINAL_ERROR_MESSAGE" | sed 's/\\/\\\\/g; s/"/\\"/g')
@@ -314,6 +368,12 @@ phases:
                   \"logPointer\": {\"S\": \"$DEPLOY_TEST_LOG_POINTER\"},
                   \"errorSummary\": {\"S\": \"$ESCAPED_DEPLOY_TEST_ERROR\"}
                 }},
+                \"adapter_test\": {\"M\": {
+                  \"status\": {\"S\": \"$ADAPTER_TEST_STATUS\"},
+                  \"durationSeconds\": {\"N\": \"$ADAPTER_TEST_DURATION\"},
+                  \"logPointer\": {\"S\": \"$ADAPTER_TEST_LOG_POINTER\"},
+                  \"errorSummary\": {\"S\": \"$ESCAPED_ADAPTER_TEST_ERROR\"}
+                }},
                 \"register\": {\"M\": {
                   \"status\": {\"S\": \"$REGISTER_STATUS\"},
                   \"durationSeconds\": {\"N\": \"$REGISTER_DURATION\"},

package/infra/ci-harness/package-lock.json CHANGED Viewed

@@ -48,6 +48,7 @@
         "semver"
       ],
       "license": "Apache-2.0",
+      "peer": true,
       "dependencies": {
         "jsonschema": "~1.4.1",
         "semver": "^7.7.4"
@@ -2150,6 +2151,7 @@
       "integrity": "sha512-wGdMcf+vPYM6jikpS/qhg6WiqSV/OhG+jeeHT/KlVqxYfD40iYJf9/AE1uQxVWFvU7MipKRkRv8NSHiCGgPr8Q==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "undici-types": "~6.21.0"
       }
@@ -2789,7 +2791,8 @@
       "version": "10.6.0",
       "resolved": "https://registry.npmjs.org/constructs/-/constructs-10.6.0.tgz",
       "integrity": "sha512-TxHOnBO5zMo/G76ykzGF/wMpEHu257TbWiIxP9K0Yv/+t70UzgBQiTqjkAsWOPC6jW91DzJI0+ehQV6xDRNBuQ==",
-      "license": "Apache-2.0"
+      "license": "Apache-2.0",
+      "peer": true
     },
     "node_modules/create-require": {
       "version": "1.1.1",
@@ -3694,6 +3697,7 @@
       "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
       "dev": true,
       "license": "Apache-2.0",
+      "peer": true,
       "bin": {
         "tsc": "bin/tsc",
         "tsserver": "bin/tsserver"

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@aws/ml-container-creator",
-  "version": "0.4.0",
+  "version": "0.6.0",
   "description": "Generator for SageMaker AI BYOC paradigm for predictive inference use-cases.",
   "type": "module",
   "main": "src/app.js",

package/servers/README.md CHANGED Viewed

@@ -15,7 +15,12 @@ servers/
 │   ├── test.js                 # Standalone tests (node test.js)
 │   ├── package.json
 │   └── LICENSE
-└── region-picker/              # AWS region suggestion server
+├── region-picker/              # AWS region suggestion server
+│   ├── index.js                # MCP server entry point
+│   ├── test.js                 # Standalone tests (node test.js)
+│   ├── package.json
+│   └── LICENSE
+└── endpoint-picker/            # SageMaker endpoint discovery server
     ├── index.js                # MCP server entry point
     ├── test.js                 # Standalone tests (node test.js)
     ├── package.json
@@ -74,6 +79,39 @@ Suggests AWS regions for SageMaker deployments based on a search term. Filters t
 }
 ```
+### endpoint-picker
+Discovers InService SageMaker real-time endpoints with available GPU capacity for attaching new inference components. Uses `ListEndpoints`, `DescribeEndpoint`, and `ListInferenceComponents` to calculate available capacity.
+**Discover mode:** Queries the SageMaker API using a 3-strategy credential fallback (explicit profile → default chain → detect profiles). No static mode — always requires AWS credentials.
+**Tool:** `get_inference_endpoints`
+| Input Field | Type | Description |
+|-------------|------|-------------|
+| `parameters` | `string[]` | Must include `"endpointName"` to get results |
+| `limit` | `number` | Max endpoints to return (default: 10) |
+| `context` | `object` | `awsRegion`, `awsProfile`, `deploymentTarget` (must be `realtime-inference`) |
+**Example response:**
+```json
+{
+  "values": { "endpointName": "my-endpoint-1234567890" },
+  "choices": { "endpointName": ["my-endpoint-1234567890", "prod-llm-endpoint"] },
+  "metadata": {
+    "my-endpoint-1234567890": {
+      "variantName": "AllTraffic",
+      "instanceType": "ml.g6e.48xlarge",
+      "instanceCount": 1,
+      "icCount": 2,
+      "availableGpus": 4,
+      "hasInstancePools": false
+    }
+  }
+}
+```
 ## Usage
 ### Adding a Bundled Server
@@ -297,6 +335,7 @@ The Bedrock API didn't respond within 10 seconds. This usually means network con
 ```bash
 node servers/region-picker/test.js
 node servers/instance-recommender/test.js
+node servers/endpoint-picker/test.js
 ```
 ### Smart Mode Not Activating
@@ -313,6 +352,7 @@ Each server has standalone tests that run without AWS credentials or network acc
 # Run individual server tests
 node servers/region-picker/test.js
 node servers/instance-recommender/test.js
+node servers/endpoint-picker/test.js
 # Run all server tests from the project root
 npm run test:servers

package/servers/instance-sizer/index.js CHANGED Viewed

@@ -51,7 +51,7 @@ try {
 // ── Mode configuration ───────────────────────────────────────────────────────
-const DISCOVER_MODE = process.argv.includes('--discover') || process.env.DISCOVER_MODE === 'true'
+const DISCOVER_MODE = process.env.DISCOVER_MODE !== 'false' && !process.argv.includes('--no-discover')
 const SMART_MODE = process.env.BEDROCK_SMART === 'true'
 const BEDROCK_MODEL = process.env.BEDROCK_MODEL || 'global.anthropic.claude-sonnet-4-20250514-v1:0'
 const BEDROCK_REGION = process.env.BEDROCK_REGION || process.env.AWS_REGION || 'us-east-1'
@@ -383,6 +383,7 @@ async function handleGetInstanceRecommendation(params) {
     // Step 3a: Quota & availability filtering (discover mode only)
     let preQuotaFilterCount = 0
     let allFilteredByQuota = false
+    let preQuotaRecommendations = []
     if (DISCOVER_MODE && recommendations.length > 0) {
         try {
             const region = process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION || BEDROCK_REGION
@@ -396,6 +397,7 @@ async function handleGetInstanceRecommendation(params) {
             ])
             preQuotaFilterCount = recommendations.length
+            preQuotaRecommendations = [...recommendations]
             recommendations = applyAvailabilityRanking(
                 recommendations,
                 quotas.status === 'fulfilled' ? quotas.value : null,
@@ -404,6 +406,10 @@ async function handleGetInstanceRecommendation(params) {
             )
             if (recommendations.length === 0 && preQuotaFilterCount > 0) {
                 allFilteredByQuota = true
+                // Restore pre-filter recommendations so user can see compatible instances
+                // and request quota increases for the ones they want
+                recommendations = preQuotaRecommendations
+                log(`All ${preQuotaFilterCount} instances filtered by zero-quota — restoring unfiltered list`)
             }
         } catch (err) {
             // Graceful degradation: if credentials are missing or any unexpected
@@ -587,10 +593,10 @@ const isMain = process.argv[1] && resolve(process.argv[1]) === __filename
 if (isMain) {
     if (SMART_MODE) {
         log(`Smart mode enabled (model: ${BEDROCK_MODEL}, region: ${BEDROCK_REGION})`)
-    } else if (DISCOVER_MODE) {
-        log('Discover mode enabled (HuggingFace API lookups active)')
+    } else if (!DISCOVER_MODE) {
+        log('Static mode (catalog-only, no network calls) — use --no-discover to force this')
     } else {
-        log('Static mode (catalog-only, no network calls)')
+        log('Discover mode (HuggingFace API + quota lookups active)')
     }
     const transport = new StdioServerTransport()

package/servers/instance-sizer/lib/model-resolver.js CHANGED Viewed

@@ -207,7 +207,7 @@ const isHuggingFacePattern = (modelName) => {
  * @returns {Promise<{ parameterCount: number, dtype: string, architecture: string, maxPositionEmbeddings: number, source: string } | null>}
  */
 const resolveModelMetadata = async (modelName, options = {}) => {
-    const { discover = false, catalogPath } = options
+    const { discover = true, catalogPath } = options
     // Tier 1: Catalog lookup
     const catalog = await loadCatalog(catalogPath)