npm - @aws/ml-container-creator - Versions diffs - 0.13.5 → 0.15.0 - Mend

@aws/ml-container-creator 0.13.5 → 0.15.0

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (35)

package/config/parameter-schema-v2.json +32 -4
package/infra/ci-harness/lib/ci-harness-stack.ts +13 -5
package/infra/ci-harness/package-lock.json +121 -111
package/infra/ci-harness/package.json +1 -1
package/package.json +2 -2
package/servers/instance-sizer/index.js +72 -4
package/servers/instance-sizer/lib/model-resolver.js +28 -2
package/src/app.js +15 -0
package/src/lib/config-loader.js +18 -0
package/src/lib/config-manager.js +6 -1
package/src/lib/dataset-slug.js +152 -0
package/src/lib/generated/cli-options.js +9 -3
package/src/lib/generated/parameter-matrix.js +14 -3
package/src/lib/generated/validation-rules.js +1 -1
package/src/lib/mcp-query-runner.js +6 -0
package/src/lib/prompt-runner.js +5 -0
package/src/lib/prompts/feature-prompts.js +1 -1
package/src/lib/template-manager.js +0 -7
package/src/lib/template-variable-resolver.js +51 -1
package/src/lib/tune-config-state.js +14 -1
package/templates/do/.benchmark_writer.py +9 -0
package/templates/do/.register_helper.py +1163 -0
package/templates/do/.tune_helper.py +168 -2
package/templates/do/__pycache__/.adapter_helper.cpython-312.pyc +0 -0
package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
package/templates/do/__pycache__/.register_helper.cpython-312.pyc +0 -0
package/templates/do/__pycache__/.tune_helper.cpython-312.pyc +0 -0
package/templates/do/adapter +319 -27
package/templates/do/add-ic +85 -3
package/templates/do/benchmark +28 -8
package/templates/do/config +20 -0
package/templates/do/lib/inference-component.sh +56 -3
package/templates/do/register +552 -6
package/templates/do/test +12 -2
package/templates/do/tune +201 -6

package/templates/do/benchmark CHANGED

@@ -39,17 +39,18 @@ while [ $# -gt 0 ]; do
             echo "Options:"
             echo "  --status            Check job status; if completed, download results + write to Athena"
             echo "  --ic <name>         Benchmark a specific inference component"
-            echo "  --adapter <name>    Benchmark a specific LoRA adapter IC"
+            echo "  --adapter <name>    Benchmark a specific LoRA adapter (routes via adapter IC, records adapter_name in Athena)"
             echo "  --force             Create a new benchmark job even if one is already running"
             echo "  --clean             Delete workload config and benchmark job after displaying results"
             echo "  --no-stale-warning  Suppress schema registry staleness warning"
-            echo "  --no-stale-warning  Suppress schema registry staleness warning"
             echo ""
             echo "IC resolution:"
             echo "  --adapter <name> Use ADAPTER_IC_NAME from do/adapters/<name>.conf"
             echo "  --ic <name>      Use IC_DEPLOYED_NAME from do/ic/<name>.conf"
             echo "  (no flag)        Use first IC in do/ic/ alphabetically, or legacy config"
             echo ""
+            echo "Adapter benchmarks are differentiated from base model runs in Athena via the adapter_name column."
+            echo ""
             echo "Status:"
             echo "  After interrupting a running benchmark, use --status to check completion"
             echo "  and trigger results download + Athena write."
@@ -113,10 +114,17 @@ if [ "${ARG_STATUS}" = true ]; then
                     aws s3 sync "${RESULTS_S3_PATH}" "${LOCAL_RESULTS_DIR}/output/" \
                         --region "${AWS_REGION}" --quiet
                     # Untar if output.tar.gz exists
-                    local tar_file
+                    tar_file=""
                     tar_file=$(find "${LOCAL_RESULTS_DIR}" -name "output.tar.gz" -type f 2>/dev/null | head -1)
                     if [ -n "${tar_file}" ]; then
-                        tar -xzf "${tar_file}" --strip-components=1 -C "${LOCAL_RESULTS_DIR}/output/" 2>/dev/null || true
+                        # Detect whether tar has a leading directory prefix
+                        _tar_first=""
+                        _tar_first=$(tar -tzf "${tar_file}" 2>/dev/null | head -1)
+                        if echo "${_tar_first}" | grep -qE '^[^/]+/$'; then
+                            tar -xzf "${tar_file}" --strip-components=1 -C "${LOCAL_RESULTS_DIR}/output/" 2>/dev/null || true
+                        else
+                            tar -xzf "${tar_file}" -C "${LOCAL_RESULTS_DIR}/output/" 2>/dev/null || true
+                        fi
                     fi
                     # Re-search after extraction
                     RESULTS_JSONL=$(find "${LOCAL_RESULTS_DIR}" -name "profile_export.jsonl" -type f 2>/dev/null | head -1)
@@ -145,7 +153,8 @@ if [ "${ARG_STATUS}" = true ]; then
                         --workload "${BENCHMARK_WORKLOAD:-manual}" \
                         --concurrency "${BENCHMARK_CONCURRENCY:-2}" \
                         --bucket "${CI_BENCHMARK_RESULTS_BUCKET}" \
-                        --region "${AWS_REGION:-${REGION}}"; then
+                        --region "${AWS_REGION:-${REGION}}" \
+                        ${ADAPTER_ARG:+--adapter-name "${ADAPTER_ARG}"}; then
                         echo "   ✅ Results persisted to Athena"
                     else
                         echo "   ⚠️  Athena write failed (non-fatal)"
@@ -549,7 +558,8 @@ print(f'Combined {n_metrics} concurrency level results')
                 --project-name "${PROJECT_NAME}" \
                 --workload "${BENCHMARK_WORKLOAD:-manual}" \
                 --bucket "${CI_BENCHMARK_RESULTS_BUCKET}" \
-                --region "${AWS_REGION:-${REGION}}"; then
+                --region "${AWS_REGION:-${REGION}}" \
+                ${ADAPTER_ARG:+--adapter-name "${ADAPTER_ARG}"}; then
                 echo "✅ Multi-level benchmark results persisted to S3"
             else
                 echo "⚠️  Failed to persist multi-level benchmark results to Athena (non-fatal)"
@@ -1087,7 +1097,16 @@ if [ "${JOB_STATUS}" = "Completed" ]; then
             # Extract any tar.gz archives (benchmark service packages results as output.tar.gz)
             for ARCHIVE in $(find "${LOCAL_RESULTS_DIR}" -name "*.tar.gz" -type f 2>/dev/null); do
                 ARCHIVE_DIR=$(dirname "${ARCHIVE}")
-                tar -xzf "${ARCHIVE}" --strip-components=1 -C "${ARCHIVE_DIR}" 2>/dev/null || true
+                # Detect whether tar has a leading directory prefix to strip.
+                # Some AIPerf versions wrap in output/, others are flat.
+                _TAR_FIRST=$(tar -tzf "${ARCHIVE}" 2>/dev/null | head -1)
+                if echo "${_TAR_FIRST}" | grep -qE '^[^/]+/$'; then
+                    # Leading directory (e.g., "output/") — strip it
+                    tar -xzf "${ARCHIVE}" --strip-components=1 -C "${ARCHIVE_DIR}" 2>/dev/null || true
+                else
+                    # Flat archive — extract as-is
+                    tar -xzf "${ARCHIVE}" -C "${ARCHIVE_DIR}" 2>/dev/null || true
+                fi
             done
             # Look for specific result files (priority: JSONL > aiperf JSON)
@@ -1346,7 +1365,8 @@ except Exception as e:
             --workload "${BENCHMARK_WORKLOAD:-manual}" \
             --concurrency "${BENCHMARK_CONCURRENCY}" \
             --bucket "${CI_BENCHMARK_RESULTS_BUCKET}" \
-            --region "${AWS_REGION:-${REGION}}"; then
+            --region "${AWS_REGION:-${REGION}}" \
+            ${ADAPTER_ARG:+--adapter-name "${ADAPTER_ARG}"}; then
             echo "✅ Benchmark results persisted to S3"
         else
             echo "⚠️  Failed to persist benchmark results to Athena (non-fatal)"

package/templates/do/config CHANGED

@@ -211,6 +211,26 @@ export <%= key %>=${<%= key %>:-<%= value %>}
 <% }); %>
 <% } %>
+<% if (typeof icEnvVars !== 'undefined' && icEnvVars && Object.keys(icEnvVars).length > 0) { %>
+# Deploy-time IC environment variables (applied at IC creation via SDK v3, not baked into image)
+# Max 16 vars, max 1024 chars per key/value
+# WARNING: Do not store raw secrets here. Use Secrets Manager ARN pattern instead:
+#   export IC_ENV_HF_TOKEN_ARN=arn:aws:secretsmanager:REGION:ACCOUNT:secret:NAME
+#   Runtime code resolves the ARN to the secret value (same pattern as HF_TOKEN_ARN in do/build).
+<% Object.entries(icEnvVars).forEach(([key, value]) => { %>
+export IC_ENV_<%= key %>=${IC_ENV_<%= key %>:-<%= value %>}
+<% }); %>
+<% } else if (deploymentTarget === 'realtime-inference') { %>
+# ─── Deploy-time IC environment variables (uncomment to configure) ─────────────
+# These are passed as the Environment field in InferenceComponent.create() at deploy time.
+# They do NOT affect the Docker build — build-time env vars remain in Dockerfile via ENV.
+# Max 16 vars, max 1024 chars per key/value.
+# WARNING: Do not store raw secrets here. Use Secrets Manager ARN pattern instead:
+#   export IC_ENV_HF_TOKEN_ARN=arn:aws:secretsmanager:REGION:ACCOUNT:secret:NAME
+# export IC_ENV_VLLM_MAX_MODEL_LEN=8192
+# export IC_ENV_VLLM_GPU_MEMORY_UTILIZATION=0.85
+<% } %>
 # Framework-specific configuration
 <% if (framework === 'transformers') { %>
 export MODEL_NAME="<%= modelName %>"

package/templates/do/lib/inference-component.sh CHANGED

@@ -6,6 +6,50 @@
 #   PROJECT_NAME, ENDPOINT_NAME, ECR_REPOSITORY, AWS_REGION, CONTAINER_ENV_JSON
 # Also expects _update_config_var() to be available (from wait.sh).
+# _collect_ic_env_vars()
+#   Reads IC_ENV_* prefixed variables from the environment (sourced from do/config),
+#   strips the IC_ENV_ prefix, validates constraints, and outputs JSON key-value pairs.
+#   Constraints: max 16 entries, max 1024 chars per key/value.
+#   IC_ENV_* overrides take precedence over CONTAINER_ENV_JSON.
+#
+#   Sets IC_ENV_OVERRIDE in the caller's scope.
+_collect_ic_env_vars() {
+    IC_ENV_OVERRIDE=""
+    local ic_env_count=0
+    while IFS='=' read -r full_key value; do
+        # Skip empty lines
+        [ -z "${full_key}" ] && continue
+        local stripped_key="${full_key#IC_ENV_}"
+        # Validate key length (AC-3.4)
+        if [ ${#stripped_key} -gt 1024 ]; then
+            echo "⚠️  IC_ENV_${stripped_key}: key exceeds 1024 chars, skipping" >&2
+            continue
+        fi
+        # Validate value length (AC-3.4)
+        if [ ${#value} -gt 1024 ]; then
+            echo "⚠️  IC_ENV_${stripped_key}: value exceeds 1024 chars, skipping" >&2
+            continue
+        fi
+        ic_env_count=$((ic_env_count + 1))
+        # Max 16 env vars (AC-3.3)
+        if [ ${ic_env_count} -gt 16 ]; then
+            echo "⚠️  More than 16 IC_ENV_* variables defined. Using first 16 only." >&2
+            break
+        fi
+        if [ -n "${IC_ENV_OVERRIDE}" ]; then
+            IC_ENV_OVERRIDE="${IC_ENV_OVERRIDE},"
+        fi
+        IC_ENV_OVERRIDE="${IC_ENV_OVERRIDE}\"${stripped_key}\":\"${value}\""
+    done < <(env | grep "^IC_ENV_" | sort)
+}
 # create_inference_component <ic_config_file>
 #   Creates an inference component from a per-IC config file.
 #
@@ -17,6 +61,10 @@
 #     IC_STARTUP_TIMEOUT — container startup health check timeout in seconds (default: 900)
 #     IC_CONTAINER_ENV_EXTRA — optional extra env vars in "KEY":"value" format
 #
+#   IC_ENV_* prefixed vars from do/config are collected, validated, and passed
+#   as the Environment field in InferenceComponent.create() via SDK v3.
+#   Precedence: IC_ENV_* > IC_CONTAINER_ENV_EXTRA > CONTAINER_ENV_JSON
+#
 #   Multi-spec support (for heterogeneous instance pools):
 #     IC_MULTI_SPEC      — set to "true" to use Specifications (plural) array
 #     IC_SPEC_COUNT      — number of spec entries (e.g., 2)
@@ -38,6 +86,9 @@ create_inference_component() {
     # Source the IC config to get per-IC settings
     source "${ic_conf}"
+    # Collect IC_ENV_* overrides from environment (sourced from do/config)
+    _collect_ic_env_vars
     local ic_timestamp
     ic_timestamp=$(date +%s)
     local ic_basename
@@ -48,9 +99,11 @@ create_inference_component() {
     local container_spec="{\"Image\":\"${ECR_REPOSITORY}:${IC_IMAGE_TAG:-${PROJECT_NAME}-latest}\""
     # Always inject IC name for CW log forwarder
     local ic_env="\"INFERENCE_COMPONENT_NAME\":\"${ic_name}\""
-    if [ -n "${CONTAINER_ENV_JSON}${IC_CONTAINER_ENV_EXTRA:-}" ]; then
-        local env_json="${CONTAINER_ENV_JSON}"
-        [ -n "${IC_CONTAINER_ENV_EXTRA:-}" ] && env_json="${env_json:+${env_json},}${IC_CONTAINER_ENV_EXTRA}"
+    # Build environment JSON with precedence: IC_ENV_* > IC_CONTAINER_ENV_EXTRA > CONTAINER_ENV_JSON
+    local env_json="${CONTAINER_ENV_JSON}"
+    [ -n "${IC_CONTAINER_ENV_EXTRA:-}" ] && env_json="${env_json:+${env_json},}${IC_CONTAINER_ENV_EXTRA}"
+    [ -n "${IC_ENV_OVERRIDE:-}" ] && env_json="${env_json:+${env_json},}${IC_ENV_OVERRIDE}"
+    if [ -n "${env_json}" ]; then
         container_spec="${container_spec},\"Environment\":{${ic_env},${env_json}}"
     else
         container_spec="${container_spec},\"Environment\":{${ic_env}}"