@aws/ml-container-creator 0.13.5 → 0.15.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/config/parameter-schema-v2.json +32 -4
  2. package/infra/ci-harness/lib/ci-harness-stack.ts +13 -5
  3. package/infra/ci-harness/package-lock.json +121 -111
  4. package/infra/ci-harness/package.json +1 -1
  5. package/package.json +2 -2
  6. package/servers/instance-sizer/index.js +72 -4
  7. package/servers/instance-sizer/lib/model-resolver.js +28 -2
  8. package/src/app.js +15 -0
  9. package/src/lib/config-loader.js +18 -0
  10. package/src/lib/config-manager.js +6 -1
  11. package/src/lib/dataset-slug.js +152 -0
  12. package/src/lib/generated/cli-options.js +9 -3
  13. package/src/lib/generated/parameter-matrix.js +14 -3
  14. package/src/lib/generated/validation-rules.js +1 -1
  15. package/src/lib/mcp-query-runner.js +6 -0
  16. package/src/lib/prompt-runner.js +5 -0
  17. package/src/lib/prompts/feature-prompts.js +1 -1
  18. package/src/lib/template-manager.js +0 -7
  19. package/src/lib/template-variable-resolver.js +51 -1
  20. package/src/lib/tune-config-state.js +14 -1
  21. package/templates/do/.benchmark_writer.py +9 -0
  22. package/templates/do/.register_helper.py +1163 -0
  23. package/templates/do/.tune_helper.py +168 -2
  24. package/templates/do/__pycache__/.adapter_helper.cpython-312.pyc +0 -0
  25. package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
  26. package/templates/do/__pycache__/.register_helper.cpython-312.pyc +0 -0
  27. package/templates/do/__pycache__/.tune_helper.cpython-312.pyc +0 -0
  28. package/templates/do/adapter +319 -27
  29. package/templates/do/add-ic +85 -3
  30. package/templates/do/benchmark +28 -8
  31. package/templates/do/config +20 -0
  32. package/templates/do/lib/inference-component.sh +56 -3
  33. package/templates/do/register +552 -6
  34. package/templates/do/test +12 -2
  35. package/templates/do/tune +201 -6
@@ -39,17 +39,18 @@ while [ $# -gt 0 ]; do
39
39
  echo "Options:"
40
40
  echo " --status Check job status; if completed, download results + write to Athena"
41
41
  echo " --ic <name> Benchmark a specific inference component"
42
- echo " --adapter <name> Benchmark a specific LoRA adapter IC"
42
+ echo " --adapter <name> Benchmark a specific LoRA adapter (routes via adapter IC, records adapter_name in Athena)"
43
43
  echo " --force Create a new benchmark job even if one is already running"
44
44
  echo " --clean Delete workload config and benchmark job after displaying results"
45
45
  echo " --no-stale-warning Suppress schema registry staleness warning"
46
- echo " --no-stale-warning Suppress schema registry staleness warning"
47
46
  echo ""
48
47
  echo "IC resolution:"
49
48
  echo " --adapter <name> Use ADAPTER_IC_NAME from do/adapters/<name>.conf"
50
49
  echo " --ic <name> Use IC_DEPLOYED_NAME from do/ic/<name>.conf"
51
50
  echo " (no flag) Use first IC in do/ic/ alphabetically, or legacy config"
52
51
  echo ""
52
+ echo "Adapter benchmarks are differentiated from base model runs in Athena via the adapter_name column."
53
+ echo ""
53
54
  echo "Status:"
54
55
  echo " After interrupting a running benchmark, use --status to check completion"
55
56
  echo " and trigger results download + Athena write."
@@ -113,10 +114,17 @@ if [ "${ARG_STATUS}" = true ]; then
113
114
  aws s3 sync "${RESULTS_S3_PATH}" "${LOCAL_RESULTS_DIR}/output/" \
114
115
  --region "${AWS_REGION}" --quiet
115
116
  # Untar if output.tar.gz exists
116
- local tar_file
117
+ tar_file=""
117
118
  tar_file=$(find "${LOCAL_RESULTS_DIR}" -name "output.tar.gz" -type f 2>/dev/null | head -1)
118
119
  if [ -n "${tar_file}" ]; then
119
- tar -xzf "${tar_file}" --strip-components=1 -C "${LOCAL_RESULTS_DIR}/output/" 2>/dev/null || true
120
+ # Detect whether tar has a leading directory prefix
121
+ _tar_first=""
122
+ _tar_first=$(tar -tzf "${tar_file}" 2>/dev/null | head -1)
123
+ if echo "${_tar_first}" | grep -qE '^[^/]+/$'; then
124
+ tar -xzf "${tar_file}" --strip-components=1 -C "${LOCAL_RESULTS_DIR}/output/" 2>/dev/null || true
125
+ else
126
+ tar -xzf "${tar_file}" -C "${LOCAL_RESULTS_DIR}/output/" 2>/dev/null || true
127
+ fi
120
128
  fi
121
129
  # Re-search after extraction
122
130
  RESULTS_JSONL=$(find "${LOCAL_RESULTS_DIR}" -name "profile_export.jsonl" -type f 2>/dev/null | head -1)
@@ -145,7 +153,8 @@ if [ "${ARG_STATUS}" = true ]; then
145
153
  --workload "${BENCHMARK_WORKLOAD:-manual}" \
146
154
  --concurrency "${BENCHMARK_CONCURRENCY:-2}" \
147
155
  --bucket "${CI_BENCHMARK_RESULTS_BUCKET}" \
148
- --region "${AWS_REGION:-${REGION}}"; then
156
+ --region "${AWS_REGION:-${REGION}}" \
157
+ ${ADAPTER_ARG:+--adapter-name "${ADAPTER_ARG}"}; then
149
158
  echo " ✅ Results persisted to Athena"
150
159
  else
151
160
  echo " ⚠️ Athena write failed (non-fatal)"
@@ -549,7 +558,8 @@ print(f'Combined {n_metrics} concurrency level results')
549
558
  --project-name "${PROJECT_NAME}" \
550
559
  --workload "${BENCHMARK_WORKLOAD:-manual}" \
551
560
  --bucket "${CI_BENCHMARK_RESULTS_BUCKET}" \
552
- --region "${AWS_REGION:-${REGION}}"; then
561
+ --region "${AWS_REGION:-${REGION}}" \
562
+ ${ADAPTER_ARG:+--adapter-name "${ADAPTER_ARG}"}; then
553
563
  echo "✅ Multi-level benchmark results persisted to S3"
554
564
  else
555
565
  echo "⚠️ Failed to persist multi-level benchmark results to Athena (non-fatal)"
@@ -1087,7 +1097,16 @@ if [ "${JOB_STATUS}" = "Completed" ]; then
1087
1097
  # Extract any tar.gz archives (benchmark service packages results as output.tar.gz)
1088
1098
  for ARCHIVE in $(find "${LOCAL_RESULTS_DIR}" -name "*.tar.gz" -type f 2>/dev/null); do
1089
1099
  ARCHIVE_DIR=$(dirname "${ARCHIVE}")
1090
- tar -xzf "${ARCHIVE}" --strip-components=1 -C "${ARCHIVE_DIR}" 2>/dev/null || true
1100
+ # Detect whether tar has a leading directory prefix to strip.
1101
+ # Some AIPerf versions wrap in output/, others are flat.
1102
+ _TAR_FIRST=$(tar -tzf "${ARCHIVE}" 2>/dev/null | head -1)
1103
+ if echo "${_TAR_FIRST}" | grep -qE '^[^/]+/$'; then
1104
+ # Leading directory (e.g., "output/") — strip it
1105
+ tar -xzf "${ARCHIVE}" --strip-components=1 -C "${ARCHIVE_DIR}" 2>/dev/null || true
1106
+ else
1107
+ # Flat archive — extract as-is
1108
+ tar -xzf "${ARCHIVE}" -C "${ARCHIVE_DIR}" 2>/dev/null || true
1109
+ fi
1091
1110
  done
1092
1111
 
1093
1112
  # Look for specific result files (priority: JSONL > aiperf JSON)
@@ -1346,7 +1365,8 @@ except Exception as e:
1346
1365
  --workload "${BENCHMARK_WORKLOAD:-manual}" \
1347
1366
  --concurrency "${BENCHMARK_CONCURRENCY}" \
1348
1367
  --bucket "${CI_BENCHMARK_RESULTS_BUCKET}" \
1349
- --region "${AWS_REGION:-${REGION}}"; then
1368
+ --region "${AWS_REGION:-${REGION}}" \
1369
+ ${ADAPTER_ARG:+--adapter-name "${ADAPTER_ARG}"}; then
1350
1370
  echo "✅ Benchmark results persisted to S3"
1351
1371
  else
1352
1372
  echo "⚠️ Failed to persist benchmark results to Athena (non-fatal)"
@@ -211,6 +211,26 @@ export <%= key %>=${<%= key %>:-<%= value %>}
211
211
  <% }); %>
212
212
  <% } %>
213
213
 
214
+ <% if (typeof icEnvVars !== 'undefined' && icEnvVars && Object.keys(icEnvVars).length > 0) { %>
215
+ # Deploy-time IC environment variables (applied at IC creation via SDK v3, not baked into image)
216
+ # Max 16 vars, max 1024 chars per key/value
217
+ # WARNING: Do not store raw secrets here. Use Secrets Manager ARN pattern instead:
218
+ # export IC_ENV_HF_TOKEN_ARN=arn:aws:secretsmanager:REGION:ACCOUNT:secret:NAME
219
+ # Runtime code resolves the ARN to the secret value (same pattern as HF_TOKEN_ARN in do/build).
220
+ <% Object.entries(icEnvVars).forEach(([key, value]) => { %>
221
+ export IC_ENV_<%= key %>=${IC_ENV_<%= key %>:-<%= value %>}
222
+ <% }); %>
223
+ <% } else if (deploymentTarget === 'realtime-inference') { %>
224
+ # ─── Deploy-time IC environment variables (uncomment to configure) ─────────────
225
+ # These are passed as the Environment field in InferenceComponent.create() at deploy time.
226
+ # They do NOT affect the Docker build — build-time env vars remain in Dockerfile via ENV.
227
+ # Max 16 vars, max 1024 chars per key/value.
228
+ # WARNING: Do not store raw secrets here. Use Secrets Manager ARN pattern instead:
229
+ # export IC_ENV_HF_TOKEN_ARN=arn:aws:secretsmanager:REGION:ACCOUNT:secret:NAME
230
+ # export IC_ENV_VLLM_MAX_MODEL_LEN=8192
231
+ # export IC_ENV_VLLM_GPU_MEMORY_UTILIZATION=0.85
232
+ <% } %>
233
+
214
234
  # Framework-specific configuration
215
235
  <% if (framework === 'transformers') { %>
216
236
  export MODEL_NAME="<%= modelName %>"
@@ -6,6 +6,50 @@
6
6
  # PROJECT_NAME, ENDPOINT_NAME, ECR_REPOSITORY, AWS_REGION, CONTAINER_ENV_JSON
7
7
  # Also expects _update_config_var() to be available (from wait.sh).
8
8
 
9
+ # _collect_ic_env_vars()
10
+ # Reads IC_ENV_* prefixed variables from the environment (sourced from do/config),
11
+ # strips the IC_ENV_ prefix, validates constraints, and outputs JSON key-value pairs.
12
+ # Constraints: max 16 entries, max 1024 chars per key/value.
13
+ # IC_ENV_* overrides take precedence over CONTAINER_ENV_JSON.
14
+ #
15
+ # Sets IC_ENV_OVERRIDE in the caller's scope.
16
+ _collect_ic_env_vars() {
17
+ IC_ENV_OVERRIDE=""
18
+ local ic_env_count=0
19
+
20
+ while IFS='=' read -r full_key value; do
21
+ # Skip empty lines
22
+ [ -z "${full_key}" ] && continue
23
+
24
+ local stripped_key="${full_key#IC_ENV_}"
25
+
26
+ # Validate key length (AC-3.4)
27
+ if [ ${#stripped_key} -gt 1024 ]; then
28
+ echo "⚠️ IC_ENV_${stripped_key}: key exceeds 1024 chars, skipping" >&2
29
+ continue
30
+ fi
31
+
32
+ # Validate value length (AC-3.4)
33
+ if [ ${#value} -gt 1024 ]; then
34
+ echo "⚠️ IC_ENV_${stripped_key}: value exceeds 1024 chars, skipping" >&2
35
+ continue
36
+ fi
37
+
38
+ ic_env_count=$((ic_env_count + 1))
39
+
40
+ # Max 16 env vars (AC-3.3)
41
+ if [ ${ic_env_count} -gt 16 ]; then
42
+ echo "⚠️ More than 16 IC_ENV_* variables defined. Using first 16 only." >&2
43
+ break
44
+ fi
45
+
46
+ if [ -n "${IC_ENV_OVERRIDE}" ]; then
47
+ IC_ENV_OVERRIDE="${IC_ENV_OVERRIDE},"
48
+ fi
49
+ IC_ENV_OVERRIDE="${IC_ENV_OVERRIDE}\"${stripped_key}\":\"${value}\""
50
+ done < <(env | grep "^IC_ENV_" | sort)
51
+ }
52
+
9
53
  # create_inference_component <ic_config_file>
10
54
  # Creates an inference component from a per-IC config file.
11
55
  #
@@ -17,6 +61,10 @@
17
61
  # IC_STARTUP_TIMEOUT — container startup health check timeout in seconds (default: 900)
18
62
  # IC_CONTAINER_ENV_EXTRA — optional extra env vars in "KEY":"value" format
19
63
  #
64
+ # IC_ENV_* prefixed vars from do/config are collected, validated, and passed
65
+ # as the Environment field in InferenceComponent.create() via SDK v3.
66
+ # Precedence: IC_ENV_* > IC_CONTAINER_ENV_EXTRA > CONTAINER_ENV_JSON
67
+ #
20
68
  # Multi-spec support (for heterogeneous instance pools):
21
69
  # IC_MULTI_SPEC — set to "true" to use Specifications (plural) array
22
70
  # IC_SPEC_COUNT — number of spec entries (e.g., 2)
@@ -38,6 +86,9 @@ create_inference_component() {
38
86
  # Source the IC config to get per-IC settings
39
87
  source "${ic_conf}"
40
88
 
89
+ # Collect IC_ENV_* overrides from environment (sourced from do/config)
90
+ _collect_ic_env_vars
91
+
41
92
  local ic_timestamp
42
93
  ic_timestamp=$(date +%s)
43
94
  local ic_basename
@@ -48,9 +99,11 @@ create_inference_component() {
48
99
  local container_spec="{\"Image\":\"${ECR_REPOSITORY}:${IC_IMAGE_TAG:-${PROJECT_NAME}-latest}\""
49
100
  # Always inject IC name for CW log forwarder
50
101
  local ic_env="\"INFERENCE_COMPONENT_NAME\":\"${ic_name}\""
51
- if [ -n "${CONTAINER_ENV_JSON}${IC_CONTAINER_ENV_EXTRA:-}" ]; then
52
- local env_json="${CONTAINER_ENV_JSON}"
53
- [ -n "${IC_CONTAINER_ENV_EXTRA:-}" ] && env_json="${env_json:+${env_json},}${IC_CONTAINER_ENV_EXTRA}"
102
+ # Build environment JSON with precedence: IC_ENV_* > IC_CONTAINER_ENV_EXTRA > CONTAINER_ENV_JSON
103
+ local env_json="${CONTAINER_ENV_JSON}"
104
+ [ -n "${IC_CONTAINER_ENV_EXTRA:-}" ] && env_json="${env_json:+${env_json},}${IC_CONTAINER_ENV_EXTRA}"
105
+ [ -n "${IC_ENV_OVERRIDE:-}" ] && env_json="${env_json:+${env_json},}${IC_ENV_OVERRIDE}"
106
+ if [ -n "${env_json}" ]; then
54
107
  container_spec="${container_spec},\"Environment\":{${ic_env},${env_json}}"
55
108
  else
56
109
  container_spec="${container_spec},\"Environment\":{${ic_env}}"