@aws/ml-container-creator 0.6.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,10 +9,10 @@ echo "Starting vLLM-Omni server (diffusion model serving)"
9
9
 
10
10
  # Resolve model URI prefixes that engines cannot handle natively.
11
11
  # The generator's model-picker may store provider-specific URIs
12
- # (e.g. jumpstart://model-txt2img-stabilityai-stable-diffusion-v2-1-base)
13
- # as the model identifier. vLLM expects a HuggingFace repo ID or local path.
12
+ # (e.g. registry://my-model-group/1) as the model identifier.
13
+ # vLLM expects a HuggingFace repo ID or local path.
14
14
  _RAW_MODEL="${VLLM_MODEL:-}"
15
- if [[ "$_RAW_MODEL" == jumpstart://* ]] || [[ "$_RAW_MODEL" == jumpstart-hub://* ]] || [[ "$_RAW_MODEL" == registry://* ]]; then
15
+ if [[ "$_RAW_MODEL" == registry://* ]]; then
16
16
  if [ -d /opt/ml/model ] && [ "$(ls -A /opt/ml/model 2>/dev/null)" ]; then
17
17
  echo "Resolved VLLM_MODEL='${_RAW_MODEL}' → /opt/ml/model (local artifacts found)"
18
18
  export VLLM_MODEL="/opt/ml/model"
@@ -176,7 +176,7 @@ def cmd_submit(args):
176
176
  )
177
177
  elif "ValidationException" in error_msg and "license" in error_msg.lower():
178
178
  _error_exit(
179
- f"Model license not accepted. Accept the license in JumpStart before "
179
+ f"Model license not accepted. Accept the model license before "
180
180
  f"using this model for customization. Details: {error_msg}"
181
181
  )
182
182
  else:
@@ -660,7 +660,7 @@ def main():
660
660
 
661
661
  # ── submit ────────────────────────────────────────────────────────────────
662
662
  submit_parser = subparsers.add_parser("submit", help="Submit a customization job")
663
- submit_parser.add_argument("--model-id", required=True, help="JumpStart model ID")
663
+ submit_parser.add_argument("--model-id", required=True, help="Model ID")
664
664
  submit_parser.add_argument("--technique", required=True,
665
665
  choices=["sft", "dpo", "rlaif", "rlvr"],
666
666
  help="Customization technique")
@@ -2,16 +2,33 @@
2
2
  # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
- # Export current configuration as a CLI command or JSON object
6
- # Usage: ./do/export [--json]
5
+ # Export current configuration as a CLI command, JSON object, or Jupyter notebook
6
+ # Usage: ./do/export [--json | --notebook]
7
7
 
8
8
  # Source configuration (suppress the summary output)
9
9
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
10
10
  source "${SCRIPT_DIR}/config" > /dev/null 2>&1
11
11
 
12
+ # ── Notebook output mode ──────────────────────────────────────────────────────
13
+
14
+ if [ "${1:-}" = "--notebook" ]; then
15
+ # Reject the conflicting --json flag before doing any work
16
+ if [ "${2:-}" = "--json" ]; then
17
+ echo "Error: --notebook and --json are mutually exclusive" >&2
18
+ exit 1
19
+ fi
20
+ python3 "${SCRIPT_DIR}/../deploy_notebook_generator.py"
21
+ exit $? # Propagate the generator's exit status so a failed export is not reported as success
22
+ fi
23
+
12
24
  # ── JSON output mode ─────────────────────────────────────────────────────────
13
25
 
14
26
  if [ "${1:-}" = "--json" ]; then
27
+ # Ensure not combined with --notebook
28
+ if [ "${2:-}" = "--notebook" ]; then
29
+ echo "Error: --notebook and --json are mutually exclusive" >&2
30
+ exit 1
31
+ fi
15
32
  # Build a JSON object with all configuration parameters.
16
33
  # Uses ConfigManager camelCase keys so the output can be fed directly
17
34
  # back into the generator via --config=<file>.
@@ -152,7 +152,9 @@ create_endpoint_config() {
152
152
  variant_json="${variant_json}}]"
153
153
  else
154
154
  # Standard path: single instance type
155
- variant_json="[{\"VariantName\":\"AllTraffic\",\"InstanceType\":\"${INSTANCE_TYPE}\",\"InitialInstanceCount\":1"
155
+ # RoutingConfig is required for IC-based endpoints — without it the IC scheduler
156
+ # cannot place containers and the IC stays in Creating with no logs.
157
+ variant_json="[{\"VariantName\":\"AllTraffic\",\"InstanceType\":\"${INSTANCE_TYPE}\",\"InitialInstanceCount\":1,\"RoutingConfig\":{\"RoutingStrategy\":\"LEAST_OUTSTANDING_REQUESTS\"}"
156
158
 
157
159
  # Optional: AMI version
158
160
  if [ -n "${INFERENCE_AMI_VERSION:-}" ]; then
@@ -46,10 +46,14 @@ create_inference_component() {
46
46
 
47
47
  # Build container spec JSON
48
48
  local container_spec="{\"Image\":\"${ECR_REPOSITORY}:${IC_IMAGE_TAG:-${PROJECT_NAME}-latest}\""
49
+ # Always inject IC name for CW log forwarder
50
+ local ic_env="\"INFERENCE_COMPONENT_NAME\":\"${ic_name}\""
49
51
  if [ -n "${CONTAINER_ENV_JSON}${IC_CONTAINER_ENV_EXTRA:-}" ]; then
50
52
  local env_json="${CONTAINER_ENV_JSON}"
51
53
  [ -n "${IC_CONTAINER_ENV_EXTRA:-}" ] && env_json="${env_json:+${env_json},}${IC_CONTAINER_ENV_EXTRA}"
52
- container_spec="${container_spec},\"Environment\":{${env_json}}"
54
+ container_spec="${container_spec},\"Environment\":{${ic_env},${env_json}}"
55
+ else
56
+ container_spec="${container_spec},\"Environment\":{${ic_env}}"
53
57
  fi
54
58
  container_spec="${container_spec}}"
55
59
 
@@ -191,8 +191,14 @@ fi
191
191
  # ============================================================
192
192
 
193
193
  # DEPLOYMENT_CONFIG format: <architecture>-<backend> (e.g., transformers-vllm, http-flask, triton-fil)
194
- ARCHITECTURE="${DEPLOYMENT_CONFIG%%-*}"
195
- BACKEND="${DEPLOYMENT_CONFIG#*-}"
194
+ # Special case: marketplace has no backend
195
+ if [ "${DEPLOYMENT_CONFIG}" = "marketplace" ]; then
196
+ ARCHITECTURE="marketplace"
197
+ BACKEND=""
198
+ else
199
+ ARCHITECTURE="${DEPLOYMENT_CONFIG%%-*}"
200
+ BACKEND="${DEPLOYMENT_CONFIG#*-}"
201
+ fi
196
202
 
197
203
  echo "📋 Registering deployment to registry"
198
204
  echo " Project: ${PROJECT_NAME}"
package/templates/do/test CHANGED
@@ -103,9 +103,9 @@ case "${FRAMEWORK}" in
103
103
  case "${MODEL_SERVER}" in
104
104
  vllm|sglang)
105
105
  # OpenAI-compatible chat completions format
106
- # For S3/JumpStart models, vLLM registers the model under the local path
106
+ # For S3/registry models, vLLM registers the model under the local path
107
107
  VLLM_MODEL_NAME="${MODEL_NAME}"
108
- if [[ "${MODEL_NAME}" == jumpstart://* ]] || [[ "${MODEL_NAME}" == jumpstart-hub://* ]] || [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
108
+ if [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
109
109
  VLLM_MODEL_NAME="/opt/ml/model"
110
110
  fi
111
111
  TEST_PAYLOAD='{"model": "'"${VLLM_MODEL_NAME}"'", "messages": [{"role": "user", "content": "What is machine learning?"}], "max_tokens": 50, "temperature": 0.7}'
@@ -431,7 +431,7 @@ case "${FRAMEWORK}" in
431
431
  case "${MODEL_SERVER}" in
432
432
  vllm|sglang)
433
433
  VLLM_MODEL_NAME="${MODEL_NAME}"
434
- if [[ "${MODEL_NAME}" == jumpstart://* ]] || [[ "${MODEL_NAME}" == jumpstart-hub://* ]] || [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
434
+ if [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
435
435
  VLLM_MODEL_NAME="/opt/ml/model"
436
436
  fi
437
437
  TEST_PAYLOAD='{"model": "'"${VLLM_MODEL_NAME}"'", "messages": [{"role": "user", "content": "What is machine learning?"}], "max_tokens": 50, "temperature": 0.7}'
@@ -808,7 +808,7 @@ case "${FRAMEWORK}" in
808
808
  vllm|sglang)
809
809
  # OpenAI-compatible chat completions format
810
810
  VLLM_MODEL_NAME="${MODEL_NAME}"
811
- if [[ "${MODEL_NAME}" == jumpstart://* ]] || [[ "${MODEL_NAME}" == jumpstart-hub://* ]] || [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
811
+ if [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
812
812
  VLLM_MODEL_NAME="/opt/ml/model"
813
813
  fi
814
814
  TEST_PAYLOAD='{"model": "'"${VLLM_MODEL_NAME}"'", "messages": [{"role": "user", "content": "What is machine learning?"}], "max_tokens": 50, "temperature": 0.7}'
@@ -1095,7 +1095,7 @@ case "${FRAMEWORK}" in
1095
1095
  case "${MODEL_SERVER}" in
1096
1096
  vllm|sglang)
1097
1097
  VLLM_MODEL_NAME="${MODEL_NAME}"
1098
- if [[ "${MODEL_NAME}" == jumpstart://* ]] || [[ "${MODEL_NAME}" == jumpstart-hub://* ]] || [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
1098
+ if [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
1099
1099
  VLLM_MODEL_NAME="/opt/ml/model"
1100
1100
  fi
1101
1101
  TEST_PAYLOAD='{"model": "'"${VLLM_MODEL_NAME}"'", "messages": [{"role": "user", "content": "What is machine learning?"}], "max_tokens": 50, "temperature": 0.7}'
package/templates/do/tune CHANGED
@@ -67,7 +67,7 @@ _parse_args() {
67
67
  ARG_TRAINING_TYPE="$2"; shift 2 ;;
68
68
  --model)
69
69
  if [ -z "${2:-}" ]; then
70
- echo "❌ --model requires a JumpStart model ID"
70
+ echo "❌ --model requires a model ID"
71
71
  exit 1
72
72
  fi
73
73
  ARG_MODEL="$2"; shift 2 ;;
@@ -287,7 +287,7 @@ for family in sorted(families.keys()):
287
287
  for entry in entries:
288
288
  techniques = list(entry.get('techniques', {}).keys())
289
289
  print(f' • {entry[\"displayName\"]}')
290
- print(f' ID: {entry[\"jumpStartModelId\"]}')
290
+ print(f' ID: {entry[\"modelId\"]}')
291
291
  for t in techniques:
292
292
  tc = entry['techniques'][t]
293
293
  types = ', '.join(tc.get('trainingTypes', []))
@@ -0,0 +1,118 @@
1
+ #!/bin/bash
2
+ # do-framework configuration (marketplace)
3
+ # This file is sourced by all do scripts
4
+
5
+ # Project identification
6
+ export PROJECT_NAME="<%= projectName %>"
7
+ export DEPLOYMENT_CONFIG="marketplace"
8
+
9
+ # Marketplace model package
10
+ export MODEL_PACKAGE_ARN="<%= modelPackageArn %>"
11
+
12
+ # AWS configuration
13
+ export AWS_REGION="<%= awsRegion %>"
14
+
15
+ # Deployment configuration
16
+ export DEPLOYMENT_TARGET="<%= deploymentTarget %>"
17
+ export INSTANCE_TYPE="<%= instanceType %>"
18
+
19
+ <% if (roleArn) { %>
20
+ export ROLE_ARN="<%= roleArn %>"
21
+ <% } %>
22
+
23
+ <% if (deploymentTarget === 'async-inference') { %>
24
+ # Async-specific configuration
25
+ ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "UNKNOWN")
26
+
27
+ <% if (asyncS3OutputPath) { %>
28
+ export ASYNC_S3_OUTPUT_PATH="<%= asyncS3OutputPath %>"
29
+ <% } else { %>
30
+ export ASYNC_S3_OUTPUT_PATH="s3://mlcc-async-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
31
+ <% } %>
32
+
33
+ <% if (asyncSnsSuccessTopic) { %>
34
+ export ASYNC_SNS_SUCCESS_TOPIC="<%= asyncSnsSuccessTopic %>"
35
+ <% } else { %>
36
+ export ASYNC_SNS_SUCCESS_TOPIC="arn:aws:sns:${AWS_REGION}:${ACCOUNT_ID}:ml-container-creator-${PROJECT_NAME}-async-success"
37
+ <% } %>
38
+
39
+ <% if (asyncSnsErrorTopic) { %>
40
+ export ASYNC_SNS_ERROR_TOPIC="<%= asyncSnsErrorTopic %>"
41
+ <% } else { %>
42
+ export ASYNC_SNS_ERROR_TOPIC="arn:aws:sns:${AWS_REGION}:${ACCOUNT_ID}:ml-container-creator-${PROJECT_NAME}-async-error"
43
+ <% } %>
44
+
45
+ <% if (asyncMaxConcurrentInvocations) { %>
46
+ export ASYNC_MAX_CONCURRENT_INVOCATIONS="<%= asyncMaxConcurrentInvocations %>"
47
+ <% } %>
48
+ <% } %>
49
+
50
+ <% if (deploymentTarget === 'batch-transform') { %>
51
+ # Batch Transform configuration
52
+ ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "UNKNOWN")
53
+
54
+ <% if (batchInputPath) { %>
55
+ export BATCH_INPUT_PATH="<%= batchInputPath %>"
56
+ <% } else { %>
57
+ export BATCH_INPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/input/"
58
+ <% } %>
59
+ <% if (batchOutputPath) { %>
60
+ export BATCH_OUTPUT_PATH="<%= batchOutputPath %>"
61
+ <% } else { %>
62
+ export BATCH_OUTPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
63
+ <% } %>
64
+ export BATCH_INSTANCE_COUNT="<%= batchInstanceCount %>"
65
+ export BATCH_SPLIT_TYPE="<%= batchSplitType %>"
66
+ export BATCH_STRATEGY="<%= batchStrategy %>"
67
+ export BATCH_JOIN_SOURCE="<%= batchJoinSource || 'None' %>"
68
+ <% if (batchMaxConcurrentTransforms) { %>
69
+ export BATCH_MAX_CONCURRENT_TRANSFORMS="<%= batchMaxConcurrentTransforms %>"
70
+ <% } %>
71
+ <% if (batchMaxPayloadInMB) { %>
72
+ export BATCH_MAX_PAYLOAD_IN_MB="<%= batchMaxPayloadInMB %>"
73
+ <% } %>
74
+ <% } %>
75
+
76
+ <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
77
+ # SageMaker AI Benchmarking configuration
78
+ export BENCHMARK_CONCURRENCY="<%= benchmarkConcurrency %>"
79
+ export BENCHMARK_INPUT_TOKENS_MEAN="<%= benchmarkInputTokensMean %>"
80
+ export BENCHMARK_OUTPUT_TOKENS_MEAN="<%= benchmarkOutputTokensMean %>"
81
+ export BENCHMARK_STREAMING="<%= benchmarkStreaming %>"
82
+ <% if (benchmarkRequestCount) { %>
83
+ export BENCHMARK_REQUEST_COUNT="<%= benchmarkRequestCount %>"
84
+ <% } else { %>
85
+ export BENCHMARK_REQUEST_COUNT=""
86
+ <% } %>
87
+ <% if (benchmarkS3OutputPath) { %>
88
+ export BENCHMARK_S3_OUTPUT_PATH="<%= benchmarkS3OutputPath %>"
89
+ <% } else { %>
90
+ export BENCHMARK_S3_OUTPUT_PATH="s3://mlcc-benchmark-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "UNKNOWN")-${AWS_REGION}/${PROJECT_NAME}/"
91
+ <% } %>
92
+ export BENCHMARK_JOB_NAME=""
93
+ export BENCHMARK_WORKLOAD_CONFIG_NAME=""
94
+ <% } %>
95
+
96
+ # Allow environment variable overrides. NOTE(review): AWS_REGION and INSTANCE_TYPE are already exported unconditionally earlier in this file, so these ":-" defaults appear to never take effect — confirm intent.
97
+ export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
98
+ export INSTANCE_TYPE=${INSTANCE_TYPE:-<%= instanceType %>}
99
+
100
+ # Print configuration summary
101
+ echo "⚙️ Configuration loaded"
102
+ echo " Project: ${PROJECT_NAME}"
103
+ echo " Config: ${DEPLOYMENT_CONFIG}"
104
+ echo " Region: ${AWS_REGION}"
105
+ echo " Model package: ${MODEL_PACKAGE_ARN}"
106
+ echo " Deployment target: ${DEPLOYMENT_TARGET}"
107
+ echo " Instance: ${INSTANCE_TYPE}"
108
+ <% if (deploymentTarget === 'async-inference') { %>
109
+ echo " S3 output: ${ASYNC_S3_OUTPUT_PATH}"
110
+ echo " SNS success: ${ASYNC_SNS_SUCCESS_TOPIC}"
111
+ echo " SNS error: ${ASYNC_SNS_ERROR_TOPIC}"
112
+ <% } else if (deploymentTarget === 'batch-transform') { %>
113
+ echo " Instance count: ${BATCH_INSTANCE_COUNT}"
114
+ echo " S3 input: ${BATCH_INPUT_PATH}"
115
+ echo " S3 output: ${BATCH_OUTPUT_PATH}"
116
+ echo " Split type: ${BATCH_SPLIT_TYPE}"
117
+ echo " Strategy: ${BATCH_STRATEGY}"
118
+ <% } %>