@aws/ml-container-creator 0.2.6 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +38 -2
- package/config/bootstrap-stack.json +14 -0
- package/infra/ci-harness/package-lock.json +22 -9
- package/package.json +1 -1
- package/servers/instance-sizer/index.js +9 -6
- package/servers/instance-sizer/lib/instance-ranker.js +35 -10
- package/servers/instance-sizer/lib/model-resolver.js +10 -6
- package/servers/lib/catalogs/model-servers.json +283 -5
- package/servers/lib/catalogs/models.json +30 -0
- package/servers/lib/schemas/image-catalog.schema.json +6 -0
- package/servers/model-picker/index.js +2 -1
- package/src/app.js +19 -0
- package/src/lib/architecture-sync.js +171 -0
- package/src/lib/arn-detection.js +22 -0
- package/src/lib/bootstrap-command-handler.js +82 -0
- package/src/lib/config-manager.js +43 -0
- package/src/lib/cross-cutting-checker.js +119 -0
- package/src/lib/deployment-entry-schema.js +1 -2
- package/src/lib/prompt-runner.js +427 -20
- package/src/lib/prompts.js +1 -1
- package/src/lib/registry-command-handler.js +236 -0
- package/src/lib/secret-classification.js +56 -0
- package/src/lib/secrets-command-handler.js +550 -0
- package/src/lib/validate-runner.js +49 -0
- package/src/lib/validation-report.js +8 -1
- package/src/prompt-adapter.js +3 -2
- package/templates/do/build +22 -0
- package/templates/do/config +15 -3
- package/templates/do/deploy +60 -5
- package/templates/do/logs +18 -3
- package/templates/do/run +10 -0
package/templates/do/config
CHANGED
|
@@ -151,17 +151,29 @@ export <%= key %>=${<%= key %>:-<%= value %>}
|
|
|
151
151
|
# Framework-specific configuration
|
|
152
152
|
<% if (framework === 'transformers') { %>
|
|
153
153
|
export MODEL_NAME="<%= modelName %>"
|
|
154
|
-
|
|
154
|
+
# Secrets Manager integration: when an ARN is configured, do-scripts resolve the
|
|
155
|
+
# secret at the appropriate stage (build-time or runtime). When a plaintext value
|
|
156
|
+
# is configured, it is exported directly. The _ARN suffix signals resolution is needed.
|
|
157
|
+
<% if (typeof hfTokenArn !== 'undefined' && hfTokenArn) { %>
|
|
158
|
+
export HF_TOKEN_ARN="<%= hfTokenArn %>"
|
|
159
|
+
<% } else if (hfToken) { %>
|
|
155
160
|
export HF_TOKEN="<%= hfToken %>"
|
|
156
161
|
<% } %>
|
|
157
|
-
<% if (
|
|
162
|
+
<% if (typeof ngcTokenArn !== 'undefined' && ngcTokenArn) { %>
|
|
163
|
+
export NGC_API_KEY_ARN="<%= ngcTokenArn %>"
|
|
164
|
+
<% } else if (ngcApiKey) { %>
|
|
158
165
|
export NGC_API_KEY="<%= ngcApiKey %>"
|
|
159
166
|
<% } %>
|
|
160
167
|
<% } %>
|
|
161
168
|
|
|
162
169
|
<% if (framework === 'diffusors') { %>
|
|
163
170
|
export MODEL_NAME="<%= modelName %>"
|
|
164
|
-
|
|
171
|
+
# Secrets Manager integration: when an ARN is configured, do-scripts resolve the
|
|
172
|
+
# secret at the appropriate stage (build-time or runtime). When a plaintext value
|
|
173
|
+
# is configured, it is exported directly. The _ARN suffix signals resolution is needed.
|
|
174
|
+
<% if (typeof hfTokenArn !== 'undefined' && hfTokenArn) { %>
|
|
175
|
+
export HF_TOKEN_ARN="<%= hfTokenArn %>"
|
|
176
|
+
<% } else if (hfToken) { %>
|
|
165
177
|
export HF_TOKEN="<%= hfToken %>"
|
|
166
178
|
<% } %>
|
|
167
179
|
<% } %>
|
package/templates/do/deploy
CHANGED
|
@@ -95,6 +95,41 @@ fi
|
|
|
95
95
|
echo "✅ ECR image found: ${ECR_REPOSITORY}:${PROJECT_NAME}-latest"
|
|
96
96
|
IMAGE_TAG="${PROJECT_NAME}-latest"
|
|
97
97
|
|
|
98
|
+
# ============================================================
|
|
99
|
+
# Shared: Resolve secrets for container environment
|
|
100
|
+
# ============================================================
|
|
101
|
+
CONTAINER_ENV_JSON=""
|
|
102
|
+
|
|
103
|
+
if [ -n "${HF_TOKEN_ARN:-}" ]; then
|
|
104
|
+
echo "đ Resolving HuggingFace token from Secrets Manager..."
|
|
105
|
+
RESOLVED_HF_TOKEN=$(aws secretsmanager get-secret-value --secret-id "${HF_TOKEN_ARN}" --query SecretString --output text --region "${AWS_REGION}") || {
|
|
106
|
+
echo "â Failed to resolve HuggingFace token from Secrets Manager"
|
|
107
|
+
exit 3
|
|
108
|
+
}
|
|
109
|
+
CONTAINER_ENV_JSON="\"HF_TOKEN\":\"${RESOLVED_HF_TOKEN}\""
|
|
110
|
+
elif [ -n "${HF_TOKEN:-}" ]; then
|
|
111
|
+
CONTAINER_ENV_JSON="\"HF_TOKEN\":\"${HF_TOKEN}\""
|
|
112
|
+
fi
|
|
113
|
+
|
|
114
|
+
if [ -n "${NGC_API_KEY_ARN:-}" ]; then
|
|
115
|
+
echo "đ Resolving NGC API key from Secrets Manager..."
|
|
116
|
+
RESOLVED_NGC_KEY=$(aws secretsmanager get-secret-value --secret-id "${NGC_API_KEY_ARN}" --query SecretString --output text --region "${AWS_REGION}") || {
|
|
117
|
+
echo "â Failed to resolve NGC API key from Secrets Manager"
|
|
118
|
+
exit 3
|
|
119
|
+
}
|
|
120
|
+
if [ -n "${CONTAINER_ENV_JSON}" ]; then
|
|
121
|
+
CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"NGC_API_KEY\":\"${RESOLVED_NGC_KEY}\""
|
|
122
|
+
else
|
|
123
|
+
CONTAINER_ENV_JSON="\"NGC_API_KEY\":\"${RESOLVED_NGC_KEY}\""
|
|
124
|
+
fi
|
|
125
|
+
elif [ -n "${NGC_API_KEY:-}" ]; then
|
|
126
|
+
if [ -n "${CONTAINER_ENV_JSON}" ]; then
|
|
127
|
+
CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"NGC_API_KEY\":\"${NGC_API_KEY}\""
|
|
128
|
+
else
|
|
129
|
+
CONTAINER_ENV_JSON="\"NGC_API_KEY\":\"${NGC_API_KEY}\""
|
|
130
|
+
fi
|
|
131
|
+
fi
|
|
132
|
+
|
|
98
133
|
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
99
134
|
# ============================================================
|
|
100
135
|
# SageMaker Real-Time Inference Deployment (Inference Components)
|
|
@@ -400,15 +435,20 @@ if [ -z "${SKIP_TO}" ] || [ "${SKIP_TO}" = "create_ic" ] || [ "${SKIP_TO}" = "wa
|
|
|
400
435
|
_update_config_var "INFERENCE_COMPONENT_NAME" "${IC_NAME}"
|
|
401
436
|
fi
|
|
402
437
|
|
|
438
|
+
# Build container spec JSON
|
|
439
|
+
CONTAINER_SPEC="{\"Image\":\"${ECR_REPOSITORY}:${IMAGE_TAG}\""
|
|
440
|
+
if [ -n "${CONTAINER_ENV_JSON}" ]; then
|
|
441
|
+
CONTAINER_SPEC="${CONTAINER_SPEC},\"Environment\":{${CONTAINER_ENV_JSON}}"
|
|
442
|
+
fi
|
|
443
|
+
CONTAINER_SPEC="${CONTAINER_SPEC}}"
|
|
444
|
+
|
|
403
445
|
echo "📦 Creating inference component: ${IC_NAME}"
|
|
404
446
|
if ! aws sagemaker create-inference-component \
|
|
405
447
|
--inference-component-name "${IC_NAME}" \
|
|
406
448
|
--endpoint-name "${ENDPOINT_NAME}" \
|
|
407
449
|
--variant-name "AllTraffic" \
|
|
408
450
|
--specification "{
|
|
409
|
-
\"Container\": {
|
|
410
|
-
\"Image\": \"${ECR_REPOSITORY}:${IMAGE_TAG}\"
|
|
411
|
-
},
|
|
451
|
+
\"Container\": ${CONTAINER_SPEC},
|
|
412
452
|
\"StartupParameters\": {
|
|
413
453
|
\"ContainerStartupHealthCheckTimeoutInSeconds\": 900
|
|
414
454
|
},
|
|
@@ -767,10 +807,17 @@ if [ -z "${SKIP_TO}" ]; then
|
|
|
767
807
|
_update_config_var "SAGEMAKER_MODEL_NAME" "${MODEL_NAME_SM}"
|
|
768
808
|
|
|
769
809
|
# Step 1: Create SageMaker model
|
|
810
|
+
# Build primary container spec
|
|
811
|
+
PRIMARY_CONTAINER="{\"Image\":\"${ECR_REPOSITORY}:${IMAGE_TAG}\""
|
|
812
|
+
if [ -n "${CONTAINER_ENV_JSON}" ]; then
|
|
813
|
+
PRIMARY_CONTAINER="${PRIMARY_CONTAINER},\"Environment\":{${CONTAINER_ENV_JSON}}"
|
|
814
|
+
fi
|
|
815
|
+
PRIMARY_CONTAINER="${PRIMARY_CONTAINER}}"
|
|
816
|
+
|
|
770
817
|
echo "📦 Creating SageMaker model: ${MODEL_NAME_SM}"
|
|
771
818
|
if ! aws sagemaker create-model \
|
|
772
819
|
--model-name "${MODEL_NAME_SM}" \
|
|
773
|
-
--primary-container "
|
|
820
|
+
--primary-container "${PRIMARY_CONTAINER}" \
|
|
774
821
|
--execution-role-arn "${ROLE_ARN}" \
|
|
775
822
|
--region "${AWS_REGION}"; then
|
|
776
823
|
|
|
@@ -1361,9 +1408,17 @@ _update_config_var "SAGEMAKER_MODEL_NAME" "${MODEL_NAME_SM}"
|
|
|
1361
1408
|
|
|
1362
1409
|
# Step 1: Create SageMaker model
|
|
1363
1410
|
echo "📦 Creating SageMaker model: ${MODEL_NAME_SM}"
|
|
1411
|
+
|
|
1412
|
+
# Build primary container spec
|
|
1413
|
+
BATCH_PRIMARY_CONTAINER="{\"Image\":\"${ECR_REPOSITORY}:${IMAGE_TAG}\""
|
|
1414
|
+
if [ -n "${CONTAINER_ENV_JSON}" ]; then
|
|
1415
|
+
BATCH_PRIMARY_CONTAINER="${BATCH_PRIMARY_CONTAINER},\"Environment\":{${CONTAINER_ENV_JSON}}"
|
|
1416
|
+
fi
|
|
1417
|
+
BATCH_PRIMARY_CONTAINER="${BATCH_PRIMARY_CONTAINER}}"
|
|
1418
|
+
|
|
1364
1419
|
if ! aws sagemaker create-model \
|
|
1365
1420
|
--model-name "${MODEL_NAME_SM}" \
|
|
1366
|
-
--primary-container "
|
|
1421
|
+
--primary-container "${BATCH_PRIMARY_CONTAINER}" \
|
|
1367
1422
|
--execution-role-arn "${ROLE_ARN}" \
|
|
1368
1423
|
--region "${AWS_REGION}"; then
|
|
1369
1424
|
|
package/templates/do/logs
CHANGED
|
@@ -51,11 +51,15 @@ echo "ââââââââââââââââââââââââ
|
|
|
51
51
|
echo ""
|
|
52
52
|
|
|
53
53
|
# Wait for log group to exist before tailing
|
|
54
|
-
MAX_WAIT=
|
|
54
|
+
MAX_WAIT=900
|
|
55
55
|
INTERVAL=10
|
|
56
56
|
ELAPSED=0
|
|
57
57
|
|
|
58
|
+
# Try IC-specific log group first, fall back to endpoint log group
|
|
59
|
+
FALLBACK_LOG_GROUP="/aws/sagemaker/Endpoints/${ENDPOINT}"
|
|
60
|
+
|
|
58
61
|
while true; do
|
|
62
|
+
# Check IC-specific log group
|
|
59
63
|
if aws logs describe-log-groups \
|
|
60
64
|
--log-group-name-prefix "${LOG_GROUP}" \
|
|
61
65
|
--region "${AWS_REGION}" \
|
|
@@ -64,6 +68,17 @@ while true; do
|
|
|
64
68
|
break
|
|
65
69
|
fi
|
|
66
70
|
|
|
71
|
+
# Check endpoint-level log group as fallback
|
|
72
|
+
if aws logs describe-log-groups \
|
|
73
|
+
--log-group-name-prefix "${FALLBACK_LOG_GROUP}" \
|
|
74
|
+
--region "${AWS_REGION}" \
|
|
75
|
+
--query "logGroups[?logGroupName=='${FALLBACK_LOG_GROUP}'].logGroupName" \
|
|
76
|
+
--output text 2>/dev/null | grep -q "${FALLBACK_LOG_GROUP}"; then
|
|
77
|
+
LOG_GROUP="${FALLBACK_LOG_GROUP}"
|
|
78
|
+
echo " ℹ️ Using endpoint log group: ${LOG_GROUP}"
|
|
79
|
+
break
|
|
80
|
+
fi
|
|
81
|
+
|
|
67
82
|
if [ "${ELAPSED}" -ge "${MAX_WAIT}" ]; then
|
|
68
83
|
echo "â Timed out after ${MAX_WAIT}s waiting for log group: ${LOG_GROUP}"
|
|
69
84
|
echo ""
|
|
@@ -123,7 +138,7 @@ echo "ââââââââââââââââââââââââ
|
|
|
123
138
|
echo ""
|
|
124
139
|
|
|
125
140
|
# Wait for log group to exist before tailing
|
|
126
|
-
MAX_WAIT=
|
|
141
|
+
MAX_WAIT=900
|
|
127
142
|
INTERVAL=10
|
|
128
143
|
ELAPSED=0
|
|
129
144
|
|
|
@@ -195,7 +210,7 @@ echo "ââââââââââââââââââââââââ
|
|
|
195
210
|
echo ""
|
|
196
211
|
|
|
197
212
|
# Wait for log group to exist before tailing
|
|
198
|
-
MAX_WAIT=
|
|
213
|
+
MAX_WAIT=900
|
|
199
214
|
INTERVAL=10
|
|
200
215
|
ELAPSED=0
|
|
201
216
|
|
package/templates/do/run
CHANGED
|
@@ -68,6 +68,16 @@ if [ -n "${MODEL_DIR:-}" ]; then
|
|
|
68
68
|
fi
|
|
69
69
|
fi
|
|
70
70
|
|
|
71
|
+
# --- Secrets Manager resolution (runtime) ---
|
|
72
|
+
if [ -n "${HF_TOKEN_ARN:-}" ]; then
|
|
73
|
+
echo "đ Resolving HuggingFace token from Secrets Manager..."
|
|
74
|
+
HF_TOKEN=$(aws secretsmanager get-secret-value --secret-id "${HF_TOKEN_ARN}" --query SecretString --output text) || {
|
|
75
|
+
echo "â Failed to resolve HuggingFace token from Secrets Manager"
|
|
76
|
+
exit 3
|
|
77
|
+
}
|
|
78
|
+
export HF_TOKEN
|
|
79
|
+
fi
|
|
80
|
+
|
|
71
81
|
# Prepare environment variables
|
|
72
82
|
ENV_VARS=""
|
|
73
83
|
<% if (framework === 'transformers') { %>
|