@aws/ml-container-creator 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. package/LICENSE +202 -0
  2. package/LICENSE-THIRD-PARTY +68620 -0
  3. package/NOTICE +2 -0
  4. package/README.md +106 -0
  5. package/bin/cli.js +365 -0
  6. package/config/defaults.json +32 -0
  7. package/config/presets/transformers-djl.json +26 -0
  8. package/config/presets/transformers-gpu.json +24 -0
  9. package/config/presets/transformers-lmi.json +27 -0
  10. package/package.json +129 -0
  11. package/servers/README.md +419 -0
  12. package/servers/base-image-picker/catalogs/model-servers.json +1191 -0
  13. package/servers/base-image-picker/catalogs/python-slim.json +38 -0
  14. package/servers/base-image-picker/catalogs/triton-backends.json +51 -0
  15. package/servers/base-image-picker/catalogs/triton.json +38 -0
  16. package/servers/base-image-picker/index.js +495 -0
  17. package/servers/base-image-picker/manifest.json +17 -0
  18. package/servers/base-image-picker/package.json +15 -0
  19. package/servers/hyperpod-cluster-picker/LICENSE +202 -0
  20. package/servers/hyperpod-cluster-picker/index.js +424 -0
  21. package/servers/hyperpod-cluster-picker/manifest.json +14 -0
  22. package/servers/hyperpod-cluster-picker/package.json +17 -0
  23. package/servers/instance-recommender/LICENSE +202 -0
  24. package/servers/instance-recommender/catalogs/instances.json +852 -0
  25. package/servers/instance-recommender/index.js +284 -0
  26. package/servers/instance-recommender/manifest.json +16 -0
  27. package/servers/instance-recommender/package.json +15 -0
  28. package/servers/lib/LICENSE +202 -0
  29. package/servers/lib/bedrock-client.js +160 -0
  30. package/servers/lib/custom-validators.js +46 -0
  31. package/servers/lib/dynamic-resolver.js +36 -0
  32. package/servers/lib/package.json +11 -0
  33. package/servers/lib/schemas/image-catalog.schema.json +185 -0
  34. package/servers/lib/schemas/instances.schema.json +124 -0
  35. package/servers/lib/schemas/manifest.schema.json +64 -0
  36. package/servers/lib/schemas/model-catalog.schema.json +91 -0
  37. package/servers/lib/schemas/regions.schema.json +26 -0
  38. package/servers/lib/schemas/triton-backends.schema.json +51 -0
  39. package/servers/model-picker/catalogs/jumpstart-public.json +66 -0
  40. package/servers/model-picker/catalogs/popular-diffusors.json +88 -0
  41. package/servers/model-picker/catalogs/popular-transformers.json +226 -0
  42. package/servers/model-picker/index.js +1693 -0
  43. package/servers/model-picker/manifest.json +18 -0
  44. package/servers/model-picker/package.json +20 -0
  45. package/servers/region-picker/LICENSE +202 -0
  46. package/servers/region-picker/catalogs/regions.json +263 -0
  47. package/servers/region-picker/index.js +230 -0
  48. package/servers/region-picker/manifest.json +16 -0
  49. package/servers/region-picker/package.json +15 -0
  50. package/src/app.js +1007 -0
  51. package/src/copy-tpl.js +77 -0
  52. package/src/lib/accelerator-validator.js +39 -0
  53. package/src/lib/asset-manager.js +385 -0
  54. package/src/lib/aws-profile-parser.js +181 -0
  55. package/src/lib/bootstrap-command-handler.js +1647 -0
  56. package/src/lib/bootstrap-config.js +238 -0
  57. package/src/lib/ci-register-helpers.js +124 -0
  58. package/src/lib/ci-report-helpers.js +158 -0
  59. package/src/lib/ci-stage-helpers.js +268 -0
  60. package/src/lib/cli-handler.js +529 -0
  61. package/src/lib/comment-generator.js +544 -0
  62. package/src/lib/community-reports-validator.js +91 -0
  63. package/src/lib/config-manager.js +2106 -0
  64. package/src/lib/configuration-exporter.js +204 -0
  65. package/src/lib/configuration-manager.js +695 -0
  66. package/src/lib/configuration-matcher.js +221 -0
  67. package/src/lib/cpu-validator.js +36 -0
  68. package/src/lib/cuda-validator.js +57 -0
  69. package/src/lib/deployment-config-resolver.js +103 -0
  70. package/src/lib/deployment-entry-schema.js +125 -0
  71. package/src/lib/deployment-registry.js +598 -0
  72. package/src/lib/docker-introspection-validator.js +51 -0
  73. package/src/lib/engine-prefix-resolver.js +60 -0
  74. package/src/lib/huggingface-client.js +172 -0
  75. package/src/lib/key-value-parser.js +37 -0
  76. package/src/lib/known-flags-validator.js +200 -0
  77. package/src/lib/manifest-cli.js +280 -0
  78. package/src/lib/mcp-client.js +303 -0
  79. package/src/lib/mcp-command-handler.js +532 -0
  80. package/src/lib/neuron-validator.js +80 -0
  81. package/src/lib/parameter-schema-validator.js +284 -0
  82. package/src/lib/prompt-runner.js +1349 -0
  83. package/src/lib/prompts.js +1138 -0
  84. package/src/lib/registry-command-handler.js +519 -0
  85. package/src/lib/registry-loader.js +198 -0
  86. package/src/lib/rocm-validator.js +80 -0
  87. package/src/lib/schema-validator.js +157 -0
  88. package/src/lib/sensitive-redactor.js +59 -0
  89. package/src/lib/template-engine.js +156 -0
  90. package/src/lib/template-manager.js +341 -0
  91. package/src/lib/validation-engine.js +314 -0
  92. package/src/prompt-adapter.js +63 -0
  93. package/templates/Dockerfile +300 -0
  94. package/templates/IAM_PERMISSIONS.md +84 -0
  95. package/templates/MIGRATION.md +488 -0
  96. package/templates/PROJECT_README.md +439 -0
  97. package/templates/TEMPLATE_SYSTEM.md +243 -0
  98. package/templates/buildspec.yml +64 -0
  99. package/templates/code/chat_template.jinja +1 -0
  100. package/templates/code/flask/gunicorn_config.py +35 -0
  101. package/templates/code/flask/wsgi.py +10 -0
  102. package/templates/code/model_handler.py +387 -0
  103. package/templates/code/serve +300 -0
  104. package/templates/code/serve.py +175 -0
  105. package/templates/code/serving.properties +105 -0
  106. package/templates/code/start_server.py +39 -0
  107. package/templates/code/start_server.sh +39 -0
  108. package/templates/diffusors/Dockerfile +72 -0
  109. package/templates/diffusors/patch_image_api.py +35 -0
  110. package/templates/diffusors/serve +115 -0
  111. package/templates/diffusors/start_server.sh +114 -0
  112. package/templates/do/.gitkeep +1 -0
  113. package/templates/do/README.md +541 -0
  114. package/templates/do/build +83 -0
  115. package/templates/do/ci +681 -0
  116. package/templates/do/clean +811 -0
  117. package/templates/do/config +260 -0
  118. package/templates/do/deploy +1560 -0
  119. package/templates/do/export +306 -0
  120. package/templates/do/logs +319 -0
  121. package/templates/do/manifest +12 -0
  122. package/templates/do/push +119 -0
  123. package/templates/do/register +580 -0
  124. package/templates/do/run +113 -0
  125. package/templates/do/submit +417 -0
  126. package/templates/do/test +1147 -0
  127. package/templates/hyperpod/configmap.yaml +24 -0
  128. package/templates/hyperpod/deployment.yaml +71 -0
  129. package/templates/hyperpod/pvc.yaml +42 -0
  130. package/templates/hyperpod/service.yaml +17 -0
  131. package/templates/nginx-diffusors.conf +74 -0
  132. package/templates/nginx-predictors.conf +47 -0
  133. package/templates/nginx-tensorrt.conf +74 -0
  134. package/templates/requirements.txt +61 -0
  135. package/templates/sample_model/test_inference.py +123 -0
  136. package/templates/sample_model/train_abalone.py +252 -0
  137. package/templates/test/test_endpoint.sh +79 -0
  138. package/templates/test/test_local_image.sh +80 -0
  139. package/templates/test/test_model_handler.py +180 -0
  140. package/templates/triton/Dockerfile +128 -0
  141. package/templates/triton/config.pbtxt +163 -0
  142. package/templates/triton/model.py +130 -0
  143. package/templates/triton/requirements.txt +11 -0
@@ -0,0 +1,306 @@
1
+ #!/bin/bash
2
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ # Export current configuration as a CLI command or JSON object
6
+ # Usage: ./do/export [--json]
7
+
8
+ # Source configuration (suppress the summary output)
9
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
10
+ source "${SCRIPT_DIR}/config" > /dev/null 2>&1
11
+
12
+ # ── JSON output mode ─────────────────────────────────────────────────────────
13
+
14
+ if [ "${1:-}" = "--json" ]; then
15
+ # Build a JSON object with all configuration parameters.
16
+ # Uses ConfigManager camelCase keys so the output can be fed directly
17
+ # back into the generator via --config=<file>.
18
+ JSON="{"
19
+ JSON="${JSON}\"projectName\":\"${PROJECT_NAME}\""
20
+
21
+ # Use deploymentConfig if available (bundles framework + model server)
22
+ if [ -n "${DEPLOYMENT_CONFIG:-}" ]; then
23
+ JSON="${JSON},\"deploymentConfig\":\"${DEPLOYMENT_CONFIG}\""
24
+ fi
25
+
26
+ # Model format (traditional ML only)
27
+ if [ -n "${MODEL_FORMAT:-}" ]; then
28
+ JSON="${JSON},\"modelFormat\":\"${MODEL_FORMAT}\""
29
+ fi
30
+
31
+ # Model name (transformers only)
32
+ if [ -n "${MODEL_NAME:-}" ]; then
33
+ JSON="${JSON},\"modelName\":\"${MODEL_NAME}\""
34
+ fi
35
+
36
+ # Build configuration
37
+ JSON="${JSON},\"buildTarget\":\"${BUILD_TARGET}\""
38
+ if [ "${BUILD_TARGET}" = "codebuild" ] && [ -n "${CODEBUILD_COMPUTE_TYPE:-}" ]; then
39
+ JSON="${JSON},\"codebuildComputeType\":\"${CODEBUILD_COMPUTE_TYPE}\""
40
+ fi
41
+
42
+ # Deployment target
43
+ JSON="${JSON},\"deploymentTarget\":\"${DEPLOYMENT_TARGET}\""
44
+
45
+ <% if (deploymentTarget === 'managed-inference') { %>
46
+ # SageMaker Managed Inference
47
+ JSON="${JSON},\"instanceType\":\"${INSTANCE_TYPE}\""
48
+ <% } else if (deploymentTarget === 'async-inference') { %>
49
+ # SageMaker Async Inference
50
+ JSON="${JSON},\"instanceType\":\"${INSTANCE_TYPE}\""
51
+ if [ -n "${ASYNC_S3_OUTPUT_PATH:-}" ]; then
52
+ JSON="${JSON},\"asyncS3OutputPath\":\"${ASYNC_S3_OUTPUT_PATH}\""
53
+ fi
54
+ if [ -n "${ASYNC_SNS_SUCCESS_TOPIC:-}" ]; then
55
+ JSON="${JSON},\"asyncSnsSuccessTopic\":\"${ASYNC_SNS_SUCCESS_TOPIC}\""
56
+ fi
57
+ if [ -n "${ASYNC_SNS_ERROR_TOPIC:-}" ]; then
58
+ JSON="${JSON},\"asyncSnsErrorTopic\":\"${ASYNC_SNS_ERROR_TOPIC}\""
59
+ fi
60
+ if [ "${ASYNC_MAX_CONCURRENT_INVOCATIONS:-1}" != "1" ]; then
61
+ JSON="${JSON},\"asyncMaxConcurrentInvocations\":${ASYNC_MAX_CONCURRENT_INVOCATIONS}"
62
+ fi
63
+ <% } else if (deploymentTarget === 'batch-transform') { %>
64
+ # SageMaker Batch Transform
65
+ JSON="${JSON},\"instanceType\":\"${INSTANCE_TYPE}\""
66
+ JSON="${JSON},\"batchInputPath\":\"${BATCH_INPUT_PATH}\""
67
+ JSON="${JSON},\"batchOutputPath\":\"${BATCH_OUTPUT_PATH}\""
68
+ JSON="${JSON},\"batchInstanceCount\":${BATCH_INSTANCE_COUNT}"
69
+ JSON="${JSON},\"batchSplitType\":\"${BATCH_SPLIT_TYPE}\""
70
+ JSON="${JSON},\"batchStrategy\":\"${BATCH_STRATEGY}\""
71
+ if [ "${BATCH_JOIN_SOURCE:-None}" != "None" ]; then
72
+ JSON="${JSON},\"batchJoinSource\":\"${BATCH_JOIN_SOURCE}\""
73
+ fi
74
+ if [ "${BATCH_MAX_CONCURRENT_TRANSFORMS:-1}" != "1" ]; then
75
+ JSON="${JSON},\"batchMaxConcurrentTransforms\":${BATCH_MAX_CONCURRENT_TRANSFORMS}"
76
+ fi
77
+ if [ "${BATCH_MAX_PAYLOAD_IN_MB:-6}" != "6" ]; then
78
+ JSON="${JSON},\"batchMaxPayloadInMB\":${BATCH_MAX_PAYLOAD_IN_MB}"
79
+ fi
80
+ <% } else if (deploymentTarget === 'hyperpod-eks') { %>
81
+ # HyperPod EKS
82
+ JSON="${JSON},\"hyperPodCluster\":\"${HYPERPOD_CLUSTER_NAME}\""
83
+ if [ "${HYPERPOD_NAMESPACE}" != "default" ]; then
84
+ JSON="${JSON},\"hyperPodNamespace\":\"${HYPERPOD_NAMESPACE}\""
85
+ fi
86
+ if [ "${HYPERPOD_REPLICAS}" != "1" ]; then
87
+ JSON="${JSON},\"hyperPodReplicas\":${HYPERPOD_REPLICAS}"
88
+ fi
89
+ <% if (fsxVolumeHandle) { %>
90
+ JSON="${JSON},\"fsxVolumeHandle\":\"${FSX_VOLUME_HANDLE}\""
91
+ <% } %>
92
+ <% } %>
93
+
94
+ # AWS region
95
+ JSON="${JSON},\"awsRegion\":\"${AWS_REGION}\""
96
+
97
+ # Role ARN
98
+ if [ -n "${ROLE_ARN:-}" ]; then
99
+ JSON="${JSON},\"awsRoleArn\":\"${ROLE_ARN}\""
100
+ fi
101
+
102
+ # HuggingFace token — reference env var, don't leak the actual value
103
+ if [ -n "${HF_TOKEN:-}" ]; then
104
+ JSON="${JSON},\"hfToken\":\"\$HF_TOKEN\""
105
+ fi
106
+
107
+ # Endpoint configuration (non-default values only)
108
+ <%
109
+ var epParts = [];
110
+ if (endpointInitialInstanceCount != null && endpointInitialInstanceCount !== 1) {
111
+ epParts.push('"initialInstanceCount":' + endpointInitialInstanceCount);
112
+ }
113
+ if (endpointDataCapturePercent != null && endpointDataCapturePercent !== 0) {
114
+ epParts.push('"dataCapturePercent":' + endpointDataCapturePercent);
115
+ }
116
+ if (endpointVariantName != null && endpointVariantName !== 'AllTraffic') {
117
+ epParts.push('"variantName":"' + endpointVariantName + '"');
118
+ }
119
+ if (endpointVolumeSize != null) {
120
+ epParts.push('"volumeSize":' + endpointVolumeSize);
121
+ }
122
+ %>
123
+ <% if (epParts.length > 0) { %>
124
+ JSON="${JSON},\"endpointConfig\":{<%- epParts.join(',') %>}"
125
+ <% } %>
126
+
127
+ # IC configuration (non-default values only)
128
+ <%
129
+ var icParts = [];
130
+ if (icCpuCount != null) {
131
+ icParts.push('"cpuCount":' + icCpuCount);
132
+ }
133
+ if (icMemorySize != null) {
134
+ icParts.push('"memorySize":' + icMemorySize);
135
+ }
136
+ if (icGpuCount != null) {
137
+ icParts.push('"gpuCount":' + icGpuCount);
138
+ }
139
+ if (icCopyCount != null && icCopyCount !== 1) {
140
+ icParts.push('"copyCount":' + icCopyCount);
141
+ }
142
+ if (icModelWeight != null && icModelWeight !== 1.0) {
143
+ icParts.push('"modelWeight":' + icModelWeight);
144
+ }
145
+ %>
146
+ <% if (icParts.length > 0) { %>
147
+ JSON="${JSON},\"icConfig\":{<%- icParts.join(',') %>}"
148
+ <% } %>
149
+
150
+ # Model environment variables
151
+ <% if (modelEnvVars && Object.keys(modelEnvVars).length > 0) { %>
152
+ <%
153
+ var mParts = Object.entries(modelEnvVars).map(function(entry) {
154
+ return '"' + entry[0] + '":"' + entry[1] + '"';
155
+ });
156
+ %>
157
+ JSON="${JSON},\"modelEnvVars\":{<%- mParts.join(',') %>}"
158
+ <% } %>
159
+
160
+ # Server environment variables
161
+ <% if (serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
162
+ <%
163
+ var sParts = Object.entries(serverEnvVars).map(function(entry) {
164
+ return '"' + entry[0] + '":"' + entry[1] + '"';
165
+ });
166
+ %>
167
+ JSON="${JSON},\"serverEnvVars\":{<%- sParts.join(',') %>}"
168
+ <% } %>
169
+
170
+ JSON="${JSON}}"
171
+
172
+ echo "${JSON}"
173
+ exit 0
174
+ fi
175
+
176
+ # ── CLI command output mode (default) ────────────────────────────────────────
177
+
178
+ # Build the command
179
+ CMD="ml-container-creator"
180
+ CMD="${CMD} --project-name=${PROJECT_NAME}"
181
+
182
+ # Use deployment-config if available (bundles framework + model server)
183
+ if [ -n "${DEPLOYMENT_CONFIG:-}" ]; then
184
+ CMD="${CMD} --deployment-config=${DEPLOYMENT_CONFIG}"
185
+ else
186
+ CMD="${CMD} --framework=${FRAMEWORK}"
187
+ CMD="${CMD} --model-server=${MODEL_SERVER}"
188
+ fi
189
+
190
+ # Model format (traditional ML only)
191
+ if [ -n "${MODEL_FORMAT:-}" ]; then
192
+ CMD="${CMD} --model-format=${MODEL_FORMAT}"
193
+ fi
194
+
195
+ # Model name (transformers only)
196
+ if [ -n "${MODEL_NAME:-}" ]; then
197
+ CMD="${CMD} --model-name=${MODEL_NAME}"
198
+ fi
199
+
200
+ # Build configuration
201
+ CMD="${CMD} --build-target=${BUILD_TARGET}"
202
+ if [ "${BUILD_TARGET}" = "codebuild" ] && [ -n "${CODEBUILD_COMPUTE_TYPE:-}" ]; then
203
+ CMD="${CMD} --codebuild-compute-type=${CODEBUILD_COMPUTE_TYPE}"
204
+ fi
205
+
206
+ # Deployment target
207
+ CMD="${CMD} --deployment-target=${DEPLOYMENT_TARGET}"
208
+
209
+ <% if (deploymentTarget === 'managed-inference') { %>
210
+ # SageMaker Managed Inference
211
+ CMD="${CMD} --instance-type=${INSTANCE_TYPE}"
212
+ <% } else if (deploymentTarget === 'batch-transform') { %>
213
+ # SageMaker Managed Inference - Batch
214
+ CMD="${CMD} --instance-type=${INSTANCE_TYPE}"
215
+ CMD="${CMD} --batch-input-path=${BATCH_INPUT_PATH}"
216
+ CMD="${CMD} --batch-output-path=${BATCH_OUTPUT_PATH}"
217
+ CMD="${CMD} --batch-instance-count=${BATCH_INSTANCE_COUNT}"
218
+ CMD="${CMD} --batch-split-type=${BATCH_SPLIT_TYPE}"
219
+ CMD="${CMD} --batch-strategy=${BATCH_STRATEGY}"
220
+ if [ "${BATCH_JOIN_SOURCE:-None}" != "None" ]; then
221
+ CMD="${CMD} --batch-join-source=${BATCH_JOIN_SOURCE}"
222
+ fi
223
+ if [ "${BATCH_MAX_CONCURRENT_TRANSFORMS:-1}" != "1" ]; then
224
+ CMD="${CMD} --batch-max-concurrent=${BATCH_MAX_CONCURRENT_TRANSFORMS}"
225
+ fi
226
+ if [ "${BATCH_MAX_PAYLOAD_IN_MB:-6}" != "6" ]; then
227
+ CMD="${CMD} --batch-max-payload=${BATCH_MAX_PAYLOAD_IN_MB}"
228
+ fi
229
+ <% } else if (deploymentTarget === 'hyperpod-eks') { %>
230
+ # HyperPod EKS
231
+ CMD="${CMD} --hyperpod-cluster=${HYPERPOD_CLUSTER_NAME}"
232
+ if [ "${HYPERPOD_NAMESPACE}" != "default" ]; then
233
+ CMD="${CMD} --hyperpod-namespace=${HYPERPOD_NAMESPACE}"
234
+ fi
235
+ if [ "${HYPERPOD_REPLICAS}" != "1" ]; then
236
+ CMD="${CMD} --hyperpod-replicas=${HYPERPOD_REPLICAS}"
237
+ fi
238
+ <% if (fsxVolumeHandle) { %>
239
+ CMD="${CMD} --fsx-volume-handle=${FSX_VOLUME_HANDLE}"
240
+ <% } %>
241
+ <% } %>
242
+
243
+ # AWS region
244
+ CMD="${CMD} --region=${AWS_REGION}"
245
+
246
+ # Role ARN
247
+ if [ -n "${ROLE_ARN:-}" ]; then
248
+ CMD="${CMD} --role-arn=${ROLE_ARN}"
249
+ fi
250
+
251
+ # HuggingFace token — reference env var, don't leak the actual value
252
+ if [ -n "${HF_TOKEN:-}" ]; then
253
+ CMD="${CMD} --hf-token=\$HF_TOKEN"
254
+ fi
255
+
256
+ # Endpoint configuration (non-default values only)
257
+ <% if (endpointInitialInstanceCount != null && endpointInitialInstanceCount !== 1) { %>
258
+ CMD="${CMD} --endpoint-initial-instance-count=<%= endpointInitialInstanceCount %>"
259
+ <% } %>
260
+ <% if (endpointDataCapturePercent != null && endpointDataCapturePercent !== 0) { %>
261
+ CMD="${CMD} --endpoint-data-capture-percent=<%= endpointDataCapturePercent %>"
262
+ <% } %>
263
+ <% if (endpointVariantName != null && endpointVariantName !== 'AllTraffic') { %>
264
+ CMD="${CMD} --endpoint-variant-name=<%= endpointVariantName %>"
265
+ <% } %>
266
+ <% if (endpointVolumeSize != null) { %>
267
+ CMD="${CMD} --endpoint-volume-size=<%= endpointVolumeSize %>"
268
+ <% } %>
269
+
270
+ # IC configuration (non-default values only)
271
+ <% if (icCpuCount != null) { %>
272
+ CMD="${CMD} --ic-cpu-count=<%= icCpuCount %>"
273
+ <% } %>
274
+ <% if (icMemorySize != null) { %>
275
+ CMD="${CMD} --ic-memory-size=<%= icMemorySize %>"
276
+ <% } %>
277
+ <% if (icGpuCount != null) { %>
278
+ CMD="${CMD} --ic-gpu-count=<%= icGpuCount %>"
279
+ <% } %>
280
+ <% if (icCopyCount != null && icCopyCount !== 1) { %>
281
+ CMD="${CMD} --ic-copy-count=<%= icCopyCount %>"
282
+ <% } %>
283
+ <% if (icModelWeight != null && icModelWeight !== 1.0) { %>
284
+ CMD="${CMD} --ic-model-weight=<%= icModelWeight %>"
285
+ <% } %>
286
+
287
+ # Model environment variables
288
+ <% if (modelEnvVars && Object.keys(modelEnvVars).length > 0) { %>
289
+ <% Object.entries(modelEnvVars).forEach(([key, value]) => { %>
290
+ CMD="${CMD} --model-env=<%= key %>=<%= value %>"
291
+ <% }); %>
292
+ <% } %>
293
+
294
+ # Server environment variables
295
+ <% if (serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
296
+ <% Object.entries(serverEnvVars).forEach(([key, value]) => { %>
297
+ CMD="${CMD} --server-env=<%= key %>=<%= value %>"
298
+ <% }); %>
299
+ <% } %>
300
+
301
+ CMD="${CMD} --skip-prompts"
302
+
303
+ echo ""
304
+ echo "# Reproduce this project with a single command:"
305
+ echo "${CMD}"
306
+ echo ""
@@ -0,0 +1,319 @@
1
+ #!/bin/bash
2
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ set -e
6
+ set -u
7
+ set -o pipefail
8
+
9
+ # Source configuration
10
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
+ source "${SCRIPT_DIR}/config"
12
+
13
+ <% if (deploymentTarget === 'managed-inference') { %>
14
+ # ============================================================
15
+ # SageMaker Managed Inference Logs (CloudWatch)
16
+ # ============================================================
17
+
18
+ # Allow inference component name as argument or from config
19
+ IC_NAME="${1:-${INFERENCE_COMPONENT_NAME:-}}"
20
+ ENDPOINT="${ENDPOINT_NAME:-}"
21
+
22
+ if [ -z "${IC_NAME}" ] && [ -z "${ENDPOINT}" ]; then
23
+ echo "❌ No inference component or endpoint name provided"
24
+ echo ""
25
+ echo "Usage:"
26
+ echo " ./do/logs <inference-component-name>"
27
+ echo " ./do/logs # uses INFERENCE_COMPONENT_NAME from do/config"
28
+ echo ""
29
+ echo "Run ./do/deploy first to set INFERENCE_COMPONENT_NAME automatically."
30
+ exit 1
31
+ fi
32
+
33
+ # Inference component logs live under the endpoint log group
34
+ # but in log streams named after the inference component
35
+ if [ -z "${ENDPOINT}" ]; then
36
+ echo "⚠️ ENDPOINT_NAME not set in config — cannot determine log group"
37
+ echo " Run ./do/deploy first, or set ENDPOINT_NAME in do/config"
38
+ exit 1
39
+ fi
40
+
41
+ LOG_GROUP="/aws/sagemaker/InferenceComponents/${IC_NAME}"
42
+
43
+ echo "📋 Tailing logs for inference component: ${IC_NAME}"
44
+ echo " Endpoint: ${ENDPOINT}"
45
+ echo " Log group: ${LOG_GROUP}"
46
+ echo " Region: ${AWS_REGION}"
47
+ echo ""
48
+ echo " Press Ctrl+C to stop"
49
+ echo ""
50
+ echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
51
+ echo ""
52
+
53
+ # Wait for log group to exist before tailing
54
+ MAX_WAIT=300
55
+ INTERVAL=10
56
+ ELAPSED=0
57
+
58
+ while true; do
59
+ if aws logs describe-log-groups \
60
+ --log-group-name-prefix "${LOG_GROUP}" \
61
+ --region "${AWS_REGION}" \
62
+ --query "logGroups[?logGroupName=='${LOG_GROUP}'].logGroupName" \
63
+ --output text 2>/dev/null | grep -q "${LOG_GROUP}"; then
64
+ break
65
+ fi
66
+
67
+ if [ "${ELAPSED}" -ge "${MAX_WAIT}" ]; then
68
+ echo "❌ Timed out after ${MAX_WAIT}s waiting for log group: ${LOG_GROUP}"
69
+ echo ""
70
+ echo " The inference component may have failed to start."
71
+ echo " Check the endpoint log group for errors:"
72
+ echo " aws logs tail /aws/sagemaker/Endpoints/${ENDPOINT} --follow --region ${AWS_REGION}"
73
+ exit 1
74
+ fi
75
+
76
+ if [ "${ELAPSED}" -eq 0 ]; then
77
+ echo "⏳ Log group not found yet: ${LOG_GROUP}"
78
+ echo " The inference component may still be starting up. Waiting up to ${MAX_WAIT}s..."
79
+ echo ""
80
+ fi
81
+
82
+ sleep "${INTERVAL}"
83
+ ELAPSED=$((ELAPSED + INTERVAL))
84
+ echo " Waiting for log group... (${ELAPSED}s/${MAX_WAIT}s)"
85
+ done
86
+
87
+ echo "✅ Log group found. Tailing logs..."
88
+ echo ""
89
+
90
+ # Tail logs
91
+ aws logs tail "${LOG_GROUP}" \
92
+ --region "${AWS_REGION}" \
93
+ --follow \
94
+ --format short
95
+
96
+ <% } else if (deploymentTarget === 'async-inference') { %>
97
+ # ============================================================
98
+ # SageMaker Managed Inference - Async Logs (CloudWatch)
99
+ # ============================================================
100
+
101
+ ENDPOINT="${1:-${ENDPOINT_NAME:-}}"
102
+
103
+ if [ -z "${ENDPOINT}" ]; then
104
+ echo "❌ No endpoint name provided"
105
+ echo ""
106
+ echo "Usage:"
107
+ echo " ./do/logs <endpoint-name>"
108
+ echo " ./do/logs # uses ENDPOINT_NAME from do/config"
109
+ echo ""
110
+ echo "Run ./do/deploy first to set ENDPOINT_NAME automatically."
111
+ exit 1
112
+ fi
113
+
114
+ LOG_GROUP="/aws/sagemaker/Endpoints/${ENDPOINT}"
115
+
116
+ echo "📋 Tailing logs for async inference endpoint: ${ENDPOINT}"
117
+ echo " Log group: ${LOG_GROUP}"
118
+ echo " Region: ${AWS_REGION}"
119
+ echo ""
120
+ echo " Press Ctrl+C to stop"
121
+ echo ""
122
+ echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
123
+ echo ""
124
+
125
+ # Wait for log group to exist before tailing
126
+ MAX_WAIT=300
127
+ INTERVAL=10
128
+ ELAPSED=0
129
+
130
+ while true; do
131
+ if aws logs describe-log-groups \
132
+ --log-group-name-prefix "${LOG_GROUP}" \
133
+ --region "${AWS_REGION}" \
134
+ --query "logGroups[?logGroupName=='${LOG_GROUP}'].logGroupName" \
135
+ --output text 2>/dev/null | grep -q "${LOG_GROUP}"; then
136
+ break
137
+ fi
138
+
139
+ if [ "${ELAPSED}" -ge "${MAX_WAIT}" ]; then
140
+ echo "❌ Timed out after ${MAX_WAIT}s waiting for log group: ${LOG_GROUP}"
141
+ echo ""
142
+ echo " The endpoint may have failed to start."
143
+ echo " Check the SageMaker console for endpoint status."
144
+ exit 1
145
+ fi
146
+
147
+ if [ "${ELAPSED}" -eq 0 ]; then
148
+ echo "⏳ Log group not found yet: ${LOG_GROUP}"
149
+ echo " The endpoint may still be starting up. Waiting up to ${MAX_WAIT}s..."
150
+ echo ""
151
+ fi
152
+
153
+ sleep "${INTERVAL}"
154
+ ELAPSED=$((ELAPSED + INTERVAL))
155
+ echo " Waiting for log group... (${ELAPSED}s/${MAX_WAIT}s)"
156
+ done
157
+
158
+ echo "✅ Log group found. Tailing logs..."
159
+ echo ""
160
+
161
+ # Tail logs
162
+ aws logs tail "${LOG_GROUP}" \
163
+ --region "${AWS_REGION}" \
164
+ --follow \
165
+ --format short
166
+
167
+ <% } else if (deploymentTarget === 'batch-transform') { %>
168
+ # ============================================================
169
+ # SageMaker Managed Inference - Batch Logs (CloudWatch)
170
+ # ============================================================
171
+
172
+ # Allow transform job name as argument or from config
173
+ JOB_NAME="${1:-${TRANSFORM_JOB_NAME:-}}"
174
+
175
+ if [ -z "${JOB_NAME}" ]; then
176
+ echo "❌ No transform job name provided"
177
+ echo ""
178
+ echo "Usage:"
179
+ echo " ./do/logs <transform-job-name>"
180
+ echo " ./do/logs # uses TRANSFORM_JOB_NAME from do/config"
181
+ echo ""
182
+ echo "Run ./do/deploy first to set TRANSFORM_JOB_NAME automatically."
183
+ exit 1
184
+ fi
185
+
186
+ LOG_GROUP="/aws/sagemaker/TransformJobs"
187
+
188
+ echo "📋 Tailing logs for batch transform job: ${JOB_NAME}"
189
+ echo " Log group: ${LOG_GROUP}"
190
+ echo " Region: ${AWS_REGION}"
191
+ echo ""
192
+ echo " Press Ctrl+C to stop"
193
+ echo ""
194
+ echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
195
+ echo ""
196
+
197
+ # Wait for log group to exist before tailing
198
+ MAX_WAIT=300
199
+ INTERVAL=10
200
+ ELAPSED=0
201
+
202
+ while true; do
203
+ if aws logs describe-log-groups \
204
+ --log-group-name-prefix "${LOG_GROUP}" \
205
+ --region "${AWS_REGION}" \
206
+ --query "logGroups[?logGroupName=='${LOG_GROUP}'].logGroupName" \
207
+ --output text 2>/dev/null | grep -q "${LOG_GROUP}"; then
208
+ break
209
+ fi
210
+
211
+ if [ "${ELAPSED}" -ge "${MAX_WAIT}" ]; then
212
+ echo "❌ Timed out after ${MAX_WAIT}s waiting for log group: ${LOG_GROUP}"
213
+ echo ""
214
+ echo " The transform job may not have started logging yet."
215
+ echo " Check the job status:"
216
+ echo " aws sagemaker describe-transform-job --transform-job-name ${JOB_NAME} --region ${AWS_REGION}"
217
+ exit 1
218
+ fi
219
+
220
+ if [ "${ELAPSED}" -eq 0 ]; then
221
+ echo "⏳ Log group not found yet: ${LOG_GROUP}"
222
+ echo " The transform job may still be starting up. Waiting up to ${MAX_WAIT}s..."
223
+ echo ""
224
+ fi
225
+
226
+ sleep "${INTERVAL}"
227
+ ELAPSED=$((ELAPSED + INTERVAL))
228
+ echo " Waiting for log group... (${ELAPSED}s/${MAX_WAIT}s)"
229
+ done
230
+
231
+ echo "✅ Log group found. Tailing logs..."
232
+ echo ""
233
+
234
+ # Tail logs, filtering by transform job name
235
+ aws logs tail "${LOG_GROUP}" \
236
+ --region "${AWS_REGION}" \
237
+ --log-stream-name-prefix "${JOB_NAME}" \
238
+ --follow \
239
+ --format short
240
+
241
+ <% } else if (deploymentTarget === 'hyperpod-eks') { %>
242
+ # ============================================================
243
+ # HyperPod EKS Logs (kubectl)
244
+ # ============================================================
245
+
246
+ # Allow pod selector as argument, default to app label
247
+ POD_SELECTOR="${1:-app=${PROJECT_NAME}}"
248
+
249
+ echo "📋 Tailing logs for HyperPod EKS deployment"
250
+ echo " Cluster: ${HYPERPOD_CLUSTER_NAME}"
251
+ echo " Namespace: ${HYPERPOD_NAMESPACE}"
252
+ echo " Selector: ${POD_SELECTOR}"
253
+ echo " Region: ${AWS_REGION}"
254
+ echo ""
255
+
256
+ # Get kubeconfig for HyperPod cluster
257
+ echo "🔑 Configuring kubectl for HyperPod cluster..."
258
+ KUBECONFIG_PATH="${HOME}/.kube/hyperpod-${HYPERPOD_CLUSTER_NAME}"
259
+
260
+ EKS_CLUSTER_ARN=$(aws sagemaker describe-cluster \
261
+ --cluster-name "${HYPERPOD_CLUSTER_NAME}" \
262
+ --region "${AWS_REGION}" \
263
+ --query "Orchestrator.Eks.ClusterArn" \
264
+ --output text 2>&1) || {
265
+ echo "❌ Failed to describe HyperPod cluster: ${HYPERPOD_CLUSTER_NAME}"
266
+ echo ""
267
+ echo " Check that:"
268
+ echo " • The cluster name is correct"
269
+ echo " • The cluster exists in region: ${AWS_REGION}"
270
+ echo " • Your IAM user/role has permission to access the cluster"
271
+ exit 4
272
+ }
273
+
274
+ EKS_CLUSTER_NAME=$(echo "${EKS_CLUSTER_ARN}" | awk -F'/' '{print $NF}')
275
+
276
+ if ! aws eks update-kubeconfig \
277
+ --name "${EKS_CLUSTER_NAME}" \
278
+ --region "${AWS_REGION}" \
279
+ --kubeconfig "${KUBECONFIG_PATH}" 2>&1; then
280
+ echo "❌ Failed to configure kubectl for EKS cluster: ${EKS_CLUSTER_NAME}"
281
+ exit 4
282
+ fi
283
+
284
+ export KUBECONFIG="${KUBECONFIG_PATH}"
285
+
286
+ # Verify cluster connectivity
287
+ if ! kubectl cluster-info &> /dev/null; then
288
+ echo "❌ Cannot connect to HyperPod cluster"
289
+ echo ""
290
+ echo " Check that:"
291
+ echo " • The cluster is in 'InService' status"
292
+ echo " • Your network can reach the cluster API server"
293
+ exit 4
294
+ fi
295
+
296
+ # Check if any pods match the selector
297
+ POD_COUNT=$(kubectl get pods -n "${HYPERPOD_NAMESPACE}" -l "${POD_SELECTOR}" --no-headers 2>/dev/null | wc -l || echo "0")
298
+ if [ "${POD_COUNT}" -eq 0 ]; then
299
+ echo "⚠️ No pods found matching selector: ${POD_SELECTOR}"
300
+ echo " Namespace: ${HYPERPOD_NAMESPACE}"
301
+ echo ""
302
+ echo " Run ./do/deploy first to create the deployment."
303
+ echo ""
304
+ echo " To list all pods in the namespace:"
305
+ echo " kubectl get pods -n ${HYPERPOD_NAMESPACE}"
306
+ exit 1
307
+ fi
308
+
309
+ echo " Found ${POD_COUNT} pod(s) matching selector"
310
+ echo ""
311
+ echo " Press Ctrl+C to stop"
312
+ echo ""
313
+ echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
314
+ echo ""
315
+
316
+ # Tail logs from all matching pods
317
+ kubectl logs -f -l "${POD_SELECTOR}" -n "${HYPERPOD_NAMESPACE}" --all-containers --prefix
318
+
319
+ <% } %>
@@ -0,0 +1,12 @@
1
+ #!/bin/bash
2
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ # Asset manifest helper — records and queries deployed resources
6
+ # Usage:
7
+ # ./do/manifest add --type <resourceType> --id <resourceId> --project <projectName> [--meta <json>]
8
+ # ./do/manifest delete --id <resourceId>
9
+ # ./do/manifest list [--project <project>] [--status <status>] [--type <type>]
10
+
11
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
12
+ node "${SCRIPT_DIR}/lib/manifest-cli.js" "$@"