@aws/ml-container-creator 0.13.3 → 0.13.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -5
- package/infra/ci-harness/package-lock.json +1 -5
- package/package.json +5 -3
- package/pyproject.toml +21 -0
- package/requirements.txt +19 -0
- package/servers/instance-sizer/lib/model-resolver.js +127 -185
- package/servers/instance-sizer/lib/vram-estimator.js +86 -0
- package/servers/lib/catalogs/instances.json +0 -27
- package/src/app.js +2 -0
- package/src/lib/bootstrap-command-handler.js +35 -25
- package/src/lib/generated/cli-options.js +1 -1
- package/src/lib/generated/parameter-matrix.js +1 -1
- package/src/lib/generated/validation-rules.js +1 -1
- package/src/lib/prompt-runner.js +14 -31
- package/templates/IAM_PERMISSIONS.md +64 -13
- package/templates/do/.adapter_helper.py +451 -0
- package/templates/do/.benchmark_writer.py +13 -0
- package/templates/do/.stage_helper.py +419 -0
- package/templates/do/.tune_helper.py +218 -67
- package/templates/do/README.md +50 -604
- package/templates/do/__pycache__/.adapter_helper.cpython-312.pyc +0 -0
- package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
- package/templates/do/__pycache__/.tune_helper.cpython-312.pyc +0 -0
- package/templates/do/adapter +109 -4
- package/templates/do/benchmark +150 -12
- package/templates/do/build +2 -5
- package/templates/do/clean.d/async-inference.ejs +2 -5
- package/templates/do/clean.d/batch-transform.ejs +2 -5
- package/templates/do/clean.d/hyperpod-eks.ejs +2 -5
- package/templates/do/clean.d/managed-inference.ejs +2 -5
- package/templates/do/config +4 -0
- package/templates/do/deploy.d/async-inference.ejs +6 -9
- package/templates/do/deploy.d/batch-transform.ejs +4 -7
- package/templates/do/deploy.d/hyperpod-eks.ejs +1 -4
- package/templates/do/deploy.d/managed-inference.ejs +15 -6
- package/templates/do/lib/profile.sh +24 -15
- package/templates/do/push +2 -5
- package/templates/do/register +2 -5
- package/templates/do/stage +114 -292
- package/templates/do/submit +1 -4
- package/templates/do/tune +64 -10
- package/templates/MIGRATION.md +0 -488
- package/templates/TEMPLATE_SYSTEM.md +0 -243
package/templates/do/tune
CHANGED
|
@@ -16,10 +16,7 @@ source "${SCRIPT_DIR}/config"
|
|
|
16
16
|
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
17
17
|
|
|
18
18
|
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
19
|
-
|
|
20
|
-
set +u
|
|
21
|
-
TUNE_S3_BUCKET="${TUNE_S3_BUCKET:-mlcc-tune-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
22
|
-
set -u
|
|
19
|
+
TUNE_S3_BUCKET="${TUNE_S3_BUCKET:-mlcc-tune-${_PROFILE_accountId:-unknown}-${_PROFILE_awsRegion:-us-east-1}}"
|
|
23
20
|
|
|
24
21
|
# ── Constants ─────────────────────────────────────────────────────────────────
|
|
25
22
|
CATALOG_FILE="${SCRIPT_DIR}/.tune_catalog.json"
|
|
@@ -51,6 +48,7 @@ ARG_NO_STALE_WARNING=false
|
|
|
51
48
|
ARG_DISCOVER=false
|
|
52
49
|
ARG_DISCOVER_FILTER=""
|
|
53
50
|
ARG_COLUMN_MAP=""
|
|
51
|
+
ARG_TAKE=""
|
|
54
52
|
ARG_ACCEPT_EULA=false
|
|
55
53
|
|
|
56
54
|
|
|
@@ -165,6 +163,12 @@ _parse_args() {
|
|
|
165
163
|
shift
|
|
166
164
|
fi
|
|
167
165
|
;;
|
|
166
|
+
--take)
|
|
167
|
+
if [ -z "${2:-}" ]; then
|
|
168
|
+
echo "❌ --take requires an integer value"
|
|
169
|
+
exit 1
|
|
170
|
+
fi
|
|
171
|
+
ARG_TAKE="$2"; shift 2 ;;
|
|
168
172
|
*)
|
|
169
173
|
echo "❌ Unknown option: $1"
|
|
170
174
|
echo " Run ./do/tune --help for usage."
|
|
@@ -221,7 +225,8 @@ _show_help() {
|
|
|
221
225
|
echo ""
|
|
222
226
|
echo "Required:"
|
|
223
227
|
echo " --technique <t> Customization technique: sft, dpo, rlaif, rlvr"
|
|
224
|
-
echo " --dataset <source> Dataset: s3://bucket/path.jsonl or hf://org/name[/split]"
|
|
228
|
+
echo " --dataset <source> Dataset: s3://bucket/path.jsonl or hf://org/name[/split][?file=pattern]"
|
|
229
|
+
echo " ⚠️ Quote the URI if it contains ? or * to prevent shell expansion"
|
|
225
230
|
echo ""
|
|
226
231
|
echo "Model selection:"
|
|
227
232
|
echo " --model <id> JumpStart Hub content name to use for fine-tuning."
|
|
@@ -254,6 +259,13 @@ _show_help() {
|
|
|
254
259
|
echo " --no-wait Submit and exit without polling for completion"
|
|
255
260
|
echo " --status Show status of all tracked tune jobs"
|
|
256
261
|
echo ""
|
|
262
|
+
echo "Dataset options:"
|
|
263
|
+
echo " --column-map <map> Rename columns (e.g., prompt=question,completion=answer)"
|
|
264
|
+
echo " --take <n> Take only the first N records from the dataset"
|
|
265
|
+
echo ""
|
|
266
|
+
echo " Note: Always quote --dataset values containing ? or * characters."
|
|
267
|
+
echo " Unquoted, bash may interpret ? as a glob and * as a wildcard expansion."
|
|
268
|
+
echo ""
|
|
257
269
|
echo "Discovery and diagnostics:"
|
|
258
270
|
echo " --discover [filter] Query JumpStart Hub for tune-eligible models."
|
|
259
271
|
echo " Without a filter, shows models for the current family."
|
|
@@ -279,6 +291,12 @@ _show_help() {
|
|
|
279
291
|
echo " # Fine-tune a gated model (Meta Llama) — requires EULA acceptance:"
|
|
280
292
|
echo " ./do/tune --technique dpo --dataset hf://argilla/ultrafeedback-binarized-preferences-cleaned --accept-eula"
|
|
281
293
|
echo ""
|
|
294
|
+
echo " # Take only 500 records for a quick test run:"
|
|
295
|
+
echo " ./do/tune --technique sft --dataset hf://timdettmers/openassistant-guanaco --take 500"
|
|
296
|
+
echo ""
|
|
297
|
+
echo " # Filter files in a multi-file dataset (quote to prevent shell glob expansion):"
|
|
298
|
+
echo " ./do/tune --technique sft --dataset \"hf://Open-Orca/OpenOrca?file=1M-GPT4\""
|
|
299
|
+
echo ""
|
|
282
300
|
echo " # Discover available models:"
|
|
283
301
|
echo " ./do/tune --discover # Models for current family"
|
|
284
302
|
echo " ./do/tune --discover qwen # Filter by keyword"
|
|
@@ -332,13 +350,47 @@ _show_status() {
|
|
|
332
350
|
echo " Elapsed: ${mins}m ${secs}s"
|
|
333
351
|
fi
|
|
334
352
|
|
|
335
|
-
# Show output path if completed
|
|
353
|
+
# Show output path if completed — resolve if not yet set
|
|
336
354
|
local output_var="TUNE_ADAPTER_PATH_$(echo "${technique}" | tr '[:lower:]' '[:upper:]')"
|
|
337
355
|
local model_var="TUNE_MODEL_PATH_$(echo "${technique}" | tr '[:lower:]' '[:upper:]')"
|
|
338
356
|
if [ -n "${!output_var:-}" ]; then
|
|
339
357
|
echo " Output (adapter): ${!output_var}"
|
|
340
358
|
elif [ -n "${!model_var:-}" ]; then
|
|
341
359
|
echo " Output (model): ${!model_var}"
|
|
360
|
+
elif [ "${status}" = "Completed" ]; then
|
|
361
|
+
# Job is complete but output path not set — resolve now
|
|
362
|
+
echo " 🔄 Resolving artifacts..."
|
|
363
|
+
local training_type="${TUNE_TRAINING_TYPE:-lora}"
|
|
364
|
+
local resolve_result
|
|
365
|
+
resolve_result=$(python3 "${HELPER_SCRIPT}" resolve \
|
|
366
|
+
--job-name "${job_name}" \
|
|
367
|
+
--region "${AWS_REGION}" \
|
|
368
|
+
--training-type "${training_type}" \
|
|
369
|
+
--model-package-group "${PROJECT_NAME}-tune-models" 2>/dev/null) || resolve_result=""
|
|
370
|
+
|
|
371
|
+
if [ -n "${resolve_result}" ]; then
|
|
372
|
+
local artifact_path
|
|
373
|
+
artifact_path=$(echo "${resolve_result}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('artifact_path',''))" 2>/dev/null) || artifact_path=""
|
|
374
|
+
local output_type
|
|
375
|
+
output_type=$(echo "${resolve_result}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('output_type',''))" 2>/dev/null) || output_type=""
|
|
376
|
+
|
|
377
|
+
if [ -n "${artifact_path}" ]; then
|
|
378
|
+
local technique_upper
|
|
379
|
+
technique_upper=$(echo "${technique}" | tr '[:lower:]' '[:upper:]')
|
|
380
|
+
|
|
381
|
+
# Update config
|
|
382
|
+
if [ "${output_type}" = "adapter" ]; then
|
|
383
|
+
_update_config_var "TUNE_ADAPTER_PATH_${technique_upper}" "${artifact_path}"
|
|
384
|
+
echo " Output (adapter): ${artifact_path}"
|
|
385
|
+
else
|
|
386
|
+
_update_config_var "TUNE_MODEL_PATH_${technique_upper}" "${artifact_path}"
|
|
387
|
+
echo " Output (model): ${artifact_path}"
|
|
388
|
+
fi
|
|
389
|
+
_update_config_var "TUNE_OUTPUT_PATH_LATEST" "${artifact_path}"
|
|
390
|
+
_update_config_var "TUNE_OUTPUT_TYPE_LATEST" "${output_type}"
|
|
391
|
+
echo " ✅ Updated do/config with output paths"
|
|
392
|
+
fi
|
|
393
|
+
fi
|
|
342
394
|
fi
|
|
343
395
|
echo ""
|
|
344
396
|
fi
|
|
@@ -829,6 +881,9 @@ _validate_dataset() {
|
|
|
829
881
|
stage_args+=(--column-map "${ARG_COLUMN_MAP}")
|
|
830
882
|
fi
|
|
831
883
|
stage_args+=(--technique "${ARG_TECHNIQUE}")
|
|
884
|
+
if [ -n "${ARG_TAKE}" ]; then
|
|
885
|
+
stage_args+=(--take "${ARG_TAKE}")
|
|
886
|
+
fi
|
|
832
887
|
if [ -n "${hf_file}" ]; then
|
|
833
888
|
stage_args+=(--hf-file "${hf_file}")
|
|
834
889
|
fi
|
|
@@ -862,7 +917,8 @@ _validate_dataset() {
|
|
|
862
917
|
|
|
863
918
|
else
|
|
864
919
|
echo "❌ Invalid dataset format: ${dataset}"
|
|
865
|
-
echo " Expected: s3://bucket/path.jsonl or hf://org/name[/split]"
|
|
920
|
+
echo " Expected: s3://bucket/path.jsonl or hf://org/name[/split][?file=pattern]"
|
|
921
|
+
echo " Hint: Quote the value if it contains ? or * (e.g., \"hf://org/name?file=pattern\")"
|
|
866
922
|
exit 1
|
|
867
923
|
fi
|
|
868
924
|
}
|
|
@@ -1232,9 +1288,7 @@ _handle_interrupt() {
|
|
|
1232
1288
|
echo ""
|
|
1233
1289
|
echo "⚠️ Interrupted — job continues running in background"
|
|
1234
1290
|
echo " Job: ${job_name}"
|
|
1235
|
-
echo ""
|
|
1236
|
-
echo " Resume monitoring: ./do/tune --technique ${ARG_TECHNIQUE} --dataset ${ARG_DATASET}"
|
|
1237
|
-
echo " Check status: ./do/tune --status"
|
|
1291
|
+
echo " Check status: ./do/tune --status"
|
|
1238
1292
|
exit 130
|
|
1239
1293
|
}
|
|
1240
1294
|
|
package/templates/MIGRATION.md
DELETED
|
@@ -1,488 +0,0 @@
|
|
|
1
|
-
# Migration Guide: Legacy Scripts to do-framework
|
|
2
|
-
|
|
3
|
-
This guide helps you transition from the legacy `deploy/` scripts to the new do-framework commands.
|
|
4
|
-
|
|
5
|
-
## Why Migrate?
|
|
6
|
-
|
|
7
|
-
The do-framework provides:
|
|
8
|
-
|
|
9
|
-
- **Standardization**: Consistent interface across all ML Container Creator projects
|
|
10
|
-
- **Better Organization**: Clear separation of concerns with dedicated scripts
|
|
11
|
-
- **Enhanced Features**: More granular control over build, push, deploy, test, and cleanup
|
|
12
|
-
- **Community Standard**: Follows the widely adopted do-framework conventions
|
|
13
|
-
- **Improved Maintainability**: Centralized configuration in `do/config`
|
|
14
|
-
|
|
15
|
-
## Quick Reference
|
|
16
|
-
|
|
17
|
-
| Legacy Command | do-framework Command | Notes |
|
|
18
|
-
|----------------|---------------------|-------|
|
|
19
|
-
| `./deploy/build_and_push.sh` | `./do/build && ./do/push` | Now split into two commands |
|
|
20
|
-
| `./deploy/deploy.sh <role>` | `./do/deploy <role>` | Same functionality |
|
|
21
|
-
<% if (buildTarget === 'codebuild') { %>| `./deploy/submit_build.sh` | `./do/submit` | CodeBuild integration |
|
|
22
|
-
<% } %>| N/A | `./do/run` | New: Run container locally |
|
|
23
|
-
| N/A | `./do/test [endpoint]` | New: Test container or endpoint |
|
|
24
|
-
| N/A | `./do/clean <target>` | New: Clean up resources |
|
|
25
|
-
|
|
26
|
-
## Detailed Migration Steps
|
|
27
|
-
|
|
28
|
-
### Step 1: Understand the New Structure
|
|
29
|
-
|
|
30
|
-
The do-framework organizes scripts in the `do/` directory:
|
|
31
|
-
|
|
32
|
-
```
|
|
33
|
-
do/
|
|
34
|
-
├── config # Centralized configuration
|
|
35
|
-
├── build # Build Docker image
|
|
36
|
-
├── push # Push to ECR
|
|
37
|
-
├── deploy # Deploy to SageMaker
|
|
38
|
-
├── run # Run locally
|
|
39
|
-
├── test # Test container/endpoint
|
|
40
|
-
├── clean # Clean up resources
|
|
41
|
-
<% if (buildTarget === 'codebuild') { %>├── submit # Submit to CodeBuild
|
|
42
|
-
<% } %>└── README.md # Detailed documentation
|
|
43
|
-
```
|
|
44
|
-
|
|
45
|
-
### Step 2: Update Your Workflow
|
|
46
|
-
|
|
47
|
-
#### Old Workflow
|
|
48
|
-
|
|
49
|
-
```bash
|
|
50
|
-
# Build and push
|
|
51
|
-
./deploy/build_and_push.sh
|
|
52
|
-
|
|
53
|
-
# Deploy
|
|
54
|
-
./deploy/deploy.sh arn:aws:iam::123456789012:role/SageMakerRole
|
|
55
|
-
```
|
|
56
|
-
|
|
57
|
-
#### New Workflow
|
|
58
|
-
|
|
59
|
-
```bash
|
|
60
|
-
# Build
|
|
61
|
-
./do/build
|
|
62
|
-
|
|
63
|
-
# Test locally (optional but recommended)
|
|
64
|
-
./do/run &
|
|
65
|
-
./do/test
|
|
66
|
-
|
|
67
|
-
# Push to ECR
|
|
68
|
-
./do/push
|
|
69
|
-
|
|
70
|
-
# Deploy to SageMaker
|
|
71
|
-
./do/deploy arn:aws:iam::123456789012:role/SageMakerRole
|
|
72
|
-
|
|
73
|
-
# Test the endpoint
|
|
74
|
-
./do/test <%= projectName %>-endpoint
|
|
75
|
-
```
|
|
76
|
-
|
|
77
|
-
<% if (buildTarget === 'codebuild') { %>#### CodeBuild Workflow
|
|
78
|
-
|
|
79
|
-
**Old**:
|
|
80
|
-
```bash
|
|
81
|
-
./deploy/submit_build.sh
|
|
82
|
-
./deploy/deploy.sh <role-arn>
|
|
83
|
-
```
|
|
84
|
-
|
|
85
|
-
**New**:
|
|
86
|
-
```bash
|
|
87
|
-
./do/submit # Builds and pushes via CodeBuild
|
|
88
|
-
./do/deploy <role-arn>
|
|
89
|
-
./do/test <%= projectName %>-endpoint
|
|
90
|
-
```
|
|
91
|
-
|
|
92
|
-
<% } %>### Step 3: Update Configuration
|
|
93
|
-
|
|
94
|
-
#### Old: Hardcoded in Scripts
|
|
95
|
-
|
|
96
|
-
Legacy scripts had configuration hardcoded or passed as arguments.
|
|
97
|
-
|
|
98
|
-
#### New: Centralized in do/config
|
|
99
|
-
|
|
100
|
-
All configuration is now in `do/config`:
|
|
101
|
-
|
|
102
|
-
```bash
|
|
103
|
-
# Edit do/config
|
|
104
|
-
export PROJECT_NAME="<%= projectName %>"
|
|
105
|
-
export AWS_REGION="<%= awsRegion %>"
|
|
106
|
-
export INSTANCE_TYPE="<%= instanceType %>"
|
|
107
|
-
export DEPLOYMENT_CONFIG="<%= deploymentConfig %>"
|
|
108
|
-
```
|
|
109
|
-
|
|
110
|
-
You can override these with environment variables:
|
|
111
|
-
|
|
112
|
-
```bash
|
|
113
|
-
AWS_REGION=us-west-2 ./do/push
|
|
114
|
-
INSTANCE_TYPE=ml.m5.2xlarge ./do/deploy <role-arn>
|
|
115
|
-
```
|
|
116
|
-
|
|
117
|
-
### Step 4: Update CI/CD Pipelines
|
|
118
|
-
|
|
119
|
-
#### Old Pipeline
|
|
120
|
-
|
|
121
|
-
```yaml
|
|
122
|
-
# .github/workflows/deploy.yml
|
|
123
|
-
- name: Build and Push
|
|
124
|
-
run: ./deploy/build_and_push.sh
|
|
125
|
-
|
|
126
|
-
- name: Deploy
|
|
127
|
-
run: ./deploy/deploy.sh ${{ secrets.SAGEMAKER_ROLE }}
|
|
128
|
-
```
|
|
129
|
-
|
|
130
|
-
#### New Pipeline
|
|
131
|
-
|
|
132
|
-
```yaml
|
|
133
|
-
# .github/workflows/deploy.yml
|
|
134
|
-
- name: Build
|
|
135
|
-
run: ./do/build
|
|
136
|
-
|
|
137
|
-
- name: Push
|
|
138
|
-
run: ./do/push
|
|
139
|
-
|
|
140
|
-
- name: Deploy
|
|
141
|
-
run: ./do/deploy ${{ secrets.SAGEMAKER_ROLE }}
|
|
142
|
-
|
|
143
|
-
- name: Test
|
|
144
|
-
run: ./do/test <%= projectName %>-endpoint
|
|
145
|
-
```
|
|
146
|
-
|
|
147
|
-
### Step 5: Update Documentation
|
|
148
|
-
|
|
149
|
-
Update any project documentation that references the old scripts:
|
|
150
|
-
|
|
151
|
-
**Find and replace**:
|
|
152
|
-
- `./deploy/build_and_push.sh` → `./do/build && ./do/push`
|
|
153
|
-
- `./deploy/deploy.sh` → `./do/deploy`
|
|
154
|
-
<% if (buildTarget === 'codebuild') { %>- `./deploy/submit_build.sh` → `./do/submit`
|
|
155
|
-
<% } %>
|
|
156
|
-
## Command Mapping Details
|
|
157
|
-
|
|
158
|
-
### Build and Push
|
|
159
|
-
|
|
160
|
-
**Legacy**:
|
|
161
|
-
```bash
|
|
162
|
-
./deploy/build_and_push.sh
|
|
163
|
-
```
|
|
164
|
-
|
|
165
|
-
This single script built the Docker image and pushed it to ECR.
|
|
166
|
-
|
|
167
|
-
**do-framework**:
|
|
168
|
-
```bash
|
|
169
|
-
./do/build # Build Docker image
|
|
170
|
-
./do/push # Push to ECR
|
|
171
|
-
```
|
|
172
|
-
|
|
173
|
-
**Why the change?** Separating build and push allows you to:
|
|
174
|
-
- Test the image locally before pushing
|
|
175
|
-
- Build once and push to multiple registries
|
|
176
|
-
- Skip pushing if you only need local testing
|
|
177
|
-
|
|
178
|
-
**Benefits**:
|
|
179
|
-
- Test locally with `./do/run` before pushing
|
|
180
|
-
- More granular control over the workflow
|
|
181
|
-
- Clearer error messages for each step
|
|
182
|
-
|
|
183
|
-
### Deploy
|
|
184
|
-
|
|
185
|
-
**Legacy**:
|
|
186
|
-
```bash
|
|
187
|
-
./deploy/deploy.sh arn:aws:iam::123456789012:role/SageMakerRole
|
|
188
|
-
```
|
|
189
|
-
|
|
190
|
-
**do-framework**:
|
|
191
|
-
```bash
|
|
192
|
-
./do/deploy arn:aws:iam::123456789012:role/SageMakerRole
|
|
193
|
-
```
|
|
194
|
-
|
|
195
|
-
**What's the same?**
|
|
196
|
-
- Same command-line interface
|
|
197
|
-
- Same functionality
|
|
198
|
-
- Same SageMaker endpoint creation
|
|
199
|
-
|
|
200
|
-
**What's different?**
|
|
201
|
-
- Better error messages
|
|
202
|
-
- Progress indicators
|
|
203
|
-
- Automatic endpoint status polling
|
|
204
|
-
- Displays test command when complete
|
|
205
|
-
|
|
206
|
-
<% if (buildTarget === 'codebuild') { %>### CodeBuild Submit
|
|
207
|
-
|
|
208
|
-
**Legacy**:
|
|
209
|
-
```bash
|
|
210
|
-
./deploy/submit_build.sh
|
|
211
|
-
```
|
|
212
|
-
|
|
213
|
-
**do-framework**:
|
|
214
|
-
```bash
|
|
215
|
-
./do/submit
|
|
216
|
-
```
|
|
217
|
-
|
|
218
|
-
**What's improved?**
|
|
219
|
-
- Better build progress monitoring
|
|
220
|
-
- Clearer error messages
|
|
221
|
-
- Automatic ECR image URI display
|
|
222
|
-
- Build log streaming
|
|
223
|
-
|
|
224
|
-
<% } %>### New Commands
|
|
225
|
-
|
|
226
|
-
The do-framework adds several new commands that weren't available with legacy scripts:
|
|
227
|
-
|
|
228
|
-
#### Run Locally
|
|
229
|
-
|
|
230
|
-
```bash
|
|
231
|
-
./do/run
|
|
232
|
-
```
|
|
233
|
-
|
|
234
|
-
Starts the container locally on port 8080 for testing before deployment.
|
|
235
|
-
|
|
236
|
-
**Use cases**:
|
|
237
|
-
- Test model loading
|
|
238
|
-
- Verify inference logic
|
|
239
|
-
- Debug issues locally
|
|
240
|
-
- Validate container configuration
|
|
241
|
-
|
|
242
|
-
#### Test
|
|
243
|
-
|
|
244
|
-
```bash
|
|
245
|
-
# Test local container
|
|
246
|
-
./do/test
|
|
247
|
-
|
|
248
|
-
# Test SageMaker endpoint
|
|
249
|
-
./do/test <%= projectName %>-endpoint
|
|
250
|
-
```
|
|
251
|
-
|
|
252
|
-
Sends health check and inference requests to validate functionality.
|
|
253
|
-
|
|
254
|
-
**Use cases**:
|
|
255
|
-
- Verify endpoints are working
|
|
256
|
-
- Validate inference responses
|
|
257
|
-
- Automated testing in CI/CD
|
|
258
|
-
- Quick smoke tests
|
|
259
|
-
|
|
260
|
-
#### Clean
|
|
261
|
-
|
|
262
|
-
```bash
|
|
263
|
-
# Remove local images
|
|
264
|
-
./do/clean local
|
|
265
|
-
|
|
266
|
-
# Remove ECR images
|
|
267
|
-
./do/clean ecr
|
|
268
|
-
|
|
269
|
-
# Delete SageMaker endpoint
|
|
270
|
-
./do/clean endpoint
|
|
271
|
-
|
|
272
|
-
# Clean everything
|
|
273
|
-
./do/clean all
|
|
274
|
-
```
|
|
275
|
-
|
|
276
|
-
Manages cleanup of resources across different environments.
|
|
277
|
-
|
|
278
|
-
**Use cases**:
|
|
279
|
-
- Free up disk space
|
|
280
|
-
- Remove old ECR images
|
|
281
|
-
- Delete test endpoints
|
|
282
|
-
- Complete project cleanup
|
|
283
|
-
|
|
284
|
-
## Configuration Changes
|
|
285
|
-
|
|
286
|
-
### Legacy Configuration
|
|
287
|
-
|
|
288
|
-
Configuration was scattered across multiple scripts:
|
|
289
|
-
|
|
290
|
-
```bash
|
|
291
|
-
# In deploy/build_and_push.sh
|
|
292
|
-
PROJECT_NAME="my-model"
|
|
293
|
-
REGION="us-east-1"
|
|
294
|
-
|
|
295
|
-
# In deploy/deploy.sh
|
|
296
|
-
INSTANCE_TYPE="ml.m5.xlarge"
|
|
297
|
-
```
|
|
298
|
-
|
|
299
|
-
### do-framework Configuration
|
|
300
|
-
|
|
301
|
-
All configuration is centralized in `do/config`:
|
|
302
|
-
|
|
303
|
-
```bash
|
|
304
|
-
# do/config
|
|
305
|
-
export PROJECT_NAME="<%= projectName %>"
|
|
306
|
-
export DEPLOYMENT_CONFIG="<%= deploymentConfig %>"
|
|
307
|
-
export FRAMEWORK="<%= framework %>"
|
|
308
|
-
export MODEL_SERVER="<%= modelServer %>"
|
|
309
|
-
export AWS_REGION="<%= awsRegion %>"
|
|
310
|
-
export INSTANCE_TYPE="<%= instanceType %>"
|
|
311
|
-
export ECR_REPOSITORY_NAME="ml-container-creator"
|
|
312
|
-
<% if (buildTarget === 'codebuild') { %>export BUILD_TARGET="codebuild"
|
|
313
|
-
export CODEBUILD_COMPUTE_TYPE="<%= codebuildComputeType %>"
|
|
314
|
-
<% } %><% if (framework === 'transformers') { %>export MODEL_NAME="<%= modelName %>"
|
|
315
|
-
<% if (hfToken) { %>export HF_TOKEN="<%= hfToken %>"
|
|
316
|
-
<% } %><% } %>
|
|
317
|
-
```
|
|
318
|
-
|
|
319
|
-
**Benefits**:
|
|
320
|
-
- Single source of truth
|
|
321
|
-
- Easy to override with environment variables
|
|
322
|
-
- Clear documentation of all settings
|
|
323
|
-
- Consistent across all scripts
|
|
324
|
-
|
|
325
|
-
## Backward Compatibility
|
|
326
|
-
|
|
327
|
-
The legacy scripts are still available in the `deploy/` directory for backward compatibility:
|
|
328
|
-
|
|
329
|
-
```bash
|
|
330
|
-
./deploy/build_and_push.sh # Still works
|
|
331
|
-
./deploy/deploy.sh # Still works
|
|
332
|
-
<% if (buildTarget === 'codebuild') { %>./deploy/submit_build.sh # Still works
|
|
333
|
-
<% } %>
|
|
334
|
-
```
|
|
335
|
-
|
|
336
|
-
**However**:
|
|
337
|
-
- They display deprecation warnings
|
|
338
|
-
- They forward to do-framework commands
|
|
339
|
-
- They will be removed in a future version
|
|
340
|
-
|
|
341
|
-
**Deprecation timeline**:
|
|
342
|
-
- Current version: Legacy scripts work with warnings
|
|
343
|
-
- Next major version: Legacy scripts may be removed
|
|
344
|
-
- Recommendation: Migrate now to avoid future issues
|
|
345
|
-
|
|
346
|
-
## Troubleshooting Migration
|
|
347
|
-
|
|
348
|
-
### Issue: "Command not found"
|
|
349
|
-
|
|
350
|
-
**Problem**: Running `./do/build` fails with `Permission denied` (sometimes reported as `command not found`)
|
|
351
|
-
|
|
352
|
-
**Solution**: Ensure scripts are executable:
|
|
353
|
-
```bash
|
|
354
|
-
chmod +x do/*
|
|
355
|
-
```
|
|
356
|
-
|
|
357
|
-
The generator should set this automatically, but if you copied files manually, you may need to set permissions.
|
|
358
|
-
|
|
359
|
-
### Issue: "Configuration variable not set"
|
|
360
|
-
|
|
361
|
-
**Problem**: `PROJECT_NAME not set in do/config`
|
|
362
|
-
|
|
363
|
-
**Solution**: Ensure `do/config` is properly sourced:
|
|
364
|
-
```bash
|
|
365
|
-
# Check if config exists
|
|
366
|
-
cat do/config
|
|
367
|
-
|
|
368
|
-
# Manually source to test
|
|
369
|
-
source do/config
|
|
370
|
-
echo $PROJECT_NAME
|
|
371
|
-
```
|
|
372
|
-
|
|
373
|
-
### Issue: "AWS credentials not configured"
|
|
374
|
-
|
|
375
|
-
**Problem**: `AWS credentials not configured`
|
|
376
|
-
|
|
377
|
-
**Solution**: Configure AWS CLI:
|
|
378
|
-
```bash
|
|
379
|
-
aws configure
|
|
380
|
-
# Or set environment variables
|
|
381
|
-
export AWS_ACCESS_KEY_ID=your-key
|
|
382
|
-
export AWS_SECRET_ACCESS_KEY=your-secret
|
|
383
|
-
```
|
|
384
|
-
|
|
385
|
-
### Issue: "Docker permission denied"
|
|
386
|
-
|
|
387
|
-
**Problem**: `permission denied while trying to connect to the Docker daemon`
|
|
388
|
-
|
|
389
|
-
**Solution**: Add user to docker group:
|
|
390
|
-
```bash
|
|
391
|
-
sudo usermod -aG docker $USER
|
|
392
|
-
# Log out and back in for changes to take effect
|
|
393
|
-
```
|
|
394
|
-
|
|
395
|
-
### Issue: Legacy scripts not working
|
|
396
|
-
|
|
397
|
-
**Problem**: Legacy scripts fail after migration
|
|
398
|
-
|
|
399
|
-
**Solution**:
|
|
400
|
-
1. Check that do-framework scripts work: `./do/build`
|
|
401
|
-
2. Verify do/config exists and is valid
|
|
402
|
-
3. Check script permissions: `ls -la do/`
|
|
403
|
-
4. Review deprecation warnings for guidance
|
|
404
|
-
|
|
405
|
-
## FAQ
|
|
406
|
-
|
|
407
|
-
### Q: Do I have to migrate immediately?
|
|
408
|
-
|
|
409
|
-
**A**: No, legacy scripts still work. However, we recommend migrating to benefit from new features and avoid future compatibility issues.
|
|
410
|
-
|
|
411
|
-
### Q: Can I use both legacy and do-framework commands?
|
|
412
|
-
|
|
413
|
-
**A**: Yes, but it's not recommended. Choose one approach for consistency.
|
|
414
|
-
|
|
415
|
-
### Q: Will my existing CI/CD pipelines break?
|
|
416
|
-
|
|
417
|
-
**A**: No, legacy scripts still work. But you should update pipelines to use do-framework commands for better features and future compatibility.
|
|
418
|
-
|
|
419
|
-
### Q: What if I have custom modifications to legacy scripts?
|
|
420
|
-
|
|
421
|
-
**A**: Review your modifications and apply them to the appropriate do-framework scripts. The modular structure makes customization easier.
|
|
422
|
-
|
|
423
|
-
### Q: Can I customize do-framework scripts?
|
|
424
|
-
|
|
425
|
-
**A**: Yes! The scripts are designed to be customizable. Edit them as needed for your use case.
|
|
426
|
-
|
|
427
|
-
### Q: Where can I find detailed documentation?
|
|
428
|
-
|
|
429
|
-
**A**: See `do/README.md` for comprehensive documentation of all do-framework commands.
|
|
430
|
-
|
|
431
|
-
### Q: What if I encounter issues during migration?
|
|
432
|
-
|
|
433
|
-
**A**:
|
|
434
|
-
1. Check this migration guide
|
|
435
|
-
2. Review `do/README.md`
|
|
436
|
-
3. Check CloudWatch logs for deployment issues
|
|
437
|
-
4. Open an issue on the ML Container Creator repository
|
|
438
|
-
|
|
439
|
-
## Benefits Summary
|
|
440
|
-
|
|
441
|
-
### For Developers
|
|
442
|
-
|
|
443
|
-
- **Clearer workflow**: Separate commands for each step
|
|
444
|
-
- **Better testing**: Test locally before deploying
|
|
445
|
-
- **Easier debugging**: Granular control over each phase
|
|
446
|
-
- **Consistent interface**: Same commands across all projects
|
|
447
|
-
|
|
448
|
-
### For Teams
|
|
449
|
-
|
|
450
|
-
- **Standardization**: Everyone uses the same commands
|
|
451
|
-
- **Better documentation**: Clear, comprehensive guides
|
|
452
|
-
- **Easier onboarding**: New team members learn one system
|
|
453
|
-
- **Community alignment**: Follows do-framework conventions
|
|
454
|
-
|
|
455
|
-
### For CI/CD
|
|
456
|
-
|
|
457
|
-
- **More control**: Fine-grained pipeline steps
|
|
458
|
-
- **Better error handling**: Clear failure points
|
|
459
|
-
- **Easier testing**: Test at each stage
|
|
460
|
-
- **Improved monitoring**: Track each step separately
|
|
461
|
-
|
|
462
|
-
## Next Steps
|
|
463
|
-
|
|
464
|
-
1. **Read** `do/README.md` for detailed command documentation
|
|
465
|
-
2. **Test** the new commands in a development environment
|
|
466
|
-
3. **Update** your CI/CD pipelines
|
|
467
|
-
4. **Update** your team documentation
|
|
468
|
-
5. **Remove** references to legacy scripts from your workflows
|
|
469
|
-
|
|
470
|
-
## Additional Resources
|
|
471
|
-
|
|
472
|
-
- [do-framework Documentation](https://github.com/iankoulski/do-framework)
|
|
473
|
-
- [ML Container Creator Documentation](https://github.com/yourusername/ml-container-creator)
|
|
474
|
-
- [AWS SageMaker BYOC Guide](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms.html)
|
|
475
|
-
|
|
476
|
-
## Feedback
|
|
477
|
-
|
|
478
|
-
We'd love to hear about your migration experience! If you encounter issues or have suggestions, please:
|
|
479
|
-
|
|
480
|
-
1. Open an issue on the ML Container Creator repository
|
|
481
|
-
2. Share your feedback with the team
|
|
482
|
-
3. Contribute improvements to this guide
|
|
483
|
-
|
|
484
|
-
---
|
|
485
|
-
|
|
486
|
-
**Last Updated**: <%= buildTimestamp %>
|
|
487
|
-
|
|
488
|
-
**Generated by**: ML Container Creator v2.0 (do-framework integration)
|