@aws/ml-container-creator 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/bin/cli.js +5 -2
  2. package/config/bootstrap-stack.json +40 -9
  3. package/infra/ci-harness/buildspec.yml +60 -0
  4. package/infra/ci-harness/package-lock.json +5 -1
  5. package/package.json +1 -1
  6. package/servers/README.md +41 -1
  7. package/servers/instance-sizer/index.js +10 -4
  8. package/servers/instance-sizer/lib/model-resolver.js +1 -1
  9. package/servers/lib/catalogs/model-sizes.json +135 -90
  10. package/servers/lib/catalogs/models.json +483 -411
  11. package/src/app.js +33 -2
  12. package/src/lib/bootstrap-command-handler.js +6 -0
  13. package/src/lib/cli-handler.js +1 -1
  14. package/src/lib/config-manager.js +41 -2
  15. package/src/lib/deployment-entry-schema.js +16 -0
  16. package/src/lib/mcp-client.js +3 -3
  17. package/src/lib/prompt-runner.js +179 -8
  18. package/src/lib/prompts.js +253 -7
  19. package/src/lib/registry-command-handler.js +12 -0
  20. package/templates/Dockerfile +12 -0
  21. package/templates/code/serving.properties +14 -0
  22. package/templates/do/adapter +1230 -0
  23. package/templates/do/adapters/.gitkeep +2 -0
  24. package/templates/do/add-ic +130 -0
  25. package/templates/do/benchmark +81 -9
  26. package/templates/do/clean +507 -17
  27. package/templates/do/config +28 -5
  28. package/templates/do/deploy +513 -367
  29. package/templates/do/ic/default.conf +32 -0
  30. package/templates/do/lib/endpoint-config.sh +216 -0
  31. package/templates/do/lib/inference-component.sh +167 -0
  32. package/templates/do/lib/secrets.sh +44 -0
  33. package/templates/do/lib/wait.sh +131 -0
  34. package/templates/do/logs +107 -27
  35. package/templates/do/optimize +528 -0
  36. package/templates/do/register +111 -1
  37. package/templates/do/status +337 -0
  38. package/templates/do/test +80 -28
@@ -0,0 +1,1230 @@
1
#!/bin/bash
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

# do/adapter — Manage LoRA adapter inference components
#
# Usage:
#   ./do/adapter add <name> --weights <s3-uri>
#   ./do/adapter list
#   ./do/adapter remove <name>
#   ./do/adapter update <name> --weights <new-s3-uri>
#   ./do/adapter --help

# Strict mode: abort on command failure (-e), on use of an unset variable (-u),
# and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# ── Source project configuration ──────────────────────────────────────────────
# SCRIPT_DIR is the absolute path of the directory holding this script; the
# project config and the wait helpers are loaded relative to it.
# NOTE(review): lib/wait.sh presumably provides wait_ic and _get_ic_status,
# which are called below but not defined in this file — confirm.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/config"
source "${SCRIPT_DIR}/lib/wait.sh"
22
+
23
# ── Usage ─────────────────────────────────────────────────────────────────────
# Print the top-level help text to stdout.
#
# Globals:
#   ENDPOINT_NAME (read) — shown in the banner; "<not deployed>" when unset/empty
# Arguments: none
# Returns: 0
_usage() {
    local endpoint_label="${ENDPOINT_NAME:-<not deployed>}"

    # One printf call emits every line; each argument is one output line.
    printf '%s\n' \
        "Usage: ./do/adapter <command> [options]" \
        "" \
        "Manage LoRA adapter inference components on endpoint: ${endpoint_label}" \
        "" \
        "Commands:" \
        " add <name> --weights <s3-uri> Add a new LoRA adapter from S3" \
        " add <name> --from-hub <hf-repo-id> Add a new LoRA adapter from HuggingFace Hub" \
        " list List all adapters on the endpoint" \
        " remove <name> Remove an adapter" \
        " update <name> --weights <new-s3-uri> Update adapter weights from S3" \
        " update <name> --from-hub <hf-repo-id> Update adapter weights from HuggingFace Hub" \
        " search [--limit N] Search HuggingFace Hub for compatible adapters" \
        "" \
        "Options:" \
        " --help, -h Show this help message" \
        "" \
        "Examples:" \
        " ./do/adapter add ectsum --weights s3://my-bucket/adapters/ectsum/adapter.tar.gz" \
        " ./do/adapter add ectsum --from-hub predibase/llama-3.1-8b-ectsum" \
        " ./do/adapter list" \
        " ./do/adapter remove ectsum" \
        " ./do/adapter update ectsum --weights s3://my-bucket/adapters/ectsum-v2/adapter.tar.gz" \
        " ./do/adapter update ectsum --from-hub predibase/llama-3.1-8b-ectsum-v2" \
        "" \
        "Adapter metadata is stored in do/adapters/<name>.conf" \
        "" \
        "Note: --weights and --from-hub are mutually exclusive."
}
53
+
54
# ── Validate LoRA is enabled ──────────────────────────────────────────────────
# Abort the script unless the project was configured for LoRA adapters.
#
# Globals:
#   ENABLE_LORA (read) — must be exactly "true"
# Returns: 0 when enabled; otherwise prints guidance to stdout and exits 1.
_validate_lora_enabled() {
    # Guard clause: nothing to report when LoRA is switched on.
    [ "${ENABLE_LORA:-}" = "true" ] && return 0

    printf '%s\n' \
        "❌ LoRA adapter serving is not enabled for this project." \
        "" \
        " ENABLE_LORA=true was not found in do/config." \
        "" \
        " To enable LoRA adapters, regenerate your project with --enable-lora" \
        " or add ENABLE_LORA=true to do/config and configure your model server" \
        " environment (e.g., VLLM_ENABLE_LORA=true)."
    exit 1
}
67
+
68
# ── Resolve base IC name ──────────────────────────────────────────────────────
# Print the name of the base inference component on stdout.
#
# Lookup order:
#   1. IC_DEPLOYED_NAME exported in do/ic/default.conf (multi-IC layout)
#   2. INFERENCE_COMPONENT_NAME from the sourced do/config (legacy layout)
#
# Globals:
#   SCRIPT_DIR (read), INFERENCE_COMPONENT_NAME (read, optional)
# Outputs:
#   stdout — the resolved name only
#   stderr — diagnostics when no name can be resolved
# Returns: 0 on success; exits 1 when no name is available.
#
# Callers capture stdout with $(...), so diagnostics must go to stderr —
# otherwise they would be swallowed into the captured value and never shown.
_resolve_base_ic_name() {
    local base_ic_name=""
    local ic_conf="${SCRIPT_DIR}/ic/default.conf"

    # Try multi-IC path first: do/ic/default.conf
    if [ -f "${ic_conf}" ]; then
        # grep exits non-zero when the key is absent; with pipefail that fails
        # the pipeline, so fall back to an empty value instead of aborting.
        base_ic_name=$(grep "^export IC_DEPLOYED_NAME=" "${ic_conf}" 2>/dev/null | sed 's/^export IC_DEPLOYED_NAME="//' | sed 's/"$//' || echo "")
    fi

    # Fallback to legacy config: INFERENCE_COMPONENT_NAME
    if [ -z "${base_ic_name}" ]; then
        base_ic_name="${INFERENCE_COMPONENT_NAME:-}"
    fi

    if [ -z "${base_ic_name}" ]; then
        {
            echo "❌ Cannot determine base inference component name."
            echo ""
            echo " No IC_DEPLOYED_NAME found in do/ic/default.conf and no"
            echo " INFERENCE_COMPONENT_NAME in do/config."
            echo ""
            echo " Deploy your base model first with: ./do/deploy"
        } >&2
        exit 1
    fi

    echo "${base_ic_name}"
}
94
+
95
# ── Best-effort adapter_config.json validation ────────────────────────────────
# Downloads the adapter tar.gz, extracts adapter_config.json, and warns on
# stdout when its base_model_name_or_path differs from MODEL_NAME.
#
# Globals:
#   MODEL_NAME (read, optional) — check is skipped when unset/empty
#   AWS_REGION (read)
# Arguments:
#   $1 - S3 URI of the adapter archive
# Returns: 0 always — download/extract/parse failures are deliberately ignored
#          because this check is advisory only.
_validate_adapter_config() {
    local weights_uri="$1"

    # Skip if MODEL_NAME is not configured (non-transformers projects)
    if [ -z "${MODEL_NAME:-}" ]; then
        return 0
    fi

    # Use an unpredictable, freshly created directory rather than a PID-based
    # name under /tmp. If it cannot be created, skip the advisory check.
    local tmp_dir
    tmp_dir=$(mktemp -d "${TMPDIR:-/tmp}/adapter_config_check.XXXXXX" 2>/dev/null) || return 0

    (
        # Run in subshell so any failure is contained
        set +e

        # Download the tar.gz
        if ! aws s3 cp "${weights_uri}" "${tmp_dir}/adapter.tar.gz" --region "${AWS_REGION}" --quiet; then
            exit 0
        fi

        # Extract just adapter_config.json. Archives built with `tar -C dir .`
        # store the member as "./adapter_config.json", which is not matched by
        # the bare name on every tar implementation — try both spellings.
        if ! tar -xzf "${tmp_dir}/adapter.tar.gz" -C "${tmp_dir}" adapter_config.json \
            && ! tar -xzf "${tmp_dir}/adapter.tar.gz" -C "${tmp_dir}" ./adapter_config.json; then
            exit 0
        fi

        # Read base_model_name_or_path from the JSON
        local adapter_base_model=""
        if command -v jq &>/dev/null; then
            adapter_base_model=$(jq -r '.base_model_name_or_path // empty' "${tmp_dir}/adapter_config.json")
        else
            # Fallback: use grep/sed for environments without jq
            adapter_base_model=$(grep -o '"base_model_name_or_path"[[:space:]]*:[[:space:]]*"[^"]*"' "${tmp_dir}/adapter_config.json" | sed 's/.*"base_model_name_or_path"[[:space:]]*:[[:space:]]*"//' | sed 's/"$//')
        fi

        # Compare with MODEL_NAME
        if [ -n "${adapter_base_model}" ] && [ "${adapter_base_model}" != "${MODEL_NAME}" ]; then
            echo "⚠️ Adapter was trained on '${adapter_base_model}' but base model is '${MODEL_NAME}'. Adapter may not work correctly."
        fi
    ) 2>/dev/null || true  # stderr of every step is noise for an advisory check

    # Clean up temp files
    rm -rf -- "${tmp_dir}" 2>/dev/null || true

    return 0
}
144
+
145
# ── Download adapter from HuggingFace Hub ─────────────────────────────────────
# Downloads adapter files from a HuggingFace Hub repository, validates that
# adapter_config.json exists, packs the files into a flat adapter.tar.gz and
# uploads it to S3.
#
# Sets the variable `weights_uri` (expected to be declared by the caller) to
# the resulting S3 path.
#
# Globals:
#   ADAPTER_S3_BUCKET (read, optional) — bucket override; otherwise the bucket
#       name is derived from the caller's AWS account ID and AWS_REGION
#   AWS_REGION, PROJECT_NAME (read)
#   HF_TOKEN (read, optional) — passed to the download tool for gated repos
#   MODEL_NAME (read, optional) — compared with the adapter's base model
#   weights_uri (written)
# Arguments:
#   $1 - HuggingFace repo ID (e.g., "org/adapter-name" or "adapter-name")
#   $2 - Adapter name (for S3 path construction)
#
# Returns 0 on success, exits 1 on failure (temp files are removed first).

# Print the troubleshooting hints shared by every Hub access failure.
_hub_print_access_hints() {
    local hf_repo_id="$1"
    echo ""
    echo " Check that:"
    echo " • The repository exists: https://huggingface.co/${hf_repo_id}"
    echo " • For gated repos, set HF_TOKEN environment variable"
    echo " • You have network connectivity to huggingface.co"
}

_download_from_hub() {
    local hf_repo_id="$1"
    local adapter_name="$2"
    local tmp_dir=""

    echo "📥 Downloading adapter from HuggingFace Hub: ${hf_repo_id}"
    echo ""

    # ── Resolve S3 bucket ─────────────────────────────────────────────────
    local s3_bucket=""
    if [ -n "${ADAPTER_S3_BUCKET:-}" ]; then
        s3_bucket="${ADAPTER_S3_BUCKET}"
    else
        local account_id
        account_id=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "")
        if [ -z "${account_id}" ]; then
            echo "❌ Could not determine AWS account ID."
            echo " Ensure AWS credentials are configured."
            exit 1
        fi
        s3_bucket="mlcc-adapters-${account_id}-${AWS_REGION}"
    fi

    # ── Create temp directory ─────────────────────────────────────────────
    # mktemp gives an unpredictable, exclusively created directory instead of
    # a guessable PID-based path under /tmp.
    if ! tmp_dir=$(mktemp -d "${TMPDIR:-/tmp}/adapter_hub_download.XXXXXX"); then
        echo "❌ Failed to create a temporary directory for the download."
        exit 1
    fi
    mkdir -p "${tmp_dir}/adapter_files"

    # ── Download adapter files ────────────────────────────────────────────
    # Prefer the `hf` CLI, then the older `huggingface-cli`; both take the
    # same arguments. Fall back to plain curl when neither is installed.
    local hf_cli=""
    if command -v hf &>/dev/null; then
        hf_cli="hf"
        echo " Using hf CLI to download..."
    elif command -v huggingface-cli &>/dev/null; then
        hf_cli="huggingface-cli"
        echo " Using huggingface-cli to download..."
    fi

    if [ -n "${hf_cli}" ]; then
        local hf_args=("download" "${hf_repo_id}" "--local-dir" "${tmp_dir}/adapter_files")
        if [ -n "${HF_TOKEN:-}" ]; then
            hf_args+=("--token" "${HF_TOKEN}")
        fi
        if ! "${hf_cli}" "${hf_args[@]}" 2>/dev/null; then
            echo "❌ Failed to download adapter from HuggingFace Hub: ${hf_repo_id}"
            _hub_print_access_hints "${hf_repo_id}"
            rm -rf "${tmp_dir}"
            exit 1
        fi
    else
        # Fallback: use curl with HF Hub API
        echo " Using curl to download (huggingface-cli not found)..."

        # Get file listing from the repo
        local api_url="https://huggingface.co/api/models/${hf_repo_id}"
        local auth_header=""
        if [ -n "${HF_TOKEN:-}" ]; then
            auth_header="Authorization: Bearer ${HF_TOKEN}"
        fi

        # A failing curl must not trip `set -e` here: map it to an empty
        # listing so the user gets the explanatory message below.
        local repo_info=""
        if [ -n "${auth_header}" ]; then
            repo_info=$(curl -sS -H "${auth_header}" "${api_url}" 2>/dev/null) || repo_info=""
        else
            repo_info=$(curl -sS "${api_url}" 2>/dev/null) || repo_info=""
        fi

        if [ -z "${repo_info}" ] || echo "${repo_info}" | grep -q '"error"'; then
            echo "❌ Failed to access HuggingFace Hub repository: ${hf_repo_id}"
            _hub_print_access_hints "${hf_repo_id}"
            rm -rf "${tmp_dir}"
            exit 1
        fi

        # Extract file list from siblings array
        local files=""
        if command -v jq &>/dev/null; then
            files=$(echo "${repo_info}" | jq -r '.siblings[]?.rfilename // empty' 2>/dev/null) || files=""
        else
            files=$(echo "${repo_info}" | grep -o '"rfilename":"[^"]*"' | sed 's/"rfilename":"//;s/"$//') || files=""
        fi

        if [ -z "${files}" ]; then
            echo "❌ No files found in repository: ${hf_repo_id}"
            rm -rf "${tmp_dir}"
            exit 1
        fi

        # Download each file (only root-level files, skip subdirectories)
        local download_base="https://huggingface.co/${hf_repo_id}/resolve/main"
        local filename
        while IFS= read -r filename; do
            case "${filename}" in
                # Skip files in subdirectories (we only want root-level adapter files)
                */*) continue ;;
                # Skip hidden files and READMEs
                .gitattributes|.gitignore|README.md|LICENSE*) continue ;;
            esac

            echo " Downloading: ${filename}"
            # -f: treat HTTP errors (404/401/...) as failures instead of saving
            # the server's error page as if it were the adapter file.
            local curl_args=("-sS" "-f" "-L" "-o" "${tmp_dir}/adapter_files/${filename}")
            if [ -n "${auth_header}" ]; then
                curl_args+=("-H" "${auth_header}")
            fi
            if ! curl "${curl_args[@]}" "${download_base}/${filename}" 2>/dev/null; then
                echo " ⚠️ Failed to download: ${filename} (skipping)"
                # Do not leave a partial/empty file behind in the archive.
                rm -f -- "${tmp_dir}/adapter_files/${filename}"
            fi
        done <<< "${files}"
    fi

    # ── Remove .huggingface metadata if present ───────────────────────────
    rm -rf "${tmp_dir}/adapter_files/.cache" "${tmp_dir}/adapter_files/.huggingface" 2>/dev/null
    # Remove hidden files that huggingface-cli may create
    find "${tmp_dir}/adapter_files" -name ".*" -delete 2>/dev/null || true
    # Flatten: hoist nested files to the root, then drop the directories.
    # mv -n never overwrites, so a nested file (e.g. checkpoint-*/adapter_config.json)
    # cannot replace the root-level adapter file of the same name.
    find "${tmp_dir}/adapter_files" -mindepth 2 -type f -exec mv -n {} "${tmp_dir}/adapter_files/" \; 2>/dev/null || true
    find "${tmp_dir}/adapter_files" -mindepth 1 -type d -exec rm -rf {} + 2>/dev/null || true

    # ── Validate adapter_config.json exists ───────────────────────────────
    if [ ! -f "${tmp_dir}/adapter_files/adapter_config.json" ]; then
        echo "❌ adapter_config.json not found in downloaded files."
        echo ""
        echo " The repository '${hf_repo_id}' does not appear to contain"
        echo " a valid PEFT/LoRA adapter. A valid adapter must include:"
        echo " • adapter_config.json"
        echo " • adapter_model.safetensors (or adapter_model.bin)"
        echo ""
        echo " Verify the repository at: https://huggingface.co/${hf_repo_id}"
        rm -rf "${tmp_dir}"
        exit 1
    fi

    echo "✅ adapter_config.json found"

    # ── Optional: check base_model_name_or_path matches MODEL_NAME ────────
    if [ -n "${MODEL_NAME:-}" ]; then
        local adapter_base_model=""
        # Advisory only: a parse failure must not abort the download.
        if command -v jq &>/dev/null; then
            adapter_base_model=$(jq -r '.base_model_name_or_path // empty' "${tmp_dir}/adapter_files/adapter_config.json" 2>/dev/null) || adapter_base_model=""
        else
            adapter_base_model=$(grep -o '"base_model_name_or_path"[[:space:]]*:[[:space:]]*"[^"]*"' "${tmp_dir}/adapter_files/adapter_config.json" 2>/dev/null | sed 's/.*"base_model_name_or_path"[[:space:]]*:[[:space:]]*"//' | sed 's/"$//') || adapter_base_model=""
        fi

        if [ -n "${adapter_base_model}" ] && [ "${adapter_base_model}" != "${MODEL_NAME}" ]; then
            echo "⚠️ Adapter was trained on '${adapter_base_model}' but base model is '${MODEL_NAME}'. Adapter may not work correctly."
        fi
    fi

    # ── Create adapter.tar.gz from downloaded files (flat, no subdirs) ────
    echo "📦 Creating adapter.tar.gz..."
    if ! tar -czf "${tmp_dir}/adapter.tar.gz" -C "${tmp_dir}/adapter_files" . 2>/dev/null; then
        echo "❌ Failed to create adapter.tar.gz"
        rm -rf "${tmp_dir}"
        exit 1
    fi

    local tar_size
    tar_size=$(du -h "${tmp_dir}/adapter.tar.gz" | cut -f1)
    echo " Archive size: ${tar_size}"

    # ── Upload to S3 ─────────────────────────────────────────────────────
    local s3_path="s3://${s3_bucket}/adapters/${PROJECT_NAME}/${adapter_name}/adapter.tar.gz"
    echo "☁️ Uploading to S3: ${s3_path}"

    if ! aws s3 cp "${tmp_dir}/adapter.tar.gz" "${s3_path}" --region "${AWS_REGION}"; then
        echo "❌ Failed to upload adapter to S3."
        echo ""
        echo " Check that:"
        echo " • The S3 bucket '${s3_bucket}' exists"
        echo " • Your IAM credentials have s3:PutObject permission"
        echo " • Run bootstrap if the bucket doesn't exist: ./do/bootstrap"
        rm -rf "${tmp_dir}"
        exit 1
    fi

    echo "✅ Uploaded to S3: ${s3_path}"

    # ── Clean up ──────────────────────────────────────────────────────────
    rm -rf "${tmp_dir}"

    # Set the weights_uri variable for the caller
    weights_uri="${s3_path}"
}
353
+
354
+ # ── Subcommand implementations ────────────────────────────────────────────────
355
+
356
# Print the two-form usage reminder shared by the `add` argument errors.
_adapter_add_usage_hint() {
    echo " Usage: ./do/adapter add <name> --weights <s3-uri>"
    echo " ./do/adapter add <name> --from-hub <hf-repo-id>"
}

# Add a LoRA adapter as an inference component on the endpoint.
#
# Flow: parse and validate arguments → confirm an endpoint is configured and
# the base inference component is InService → (for --from-hub) download the
# adapter and upload it to S3 → create the adapter inference component → wait
# for it → record metadata in do/adapters/<name>.conf.
#
# Globals:
#   SCRIPT_DIR, PROJECT_NAME, ENDPOINT_NAME, AWS_REGION (read)
# Arguments:
#   $1      - the literal subcommand word ("add"); discarded
#   <name>  - adapter name: lowercase alphanumeric + hyphens, 1-50 chars
#   --weights <s3-uri>       S3 URI of a .tar.gz archive, or
#   --from-hub <hf-repo-id>  HuggingFace repo to download (mutually exclusive)
# Returns: 0 on success; exits 1 on any validation or AWS failure.
#
# NOTE(review): _get_ic_status and wait_ic are not defined in this file;
# presumably they come from lib/wait.sh — confirm their failure behaviour.
_adapter_add() {
    local adapter_name=""
    local weights_uri=""
    local from_hub=""

    # Parse add arguments
    shift # remove 'add' from args
    while [ $# -gt 0 ]; do
        case "$1" in
            --weights)
                if [ -z "${2:-}" ]; then
                    echo "❌ --weights requires an S3 URI argument"
                    echo " Usage: ./do/adapter add <name> --weights <s3-uri>"
                    exit 1
                fi
                weights_uri="$2"
                shift 2
                ;;
            --from-hub)
                if [ -z "${2:-}" ]; then
                    echo "❌ --from-hub requires a HuggingFace repo ID argument"
                    echo " Usage: ./do/adapter add <name> --from-hub <hf-repo-id>"
                    exit 1
                fi
                from_hub="$2"
                shift 2
                ;;
            --help|-h)
                echo "Usage: ./do/adapter add <name> --weights <s3-uri>"
                echo " ./do/adapter add <name> --from-hub <hf-repo-id>"
                echo ""
                echo "Add a new LoRA adapter to the endpoint."
                echo ""
                echo "Arguments:"
                echo " <name> Adapter name (lowercase alphanumeric + hyphens, 1-50 chars)"
                echo " --weights <s3-uri> S3 URI to adapter weights (.tar.gz)"
                echo " --from-hub <hf-repo-id> Download adapter from HuggingFace Hub"
                echo ""
                echo "Note: --weights and --from-hub are mutually exclusive."
                echo ""
                echo "Examples:"
                echo " ./do/adapter add ectsum --weights s3://bucket/adapters/ectsum/adapter.tar.gz"
                echo " ./do/adapter add ectsum --from-hub predibase/llama-3.1-8b-ectsum"
                exit 0
                ;;
            -*)
                echo "❌ Unknown option: $1"
                _adapter_add_usage_hint
                exit 1
                ;;
            *)
                if [ -z "${adapter_name}" ]; then
                    adapter_name="$1"
                else
                    echo "❌ Unexpected argument: $1"
                    _adapter_add_usage_hint
                    exit 1
                fi
                shift
                ;;
        esac
    done

    # Validate required arguments
    if [ -z "${adapter_name}" ]; then
        echo "❌ Adapter name is required"
        _adapter_add_usage_hint
        exit 1
    fi

    # ── Mutual exclusivity check ─────────────────────────────────────────
    if [ -n "${weights_uri}" ] && [ -n "${from_hub}" ]; then
        echo "❌ --weights and --from-hub are mutually exclusive"
        echo ""
        echo " Use one or the other:"
        echo " ./do/adapter add ${adapter_name} --weights <s3-uri>"
        echo " ./do/adapter add ${adapter_name} --from-hub <hf-repo-id>"
        exit 1
    fi

    if [ -z "${weights_uri}" ] && [ -z "${from_hub}" ]; then
        echo "❌ Either --weights or --from-hub is required"
        _adapter_add_usage_hint
        exit 1
    fi

    # ── Validate HF repo ID format (if --from-hub) ───────────────────────
    if [ -n "${from_hub}" ]; then
        # Valid formats: "org/name" or "name" (alphanumeric, hyphens, underscores, dots)
        if ! echo "${from_hub}" | grep -qE '^[a-zA-Z0-9._-]+(/[a-zA-Z0-9._-]+)?$'; then
            echo "❌ Invalid HuggingFace repo ID: ${from_hub}"
            echo ""
            echo " Repo ID must be in format 'org/name' or 'name'"
            echo " Examples: predibase/llama-3.1-8b-ectsum, my-adapter"
            exit 1
        fi
    fi

    # ── Validate adapter name format ──────────────────────────────────────
    if ! echo "${adapter_name}" | grep -qE '^[a-z0-9][a-z0-9-]{0,49}$'; then
        echo "❌ Invalid adapter name: ${adapter_name}"
        echo ""
        echo " Adapter names must be:"
        echo " • 1-50 characters long"
        echo " • Lowercase alphanumeric and hyphens only"
        echo " • Start with a letter or number"
        echo ""
        echo " Examples: ectsum, finance-qa, my-adapter-v2"
        exit 1
    fi

    # ── Validate S3 URI format (only when --weights is used) ─────────────
    if [ -n "${weights_uri}" ]; then
        if ! echo "${weights_uri}" | grep -qE '^s3://.*\.tar\.gz$'; then
            echo "❌ Invalid S3 URI: ${weights_uri}"
            echo ""
            echo " Adapter weights must be:"
            echo " • An S3 URI starting with s3://"
            echo " • A .tar.gz archive containing adapter files"
            echo ""
            echo " Example: s3://my-bucket/adapters/ectsum/adapter.tar.gz"
            exit 1
        fi
    fi

    # ── Validate adapter name uniqueness ──────────────────────────────────
    if [ -f "${SCRIPT_DIR}/adapters/${adapter_name}.conf" ]; then
        echo "❌ Adapter already exists: ${adapter_name}"
        echo ""
        echo " An adapter with this name is already registered."
        echo " To update its weights, use: ./do/adapter update ${adapter_name} --weights <new-uri>"
        echo " To remove it first: ./do/adapter remove ${adapter_name}"
        exit 1
    fi

    # ── Require a configured endpoint ─────────────────────────────────────
    # Same guard as `list`: without it an unset ENDPOINT_NAME only surfaces
    # later as a bare "unbound variable" abort under `set -u`.
    if [ -z "${ENDPOINT_NAME:-}" ]; then
        echo "❌ No endpoint configured. Deploy first with: ./do/deploy"
        exit 1
    fi

    echo "🔌 Adding adapter: ${adapter_name}"
    if [ -n "${from_hub}" ]; then
        echo " Source: HuggingFace Hub (${from_hub})"
    else
        echo " Weights: ${weights_uri}"
    fi
    echo ""

    # ── Validate base IC is InService ─────────────────────────────────────
    # Done before any download/upload so a missing or unhealthy base model
    # fails fast and leaves no stray archive in S3.
    local base_ic_name
    if ! base_ic_name=$(_resolve_base_ic_name); then
        # Anything the resolver wrote to stdout was captured above; show it
        # instead of silently discarding the explanation.
        [ -z "${base_ic_name}" ] || echo "${base_ic_name}"
        exit 1
    fi

    echo "🔍 Checking base inference component: ${base_ic_name}"
    local base_status
    base_status=$(_get_ic_status "${base_ic_name}")

    if [ "${base_status}" != "InService" ]; then
        echo "❌ Base inference component is not InService: ${base_ic_name}"
        echo " Current status: ${base_status:-not found}"
        echo ""
        echo " Adapters require a running base model. Deploy first with:"
        echo " ./do/deploy"
        exit 1
    fi
    echo "✅ Base IC is InService: ${base_ic_name}"

    # ── If --from-hub: download, tar, upload to S3 ────────────────────────
    if [ -n "${from_hub}" ]; then
        _download_from_hub "${from_hub}" "${adapter_name}"
        # weights_uri is now set by _download_from_hub
        echo ""
    fi

    # ── Validate S3 object exists (best-effort, only for --weights) ──────
    if [ -z "${from_hub}" ]; then
        echo "🔍 Checking S3 object exists..."
        if ! aws s3 ls "${weights_uri}" --region "${AWS_REGION}" &>/dev/null; then
            echo "⚠️ Could not verify S3 object: ${weights_uri}"
            echo " This may be a permissions issue. Proceeding anyway..."
            echo " SageMaker will fail at load time if the object doesn't exist."
            echo ""
        else
            echo "✅ S3 object verified: ${weights_uri}"
        fi

        # ── Best-effort: validate adapter_config.json base model ─────────────
        # Downloads the tar.gz, extracts adapter_config.json, and checks that
        # base_model_name_or_path matches MODEL_NAME from do/config.
        # If anything fails (download, extraction, parsing), skip silently.
        _validate_adapter_config "${weights_uri}" || true
    fi

    # ── Build adapter IC name ─────────────────────────────────────────────
    local adapter_ic_name="${PROJECT_NAME}-adapter-${adapter_name}"

    # ── Create adapter inference component ────────────────────────────────
    # NOTE(review): the specification JSON is assembled by interpolation; the
    # values are not JSON-escaped, so a quote or backslash in the S3 URI or
    # base IC name would produce an invalid document.
    echo "🚀 Creating adapter inference component: ${adapter_ic_name}"
    if ! aws sagemaker create-inference-component \
        --inference-component-name "${adapter_ic_name}" \
        --endpoint-name "${ENDPOINT_NAME}" \
        --specification "{\"BaseInferenceComponentName\":\"${base_ic_name}\",\"Container\":{\"ArtifactUrl\":\"${weights_uri}\"}}" \
        --region "${AWS_REGION}"; then

        echo "❌ Failed to create adapter inference component"
        echo " Check that:"
        echo " • Your IAM credentials have sagemaker:CreateInferenceComponent permission"
        echo " • The base IC '${base_ic_name}' is InService"
        echo " • The S3 URI is accessible by the SageMaker execution role"
        exit 1
    fi

    echo "✅ Adapter IC creation initiated: ${adapter_ic_name}"

    # ── Wait for adapter IC to reach InService ────────────────────────────
    echo "⏳ Waiting for adapter IC to reach InService..."
    echo " This typically takes 1-3 minutes for adapters."

    wait_ic "${adapter_ic_name}"

    echo "✅ Adapter IC is InService: ${adapter_ic_name}"

    # ── Create adapter metadata conf file ─────────────────────────────────
    local created_at
    created_at=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

    mkdir -p "${SCRIPT_DIR}/adapters"
    cat > "${SCRIPT_DIR}/adapters/${adapter_name}.conf" <<EOF
export ADAPTER_NAME="${adapter_name}"
export ADAPTER_IC_NAME="${adapter_ic_name}"
export ADAPTER_WEIGHTS_URI="${weights_uri}"
export ADAPTER_CREATED_AT="${created_at}"
EOF

    # Add hub-specific metadata if --from-hub was used
    if [ -n "${from_hub}" ]; then
        cat >> "${SCRIPT_DIR}/adapters/${adapter_name}.conf" <<EOF
export ADAPTER_SOURCE="hub"
export ADAPTER_HF_REPO="${from_hub}"
EOF
    fi

    echo ""
    echo "✅ Adapter added successfully!"
    echo ""
    echo "📋 Adapter Details:"
    echo " Name: ${adapter_name}"
    echo " IC Name: ${adapter_ic_name}"
    echo " Weights: ${weights_uri}"
    if [ -n "${from_hub}" ]; then
        echo " Source: HuggingFace Hub (${from_hub})"
    fi
    echo " Created: ${created_at}"
    echo ""
    echo "🧪 Test your adapter:"
    echo " ./do/test ${adapter_name}"
    echo ""
    echo "🗑️ Remove when done:"
    echo " ./do/adapter remove ${adapter_name}"
}
614
+
615
# List the adapter inference components on the endpoint as a table.
#
# An inference component counts as an adapter when its description carries
# Specification.BaseInferenceComponentName. Adapters whose IC name is not
# recorded in any local do/adapters/*.conf are tagged " (external)".
#
# Globals:
#   ENDPOINT_NAME, AWS_REGION, PROJECT_NAME, SCRIPT_DIR (read)
# Outputs: table (NAME / STATUS / WEIGHTS) on stdout
# Returns: 0 on success (including "no adapters"); exits 1 when no endpoint
#          is configured, jq is unavailable, or the listing call fails.
_adapter_list() {
    if [ -z "${ENDPOINT_NAME:-}" ]; then
        echo "❌ No endpoint configured. Deploy first with: ./do/deploy"
        exit 1
    fi

    # Every field below is parsed with jq; without this check a missing jq
    # would abort the script under `set -e` with no explanation.
    if ! command -v jq &>/dev/null; then
        echo "❌ jq is required to list adapters but was not found on PATH."
        exit 1
    fi

    echo "Adapters on endpoint: ${ENDPOINT_NAME}"
    echo ""

    # ── List all inference components on the endpoint ─────────────────────
    local ic_list
    ic_list=$(aws sagemaker list-inference-components \
        --endpoint-name-equals "${ENDPOINT_NAME}" \
        --region "${AWS_REGION}" 2>/dev/null) || {
        echo "❌ Failed to list inference components on endpoint: ${ENDPOINT_NAME}"
        echo " Check that the endpoint exists and you have sagemaker:ListInferenceComponents permission."
        exit 1
    }

    # Extract IC names from the list response (one per line)
    local ic_names
    ic_names=$(echo "${ic_list}" | jq -r '.InferenceComponents[]?.InferenceComponentName' 2>/dev/null) || ic_names=""

    if [ -z "${ic_names}" ]; then
        echo "No adapters found on this endpoint."
        echo ""
        echo "Add one with: ./do/adapter add <name> --weights <s3-uri>"
        return 0
    fi

    # ── Collect local adapter IC names for ownership check ────────────────
    # Stored space-delimited; matched below as whole tokens.
    local local_adapters=""
    local conf_file conf_ic_name
    if [ -d "${SCRIPT_DIR}/adapters" ]; then
        for conf_file in "${SCRIPT_DIR}"/adapters/*.conf; do
            [ -f "${conf_file}" ] || continue
            conf_ic_name=$(grep "^export ADAPTER_IC_NAME=" "${conf_file}" 2>/dev/null | sed 's/^export ADAPTER_IC_NAME="//' | sed 's/"$//' || echo "")
            if [ -n "${conf_ic_name}" ]; then
                local_adapters="${local_adapters} ${conf_ic_name}"
            fi
        done
    fi

    # ── Filter to adapter ICs and collect details ─────────────────────────
    local found_adapters=0
    local rows=()
    local ic_name ic_detail base_ic status weights_url display_name ownership

    # Read names line by line on fd 3 so commands inside the loop cannot
    # consume the list from stdin.
    while IFS= read -r ic_name <&3; do
        [ -n "${ic_name}" ] || continue

        # Describe each IC to check if it's an adapter (has BaseInferenceComponentName)
        ic_detail=$(aws sagemaker describe-inference-component \
            --inference-component-name "${ic_name}" \
            --region "${AWS_REGION}" 2>/dev/null) || continue

        base_ic=$(echo "${ic_detail}" | jq -r '.Specification.BaseInferenceComponentName // empty' 2>/dev/null) || base_ic=""
        if [ -z "${base_ic}" ]; then
            # Not an adapter IC — skip
            continue
        fi

        # Extract status and artifact URL
        status=$(echo "${ic_detail}" | jq -r '.InferenceComponentStatus // "Unknown"' 2>/dev/null) || status="Unknown"
        weights_url=$(echo "${ic_detail}" | jq -r '.Specification.Container.ArtifactUrl // "N/A"' 2>/dev/null) || weights_url="N/A"

        # Derive display name (strip project prefix if present)
        display_name="${ic_name}"
        if [[ "${ic_name}" == "${PROJECT_NAME}-adapter-"* ]]; then
            display_name="${ic_name#"${PROJECT_NAME}-adapter-"}"
        fi

        # Ownership: exact token match against the local IC names. A regex
        # word match would treat "-" as a boundary and let "x-foo" match
        # inside "x-foo-v2".
        case " ${local_adapters} " in
            *" ${ic_name} "*) ownership="" ;;
            *) ownership=" (external)" ;;
        esac

        rows+=("$(printf '%-14s%-12s%s%s' "${display_name}" "${status}" "${weights_url}" "${ownership}")")
        found_adapters=$((found_adapters + 1))
    done 3<<< "${ic_names}"

    if [ "${found_adapters}" -eq 0 ]; then
        echo "No adapters found on this endpoint."
        echo ""
        echo "Add one with: ./do/adapter add <name> --weights <s3-uri>"
        return 0
    fi

    # ── Print table ───────────────────────────────────────────────────────
    # printf '%s\n' prints the rows verbatim (no backslash interpretation).
    printf '%-14s%-12s%s\n' "NAME" "STATUS" "WEIGHTS"
    printf '%s\n' "${rows[@]}"
}
714
+
715
# Remove a LoRA adapter: delete its SageMaker inference component (IC) and
# then delete the adapter's local conf file.
#
# Arguments:
#   $1      - the literal subcommand word ('remove'); discarded
#   $2      - adapter name (required); --help/-h prints usage instead
# Globals read:
#   SCRIPT_DIR                    - directory containing adapters/<name>.conf
#   AWS_REGION                    - region passed to the AWS CLI
#   ADAPTER_DELETE_TIMEOUT        - optional; seconds to wait for deletion
#                                   (default 600)
#   ADAPTER_DELETE_POLL_INTERVAL  - optional; seconds between status polls
#                                   (default 10)
# Helpers used (defined elsewhere in this file):
#   _get_ic_status <ic-name>      - presumably prints the IC status, and prints
#                                   nothing (or "None") once the IC no longer
#                                   exists -- TODO confirm against its definition
# Exits:
#   0 on --help; 1 on bad arguments, unknown adapter, unreadable conf, or a
#   failed delete call while the IC still exists.
# Returns:
#   0 when the IC is gone, and also when the wait timed out (deletion was
#   initiated and may finish in the background); the messages differ.
_adapter_remove() {
    local adapter_name=""

    # Parse remove arguments
    shift # remove 'remove' from args
    while [ $# -gt 0 ]; do
        case "$1" in
            --help|-h)
                echo "Usage: ./do/adapter remove <name>"
                echo ""
                echo "Remove a LoRA adapter from the endpoint."
                echo ""
                echo "Arguments:"
                echo " <name> Adapter name to remove"
                exit 0
                ;;
            -*)
                echo "❌ Unknown option: $1"
                echo " Usage: ./do/adapter remove <name>"
                exit 1
                ;;
            *)
                if [ -z "${adapter_name}" ]; then
                    adapter_name="$1"
                else
                    echo "❌ Unexpected argument: $1"
                    echo " Usage: ./do/adapter remove <name>"
                    exit 1
                fi
                shift
                ;;
        esac
    done

    if [ -z "${adapter_name}" ]; then
        echo "❌ Adapter name is required"
        echo " Usage: ./do/adapter remove <name>"
        exit 1
    fi

    echo "🗑️ Removing adapter: ${adapter_name}"
    echo ""

    # ── Validate adapter conf exists ──────────────────────────────────────
    local conf_file="${SCRIPT_DIR}/adapters/${adapter_name}.conf"
    if [ ! -f "${conf_file}" ]; then
        echo "❌ Adapter not found: ${adapter_name}"
        echo ""
        echo " No configuration file at: do/adapters/${adapter_name}.conf"
        echo ""
        echo " Available adapters:"
        # Track whether anything was listed so an existing-but-empty adapters
        # directory still prints "(none)" instead of a blank list.
        local listed_any=0
        local f
        if [ -d "${SCRIPT_DIR}/adapters" ]; then
            for f in "${SCRIPT_DIR}"/adapters/*.conf; do
                [ -f "${f}" ] || continue
                echo " • $(basename "${f}" .conf)"
                listed_any=1
            done
        fi
        if [ "${listed_any}" -eq 0 ]; then
            echo " (none)"
        fi
        exit 1
    fi

    # ── Read adapter IC name from conf ────────────────────────────────────
    # The conf is parsed as text rather than sourced, so nothing in it executes.
    local adapter_ic_name
    adapter_ic_name=$(grep "^export ADAPTER_IC_NAME=" "${conf_file}" 2>/dev/null \
        | sed -e 's/^export ADAPTER_IC_NAME="//' -e 's/"$//')

    if [ -z "${adapter_ic_name}" ]; then
        echo "❌ Could not read ADAPTER_IC_NAME from: do/adapters/${adapter_name}.conf"
        echo " The conf file may be corrupted. Removing it manually."
        rm -f "${conf_file}"
        exit 1
    fi

    echo "📋 Adapter IC: ${adapter_ic_name}"

    # ── Delete the inference component ────────────────────────────────────
    echo "🔄 Deleting inference component: ${adapter_ic_name}"
    if ! aws sagemaker delete-inference-component \
        --inference-component-name "${adapter_ic_name}" \
        --region "${AWS_REGION}" 2>/dev/null; then

        # Check if it's already gone. "None" is accepted as "gone" here too,
        # matching the wait loop below.
        local current_status
        current_status=$(_get_ic_status "${adapter_ic_name}")
        if [ -z "${current_status}" ] || [ "${current_status}" = "None" ]; then
            echo " Inference component already deleted or not found. Cleaning up local files."
        else
            echo "❌ Failed to delete inference component: ${adapter_ic_name}"
            echo " Current status: ${current_status}"
            echo ""
            echo " Check that your IAM credentials have sagemaker:DeleteInferenceComponent permission."
            exit 1
        fi
    fi

    # ── Wait for deletion to complete ─────────────────────────────────────
    echo "⏳ Waiting for adapter IC deletion to complete..."
    local wait_start
    wait_start=$(date +%s)

    # Defaults: 10 minute timeout, poll every 10 seconds. Non-numeric
    # overrides fall back to the defaults so the -ge test below cannot fail.
    local timeout="${ADAPTER_DELETE_TIMEOUT:-600}"
    local poll_interval="${ADAPTER_DELETE_POLL_INTERVAL:-10}"
    case "${timeout}" in
        ''|*[!0-9]*) timeout=600 ;;
    esac
    case "${poll_interval}" in
        ''|*[!0-9]*) poll_interval=10 ;;
    esac

    local timed_out=0
    local status elapsed
    while true; do
        status=$(_get_ic_status "${adapter_ic_name}")

        if [ -z "${status}" ] || [ "${status}" = "None" ]; then
            break
        fi

        elapsed=$(( $(date +%s) - wait_start ))
        if [ "${elapsed}" -ge "${timeout}" ]; then
            timed_out=1
            echo "⚠️ Adapter IC still deleting after ${timeout}s."
            echo " It may complete in the background. Local conf removed."
            break
        fi

        echo " $(date +%H:%M:%S) Status: ${status} (${elapsed}s elapsed)..."
        sleep "${poll_interval}"
    done

    # Only claim the IC is deleted when the loop actually observed it gone.
    if [ "${timed_out}" -eq 0 ]; then
        echo "✅ Adapter IC deleted: ${adapter_ic_name}"
    fi

    # ── Remove local conf file ────────────────────────────────────────────
    rm -f "${conf_file}"
    echo "✅ Removed: do/adapters/${adapter_name}.conf"

    echo ""
    if [ "${timed_out}" -eq 0 ]; then
        echo "✅ Adapter removed successfully: ${adapter_name}"
    else
        echo "⚠️ Adapter conf removed, but inference component '${adapter_ic_name}' is still deleting."
    fi
}
844
+
845
# Write `export KEY="value"` into a conf file: replace the existing line for
# KEY if there is one, otherwise append a new line.
#
# Arguments: $1 - variable name, $2 - value, $3 - conf file path
_adapter_update_conf_var() {
    local key="$1"
    local value="$2"
    local file="$3"

    if grep -q "^export ${key}=" "${file}"; then
        # Escape '\', '&' and the '|' delimiter: all three are special in a sed
        # replacement and would otherwise corrupt the written value.
        local escaped
        escaped=$(printf '%s' "${value}" | sed -e 's/[\\&|]/\\&/g')
        sed -i.bak "s|^export ${key}=.*|export ${key}=\"${escaped}\"|" "${file}"
        rm -f "${file}.bak"
    else
        printf 'export %s="%s"\n' "${key}" "${value}" >> "${file}"
    fi
}

# Point an existing LoRA adapter at new weights: update its SageMaker
# inference component (IC), wait for it to settle, then record the new
# weights location in the adapter's conf file.
#
# Arguments:
#   $1                      - the literal subcommand word ('update'); discarded
#   <name>                  - adapter name (required)
#   --weights <s3-uri>      - new weights archive (s3://....tar.gz)
#   --from-hub <hf-repo-id> - fetch new weights from HuggingFace Hub instead
#   (--weights and --from-hub are mutually exclusive; exactly one is required)
# Globals read:
#   SCRIPT_DIR  - directory containing adapters/<name>.conf
#   AWS_REGION  - region passed to the AWS CLI
# Helpers used (defined elsewhere in this file):
#   _download_from_hub <repo> <name> - expected to set this function's local
#                                      `weights_uri` (shell dynamic scoping)
#   wait_ic <ic-name>                - presumably blocks until the IC is
#                                      InService -- TODO confirm
# Exits:
#   0 on --help; 1 on invalid arguments, unknown adapter, unreadable conf,
#   a hub download that yields no URI, or a failed update call.
_adapter_update() {
    local adapter_name=""
    local weights_uri=""
    local from_hub=""

    # Parse update arguments
    shift # remove 'update' from args
    while [ $# -gt 0 ]; do
        case "$1" in
            --weights)
                if [ -z "${2:-}" ]; then
                    echo "❌ --weights requires an S3 URI argument"
                    echo " Usage: ./do/adapter update <name> --weights <new-s3-uri>"
                    exit 1
                fi
                weights_uri="$2"
                shift 2
                ;;
            --from-hub)
                if [ -z "${2:-}" ]; then
                    echo "❌ --from-hub requires a HuggingFace repo ID argument"
                    echo " Usage: ./do/adapter update <name> --from-hub <hf-repo-id>"
                    exit 1
                fi
                from_hub="$2"
                shift 2
                ;;
            --help|-h)
                echo "Usage: ./do/adapter update <name> --weights <new-s3-uri>"
                echo " ./do/adapter update <name> --from-hub <hf-repo-id>"
                echo ""
                echo "Update the weights of an existing LoRA adapter."
                echo ""
                echo "Arguments:"
                echo " <name> Adapter name to update"
                echo " --weights <new-s3-uri> New S3 URI to adapter weights (.tar.gz)"
                echo " --from-hub <hf-repo-id> Download new weights from HuggingFace Hub"
                echo ""
                echo "Note: --weights and --from-hub are mutually exclusive."
                echo ""
                echo "Examples:"
                echo " ./do/adapter update ectsum --weights s3://bucket/adapters/ectsum-v2/adapter.tar.gz"
                echo " ./do/adapter update ectsum --from-hub predibase/llama-3.1-8b-ectsum-v2"
                exit 0
                ;;
            -*)
                echo "❌ Unknown option: $1"
                echo " Usage: ./do/adapter update <name> --weights <new-s3-uri>"
                echo " ./do/adapter update <name> --from-hub <hf-repo-id>"
                exit 1
                ;;
            *)
                if [ -z "${adapter_name}" ]; then
                    adapter_name="$1"
                else
                    echo "❌ Unexpected argument: $1"
                    echo " Usage: ./do/adapter update <name> --weights <new-s3-uri>"
                    echo " ./do/adapter update <name> --from-hub <hf-repo-id>"
                    exit 1
                fi
                shift
                ;;
        esac
    done

    # Validate required arguments
    if [ -z "${adapter_name}" ]; then
        echo "❌ Adapter name is required"
        echo " Usage: ./do/adapter update <name> --weights <new-s3-uri>"
        echo " ./do/adapter update <name> --from-hub <hf-repo-id>"
        exit 1
    fi

    # ── Mutual exclusivity check ─────────────────────────────────────────
    if [ -n "${weights_uri}" ] && [ -n "${from_hub}" ]; then
        echo "❌ --weights and --from-hub are mutually exclusive"
        echo ""
        echo " Use one or the other:"
        echo " ./do/adapter update ${adapter_name} --weights <s3-uri>"
        echo " ./do/adapter update ${adapter_name} --from-hub <hf-repo-id>"
        exit 1
    fi

    if [ -z "${weights_uri}" ] && [ -z "${from_hub}" ]; then
        echo "❌ Either --weights or --from-hub is required"
        echo " Usage: ./do/adapter update <name> --weights <new-s3-uri>"
        echo " ./do/adapter update <name> --from-hub <hf-repo-id>"
        exit 1
    fi

    # ── Validate HF repo ID format (if --from-hub) ───────────────────────
    if [ -n "${from_hub}" ]; then
        if ! echo "${from_hub}" | grep -qE '^[a-zA-Z0-9._-]+(/[a-zA-Z0-9._-]+)?$'; then
            echo "❌ Invalid HuggingFace repo ID: ${from_hub}"
            echo ""
            echo " Repo ID must be in format 'org/name' or 'name'"
            echo " Examples: predibase/llama-3.1-8b-ectsum-v2, my-adapter"
            exit 1
        fi
    fi

    # ── Validate S3 URI format (only when --weights is used) ─────────────
    if [ -n "${weights_uri}" ]; then
        if ! echo "${weights_uri}" | grep -qE '^s3://.*\.tar\.gz$'; then
            echo "❌ Invalid S3 URI: ${weights_uri}"
            echo ""
            echo " Adapter weights must be:"
            echo " • An S3 URI starting with s3://"
            echo " • A .tar.gz archive containing adapter files"
            echo ""
            echo " Example: s3://my-bucket/adapters/ectsum-v2/adapter.tar.gz"
            exit 1
        fi
        # The URI is embedded in a JSON document and in a double-quoted conf
        # line below; a quote or backslash would break both.
        case "${weights_uri}" in
            *'"'*|*'\'*)
                echo "❌ Invalid S3 URI: ${weights_uri}"
                echo ""
                echo " The URI must not contain double quotes or backslashes."
                exit 1
                ;;
        esac
    fi

    # ── Validate adapter conf exists ──────────────────────────────────────
    local conf_file="${SCRIPT_DIR}/adapters/${adapter_name}.conf"
    if [ ! -f "${conf_file}" ]; then
        echo "❌ Adapter not found: ${adapter_name}"
        echo ""
        echo " No configuration file at: do/adapters/${adapter_name}.conf"
        echo ""
        echo " Available adapters:"
        if [ -d "${SCRIPT_DIR}/adapters" ]; then
            local f
            for f in "${SCRIPT_DIR}"/adapters/*.conf; do
                [ -f "${f}" ] || continue
                echo " • $(basename "${f}" .conf)"
            done
        else
            echo " (none)"
        fi
        exit 1
    fi

    # ── Read adapter IC name from conf ────────────────────────────────────
    # The conf is parsed as text rather than sourced, so nothing in it executes.
    local adapter_ic_name
    adapter_ic_name=$(grep "^export ADAPTER_IC_NAME=" "${conf_file}" 2>/dev/null \
        | sed -e 's/^export ADAPTER_IC_NAME="//' -e 's/"$//')

    if [ -z "${adapter_ic_name}" ]; then
        echo "❌ Could not read ADAPTER_IC_NAME from: do/adapters/${adapter_name}.conf"
        echo " The conf file may be corrupted."
        exit 1
    fi

    echo "🔄 Updating adapter: ${adapter_name}"
    echo " IC Name: ${adapter_ic_name}"
    if [ -n "${from_hub}" ]; then
        echo " Source: HuggingFace Hub (${from_hub})"
    else
        echo " New weights: ${weights_uri}"
    fi
    echo ""

    # ── If --from-hub: download, tar, upload to S3 ────────────────────────
    if [ -n "${from_hub}" ]; then
        _download_from_hub "${from_hub}" "${adapter_name}"
        # weights_uri is now set by _download_from_hub
        if [ -z "${weights_uri}" ]; then
            # Without a URI the update call below would send an empty ArtifactUrl.
            echo "❌ HuggingFace Hub download did not produce an S3 URI for: ${from_hub}"
            exit 1
        fi
        echo ""
    fi

    # ── Update the inference component ────────────────────────────────────
    echo "🚀 Updating inference component: ${adapter_ic_name}"
    if ! aws sagemaker update-inference-component \
        --inference-component-name "${adapter_ic_name}" \
        --specification "{\"Container\":{\"ArtifactUrl\":\"${weights_uri}\"}}" \
        --region "${AWS_REGION}"; then

        echo "❌ Failed to update adapter inference component"
        echo " Check that:"
        echo " • Your IAM credentials have sagemaker:UpdateInferenceComponent permission"
        echo " • The adapter IC '${adapter_ic_name}' exists and is InService"
        echo " • The new S3 URI is accessible by the SageMaker execution role"
        exit 1
    fi

    echo "✅ Adapter IC update initiated: ${adapter_ic_name}"

    # ── Wait for adapter IC to return to InService ────────────────────────
    echo "⏳ Waiting for adapter IC to return to InService..."
    echo " The IC will transition through Updating state."

    wait_ic "${adapter_ic_name}"

    echo "✅ Adapter IC is InService: ${adapter_ic_name}"

    # ── Update conf file ──────────────────────────────────────────────────
    # The line is appended if the conf has no ADAPTER_WEIGHTS_URI entry yet,
    # so the new location is never silently dropped.
    _adapter_update_conf_var "ADAPTER_WEIGHTS_URI" "${weights_uri}" "${conf_file}"

    # Update hub-specific metadata
    if [ -n "${from_hub}" ]; then
        _adapter_update_conf_var "ADAPTER_SOURCE" "hub" "${conf_file}"
        _adapter_update_conf_var "ADAPTER_HF_REPO" "${from_hub}" "${conf_file}"
    fi

    echo ""
    echo "✅ Adapter updated successfully!"
    echo ""
    echo "📋 Updated Details:"
    echo " Name: ${adapter_name}"
    echo " IC Name: ${adapter_ic_name}"
    echo " New Weights: ${weights_uri}"
    if [ -n "${from_hub}" ]; then
        echo " Source: HuggingFace Hub (${from_hub})"
    fi
    echo ""
    echo "🧪 Test your updated adapter:"
    echo " ./do/test ${adapter_name}"
}
1067
+
1068
# Search HuggingFace Hub for LoRA adapters whose base model is MODEL_NAME and
# print them as a table, most-downloaded first.
#
# Arguments:
#   $1          - the literal subcommand word ('search'); discarded
#   --limit N   - maximum number of results (non-negative integer, default 10)
#   --help|-h   - print usage and exit 0
# Globals read:
#   MODEL_NAME  - base model ID (required)
#   HF_TOKEN    - optional; sent as a Bearer token when set
# Behaviour:
#   Uses jq when available (repo ID, downloads, pipeline tag / first tag);
#   otherwise falls back to a grep-based listing of repo IDs only.
# Exits:
#   1 on bad arguments, missing MODEL_NAME, or a failed HTTP request.
# Returns:
#   0 after printing results, or after reporting that none were found.
_adapter_search() {
    local limit=10

    # Parse search arguments
    shift # remove 'search' from args
    while [ $# -gt 0 ]; do
        case "$1" in
            --limit)
                if [ -z "${2:-}" ]; then
                    echo "❌ --limit requires a numeric argument"
                    echo " Usage: ./do/adapter search [--limit N]"
                    exit 1
                fi
                # The value is spliced into the request URL, so accept digits only.
                case "$2" in
                    *[!0-9]*)
                        echo "❌ --limit requires a numeric argument"
                        echo " Usage: ./do/adapter search [--limit N]"
                        exit 1
                        ;;
                esac
                limit="$2"
                shift 2
                ;;
            --help|-h)
                echo "Usage: ./do/adapter search [--limit N]"
                echo ""
                echo "Search HuggingFace Hub for LoRA adapters compatible with your base model."
                echo ""
                echo "Options:"
                echo " --limit N Maximum number of results (default: 10)"
                echo ""
                echo "Examples:"
                echo " ./do/adapter search"
                echo " ./do/adapter search --limit 20"
                echo ""
                echo "To add a found adapter:"
                echo " ./do/adapter add <name> --from-hub <repo-id>"
                exit 0
                ;;
            -*)
                echo "❌ Unknown option: $1"
                echo " Usage: ./do/adapter search [--limit N]"
                exit 1
                ;;
            *)
                echo "❌ Unexpected argument: $1"
                echo " Usage: ./do/adapter search [--limit N]"
                exit 1
                ;;
        esac
    done

    # ── Validate MODEL_NAME is set ────────────────────────────────────────
    if [ -z "${MODEL_NAME:-}" ]; then
        echo "❌ MODEL_NAME is not configured in do/config."
        echo ""
        echo " The search command requires a base model to find compatible adapters."
        exit 1
    fi

    echo "LoRA adapters for ${MODEL_NAME}:"
    echo ""

    # ── Build API URL ─────────────────────────────────────────────────────
    # Only '/' is percent-encoded; model IDs are expected to be 'org/name'.
    local encoded_model
    encoded_model=$(printf '%s\n' "${MODEL_NAME}" | sed 's|/|%2F|g')
    local api_url="https://huggingface.co/api/models?filter=peft&other=base_model:adapter:${encoded_model}&sort=downloads&direction=-1&limit=${limit}"

    # ── Make API request ──────────────────────────────────────────────────
    local curl_args=("-sS" "-f")
    if [ -n "${HF_TOKEN:-}" ]; then
        curl_args+=("-H" "Authorization: Bearer ${HF_TOKEN}")
    fi

    local response
    if ! response=$(curl "${curl_args[@]}" "${api_url}" 2>/dev/null); then
        echo "❌ Could not reach HuggingFace Hub. Check network connectivity."
        exit 1
    fi

    # ── Parse and display results ─────────────────────────────────────────
    local have_jq=0
    if command -v jq &>/dev/null; then
        have_jq=1
    fi

    local count=""
    if [ "${have_jq}" -eq 1 ]; then
        # A non-array payload (e.g. an error object) counts as zero results.
        count=$(printf '%s\n' "${response}" \
            | jq 'if type == "array" then length else 0 end' 2>/dev/null) || count=""
    else
        # Fallback: count array elements by counting "id" fields
        count=$(printf '%s\n' "${response}" | grep -o '"id"' | wc -l | tr -d ' ') || count=""
    fi
    # Normalise anything that is not a plain integer so the numeric test below
    # never sees an empty or malformed operand.
    case "${count}" in
        ''|*[!0-9]*) count=0 ;;
    esac

    if [ "${count}" -eq 0 ]; then
        echo "No adapters found for ${MODEL_NAME}."
        echo ""
        echo "Try searching with a different model name or check:"
        echo " https://huggingface.co/models?other=base_model:adapter:${MODEL_NAME}&sort=downloads"
        return 0
    fi

    # ── Print results table ───────────────────────────────────────────────
    printf '%-4s%-42s%-12s%s\n' "#" "REPO ID" "DOWNLOADS" "DESCRIPTION"

    if [ "${have_jq}" -eq 1 ]; then
        # One jq pass emits a row per result. Fields are joined with the ASCII
        # unit separator (0x1F) because, unlike tab, it is not IFS whitespace,
        # so empty fields are preserved by `read`.
        local row_num=0
        local repo_id downloads description formatted_downloads
        while IFS=$'\037' read -r repo_id downloads description; do
            row_num=$((row_num + 1))

            # Format downloads with thousands separators where printf supports it.
            formatted_downloads=$(printf "%'d" "${downloads}" 2>/dev/null) \
                || formatted_downloads="${downloads}"

            printf '%-4s%-42s%-12s%s\n' "${row_num}" "${repo_id}" "${formatted_downloads}" "${description}"
        done < <(printf '%s\n' "${response}" | jq -r '
            .[]
            | [(.id // ""), (.downloads // 0), (.pipeline_tag // .tags[0] // "")]
            | map(tostring)
            | join("\u001f")' 2>/dev/null)
    else
        # Fallback without jq: basic parsing (repo IDs only). The counter lives
        # inside the pipeline's subshell, which is fine as it is only used there.
        local idx=1
        printf '%s\n' "${response}" | grep -o '"id":"[^"]*"' | sed 's/"id":"//;s/"$//' | while IFS= read -r repo_id; do
            printf '%-4s%-42s\n' "${idx}" "${repo_id}"
            idx=$((idx + 1))
        done
    fi

    echo ""
    echo "Add an adapter: ./do/adapter add <name> --from-hub <repo-id>"
}
1190
+
1191
# ── Main: parse subcommand ────────────────────────────────────────────────────
# Entry point: the first positional argument selects the subcommand. With no
# arguments at all, usage is printed and the script exits with status 1.
if [ $# -lt 1 ]; then
    _usage
    exit 1
fi

SUBCOMMAND="$1"

case "${SUBCOMMAND}" in
    -h|--help)
        _usage
        exit 0
        ;;
    add|list|remove|update|search)
        # Every adapter subcommand runs the LoRA check first.
        _validate_lora_enabled
        if [ "${SUBCOMMAND}" = "list" ]; then
            # 'list' is invoked without arguments.
            _adapter_list
        else
            # The handlers receive the full argument list, including the
            # subcommand word itself, which each handler shifts off.
            "_adapter_${SUBCOMMAND}" "$@"
        fi
        ;;
    *)
        echo "❌ Unknown command: ${SUBCOMMAND}"
        echo ""
        _usage
        exit 1
        ;;
esac