@aws/ml-container-creator 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/bin/cli.js +5 -2
  2. package/config/bootstrap-stack.json +86 -7
  3. package/config/defaults.json +1 -1
  4. package/infra/ci-harness/buildspec.yml +60 -0
  5. package/package.json +3 -1
  6. package/servers/README.md +41 -1
  7. package/servers/instance-sizer/index.js +42 -2
  8. package/servers/instance-sizer/lib/instance-ranker.js +114 -10
  9. package/servers/instance-sizer/lib/quota-resolver.js +368 -0
  10. package/servers/instance-sizer/package.json +2 -0
  11. package/servers/lib/catalogs/instances.json +527 -12
  12. package/servers/lib/catalogs/model-servers.json +15 -15
  13. package/servers/lib/catalogs/model-sizes.json +27 -0
  14. package/servers/lib/catalogs/models.json +71 -0
  15. package/servers/lib/schemas/image-catalog.schema.json +9 -1
  16. package/src/app.js +109 -3
  17. package/src/lib/bootstrap-command-handler.js +96 -3
  18. package/src/lib/cli-handler.js +2 -2
  19. package/src/lib/config-manager.js +117 -1
  20. package/src/lib/deployment-entry-schema.js +16 -0
  21. package/src/lib/prompt-runner.js +270 -12
  22. package/src/lib/prompts.js +288 -6
  23. package/src/lib/registry-command-handler.js +12 -0
  24. package/src/lib/schema-sync.js +31 -0
  25. package/src/lib/template-manager.js +49 -1
  26. package/src/lib/validate-runner.js +125 -2
  27. package/templates/Dockerfile +22 -2
  28. package/templates/code/cuda_compat.sh +22 -0
  29. package/templates/code/serve +3 -0
  30. package/templates/code/serving.properties +14 -0
  31. package/templates/code/start_server.sh +3 -0
  32. package/templates/diffusors/Dockerfile +2 -1
  33. package/templates/diffusors/serve +3 -0
  34. package/templates/do/README.md +33 -0
  35. package/templates/do/adapter +1214 -0
  36. package/templates/do/adapters/.gitkeep +2 -0
  37. package/templates/do/add-ic +130 -0
  38. package/templates/do/benchmark +718 -0
  39. package/templates/do/clean +593 -17
  40. package/templates/do/config +49 -4
  41. package/templates/do/deploy +513 -362
  42. package/templates/do/ic/default.conf +32 -0
  43. package/templates/do/lib/endpoint-config.sh +216 -0
  44. package/templates/do/lib/inference-component.sh +167 -0
  45. package/templates/do/lib/secrets.sh +44 -0
  46. package/templates/do/lib/wait.sh +131 -0
  47. package/templates/do/logs +107 -27
  48. package/templates/do/optimize +528 -0
  49. package/templates/do/register +119 -2
  50. package/templates/do/status +337 -0
  51. package/templates/do/test +80 -28
  52. package/templates/triton/Dockerfile +5 -0
@@ -0,0 +1,1214 @@
1
#!/bin/bash
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

# do/adapter — Manage LoRA adapter inference components
#
# Usage:
#   ./do/adapter add <name> --weights <s3-uri>
#   ./do/adapter add <name> --from-hub <hf-repo-id>
#   ./do/adapter list
#   ./do/adapter remove <name>
#   ./do/adapter update <name> --weights <new-s3-uri>
#   ./do/adapter update <name> --from-hub <hf-repo-id>
#   ./do/adapter search [--limit N]
#   ./do/adapter --help

# Strict mode: abort on unhandled errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -euo pipefail

# ── Source project configuration ──────────────────────────────────────────────
# The functions below read settings such as ENDPOINT_NAME, AWS_REGION and
# PROJECT_NAME, and call wait_ic / _get_ic_status, none of which are defined in
# this script — presumably they come from the two files sourced here (confirm).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
. "${SCRIPT_DIR}/config"
. "${SCRIPT_DIR}/lib/wait.sh"
22
+
23
+ # ── Usage ─────────────────────────────────────────────────────────────────────
24
_usage() {
  # Print the top-level help text to stdout.
  # Reads ENDPOINT_NAME (optional); shows "<not deployed>" when it is unset
  # or empty.
  local help_text=(
    "Usage: ./do/adapter <command> [options]"
    ""
    "Manage LoRA adapter inference components on endpoint: ${ENDPOINT_NAME:-<not deployed>}"
    ""
    "Commands:"
    " add <name> --weights <s3-uri> Add a new LoRA adapter from S3"
    " add <name> --from-hub <hf-repo-id> Add a new LoRA adapter from HuggingFace Hub"
    " list List all adapters on the endpoint"
    " remove <name> Remove an adapter"
    " update <name> --weights <new-s3-uri> Update adapter weights from S3"
    " update <name> --from-hub <hf-repo-id> Update adapter weights from HuggingFace Hub"
    " search [--limit N] Search HuggingFace Hub for compatible adapters"
    ""
    "Options:"
    " --help, -h Show this help message"
    ""
    "Examples:"
    " ./do/adapter add ectsum --weights s3://my-bucket/adapters/ectsum/adapter.tar.gz"
    " ./do/adapter add ectsum --from-hub predibase/llama-3.1-8b-ectsum"
    " ./do/adapter list"
    " ./do/adapter remove ectsum"
    " ./do/adapter update ectsum --weights s3://my-bucket/adapters/ectsum-v2/adapter.tar.gz"
    " ./do/adapter update ectsum --from-hub predibase/llama-3.1-8b-ectsum-v2"
    ""
    "Adapter metadata is stored in do/adapters/<name>.conf"
    ""
    "Note: --weights and --from-hub are mutually exclusive."
  )

  # One line per array element, exactly as the help is laid out above.
  printf '%s\n' "${help_text[@]}"
}
53
+
54
+ # ── Validate LoRA is enabled ──────────────────────────────────────────────────
55
_validate_lora_enabled() {
  # Guard for adapter commands: returns 0 when ENABLE_LORA is exactly "true";
  # otherwise prints remediation steps to stdout and exits the script with 1.
  if [ "${ENABLE_LORA:-}" = "true" ]; then
    return 0
  fi

  printf '%s\n' \
    "❌ LoRA adapter serving is not enabled for this project." \
    "" \
    " ENABLE_LORA=true was not found in do/config." \
    "" \
    " To enable LoRA adapters, regenerate your project with --enable-lora" \
    " or add ENABLE_LORA=true to do/config and configure your model server" \
    " environment (e.g., VLLM_ENABLE_LORA=true)."
  exit 1
}
67
+
68
+ # ── Resolve base IC name ──────────────────────────────────────────────────────
69
_resolve_base_ic_name() {
  # Print the name of the base inference component on stdout.
  #
  # Lookup order:
  #   1. IC_DEPLOYED_NAME from ${SCRIPT_DIR}/ic/default.conf (multi-IC layout)
  #   2. INFERENCE_COMPONENT_NAME from the environment (legacy do/config)
  #
  # Exits 1 when neither source yields a name. Diagnostics go to stderr:
  # this function is meant to be used as `name=$(_resolve_base_ic_name)`, so
  # anything written to stdout on the error path would be captured into the
  # variable instead of being shown to the user.
  local base_ic_name=""
  local ic_conf="${SCRIPT_DIR}/ic/default.conf"

  # Try multi-IC path first: do/ic/default.conf
  if [ -f "${ic_conf}" ]; then
    # tail -n 1: if the key appears more than once, use the last definition
    # so the result is always a single line. `|| echo ""` absorbs the grep
    # no-match status that pipefail would otherwise propagate.
    base_ic_name=$(grep "^export IC_DEPLOYED_NAME=" "${ic_conf}" 2>/dev/null \
      | sed 's/^export IC_DEPLOYED_NAME="//' \
      | sed 's/"$//' \
      | tail -n 1 || echo "")
  fi

  # Fallback to legacy config: INFERENCE_COMPONENT_NAME
  if [ -z "${base_ic_name}" ]; then
    base_ic_name="${INFERENCE_COMPONENT_NAME:-}"
  fi

  if [ -z "${base_ic_name}" ]; then
    {
      echo "❌ Cannot determine base inference component name."
      echo ""
      echo " No IC_DEPLOYED_NAME found in do/ic/default.conf and no"
      echo " INFERENCE_COMPONENT_NAME in do/config."
      echo ""
      echo " Deploy your base model first with: ./do/deploy"
    } >&2
    exit 1
  fi

  echo "${base_ic_name}"
}
94
+
95
+ # ── Best-effort adapter_config.json validation ────────────────────────────────
96
+ # Downloads the adapter tar.gz, extracts adapter_config.json, and checks that
97
+ # base_model_name_or_path matches MODEL_NAME from do/config.
98
+ # Returns 0 always — failures are silently ignored (best-effort).
99
_validate_adapter_config() {
  # Best-effort check that an adapter archive was trained on the configured
  # base model.
  #
  # Arguments:
  #   $1 - S3 URI of the adapter .tar.gz
  # Globals read: MODEL_NAME (optional), AWS_REGION
  # Output: a single warning line on stdout when the archive's
  #         base_model_name_or_path differs from MODEL_NAME; otherwise nothing.
  # Returns: 0 always. Download, extraction and parse failures are ignored.
  local weights_uri="$1"
  local tmp_dir=""

  # Skip if MODEL_NAME is not configured (non-transformers projects)
  if [ -z "${MODEL_NAME:-}" ]; then
    return 0
  fi

  # Unpredictable scratch directory instead of a fixed /tmp/<name>_$$ path.
  tmp_dir=$(mktemp -d "${TMPDIR:-/tmp}/adapter_config_check.XXXXXX" 2>/dev/null) || return 0

  (
    # Run in subshell so any failure is contained
    set +e

    # Download the tar.gz
    if ! aws s3 cp "${weights_uri}" "${tmp_dir}/adapter.tar.gz" --region "${AWS_REGION}" --quiet 2>/dev/null; then
      exit 0
    fi

    # Extract just adapter_config.json. Archives created with `tar -C dir .`
    # store the member as "./adapter_config.json", which an exact member-name
    # match on "adapter_config.json" does not find, so try both spellings.
    if ! tar -xzf "${tmp_dir}/adapter.tar.gz" -C "${tmp_dir}" adapter_config.json 2>/dev/null \
      && ! tar -xzf "${tmp_dir}/adapter.tar.gz" -C "${tmp_dir}" ./adapter_config.json 2>/dev/null; then
      exit 0
    fi

    # Read base_model_name_or_path from the JSON
    adapter_base_model=""
    if command -v jq &>/dev/null; then
      adapter_base_model=$(jq -r '.base_model_name_or_path // empty' "${tmp_dir}/adapter_config.json" 2>/dev/null)
    else
      # Fallback: use grep/sed for environments without jq
      adapter_base_model=$(grep -o '"base_model_name_or_path"[[:space:]]*:[[:space:]]*"[^"]*"' "${tmp_dir}/adapter_config.json" 2>/dev/null | sed 's/.*"base_model_name_or_path"[[:space:]]*:[[:space:]]*"//' | sed 's/"$//')
    fi

    # Compare with MODEL_NAME
    if [ -n "${adapter_base_model}" ] && [ "${adapter_base_model}" != "${MODEL_NAME}" ]; then
      echo "⚠️ Adapter was trained on '${adapter_base_model}' but base model is '${MODEL_NAME}'. Adapter may not work correctly."
    fi
    exit 0
  ) 2>/dev/null || true

  # Clean up temp files
  rm -rf -- "${tmp_dir}" 2>/dev/null || true

  return 0
}
144
+
145
+ # ── Download adapter from HuggingFace Hub ─────────────────────────────────────
146
+ # Downloads adapter files from a HuggingFace Hub repository, validates
147
+ # adapter_config.json exists, creates a tar.gz, and uploads to S3.
148
+ # Sets the variable `weights_uri` to the resulting S3 path.
149
+ #
150
+ # Arguments:
151
+ # $1 - HuggingFace repo ID (e.g., "org/adapter-name" or "adapter-name")
152
+ # $2 - Adapter name (for S3 path construction)
153
+ #
154
+ # Returns 0 on success, exits on failure.
155
_download_from_hub() {
  # Download a PEFT/LoRA adapter from HuggingFace Hub, repackage it as a flat
  # adapter.tar.gz and upload it to S3.
  #
  # Arguments:
  #   $1 - HuggingFace repo ID (e.g., "org/adapter-name" or "adapter-name")
  #   $2 - Adapter name (used in the S3 key)
  # Globals read: AWS_REGION, PROJECT_NAME, ASYNC_S3_BUCKET (optional),
  #               HF_TOKEN (optional), MODEL_NAME (optional)
  # Side effect: assigns the uploaded object's S3 URI to `weights_uri`, which
  #              is not declared here, so it lands in the caller's variable.
  # Returns 0 on success; exits 1 on any failure.
  local hf_repo_id="$1"
  local adapter_name="$2"
  local tmp_dir=""
  local files_dir=""

  echo "📥 Downloading adapter from HuggingFace Hub: ${hf_repo_id}"
  echo ""

  # ── Resolve S3 bucket ─────────────────────────────────────────────────
  local s3_bucket=""
  if [ -n "${ASYNC_S3_BUCKET:-}" ]; then
    s3_bucket="${ASYNC_S3_BUCKET}"
  else
    local account_id
    account_id=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "")
    if [ -z "${account_id}" ]; then
      echo "❌ Could not determine AWS account ID."
      echo " Ensure AWS credentials are configured."
      exit 1
    fi
    s3_bucket="ml-container-creator-${account_id}-${AWS_REGION}"
  fi

  # ── Create temp directory ─────────────────────────────────────────────
  # mktemp gives an unpredictable, freshly created directory; a fixed
  # /tmp/<name>_$$ path can be pre-created by another local user.
  if ! tmp_dir=$(mktemp -d "${TMPDIR:-/tmp}/adapter_hub_download.XXXXXX"); then
    echo "❌ Failed to create a temporary directory for the adapter download."
    exit 1
  fi
  files_dir="${tmp_dir}/adapter_files"
  mkdir -p "${files_dir}"

  # ── Download adapter files ────────────────────────────────────────────
  if command -v huggingface-cli &>/dev/null; then
    echo " Using huggingface-cli to download..."
    local hf_args=("download" "${hf_repo_id}" "--local-dir" "${files_dir}")
    if [ -n "${HF_TOKEN:-}" ]; then
      hf_args+=("--token" "${HF_TOKEN}")
    fi
    if ! huggingface-cli "${hf_args[@]}" 2>/dev/null; then
      echo "❌ Failed to download adapter from HuggingFace Hub: ${hf_repo_id}"
      echo ""
      echo " Check that:"
      echo " • The repository exists: https://huggingface.co/${hf_repo_id}"
      echo " • For gated repos, set HF_TOKEN environment variable"
      echo " • You have network connectivity to huggingface.co"
      rm -rf "${tmp_dir}"
      exit 1
    fi
  else
    # Fallback: use curl with HF Hub API
    echo " Using curl to download (huggingface-cli not found)..."

    # Get file listing from the repo
    local api_url="https://huggingface.co/api/models/${hf_repo_id}"
    local auth_header=""
    if [ -n "${HF_TOKEN:-}" ]; then
      auth_header="Authorization: Bearer ${HF_TOKEN}"
    fi

    local api_args=("-sS")
    if [ -n "${auth_header}" ]; then
      api_args+=("-H" "${auth_header}")
    fi

    # `|| repo_info=""` keeps a transport failure (DNS, TLS, timeout) from
    # aborting the script under `set -e` before the message below is printed.
    local repo_info=""
    repo_info=$(curl "${api_args[@]}" "${api_url}" 2>/dev/null) || repo_info=""

    # Here-string rather than `echo | grep -q`: grep -q exits on the first
    # match, which can fail the pipeline under pipefail for large inputs.
    if [ -z "${repo_info}" ] || grep -q '"error"' <<< "${repo_info}"; then
      echo "❌ Failed to access HuggingFace Hub repository: ${hf_repo_id}"
      echo ""
      echo " Check that:"
      echo " • The repository exists: https://huggingface.co/${hf_repo_id}"
      echo " • For gated repos, set HF_TOKEN environment variable"
      echo " • You have network connectivity to huggingface.co"
      rm -rf "${tmp_dir}"
      exit 1
    fi

    # Extract file list from siblings array
    local files=""
    if command -v jq &>/dev/null; then
      files=$(jq -r '.siblings[]?.rfilename // empty' <<< "${repo_info}" 2>/dev/null) || files=""
    else
      files=$(grep -o '"rfilename":"[^"]*"' <<< "${repo_info}" | sed 's/"rfilename":"//;s/"$//') || files=""
    fi

    if [ -z "${files}" ]; then
      echo "❌ No files found in repository: ${hf_repo_id}"
      rm -rf "${tmp_dir}"
      exit 1
    fi

    # Download each file (only root-level files, skip subdirectories)
    local download_base="https://huggingface.co/${hf_repo_id}/resolve/main"
    local filename=""
    local curl_args=()
    while IFS= read -r filename; do
      case "${filename}" in
        # Skip files in subdirectories (we only want root-level adapter files)
        */*) continue ;;
        # Skip hidden files and READMEs
        .gitattributes|.gitignore|README.md|LICENSE*) continue ;;
      esac

      echo " Downloading: ${filename}"
      # -f: treat HTTP errors (401/404/5xx) as failures instead of saving the
      # server's error page as if it were the adapter file.
      curl_args=("-sS" "-f" "-L" "-o" "${files_dir}/${filename}")
      if [ -n "${auth_header}" ]; then
        curl_args+=("-H" "${auth_header}")
      fi
      if ! curl "${curl_args[@]}" "${download_base}/${filename}" 2>/dev/null; then
        echo " ⚠️ Failed to download: ${filename} (skipping)"
        # Do not leave a partial file behind to be packaged into the archive.
        rm -f -- "${files_dir}/${filename}"
      fi
    done <<< "${files}"
  fi

  # ── Remove .huggingface metadata if present ───────────────────────────
  rm -rf "${files_dir}/.cache" "${files_dir}/.huggingface" 2>/dev/null || true
  # Remove hidden files that huggingface-cli may create
  find "${files_dir}" -name ".*" -delete 2>/dev/null || true
  # Flatten to root-level files only. mv -n: a nested file (for example
  # checkpoint-100/adapter_config.json) must never replace a file of the same
  # name that already sits at the repository root.
  find "${files_dir}" -mindepth 2 -type f -exec mv -n -- {} "${files_dir}/" \; 2>/dev/null || true
  find "${files_dir}" -mindepth 1 -maxdepth 1 -type d -exec rm -rf -- {} + 2>/dev/null || true

  # ── Validate adapter_config.json exists ───────────────────────────────
  if [ ! -f "${files_dir}/adapter_config.json" ]; then
    echo "❌ adapter_config.json not found in downloaded files."
    echo ""
    echo " The repository '${hf_repo_id}' does not appear to contain"
    echo " a valid PEFT/LoRA adapter. A valid adapter must include:"
    echo " • adapter_config.json"
    echo " • adapter_model.safetensors (or adapter_model.bin)"
    echo ""
    echo " Verify the repository at: https://huggingface.co/${hf_repo_id}"
    rm -rf "${tmp_dir}"
    exit 1
  fi

  echo "✅ adapter_config.json found"

  # ── Optional: check base_model_name_or_path matches MODEL_NAME ────────
  if [ -n "${MODEL_NAME:-}" ]; then
    local adapter_base_model=""
    if command -v jq &>/dev/null; then
      adapter_base_model=$(jq -r '.base_model_name_or_path // empty' "${files_dir}/adapter_config.json" 2>/dev/null) || adapter_base_model=""
    else
      adapter_base_model=$(grep -o '"base_model_name_or_path"[[:space:]]*:[[:space:]]*"[^"]*"' "${files_dir}/adapter_config.json" 2>/dev/null | sed 's/.*"base_model_name_or_path"[[:space:]]*:[[:space:]]*"//' | sed 's/"$//') || adapter_base_model=""
    fi

    if [ -n "${adapter_base_model}" ] && [ "${adapter_base_model}" != "${MODEL_NAME}" ]; then
      echo "⚠️ Adapter was trained on '${adapter_base_model}' but base model is '${MODEL_NAME}'. Adapter may not work correctly."
    fi
  fi

  # ── Create adapter.tar.gz from downloaded files (flat, no subdirs) ────
  echo "📦 Creating adapter.tar.gz..."
  if ! tar -czf "${tmp_dir}/adapter.tar.gz" -C "${files_dir}" . 2>/dev/null; then
    echo "❌ Failed to create adapter.tar.gz"
    rm -rf "${tmp_dir}"
    exit 1
  fi

  local tar_size
  tar_size=$(du -h "${tmp_dir}/adapter.tar.gz" | cut -f1)
  echo " Archive size: ${tar_size}"

  # ── Upload to S3 ─────────────────────────────────────────────────────
  local s3_path="s3://${s3_bucket}/adapters/${PROJECT_NAME}/${adapter_name}/adapter.tar.gz"
  echo "☁️ Uploading to S3: ${s3_path}"

  if ! aws s3 cp "${tmp_dir}/adapter.tar.gz" "${s3_path}" --region "${AWS_REGION}"; then
    echo "❌ Failed to upload adapter to S3."
    echo ""
    echo " Check that:"
    echo " • The S3 bucket '${s3_bucket}' exists"
    echo " • Your IAM credentials have s3:PutObject permission"
    echo " • Run bootstrap if the bucket doesn't exist: ./do/bootstrap"
    rm -rf "${tmp_dir}"
    exit 1
  fi

  echo "✅ Uploaded to S3: ${s3_path}"

  # ── Clean up ──────────────────────────────────────────────────────────
  rm -rf "${tmp_dir}"

  # Set the weights_uri variable for the caller
  weights_uri="${s3_path}"
}
337
+
338
+ # ── Subcommand implementations ────────────────────────────────────────────────
339
+
340
_adapter_add() {
  # Implements `./do/adapter add <name> (--weights <s3-uri> | --from-hub <repo>)`.
  #
  # Arguments: the full subcommand argv; $1 is the literal "add" and is dropped.
  # Globals read: SCRIPT_DIR, ENDPOINT_NAME, PROJECT_NAME, AWS_REGION
  # Calls: _resolve_base_ic_name, _get_ic_status, _download_from_hub,
  #        _validate_adapter_config, wait_ic
  # Side effects: creates a SageMaker inference component and writes
  #               ${SCRIPT_DIR}/adapters/<name>.conf
  # Exits 1 on invalid input or any failed step; exits 0 after printing --help.
  #
  # Ordering notes:
  #   * All cheap checks (arguments, endpoint configured, base IC InService)
  #     run before the HuggingFace download and S3 upload, so a project that
  #     is not deployed fails fast instead of after transferring the adapter.
  #   * The conf file is written as soon as the create call is accepted, before
  #     waiting. If the wait fails or is interrupted, the component is still
  #     recorded locally and `./do/adapter remove <name>` can clean it up.
  local adapter_name=""
  local weights_uri=""
  local from_hub=""

  # Parse add arguments
  shift # remove 'add' from args
  while [ $# -gt 0 ]; do
    case "$1" in
      --weights)
        if [ -z "${2:-}" ]; then
          echo "❌ --weights requires an S3 URI argument"
          echo " Usage: ./do/adapter add <name> --weights <s3-uri>"
          exit 1
        fi
        weights_uri="$2"
        shift 2
        ;;
      --from-hub)
        if [ -z "${2:-}" ]; then
          echo "❌ --from-hub requires a HuggingFace repo ID argument"
          echo " Usage: ./do/adapter add <name> --from-hub <hf-repo-id>"
          exit 1
        fi
        from_hub="$2"
        shift 2
        ;;
      --help|-h)
        echo "Usage: ./do/adapter add <name> --weights <s3-uri>"
        echo " ./do/adapter add <name> --from-hub <hf-repo-id>"
        echo ""
        echo "Add a new LoRA adapter to the endpoint."
        echo ""
        echo "Arguments:"
        echo " <name> Adapter name (lowercase alphanumeric + hyphens, 1-50 chars)"
        echo " --weights <s3-uri> S3 URI to adapter weights (.tar.gz)"
        echo " --from-hub <hf-repo-id> Download adapter from HuggingFace Hub"
        echo ""
        echo "Note: --weights and --from-hub are mutually exclusive."
        echo ""
        echo "Examples:"
        echo " ./do/adapter add ectsum --weights s3://bucket/adapters/ectsum/adapter.tar.gz"
        echo " ./do/adapter add ectsum --from-hub predibase/llama-3.1-8b-ectsum"
        exit 0
        ;;
      -*)
        echo "❌ Unknown option: $1"
        echo " Usage: ./do/adapter add <name> --weights <s3-uri>"
        echo " ./do/adapter add <name> --from-hub <hf-repo-id>"
        exit 1
        ;;
      *)
        if [ -z "${adapter_name}" ]; then
          adapter_name="$1"
        else
          echo "❌ Unexpected argument: $1"
          echo " Usage: ./do/adapter add <name> --weights <s3-uri>"
          echo " ./do/adapter add <name> --from-hub <hf-repo-id>"
          exit 1
        fi
        shift
        ;;
    esac
  done

  # Validate required arguments
  if [ -z "${adapter_name}" ]; then
    echo "❌ Adapter name is required"
    echo " Usage: ./do/adapter add <name> --weights <s3-uri>"
    echo " ./do/adapter add <name> --from-hub <hf-repo-id>"
    exit 1
  fi

  # ── Mutual exclusivity check ─────────────────────────────────────────
  if [ -n "${weights_uri}" ] && [ -n "${from_hub}" ]; then
    echo "❌ --weights and --from-hub are mutually exclusive"
    echo ""
    echo " Use one or the other:"
    echo " ./do/adapter add ${adapter_name} --weights <s3-uri>"
    echo " ./do/adapter add ${adapter_name} --from-hub <hf-repo-id>"
    exit 1
  fi

  if [ -z "${weights_uri}" ] && [ -z "${from_hub}" ]; then
    echo "❌ Either --weights or --from-hub is required"
    echo " Usage: ./do/adapter add <name> --weights <s3-uri>"
    echo " ./do/adapter add <name> --from-hub <hf-repo-id>"
    exit 1
  fi

  # ── Validate HF repo ID format (if --from-hub) ───────────────────────
  if [ -n "${from_hub}" ]; then
    # Valid formats: "org/name" or "name" (alphanumeric, hyphens, underscores, dots)
    if ! grep -qE '^[a-zA-Z0-9._-]+(/[a-zA-Z0-9._-]+)?$' <<< "${from_hub}"; then
      echo "❌ Invalid HuggingFace repo ID: ${from_hub}"
      echo ""
      echo " Repo ID must be in format 'org/name' or 'name'"
      echo " Examples: predibase/llama-3.1-8b-ectsum, my-adapter"
      exit 1
    fi
  fi

  # ── Validate adapter name format ──────────────────────────────────────
  if ! grep -qE '^[a-z0-9][a-z0-9-]{0,49}$' <<< "${adapter_name}"; then
    echo "❌ Invalid adapter name: ${adapter_name}"
    echo ""
    echo " Adapter names must be:"
    echo " • 1-50 characters long"
    echo " • Lowercase alphanumeric and hyphens only"
    echo " • Start with a letter or number"
    echo ""
    echo " Examples: ectsum, finance-qa, my-adapter-v2"
    exit 1
  fi

  # ── Validate S3 URI format (only when --weights is used) ─────────────
  if [ -n "${weights_uri}" ]; then
    if ! grep -qE '^s3://.*\.tar\.gz$' <<< "${weights_uri}"; then
      echo "❌ Invalid S3 URI: ${weights_uri}"
      echo ""
      echo " Adapter weights must be:"
      echo " • An S3 URI starting with s3://"
      echo " • A .tar.gz archive containing adapter files"
      echo ""
      echo " Example: s3://my-bucket/adapters/ectsum/adapter.tar.gz"
      exit 1
    fi
  fi

  # ── Validate adapter name uniqueness ──────────────────────────────────
  local conf_path="${SCRIPT_DIR}/adapters/${adapter_name}.conf"
  if [ -f "${conf_path}" ]; then
    echo "❌ Adapter already exists: ${adapter_name}"
    echo ""
    echo " An adapter with this name is already registered."
    echo " To update its weights, use: ./do/adapter update ${adapter_name} --weights <new-uri>"
    echo " To remove it first: ./do/adapter remove ${adapter_name}"
    exit 1
  fi

  # ── Validate an endpoint is configured ────────────────────────────────
  # Same guard as `list`; without it an unset ENDPOINT_NAME only surfaces
  # later as an "unbound variable" abort under `set -u`.
  if [ -z "${ENDPOINT_NAME:-}" ]; then
    echo "❌ No endpoint configured. Deploy first with: ./do/deploy"
    exit 1
  fi

  echo "🔌 Adding adapter: ${adapter_name}"
  if [ -n "${from_hub}" ]; then
    echo " Source: HuggingFace Hub (${from_hub})"
  else
    echo " Weights: ${weights_uri}"
  fi
  echo ""

  # ── Validate base IC is InService ─────────────────────────────────────
  local base_ic_name
  base_ic_name=$(_resolve_base_ic_name)

  echo "🔍 Checking base inference component: ${base_ic_name}"
  local base_status
  base_status=$(_get_ic_status "${base_ic_name}")

  if [ "${base_status}" != "InService" ]; then
    echo "❌ Base inference component is not InService: ${base_ic_name}"
    echo " Current status: ${base_status:-not found}"
    echo ""
    echo " Adapters require a running base model. Deploy first with:"
    echo " ./do/deploy"
    exit 1
  fi
  echo "✅ Base IC is InService: ${base_ic_name}"

  # ── If --from-hub: download, tar, upload to S3 ────────────────────────
  if [ -n "${from_hub}" ]; then
    _download_from_hub "${from_hub}" "${adapter_name}"
    # weights_uri is now set by _download_from_hub
    echo ""
  else
    # ── Validate S3 object exists (best-effort, only for --weights) ─────
    echo "🔍 Checking S3 object exists..."
    if ! aws s3 ls "${weights_uri}" --region "${AWS_REGION}" &>/dev/null; then
      echo "⚠️ Could not verify S3 object: ${weights_uri}"
      echo " This may be a permissions issue. Proceeding anyway..."
      echo " SageMaker will fail at load time if the object doesn't exist."
      echo ""
    else
      echo "✅ S3 object verified: ${weights_uri}"
    fi

    # ── Best-effort: validate adapter_config.json base model ───────────
    # Downloads the tar.gz, extracts adapter_config.json, and checks that
    # base_model_name_or_path matches MODEL_NAME from do/config.
    # If anything fails (download, extraction, parsing), skip silently.
    _validate_adapter_config "${weights_uri}" || true
  fi

  # ── Build adapter IC name ─────────────────────────────────────────────
  local adapter_ic_name="${PROJECT_NAME}-adapter-${adapter_name}"

  # ── Create adapter inference component ────────────────────────────────
  echo "🚀 Creating adapter inference component: ${adapter_ic_name}"
  if ! aws sagemaker create-inference-component \
    --inference-component-name "${adapter_ic_name}" \
    --endpoint-name "${ENDPOINT_NAME}" \
    --specification "{\"BaseInferenceComponentName\":\"${base_ic_name}\",\"Container\":{\"ArtifactUrl\":\"${weights_uri}\"}}" \
    --region "${AWS_REGION}"; then

    echo "❌ Failed to create adapter inference component"
    echo " Check that:"
    echo " • Your IAM credentials have sagemaker:CreateInferenceComponent permission"
    echo " • The base IC '${base_ic_name}' is InService"
    echo " • The S3 URI is accessible by the SageMaker execution role"
    exit 1
  fi

  echo "✅ Adapter IC creation initiated: ${adapter_ic_name}"

  # ── Create adapter metadata conf file ─────────────────────────────────
  # Written before waiting so the component is tracked locally even if the
  # wait below aborts the script.
  local created_at
  created_at=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

  mkdir -p "${SCRIPT_DIR}/adapters"
  {
    printf 'export ADAPTER_NAME="%s"\n' "${adapter_name}"
    printf 'export ADAPTER_IC_NAME="%s"\n' "${adapter_ic_name}"
    printf 'export ADAPTER_WEIGHTS_URI="%s"\n' "${weights_uri}"
    printf 'export ADAPTER_CREATED_AT="%s"\n' "${created_at}"
    # Add hub-specific metadata if --from-hub was used
    if [ -n "${from_hub}" ]; then
      printf 'export ADAPTER_SOURCE="hub"\n'
      printf 'export ADAPTER_HF_REPO="%s"\n' "${from_hub}"
    fi
  } > "${conf_path}"

  # ── Wait for adapter IC to reach InService ────────────────────────────
  echo "⏳ Waiting for adapter IC to reach InService..."
  echo " This typically takes 1-3 minutes for adapters."

  wait_ic "${adapter_ic_name}"

  echo "✅ Adapter IC is InService: ${adapter_ic_name}"

  echo ""
  echo "✅ Adapter added successfully!"
  echo ""
  echo "📋 Adapter Details:"
  echo " Name: ${adapter_name}"
  echo " IC Name: ${adapter_ic_name}"
  echo " Weights: ${weights_uri}"
  if [ -n "${from_hub}" ]; then
    echo " Source: HuggingFace Hub (${from_hub})"
  fi
  echo " Created: ${created_at}"
  echo ""
  echo "🧪 Test your adapter:"
  echo " ./do/test ${adapter_name}"
  echo ""
  echo "🗑️ Remove when done:"
  echo " ./do/adapter remove ${adapter_name}"
}
598
+
599
_adapter_list() {
  # Implements `./do/adapter list`: prints a NAME / STATUS / WEIGHTS table of
  # every inference component on the endpoint that has a
  # Specification.BaseInferenceComponentName (i.e. an adapter component).
  #
  # Globals read: ENDPOINT_NAME, AWS_REGION, PROJECT_NAME, SCRIPT_DIR
  # Requires: aws CLI, jq
  # Rows whose component name is not recorded as ADAPTER_IC_NAME in any
  # ${SCRIPT_DIR}/adapters/*.conf are suffixed with " (external)".
  # Returns 0 (also when there is nothing to list); exits 1 when no endpoint
  # is configured, jq is missing, or the list call fails.
  if [ -z "${ENDPOINT_NAME:-}" ]; then
    echo "❌ No endpoint configured. Deploy first with: ./do/deploy"
    exit 1
  fi

  # Every field below is extracted with jq; without this check a missing jq
  # ends the script under `set -e` with no message at all.
  if ! command -v jq &>/dev/null; then
    echo "❌ jq is required to list adapters but was not found on PATH."
    exit 1
  fi

  echo "Adapters on endpoint: ${ENDPOINT_NAME}"
  echo ""

  # ── List all inference components on the endpoint ─────────────────────
  local ic_list
  ic_list=$(aws sagemaker list-inference-components \
    --endpoint-name-equals "${ENDPOINT_NAME}" \
    --region "${AWS_REGION}" 2>/dev/null) || {
    echo "❌ Failed to list inference components on endpoint: ${ENDPOINT_NAME}"
    echo " Check that the endpoint exists and you have sagemaker:ListInferenceComponents permission."
    exit 1
  }

  # Extract IC names from the list response
  local ic_names=""
  ic_names=$(jq -r '.InferenceComponents[].InferenceComponentName' <<< "${ic_list}" 2>/dev/null) || ic_names=""

  if [ -z "${ic_names}" ]; then
    echo "No adapters found on this endpoint."
    echo ""
    echo "Add one with: ./do/adapter add <name> --weights <s3-uri>"
    return 0
  fi

  # ── Collect local adapter names for ownership check ───────────────────
  # Newline-delimited with a leading and trailing newline, so ownership can
  # be tested as an exact whole-entry match. A word-boundary grep is not
  # enough: "-" is a word boundary, so "proj-adapter-a" would match inside
  # "proj-adapter-a-b".
  local nl=$'\n'
  local owned_ic_names="${nl}"
  local conf_file=""
  local conf_ic_name=""
  if [ -d "${SCRIPT_DIR}/adapters" ]; then
    for conf_file in "${SCRIPT_DIR}"/adapters/*.conf; do
      [ -f "${conf_file}" ] || continue
      conf_ic_name=$(grep "^export ADAPTER_IC_NAME=" "${conf_file}" 2>/dev/null | sed 's/^export ADAPTER_IC_NAME="//' | sed 's/"$//' || echo "")
      if [ -n "${conf_ic_name}" ]; then
        owned_ic_names="${owned_ic_names}${conf_ic_name}${nl}"
      fi
    done
  fi

  # ── Filter to adapter ICs and collect details ─────────────────────────
  local table=""
  local ic_name=""
  local ic_detail=""
  local base_ic=""
  local status=""
  local weights_url=""
  local display_name=""
  local ownership=""

  # The name list is read on fd 3 so commands in the loop body cannot
  # consume it through stdin.
  while IFS= read -r -u 3 ic_name; do
    [ -n "${ic_name}" ] || continue

    # Describe each IC to check if it's an adapter (has BaseInferenceComponentName)
    ic_detail=$(aws sagemaker describe-inference-component \
      --inference-component-name "${ic_name}" \
      --region "${AWS_REGION}" 2>/dev/null) || continue

    base_ic=$(jq -r '.Specification.BaseInferenceComponentName // empty' <<< "${ic_detail}" 2>/dev/null) || base_ic=""
    if [ -z "${base_ic}" ]; then
      # Not an adapter IC — skip
      continue
    fi

    # Extract status and artifact URL
    status=$(jq -r '.InferenceComponentStatus // "Unknown"' <<< "${ic_detail}" 2>/dev/null) || status="Unknown"
    weights_url=$(jq -r '.Specification.Container.ArtifactUrl // "N/A"' <<< "${ic_detail}" 2>/dev/null) || weights_url="N/A"

    # Derive display name (strip project prefix if present)
    display_name="${ic_name#"${PROJECT_NAME}-adapter-"}"

    # Check ownership: is this adapter in our local do/adapters/*.conf?
    case "${owned_ic_names}" in
      *"${nl}${ic_name}${nl}"*) ownership="" ;;
      *) ownership=" (external)" ;;
    esac

    table="${table}$(printf '%-14s%-12s%s%s' "${display_name}" "${status}" "${weights_url}" "${ownership}")${nl}"
  done 3<<< "${ic_names}"

  if [ -z "${table}" ]; then
    echo "No adapters found on this endpoint."
    echo ""
    echo "Add one with: ./do/adapter add <name> --weights <s3-uri>"
    return 0
  fi

  # ── Print table ───────────────────────────────────────────────────────
  # Rows are already newline-terminated; printing them verbatim avoids the
  # GNU-only `head -n -1` and echo -e's backslash interpretation.
  printf '%-14s%-12s%s\n' "NAME" "STATUS" "WEIGHTS"
  printf '%s' "${table}"
}
698
+
699
# Remove a LoRA adapter: delete its SageMaker inference component (IC), wait
# for the deletion to finish, then delete the local do/adapters/<name>.conf.
#
# Globals:   SCRIPT_DIR (read), AWS_REGION (read)
# Calls:     aws, _get_ic_status (defined elsewhere in this script)
# Arguments: $1 - the subcommand word ('remove'); discarded
#            remaining - exactly one adapter name, or --help/-h
# Outputs:   progress and error messages on stdout
# Exits:     0 after --help; 1 on bad arguments, unknown adapter, a conf
#            without ADAPTER_IC_NAME, or a failed delete call while the IC
#            still reports a status. Returns normally otherwise, including
#            when the wait times out.
_adapter_remove() {
    local adapter_name=""

    # Parse remove arguments
    shift # remove 'remove' from args
    while [ $# -gt 0 ]; do
        case "$1" in
            --help|-h)
                echo "Usage: ./do/adapter remove <name>"
                echo ""
                echo "Remove a LoRA adapter from the endpoint."
                echo ""
                echo "Arguments:"
                echo " <name> Adapter name to remove"
                exit 0
                ;;
            -*)
                echo "❌ Unknown option: $1"
                echo " Usage: ./do/adapter remove <name>"
                exit 1
                ;;
            *)
                if [ -z "${adapter_name}" ]; then
                    adapter_name="$1"
                else
                    echo "❌ Unexpected argument: $1"
                    echo " Usage: ./do/adapter remove <name>"
                    exit 1
                fi
                shift
                ;;
        esac
    done

    if [ -z "${adapter_name}" ]; then
        echo "❌ Adapter name is required"
        echo " Usage: ./do/adapter remove <name>"
        exit 1
    fi

    echo "🗑️ Removing adapter: ${adapter_name}"
    echo ""

    # ── Validate adapter conf exists ──────────────────────────────────────
    local conf_file="${SCRIPT_DIR}/adapters/${adapter_name}.conf"
    if [ ! -f "${conf_file}" ]; then
        echo "❌ Adapter not found: ${adapter_name}"
        echo ""
        echo " No configuration file at: do/adapters/${adapter_name}.conf"
        echo ""
        echo " Available adapters:"
        if [ -d "${SCRIPT_DIR}/adapters" ]; then
            local f
            for f in "${SCRIPT_DIR}"/adapters/*.conf; do
                # An unmatched glob stays literal; skip it.
                [ -f "${f}" ] || continue
                echo " • $(basename "${f}" .conf)"
            done
        else
            echo " (none)"
        fi
        exit 1
    fi

    # ── Read adapter IC name from conf ────────────────────────────────────
    # The conf is parsed textually (not sourced): take the value between the
    # double quotes of the `export ADAPTER_IC_NAME="..."` line.
    local adapter_ic_name
    adapter_ic_name=$(grep "^export ADAPTER_IC_NAME=" "${conf_file}" 2>/dev/null | sed 's/^export ADAPTER_IC_NAME="//' | sed 's/"$//')

    if [ -z "${adapter_ic_name}" ]; then
        echo "❌ Could not read ADAPTER_IC_NAME from: do/adapters/${adapter_name}.conf"
        echo " The conf file may be corrupted. Removing it manually."
        rm -f "${conf_file}"
        exit 1
    fi

    echo "📋 Adapter IC: ${adapter_ic_name}"

    # ── Delete the inference component ────────────────────────────────────
    echo "🔄 Deleting inference component: ${adapter_ic_name}"
    # Capture stderr (stdout is discarded) so a real failure can be reported
    # with the CLI's own message instead of only a guess.
    local delete_err=""
    if ! delete_err=$(aws sagemaker delete-inference-component \
            --inference-component-name "${adapter_ic_name}" \
            --region "${AWS_REGION}" 2>&1 >/dev/null); then

        # Check if it's already gone. An empty status and the literal "None"
        # both mean "no such IC", matching the wait loop below.
        local current_status
        current_status=$(_get_ic_status "${adapter_ic_name}")
        if [ -z "${current_status}" ] || [ "${current_status}" = "None" ]; then
            echo " Inference component already deleted or not found. Cleaning up local files."
        else
            echo "❌ Failed to delete inference component: ${adapter_ic_name}"
            echo " Current status: ${current_status}"
            if [ -n "${delete_err}" ]; then
                echo " AWS CLI error: ${delete_err}"
            fi
            echo ""
            echo " Check that your IAM credentials have sagemaker:DeleteInferenceComponent permission."
            exit 1
        fi
    fi

    # ── Wait for deletion to complete ─────────────────────────────────────
    echo "⏳ Waiting for adapter IC deletion to complete..."
    local wait_start
    wait_start=$(date +%s)
    local timeout=600 # 10 minutes
    local deleted=false
    local status elapsed

    while true; do
        status=$(_get_ic_status "${adapter_ic_name}")

        if [ -z "${status}" ] || [ "${status}" = "None" ]; then
            deleted=true
            break
        fi

        elapsed=$(( $(date +%s) - wait_start ))
        if [ "${elapsed}" -ge "${timeout}" ]; then
            echo "⚠️ Adapter IC still deleting after ${timeout}s."
            echo " It may complete in the background. Local conf removed."
            break
        fi

        echo " $(date +%H:%M:%S) Status: ${status} (${elapsed}s elapsed)..."
        sleep 10
    done

    # Only claim the IC is deleted when the poll actually saw it disappear;
    # after a timeout the deletion is still in flight.
    if [ "${deleted}" = true ]; then
        echo "✅ Adapter IC deleted: ${adapter_ic_name}"
    fi

    # ── Remove local conf file ────────────────────────────────────────────
    rm -f "${conf_file}"
    echo "✅ Removed: do/adapters/${adapter_name}.conf"

    echo ""
    if [ "${deleted}" = true ]; then
        echo "✅ Adapter removed successfully: ${adapter_name}"
    else
        echo "⚠️ Local conf removed, but deletion of ${adapter_ic_name} is not yet confirmed."
    fi
}
828
+
829
# Write `export KEY="VALUE"` into a conf file: every existing
# `export KEY=...` line is replaced, or the line is appended when absent.
# The value is emitted with printf, never interpolated into a sed program,
# so characters such as '&', '|' and '\' are stored verbatim.
#
# Arguments: $1 - conf file path, $2 - variable name, $3 - value
# Returns:   0 on success, 1 if the temp file or the rewrite failed
_adapter_update_set_conf_var() {
    local file="$1" key="$2" value="$3"
    local tmp_file line replaced=0

    tmp_file=$(mktemp "${file}.XXXXXX") || return 1
    while IFS= read -r line || [ -n "${line}" ]; do
        case "${line}" in
            "export ${key}="*)
                printf 'export %s="%s"\n' "${key}" "${value}"
                replaced=1
                ;;
            *)
                printf '%s\n' "${line}"
                ;;
        esac
    done < "${file}" > "${tmp_file}"

    if [ "${replaced}" -eq 0 ]; then
        printf 'export %s="%s"\n' "${key}" "${value}" >> "${tmp_file}"
    fi

    # Copy the content back (rather than mv) so the conf keeps its own
    # permissions and inode.
    if ! cat "${tmp_file}" > "${file}"; then
        rm -f "${tmp_file}"
        return 1
    fi
    rm -f "${tmp_file}"
}

# Point an existing LoRA adapter at new weights: update its SageMaker
# inference component (IC), wait for it to come back, then record the new
# weights (and hub metadata) in do/adapters/<name>.conf.
#
# Globals:   SCRIPT_DIR (read), AWS_REGION (read)
# Calls:     aws, wait_ic, _download_from_hub (defined elsewhere in this
#            script; _download_from_hub is expected to assign weights_uri)
# Arguments: $1 - the subcommand word ('update'); discarded
#            remaining - <name> plus exactly one of
#                        --weights <s3-uri> | --from-hub <hf-repo-id>
# Outputs:   progress and error messages on stdout
# Exits:     0 after --help; 1 on invalid arguments, unknown adapter,
#            unreadable conf, failed download/update, or failed conf write
_adapter_update() {
    local adapter_name=""
    local weights_uri=""
    local from_hub=""

    # Parse update arguments
    shift # remove 'update' from args
    while [ $# -gt 0 ]; do
        case "$1" in
            --weights)
                if [ -z "${2:-}" ]; then
                    echo "❌ --weights requires an S3 URI argument"
                    echo " Usage: ./do/adapter update <name> --weights <new-s3-uri>"
                    exit 1
                fi
                weights_uri="$2"
                shift 2
                ;;
            --from-hub)
                if [ -z "${2:-}" ]; then
                    echo "❌ --from-hub requires a HuggingFace repo ID argument"
                    echo " Usage: ./do/adapter update <name> --from-hub <hf-repo-id>"
                    exit 1
                fi
                from_hub="$2"
                shift 2
                ;;
            --help|-h)
                echo "Usage: ./do/adapter update <name> --weights <new-s3-uri>"
                echo " ./do/adapter update <name> --from-hub <hf-repo-id>"
                echo ""
                echo "Update the weights of an existing LoRA adapter."
                echo ""
                echo "Arguments:"
                echo " <name> Adapter name to update"
                echo " --weights <new-s3-uri> New S3 URI to adapter weights (.tar.gz)"
                echo " --from-hub <hf-repo-id> Download new weights from HuggingFace Hub"
                echo ""
                echo "Note: --weights and --from-hub are mutually exclusive."
                echo ""
                echo "Examples:"
                echo " ./do/adapter update ectsum --weights s3://bucket/adapters/ectsum-v2/adapter.tar.gz"
                echo " ./do/adapter update ectsum --from-hub predibase/llama-3.1-8b-ectsum-v2"
                exit 0
                ;;
            -*)
                echo "❌ Unknown option: $1"
                echo " Usage: ./do/adapter update <name> --weights <new-s3-uri>"
                echo " ./do/adapter update <name> --from-hub <hf-repo-id>"
                exit 1
                ;;
            *)
                if [ -z "${adapter_name}" ]; then
                    adapter_name="$1"
                else
                    echo "❌ Unexpected argument: $1"
                    echo " Usage: ./do/adapter update <name> --weights <new-s3-uri>"
                    echo " ./do/adapter update <name> --from-hub <hf-repo-id>"
                    exit 1
                fi
                shift
                ;;
        esac
    done

    # Validate required arguments
    if [ -z "${adapter_name}" ]; then
        echo "❌ Adapter name is required"
        echo " Usage: ./do/adapter update <name> --weights <new-s3-uri>"
        echo " ./do/adapter update <name> --from-hub <hf-repo-id>"
        exit 1
    fi

    # ── Mutual exclusivity check ─────────────────────────────────────────
    if [ -n "${weights_uri}" ] && [ -n "${from_hub}" ]; then
        echo "❌ --weights and --from-hub are mutually exclusive"
        echo ""
        echo " Use one or the other:"
        echo " ./do/adapter update ${adapter_name} --weights <s3-uri>"
        echo " ./do/adapter update ${adapter_name} --from-hub <hf-repo-id>"
        exit 1
    fi

    if [ -z "${weights_uri}" ] && [ -z "${from_hub}" ]; then
        echo "❌ Either --weights or --from-hub is required"
        echo " Usage: ./do/adapter update <name> --weights <new-s3-uri>"
        echo " ./do/adapter update <name> --from-hub <hf-repo-id>"
        exit 1
    fi

    # ── Validate HF repo ID format (if --from-hub) ───────────────────────
    if [ -n "${from_hub}" ]; then
        if ! echo "${from_hub}" | grep -qE '^[a-zA-Z0-9._-]+(/[a-zA-Z0-9._-]+)?$'; then
            echo "❌ Invalid HuggingFace repo ID: ${from_hub}"
            echo ""
            echo " Repo ID must be in format 'org/name' or 'name'"
            echo " Examples: predibase/llama-3.1-8b-ectsum-v2, my-adapter"
            exit 1
        fi
    fi

    # ── Validate S3 URI format (only when --weights is used) ─────────────
    # Besides the s3:// prefix and .tar.gz suffix, reject '"', '\', '`', '$'
    # and control characters: the URI is embedded below in a JSON string and
    # in a double-quoted `export` line, where those characters would break
    # the JSON or change the meaning of the conf line.
    if [ -n "${weights_uri}" ]; then
        local s3_uri_re='^s3://[^"\\`$[:cntrl:]]*\.tar\.gz$'
        if ! [[ "${weights_uri}" =~ ${s3_uri_re} ]]; then
            echo "❌ Invalid S3 URI: ${weights_uri}"
            echo ""
            echo " Adapter weights must be:"
            echo " • An S3 URI starting with s3://"
            echo " • A .tar.gz archive containing adapter files"
            echo ""
            echo " Example: s3://my-bucket/adapters/ectsum-v2/adapter.tar.gz"
            exit 1
        fi
    fi

    # ── Validate adapter conf exists ──────────────────────────────────────
    local conf_file="${SCRIPT_DIR}/adapters/${adapter_name}.conf"
    if [ ! -f "${conf_file}" ]; then
        echo "❌ Adapter not found: ${adapter_name}"
        echo ""
        echo " No configuration file at: do/adapters/${adapter_name}.conf"
        echo ""
        echo " Available adapters:"
        if [ -d "${SCRIPT_DIR}/adapters" ]; then
            local f
            for f in "${SCRIPT_DIR}"/adapters/*.conf; do
                # An unmatched glob stays literal; skip it.
                [ -f "${f}" ] || continue
                echo " • $(basename "${f}" .conf)"
            done
        else
            echo " (none)"
        fi
        exit 1
    fi

    # ── Read adapter IC name from conf ────────────────────────────────────
    local adapter_ic_name
    adapter_ic_name=$(grep "^export ADAPTER_IC_NAME=" "${conf_file}" 2>/dev/null | sed 's/^export ADAPTER_IC_NAME="//' | sed 's/"$//')

    if [ -z "${adapter_ic_name}" ]; then
        echo "❌ Could not read ADAPTER_IC_NAME from: do/adapters/${adapter_name}.conf"
        echo " The conf file may be corrupted."
        exit 1
    fi

    echo "🔄 Updating adapter: ${adapter_name}"
    echo " IC Name: ${adapter_ic_name}"
    if [ -n "${from_hub}" ]; then
        echo " Source: HuggingFace Hub (${from_hub})"
    else
        echo " New weights: ${weights_uri}"
    fi
    echo ""

    # ── If --from-hub: download, tar, upload to S3 ────────────────────────
    if [ -n "${from_hub}" ]; then
        _download_from_hub "${from_hub}" "${adapter_name}"
        # weights_uri is now set by _download_from_hub (it assigns this
        # function's local through bash dynamic scoping). Refuse to go on if
        # it did not, rather than sending an empty ArtifactUrl.
        if [ -z "${weights_uri}" ]; then
            echo "❌ Download from HuggingFace Hub did not produce an S3 URI for: ${from_hub}"
            exit 1
        fi
        echo ""
    fi

    # ── Update the inference component ────────────────────────────────────
    echo "🚀 Updating inference component: ${adapter_ic_name}"
    if ! aws sagemaker update-inference-component \
        --inference-component-name "${adapter_ic_name}" \
        --specification "{\"Container\":{\"ArtifactUrl\":\"${weights_uri}\"}}" \
        --region "${AWS_REGION}"; then

        echo "❌ Failed to update adapter inference component"
        echo " Check that:"
        echo " • Your IAM credentials have sagemaker:UpdateInferenceComponent permission"
        echo " • The adapter IC '${adapter_ic_name}' exists and is InService"
        echo " • The new S3 URI is accessible by the SageMaker execution role"
        exit 1
    fi

    echo "✅ Adapter IC update initiated: ${adapter_ic_name}"

    # ── Wait for adapter IC to return to InService ────────────────────────
    echo "⏳ Waiting for adapter IC to return to InService..."
    echo " The IC will transition through Updating state."

    wait_ic "${adapter_ic_name}"

    echo "✅ Adapter IC is InService: ${adapter_ic_name}"

    # ── Update conf file ──────────────────────────────────────────────────
    if ! _adapter_update_set_conf_var "${conf_file}" "ADAPTER_WEIGHTS_URI" "${weights_uri}"; then
        echo "❌ Failed to update: do/adapters/${adapter_name}.conf"
        exit 1
    fi

    # Update hub-specific metadata
    if [ -n "${from_hub}" ]; then
        if ! _adapter_update_set_conf_var "${conf_file}" "ADAPTER_SOURCE" "hub" \
            || ! _adapter_update_set_conf_var "${conf_file}" "ADAPTER_HF_REPO" "${from_hub}"; then
            echo "❌ Failed to update: do/adapters/${adapter_name}.conf"
            exit 1
        fi
    fi

    echo ""
    echo "✅ Adapter updated successfully!"
    echo ""
    echo "📋 Updated Details:"
    echo " Name: ${adapter_name}"
    echo " IC Name: ${adapter_ic_name}"
    echo " New Weights: ${weights_uri}"
    if [ -n "${from_hub}" ]; then
        echo " Source: HuggingFace Hub (${from_hub})"
    fi
    echo ""
    echo "🧪 Test your updated adapter:"
    echo " ./do/test ${adapter_name}"
}
1051
+
1052
# Search HuggingFace Hub for LoRA adapters whose base model is MODEL_NAME
# and print them as a table.
#
# Globals:   MODEL_NAME (read, required), HF_TOKEN (read, optional; sent as
#            a Bearer token when set)
# Calls:     curl; jq when installed, otherwise a grep/sed fallback that
#            lists repo IDs only
# Arguments: $1 - the subcommand word ('search'); discarded
#            remaining - [--limit N] (N: non-negative integer, default 10)
#                        or --help/-h
# Outputs:   results table or messages on stdout
# Exits:     0 after --help; 1 on bad arguments, missing MODEL_NAME, or a
#            failed HTTP request. Returns 0 when there are no results.
_adapter_search() {
    local limit=10

    # Parse search arguments
    shift # remove 'search' from args
    while [ $# -gt 0 ]; do
        case "$1" in
            --limit)
                if [ -z "${2:-}" ]; then
                    echo "❌ --limit requires a numeric argument"
                    echo " Usage: ./do/adapter search [--limit N]"
                    exit 1
                fi
                # The value goes straight into the request URL, so accept
                # digits only.
                case "$2" in
                    *[!0-9]*)
                        echo "❌ --limit requires a numeric argument"
                        echo " Usage: ./do/adapter search [--limit N]"
                        exit 1
                        ;;
                esac
                limit="$2"
                shift 2
                ;;
            --help|-h)
                echo "Usage: ./do/adapter search [--limit N]"
                echo ""
                echo "Search HuggingFace Hub for LoRA adapters compatible with your base model."
                echo ""
                echo "Options:"
                echo " --limit N Maximum number of results (default: 10)"
                echo ""
                echo "Examples:"
                echo " ./do/adapter search"
                echo " ./do/adapter search --limit 20"
                echo ""
                echo "To add a found adapter:"
                echo " ./do/adapter add <name> --from-hub <repo-id>"
                exit 0
                ;;
            -*)
                echo "❌ Unknown option: $1"
                echo " Usage: ./do/adapter search [--limit N]"
                exit 1
                ;;
            *)
                echo "❌ Unexpected argument: $1"
                echo " Usage: ./do/adapter search [--limit N]"
                exit 1
                ;;
        esac
    done

    # ── Validate MODEL_NAME is set ────────────────────────────────────────
    if [ -z "${MODEL_NAME:-}" ]; then
        echo "❌ MODEL_NAME is not configured in do/config."
        echo ""
        echo " The search command requires a base model to find compatible adapters."
        exit 1
    fi

    echo "LoRA adapters for ${MODEL_NAME}:"
    echo ""

    # ── Build API URL ─────────────────────────────────────────────────────
    # Only '/' is percent-encoded (org/name model IDs).
    local encoded_model
    encoded_model=$(echo "${MODEL_NAME}" | sed 's|/|%2F|g')
    local api_url="https://huggingface.co/api/models?filter=peft&other=base_model:adapter:${encoded_model}&sort=downloads&direction=-1&limit=${limit}"

    # ── Make API request ──────────────────────────────────────────────────
    local curl_args=("-sS" "-f")
    if [ -n "${HF_TOKEN:-}" ]; then
        curl_args+=("-H" "Authorization: Bearer ${HF_TOKEN}")
    fi

    local response
    if ! response=$(curl "${curl_args[@]}" "${api_url}" 2>/dev/null); then
        echo "❌ Could not reach HuggingFace Hub. Check network connectivity."
        exit 1
    fi

    # ── Parse and display results ─────────────────────────────────────────
    local count=0
    local have_jq=false
    if command -v jq &>/dev/null; then
        have_jq=true
    fi

    if [ "${have_jq}" = true ]; then
        # Anything other than a JSON array (e.g. an error object) counts as
        # zero results instead of yielding a bogus length.
        count=$(printf '%s\n' "${response}" \
            | jq 'if type == "array" then length else 0 end' 2>/dev/null) || count=0
    else
        # Fallback: count array elements by counting "id" fields
        count=$(printf '%s\n' "${response}" | grep -o '"id"' | wc -l | tr -d ' ') || true
    fi

    # Normalize before the numeric test: an empty or non-numeric count
    # (unparseable response) would otherwise make `[ ... -eq 0 ]` print an
    # "integer expression expected" error.
    case "${count}" in
        ''|*[!0-9]*) count=0 ;;
    esac

    if [ "${count}" -eq 0 ]; then
        echo "No adapters found for ${MODEL_NAME}."
        echo ""
        echo "Try searching with a different model name or check:"
        echo " https://huggingface.co/models?other=base_model:adapter:${MODEL_NAME}&sort=downloads"
        return 0
    fi

    # ── Print results table ───────────────────────────────────────────────
    printf '%-4s%-42s%-12s%s\n' "#" "REPO ID" "DOWNLOADS" "DESCRIPTION"

    if [ "${have_jq}" = true ]; then
        local i=0
        local repo_id downloads description formatted_downloads num
        while [ "${i}" -lt "${count}" ]; do
            repo_id=$(printf '%s\n' "${response}" | jq -r ".[${i}].id // \"\"" 2>/dev/null)
            downloads=$(printf '%s\n' "${response}" | jq -r ".[${i}].downloads // 0" 2>/dev/null)
            description=$(printf '%s\n' "${response}" | jq -r ".[${i}].pipeline_tag // .[${i}].tags[0] // \"\"" 2>/dev/null)

            # Format downloads with commas (locale permitting); fall back to
            # the raw number when printf rejects the %' flag or the value.
            formatted_downloads=$(printf "%'d" "${downloads}" 2>/dev/null || echo "${downloads}")

            num=$((i + 1))
            printf '%-4s%-42s%-12s%s\n' "${num}" "${repo_id}" "${formatted_downloads}" "${description}"
            i=$((i + 1))
        done
    else
        # Fallback without jq: basic parsing. The loop runs in the pipeline's
        # subshell, which is fine because idx is only used inside it.
        local idx=1
        printf '%s\n' "${response}" | grep -o '"id":"[^"]*"' | sed 's/"id":"//;s/"$//' | while IFS= read -r repo_id; do
            printf '%-4s%-42s\n' "${idx}" "${repo_id}"
            idx=$((idx + 1))
        done
    fi

    echo ""
    echo "Add an adapter: ./do/adapter add <name> --from-hub <repo-id>"
}
1174
+
1175
# ── Main: parse subcommand ────────────────────────────────────────────────────
# Entry point. With no arguments, print usage and fail. Otherwise dispatch on
# the first word; the handlers that take arguments receive the full "$@"
# (subcommand word included) and drop that word themselves.
case $# in
    0)
        _usage
        exit 1
        ;;
esac

SUBCOMMAND="$1"

case "${SUBCOMMAND}" in
    add|remove|update|search)
        # These four share one shape: gate on LoRA being enabled, then call
        # the handler named after the subcommand.
        _validate_lora_enabled
        "_adapter_${SUBCOMMAND}" "$@"
        ;;
    list)
        # list is invoked without arguments.
        _validate_lora_enabled
        _adapter_list
        ;;
    -h|--help)
        _usage
        exit 0
        ;;
    *)
        echo "❌ Unknown command: ${SUBCOMMAND}"
        echo ""
        _usage
        exit 1
        ;;
esac