@aws/ml-container-creator 0.13.4 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/README.md +23 -5
  2. package/config/parameter-schema-v2.json +32 -4
  3. package/infra/ci-harness/lib/ci-harness-stack.ts +13 -5
  4. package/infra/ci-harness/package-lock.json +122 -116
  5. package/infra/ci-harness/package.json +1 -1
  6. package/package.json +5 -3
  7. package/pyproject.toml +21 -0
  8. package/requirements.txt +19 -0
  9. package/servers/instance-sizer/index.js +72 -4
  10. package/servers/instance-sizer/lib/model-resolver.js +28 -2
  11. package/src/app.js +17 -0
  12. package/src/lib/bootstrap-command-handler.js +33 -23
  13. package/src/lib/config-loader.js +18 -0
  14. package/src/lib/config-manager.js +6 -1
  15. package/src/lib/dataset-slug.js +152 -0
  16. package/src/lib/generated/cli-options.js +9 -3
  17. package/src/lib/generated/parameter-matrix.js +14 -3
  18. package/src/lib/generated/validation-rules.js +1 -1
  19. package/src/lib/mcp-query-runner.js +6 -0
  20. package/src/lib/prompt-runner.js +5 -0
  21. package/src/lib/prompts/feature-prompts.js +1 -1
  22. package/src/lib/template-manager.js +0 -7
  23. package/src/lib/template-variable-resolver.js +51 -1
  24. package/src/lib/tune-config-state.js +14 -1
  25. package/templates/do/.adapter_helper.py +451 -0
  26. package/templates/do/.benchmark_writer.py +22 -0
  27. package/templates/do/.register_helper.py +1163 -0
  28. package/templates/do/.stage_helper.py +419 -0
  29. package/templates/do/.tune_helper.py +379 -65
  30. package/templates/do/__pycache__/.adapter_helper.cpython-312.pyc +0 -0
  31. package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
  32. package/templates/do/__pycache__/.register_helper.cpython-312.pyc +0 -0
  33. package/templates/do/__pycache__/.tune_helper.cpython-312.pyc +0 -0
  34. package/templates/do/adapter +427 -27
  35. package/templates/do/add-ic +85 -3
  36. package/templates/do/benchmark +173 -15
  37. package/templates/do/config +24 -0
  38. package/templates/do/lib/inference-component.sh +56 -3
  39. package/templates/do/lib/profile.sh +5 -0
  40. package/templates/do/register +552 -6
  41. package/templates/do/stage +91 -272
  42. package/templates/do/test +12 -2
  43. package/templates/do/tune +264 -12
package/templates/do/tune CHANGED
@@ -46,9 +46,13 @@ ARG_DRY_RUN=false
46
46
  ARG_LIST_MODELS=false
47
47
  ARG_NO_STALE_WARNING=false
48
48
  ARG_DISCOVER=false
49
+ ARG_LIST_DATASETS=false
49
50
  ARG_DISCOVER_FILTER=""
50
51
  ARG_COLUMN_MAP=""
52
+ ARG_TAKE=""
51
53
  ARG_ACCEPT_EULA=false
54
+ ARG_DATASET_NAME=""
55
+ ARG_EVALUATOR_NAME=""
52
56
 
53
57
 
54
58
  # ── _parse_args() ─────────────────────────────────────────────────────────────
@@ -147,6 +151,7 @@ _parse_args() {
147
151
  --help|-h) ARG_HELP=true; shift ;;
148
152
  --dry-run) ARG_DRY_RUN=true; shift ;;
149
153
  --list-models) ARG_LIST_MODELS=true; shift ;;
154
+ --list-datasets) ARG_LIST_DATASETS=true; shift ;;
150
155
  --no-stale-warning) ARG_NO_STALE_WARNING=true; shift ;;
151
156
  --column-map)
152
157
  if [ -z "${2:-}" ]; then
@@ -162,6 +167,24 @@ _parse_args() {
162
167
  shift
163
168
  fi
164
169
  ;;
170
+ --take)
171
+ if [ -z "${2:-}" ]; then
172
+ echo "❌ --take requires an integer value"
173
+ exit 1
174
+ fi
175
+ ARG_TAKE="$2"; shift 2 ;;
176
+ --dataset-name)
177
+ if [ -z "${2:-}" ]; then
178
+ echo "❌ --dataset-name requires a registered dataset name"
179
+ exit 1
180
+ fi
181
+ ARG_DATASET_NAME="$2"; shift 2 ;;
182
+ --evaluator-name)
183
+ if [ -z "${2:-}" ]; then
184
+ echo "❌ --evaluator-name requires a registered evaluator name"
185
+ exit 1
186
+ fi
187
+ ARG_EVALUATOR_NAME="$2"; shift 2 ;;
165
188
  *)
166
189
  echo "❌ Unknown option: $1"
167
190
  echo " Run ./do/tune --help for usage."
@@ -218,7 +241,8 @@ _show_help() {
218
241
  echo ""
219
242
  echo "Required:"
220
243
  echo " --technique <t> Customization technique: sft, dpo, rlaif, rlvr"
221
- echo " --dataset <source> Dataset: s3://bucket/path.jsonl or hf://org/name[/split]"
244
+ echo " --dataset <source> Dataset: s3://bucket/path.jsonl or hf://org/name[/split][?file=pattern]"
245
+ echo " ⚠️ Quote the URI if it contains ? or * to prevent shell expansion"
222
246
  echo ""
223
247
  echo "Model selection:"
224
248
  echo " --model <id> JumpStart Hub content name to use for fine-tuning."
@@ -251,6 +275,13 @@ _show_help() {
251
275
  echo " --no-wait Submit and exit without polling for completion"
252
276
  echo " --status Show status of all tracked tune jobs"
253
277
  echo ""
278
+ echo "Dataset options:"
279
+ echo " --column-map <map> Rename columns (e.g., prompt=question,completion=answer)"
280
+ echo " --take <n> Take only the first N records from the dataset"
281
+ echo ""
282
+ echo " Note: Always quote --dataset values containing ? or * characters."
283
+ echo " Unquoted, bash may interpret ? as a glob and * as a wildcard expansion."
284
+ echo ""
254
285
  echo "Discovery and diagnostics:"
255
286
  echo " --discover [filter] Query JumpStart Hub for tune-eligible models."
256
287
  echo " Without a filter, shows models for the current family."
@@ -276,6 +307,12 @@ _show_help() {
276
307
  echo " # Fine-tune a gated model (Meta Llama) — requires EULA acceptance:"
277
308
  echo " ./do/tune --technique dpo --dataset hf://argilla/ultrafeedback-binarized-preferences-cleaned --accept-eula"
278
309
  echo ""
310
+ echo " # Take only 500 records for a quick test run:"
311
+ echo " ./do/tune --technique sft --dataset hf://timdettmers/openassistant-guanaco --take 500"
312
+ echo ""
313
+ echo " # Filter files in a multi-file dataset (quote to prevent shell glob expansion):"
314
+ echo " ./do/tune --technique sft --dataset \"hf://Open-Orca/OpenOrca?file=1M-GPT4\""
315
+ echo ""
279
316
  echo " # Discover available models:"
280
317
  echo " ./do/tune --discover # Models for current family"
281
318
  echo " ./do/tune --discover qwen # Filter by keyword"
@@ -329,13 +366,47 @@ _show_status() {
329
366
  echo " Elapsed: ${mins}m ${secs}s"
330
367
  fi
331
368
 
332
- # Show output path if completed
369
+ # Show output path if completed — resolve if not yet set
333
370
  local output_var="TUNE_ADAPTER_PATH_$(echo "${technique}" | tr '[:lower:]' '[:upper:]')"
334
371
  local model_var="TUNE_MODEL_PATH_$(echo "${technique}" | tr '[:lower:]' '[:upper:]')"
335
372
  if [ -n "${!output_var:-}" ]; then
336
373
  echo " Output (adapter): ${!output_var}"
337
374
  elif [ -n "${!model_var:-}" ]; then
338
375
  echo " Output (model): ${!model_var}"
376
+ elif [ "${status}" = "Completed" ]; then
377
+ # Job is complete but output path not set — resolve now
378
+ echo " 🔄 Resolving artifacts..."
379
+ local training_type="${TUNE_TRAINING_TYPE:-lora}"
380
+ local resolve_result
381
+ resolve_result=$(python3 "${HELPER_SCRIPT}" resolve \
382
+ --job-name "${job_name}" \
383
+ --region "${AWS_REGION}" \
384
+ --training-type "${training_type}" \
385
+ --model-package-group "${PROJECT_NAME}-tune-models" 2>/dev/null) || resolve_result=""
386
+
387
+ if [ -n "${resolve_result}" ]; then
388
+ local artifact_path
389
+ artifact_path=$(echo "${resolve_result}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('artifact_path',''))" 2>/dev/null) || artifact_path=""
390
+ local output_type
391
+ output_type=$(echo "${resolve_result}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('output_type',''))" 2>/dev/null) || output_type=""
392
+
393
+ if [ -n "${artifact_path}" ]; then
394
+ local technique_upper
395
+ technique_upper=$(echo "${technique}" | tr '[:lower:]' '[:upper:]')
396
+
397
+ # Update config
398
+ if [ "${output_type}" = "adapter" ]; then
399
+ _update_config_var "TUNE_ADAPTER_PATH_${technique_upper}" "${artifact_path}"
400
+ echo " Output (adapter): ${artifact_path}"
401
+ else
402
+ _update_config_var "TUNE_MODEL_PATH_${technique_upper}" "${artifact_path}"
403
+ echo " Output (model): ${artifact_path}"
404
+ fi
405
+ _update_config_var "TUNE_OUTPUT_PATH_LATEST" "${artifact_path}"
406
+ _update_config_var "TUNE_OUTPUT_TYPE_LATEST" "${output_type}"
407
+ echo " ✅ Updated do/config with output paths"
408
+ fi
409
+ fi
339
410
  fi
340
411
  echo ""
341
412
  fi
@@ -720,10 +791,51 @@ else:
720
791
  _validate_dataset() {
721
792
  local dataset="${ARG_DATASET}"
722
793
 
794
+ # If --dataset-name is set, resolve from registry (AC-2b.4)
795
+ # --dataset-name takes precedence over --dataset for named registry lookup
796
+ if [ -n "${ARG_DATASET_NAME}" ]; then
797
+ echo "🔍 Resolving dataset '${ARG_DATASET_NAME}' from registry..."
798
+ local resolve_result
799
+ resolve_result=$(python3 "${SCRIPT_DIR}/.register_helper.py" resolve-dataset \
800
+ --name "${ARG_DATASET_NAME}" 2>/dev/null) || resolve_result=""
801
+
802
+ if [ -n "${resolve_result}" ]; then
803
+ local resolved_uri
804
+ resolved_uri=$(echo "${resolve_result}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('s3_uri',''))" 2>/dev/null) || resolved_uri=""
805
+ if [ -n "${resolved_uri}" ]; then
806
+ echo " Resolved to: ${resolved_uri}"
807
+ dataset="${resolved_uri}"
808
+ ARG_DATASET="${resolved_uri}"
809
+ else
810
+ echo "❌ Dataset '${ARG_DATASET_NAME}' not found in registry"
811
+ echo " Register it first: ./do/register --dataset --dataset-name ${ARG_DATASET_NAME} --dataset-s3-uri s3://..."
812
+ exit 1
813
+ fi
814
+ else
815
+ echo "❌ Failed to resolve dataset '${ARG_DATASET_NAME}' from registry"
816
+ echo " Register it first: ./do/register --dataset --dataset-name ${ARG_DATASET_NAME} --dataset-s3-uri s3://..."
817
+ exit 1
818
+ fi
819
+ fi
820
+
821
+ # If --dataset value is not an S3 URI or HF reference, treat as a registry name
822
+ if [ -z "${ARG_DATASET_NAME}" ] && [ -n "${dataset}" ] && \
823
+ [[ "${dataset}" != s3://* ]] && [[ "${dataset}" != hf://* ]]; then
824
+ # Looks like a name — resolve from registry
825
+ ARG_DATASET_NAME="${dataset}"
826
+ dataset=""
827
+ fi
828
+
723
829
  if [ -z "${dataset}" ]; then
724
- echo " --dataset is required"
725
- echo " Provide an S3 URI (s3://bucket/path.jsonl) or HF reference (hf://org/name)"
726
- exit 1
830
+ if [ -n "${ARG_DATASET_NAME}" ]; then
831
+ # Name-based resolution happens below via resolve-dataset
832
+ :
833
+ else
834
+ echo "❌ --dataset is required"
835
+ echo " Provide an S3 URI (s3://bucket/path.jsonl), HF reference (hf://org/name), or registered name"
836
+ echo " Run ./do/tune --list-datasets to see available registered datasets."
837
+ exit 1
838
+ fi
727
839
  fi
728
840
 
729
841
  # Determine dataset type
@@ -826,6 +938,9 @@ _validate_dataset() {
826
938
  stage_args+=(--column-map "${ARG_COLUMN_MAP}")
827
939
  fi
828
940
  stage_args+=(--technique "${ARG_TECHNIQUE}")
941
+ if [ -n "${ARG_TAKE}" ]; then
942
+ stage_args+=(--take "${ARG_TAKE}")
943
+ fi
829
944
  if [ -n "${hf_file}" ]; then
830
945
  stage_args+=(--hf-file "${hf_file}")
831
946
  fi
@@ -850,16 +965,16 @@ _validate_dataset() {
850
965
  fi
851
966
 
852
967
  RESOLVED_DATASET_S3_URI=$(echo "${stage_result}" | python3 -c "import sys,json; print(json.load(sys.stdin)['s3_uri'])" 2>/dev/null)
853
- local num_records
854
- num_records=$(echo "${stage_result}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('num_records',0))" 2>/dev/null) || num_records="0"
968
+ RESOLVED_DATASET_ROW_COUNT=$(echo "${stage_result}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('num_records',0))" 2>/dev/null) || RESOLVED_DATASET_ROW_COUNT="0"
855
969
 
856
970
  echo " ✅ Staged to: ${RESOLVED_DATASET_S3_URI}"
857
- echo " Records: ${num_records}"
971
+ echo " Records: ${RESOLVED_DATASET_ROW_COUNT}"
858
972
  echo ""
859
973
 
860
974
  else
861
975
  echo "❌ Invalid dataset format: ${dataset}"
862
- echo " Expected: s3://bucket/path.jsonl or hf://org/name[/split]"
976
+ echo " Expected: s3://bucket/path.jsonl or hf://org/name[/split][?file=pattern]"
977
+ echo " Hint: Quote the value if it contains ? or * (e.g., \"hf://org/name?file=pattern\")"
863
978
  exit 1
864
979
  fi
865
980
  }
@@ -1067,6 +1182,37 @@ print(entry.get('provider', ''))
1067
1182
  if [ -n "${ARG_REWARD_PROMPT}" ]; then
1068
1183
  submit_args+=(--reward-prompt "${ARG_REWARD_PROMPT}")
1069
1184
  fi
1185
+
1186
+ # Resolve evaluator from registry if --evaluator-name is set (AC-2c.3, AC-2c.4)
1187
+ if [ -n "${ARG_EVALUATOR_NAME}" ] && [ -z "${ARG_REWARD_FUNCTION}" ] && [ -z "${ARG_REWARD_PROMPT}" ]; then
1188
+ echo "🔍 Resolving evaluator '${ARG_EVALUATOR_NAME}' from registry..."
1189
+ local ev_resolve_result
1190
+ ev_resolve_result=$(python3 "${SCRIPT_DIR}/.register_helper.py" resolve-evaluator \
1191
+ --name "${ARG_EVALUATOR_NAME}" 2>/dev/null) || ev_resolve_result=""
1192
+
1193
+ if [ -n "${ev_resolve_result}" ]; then
1194
+ local ev_type ev_arn_or_uri
1195
+ ev_type=$(echo "${ev_resolve_result}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('type',''))" 2>/dev/null) || ev_type=""
1196
+ ev_arn_or_uri=$(echo "${ev_resolve_result}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('arn_or_uri',''))" 2>/dev/null) || ev_arn_or_uri=""
1197
+
1198
+ if [ -n "${ev_arn_or_uri}" ]; then
1199
+ echo " Resolved evaluator: ${ev_type} → ${ev_arn_or_uri}"
1200
+ if [ "${ev_type}" = "lambda" ]; then
1201
+ submit_args+=(--reward-function "${ev_arn_or_uri}")
1202
+ else
1203
+ submit_args+=(--reward-prompt "${ev_arn_or_uri}")
1204
+ fi
1205
+ else
1206
+ echo "⚠️ Evaluator '${ARG_EVALUATOR_NAME}' not found in registry"
1207
+ echo " Register it first: ./do/register --evaluator --evaluator-name ${ARG_EVALUATOR_NAME} ..."
1208
+ exit 1
1209
+ fi
1210
+ else
1211
+ echo "⚠️ Failed to resolve evaluator '${ARG_EVALUATOR_NAME}' from registry"
1212
+ echo " Register it first: ./do/register --evaluator --evaluator-name ${ARG_EVALUATOR_NAME} ..."
1213
+ exit 1
1214
+ fi
1215
+ fi
1070
1216
  if [ "${ARG_ACCEPT_EULA}" = true ]; then
1071
1217
  submit_args+=(--accept-eula)
1072
1218
  fi
@@ -1158,6 +1304,14 @@ print(entry.get('provider', ''))
1158
1304
  _update_config_var "TUNE_TECHNIQUE" "${ARG_TECHNIQUE}"
1159
1305
  _update_config_var "TUNE_TRAINING_TYPE" "${ARG_TRAINING_TYPE}"
1160
1306
  _update_config_var "TUNE_DATASET_PATH" "${ARG_DATASET}"
1307
+ _update_config_var "TUNE_DATASET_S3_URI" "${RESOLVED_DATASET_S3_URI:-}"
1308
+ _update_config_var "TUNE_DATASET_ROW_COUNT" "${RESOLVED_DATASET_ROW_COUNT:-0}"
1309
+ _update_config_var "TUNE_DATASET_SOURCE" "${ARG_DATASET}"
1310
+ _update_config_var "TUNE_LAST_JOB_NAME" "${JOB_NAME}"
1311
+ # Technique-specific dataset vars (allows --from-tune <technique> resolution)
1312
+ _update_config_var "TUNE_DATASET_S3_URI_${technique_upper}" "${RESOLVED_DATASET_S3_URI:-}"
1313
+ _update_config_var "TUNE_DATASET_ROW_COUNT_${technique_upper}" "${RESOLVED_DATASET_ROW_COUNT:-0}"
1314
+ _update_config_var "TUNE_DATASET_SOURCE_${technique_upper}" "${ARG_DATASET}"
1161
1315
  }
1162
1316
 
1163
1317
 
@@ -1229,12 +1383,60 @@ _handle_interrupt() {
1229
1383
  echo ""
1230
1384
  echo "⚠️ Interrupted — job continues running in background"
1231
1385
  echo " Job: ${job_name}"
1232
- echo ""
1233
- echo " Resume monitoring: ./do/tune --technique ${ARG_TECHNIQUE} --dataset ${ARG_DATASET}"
1234
- echo " Check status: ./do/tune --status"
1386
+ echo " Check status: ./do/tune --status"
1235
1387
  exit 130
1236
1388
  }
1237
1389
 
1390
# ── _derive_dataset_slug() ────────────────────────────────────────────────────
# Derive a short slug from the dataset argument for config variable naming.
# Rules: lowercase, strip non-alphanumeric (keep hyphens), collapse consecutive
# hyphens, strip leading/trailing hyphens, truncate to 20 chars (then drop a
# hyphen left dangling by the truncation).
#
# Arguments: $1 - dataset reference (hf://..., s3://..., or any other string)
# Outputs:   the slug on stdout; an empty line when no slug can be derived
#
# Examples:
#   hf://tatsu-lab/alpaca               → alpaca
#   hf://tatsu-lab/alpaca/train         → alpaca   (optional /split is ignored)
#   hf://Open-Orca/OpenOrca?file=1M     → openorca (query string is ignored)
#   s3://bucket/path/train.jsonl        → train
#   s3://bucket/path/file.parquet       → file
_derive_dataset_slug() {
    local dataset="${1:-}"
    local slug=""

    if [ -z "${dataset}" ]; then
        echo ""
        return
    fi

    if [[ "${dataset}" == hf://* ]]; then
        # HuggingFace format: hf://org/name[/split][?file=pattern]
        local hf_path="${dataset#hf://}"
        # Strip query params
        hf_path="${hf_path%%\?*}"
        if [[ "${hf_path}" == */* ]]; then
            # Use the component right after the org. Taking the *last*
            # component would return the split (e.g. "train") for
            # hf://org/name/split instead of the dataset name.
            slug="${hf_path#*/}"
            slug="${slug%%/*}"
        else
            # No org prefix: the single component is the dataset name
            slug="${hf_path}"
        fi
    else
        # S3 URI or anything else: last path component without its extension
        local filename="${dataset##*/}"
        slug="${filename%.*}"
    fi

    # Slugify in one pass. printf is used instead of echo because echo treats
    # values such as "-n" or "-E" as options and would print nothing.
    # LC_ALL=C keeps the a-z range byte-wise regardless of the user's locale.
    slug=$(printf '%s\n' "${slug}" \
        | LC_ALL=C tr '[:upper:]' '[:lower:]' \
        | LC_ALL=C sed -e 's/[^a-z0-9-]//g' \
                       -e 's/-\{2,\}/-/g' \
                       -e 's/^-//' \
                       -e 's/-$//')

    # Truncate to 20 chars, then drop a hyphen exposed by the cut
    slug="${slug:0:20}"
    slug="${slug%-}"

    printf '%s\n' "${slug}"
}
1439
+
1238
1440
  # ── _handle_completion() ──────────────────────────────────────────────────────
1239
1441
  # Store output paths, detect output type, print next-step commands.
1240
1442
  _handle_completion() {
@@ -1294,6 +1496,14 @@ _handle_completion() {
1294
1496
  # Store output paths in config
1295
1497
  if [ "${output_type}" = "adapter" ]; then
1296
1498
  _update_config_var "TUNE_ADAPTER_PATH_${technique_upper}" "${artifact_path}"
1499
+ # Write dataset-specific adapter path if dataset slug is available
1500
+ local dataset_slug
1501
+ dataset_slug=$(_derive_dataset_slug "${ARG_DATASET:-}")
1502
+ if [ -n "${dataset_slug}" ]; then
1503
+ local slug_upper
1504
+ slug_upper=$(echo "${dataset_slug}" | tr '[:lower:]' '[:upper:]' | sed 's/-/_/g')
1505
+ _update_config_var "TUNE_ADAPTER_PATH_${technique_upper}_${slug_upper}" "${artifact_path}"
1506
+ fi
1297
1507
  else
1298
1508
  _update_config_var "TUNE_MODEL_PATH_${technique_upper}" "${artifact_path}"
1299
1509
  fi
@@ -1304,9 +1514,14 @@ _handle_completion() {
1304
1514
  echo "📋 Next steps:"
1305
1515
  echo ""
1306
1516
  if [ "${output_type}" = "adapter" ]; then
1517
+ local dataset_slug
1518
+ dataset_slug=$(_derive_dataset_slug "${ARG_DATASET:-}")
1307
1519
  echo " Deploy as LoRA adapter:"
1308
1520
  echo " ./do/adapter add tuned-${ARG_TECHNIQUE} --from-tune"
1309
1521
  echo " ./do/adapter add tuned-${ARG_TECHNIQUE} --from-tune ${ARG_TECHNIQUE}"
1522
+ if [ -n "${dataset_slug}" ]; then
1523
+ echo " ./do/adapter add tuned-${ARG_TECHNIQUE}-${dataset_slug} --from-tune ${ARG_TECHNIQUE}-${dataset_slug}"
1524
+ fi
1310
1525
  echo " ./do/adapter add tuned-${ARG_TECHNIQUE} --weights ${artifact_path}"
1311
1526
  else
1312
1527
  echo " Deploy as new inference component:"
@@ -1403,6 +1618,43 @@ if [ "${ARG_DISCOVER}" = true ]; then
1403
1618
  exit 0
1404
1619
  fi
1405
1620
 
1621
# Handle --list-datasets (before requiring --technique and --dataset).
# Prints a table built from `.register_helper.py list-datasets` and exits 0.
# The listing is best-effort: a helper failure, missing JSON line, or parse
# error is shown as "(none registered)" rather than aborting.
if [ "${ARG_LIST_DATASETS}" = true ]; then
    echo ""
    echo "📦 Registered datasets:"
    echo ""

    _ds_output=$(python3 "${SCRIPT_DIR}/.register_helper.py" list-datasets 2>/dev/null) || _ds_output=""
    # Keep only the last line that starts with '{'. grep exits non-zero when
    # nothing matches, so fall back to empty like the other substitutions do;
    # otherwise an errexit/pipefail shell would stop before the message below.
    _ds_json=$(printf '%s\n' "${_ds_output}" | grep -E '^\{' | tail -1) || _ds_json=""

    if [ -n "${_ds_json}" ]; then
        _ds_count=$(printf '%s\n' "${_ds_json}" | python3 -c "import sys,json; print(len(json.load(sys.stdin).get('datasets',[])))" 2>/dev/null) || _ds_count=0
        if [ "${_ds_count:-0}" -gt 0 ]; then
            printf " %-25s %-10s %-8s %s\n" "NAME" "TECHNIQUE" "ROWS" "S3 URI"
            printf " %-25s %-10s %-8s %s\n" "----" "---------" "----" "------"
            # Columns are clipped to the header widths; the S3 URI is printed in full
            printf '%s\n' "${_ds_json}" | python3 -c "
import sys, json
data = json.load(sys.stdin)
for ds in data.get('datasets', []):
    name = ds.get('name','')[:25]
    tech = ds.get('technique','')[:10]
    rows = str(ds.get('row_count',''))[:8]
    uri = ds.get('s3_uri','')
    print(f' {name:<25} {tech:<10} {rows:<8} {uri}')
" 2>/dev/null
        else
            echo " (none registered)"
        fi
    else
        echo " (none registered)"
    fi
    echo ""
    echo " Register: ./do/register dataset <name> --s3-uri <uri> --technique <sft|dpo>"
    echo " Use: ./do/tune --technique sft --dataset <name>"
    echo ""
    exit 0
fi
1657
+
1406
1658
  # Validate required arguments for job submission
1407
1659
  if [ -z "${ARG_TECHNIQUE}" ]; then
1408
1660
  echo "❌ --technique is required"