@aws/ml-container-creator 0.6.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +1 -1
- package/infra/ci-harness/buildspec.yml +4 -0
- package/package.json +1 -1
- package/servers/lib/catalogs/model-servers.json +80 -0
- package/servers/model-picker/index.js +27 -16
- package/src/app.js +89 -21
- package/src/lib/cli-handler.js +1 -1
- package/src/lib/config-manager.js +39 -2
- package/src/lib/cross-cutting-checker.js +146 -33
- package/src/lib/deployment-config-resolver.js +10 -4
- package/src/lib/e2e-bootstrap.js +227 -0
- package/src/lib/e2e-catalog-validator.js +103 -0
- package/src/lib/e2e-quota-validator.js +135 -0
- package/src/lib/prompt-runner.js +290 -22
- package/src/lib/prompts.js +9 -3
- package/src/lib/template-manager.js +10 -4
- package/src/lib/tune-catalog-validator.js +5 -5
- package/templates/Dockerfile +2 -0
- package/templates/code/cw_log_forwarder.py +64 -0
- package/templates/code/serve +14 -3
- package/templates/code/serving.properties +2 -2
- package/templates/deploy_notebook_generator.py +897 -0
- package/templates/diffusors/serve +3 -3
- package/templates/do/.tune_helper.py +2 -2
- package/templates/do/export +19 -2
- package/templates/do/lib/endpoint-config.sh +3 -1
- package/templates/do/lib/inference-component.sh +5 -1
- package/templates/do/register +8 -2
- package/templates/do/test +5 -5
- package/templates/do/tune +2 -2
- package/templates/marketplace/config +118 -0
- package/templates/marketplace/deploy +890 -0
- package/templates/marketplace/test +453 -0
package/bin/cli.js
CHANGED
|
@@ -37,7 +37,7 @@ program
|
|
|
37
37
|
.addOption(new Option('--deployment-config <config>', 'Deployment configuration (e.g. http-flask, transformers-vllm, triton-fil)'))
|
|
38
38
|
.addOption(new Option('--framework <framework>', 'ML framework — DEPRECATED: use --deployment-config').choices(['sklearn', 'xgboost', 'tensorflow', 'transformers']).hideHelp())
|
|
39
39
|
.addOption(new Option('--model-format <format>', 'Model serialization format (pkl, joblib, json, model, ubj, keras, h5, SavedModel)'))
|
|
40
|
-
.addOption(new Option('--model-name <name>', 'Model identifier (
|
|
40
|
+
.addOption(new Option('--model-name <name>', 'Model identifier (<hf-org/model>, s3://..., registry://..., marketplace://...)'))
|
|
41
41
|
.addOption(new Option('--model-server <server>', 'Model server — DEPRECATED: use --deployment-config').choices(['flask', 'fastapi', 'vllm', 'sglang']).hideHelp())
|
|
42
42
|
.addOption(new Option('--base-image <image>', 'Base container image for Dockerfile'))
|
|
43
43
|
|
|
@@ -128,6 +128,10 @@ phases:
|
|
|
128
128
|
echo "Skipping Build stage due to prior failure in $FIRST_FAILURE"
|
|
129
129
|
BUILD_STATUS="skip"
|
|
130
130
|
BUILD_DURATION=0
|
|
131
|
+
elif [ "$(cd /tmp/ci-project && source do/config && echo $DEPLOYMENT_CONFIG)" = "marketplace" ]; then
|
|
132
|
+
echo "Skipping Build stage — marketplace projects have no container to build"
|
|
133
|
+
BUILD_STATUS="skip"
|
|
134
|
+
BUILD_DURATION=0
|
|
131
135
|
else
|
|
132
136
|
(
|
|
133
137
|
set -e
|
package/package.json
CHANGED
|
@@ -1,5 +1,85 @@
|
|
|
1
1
|
{
|
|
2
2
|
"vllm": [
|
|
3
|
+
{
|
|
4
|
+
"image": "vllm/vllm-openai:v0.20.2",
|
|
5
|
+
"tag": "v0.20.2",
|
|
6
|
+
"architecture": "amd64",
|
|
7
|
+
"created": "2026-05-10T00:00:00Z",
|
|
8
|
+
"labels": {
|
|
9
|
+
"cuda_version": "12.9",
|
|
10
|
+
"python_version": "3.12",
|
|
11
|
+
"framework_version": "0.20.2"
|
|
12
|
+
},
|
|
13
|
+
"registry": "dockerhub",
|
|
14
|
+
"repository": "vllm/vllm-openai",
|
|
15
|
+
"defaults": {
|
|
16
|
+
"envVars": {
|
|
17
|
+
"VLLM_TENSOR_PARALLEL_SIZE": "1",
|
|
18
|
+
"VLLM_GPU_MEMORY_UTILIZATION": "0.9",
|
|
19
|
+
"VLLM_MAX_NUM_SEQS": "256",
|
|
20
|
+
"VLLM_MAX_MODEL_LEN": "4096",
|
|
21
|
+
"VLLM_ENABLE_PREFIX_CACHING": "true"
|
|
22
|
+
},
|
|
23
|
+
"inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
|
|
24
|
+
},
|
|
25
|
+
"accelerator": {
|
|
26
|
+
"type": "cuda",
|
|
27
|
+
"version": "12.9",
|
|
28
|
+
"versionRange": {
|
|
29
|
+
"min": "12.4",
|
|
30
|
+
"max": "12.9"
|
|
31
|
+
}
|
|
32
|
+
},
|
|
33
|
+
"validationLevel": "community-validated",
|
|
34
|
+
"profiles": {
|
|
35
|
+
"low-latency": {
|
|
36
|
+
"displayName": "Low Latency",
|
|
37
|
+
"description": "Optimized for single-request latency with prefix caching",
|
|
38
|
+
"envVars": {
|
|
39
|
+
"VLLM_MAX_NUM_SEQS": "32",
|
|
40
|
+
"VLLM_GPU_MEMORY_UTILIZATION": "0.85",
|
|
41
|
+
"VLLM_ENABLE_PREFIX_CACHING": "true"
|
|
42
|
+
},
|
|
43
|
+
"notes": "Prefix caching improves latency for repeated prompts"
|
|
44
|
+
},
|
|
45
|
+
"high-throughput": {
|
|
46
|
+
"displayName": "High Throughput",
|
|
47
|
+
"description": "Optimized for batch processing with continuous batching",
|
|
48
|
+
"envVars": {
|
|
49
|
+
"VLLM_MAX_NUM_SEQS": "512",
|
|
50
|
+
"VLLM_GPU_MEMORY_UTILIZATION": "0.95",
|
|
51
|
+
"VLLM_MAX_MODEL_LEN": "2048",
|
|
52
|
+
"VLLM_ENABLE_PREFIX_CACHING": "false"
|
|
53
|
+
},
|
|
54
|
+
"notes": "Continuous batching maximizes GPU utilization"
|
|
55
|
+
},
|
|
56
|
+
"multi-gpu": {
|
|
57
|
+
"displayName": "Multi-GPU",
|
|
58
|
+
"description": "Tensor parallel across multiple GPUs for large models",
|
|
59
|
+
"envVars": {
|
|
60
|
+
"VLLM_TENSOR_PARALLEL_SIZE": "4",
|
|
61
|
+
"VLLM_GPU_MEMORY_UTILIZATION": "0.9",
|
|
62
|
+
"VLLM_MAX_NUM_SEQS": "256"
|
|
63
|
+
},
|
|
64
|
+
"notes": "Requires instance with 4+ GPUs. Set TENSOR_PARALLEL_SIZE to match GPU count"
|
|
65
|
+
}
|
|
66
|
+
},
|
|
67
|
+
"notes": "vLLM 0.20.2 adds Gemma 4 support, CUDA 12.9, improved multi-GPU. Requires CUDA compat on drivers < 570.",
|
|
68
|
+
"supportedModelTypes": [
|
|
69
|
+
"gemma",
|
|
70
|
+
"gemma2",
|
|
71
|
+
"gemma3",
|
|
72
|
+
"llama",
|
|
73
|
+
"mistral",
|
|
74
|
+
"mixtral",
|
|
75
|
+
"qwen2",
|
|
76
|
+
"qwen3",
|
|
77
|
+
"qwen3_moe",
|
|
78
|
+
"deepseek_v3",
|
|
79
|
+
"phi3",
|
|
80
|
+
"command-r"
|
|
81
|
+
]
|
|
82
|
+
},
|
|
3
83
|
{
|
|
4
84
|
"image": "vllm/vllm-openai:v0.10.1",
|
|
5
85
|
"tag": "v0.10.1",
|
|
@@ -1531,18 +1531,25 @@ async function resolveModel({ model_id, fields, mode = 'discover', context }) {
|
|
|
1531
1531
|
let values = {}
|
|
1532
1532
|
let message = null
|
|
1533
1533
|
|
|
1534
|
+
// Reject deprecated JumpStart prefixes
|
|
1535
|
+
if (model_id.startsWith('jumpstart://') || model_id.startsWith('jumpstart-hub://')) {
|
|
1536
|
+
const bareId = model_id.replace(/^jumpstart(-hub)?:\/\//, '')
|
|
1537
|
+
message = `JumpStart is no longer supported. Use the HuggingFace model ID directly: ${bareId}`
|
|
1538
|
+
return {
|
|
1539
|
+
content: [{
|
|
1540
|
+
type: 'text',
|
|
1541
|
+
text: JSON.stringify({ values: {}, choices: {}, message })
|
|
1542
|
+
}]
|
|
1543
|
+
}
|
|
1544
|
+
}
|
|
1545
|
+
|
|
1534
1546
|
if (mode === 'static') {
|
|
1535
1547
|
// Static mode: use StaticCatalogResolver only
|
|
1536
|
-
// For jumpstart:// prefixed IDs, resolve from JumpStart static catalog
|
|
1537
1548
|
const metadata = await staticResolver.fetchModelMetadata(model_id, { fields })
|
|
1538
1549
|
if (metadata) {
|
|
1539
1550
|
values = { ...metadata }
|
|
1540
1551
|
} else {
|
|
1541
|
-
|
|
1542
|
-
message = `Model not found in JumpStart static catalog: ${model_id}`
|
|
1543
|
-
} else {
|
|
1544
|
-
message = `Model not found in static catalog: ${model_id}`
|
|
1545
|
-
}
|
|
1552
|
+
message = `Model not found in static catalog: ${model_id}`
|
|
1546
1553
|
}
|
|
1547
1554
|
} else {
|
|
1548
1555
|
// Discover mode: use ResolverRegistry for live data, merge with static
|
|
@@ -1564,11 +1571,7 @@ async function resolveModel({ model_id, fields, mode = 'discover', context }) {
|
|
|
1564
1571
|
values = { ...merged }
|
|
1565
1572
|
// If the resolver failed but we got data from static catalog, note the fallback
|
|
1566
1573
|
if (resolverFailed && !liveData && staticData) {
|
|
1567
|
-
if (model_id.startsWith('
|
|
1568
|
-
message = '[jumpstart] SageMaker API unreachable. Using static catalog fallback.'
|
|
1569
|
-
} else if (model_id.startsWith('jumpstart-hub://')) {
|
|
1570
|
-
message = '[jumpstart-hub] SageMaker API unreachable. Using static catalog fallback.'
|
|
1571
|
-
} else if (model_id.startsWith('registry://')) {
|
|
1574
|
+
if (model_id.startsWith('registry://')) {
|
|
1572
1575
|
message = '[registry] SageMaker API unreachable. Using static catalog fallback.'
|
|
1573
1576
|
} else if (model_id.startsWith('s3://')) {
|
|
1574
1577
|
message = '[s3] S3 API unreachable. Using static catalog fallback.'
|
|
@@ -1577,11 +1580,7 @@ async function resolveModel({ model_id, fields, mode = 'discover', context }) {
|
|
|
1577
1580
|
} else {
|
|
1578
1581
|
// No data from either source
|
|
1579
1582
|
if (resolverFailed) {
|
|
1580
|
-
if (model_id.startsWith('
|
|
1581
|
-
message = `[jumpstart] Resolver could not fetch data for: ${model_id}`
|
|
1582
|
-
} else if (model_id.startsWith('jumpstart-hub://')) {
|
|
1583
|
-
message = `[jumpstart-hub] Resolver could not fetch data for: ${model_id}`
|
|
1584
|
-
} else if (model_id.startsWith('registry://')) {
|
|
1583
|
+
if (model_id.startsWith('registry://')) {
|
|
1585
1584
|
message = `[registry] Resolver could not fetch data for: ${model_id}`
|
|
1586
1585
|
} else if (model_id.startsWith('s3://')) {
|
|
1587
1586
|
message = `[s3] Resolver could not fetch data for: ${model_id}`
|
|
@@ -1613,6 +1612,18 @@ async function resolveModel({ model_id, fields, mode = 'discover', context }) {
|
|
|
1613
1612
|
values = filtered
|
|
1614
1613
|
}
|
|
1615
1614
|
|
|
1615
|
+
// Exclude jumpstart:// prefixed results from output
|
|
1616
|
+
const resolvedModelId = values.modelId || model_id
|
|
1617
|
+
if (resolvedModelId.startsWith('jumpstart://') || resolvedModelId.startsWith('jumpstart-hub://')) {
|
|
1618
|
+
const bareId = resolvedModelId.replace(/^jumpstart(-hub)?:\/\//, '')
|
|
1619
|
+
return {
|
|
1620
|
+
content: [{
|
|
1621
|
+
type: 'text',
|
|
1622
|
+
text: JSON.stringify({ values: {}, choices: {}, message: `JumpStart is no longer supported. Use the HuggingFace model ID directly: ${bareId}` })
|
|
1623
|
+
}]
|
|
1624
|
+
}
|
|
1625
|
+
}
|
|
1626
|
+
|
|
1616
1627
|
// Build choices with provider prefix labels
|
|
1617
1628
|
const choices = {}
|
|
1618
1629
|
if (Object.keys(values).length > 0) {
|
package/src/app.js
CHANGED
|
@@ -142,30 +142,27 @@ export async function run(projectName, options) {
|
|
|
142
142
|
// Infer modelSource from model name prefix if not set
|
|
143
143
|
const modelName = answers.modelName;
|
|
144
144
|
if (!answers.modelSource && modelName) {
|
|
145
|
+
// Reject deprecated JumpStart prefixes with migration message
|
|
146
|
+
if (modelName.startsWith('jumpstart://') || modelName.startsWith('jumpstart-hub://')) {
|
|
147
|
+
const bareId = modelName.replace(/^jumpstart(-hub)?:\/\//, '');
|
|
148
|
+
console.error(`\n ⚠️ JumpStart is no longer supported. Use the HuggingFace model ID directly: ${bareId}`);
|
|
149
|
+
console.error(' JumpStart model sources have been removed. Use one of:');
|
|
150
|
+
console.error(' • HuggingFace model ID (e.g., meta-llama/Llama-2-7b-hf)');
|
|
151
|
+
console.error(' • s3://bucket/path/model.tar.gz');
|
|
152
|
+
console.error(' • registry://model-package-name');
|
|
153
|
+
console.error(' • marketplace://arn:aws:sagemaker:...\n');
|
|
154
|
+
process.exit(1);
|
|
155
|
+
}
|
|
145
156
|
if (modelName.startsWith('s3://')) {
|
|
146
157
|
answers.modelSource = 's3';
|
|
147
158
|
if (!answers.artifactUri) {
|
|
148
159
|
answers.artifactUri = modelName;
|
|
149
160
|
}
|
|
150
|
-
} else if (modelName.startsWith('jumpstart://')) {
|
|
151
|
-
answers.modelSource = 'jumpstart';
|
|
152
|
-
} else if (modelName.startsWith('jumpstart-hub://')) {
|
|
153
|
-
answers.modelSource = 'jumpstart-hub';
|
|
154
161
|
} else if (modelName.startsWith('registry://')) {
|
|
155
162
|
answers.modelSource = 'registry';
|
|
156
163
|
}
|
|
157
164
|
}
|
|
158
165
|
|
|
159
|
-
// Warn about unsupported model sources
|
|
160
|
-
if (answers.modelSource === 'jumpstart-hub') {
|
|
161
|
-
console.log('\n ⚠️ JumpStart Private Hub models are not yet fully supported.');
|
|
162
|
-
console.log(' The generated project will not be able to download model artifacts at runtime.');
|
|
163
|
-
console.log(' This feature is tracked for a future release.');
|
|
164
|
-
console.log(' Falling back to HuggingFace source.\n');
|
|
165
|
-
answers.modelSource = 'huggingface';
|
|
166
|
-
delete answers.artifactUri;
|
|
167
|
-
}
|
|
168
|
-
|
|
169
166
|
// Note about registry model requirements
|
|
170
167
|
if (answers.modelSource === 'registry') {
|
|
171
168
|
console.log('\n ℹ️ Registry model: the container will resolve the artifact URI at startup');
|
|
@@ -363,9 +360,50 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
|
|
|
363
360
|
ignorePatterns.push('**/do/test');
|
|
364
361
|
}
|
|
365
362
|
|
|
366
|
-
//
|
|
367
|
-
|
|
368
|
-
|
|
363
|
+
// Marketplace projects: exclude everything container-related
|
|
364
|
+
if (architecture === 'marketplace') {
|
|
365
|
+
ignorePatterns.push('**/Dockerfile');
|
|
366
|
+
ignorePatterns.push('**/code/**');
|
|
367
|
+
ignorePatterns.push('**/do/build');
|
|
368
|
+
ignorePatterns.push('**/do/push');
|
|
369
|
+
ignorePatterns.push('**/do/submit');
|
|
370
|
+
ignorePatterns.push('**/do/adapter');
|
|
371
|
+
ignorePatterns.push('**/do/adapters/**');
|
|
372
|
+
ignorePatterns.push('**/do/tune');
|
|
373
|
+
ignorePatterns.push('**/do/.tune_helper.py');
|
|
374
|
+
ignorePatterns.push('**/do/add-ic');
|
|
375
|
+
ignorePatterns.push('**/do/run');
|
|
376
|
+
ignorePatterns.push('**/sample_model/**');
|
|
377
|
+
ignorePatterns.push('**/requirements.txt');
|
|
378
|
+
ignorePatterns.push('**/nginx-*.conf');
|
|
379
|
+
ignorePatterns.push('**/triton/**');
|
|
380
|
+
ignorePatterns.push('**/diffusors/**');
|
|
381
|
+
ignorePatterns.push('**/hyperpod/**');
|
|
382
|
+
ignorePatterns.push('**/MIGRATION.md');
|
|
383
|
+
ignorePatterns.push('**/TEMPLATE_SYSTEM.md');
|
|
384
|
+
ignorePatterns.push('**/IAM_PERMISSIONS.md');
|
|
385
|
+
ignorePatterns.push('**/PROJECT_README.md');
|
|
386
|
+
ignorePatterns.push('**/deploy_notebook_generator.py');
|
|
387
|
+
ignorePatterns.push('**/buildspec.yml');
|
|
388
|
+
ignorePatterns.push('**/test/**');
|
|
389
|
+
// Exclude templates that reference container-specific variables (framework, modelServer)
|
|
390
|
+
// Marketplace overlays its own config, deploy, and test templates
|
|
391
|
+
ignorePatterns.push('**/do/config');
|
|
392
|
+
ignorePatterns.push('**/do/deploy');
|
|
393
|
+
ignorePatterns.push('**/do/test');
|
|
394
|
+
ignorePatterns.push('**/do/README.md');
|
|
395
|
+
ignorePatterns.push('**/do/export');
|
|
396
|
+
ignorePatterns.push('**/do/validate');
|
|
397
|
+
ignorePatterns.push('**/do/ic/**');
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
// Always exclude architecture-specific source directories from main copy
|
|
401
|
+
// (they are overlaid separately for their respective architectures)
|
|
402
|
+
ignorePatterns.push('**/marketplace/**');
|
|
403
|
+
if (architecture !== 'marketplace') {
|
|
404
|
+
ignorePatterns.push('**/triton/**');
|
|
405
|
+
ignorePatterns.push('**/diffusors/**');
|
|
406
|
+
}
|
|
369
407
|
|
|
370
408
|
// For triton and diffusors, exclude the default Dockerfile
|
|
371
409
|
if (architecture === 'triton' || architecture === 'diffusors') {
|
|
@@ -431,6 +469,14 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
|
|
|
431
469
|
_copyFile(path.join(templateDir, 'diffusors/patch_image_api.py'), path.join(destDir, 'code/patch_image_api.py'));
|
|
432
470
|
break;
|
|
433
471
|
|
|
472
|
+
case 'marketplace':
|
|
473
|
+
// Marketplace projects: overlay marketplace-specific templates
|
|
474
|
+
// These replace the default do/config, do/deploy, and do/test with marketplace versions
|
|
475
|
+
_renderTemplate(path.join(templateDir, 'marketplace/config'), path.join(destDir, 'do/config'), templateVars);
|
|
476
|
+
_renderTemplate(path.join(templateDir, 'marketplace/deploy'), path.join(destDir, 'do/deploy'), templateVars);
|
|
477
|
+
_renderTemplate(path.join(templateDir, 'marketplace/test'), path.join(destDir, 'do/test'), templateVars);
|
|
478
|
+
break;
|
|
479
|
+
|
|
434
480
|
default:
|
|
435
481
|
// Fallback to HTTP behavior
|
|
436
482
|
_unlinkIfExists(path.join(destDir, 'code/chat_template.jinja'));
|
|
@@ -450,7 +496,10 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
|
|
|
450
496
|
}
|
|
451
497
|
|
|
452
498
|
// Copy PROJECT_README.md as README.md (overwriting the template README)
|
|
453
|
-
|
|
499
|
+
// Marketplace projects don't use the standard README (no container/framework info)
|
|
500
|
+
if (architecture !== 'marketplace') {
|
|
501
|
+
_renderTemplate(path.join(templateDir, 'PROJECT_README.md'), path.join(destDir, 'README.md'), templateVars);
|
|
502
|
+
}
|
|
454
503
|
|
|
455
504
|
// Copy do/lib/ Node.js modules (plain copy, no EJS)
|
|
456
505
|
const doLibDir = path.join(destDir, 'do', 'lib');
|
|
@@ -491,7 +540,7 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
|
|
|
491
540
|
*/
|
|
492
541
|
export async function postGenerate(destDir, answers, tritonBackends = {}) {
|
|
493
542
|
// Set executable permissions on shell scripts
|
|
494
|
-
_setExecutablePermissions(destDir);
|
|
543
|
+
_setExecutablePermissions(destDir, answers);
|
|
495
544
|
|
|
496
545
|
// Run sample model training if requested
|
|
497
546
|
const architecture = answers.architecture;
|
|
@@ -1092,8 +1141,25 @@ function _unlinkIfExists(filePath) {
|
|
|
1092
1141
|
*
|
|
1093
1142
|
* @param {string} destDir - Path to the generated project directory
|
|
1094
1143
|
*/
|
|
1095
|
-
function _setExecutablePermissions(destDir) {
|
|
1096
|
-
const
|
|
1144
|
+
function _setExecutablePermissions(destDir, answers = {}) {
|
|
1145
|
+
const architecture = answers.architecture;
|
|
1146
|
+
|
|
1147
|
+
// Marketplace projects have a reduced set of scripts
|
|
1148
|
+
const marketplaceScripts = [
|
|
1149
|
+
'do/config',
|
|
1150
|
+
'do/deploy',
|
|
1151
|
+
'do/test',
|
|
1152
|
+
'do/logs',
|
|
1153
|
+
'do/clean',
|
|
1154
|
+
'do/register',
|
|
1155
|
+
'do/ci',
|
|
1156
|
+
'do/manifest',
|
|
1157
|
+
'do/benchmark',
|
|
1158
|
+
'do/optimize',
|
|
1159
|
+
'do/status'
|
|
1160
|
+
];
|
|
1161
|
+
|
|
1162
|
+
const defaultScripts = [
|
|
1097
1163
|
'do/config',
|
|
1098
1164
|
'do/build',
|
|
1099
1165
|
'do/push',
|
|
@@ -1114,6 +1180,8 @@ function _setExecutablePermissions(destDir) {
|
|
|
1114
1180
|
'do/tune'
|
|
1115
1181
|
];
|
|
1116
1182
|
|
|
1183
|
+
const shellScripts = architecture === 'marketplace' ? marketplaceScripts : defaultScripts;
|
|
1184
|
+
|
|
1117
1185
|
shellScripts.forEach(script => {
|
|
1118
1186
|
const scriptPath = path.join(destDir, script);
|
|
1119
1187
|
try {
|
package/src/lib/cli-handler.js
CHANGED
|
@@ -178,7 +178,7 @@ CLI OPTIONS:
|
|
|
178
178
|
--project-name=<name> Project name
|
|
179
179
|
--project-dir=<dir> Output directory path
|
|
180
180
|
--framework=<framework> ML framework (sklearn|xgboost|tensorflow|transformers)
|
|
181
|
-
--model-name=<name>
|
|
181
|
+
--model-name=<name> Model identifier (<hf-org/model>, s3://..., registry://..., marketplace://...)
|
|
182
182
|
--model-server=<server> Model server (flask|fastapi|vllm|sglang|tensorrt-llm|lmi|djl)
|
|
183
183
|
--model-format=<format> Model format (depends on framework)
|
|
184
184
|
--include-sample Include sample model code
|
|
@@ -1089,6 +1089,17 @@ export default class ConfigManager {
|
|
|
1089
1089
|
required: false,
|
|
1090
1090
|
default: 64,
|
|
1091
1091
|
valueSpace: 'bounded'
|
|
1092
|
+
},
|
|
1093
|
+
modelPackageArn: {
|
|
1094
|
+
cliOption: 'model-package-arn',
|
|
1095
|
+
envVar: null,
|
|
1096
|
+
configFile: true,
|
|
1097
|
+
packageJson: false,
|
|
1098
|
+
mcp: true,
|
|
1099
|
+
promptable: true,
|
|
1100
|
+
required: false,
|
|
1101
|
+
default: null,
|
|
1102
|
+
valueSpace: 'unbounded'
|
|
1092
1103
|
}
|
|
1093
1104
|
};
|
|
1094
1105
|
}
|
|
@@ -1860,6 +1871,14 @@ export default class ConfigManager {
|
|
|
1860
1871
|
}
|
|
1861
1872
|
}
|
|
1862
1873
|
|
|
1874
|
+
// Validate model package ARN format if provided
|
|
1875
|
+
if (this.config.modelPackageArn) {
|
|
1876
|
+
const modelPackageArnPattern = /^arn:aws:sagemaker:[a-z0-9-]+:\d{12}:model-package\/[a-zA-Z0-9]([a-zA-Z0-9-])*\/\d+$/;
|
|
1877
|
+
if (!modelPackageArnPattern.test(this.config.modelPackageArn)) {
|
|
1878
|
+
errors.push('❌ Invalid model package ARN format. Expected: arn:aws:sagemaker:<region>:<account>:model-package/<name>/<version>');
|
|
1879
|
+
}
|
|
1880
|
+
}
|
|
1881
|
+
|
|
1863
1882
|
// Only validate required parameters if we're skipping prompts
|
|
1864
1883
|
// If prompts are available, missing parameters can be collected later
|
|
1865
1884
|
if (this.skipPrompts) {
|
|
@@ -1946,10 +1965,15 @@ export default class ConfigManager {
|
|
|
1946
1965
|
const value = finalConfig[param];
|
|
1947
1966
|
const isEmpty = value === null || value === undefined || value === '';
|
|
1948
1967
|
|
|
1949
|
-
// Special case: modelFormat is not required for transformers/triton/diffusors
|
|
1950
|
-
if (param === 'modelFormat' && (finalConfig.architecture === 'transformers' || finalConfig.architecture === 'triton' || finalConfig.architecture === 'diffusors')) {
|
|
1968
|
+
// Special case: modelFormat is not required for transformers/triton/diffusors/marketplace
|
|
1969
|
+
if (param === 'modelFormat' && (finalConfig.architecture === 'transformers' || finalConfig.architecture === 'triton' || finalConfig.architecture === 'diffusors' || finalConfig.architecture === 'marketplace')) {
|
|
1951
1970
|
return; // Skip validation
|
|
1952
1971
|
}
|
|
1972
|
+
|
|
1973
|
+
// Special case: marketplace projects don't need container-related parameters
|
|
1974
|
+
if (finalConfig.architecture === 'marketplace' && (param === 'includeSampleModel' || param === 'buildTarget')) {
|
|
1975
|
+
return; // Skip validation — marketplace has no container to build
|
|
1976
|
+
}
|
|
1953
1977
|
|
|
1954
1978
|
// Special case: instanceType is not required for hyperpod-eks
|
|
1955
1979
|
// when not provided (backward compatibility) — but it IS prompted now
|
|
@@ -2368,6 +2392,19 @@ export default class ConfigManager {
|
|
|
2368
2392
|
}
|
|
2369
2393
|
}
|
|
2370
2394
|
break;
|
|
2395
|
+
|
|
2396
|
+
case 'modelPackageArn':
|
|
2397
|
+
if (value) {
|
|
2398
|
+
const modelPackageArnPattern = /^arn:aws:sagemaker:[a-z0-9-]+:\d{12}:model-package\/[a-zA-Z0-9]([a-zA-Z0-9-])*\/\d+$/;
|
|
2399
|
+
if (!modelPackageArnPattern.test(value)) {
|
|
2400
|
+
throw new ValidationError(
|
|
2401
|
+
'❌ Invalid model package ARN format. Expected: arn:aws:sagemaker:<region>:<account>:model-package/<name>/<version>',
|
|
2402
|
+
parameter,
|
|
2403
|
+
value
|
|
2404
|
+
);
|
|
2405
|
+
}
|
|
2406
|
+
}
|
|
2407
|
+
break;
|
|
2371
2408
|
}
|
|
2372
2409
|
}
|
|
2373
2410
|
|
|
@@ -23,6 +23,7 @@ export default class CrossCuttingChecker {
|
|
|
23
23
|
findings.push(...this.checkCudaCompatibility(context, instanceCatalog));
|
|
24
24
|
findings.push(...this.checkModelTypeInstanceAlignment(context, instanceCatalog));
|
|
25
25
|
findings.push(...this.checkKvCacheMemoryFit(context, instanceCatalog));
|
|
26
|
+
findings.push(...this.checkMarketplaceCompatibility(context));
|
|
26
27
|
|
|
27
28
|
return findings;
|
|
28
29
|
}
|
|
@@ -142,7 +143,7 @@ export default class CrossCuttingChecker {
|
|
|
142
143
|
}
|
|
143
144
|
|
|
144
145
|
/**
|
|
145
|
-
* Verify model source requirements (artifact URI
|
|
146
|
+
* Verify model source requirements (artifact URI).
|
|
146
147
|
* @param {Object} context - ValidationContext
|
|
147
148
|
* @returns {Array} Findings
|
|
148
149
|
*/
|
|
@@ -152,38 +153,8 @@ export default class CrossCuttingChecker {
|
|
|
152
153
|
|
|
153
154
|
const modelSource = config.modelSource || config.MODEL_SOURCE || '';
|
|
154
155
|
|
|
155
|
-
// When modelSource
|
|
156
|
-
|
|
157
|
-
const payloads = context.payloads || {};
|
|
158
|
-
let hubContentArnFound = false;
|
|
159
|
-
|
|
160
|
-
for (const payload of Object.values(payloads)) {
|
|
161
|
-
if (payload?.HubAccessConfig?.HubContentArn) {
|
|
162
|
-
hubContentArnFound = true;
|
|
163
|
-
break;
|
|
164
|
-
}
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
if (!hubContentArnFound && !config.HUB_CONTENT_ARN) {
|
|
168
|
-
findings.push({
|
|
169
|
-
service: 'cross-cutting',
|
|
170
|
-
operation: 'configuration',
|
|
171
|
-
fieldPath: 'HubAccessConfig.HubContentArn',
|
|
172
|
-
invalidValue: null,
|
|
173
|
-
constraint: {
|
|
174
|
-
type: 'conditional-required',
|
|
175
|
-
condition: 'modelSource === jumpstart-hub'
|
|
176
|
-
},
|
|
177
|
-
severity: 'error',
|
|
178
|
-
confidence: 'high',
|
|
179
|
-
source: 'cross-cutting',
|
|
180
|
-
remediationHint: 'When modelSource is "jumpstart-hub", HubAccessConfig.HubContentArn must be present in the payload.'
|
|
181
|
-
});
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
// When modelSource in {s3, jumpstart, jumpstart-hub, registry}, verify MODEL_ARTIFACT_URI is non-empty
|
|
186
|
-
const sourcesRequiringArtifact = ['s3', 'jumpstart', 'jumpstart-hub', 'registry'];
|
|
156
|
+
// When modelSource in {s3, registry}, verify MODEL_ARTIFACT_URI is non-empty
|
|
157
|
+
const sourcesRequiringArtifact = ['s3', 'registry'];
|
|
187
158
|
if (sourcesRequiringArtifact.includes(modelSource)) {
|
|
188
159
|
const artifactUri = config.MODEL_ARTIFACT_URI || '';
|
|
189
160
|
if (!artifactUri || artifactUri.trim() === '') {
|
|
@@ -457,4 +428,146 @@ export default class CrossCuttingChecker {
|
|
|
457
428
|
|
|
458
429
|
return findings;
|
|
459
430
|
}
|
|
431
|
+
|
|
432
|
+
/**
|
|
433
|
+
* Validate marketplace model package compatibility.
|
|
434
|
+
* Checks ARN format, subscription status, instance type support,
|
|
435
|
+
* deployment target support, LoRA incompatibility, and adapter operations.
|
|
436
|
+
*
|
|
437
|
+
* For live AWS API checks (DescribeModelPackage), gracefully skips
|
|
438
|
+
* when credentials are unavailable — only format checks are enforced.
|
|
439
|
+
*
|
|
440
|
+
* @param {Object} context - ValidationContext
|
|
441
|
+
* @returns {Array} Findings
|
|
442
|
+
*/
|
|
443
|
+
checkMarketplaceCompatibility(context) {
|
|
444
|
+
const findings = [];
|
|
445
|
+
const config = context.config || {};
|
|
446
|
+
|
|
447
|
+
const architecture = config.architecture || config.DEPLOYMENT_CONFIG || '';
|
|
448
|
+
if (architecture !== 'marketplace') return findings;
|
|
449
|
+
|
|
450
|
+
// 1. Validate ARN format
|
|
451
|
+
const modelPackageArn = config.modelPackageArn || config.MODEL_PACKAGE_ARN || '';
|
|
452
|
+
if (modelPackageArn) {
|
|
453
|
+
const arnPattern = /^arn:aws:sagemaker:[a-z0-9-]+:\d{12}:model-package\/[a-zA-Z0-9]([a-zA-Z0-9-])*\/\d+$/;
|
|
454
|
+
if (!arnPattern.test(modelPackageArn)) {
|
|
455
|
+
findings.push({
|
|
456
|
+
service: 'cross-cutting',
|
|
457
|
+
operation: 'configuration',
|
|
458
|
+
fieldPath: 'MODEL_PACKAGE_ARN',
|
|
459
|
+
invalidValue: modelPackageArn,
|
|
460
|
+
constraint: {
|
|
461
|
+
type: 'arn-format',
|
|
462
|
+
pattern: 'arn:aws:sagemaker:<region>:<account>:model-package/<name>/<version>'
|
|
463
|
+
},
|
|
464
|
+
severity: 'error',
|
|
465
|
+
confidence: 'high',
|
|
466
|
+
source: 'cross-cutting',
|
|
467
|
+
remediationHint: '❌ Invalid model package ARN format. Expected: arn:aws:sagemaker:<region>:<account>:model-package/<name>/<version>'
|
|
468
|
+
});
|
|
469
|
+
}
|
|
470
|
+
}
|
|
471
|
+
|
|
472
|
+
// 2. Verify subscription is active (when package metadata is available)
|
|
473
|
+
const packageStatus = config._marketplacePackageStatus || config.marketplacePackageStatus || '';
|
|
474
|
+
if (packageStatus && packageStatus !== 'Active' && packageStatus !== 'Completed') {
|
|
475
|
+
findings.push({
|
|
476
|
+
service: 'cross-cutting',
|
|
477
|
+
operation: 'configuration',
|
|
478
|
+
fieldPath: 'MODEL_PACKAGE_ARN',
|
|
479
|
+
invalidValue: modelPackageArn,
|
|
480
|
+
constraint: {
|
|
481
|
+
type: 'subscription-status',
|
|
482
|
+
status: packageStatus
|
|
483
|
+
},
|
|
484
|
+
severity: 'error',
|
|
485
|
+
confidence: 'high',
|
|
486
|
+
source: 'cross-cutting',
|
|
487
|
+
remediationHint: `❌ Marketplace subscription is not active (status: ${packageStatus}). Renew at AWS Marketplace.`
|
|
488
|
+
});
|
|
489
|
+
}
|
|
490
|
+
|
|
491
|
+
// 3. Verify instance type is in package's supported list
|
|
492
|
+
const instanceType = config.INSTANCE_TYPE || config.instanceType || '';
|
|
493
|
+
const supportedInstanceTypes = config._supportedInstanceTypes || config.supportedInstanceTypes || [];
|
|
494
|
+
if (instanceType && supportedInstanceTypes.length > 0) {
|
|
495
|
+
if (!supportedInstanceTypes.includes(instanceType)) {
|
|
496
|
+
findings.push({
|
|
497
|
+
service: 'cross-cutting',
|
|
498
|
+
operation: 'configuration',
|
|
499
|
+
fieldPath: 'INSTANCE_TYPE',
|
|
500
|
+
invalidValue: instanceType,
|
|
501
|
+
constraint: {
|
|
502
|
+
type: 'marketplace-instance-type',
|
|
503
|
+
supportedInstanceTypes
|
|
504
|
+
},
|
|
505
|
+
severity: 'error',
|
|
506
|
+
confidence: 'high',
|
|
507
|
+
source: 'cross-cutting',
|
|
508
|
+
remediationHint: `❌ Instance type ${instanceType} is not supported by this model package. Supported: ${supportedInstanceTypes.join(', ')}`
|
|
509
|
+
});
|
|
510
|
+
}
|
|
511
|
+
}
|
|
512
|
+
|
|
513
|
+
// 4. Verify deployment target is supported by the package
|
|
514
|
+
const deploymentTarget = context.deploymentTarget || config.deploymentTarget || config.DEPLOYMENT_TARGET || '';
|
|
515
|
+
const supportedDeploymentTargets = config._supportedDeploymentTargets || config.supportedDeploymentTargets || [];
|
|
516
|
+
if (deploymentTarget && supportedDeploymentTargets.length > 0) {
|
|
517
|
+
if (!supportedDeploymentTargets.includes(deploymentTarget)) {
|
|
518
|
+
findings.push({
|
|
519
|
+
service: 'cross-cutting',
|
|
520
|
+
operation: 'configuration',
|
|
521
|
+
fieldPath: 'DEPLOYMENT_TARGET',
|
|
522
|
+
invalidValue: deploymentTarget,
|
|
523
|
+
constraint: {
|
|
524
|
+
type: 'marketplace-deployment-target',
|
|
525
|
+
supportedDeploymentTargets
|
|
526
|
+
},
|
|
527
|
+
severity: 'error',
|
|
528
|
+
confidence: 'high',
|
|
529
|
+
source: 'cross-cutting',
|
|
530
|
+
remediationHint: `❌ Deployment target ${deploymentTarget} is not supported by this model package.`
|
|
531
|
+
});
|
|
532
|
+
}
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
// 5. Reject LoRA with marketplace
|
|
536
|
+
const enableLora = config.enableLora || config.ENABLE_LORA || false;
|
|
537
|
+
if (enableLora === true || enableLora === 'true') {
|
|
538
|
+
findings.push({
|
|
539
|
+
service: 'cross-cutting',
|
|
540
|
+
operation: 'configuration',
|
|
541
|
+
fieldPath: 'enableLora',
|
|
542
|
+
invalidValue: true,
|
|
543
|
+
constraint: {
|
|
544
|
+
type: 'marketplace-lora-incompatible'
|
|
545
|
+
},
|
|
546
|
+
severity: 'error',
|
|
547
|
+
confidence: 'high',
|
|
548
|
+
source: 'cross-cutting',
|
|
549
|
+
remediationHint: '❌ LoRA adapters are not supported for Marketplace model packages (vendor controls the model).'
|
|
550
|
+
});
|
|
551
|
+
}
|
|
552
|
+
|
|
553
|
+
// 6. Reject adapter operations on marketplace projects
|
|
554
|
+
const operation = config._operation || config.operation || '';
|
|
555
|
+
if (operation === 'adapter' || operation === 'do/adapter') {
|
|
556
|
+
findings.push({
|
|
557
|
+
service: 'cross-cutting',
|
|
558
|
+
operation: 'configuration',
|
|
559
|
+
fieldPath: 'operation',
|
|
560
|
+
invalidValue: operation,
|
|
561
|
+
constraint: {
|
|
562
|
+
type: 'marketplace-adapter-incompatible'
|
|
563
|
+
},
|
|
564
|
+
severity: 'error',
|
|
565
|
+
confidence: 'high',
|
|
566
|
+
source: 'cross-cutting',
|
|
567
|
+
remediationHint: '❌ Adapter operations are not available for Marketplace projects.'
|
|
568
|
+
});
|
|
569
|
+
}
|
|
570
|
+
|
|
571
|
+
return findings;
|
|
572
|
+
}
|
|
460
573
|
}
|