@aws/ml-container-creator 1.0.0 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +1 -1
- package/config/tune-catalog.json +303 -1
- package/package.json +2 -1
- package/servers/endpoint-picker/index.js +24 -4
- package/servers/lib/catalogs/model-servers.json +334 -120
- package/src/lib/bootstrap-command-handler.js +20 -2
- package/src/lib/bootstrap-profile-manager.js +33 -0
- package/src/lib/bootstrap-provisioners.js +48 -0
- package/src/lib/cross-cutting-checker.js +6 -1
- package/src/lib/generated/cli-options.js +1 -1
- package/src/lib/generated/parameter-matrix.js +1 -1
- package/src/lib/generated/validation-rules.js +1 -1
- package/src/lib/path-prover-brain.js +57 -0
- package/src/lib/prove-pipeline-executor.js +35 -0
- package/templates/do/.benchmark_writer.py +114 -4
- package/templates/do/.register_helper.py +643 -67
- package/templates/do/.stage_helper.py +1 -0
- package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
- package/templates/do/__pycache__/.register_helper.cpython-312.pyc +0 -0
- package/templates/do/__pycache__/.tune_helper.cpython-312.pyc +0 -0
- package/templates/do/adapter +267 -171
- package/templates/do/benchmark +60 -5
- package/templates/do/config +1 -1
- package/templates/do/lib/inference-component.sh +6 -25
- package/templates/do/register +29 -2
- package/templates/do/tune +94 -12
package/bin/cli.js
CHANGED
|
@@ -162,7 +162,7 @@ program
|
|
|
162
162
|
.command('bootstrap')
|
|
163
163
|
.description('Set up AWS infrastructure (IAM role, ECR repo, S3 buckets)')
|
|
164
164
|
.passThroughOptions()
|
|
165
|
-
.argument('[action]', 'Bootstrap action (status, use, list, remove, scan, prune, update, sync-schemas)')
|
|
165
|
+
.argument('[action]', 'Bootstrap action (status, use, list, remove, scan, prune, update, migrate, sync-schemas, sync-model-families)')
|
|
166
166
|
.argument('[args...]', 'Additional arguments')
|
|
167
167
|
.option('--profile <profile>', 'AWS profile name')
|
|
168
168
|
.option('--region <region>', 'AWS region')
|
package/config/tune-catalog.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": "2026-05-27",
|
|
3
|
-
"lastSynced": "2026-
|
|
3
|
+
"lastSynced": "2026-06-26T19:01:02.821Z",
|
|
4
4
|
"source": "https://docs.aws.amazon.com/sagemaker/latest/dg/model-customize-open-weight.html",
|
|
5
5
|
"models": {
|
|
6
6
|
"huggingface-llm-qwen2-5-7b-instruct": {
|
|
@@ -1614,6 +1614,24 @@
|
|
|
1614
1614
|
"prompt": "array"
|
|
1615
1615
|
}
|
|
1616
1616
|
}
|
|
1617
|
+
},
|
|
1618
|
+
"dpo": {
|
|
1619
|
+
"trainingTypes": [
|
|
1620
|
+
"lora"
|
|
1621
|
+
],
|
|
1622
|
+
"datasetFormat": "default-dpo",
|
|
1623
|
+
"datasetSchema": {
|
|
1624
|
+
"required": [
|
|
1625
|
+
"prompt",
|
|
1626
|
+
"chosen",
|
|
1627
|
+
"rejected"
|
|
1628
|
+
],
|
|
1629
|
+
"types": {
|
|
1630
|
+
"prompt": "string",
|
|
1631
|
+
"chosen": "string",
|
|
1632
|
+
"rejected": "string"
|
|
1633
|
+
}
|
|
1634
|
+
}
|
|
1617
1635
|
}
|
|
1618
1636
|
},
|
|
1619
1637
|
"goldenPath": false
|
|
@@ -1667,6 +1685,24 @@
|
|
|
1667
1685
|
"prompt": "array"
|
|
1668
1686
|
}
|
|
1669
1687
|
}
|
|
1688
|
+
},
|
|
1689
|
+
"dpo": {
|
|
1690
|
+
"trainingTypes": [
|
|
1691
|
+
"lora"
|
|
1692
|
+
],
|
|
1693
|
+
"datasetFormat": "default-dpo",
|
|
1694
|
+
"datasetSchema": {
|
|
1695
|
+
"required": [
|
|
1696
|
+
"prompt",
|
|
1697
|
+
"chosen",
|
|
1698
|
+
"rejected"
|
|
1699
|
+
],
|
|
1700
|
+
"types": {
|
|
1701
|
+
"prompt": "string",
|
|
1702
|
+
"chosen": "string",
|
|
1703
|
+
"rejected": "string"
|
|
1704
|
+
}
|
|
1705
|
+
}
|
|
1670
1706
|
}
|
|
1671
1707
|
},
|
|
1672
1708
|
"goldenPath": false
|
|
@@ -1773,6 +1809,272 @@
|
|
|
1773
1809
|
"prompt": "array"
|
|
1774
1810
|
}
|
|
1775
1811
|
}
|
|
1812
|
+
},
|
|
1813
|
+
"dpo": {
|
|
1814
|
+
"trainingTypes": [
|
|
1815
|
+
"lora"
|
|
1816
|
+
],
|
|
1817
|
+
"datasetFormat": "default-dpo",
|
|
1818
|
+
"datasetSchema": {
|
|
1819
|
+
"required": [
|
|
1820
|
+
"prompt",
|
|
1821
|
+
"chosen",
|
|
1822
|
+
"rejected"
|
|
1823
|
+
],
|
|
1824
|
+
"types": {
|
|
1825
|
+
"prompt": "string",
|
|
1826
|
+
"chosen": "string",
|
|
1827
|
+
"rejected": "string"
|
|
1828
|
+
}
|
|
1829
|
+
}
|
|
1830
|
+
}
|
|
1831
|
+
},
|
|
1832
|
+
"goldenPath": false
|
|
1833
|
+
},
|
|
1834
|
+
"huggingface-llm-nvidia-nemotron-3-super-120b-a12b-bf16": {
|
|
1835
|
+
"family": "huggingface-llm-nvidia-nemotron",
|
|
1836
|
+
"provider": "unknown",
|
|
1837
|
+
"displayName": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16",
|
|
1838
|
+
"huggingFaceId": "",
|
|
1839
|
+
"techniques": {
|
|
1840
|
+
"sft": {
|
|
1841
|
+
"trainingTypes": [
|
|
1842
|
+
"lora"
|
|
1843
|
+
],
|
|
1844
|
+
"datasetFormat": "default-sft",
|
|
1845
|
+
"datasetSchema": {
|
|
1846
|
+
"required": [
|
|
1847
|
+
"prompt",
|
|
1848
|
+
"completion"
|
|
1849
|
+
],
|
|
1850
|
+
"types": {
|
|
1851
|
+
"prompt": "string",
|
|
1852
|
+
"completion": "string"
|
|
1853
|
+
}
|
|
1854
|
+
}
|
|
1855
|
+
},
|
|
1856
|
+
"rlvr": {
|
|
1857
|
+
"trainingTypes": [
|
|
1858
|
+
"lora"
|
|
1859
|
+
],
|
|
1860
|
+
"datasetFormat": "default-rlvr",
|
|
1861
|
+
"datasetSchema": {
|
|
1862
|
+
"required": [
|
|
1863
|
+
"prompt"
|
|
1864
|
+
],
|
|
1865
|
+
"types": {
|
|
1866
|
+
"prompt": "array"
|
|
1867
|
+
}
|
|
1868
|
+
}
|
|
1869
|
+
},
|
|
1870
|
+
"rlaif": {
|
|
1871
|
+
"trainingTypes": [
|
|
1872
|
+
"lora"
|
|
1873
|
+
],
|
|
1874
|
+
"datasetFormat": "default-rlaif",
|
|
1875
|
+
"datasetSchema": {
|
|
1876
|
+
"required": [
|
|
1877
|
+
"prompt"
|
|
1878
|
+
],
|
|
1879
|
+
"types": {
|
|
1880
|
+
"prompt": "array"
|
|
1881
|
+
}
|
|
1882
|
+
}
|
|
1883
|
+
}
|
|
1884
|
+
},
|
|
1885
|
+
"goldenPath": false
|
|
1886
|
+
},
|
|
1887
|
+
"huggingface-reasoning-nvidia-nemotron-3-nano-30b-a3b-bf16": {
|
|
1888
|
+
"family": "huggingface-reasoning-nvidia-nemotron",
|
|
1889
|
+
"provider": "unknown",
|
|
1890
|
+
"displayName": "NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
|
|
1891
|
+
"huggingFaceId": "",
|
|
1892
|
+
"techniques": {
|
|
1893
|
+
"sft": {
|
|
1894
|
+
"trainingTypes": [
|
|
1895
|
+
"lora"
|
|
1896
|
+
],
|
|
1897
|
+
"datasetFormat": "default-sft",
|
|
1898
|
+
"datasetSchema": {
|
|
1899
|
+
"required": [
|
|
1900
|
+
"prompt",
|
|
1901
|
+
"completion"
|
|
1902
|
+
],
|
|
1903
|
+
"types": {
|
|
1904
|
+
"prompt": "string",
|
|
1905
|
+
"completion": "string"
|
|
1906
|
+
}
|
|
1907
|
+
}
|
|
1908
|
+
},
|
|
1909
|
+
"rlaif": {
|
|
1910
|
+
"trainingTypes": [
|
|
1911
|
+
"lora"
|
|
1912
|
+
],
|
|
1913
|
+
"datasetFormat": "default-rlaif",
|
|
1914
|
+
"datasetSchema": {
|
|
1915
|
+
"required": [
|
|
1916
|
+
"prompt"
|
|
1917
|
+
],
|
|
1918
|
+
"types": {
|
|
1919
|
+
"prompt": "array"
|
|
1920
|
+
}
|
|
1921
|
+
}
|
|
1922
|
+
},
|
|
1923
|
+
"rlvr": {
|
|
1924
|
+
"trainingTypes": [
|
|
1925
|
+
"lora"
|
|
1926
|
+
],
|
|
1927
|
+
"datasetFormat": "default-rlvr",
|
|
1928
|
+
"datasetSchema": {
|
|
1929
|
+
"required": [
|
|
1930
|
+
"prompt"
|
|
1931
|
+
],
|
|
1932
|
+
"types": {
|
|
1933
|
+
"prompt": "array"
|
|
1934
|
+
}
|
|
1935
|
+
}
|
|
1936
|
+
}
|
|
1937
|
+
},
|
|
1938
|
+
"goldenPath": false
|
|
1939
|
+
},
|
|
1940
|
+
"huggingface-vlm-gemma-4-e4b-it": {
|
|
1941
|
+
"family": "huggingface-vlm",
|
|
1942
|
+
"provider": "unknown",
|
|
1943
|
+
"displayName": "gemma-4-e4b-it",
|
|
1944
|
+
"huggingFaceId": "",
|
|
1945
|
+
"techniques": {
|
|
1946
|
+
"dpo": {
|
|
1947
|
+
"trainingTypes": [
|
|
1948
|
+
"lora"
|
|
1949
|
+
],
|
|
1950
|
+
"datasetFormat": "default-dpo",
|
|
1951
|
+
"datasetSchema": {
|
|
1952
|
+
"required": [
|
|
1953
|
+
"prompt",
|
|
1954
|
+
"chosen",
|
|
1955
|
+
"rejected"
|
|
1956
|
+
],
|
|
1957
|
+
"types": {
|
|
1958
|
+
"prompt": "string",
|
|
1959
|
+
"chosen": "string",
|
|
1960
|
+
"rejected": "string"
|
|
1961
|
+
}
|
|
1962
|
+
}
|
|
1963
|
+
},
|
|
1964
|
+
"sft": {
|
|
1965
|
+
"trainingTypes": [
|
|
1966
|
+
"lora"
|
|
1967
|
+
],
|
|
1968
|
+
"datasetFormat": "default-sft",
|
|
1969
|
+
"datasetSchema": {
|
|
1970
|
+
"required": [
|
|
1971
|
+
"prompt",
|
|
1972
|
+
"completion"
|
|
1973
|
+
],
|
|
1974
|
+
"types": {
|
|
1975
|
+
"prompt": "string",
|
|
1976
|
+
"completion": "string"
|
|
1977
|
+
}
|
|
1978
|
+
}
|
|
1979
|
+
},
|
|
1980
|
+
"rlvr": {
|
|
1981
|
+
"trainingTypes": [
|
|
1982
|
+
"lora"
|
|
1983
|
+
],
|
|
1984
|
+
"datasetFormat": "default-rlvr",
|
|
1985
|
+
"datasetSchema": {
|
|
1986
|
+
"required": [
|
|
1987
|
+
"prompt"
|
|
1988
|
+
],
|
|
1989
|
+
"types": {
|
|
1990
|
+
"prompt": "array"
|
|
1991
|
+
}
|
|
1992
|
+
}
|
|
1993
|
+
},
|
|
1994
|
+
"rlaif": {
|
|
1995
|
+
"trainingTypes": [
|
|
1996
|
+
"lora"
|
|
1997
|
+
],
|
|
1998
|
+
"datasetFormat": "default-rlaif",
|
|
1999
|
+
"datasetSchema": {
|
|
2000
|
+
"required": [
|
|
2001
|
+
"prompt"
|
|
2002
|
+
],
|
|
2003
|
+
"types": {
|
|
2004
|
+
"prompt": "array"
|
|
2005
|
+
}
|
|
2006
|
+
}
|
|
2007
|
+
}
|
|
2008
|
+
},
|
|
2009
|
+
"goldenPath": false
|
|
2010
|
+
},
|
|
2011
|
+
"huggingface-vlm-gemma-4-31b-it": {
|
|
2012
|
+
"family": "huggingface-vlm",
|
|
2013
|
+
"provider": "unknown",
|
|
2014
|
+
"displayName": "gemma-4-31b-it",
|
|
2015
|
+
"huggingFaceId": "",
|
|
2016
|
+
"techniques": {
|
|
2017
|
+
"dpo": {
|
|
2018
|
+
"trainingTypes": [
|
|
2019
|
+
"lora"
|
|
2020
|
+
],
|
|
2021
|
+
"datasetFormat": "default-dpo",
|
|
2022
|
+
"datasetSchema": {
|
|
2023
|
+
"required": [
|
|
2024
|
+
"prompt",
|
|
2025
|
+
"chosen",
|
|
2026
|
+
"rejected"
|
|
2027
|
+
],
|
|
2028
|
+
"types": {
|
|
2029
|
+
"prompt": "string",
|
|
2030
|
+
"chosen": "string",
|
|
2031
|
+
"rejected": "string"
|
|
2032
|
+
}
|
|
2033
|
+
}
|
|
2034
|
+
},
|
|
2035
|
+
"sft": {
|
|
2036
|
+
"trainingTypes": [
|
|
2037
|
+
"lora"
|
|
2038
|
+
],
|
|
2039
|
+
"datasetFormat": "default-sft",
|
|
2040
|
+
"datasetSchema": {
|
|
2041
|
+
"required": [
|
|
2042
|
+
"prompt",
|
|
2043
|
+
"completion"
|
|
2044
|
+
],
|
|
2045
|
+
"types": {
|
|
2046
|
+
"prompt": "string",
|
|
2047
|
+
"completion": "string"
|
|
2048
|
+
}
|
|
2049
|
+
}
|
|
2050
|
+
},
|
|
2051
|
+
"rlaif": {
|
|
2052
|
+
"trainingTypes": [
|
|
2053
|
+
"lora"
|
|
2054
|
+
],
|
|
2055
|
+
"datasetFormat": "default-rlaif",
|
|
2056
|
+
"datasetSchema": {
|
|
2057
|
+
"required": [
|
|
2058
|
+
"prompt"
|
|
2059
|
+
],
|
|
2060
|
+
"types": {
|
|
2061
|
+
"prompt": "array"
|
|
2062
|
+
}
|
|
2063
|
+
}
|
|
2064
|
+
},
|
|
2065
|
+
"rlvr": {
|
|
2066
|
+
"trainingTypes": [
|
|
2067
|
+
"lora"
|
|
2068
|
+
],
|
|
2069
|
+
"datasetFormat": "default-rlvr",
|
|
2070
|
+
"datasetSchema": {
|
|
2071
|
+
"required": [
|
|
2072
|
+
"prompt"
|
|
2073
|
+
],
|
|
2074
|
+
"types": {
|
|
2075
|
+
"prompt": "array"
|
|
2076
|
+
}
|
|
2077
|
+
}
|
|
1776
2078
|
}
|
|
1777
2079
|
},
|
|
1778
2080
|
"goldenPath": false
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aws/ml-container-creator",
|
|
3
|
-
"version": "1.0.0",
|
|
3
|
+
"version": "1.0.3",
|
|
4
4
|
"description": "Build and deploy custom ML containers on AWS SageMaker with minimal configuration.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -107,6 +107,7 @@
|
|
|
107
107
|
"prepare": "husky || true"
|
|
108
108
|
},
|
|
109
109
|
"dependencies": {
|
|
110
|
+
"@aws/ml-container-creator": "^1.0.2",
|
|
110
111
|
"@inquirer/prompts": "^8.4.2",
|
|
111
112
|
"@modelcontextprotocol/sdk": "^1.27.1",
|
|
112
113
|
"ajv": "^8.12.0",
|
package/servers/endpoint-picker/index.js
CHANGED
|
@@ -200,22 +200,37 @@ async function fetchEndpoints(client, { limit = 10, showFull = false } = {}) {
|
|
|
200
200
|
|
|
201
201
|
const variantName = primaryVariant.VariantName || 'AllTraffic';
|
|
202
202
|
let instanceType = primaryVariant.InstanceType || null;
|
|
203
|
+
let instancePools = primaryVariant.InstancePools || null;
|
|
203
204
|
|
|
204
205
|
// For IC-based endpoints, InstanceType may not be in the variant runtime response.
|
|
205
|
-
// Fall back to DescribeEndpointConfig which
|
|
206
|
-
if (!instanceType && detail.EndpointConfigName) {
|
|
206
|
+
// Fall back to DescribeEndpointConfig which has either InstanceType or InstancePools.
|
|
207
|
+
if (!instanceType && !instancePools && detail.EndpointConfigName) {
|
|
207
208
|
try {
|
|
208
209
|
const ecCmd = new _DescribeEndpointConfigCommand({ EndpointConfigName: detail.EndpointConfigName });
|
|
209
210
|
const ecDetail = await client.send(ecCmd);
|
|
210
211
|
const ecVariant = (ecDetail.ProductionVariants || [])[0];
|
|
211
212
|
if (ecVariant?.InstanceType) {
|
|
212
213
|
instanceType = ecVariant.InstanceType;
|
|
214
|
+
} else if (ecVariant?.InstancePools && ecVariant.InstancePools.length > 0) {
|
|
215
|
+
instancePools = ecVariant.InstancePools;
|
|
213
216
|
}
|
|
214
217
|
} catch (ecErr) {
|
|
215
218
|
log(`Warning: could not describe endpoint config for "${endpointName}": ${ecErr.message}`);
|
|
216
219
|
}
|
|
217
220
|
}
|
|
218
|
-
|
|
221
|
+
|
|
222
|
+
// Resolve instanceType display string from pools if needed
|
|
223
|
+
if (!instanceType && instancePools && instancePools.length > 0) {
|
|
224
|
+
// Sort by priority, use highest-priority (lowest number) for GPU lookup
|
|
225
|
+
const sorted = [...instancePools].sort((a, b) => (a.Priority || 99) - (b.Priority || 99));
|
|
226
|
+
instanceType = sorted[0].InstanceType || 'unknown';
|
|
227
|
+
// Build display string showing the pool: "ml.g5.12xl (pool: 3 types)"
|
|
228
|
+
if (sorted.length > 1) {
|
|
229
|
+
instanceType = `${instanceType} (pool: ${sorted.length} types)`;
|
|
230
|
+
}
|
|
231
|
+
} else {
|
|
232
|
+
instanceType = instanceType || 'unknown';
|
|
233
|
+
}
|
|
219
234
|
|
|
220
235
|
const instanceCount = primaryVariant.CurrentInstanceCount ?? primaryVariant.DesiredInstanceCount ?? 1;
|
|
221
236
|
const hasInstancePools = !!(primaryVariant.InstancePools && primaryVariant.InstancePools.length > 0);
|
|
@@ -244,7 +259,12 @@ async function fetchEndpoints(client, { limit = 10, showFull = false } = {}) {
|
|
|
244
259
|
} while (icNextToken);
|
|
245
260
|
|
|
246
261
|
// Capacity estimation
|
|
247
|
-
|
|
262
|
+
// For pool endpoints, instanceType may be "ml.g5.12xlarge (pool: 3 types)"
|
|
263
|
+
// Extract the raw type for catalog lookup
|
|
264
|
+
const instanceTypeForLookup = instanceType.includes(' (pool:')
|
|
265
|
+
? instanceType.split(' (pool:')[0]
|
|
266
|
+
: instanceType;
|
|
267
|
+
const gpusPerInstance = getGpusForInstance(instanceTypeForLookup);
|
|
248
268
|
let availableGpus;
|
|
249
269
|
if (gpusPerInstance === null) {
|
|
250
270
|
availableGpus = '?';
|