@aws/ml-container-creator 0.2.4 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +62 -298
- package/bin/cli.js +7 -2
- package/package.json +7 -8
- package/servers/base-image-picker/index.js +3 -3
- package/servers/base-image-picker/manifest.json +4 -2
- package/servers/instance-sizer/index.js +561 -0
- package/servers/instance-sizer/lib/instance-ranker.js +245 -0
- package/servers/instance-sizer/lib/model-resolver.js +265 -0
- package/servers/instance-sizer/lib/vram-estimator.js +177 -0
- package/servers/instance-sizer/manifest.json +17 -0
- package/servers/instance-sizer/package.json +15 -0
- package/servers/{instance-recommender → lib}/catalogs/instances.json +136 -34
- package/servers/{base-image-picker → lib}/catalogs/model-servers.json +19 -249
- package/servers/lib/catalogs/model-sizes.json +131 -0
- package/servers/lib/catalogs/models.json +602 -0
- package/servers/{model-picker → lib}/catalogs/popular-diffusors.json +32 -10
- package/servers/{model-picker → lib}/catalogs/popular-transformers.json +59 -26
- package/servers/{base-image-picker → lib}/catalogs/python-slim.json +12 -12
- package/servers/lib/schemas/image-catalog.schema.json +0 -12
- package/servers/lib/schemas/instances.schema.json +29 -0
- package/servers/lib/schemas/model-catalog.schema.json +12 -10
- package/servers/lib/schemas/unified-model-catalog.schema.json +129 -0
- package/servers/model-picker/index.js +2 -3
- package/servers/model-picker/manifest.json +2 -3
- package/servers/region-picker/index.js +1 -1
- package/servers/region-picker/manifest.json +1 -1
- package/src/app.js +17 -0
- package/src/lib/bootstrap-command-handler.js +38 -0
- package/src/lib/cli-handler.js +3 -3
- package/src/lib/config-manager.js +4 -1
- package/src/lib/configuration-manager.js +2 -2
- package/src/lib/cross-cutting-checker.js +341 -0
- package/src/lib/dry-run-validator.js +78 -0
- package/src/lib/generation-validator.js +102 -0
- package/src/lib/mcp-validator-config.js +89 -0
- package/src/lib/payload-builder.js +153 -0
- package/src/lib/prompt-runner.js +445 -135
- package/src/lib/prompts.js +1 -1
- package/src/lib/registry-loader.js +5 -5
- package/src/lib/schema-sync.js +203 -0
- package/src/lib/schema-validation-engine.js +195 -0
- package/src/lib/service-model-parser.js +102 -0
- package/src/lib/validate-runner.js +167 -0
- package/src/lib/validation-report.js +133 -0
- package/src/lib/validators/base-validator.js +36 -0
- package/src/lib/validators/catalog-validator.js +177 -0
- package/src/lib/validators/enum-validator.js +120 -0
- package/src/lib/validators/required-field-validator.js +150 -0
- package/src/lib/validators/type-validator.js +313 -0
- package/templates/Dockerfile +1 -1
- package/templates/do/build +15 -5
- package/templates/do/run +5 -1
- package/templates/do/validate +61 -0
- package/servers/instance-recommender/LICENSE +0 -202
- package/servers/instance-recommender/index.js +0 -284
- package/servers/instance-recommender/manifest.json +0 -16
- package/servers/instance-recommender/package.json +0 -15
- /package/servers/{model-picker → lib}/catalogs/jumpstart-public.json +0 -0
- /package/servers/{region-picker → lib}/catalogs/regions.json +0 -0
- /package/servers/{base-image-picker → lib}/catalogs/triton-backends.json +0 -0
- /package/servers/{base-image-picker → lib}/catalogs/triton.json +0 -0
|
@@ -1,284 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
3
|
-
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
|
|
5
|
-
/**
|
|
6
|
-
* Instance Recommender MCP Server
|
|
7
|
-
*
|
|
8
|
-
* A bundled MCP server that recommends SageMaker instance types and
|
|
9
|
-
* IAM role ARNs based on the current ML framework and model configuration.
|
|
10
|
-
*
|
|
11
|
-
* Supports two modes:
|
|
12
|
-
* - Static (default): Returns hardcoded instance lists by framework category
|
|
13
|
-
* - Smart (--smart flag or BEDROCK_SMART=true): Queries Amazon Bedrock for
|
|
14
|
-
* context-aware recommendations, falling back to static on failure
|
|
15
|
-
*
|
|
16
|
-
* Tool: get_instance_types
|
|
17
|
-
* Accepts: { parameters: string[], limit: number, context: object }
|
|
18
|
-
* Returns: { values: Record<string, string>, choices: Record<string, string[]> }
|
|
19
|
-
*/
|
|
20
|
-
|
|
21
|
-
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
|
|
22
|
-
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
|
|
23
|
-
import { z } from 'zod'
|
|
24
|
-
import { readFileSync } from 'node:fs'
|
|
25
|
-
import { fileURLToPath } from 'node:url'
|
|
26
|
-
import { resolve, dirname } from 'node:path'
|
|
27
|
-
import { queryBedrock } from '../lib/bedrock-client.js'
|
|
28
|
-
|
|
29
|
-
// ── Catalog loader ───────────────────────────────────────────────────────────
|
|
30
|
-
|
|
31
|
-
const __filename = fileURLToPath(import.meta.url)
|
|
32
|
-
const __dirname = dirname(__filename)
|
|
33
|
-
|
|
34
|
-
/**
 * Load and parse a JSON catalog file relative to the server directory.
 *
 * Every error thrown carries the file path in its message and the underlying
 * error on `cause`, so callers that only print `err.message` keep working
 * while the original failure stays available for debugging.
 *
 * @param {string} relativePath - Path relative to server dir (e.g. './catalogs/instances.json')
 * @returns {any} Parsed JSON content
 * @throws {Error} `Catalog file not found: <path>` when the file does not exist,
 *   `Failed to read catalog <path>: <reason>` for any other read failure, and
 *   `Failed to parse catalog <path>: <reason>` when the content is not valid JSON.
 */
function loadCatalog(relativePath) {
  const fullPath = resolve(__dirname, relativePath)

  let raw
  try {
    raw = readFileSync(fullPath, 'utf8')
  } catch (err) {
    // Only a genuinely missing file is reported as "not found"; permission
    // problems, directories, etc. get their real reason instead of a misleading message.
    if (err?.code === 'ENOENT') {
      throw new Error(`Catalog file not found: ${fullPath}`, { cause: err })
    }
    throw new Error(`Failed to read catalog ${fullPath}: ${err.message}`, { cause: err })
  }

  try {
    return JSON.parse(raw)
  } catch (err) {
    throw new Error(`Failed to parse catalog ${fullPath}: ${err.message}`, { cause: err })
  }
}
|
|
55
|
-
|
|
56
|
-
// ── Load catalogs from JSON files ─────────────────────────────────────────────
|
|
57
|
-
|
|
58
|
-
let INSTANCE_CATALOG
|
|
59
|
-
let INSTANCE_RECOMMENDATIONS
|
|
60
|
-
|
|
61
|
-
try {
|
|
62
|
-
const data = loadCatalog('./catalogs/instances.json')
|
|
63
|
-
INSTANCE_CATALOG = data.catalog
|
|
64
|
-
INSTANCE_RECOMMENDATIONS = data.recommendations
|
|
65
|
-
} catch (err) {
|
|
66
|
-
process.stderr.write(`[instance-recommender] Fatal: ${err.message}\n`)
|
|
67
|
-
process.exit(1)
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
const GPU_FRAMEWORKS = new Set(['transformers'])
|
|
71
|
-
|
|
72
|
-
// Bedrock configuration
|
|
73
|
-
const SMART_MODE = process.env.BEDROCK_SMART === 'true'
|
|
74
|
-
const BEDROCK_MODEL = process.env.BEDROCK_MODEL || 'global.anthropic.claude-sonnet-4-20250514-v1:0'
|
|
75
|
-
const BEDROCK_REGION = process.env.BEDROCK_REGION || process.env.AWS_REGION || 'us-east-1'
|
|
76
|
-
|
|
77
|
-
/**
|
|
78
|
-
* Per-server configuration passed to the shared Bedrock client.
|
|
79
|
-
*/
|
|
80
|
-
const SERVER_CONFIG = {
|
|
81
|
-
serverName: 'instance-recommender',
|
|
82
|
-
systemPromptTemplate: `You are an AWS SageMaker instance type advisor. Given the following ML deployment context, recommend the best SageMaker instance types.
|
|
83
|
-
|
|
84
|
-
Current configuration: {context}
|
|
85
|
-
Requested parameters: {parameters}
|
|
86
|
-
Maximum recommendations per parameter: {limit}
|
|
87
|
-
|
|
88
|
-
Respond with ONLY a JSON object in this exact format, no other text:
|
|
89
|
-
{
|
|
90
|
-
"values": {
|
|
91
|
-
"instanceType": "the single best instance type as a string",
|
|
92
|
-
"awsRoleArn": "a recommended role ARN pattern if applicable"
|
|
93
|
-
}
|
|
94
|
-
}
|
|
95
|
-
|
|
96
|
-
Rules:
|
|
97
|
-
- Only include parameters that were requested
|
|
98
|
-
- For instanceType: recommend real SageMaker instance types (ml.* prefix) appropriate for the framework and model
|
|
99
|
-
- For awsRoleArn: skip this field, do not recommend ARNs
|
|
100
|
-
- The first value in any list should be your top recommendation
|
|
101
|
-
- Consider GPU vs CPU needs based on the framework
|
|
102
|
-
- Consider model size and memory requirements if model info is available
|
|
103
|
-
- Return valid JSON only`,
|
|
104
|
-
temperature: 0.3,
|
|
105
|
-
maxTokens: 1024,
|
|
106
|
-
modelId: BEDROCK_MODEL,
|
|
107
|
-
region: BEDROCK_REGION
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
/**
 * Determine which instance list to use based on framework context and optional search term.
 *
 * Without a usable `context.instanceSearch`, the legacy category list from
 * INSTANCE_RECOMMENDATIONS is returned: the GPU list when `context.framework`
 * is in GPU_FRAMEWORKS, the CPU list otherwise. With a search term, every entry
 * of INSTANCE_CATALOG is scored by keyword; names with a positive score are
 * returned best-first, and the legacy list is returned when nothing scores.
 *
 * @param {object} [context] - Current configuration context
 * @param {string} [context.framework] - Framework name used to pick GPU vs CPU defaults
 * @param {string} [context.instanceSearch] - Free-text search (e.g. "multi-gpu cuda 12")
 * @returns {string[]} Instance type names
 */
function getStaticInstances(context) {
  const framework = context?.framework
  const search = context?.instanceSearch

  // Start with framework-based category filter
  const isGpu = framework && GPU_FRAMEWORKS.has(framework)
  const legacyList = () => (isGpu ? INSTANCE_RECOMMENDATIONS.gpu : INSTANCE_RECOMMENDATIONS.cpu)

  // Context values are not validated upstream, so anything that is not a
  // non-empty string is treated as "no search term" instead of throwing.
  if (typeof search !== 'string' || search === '') {
    return legacyList()
  }

  // Tokenize search into lowercase keywords
  const rawLower = search.toLowerCase()
  const tokens = rawLower.split(/[\s,\-_]+/).filter(Boolean)

  // Detect compound terms on the raw string, accepting the same separators
  // (space, hyphen, underscore, or none) as the CUDA pattern below.
  const wantsMultiGpu = /multi[\s\-_]*gpu/.test(rawLower)

  // Detect CUDA version requests: "cuda 12", "cuda 11.8", "cuda-12.1"
  const cudaMatch = rawLower.match(/cuda[\s\-_]*(\d+(?:\.\d+)?)/)
  const wantsCudaVersion = cudaMatch ? cudaMatch[1] : null

  // Score each instance by how many tokens match its tags, accelerator, or instance name
  const scored = Object.entries(INSTANCE_CATALOG).map(([name, meta]) => {
    let score = 0
    const cudaStr = meta.cudaVersions ? meta.cudaVersions.join(' ') : ''
    const haystack = [...meta.tags, meta.accelerator.toLowerCase(), name, meta.category, cudaStr].join(' ')

    // Compound term: multi-gpu — only match instances with >1 GPU
    if (wantsMultiGpu) {
      if (!(meta.gpus > 1)) return { name, meta, score: 0 }
      score += 5
    }

    // Compound term: cuda version — only match instances supporting that version
    if (wantsCudaVersion) {
      if (!meta.cudaVersions) return { name, meta, score: 0 }
      const hasExact = meta.cudaVersions.includes(wantsCudaVersion)
      // Match on a whole version component: "12" matches "12.1",
      // but "1" must not match "11.8" or "12.1".
      const hasMajor = meta.cudaVersions.some(v => v.startsWith(`${wantsCudaVersion}.`))
      if (hasExact) {
        score += 4
      } else if (hasMajor) {
        score += 3
      } else {
        return { name, meta, score: 0 }
      }
    }

    for (const token of tokens) {
      // Skip tokens already handled by compound term detection
      if (wantsMultiGpu && (token === 'multi' || token === 'gpu')) continue
      if (wantsCudaVersion && (token === 'cuda' || token === wantsCudaVersion)) continue

      if (haystack.includes(token)) score += 1
      if (token === 'parallel' && meta.gpus > 1) score += 2
      if (token === 'gpu' && meta.gpus > 0) score += 1
      if (token === 'cpu' && meta.gpus === 0) score += 1
      if (token === 'cheap' || token === 'budget' || token === 'cost') {
        if (meta.tags.includes('budget') || meta.tags.includes('cost-effective')) score += 1
      }
      // "high-memory" never reaches this loop as one token (hyphens are
      // separators), so only the "memory" keyword is checked.
      if (token === 'memory' && meta.memGb >= 32) score += 1
      if (token === 'large' && meta.vcpus >= 16) score += 1
      // Match specific CUDA versions (e.g. "11.8", "12.1")
      if (meta.cudaVersions && meta.cudaVersions.includes(token)) score += 2
    }

    return { name, meta, score }
  })

  // Keep only instances with a positive score, sorted descending
  const matched = scored.filter(s => s.score > 0).sort((a, b) => b.score - a.score)

  // No matches — fall back to legacy category-based list
  if (matched.length === 0) {
    return legacyList()
  }

  return matched.map(s => s.name)
}
|
|
200
|
-
|
|
201
|
-
/**
 * Write one diagnostic line, tagged with the server name, to stderr.
 * stderr is used so the output does not interfere with the MCP stdio
 * protocol on stdout.
 *
 * @param {string} message - Text to log (a trailing newline is appended)
 */
function log(message) {
  const line = `[instance-recommender] ${message}\n`
  process.stderr.write(line)
}
|
|
207
|
-
|
|
208
|
-
// Create MCP server
|
|
209
|
-
const server = new McpServer({
|
|
210
|
-
name: 'instance-recommender',
|
|
211
|
-
version: '1.0.0'
|
|
212
|
-
})
|
|
213
|
-
|
|
214
|
-
// Register the get_instance_types tool.
// Input:  { parameters: string[], limit: number, context?: object }
// Output: a single text content item whose text is JSON `{ values, choices }`.
server.tool(
  'get_instance_types',
  'Returns recommended SageMaker instance types and configuration values for ML Container Creator',
  {
    parameters: z.array(z.string()).describe('List of parameter names to provide values for'),
    limit: z.number().int().positive().default(10).describe('Maximum number of choices per parameter'),
    context: z.record(z.string(), z.any()).optional().describe('Current configuration context (framework, modelServer, etc.)')
  },
  async ({ parameters, limit, context }) => {
    const values = {}
    const choices = {}
    const ctx = context || {}
    const wantsInstanceType = parameters.includes('instanceType')
    let usedSmart = false

    // Smart mode: try Bedrock first
    if (SMART_MODE && wantsInstanceType) {
      log('[smart] Smart mode enabled, querying Amazon Bedrock...')

      // A failed Bedrock call must not fail the tool call: log it and let the
      // static recommendations below answer instead.
      let bedrockResult = null
      try {
        bedrockResult = await queryBedrock(SERVER_CONFIG, parameters, limit, ctx)
      } catch (err) {
        log(`[smart] Bedrock query failed: ${err?.message ?? err}`)
      }

      const bedrockValue = bedrockResult?.values?.instanceType
      if (bedrockValue) {
        values.instanceType = bedrockValue
        // Use the Bedrock recommendation as the top choice, pad with static list
        const staticInstances = getStaticInstances(ctx)
        const combined = [bedrockValue, ...staticInstances.filter(i => i !== bedrockValue)]
        choices.instanceType = combined.slice(0, limit)
        usedSmart = true
        log(`[smart] Using Bedrock recommendation: ${bedrockValue}`)
      } else {
        log('[smart] Bedrock did not return usable results, falling back to static recommendations')
      }
    }

    // Static fallback (or non-smart mode).
    // awsRoleArn is left to the user — no default recommendations.
    if (!usedSmart && wantsInstanceType) {
      const limited = getStaticInstances(ctx).slice(0, limit)
      values.instanceType = limited[0]
      choices.instanceType = limited
    }

    return {
      content: [{
        type: 'text',
        text: JSON.stringify({ values, choices })
      }]
    }
  }
)
|
|
268
|
-
|
|
269
|
-
// Export for standalone testing
|
|
270
|
-
export { loadCatalog, getStaticInstances, INSTANCE_CATALOG, INSTANCE_RECOMMENDATIONS, GPU_FRAMEWORKS }
|
|
271
|
-
|
|
272
|
-
// Guard MCP transport — the stdio transport is attached only when this file is
// the process entry point, so importing the module does not start the server.
const isMain = process.argv[1] && resolve(process.argv[1]) === __filename

if (isMain) {
  const modeMessage = SMART_MODE
    ? `Smart mode enabled (model: ${BEDROCK_MODEL}, region: ${BEDROCK_REGION})`
    : 'Static mode (set BEDROCK_SMART=true to enable Bedrock-powered recommendations)'
  log(modeMessage)

  await server.connect(new StdioServerTransport())
}
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "@amzn/ml-container-creator-instance-recommender",
|
|
3
|
-
"version": "1.0.0",
|
|
4
|
-
"description": "MCP server that recommends SageMaker instance types for ML Container Creator.",
|
|
5
|
-
"modes": {
|
|
6
|
-
"static": true,
|
|
7
|
-
"smart": true,
|
|
8
|
-
"discover": false
|
|
9
|
-
},
|
|
10
|
-
"catalogs": {
|
|
11
|
-
"instances": "./catalogs/instances.json"
|
|
12
|
-
},
|
|
13
|
-
"tool": {
|
|
14
|
-
"name": "get_instance_types"
|
|
15
|
-
}
|
|
16
|
-
}
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "@amzn/ml-container-creator-instance-recommender",
|
|
3
|
-
"private": true,
|
|
4
|
-
"version": "1.0.0",
|
|
5
|
-
"description": "MCP server that recommends SageMaker instance types and IAM role ARNs based on ML framework and model configuration. Supports Bedrock-powered smart recommendations.",
|
|
6
|
-
"type": "module",
|
|
7
|
-
"main": "index.js",
|
|
8
|
-
"license": "Apache-2.0",
|
|
9
|
-
"scripts": {
|
|
10
|
-
"test": "node test.js"
|
|
11
|
-
},
|
|
12
|
-
"dependencies": {
|
|
13
|
-
"@modelcontextprotocol/sdk": "^1.0.0"
|
|
14
|
-
}
|
|
15
|
-
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|