@aws/ml-container-creator 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/README.md +62 -298
  2. package/bin/cli.js +7 -2
  3. package/package.json +7 -8
  4. package/servers/base-image-picker/index.js +3 -3
  5. package/servers/base-image-picker/manifest.json +4 -2
  6. package/servers/instance-sizer/index.js +561 -0
  7. package/servers/instance-sizer/lib/instance-ranker.js +245 -0
  8. package/servers/instance-sizer/lib/model-resolver.js +265 -0
  9. package/servers/instance-sizer/lib/vram-estimator.js +177 -0
  10. package/servers/instance-sizer/manifest.json +17 -0
  11. package/servers/instance-sizer/package.json +15 -0
  12. package/servers/{instance-recommender → lib}/catalogs/instances.json +136 -34
  13. package/servers/{base-image-picker → lib}/catalogs/model-servers.json +19 -249
  14. package/servers/lib/catalogs/model-sizes.json +131 -0
  15. package/servers/lib/catalogs/models.json +602 -0
  16. package/servers/{model-picker → lib}/catalogs/popular-diffusors.json +32 -10
  17. package/servers/{model-picker → lib}/catalogs/popular-transformers.json +59 -26
  18. package/servers/{base-image-picker → lib}/catalogs/python-slim.json +12 -12
  19. package/servers/lib/schemas/image-catalog.schema.json +0 -12
  20. package/servers/lib/schemas/instances.schema.json +29 -0
  21. package/servers/lib/schemas/model-catalog.schema.json +12 -10
  22. package/servers/lib/schemas/unified-model-catalog.schema.json +129 -0
  23. package/servers/model-picker/index.js +2 -3
  24. package/servers/model-picker/manifest.json +2 -3
  25. package/servers/region-picker/index.js +1 -1
  26. package/servers/region-picker/manifest.json +1 -1
  27. package/src/app.js +17 -0
  28. package/src/lib/bootstrap-command-handler.js +38 -0
  29. package/src/lib/cli-handler.js +3 -3
  30. package/src/lib/config-manager.js +4 -1
  31. package/src/lib/configuration-manager.js +2 -2
  32. package/src/lib/cross-cutting-checker.js +341 -0
  33. package/src/lib/dry-run-validator.js +78 -0
  34. package/src/lib/generation-validator.js +102 -0
  35. package/src/lib/mcp-validator-config.js +89 -0
  36. package/src/lib/payload-builder.js +153 -0
  37. package/src/lib/prompt-runner.js +445 -135
  38. package/src/lib/prompts.js +1 -1
  39. package/src/lib/registry-loader.js +5 -5
  40. package/src/lib/schema-sync.js +203 -0
  41. package/src/lib/schema-validation-engine.js +195 -0
  42. package/src/lib/service-model-parser.js +102 -0
  43. package/src/lib/validate-runner.js +167 -0
  44. package/src/lib/validation-report.js +133 -0
  45. package/src/lib/validators/base-validator.js +36 -0
  46. package/src/lib/validators/catalog-validator.js +177 -0
  47. package/src/lib/validators/enum-validator.js +120 -0
  48. package/src/lib/validators/required-field-validator.js +150 -0
  49. package/src/lib/validators/type-validator.js +313 -0
  50. package/templates/Dockerfile +1 -1
  51. package/templates/do/build +15 -5
  52. package/templates/do/run +5 -1
  53. package/templates/do/validate +61 -0
  54. package/servers/instance-recommender/LICENSE +0 -202
  55. package/servers/instance-recommender/index.js +0 -284
  56. package/servers/instance-recommender/manifest.json +0 -16
  57. package/servers/instance-recommender/package.json +0 -15
  58. package/servers/{model-picker → lib}/catalogs/jumpstart-public.json +0 -0
  59. package/servers/{region-picker → lib}/catalogs/regions.json +0 -0
  60. package/servers/{base-image-picker → lib}/catalogs/triton-backends.json +0 -0
  61. package/servers/{base-image-picker → lib}/catalogs/triton.json +0 -0
@@ -1,284 +0,0 @@
1
- #!/usr/bin/env node
2
- // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
- // SPDX-License-Identifier: Apache-2.0
4
-
5
- /**
6
- * Instance Recommender MCP Server
7
- *
8
- * A bundled MCP server that recommends SageMaker instance types and
9
- * IAM role ARNs based on the current ML framework and model configuration.
10
- *
11
- * Supports two modes:
12
- * - Static (default): Returns hardcoded instance lists by framework category
13
- * - Smart (--smart flag or BEDROCK_SMART=true): Queries Amazon Bedrock for
14
- * context-aware recommendations, falling back to static on failure
15
- *
16
- * Tool: get_instance_types
17
- * Accepts: { parameters: string[], limit: number, context: object }
18
- * Returns: { values: Record<string, string>, choices: Record<string, string[]> }
19
- */
20
-
21
- import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
22
- import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
23
- import { z } from 'zod'
24
- import { readFileSync } from 'node:fs'
25
- import { fileURLToPath } from 'node:url'
26
- import { resolve, dirname } from 'node:path'
27
- import { queryBedrock } from '../lib/bedrock-client.js'
28
-
29
- // ── Catalog loader ───────────────────────────────────────────────────────────
30
-
31
- const __filename = fileURLToPath(import.meta.url)
32
- const __dirname = dirname(__filename)
33
-
34
- /**
35
- * Load and parse a JSON catalog file relative to the server directory.
36
- * Throws on missing file or invalid JSON with the file path in the message.
37
- *
38
- * @param {string} relativePath - Path relative to server dir (e.g. './catalogs/instances.json')
39
- * @returns {any} Parsed JSON content
40
- */
41
- function loadCatalog(relativePath) {
42
- const fullPath = resolve(__dirname, relativePath)
43
- let raw
44
- try {
45
- raw = readFileSync(fullPath, 'utf8')
46
- } catch (err) {
47
- throw new Error(`Catalog file not found: ${fullPath}`)
48
- }
49
- try {
50
- return JSON.parse(raw)
51
- } catch (err) {
52
- throw new Error(`Failed to parse catalog ${fullPath}: ${err.message}`)
53
- }
54
- }
55
-
56
- // ── Load catalogs from JSON files ─────────────────────────────────────────────
57
-
58
- let INSTANCE_CATALOG
59
- let INSTANCE_RECOMMENDATIONS
60
-
61
- try {
62
- const data = loadCatalog('./catalogs/instances.json')
63
- INSTANCE_CATALOG = data.catalog
64
- INSTANCE_RECOMMENDATIONS = data.recommendations
65
- } catch (err) {
66
- process.stderr.write(`[instance-recommender] Fatal: ${err.message}\n`)
67
- process.exit(1)
68
- }
69
-
70
- const GPU_FRAMEWORKS = new Set(['transformers'])
71
-
72
- // Bedrock configuration
73
- const SMART_MODE = process.env.BEDROCK_SMART === 'true'
74
- const BEDROCK_MODEL = process.env.BEDROCK_MODEL || 'global.anthropic.claude-sonnet-4-20250514-v1:0'
75
- const BEDROCK_REGION = process.env.BEDROCK_REGION || process.env.AWS_REGION || 'us-east-1'
76
-
77
- /**
78
- * Per-server configuration passed to the shared Bedrock client.
79
- */
80
- const SERVER_CONFIG = {
81
- serverName: 'instance-recommender',
82
- systemPromptTemplate: `You are an AWS SageMaker instance type advisor. Given the following ML deployment context, recommend the best SageMaker instance types.
83
-
84
- Current configuration: {context}
85
- Requested parameters: {parameters}
86
- Maximum recommendations per parameter: {limit}
87
-
88
- Respond with ONLY a JSON object in this exact format, no other text:
89
- {
90
- "values": {
91
- "instanceType": "the single best instance type as a string",
92
- "awsRoleArn": "a recommended role ARN pattern if applicable"
93
- }
94
- }
95
-
96
- Rules:
97
- - Only include parameters that were requested
98
- - For instanceType: recommend real SageMaker instance types (ml.* prefix) appropriate for the framework and model
99
- - For awsRoleArn: skip this field, do not recommend ARNs
100
- - The first value in any list should be your top recommendation
101
- - Consider GPU vs CPU needs based on the framework
102
- - Consider model size and memory requirements if model info is available
103
- - Return valid JSON only`,
104
- temperature: 0.3,
105
- maxTokens: 1024,
106
- modelId: BEDROCK_MODEL,
107
- region: BEDROCK_REGION
108
- }
109
-
110
- /**
111
- * Determine which instance list to use based on framework context and optional search term.
112
- * When instanceSearch is provided, filters the catalog by keyword matching.
113
- */
114
- function getStaticInstances(context) {
115
- const framework = context?.framework
116
- const search = context?.instanceSearch
117
-
118
- // Start with framework-based category filter
119
- const isGpu = framework && GPU_FRAMEWORKS.has(framework)
120
- if (!search) {
121
- // No search term — return the legacy category-based list
122
- return isGpu ? INSTANCE_RECOMMENDATIONS.gpu : INSTANCE_RECOMMENDATIONS.cpu
123
- }
124
-
125
- // Search mode: use the full catalog
126
- let candidates = Object.entries(INSTANCE_CATALOG)
127
-
128
- // Tokenize search into lowercase keywords
129
- const tokens = search.toLowerCase().split(/[\s,\-_]+/).filter(Boolean)
130
-
131
- // Detect compound terms before tokenization
132
- const rawLower = search.toLowerCase()
133
- const wantsMultiGpu = rawLower.includes('multi gpu') || rawLower.includes('multi-gpu') || rawLower.includes('multigpu')
134
-
135
- // Detect CUDA version requests: "cuda 12", "cuda 11.8", "cuda-12.1"
136
- const cudaMatch = rawLower.match(/cuda[\s\-_]*(\d+(?:\.\d+)?)/)
137
- const wantsCudaVersion = cudaMatch ? cudaMatch[1] : null
138
-
139
- // Score each instance by how many tokens match its tags, accelerator, or instance name
140
- const scored = candidates.map(([name, meta]) => {
141
- let score = 0
142
- const cudaStr = meta.cudaVersions ? meta.cudaVersions.join(' ') : ''
143
- const haystack = [...meta.tags, meta.accelerator.toLowerCase(), name, meta.category, cudaStr].join(' ')
144
-
145
- // Compound term: multi-gpu — only match instances with >1 GPU
146
- if (wantsMultiGpu) {
147
- if (meta.gpus > 1) {
148
- score += 5
149
- } else {
150
- return { name, meta, score: 0 }
151
- }
152
- }
153
-
154
- // Compound term: cuda version — only match instances supporting that version
155
- if (wantsCudaVersion) {
156
- if (!meta.cudaVersions) return { name, meta, score: 0 }
157
- const hasExact = meta.cudaVersions.includes(wantsCudaVersion)
158
- const hasMajor = meta.cudaVersions.some(v => v.startsWith(wantsCudaVersion))
159
- if (hasExact) {
160
- score += 4
161
- } else if (hasMajor) {
162
- score += 3
163
- } else {
164
- return { name, meta, score: 0 }
165
- }
166
- }
167
-
168
- for (const token of tokens) {
169
- // Skip tokens already handled by compound term detection
170
- if (wantsMultiGpu && (token === 'multi' || token === 'gpu')) continue
171
- if (wantsCudaVersion && (token === 'cuda' || token === wantsCudaVersion)) continue
172
-
173
- if (haystack.includes(token)) score += 1
174
- if (meta.gpus > 1 && (token === 'parallel')) score += 2
175
- if (token === 'gpu' && meta.gpus > 0) score += 1
176
- if (token === 'cpu' && meta.gpus === 0) score += 1
177
- if (token === 'cheap' || token === 'budget' || token === 'cost') {
178
- if (meta.tags.includes('budget') || meta.tags.includes('cost-effective')) score += 1
179
- }
180
- if (token === 'memory' || token === 'high-memory') {
181
- if (meta.memGb >= 32) score += 1
182
- }
183
- if (token === 'large' && meta.vcpus >= 16) score += 1
184
- // Match specific CUDA versions (e.g. "11.8", "12.1")
185
- if (meta.cudaVersions && meta.cudaVersions.includes(token)) score += 2
186
- }
187
- return { name, meta, score }
188
- })
189
-
190
- // Keep only instances with a positive score, sorted descending
191
- const matched = scored.filter(s => s.score > 0).sort((a, b) => b.score - a.score)
192
-
193
- if (matched.length === 0) {
194
- // No matches — fall back to legacy category-based list
195
- return isGpu ? INSTANCE_RECOMMENDATIONS.gpu : INSTANCE_RECOMMENDATIONS.cpu
196
- }
197
-
198
- return matched.map(s => s.name)
199
- }
200
-
201
- /**
202
- * Log to stderr so it doesn't interfere with MCP stdio protocol on stdout.
203
- */
204
- function log(message) {
205
- process.stderr.write(`[instance-recommender] ${message}\n`)
206
- }
207
-
208
- // Create MCP server
209
- const server = new McpServer({
210
- name: 'instance-recommender',
211
- version: '1.0.0'
212
- })
213
-
214
- // Register the get_instance_types tool
215
- server.tool(
216
- 'get_instance_types',
217
- 'Returns recommended SageMaker instance types and configuration values for ML Container Creator',
218
- {
219
- parameters: z.array(z.string()).describe('List of parameter names to provide values for'),
220
- limit: z.number().int().positive().default(10).describe('Maximum number of choices per parameter'),
221
- context: z.record(z.string(), z.any()).optional().describe('Current configuration context (framework, modelServer, etc.)')
222
- },
223
- async ({ parameters, limit, context }) => {
224
- const values = {}
225
- const choices = {}
226
- let usedSmart = false
227
-
228
- // Smart mode: try Bedrock first
229
- if (SMART_MODE && parameters.includes('instanceType')) {
230
- log('[smart] Smart mode enabled, querying Amazon Bedrock...')
231
- const bedrockResult = await queryBedrock(SERVER_CONFIG, parameters, limit, context || {})
232
-
233
- if (bedrockResult?.values?.instanceType) {
234
- values.instanceType = bedrockResult.values.instanceType
235
- // Use the Bedrock recommendation as the top choice, pad with static list
236
- const staticInstances = getStaticInstances(context || {})
237
- const bedrockValue = bedrockResult.values.instanceType
238
- const combined = [bedrockValue, ...staticInstances.filter(i => i !== bedrockValue)]
239
- choices.instanceType = combined.slice(0, limit)
240
- usedSmart = true
241
- log(`[smart] Using Bedrock recommendation: ${bedrockValue}`)
242
- } else {
243
- log('[smart] Bedrock did not return usable results, falling back to static recommendations')
244
- }
245
- }
246
-
247
- // Static fallback (or non-smart mode)
248
- if (!usedSmart) {
249
- for (const param of parameters) {
250
- if (param === 'instanceType') {
251
- const instances = getStaticInstances(context || {})
252
- const limited = instances.slice(0, limit)
253
- values.instanceType = limited[0]
254
- choices.instanceType = limited
255
- }
256
- // awsRoleArn is left to the user — no default recommendations
257
- }
258
- }
259
-
260
- return {
261
- content: [{
262
- type: 'text',
263
- text: JSON.stringify({ values, choices })
264
- }]
265
- }
266
- }
267
- )
268
-
269
- // Export for standalone testing
270
- export { loadCatalog, getStaticInstances, INSTANCE_CATALOG, INSTANCE_RECOMMENDATIONS, GPU_FRAMEWORKS }
271
-
272
- // Guard MCP transport — only connect when run as main module
273
- const isMain = process.argv[1] && resolve(process.argv[1]) === __filename
274
-
275
- if (isMain) {
276
- if (SMART_MODE) {
277
- log(`Smart mode enabled (model: ${BEDROCK_MODEL}, region: ${BEDROCK_REGION})`)
278
- } else {
279
- log('Static mode (set BEDROCK_SMART=true to enable Bedrock-powered recommendations)')
280
- }
281
-
282
- const transport = new StdioServerTransport()
283
- await server.connect(transport)
284
- }
@@ -1,16 +0,0 @@
1
- {
2
- "name": "@amzn/ml-container-creator-instance-recommender",
3
- "version": "1.0.0",
4
- "description": "MCP server that recommends SageMaker instance types for ML Container Creator.",
5
- "modes": {
6
- "static": true,
7
- "smart": true,
8
- "discover": false
9
- },
10
- "catalogs": {
11
- "instances": "./catalogs/instances.json"
12
- },
13
- "tool": {
14
- "name": "get_instance_types"
15
- }
16
- }
@@ -1,15 +0,0 @@
1
- {
2
- "name": "@amzn/ml-container-creator-instance-recommender",
3
- "private": true,
4
- "version": "1.0.0",
5
- "description": "MCP server that recommends SageMaker instance types and IAM role ARNs based on ML framework and model configuration. Supports Bedrock-powered smart recommendations.",
6
- "type": "module",
7
- "main": "index.js",
8
- "license": "Apache-2.0",
9
- "scripts": {
10
- "test": "node test.js"
11
- },
12
- "dependencies": {
13
- "@modelcontextprotocol/sdk": "^1.0.0"
14
- }
15
- }