@aws/ml-container-creator 0.10.0 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/LICENSE-THIRD-PARTY +9304 -0
  2. package/bin/cli.js +2 -0
  3. package/config/bootstrap-e2e-stack.json +341 -0
  4. package/config/bootstrap-stack.json +40 -3
  5. package/config/parameter-schema-v2.json +5 -21
  6. package/config/tune-catalog.json +1781 -0
  7. package/infra/ci-harness/buildspec.yml +1 -0
  8. package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
  9. package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
  10. package/infra/ci-harness/lib/ci-harness-stack.ts +837 -7
  11. package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
  12. package/package.json +51 -66
  13. package/servers/base-image-picker/index.js +121 -121
  14. package/servers/e2e-status/index.js +297 -0
  15. package/servers/e2e-status/manifest.json +14 -0
  16. package/servers/e2e-status/package.json +15 -0
  17. package/servers/endpoint-picker/LICENSE +202 -0
  18. package/servers/endpoint-picker/index.js +536 -0
  19. package/servers/endpoint-picker/manifest.json +14 -0
  20. package/servers/endpoint-picker/package.json +18 -0
  21. package/servers/hyperpod-cluster-picker/index.js +125 -125
  22. package/servers/instance-sizer/index.js +138 -138
  23. package/servers/instance-sizer/lib/instance-ranker.js +76 -76
  24. package/servers/instance-sizer/lib/model-resolver.js +61 -61
  25. package/servers/instance-sizer/lib/quota-resolver.js +113 -113
  26. package/servers/instance-sizer/lib/vram-estimator.js +31 -31
  27. package/servers/lib/bedrock-client.js +38 -38
  28. package/servers/lib/catalogs/model-servers.json +201 -3
  29. package/servers/lib/custom-validators.js +13 -13
  30. package/servers/lib/dynamic-resolver.js +4 -4
  31. package/servers/marketplace-picker/index.js +342 -0
  32. package/servers/marketplace-picker/manifest.json +14 -0
  33. package/servers/marketplace-picker/package.json +18 -0
  34. package/servers/model-picker/index.js +382 -382
  35. package/servers/region-picker/index.js +56 -56
  36. package/servers/workload-picker/LICENSE +202 -0
  37. package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
  38. package/servers/workload-picker/index.js +171 -0
  39. package/servers/workload-picker/manifest.json +16 -0
  40. package/servers/workload-picker/package.json +16 -0
  41. package/src/app.js +4 -2
  42. package/src/lib/bootstrap-command-handler.js +579 -14
  43. package/src/lib/bootstrap-config.js +36 -0
  44. package/src/lib/bootstrap-profile-manager.js +48 -41
  45. package/src/lib/ci-register-helpers.js +74 -0
  46. package/src/lib/config-loader.js +3 -0
  47. package/src/lib/config-manager.js +7 -0
  48. package/src/lib/cuda-resolver.js +17 -8
  49. package/src/lib/generated/cli-options.js +315 -315
  50. package/src/lib/generated/parameter-matrix.js +661 -661
  51. package/src/lib/generated/validation-rules.js +71 -71
  52. package/src/lib/path-prover-brain.js +607 -0
  53. package/src/lib/prompts/project-prompts.js +12 -0
  54. package/src/lib/template-variable-resolver.js +25 -1
  55. package/src/lib/tune-catalog-validator.js +37 -4
  56. package/templates/Dockerfile +9 -0
  57. package/templates/code/adapter_sidecar.py +444 -0
  58. package/templates/code/serve +6 -0
  59. package/templates/code/serve.d/vllm.ejs +1 -1
  60. package/templates/do/.benchmark_writer.py +1476 -0
  61. package/templates/do/.tune_helper.py +982 -57
  62. package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
  63. package/templates/do/adapter +149 -0
  64. package/templates/do/benchmark +639 -85
  65. package/templates/do/config +108 -5
  66. package/templates/do/deploy.d/managed-inference.ejs +192 -11
  67. package/templates/do/optimize +106 -37
  68. package/templates/do/register +89 -0
  69. package/templates/do/test +13 -0
  70. package/templates/do/tune +378 -59
  71. package/templates/do/validate +44 -4
@@ -0,0 +1,536 @@
1
+ #!/usr/bin/env node
2
+ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
+ // SPDX-License-Identifier: Apache-2.0
4
+
5
+ /**
6
+ * Endpoint Picker MCP Server
7
+ *
8
+ * A bundled MCP server that discovers available SageMaker real-time endpoints
9
+ * with capacity for attaching new inference components.
10
+ *
11
+ * Uses ListEndpoints (InService only), DescribeEndpoint for variant info,
12
+ * and ListInferenceComponents to calculate available GPU capacity.
13
+ *
14
+ * Tool: get_inference_endpoints
15
+ * Accepts: { parameters: string[], limit: number, context: object }
16
+ * Returns: { values: Record<string, string>, choices: Record<string, string[]>, metadata: object }
17
+ *
18
+ * Environment variables:
19
+ * AWS_REGION - AWS region for SageMaker API calls (default: us-east-1)
20
+ * AWS_PROFILE - AWS profile to use for credentials
21
+ */
22
+
23
+ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
24
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
25
+ import { z } from 'zod';
26
+ import { fileURLToPath } from 'node:url';
27
+ import { resolve, dirname } from 'node:path';
28
+ import { readFileSync } from 'node:fs';
29
+ import { homedir } from 'node:os';
30
+ import { DynamicResolver } from '../lib/dynamic-resolver.js';
31
+
/**
 * Emit one diagnostic line on stderr, tagged with the server name.
 *
 * stdout is reserved for the MCP stdio protocol, so diagnostics never go there.
 *
 * @param {string} message - Text to log
 */
function log(message) {
    const line = `[endpoint-picker] ${message}\n`;
    process.stderr.write(line);
}
38
+
39
+ // ── Instance catalog for GPU lookup ──────────────────────────────────────────
40
+
41
+ const __filename = fileURLToPath(import.meta.url);
42
+ const __dirname = dirname(__filename);
43
+
44
+ let _instanceCatalog = null;
45
+
/**
 * Return the instance catalog, reading ../lib/catalogs/instances.json on first use.
 *
 * The result is kept in the module-level `_instanceCatalog`. If the file cannot be
 * read or parsed, a warning is logged and an empty catalog is stored instead, so
 * callers never see an exception from this function.
 *
 * @returns {object} Map of instanceType -> { gpus, ... }
 */
function _loadInstanceCatalog() {
    if (!_instanceCatalog) {
        try {
            const text = readFileSync(resolve(__dirname, '../lib/catalogs/instances.json'), 'utf8');
            const data = JSON.parse(text);
            // The file may wrap the map in a top-level "catalog" key or be the map itself.
            _instanceCatalog = data.catalog || data;
        } catch (err) {
            log(`Warning: could not load instance catalog: ${err.message}`);
            _instanceCatalog = {};
        }
    }
    return _instanceCatalog;
}
64
+
/**
 * Report how many GPUs a single instance of the given type has.
 *
 * @param {string} instanceType - Instance type used as the catalog key
 * @returns {number|null} The catalog entry's `gpus` value, or null when the type
 *   is not in the catalog or the entry has no `gpus` field
 */
function getGpusForInstance(instanceType) {
    const entry = _loadInstanceCatalog()[instanceType];
    return entry ? (entry.gpus ?? null) : null;
}
75
+
76
+ // ── AWS SDK lazy loading ─────────────────────────────────────────────────────
77
+
78
+ let _SageMakerClient = null;
79
+ let _ListEndpointsCommand = null;
80
+ let _DescribeEndpointCommand = null;
81
+ let _ListInferenceComponentsCommand = null;
82
+ let _fromIni = null;
83
+
/**
 * Import the AWS SDK on first call and cache the classes this module uses.
 *
 * Later calls return immediately once the SageMaker client class is cached.
 * The credential-providers package is treated as optional: if its import fails,
 * `_fromIni` stays unset and profile-based clients are unavailable.
 *
 * @returns {Promise<void>}
 */
async function _ensureSdkLoaded() {
    if (_SageMakerClient) {
        return;
    }

    const {
        SageMakerClient,
        ListEndpointsCommand,
        DescribeEndpointCommand,
        ListInferenceComponentsCommand
    } = await import('@aws-sdk/client-sagemaker');
    _SageMakerClient = SageMakerClient;
    _ListEndpointsCommand = ListEndpointsCommand;
    _DescribeEndpointCommand = DescribeEndpointCommand;
    _ListInferenceComponentsCommand = ListInferenceComponentsCommand;

    try {
        ({ fromIni: _fromIni } = await import('@aws-sdk/credential-providers'));
    } catch {
        // credential-providers not available — profile-based fallback won't work
    }
}
101
+
/**
 * Build a SageMaker client for a region, passing only `region` in the client
 * config so the SDK resolves credentials on its own.
 *
 * @param {string} region - AWS region
 * @returns {object} SageMaker client instance
 */
function _defaultClientFactory(region) {
    const clientConfig = { region };
    return new _SageMakerClient(clientConfig);
}
105
+
/**
 * Create a SageMaker client for the given region.
 *
 * @param {string} region - AWS region
 * @param {Function|null} [clientFactory=null] - Optional `(region) => client`
 *   override; when absent the default factory is used
 * @returns {object} Whatever the chosen factory returns
 */
function createSageMakerClient(region, clientFactory = null) {
    const factory = clientFactory || _defaultClientFactory;
    return factory(region);
}
113
+
/**
 * Build a SageMaker client whose credentials come from a named AWS profile (fromIni).
 *
 * @param {string} region - AWS region
 * @param {string} profile - Profile name handed to fromIni
 * @returns {object} SageMaker client instance
 * @throws {Error} When the credential-providers package could not be loaded
 */
function _createClientWithProfile(region, profile) {
    if (_fromIni) {
        const credentials = _fromIni({ profile });
        return new _SageMakerClient({ region, credentials });
    }
    throw new Error('Cannot use profile-based credentials: @aws-sdk/credential-providers not available');
}
126
+
/**
 * Detect AWS profile names declared in the shared credentials and config files.
 *
 * File locations honour AWS_SHARED_CREDENTIALS_FILE and AWS_CONFIG_FILE and
 * otherwise default to ~/.aws/credentials and ~/.aws/config. Detection is
 * best-effort: a missing or unreadable file simply contributes no profiles.
 *
 * - credentials file: every `[name]` section is treated as a profile.
 * - config file: `[profile name]` sections are profiles, and so is a bare
 *   `[default]`; other section kinds (e.g. `[sso-session x]`) are ignored.
 *
 * @returns {string[]} Unique profile names, credentials-file entries first
 */
function _detectAwsProfiles() {
    const profiles = new Set();

    const credentialSections = _readIniSectionNames(
        process.env.AWS_SHARED_CREDENTIALS_FILE,
        '.aws/credentials'
    );
    for (const section of credentialSections) {
        profiles.add(section);
    }

    const configSections = _readIniSectionNames(process.env.AWS_CONFIG_FILE, '.aws/config');
    for (const section of configSections) {
        if (section === 'default') {
            // The config file spells the default profile without the "profile" keyword.
            profiles.add(section);
            continue;
        }
        const named = /^profile[ \t]+(.+)$/.exec(section);
        if (named) {
            profiles.add(named[1].trim());
        }
    }

    return [...profiles];
}

/**
 * Read an INI-style file and return the names found in its `[section]` headers.
 *
 * Header matching tolerates CRLF line endings, whitespace around the brackets
 * and a trailing `#`/`;` comment.
 *
 * @param {string|undefined} overridePath - Explicit file path; used when non-empty
 * @param {string} homeRelativePath - Fallback path relative to the home directory
 * @returns {string[]} Section names in file order; empty when the file cannot be read
 */
function _readIniSectionNames(overridePath, homeRelativePath) {
    let text;
    try {
        const filePath = overridePath || resolve(homedir(), homeRelativePath);
        text = readFileSync(filePath, 'utf8');
    } catch {
        // No such file (or not readable) — nothing to report.
        return [];
    }
    const sectionHeader = /^[ \t]*\[[ \t]*([^\]\r\n]+?)[ \t]*\][ \t]*(?:[#;][^\r\n]*)?\r?$/gm;
    return Array.from(text.matchAll(sectionHeader), (match) => match[1]);
}
148
+
149
+ // ── Core logic ───────────────────────────────────────────────────────────────
150
+
/**
 * Fetch InService real-time endpoints together with an estimate of their free GPUs.
 *
 * Candidates come from ListEndpoints (InService only, newest first). Each candidate
 * is inspected with DescribeEndpoint and ListInferenceComponents; only the first
 * production variant of an endpoint is considered. Inspection stops as soon as
 * `limit` endpoints have been accepted, so endpoints that are filtered out for
 * having no capacity do not use up result slots.
 *
 * Free capacity is `instanceCount * gpusPerInstance - allocatedGpus`, or the string
 * '?' when the instance type is not in the instance catalog.
 *
 * @param {object} client - SageMaker client; only `client.send(command)` is used
 * @param {object} [options]
 * @param {number} [options.limit=10] - Maximum number of endpoints to return
 * @param {boolean} [options.showFull=false] - Also return endpoints with no free GPUs
 * @param {number} [options.maxDescribeCalls=10] - Maximum number of endpoints to inspect
 * @returns {Promise<Array<object>>} Objects shaped
 *   { endpointName, variantName, instanceType, instanceCount, icCount, availableGpus, hasInstancePools }
 * @throws Errors from ListEndpoints propagate to the caller; a failure while
 *   inspecting a single endpoint is logged and that endpoint is skipped.
 */
async function fetchEndpoints(client, { limit = 10, showFull = false, maxDescribeCalls = 10 } = {}) {
    // Nothing can be returned for a non-positive limit or inspection budget.
    if (!(limit > 0) || !(maxDescribeCalls > 0)) {
        return [];
    }

    const candidateNames = await _listInServiceEndpointNames(client, maxDescribeCalls);

    const endpoints = [];
    for (const endpointName of candidateNames) {
        if (endpoints.length >= limit) break;

        try {
            const info = await _describeEndpointCapacity(client, endpointName);

            // By default only endpoints with known-free or unknown ('?') capacity are kept.
            const isFull = info.availableGpus !== '?' && info.availableGpus <= 0;
            if (showFull || !isFull) {
                endpoints.push(info);
            }
        } catch (err) {
            const accessDenied = err.name === 'AccessDeniedException' || err.Code === 'AccessDeniedException';
            if (accessDenied) {
                log(`AccessDeniedException for endpoint "${endpointName}" — skipping`);
            } else {
                log(`Warning: could not describe endpoint "${endpointName}": ${err.message}`);
            }
        }
    }

    return endpoints;
}

/**
 * Page through ListEndpoints (InService, newest first) and collect endpoint names.
 *
 * @param {object} client - SageMaker client
 * @param {number} maxNames - Stop once this many names have been collected
 * @returns {Promise<string[]>} Endpoint names in listing order
 */
async function _listInServiceEndpointNames(client, maxNames) {
    const names = [];
    let nextToken;
    do {
        const params = {
            StatusEquals: 'InService',
            SortBy: 'CreationTime',
            SortOrder: 'Descending',
            MaxResults: 100
        };
        if (nextToken) params.NextToken = nextToken;

        const response = await client.send(new _ListEndpointsCommand(params));
        for (const summary of response.Endpoints || []) {
            names.push(summary.EndpointName);
            if (names.length >= maxNames) return names;
        }

        nextToken = response.NextToken;
    } while (nextToken);
    return names;
}

/**
 * Describe one endpoint and estimate how many GPUs remain unallocated on it.
 *
 * @param {object} client - SageMaker client
 * @param {string} endpointName - Endpoint to inspect
 * @returns {Promise<object>} Endpoint info object as returned by fetchEndpoints
 */
async function _describeEndpointCapacity(client, endpointName) {
    const detail = await client.send(new _DescribeEndpointCommand({ EndpointName: endpointName }));

    // Only the first production variant is examined.
    const primaryVariant = (detail.ProductionVariants || [])[0] || {};
    const variantName = primaryVariant.VariantName || 'AllTraffic';
    const instanceType = primaryVariant.InstanceType || 'unknown';
    const instanceCount = primaryVariant.CurrentInstanceCount ?? primaryVariant.DesiredInstanceCount ?? 1;
    const hasInstancePools = !!(primaryVariant.InstancePools && primaryVariant.InstancePools.length > 0);

    const { icCount, totalGpuAllocated } = await _countInferenceComponents(client, endpointName);

    const gpusPerInstance = getGpusForInstance(instanceType);
    const availableGpus = gpusPerInstance === null
        ? '?'
        : (instanceCount * gpusPerInstance) - totalGpuAllocated;

    return {
        endpointName,
        variantName,
        instanceType,
        instanceCount,
        icCount,
        availableGpus,
        hasInstancePools
    };
}

/**
 * Count the inference components attached to an endpoint and sum the accelerator
 * devices they declare.
 *
 * NOTE(review): the ListInferenceComponents API reference appears to document
 * summary objects without Specification / ComputeResourceRequirements — confirm.
 * If that is the case the sum below is always 0, and an accurate figure needs
 * DescribeInferenceComponent per component (and presumably its copy count).
 *
 * @param {object} client - SageMaker client
 * @param {string} endpointName - Endpoint whose components are listed
 * @returns {Promise<{ icCount: number, totalGpuAllocated: number }>}
 */
async function _countInferenceComponents(client, endpointName) {
    let icCount = 0;
    let totalGpuAllocated = 0;
    let nextToken;
    do {
        const params = { EndpointNameEquals: endpointName, MaxResults: 100 };
        if (nextToken) params.NextToken = nextToken;

        const response = await client.send(new _ListInferenceComponentsCommand(params));
        for (const ic of response.InferenceComponents || []) {
            icCount++;
            totalGpuAllocated += ic.Specification?.ComputeResourceRequirements?.NumberOfAcceleratorDevicesRequired
                ?? ic.ComputeResourceRequirements?.NumberOfAcceleratorDevicesRequired
                ?? 0;
        }

        nextToken = response.NextToken;
    } while (nextToken);
    return { icCount, totalGpuAllocated };
}
263
+
/**
 * Turn discovered endpoints into the MCP tool payload.
 *
 * With no endpoints the payload carries empty values, an empty choice list and an
 * explanatory message. Otherwise the first endpoint becomes the suggested value,
 * every endpoint name is offered as a choice, and per-endpoint details are keyed
 * by endpoint name under `metadata`.
 *
 * @param {Array} endpoints - Array of endpoint objects from fetchEndpoints
 * @returns {{ values: object, choices: object, metadata?: object, message?: string }}
 */
function buildResponse(endpoints) {
    const nothingFound = !endpoints || endpoints.length === 0;
    if (nothingFound) {
        return {
            values: {},
            choices: { endpointName: [] },
            message: 'No InService real-time endpoints with available capacity found in the specified region.'
        };
    }

    const toMetadataEntry = ({
        endpointName,
        variantName,
        instanceType,
        instanceCount,
        icCount,
        availableGpus,
        hasInstancePools
    }) => [
        endpointName,
        { variantName, instanceType, instanceCount, icCount, availableGpus, hasInstancePools }
    ];

    const names = endpoints.map(({ endpointName }) => endpointName);
    const [suggested] = names;

    return {
        values: { endpointName: suggested },
        choices: { endpointName: names },
        metadata: Object.fromEntries(endpoints.map(toMetadataEntry))
    };
}
296
+
297
+ // ── EndpointResolver ─────────────────────────────────────────────────────────
298
+
/**
 * EndpointResolver — discovers InService SageMaker real-time endpoints.
 *
 * Extends DynamicResolver to fit the shared resolver pattern. Wraps
 * fetchEndpoints with a credential fallback sequence:
 *   1. the explicitly configured profile (option or AWS_PROFILE), if any;
 *   2. the default client (or the injected clientFactory);
 *   3. every profile detected in the local AWS files, when fromIni is available.
 * The first strategy whose fetch does not throw wins.
 */
class EndpointResolver extends DynamicResolver {
    /**
     * @param {object} [options]
     * @param {string} [options.region] - Falls back to AWS_REGION, then 'us-east-1'
     * @param {string} [options.profile] - Falls back to AWS_PROFILE, then none
     * @param {Function} [options.clientFactory] - `(region) => client`, used for strategy 2
     */
    constructor(options = {}) {
        super();
        this._region = options.region || process.env.AWS_REGION || 'us-east-1';
        this._profile = options.profile || process.env.AWS_PROFILE || null;
        this._clientFactory = options.clientFactory || null;
    }

    /**
     * Fetch endpoints, walking the credential strategies until one succeeds.
     *
     * @param {string} key - Requested parameter key (not consulted by this implementation)
     * @param {object} [options] - { limit = 10, showFull = false }, forwarded to fetchEndpoints
     * @returns {Promise<{ items: Array<object>, defaultItem: object|null }>}
     * @throws The last error seen when every strategy failed, or a generic
     *   'No AWS credentials available' error when none was recorded
     */
    async fetch(key, options = {}) {
        const { limit = 10, showFull = false } = options;

        await _ensureSdkLoaded();

        let lastError = null;

        // Run one strategy: returns the endpoint array, or null after logging the failure.
        const attempt = async (label, makeClient) => {
            try {
                return await fetchEndpoints(makeClient(), { limit, showFull });
            } catch (err) {
                log(`${label} failed: ${err.message}`);
                lastError = err;
                return null;
            }
        };

        let endpoints = null;

        // Strategy 1: If a specific profile was requested, use it directly
        if (this._profile) {
            endpoints = await attempt(
                `Profile "${this._profile}"`,
                () => _createClientWithProfile(this._region, this._profile)
            );
        }

        // Strategy 2: Try the default credential chain
        if (!endpoints) {
            endpoints = await attempt(
                'Default credential chain',
                () => createSageMakerClient(this._region, this._clientFactory)
            );
        }

        // Strategy 3: Detect available AWS profiles and try each
        if (!endpoints && _fromIni) {
            for (const candidate of _detectAwsProfiles()) {
                // The explicit profile already failed in strategy 1; do not repeat it.
                if (candidate === this._profile) continue;

                endpoints = await attempt(
                    `Profile "${candidate}"`,
                    () => _createClientWithProfile(this._region, candidate)
                );
                if (endpoints) {
                    log(`Profile "${candidate}" succeeded`);
                    break;
                }
            }
        }

        if (!endpoints) {
            throw lastError || new Error('No AWS credentials available');
        }

        return {
            items: endpoints,
            defaultItem: endpoints[0] || null
        };
    }

    /**
     * @returns {string[]} Parameter keys this resolver can provide
     */
    supportedKeys() {
        return ['endpointName'];
    }
}
373
+
374
+ // ── MCP Server ───────────────────────────────────────────────────────────────
375
+
const server = new McpServer({
    name: 'endpoint-picker',
    version: '1.0.0'
});

/**
 * Wrap a payload in the MCP text-content envelope used by every tool response.
 *
 * @param {object} payload - JSON-serialisable tool result
 * @returns {{ content: Array<{ type: string, text: string }> }}
 */
function _toolResult(payload) {
    return {
        content: [{
            type: 'text',
            text: JSON.stringify(payload)
        }]
    };
}

// Register the get_inference_endpoints tool
server.tool(
    'get_inference_endpoints',
    'Discovers InService SageMaker real-time endpoints with available capacity for IC attachment',
    {
        parameters: z.array(z.string()).describe('List of parameter names to provide values for'),
        limit: z.number().int().positive().default(10).describe('Maximum number of endpoints to return'),
        context: z.record(z.string(), z.any()).optional().describe('Current configuration context (awsRegion, awsProfile, deploymentTarget)')
    },
    async ({ parameters, limit, context }) => {
        // Respond only when endpointName was requested. If a deploymentTarget is
        // supplied it must be realtime-inference; a missing target is accepted.
        if (!parameters.includes('endpointName')) {
            return _toolResult({ values: {}, choices: {} });
        }

        if (context?.deploymentTarget && context.deploymentTarget !== 'realtime-inference') {
            return _toolResult({ values: {}, choices: {} });
        }

        const region = context?.awsRegion || process.env.AWS_REGION || 'us-east-1';
        const profile = context?.awsProfile || process.env.AWS_PROFILE || null;
        const showFull = context?.showFull || false;
        log(`Querying InService endpoints in region: ${region}${profile ? ` (profile: ${profile})` : ''}`);

        try {
            // The credential fallback sequence (explicit profile -> default chain ->
            // detected profiles) lives in EndpointResolver; reuse it instead of
            // repeating it here. It throws when every strategy fails.
            const resolver = new EndpointResolver({ region, profile });
            const { items: endpoints } = await resolver.fetch('endpointName', { limit, showFull });

            const result = buildResponse(endpoints);

            if (endpoints.length > 0) {
                log(`Found ${endpoints.length} endpoint(s) with available capacity`);
            } else {
                log('No InService endpoints with available capacity found');
            }

            return _toolResult(result);
        } catch (err) {
            log(`Error querying endpoints: ${err.message}`);

            // Handle AccessDeniedException gracefully
            if (err.name === 'AccessDeniedException' || err.Code === 'AccessDeniedException') {
                log('AccessDeniedException — returning empty result');
                return _toolResult({
                    values: {},
                    choices: { endpointName: [] },
                    message: 'Access denied when querying SageMaker endpoints. Check IAM permissions.'
                });
            }

            // Any other failure is reported in-band so the caller still gets a well-formed payload.
            return _toolResult({
                values: {},
                choices: { endpointName: [] },
                error: err.message,
                message: `Failed to query endpoints: ${err.message}`
            });
        }
    }
);
516
+
517
+ // Export for testing
518
+ export {
519
+ fetchEndpoints,
520
+ buildResponse,
521
+ createSageMakerClient,
522
+ getGpusForInstance,
523
+ _ensureSdkLoaded,
524
+ _loadInstanceCatalog,
525
+ EndpointResolver
526
+ };
527
+
// Attach the stdio transport only when this file is the process entry point;
// importing the module (for example from tests) must not start the server.
const entryScript = process.argv[1];
const isMain = entryScript && resolve(entryScript) === __filename;

if (isMain) {
    log('Starting Endpoint Picker MCP server');
    await _ensureSdkLoaded();
    await server.connect(new StdioServerTransport());
}
@@ -0,0 +1,14 @@
1
+ {
2
+ "name": "@amzn/ml-container-creator-endpoint-picker",
3
+ "version": "1.0.0",
4
+ "description": "Discovers InService SageMaker real-time endpoints for IC attachment.",
5
+ "modes": {
6
+ "static": false,
7
+ "smart": false,
8
+ "discover": true
9
+ },
10
+ "catalogs": {},
11
+ "tool": {
12
+ "name": "get_inference_endpoints"
13
+ }
14
+ }
@@ -0,0 +1,18 @@
1
+ {
2
+ "name": "@amzn/ml-container-creator-endpoint-picker",
3
+ "private": true,
4
+ "version": "1.0.0",
5
+ "description": "MCP server that discovers InService SageMaker real-time endpoints with available capacity for IC attachment.",
6
+ "type": "module",
7
+ "main": "index.js",
8
+ "license": "Apache-2.0",
9
+ "scripts": {
10
+ "test": "node test.js"
11
+ },
12
+ "dependencies": {
13
+ "@aws-sdk/client-sagemaker": "^3.700.0",
14
+ "@aws-sdk/credential-providers": "^3.700.0",
15
+ "@modelcontextprotocol/sdk": "^1.0.0",
16
+ "zod": "^3.22.0"
17
+ }
18
+ }