@aws/ml-container-creator 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/bin/cli.js +5 -2
  2. package/config/bootstrap-stack.json +40 -9
  3. package/infra/ci-harness/buildspec.yml +60 -0
  4. package/infra/ci-harness/package-lock.json +5 -1
  5. package/package.json +1 -1
  6. package/servers/README.md +41 -1
  7. package/servers/instance-sizer/index.js +10 -4
  8. package/servers/instance-sizer/lib/model-resolver.js +1 -1
  9. package/servers/lib/catalogs/model-sizes.json +135 -90
  10. package/servers/lib/catalogs/models.json +483 -411
  11. package/src/app.js +33 -2
  12. package/src/lib/bootstrap-command-handler.js +6 -0
  13. package/src/lib/cli-handler.js +1 -1
  14. package/src/lib/config-manager.js +41 -2
  15. package/src/lib/deployment-entry-schema.js +16 -0
  16. package/src/lib/mcp-client.js +3 -3
  17. package/src/lib/prompt-runner.js +179 -8
  18. package/src/lib/prompts.js +253 -7
  19. package/src/lib/registry-command-handler.js +12 -0
  20. package/templates/Dockerfile +12 -0
  21. package/templates/code/serving.properties +14 -0
  22. package/templates/do/adapter +1230 -0
  23. package/templates/do/adapters/.gitkeep +2 -0
  24. package/templates/do/add-ic +130 -0
  25. package/templates/do/benchmark +81 -9
  26. package/templates/do/clean +507 -17
  27. package/templates/do/config +28 -5
  28. package/templates/do/deploy +513 -367
  29. package/templates/do/ic/default.conf +32 -0
  30. package/templates/do/lib/endpoint-config.sh +216 -0
  31. package/templates/do/lib/inference-component.sh +167 -0
  32. package/templates/do/lib/secrets.sh +44 -0
  33. package/templates/do/lib/wait.sh +131 -0
  34. package/templates/do/logs +107 -27
  35. package/templates/do/optimize +528 -0
  36. package/templates/do/register +111 -1
  37. package/templates/do/status +337 -0
  38. package/templates/do/test +80 -28
package/bin/cli.js CHANGED
@@ -98,6 +98,9 @@ program
98
98
  .addOption(new Option('--include-sample', 'Include sample model code'))
99
99
  .addOption(new Option('--include-testing', 'Include test suite'))
100
100
  .addOption(new Option('--test-types <types>', 'Comma-separated test types'))
101
+ .addOption(new Option('--enable-lora', 'Enable LoRA adapter serving (transformers with vllm/sglang/djl-lmi only)'))
102
+ .addOption(new Option('--max-loras <n>', 'Maximum concurrent LoRA adapters in GPU memory (default: 30)'))
103
+ .addOption(new Option('--max-lora-rank <n>', 'Maximum LoRA rank (default: 64)'))
101
104
 
102
105
  // --- MCP & Discovery ---
103
106
  .addOption(new Option('--smart', 'Enable Bedrock-powered smart mode on MCP servers'))
@@ -190,7 +193,7 @@ program.configureHelp({
190
193
  groups.env.push(opt);
191
194
  } else if (['--hf-token', '--hf-token-arn', '--ngc-token', '--ngc-token-arn'].includes(long)) {
192
195
  groups.auth.push(opt);
193
- } else if (['--include-sample', '--include-testing', '--test-types'].includes(long)) {
196
+ } else if (['--include-sample', '--include-testing', '--test-types', '--enable-lora', '--max-loras', '--max-lora-rank'].includes(long)) {
194
197
  groups.features.push(opt);
195
198
  } else if (['--smart', '--discover'].includes(long)) {
196
199
  groups.mcp.push(opt);
@@ -307,7 +310,6 @@ program
307
310
  program
308
311
  .command('registry')
309
312
  .description('Registry operations (list, get, remove, replay, export, import, search) — experimental, may be reconciled with do/register')
310
- .passThroughOptions()
311
313
  .argument('<action>', 'Registry action (log, list, get, remove, replay, export, import, search)')
312
314
  .argument('[args...]', 'Additional arguments')
313
315
  .option('--backend <backend>', 'Filter by backend')
@@ -328,6 +330,7 @@ program
328
330
  .option('--notes <text>', 'Deployment notes')
329
331
  .option('--project', 'Use project-level registry')
330
332
  .option('--parameters <json>', 'Parameters JSON string')
333
+ .option('--ic-list <json>', 'IC list JSON string')
331
334
  .option('--generator-version <version>', 'Generator version')
332
335
  // Options used by `registry list-architectures`
333
336
  .option('--server <name>', 'Filter by server name (for list-architectures)')
package/config/bootstrap-stack.json CHANGED
@@ -7,7 +7,7 @@
7
7
  "Type": "String",
8
8
  "Default": "false",
9
9
  "AllowedValues": ["true", "false"],
10
- "Description": "Whether to create S3 buckets for async inference and batch transform"
10
+ "Description": "Whether to create S3 buckets for async inference, batch transform, adapters, and benchmarks"
11
11
  },
12
12
  "UseExistingRoleArn": {
13
13
  "Type": "String",
@@ -134,8 +134,8 @@
134
134
  "s3:ListBucket"
135
135
  ],
136
136
  "Resource": [
137
- "arn:aws:s3:::ml-container-creator-*",
138
- "arn:aws:s3:::ml-container-creator-*/*"
137
+ "arn:aws:s3:::mlcc-*",
138
+ "arn:aws:s3:::mlcc-*/*"
139
139
  ]
140
140
  },
141
141
  {
@@ -209,7 +209,7 @@
209
209
  "DeletionPolicy": "Retain",
210
210
  "UpdateReplacePolicy": "Retain",
211
211
  "Properties": {
212
- "BucketName": { "Fn::Sub": "${AWS::AccountId}-${AWS::Region}-ml-container-creator-async" },
212
+ "BucketName": { "Fn::Sub": "mlcc-async-${AWS::AccountId}-${AWS::Region}" },
213
213
  "VersioningConfiguration": { "Status": "Enabled" },
214
214
  "BucketEncryption": {
215
215
  "ServerSideEncryptionConfiguration": [
@@ -218,7 +218,8 @@
218
218
  },
219
219
  "Tags": [
220
220
  { "Key": "mlcc:managed-by", "Value": "ml-container-creator" },
221
- { "Key": "mlcc:created-by", "Value": "bootstrap" }
221
+ { "Key": "mlcc:created-by", "Value": "bootstrap" },
222
+ { "Key": "mlcc:purpose", "Value": "async-inference-output" }
222
223
  ]
223
224
  }
224
225
  },
@@ -229,7 +230,7 @@
229
230
  "DeletionPolicy": "Retain",
230
231
  "UpdateReplacePolicy": "Retain",
231
232
  "Properties": {
232
- "BucketName": { "Fn::Sub": "${AWS::AccountId}-${AWS::Region}-ml-container-creator-batch" },
233
+ "BucketName": { "Fn::Sub": "mlcc-batch-${AWS::AccountId}-${AWS::Region}" },
233
234
  "VersioningConfiguration": { "Status": "Enabled" },
234
235
  "BucketEncryption": {
235
236
  "ServerSideEncryptionConfiguration": [
@@ -238,17 +239,40 @@
238
239
  },
239
240
  "Tags": [
240
241
  { "Key": "mlcc:managed-by", "Value": "ml-container-creator" },
241
- { "Key": "mlcc:created-by", "Value": "bootstrap" }
242
+ { "Key": "mlcc:created-by", "Value": "bootstrap" },
243
+ { "Key": "mlcc:purpose", "Value": "batch-transform-io" }
244
+ ]
245
+ }
246
+ },
247
+
248
+ "AdapterS3Bucket": {
249
+ "Type": "AWS::S3::Bucket",
250
+ "Condition": "ShouldCreateS3Buckets",
251
+ "DeletionPolicy": "Retain",
252
+ "UpdateReplacePolicy": "Retain",
253
+ "Properties": {
254
+ "BucketName": { "Fn::Sub": "mlcc-adapters-${AWS::AccountId}-${AWS::Region}" },
255
+ "VersioningConfiguration": { "Status": "Enabled" },
256
+ "BucketEncryption": {
257
+ "ServerSideEncryptionConfiguration": [
258
+ { "ServerSideEncryptionByDefault": { "SSEAlgorithm": "AES256" } }
259
+ ]
260
+ },
261
+ "Tags": [
262
+ { "Key": "mlcc:managed-by", "Value": "ml-container-creator" },
263
+ { "Key": "mlcc:created-by", "Value": "bootstrap" },
264
+ { "Key": "mlcc:purpose", "Value": "lora-adapter-storage" }
242
265
  ]
243
266
  }
244
267
  },
245
268
 
246
269
  "BenchmarkS3Bucket": {
247
270
  "Type": "AWS::S3::Bucket",
271
+ "Condition": "ShouldCreateS3Buckets",
248
272
  "DeletionPolicy": "Retain",
249
273
  "UpdateReplacePolicy": "Retain",
250
274
  "Properties": {
251
- "BucketName": { "Fn::Sub": "ml-container-creator-benchmark-${AWS::Region}-${AWS::AccountId}" },
275
+ "BucketName": { "Fn::Sub": "mlcc-benchmark-${AWS::AccountId}-${AWS::Region}" },
252
276
  "VersioningConfiguration": { "Status": "Enabled" },
253
277
  "BucketEncryption": {
254
278
  "ServerSideEncryptionConfiguration": [
@@ -257,7 +281,8 @@
257
281
  },
258
282
  "Tags": [
259
283
  { "Key": "mlcc:managed-by", "Value": "ml-container-creator" },
260
- { "Key": "mlcc:created-by", "Value": "bootstrap" }
284
+ { "Key": "mlcc:created-by", "Value": "bootstrap" },
285
+ { "Key": "mlcc:purpose", "Value": "benchmark-results" }
261
286
  ]
262
287
  }
263
288
  }
@@ -292,7 +317,13 @@
292
317
  "Description": "S3 bucket for batch transform I/O",
293
318
  "Value": { "Ref": "BatchS3Bucket" }
294
319
  },
320
+ "AdapterS3BucketName": {
321
+ "Condition": "ShouldCreateS3Buckets",
322
+ "Description": "S3 bucket for LoRA adapter storage",
323
+ "Value": { "Ref": "AdapterS3Bucket" }
324
+ },
295
325
  "BenchmarkS3BucketName": {
326
+ "Condition": "ShouldCreateS3Buckets",
296
327
  "Description": "S3 bucket for benchmark results output",
297
328
  "Value": { "Ref": "BenchmarkS3Bucket" }
298
329
  },
package/infra/ci-harness/buildspec.yml CHANGED
@@ -40,6 +40,10 @@ phases:
40
40
  - REGISTER_DURATION=0
41
41
  - REGISTER_LOG_POINTER=""
42
42
  - REGISTER_ERROR_SUMMARY=""
43
+ - ADAPTER_TEST_STATUS="skip"
44
+ - ADAPTER_TEST_DURATION=0
45
+ - ADAPTER_TEST_LOG_POINTER=""
46
+ - ADAPTER_TEST_ERROR_SUMMARY=""
43
47
  - TEARDOWN_STATUS="skip"
44
48
  - TEARDOWN_DURATION=0
45
49
  - TEARDOWN_LOG_POINTER=""
@@ -182,6 +186,54 @@ phases:
182
186
  fi
183
187
  - rm -f "$STAGE_STDERR_FILE"
184
188
 
189
+ # --- Stage: Adapter_Test (only if do/adapters/ has .conf files) ---
190
+ - echo "=== Stage: Adapter_Test ==="
191
+ - STAGE_START=$(date +%s)
192
+ - ADAPTER_TEST_LOG_POINTER="$LOG_POINTER_PREFIX"
193
+ - STAGE_STDERR_FILE=$(mktemp)
194
+ - |
195
+ if [ -n "$FIRST_FAILURE" ]; then
196
+ echo "Skipping Adapter_Test stage due to prior failure in $FIRST_FAILURE"
197
+ ADAPTER_TEST_STATUS="skip"
198
+ ADAPTER_TEST_DURATION=0
199
+ else
200
+ cd /tmp/ci-project
201
+ ADAPTER_CONFS=$(find do/adapters -name '*.conf' 2>/dev/null | grep -v '.gitkeep' || true)
202
+ if [ -z "$ADAPTER_CONFS" ]; then
203
+ echo "No adapter configs found in do/adapters/ — skipping"
204
+ ADAPTER_TEST_STATUS="skip"
205
+ ADAPTER_TEST_DURATION=0
206
+ else
207
+ (
208
+ set -e
209
+ cd /tmp/ci-project
210
+ for conf in do/adapters/*.conf; do
211
+ [ -f "$conf" ] || continue
212
+ [[ "$(basename "$conf")" == ".gitkeep" ]] && continue
213
+ ADAPTER_NAME=$(basename "$conf" .conf)
214
+ echo "Testing adapter: ${ADAPTER_NAME}"
215
+ # Source to get weights URI
216
+ source "$conf"
217
+ ./do/adapter add "${ADAPTER_NAME}" --weights "${ADAPTER_WEIGHTS_URI}"
218
+ ./do/test --ic "${ADAPTER_NAME}"
219
+ ./do/adapter remove "${ADAPTER_NAME}"
220
+ done
221
+ ) 2>"$STAGE_STDERR_FILE"; STAGE_EXIT=$?
222
+ STAGE_END=$(date +%s)
223
+ ADAPTER_TEST_DURATION=$((STAGE_END - STAGE_START))
224
+ if [ "$STAGE_EXIT" -eq 0 ]; then
225
+ ADAPTER_TEST_STATUS="pass"
226
+ echo "Adapter_Test stage passed in ${ADAPTER_TEST_DURATION}s"
227
+ else
228
+ ADAPTER_TEST_STATUS="fail"
229
+ ADAPTER_TEST_ERROR_SUMMARY=$(tail -c 500 "$STAGE_STDERR_FILE" | tr -d '\000' | tr '"' "'" | tr '\n' ' ')
230
+ FIRST_FAILURE="adapter_test"
231
+ echo "Adapter_Test stage FAILED (exit code $STAGE_EXIT) in ${ADAPTER_TEST_DURATION}s"
232
+ fi
233
+ fi
234
+ fi
235
+ - rm -f "$STAGE_STDERR_FILE"
236
+
185
237
  # --- Stage: Register (placeholder) ---
186
238
  - echo "=== Stage: Register ==="
187
239
  - STAGE_START=$(date +%s)
@@ -260,6 +312,7 @@ phases:
260
312
  validate) FINAL_ERROR_MESSAGE="$VALIDATE_ERROR_SUMMARY" ;;
261
313
  build) FINAL_ERROR_MESSAGE="$BUILD_ERROR_SUMMARY" ;;
262
314
  deploy_test) FINAL_ERROR_MESSAGE="$DEPLOY_TEST_ERROR_SUMMARY" ;;
315
+ adapter_test) FINAL_ERROR_MESSAGE="$ADAPTER_TEST_ERROR_SUMMARY" ;;
263
316
  register) FINAL_ERROR_MESSAGE="$REGISTER_ERROR_SUMMARY" ;;
264
317
  *) FINAL_ERROR_MESSAGE="Unknown failure stage" ;;
265
318
  esac
@@ -272,6 +325,7 @@ phases:
272
325
  ESCAPED_VALIDATE_ERROR=$(printf '%s' "$VALIDATE_ERROR_SUMMARY" | sed 's/\\/\\\\/g; s/"/\\"/g')
273
326
  ESCAPED_BUILD_ERROR=$(printf '%s' "$BUILD_ERROR_SUMMARY" | sed 's/\\/\\\\/g; s/"/\\"/g')
274
327
  ESCAPED_DEPLOY_TEST_ERROR=$(printf '%s' "$DEPLOY_TEST_ERROR_SUMMARY" | sed 's/\\/\\\\/g; s/"/\\"/g')
328
+ ESCAPED_ADAPTER_TEST_ERROR=$(printf '%s' "$ADAPTER_TEST_ERROR_SUMMARY" | sed 's/\\/\\\\/g; s/"/\\"/g')
275
329
  ESCAPED_REGISTER_ERROR=$(printf '%s' "$REGISTER_ERROR_SUMMARY" | sed 's/\\/\\\\/g; s/"/\\"/g')
276
330
  ESCAPED_TEARDOWN_ERROR=$(printf '%s' "$TEARDOWN_ERROR_SUMMARY" | sed 's/\\/\\\\/g; s/"/\\"/g')
277
331
  ESCAPED_FINAL_ERROR=$(printf '%s' "$FINAL_ERROR_MESSAGE" | sed 's/\\/\\\\/g; s/"/\\"/g')
@@ -314,6 +368,12 @@ phases:
314
368
  \"logPointer\": {\"S\": \"$DEPLOY_TEST_LOG_POINTER\"},
315
369
  \"errorSummary\": {\"S\": \"$ESCAPED_DEPLOY_TEST_ERROR\"}
316
370
  }},
371
+ \"adapter_test\": {\"M\": {
372
+ \"status\": {\"S\": \"$ADAPTER_TEST_STATUS\"},
373
+ \"durationSeconds\": {\"N\": \"$ADAPTER_TEST_DURATION\"},
374
+ \"logPointer\": {\"S\": \"$ADAPTER_TEST_LOG_POINTER\"},
375
+ \"errorSummary\": {\"S\": \"$ESCAPED_ADAPTER_TEST_ERROR\"}
376
+ }},
317
377
  \"register\": {\"M\": {
318
378
  \"status\": {\"S\": \"$REGISTER_STATUS\"},
319
379
  \"durationSeconds\": {\"N\": \"$REGISTER_DURATION\"},
package/infra/ci-harness/package-lock.json CHANGED
@@ -48,6 +48,7 @@
48
48
  "semver"
49
49
  ],
50
50
  "license": "Apache-2.0",
51
+ "peer": true,
51
52
  "dependencies": {
52
53
  "jsonschema": "~1.4.1",
53
54
  "semver": "^7.7.4"
@@ -2150,6 +2151,7 @@
2150
2151
  "integrity": "sha512-wGdMcf+vPYM6jikpS/qhg6WiqSV/OhG+jeeHT/KlVqxYfD40iYJf9/AE1uQxVWFvU7MipKRkRv8NSHiCGgPr8Q==",
2151
2152
  "dev": true,
2152
2153
  "license": "MIT",
2154
+ "peer": true,
2153
2155
  "dependencies": {
2154
2156
  "undici-types": "~6.21.0"
2155
2157
  }
@@ -2789,7 +2791,8 @@
2789
2791
  "version": "10.6.0",
2790
2792
  "resolved": "https://registry.npmjs.org/constructs/-/constructs-10.6.0.tgz",
2791
2793
  "integrity": "sha512-TxHOnBO5zMo/G76ykzGF/wMpEHu257TbWiIxP9K0Yv/+t70UzgBQiTqjkAsWOPC6jW91DzJI0+ehQV6xDRNBuQ==",
2792
- "license": "Apache-2.0"
2794
+ "license": "Apache-2.0",
2795
+ "peer": true
2793
2796
  },
2794
2797
  "node_modules/create-require": {
2795
2798
  "version": "1.1.1",
@@ -3694,6 +3697,7 @@
3694
3697
  "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
3695
3698
  "dev": true,
3696
3699
  "license": "Apache-2.0",
3700
+ "peer": true,
3697
3701
  "bin": {
3698
3702
  "tsc": "bin/tsc",
3699
3703
  "tsserver": "bin/tsserver"
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aws/ml-container-creator",
3
- "version": "0.4.0",
3
+ "version": "0.6.0",
4
4
  "description": "Generator for SageMaker AI BYOC paradigm for predictive inference use-cases.",
5
5
  "type": "module",
6
6
  "main": "src/app.js",
package/servers/README.md CHANGED
@@ -15,7 +15,12 @@ servers/
15
15
  │ ├── test.js # Standalone tests (node test.js)
16
16
  │ ├── package.json
17
17
  │ └── LICENSE
18
- └── region-picker/ # AWS region suggestion server
18
+ ├── region-picker/ # AWS region suggestion server
19
+ │ ├── index.js # MCP server entry point
20
+ │ ├── test.js # Standalone tests (node test.js)
21
+ │ ├── package.json
22
+ │ └── LICENSE
23
+ └── endpoint-picker/ # SageMaker endpoint discovery server
19
24
  ├── index.js # MCP server entry point
20
25
  ├── test.js # Standalone tests (node test.js)
21
26
  ├── package.json
@@ -74,6 +79,39 @@ Suggests AWS regions for SageMaker deployments based on a search term. Filters t
74
79
  }
75
80
  ```
76
81
 
82
+ ### endpoint-picker
83
+
84
+ Discovers InService SageMaker real-time endpoints with available GPU capacity for attaching new inference components. Uses `ListEndpoints`, `DescribeEndpoint`, and `ListInferenceComponents` to calculate available capacity.
85
+
86
+ **Discover mode:** Queries the SageMaker API using a 3-strategy credential fallback (explicit profile → default chain → detect profiles). No static mode — always requires AWS credentials.
87
+
88
+ **Tool:** `get_inference_endpoints`
89
+
90
+ | Input Field | Type | Description |
91
+ |-------------|------|-------------|
92
+ | `parameters` | `string[]` | Must include `"endpointName"` to get results |
93
+ | `limit` | `number` | Max endpoints to return (default: 10) |
94
+ | `context` | `object` | `awsRegion`, `awsProfile`, `deploymentTarget` (must be `realtime-inference`) |
95
+
96
+ **Example response:**
97
+
98
+ ```json
99
+ {
100
+ "values": { "endpointName": "my-endpoint-1234567890" },
101
+ "choices": { "endpointName": ["my-endpoint-1234567890", "prod-llm-endpoint"] },
102
+ "metadata": {
103
+ "my-endpoint-1234567890": {
104
+ "variantName": "AllTraffic",
105
+ "instanceType": "ml.g6e.48xlarge",
106
+ "instanceCount": 1,
107
+ "icCount": 2,
108
+ "availableGpus": 4,
109
+ "hasInstancePools": false
110
+ }
111
+ }
112
+ }
113
+ ```
114
+
77
115
  ## Usage
78
116
 
79
117
  ### Adding a Bundled Server
@@ -297,6 +335,7 @@ The Bedrock API didn't respond within 10 seconds. This usually means network con
297
335
  ```bash
298
336
  node servers/region-picker/test.js
299
337
  node servers/instance-recommender/test.js
338
+ node servers/endpoint-picker/test.js
300
339
  ```
301
340
 
302
341
  ### Smart Mode Not Activating
@@ -313,6 +352,7 @@ Each server has standalone tests that run without AWS credentials or network acc
313
352
  # Run individual server tests
314
353
  node servers/region-picker/test.js
315
354
  node servers/instance-recommender/test.js
355
+ node servers/endpoint-picker/test.js
316
356
 
317
357
  # Run all server tests from the project root
318
358
  npm run test:servers
package/servers/instance-sizer/index.js CHANGED
@@ -51,7 +51,7 @@ try {
51
51
 
52
52
  // ── Mode configuration ───────────────────────────────────────────────────────
53
53
 
54
- const DISCOVER_MODE = process.argv.includes('--discover') || process.env.DISCOVER_MODE === 'true'
54
+ const DISCOVER_MODE = process.env.DISCOVER_MODE !== 'false' && !process.argv.includes('--no-discover')
55
55
  const SMART_MODE = process.env.BEDROCK_SMART === 'true'
56
56
  const BEDROCK_MODEL = process.env.BEDROCK_MODEL || 'global.anthropic.claude-sonnet-4-20250514-v1:0'
57
57
  const BEDROCK_REGION = process.env.BEDROCK_REGION || process.env.AWS_REGION || 'us-east-1'
@@ -383,6 +383,7 @@ async function handleGetInstanceRecommendation(params) {
383
383
  // Step 3a: Quota & availability filtering (discover mode only)
384
384
  let preQuotaFilterCount = 0
385
385
  let allFilteredByQuota = false
386
+ let preQuotaRecommendations = []
386
387
  if (DISCOVER_MODE && recommendations.length > 0) {
387
388
  try {
388
389
  const region = process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION || BEDROCK_REGION
@@ -396,6 +397,7 @@ async function handleGetInstanceRecommendation(params) {
396
397
  ])
397
398
 
398
399
  preQuotaFilterCount = recommendations.length
400
+ preQuotaRecommendations = [...recommendations]
399
401
  recommendations = applyAvailabilityRanking(
400
402
  recommendations,
401
403
  quotas.status === 'fulfilled' ? quotas.value : null,
@@ -404,6 +406,10 @@ async function handleGetInstanceRecommendation(params) {
404
406
  )
405
407
  if (recommendations.length === 0 && preQuotaFilterCount > 0) {
406
408
  allFilteredByQuota = true
409
+ // Restore pre-filter recommendations so user can see compatible instances
410
+ // and request quota increases for the ones they want
411
+ recommendations = preQuotaRecommendations
412
+ log(`All ${preQuotaFilterCount} instances filtered by zero-quota — restoring unfiltered list`)
407
413
  }
408
414
  } catch (err) {
409
415
  // Graceful degradation: if credentials are missing or any unexpected
@@ -587,10 +593,10 @@ const isMain = process.argv[1] && resolve(process.argv[1]) === __filename
587
593
  if (isMain) {
588
594
  if (SMART_MODE) {
589
595
  log(`Smart mode enabled (model: ${BEDROCK_MODEL}, region: ${BEDROCK_REGION})`)
590
- } else if (DISCOVER_MODE) {
591
- log('Discover mode enabled (HuggingFace API lookups active)')
596
+ } else if (!DISCOVER_MODE) {
597
+ log('Static mode (catalog-only, no network calls) — use --no-discover to force this')
592
598
  } else {
593
- log('Static mode (catalog-only, no network calls)')
599
+ log('Discover mode (HuggingFace API + quota lookups active)')
594
600
  }
595
601
 
596
602
  const transport = new StdioServerTransport()
package/servers/instance-sizer/lib/model-resolver.js CHANGED
@@ -207,7 +207,7 @@ const isHuggingFacePattern = (modelName) => {
207
207
  * @returns {Promise<{ parameterCount: number, dtype: string, architecture: string, maxPositionEmbeddings: number, source: string } | null>}
208
208
  */
209
209
  const resolveModelMetadata = async (modelName, options = {}) => {
210
- const { discover = false, catalogPath } = options
210
+ const { discover = true, catalogPath } = options
211
211
 
212
212
  // Tier 1: Catalog lookup
213
213
  const catalog = await loadCatalog(catalogPath)