@aws/ml-container-creator 0.9.1 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/LICENSE-THIRD-PARTY +9304 -0
  2. package/bin/cli.js +2 -0
  3. package/config/bootstrap-e2e-stack.json +341 -0
  4. package/config/bootstrap-stack.json +40 -3
  5. package/config/parameter-schema-v2.json +2049 -0
  6. package/config/tune-catalog.json +1781 -0
  7. package/infra/ci-harness/buildspec.yml +1 -0
  8. package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
  9. package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
  10. package/infra/ci-harness/lib/ci-harness-stack.ts +837 -7
  11. package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
  12. package/package.json +53 -68
  13. package/servers/base-image-picker/index.js +121 -121
  14. package/servers/e2e-status/index.js +297 -0
  15. package/servers/e2e-status/manifest.json +14 -0
  16. package/servers/e2e-status/package.json +15 -0
  17. package/servers/endpoint-picker/LICENSE +202 -0
  18. package/servers/endpoint-picker/index.js +536 -0
  19. package/servers/endpoint-picker/manifest.json +14 -0
  20. package/servers/endpoint-picker/package.json +18 -0
  21. package/servers/hyperpod-cluster-picker/index.js +125 -125
  22. package/servers/instance-sizer/index.js +138 -138
  23. package/servers/instance-sizer/lib/instance-ranker.js +76 -76
  24. package/servers/instance-sizer/lib/model-resolver.js +61 -61
  25. package/servers/instance-sizer/lib/quota-resolver.js +113 -113
  26. package/servers/instance-sizer/lib/vram-estimator.js +31 -31
  27. package/servers/lib/bedrock-client.js +38 -38
  28. package/servers/lib/catalogs/jumpstart-public.json +101 -16
  29. package/servers/lib/catalogs/model-servers.json +201 -3
  30. package/servers/lib/catalogs/models.json +182 -26
  31. package/servers/lib/custom-validators.js +13 -13
  32. package/servers/lib/dynamic-resolver.js +4 -4
  33. package/servers/marketplace-picker/index.js +342 -0
  34. package/servers/marketplace-picker/manifest.json +14 -0
  35. package/servers/marketplace-picker/package.json +18 -0
  36. package/servers/model-picker/index.js +382 -382
  37. package/servers/region-picker/index.js +56 -56
  38. package/servers/workload-picker/LICENSE +202 -0
  39. package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
  40. package/servers/workload-picker/index.js +171 -0
  41. package/servers/workload-picker/manifest.json +16 -0
  42. package/servers/workload-picker/package.json +16 -0
  43. package/src/app.js +4 -390
  44. package/src/lib/bootstrap-command-handler.js +710 -1148
  45. package/src/lib/bootstrap-config.js +36 -0
  46. package/src/lib/bootstrap-profile-manager.js +641 -0
  47. package/src/lib/bootstrap-provisioners.js +421 -0
  48. package/src/lib/ci-register-helpers.js +74 -0
  49. package/src/lib/config-loader.js +408 -0
  50. package/src/lib/config-manager.js +66 -1685
  51. package/src/lib/config-mcp-client.js +118 -0
  52. package/src/lib/config-validator.js +634 -0
  53. package/src/lib/cuda-resolver.js +149 -0
  54. package/src/lib/e2e-catalog-validator.js +251 -3
  55. package/src/lib/e2e-ci-recorder.js +103 -0
  56. package/src/lib/generated/cli-options.js +315 -311
  57. package/src/lib/generated/parameter-matrix.js +671 -0
  58. package/src/lib/generated/validation-rules.js +71 -71
  59. package/src/lib/marketplace-flow.js +276 -0
  60. package/src/lib/mcp-query-runner.js +768 -0
  61. package/src/lib/parameter-schema-validator.js +62 -18
  62. package/src/lib/path-prover-brain.js +607 -0
  63. package/src/lib/prompt-runner.js +41 -1504
  64. package/src/lib/prompts/feature-prompts.js +172 -0
  65. package/src/lib/prompts/index.js +48 -0
  66. package/src/lib/prompts/infrastructure-prompts.js +690 -0
  67. package/src/lib/prompts/model-prompts.js +552 -0
  68. package/src/lib/prompts/project-prompts.js +82 -0
  69. package/src/lib/prompts.js +2 -1446
  70. package/src/lib/registry-command-handler.js +135 -3
  71. package/src/lib/secrets-prompt-runner.js +251 -0
  72. package/src/lib/template-variable-resolver.js +422 -0
  73. package/src/lib/tune-catalog-validator.js +37 -4
  74. package/templates/Dockerfile +9 -0
  75. package/templates/code/adapter_sidecar.py +444 -0
  76. package/templates/code/serve +6 -0
  77. package/templates/code/serve.d/vllm.ejs +1 -1
  78. package/templates/do/.benchmark_writer.py +1476 -0
  79. package/templates/do/.tune_helper.py +982 -57
  80. package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
  81. package/templates/do/adapter +149 -0
  82. package/templates/do/benchmark +639 -85
  83. package/templates/do/config +108 -5
  84. package/templates/do/deploy.d/managed-inference.ejs +192 -11
  85. package/templates/do/optimize +106 -37
  86. package/templates/do/register +89 -0
  87. package/templates/do/test +13 -0
  88. package/templates/do/tune +378 -59
  89. package/templates/do/validate +44 -4
  90. package/config/parameter-schema.json +0 -88
@@ -0,0 +1,422 @@
1
+ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ // SPDX-License-Identifier: Apache-2.0
3
+
4
+ import fs from 'fs';
5
+ import path from 'path';
6
+ import { fileURLToPath } from 'url';
7
+ import { isTuneSupported, lookupModel } from './tune-catalog-validator.js';
8
+
9
+ const __filename = fileURLToPath(import.meta.url);
10
+ const __dirname = path.dirname(__filename);
11
+
12
/**
 * Finds model configuration by exact match or glob-pattern match.
 *
 * Registry keys that contain `*` are treated as glob patterns in which `*`
 * matches any run of characters (including none). Every other character of
 * the key is matched literally, so regex metacharacters that commonly occur
 * in model IDs (`.`, `+`, `(`, `[`, ...) can neither widen the match nor make
 * the generated expression fail to compile.
 *
 * @param {string} modelName - Model ID to look up
 * @param {object} registryConfigManager - Registry configuration manager
 * @returns {object|null} Model configuration or null
 */
function _findModelConfig(modelName, registryConfigManager) {
    const registry = registryConfigManager?.modelRegistry;
    if (!registry) return null;

    // Exact match first
    const exact = registry[modelName];
    if (exact) return exact;

    // Pattern matching with glob-style wildcards, in registry key order
    for (const [pattern, config] of Object.entries(registry)) {
        if (!pattern.includes('*')) continue;

        // Escape each literal segment; only the `*` separators become `.*`.
        const source = pattern
            .split('*')
            .map((literal) => literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
            .join('.*');

        if (new RegExp(`^${source}$`).test(modelName)) {
            return config;
        }
    }

    return null;
}
38
+
39
/**
 * Rebuilds `answers.envVars` by layering every env-var source on top of the
 * previous one. Later layers win:
 *   1. `envVars` of the resolved framework catalog entry
 *   2. `envVars` of the selected framework profile (`answers.frameworkProfile`)
 *   3. `envVars` of the matching model catalog entry
 *   4. `envVars` of the selected model profile (`answers.modelProfile`)
 *   5. whatever was already in `answers.envVars` (user CLI input)
 *
 * The framework entry is looked up under `answers.framework`, falling back to
 * `answers.deploymentConfig`; when the requested version is missing, the
 * highest version key (numeric-aware ordering) is used instead.
 *
 * Returns without touching `answers` when no registry manager is supplied.
 *
 * @param {object} answers - Configuration answers (envVars is replaced in place)
 * @param {object|null} registryConfigManager - Registry configuration manager
 */
export async function _mergeEnvVarsWithPrecedence(answers, registryConfigManager) {
    if (!registryConfigManager) {
        return;
    }

    // Snapshot the user's values first so they can be re-applied last.
    const userSupplied = { ...answers.envVars };

    // --- framework entry -------------------------------------------------
    const frameworkKey = answers.framework || answers.deploymentConfig;
    const versionTable = (frameworkKey && registryConfigManager.frameworkRegistry)
        ? registryConfigManager.frameworkRegistry[frameworkKey]
        : null;

    let frameworkEntry = null;
    if (versionTable) {
        const requestedVersion = answers.frameworkVersion;
        if (requestedVersion && versionTable[requestedVersion]) {
            frameworkEntry = versionTable[requestedVersion];
        } else {
            // No usable explicit version (e.g. Triton): take the newest key.
            const newestFirst = Object.keys(versionTable).sort((left, right) =>
                right.localeCompare(left, undefined, { numeric: true })
            );
            if (newestFirst.length > 0) {
                frameworkEntry = versionTable[newestFirst[0]];
            }
        }
    }

    // --- model entry (exact key or glob pattern) ---------------------------
    const modelEntry = (answers.modelName && registryConfigManager.modelRegistry)
        ? _findModelConfig(answers.modelName, registryConfigManager)
        : null;

    // --- individual layers -------------------------------------------------
    const fromFramework = frameworkEntry?.envVars || {};

    const fromFrameworkProfile = (answers.frameworkProfile && frameworkEntry?.profiles)
        ? (frameworkEntry.profiles[answers.frameworkProfile]?.envVars || {})
        : {};

    const fromModel = modelEntry?.envVars || {};

    const fromModelProfile = (answers.modelProfile && modelEntry?.profiles)
        ? (modelEntry.profiles[answers.modelProfile]?.envVars || {})
        : {};

    // Lowest precedence first; each spread overrides the keys before it.
    answers.envVars = {
        ...fromFramework,
        ...fromFrameworkProfile,
        ...fromModel,
        ...fromModelProfile,
        ...userSupplied
    };
}
120
+
121
/**
 * Validates the environment variables of the resolved framework catalog entry
 * through `registryConfigManager.validateEnvironmentVariables` and prints the
 * resulting errors, warnings and validation strategies to the console.
 *
 * Framework entry resolution:
 *   - Triton (`answers.architecture === 'triton'` with a `deploymentConfig`):
 *     the entry stored under the deployment-config key (e.g. 'triton-fil'),
 *     using its latest version (numeric-aware ordering, the same rule the
 *     other resolvers in this module apply).
 *   - Otherwise (or when the Triton lookup finds nothing): the entry for
 *     `answers.framework` / `answers.frameworkVersion`.
 *
 * Returns without output when there is no registry manager, no matching
 * framework entry, or the entry declares no `envVars`.
 *
 * @param {object} answers - Configuration answers
 * @param {object|null} registryConfigManager - Registry configuration manager
 * @throws {Error} When the validation result contains at least one error
 */
export async function _validateEnvironmentVariables(answers, registryConfigManager) {
    // Nothing to validate against without a registry.
    if (!registryConfigManager) {
        return;
    }

    const frameworkRegistry = registryConfigManager.frameworkRegistry;

    // For Triton configs, look up using deploymentConfig key (e.g. 'triton-fil')
    let frameworkConfig;
    if (answers.architecture === 'triton' && answers.deploymentConfig) {
        const tritonEntry = frameworkRegistry?.[answers.deploymentConfig];
        if (tritonEntry) {
            // Latest version first, so the entry validated here is chosen by the
            // same rule the env-var merge uses when it falls back to "latest".
            const versions = Object.keys(tritonEntry).sort((a, b) =>
                b.localeCompare(a, undefined, { numeric: true })
            );
            if (versions.length > 0) {
                frameworkConfig = tritonEntry[versions[0]];
            }
        }
    }
    if (!frameworkConfig) {
        frameworkConfig = frameworkRegistry?.[answers.framework]?.[answers.frameworkVersion];
    }

    if (!frameworkConfig || !frameworkConfig.envVars) {
        return; // No env vars to validate
    }

    console.log('\n🔍 Validating environment variables...');

    // NOTE(review): this validates the catalog entry's own envVars, not the
    // merged answers.envVars — confirm that is the intended input.
    const validationResult = registryConfigManager.validateEnvironmentVariables(
        frameworkConfig.envVars,
        frameworkConfig
    );

    const errors = validationResult.errors;
    const warnings = validationResult.warnings;
    const strategiesUsed = validationResult.strategiesUsed;
    const hasErrors = Boolean(errors && errors.length > 0);
    const hasWarnings = Boolean(warnings && warnings.length > 0);

    // Display validation results
    if (hasErrors) {
        console.log('\n❌ Environment Variable Validation Errors:');
        errors.forEach(error => {
            console.log(` • ${error.key}: ${error.message}`);
        });
    }

    if (hasWarnings) {
        console.log('\n⚠️ Environment Variable Validation Warnings:');
        warnings.forEach(warning => {
            console.log(` • ${warning.key ? `${warning.key}: ` : ''}${warning.message}`);
        });
    }

    if (strategiesUsed && strategiesUsed.length > 0) {
        console.log(`\n✅ Validation methods used: ${strategiesUsed.join(', ')}`);
    }

    if (!hasErrors && !hasWarnings) {
        console.log(' ✅ All environment variables validated successfully');
    }

    // Errors are fatal regardless of prompt mode; warnings are informational.
    if (hasErrors) {
        throw new Error('Environment variable validation failed. Please fix the errors and try again.');
    }
}
187
+
188
/**
 * Fills every template variable the EJS templates reference so none of them
 * is `undefined`, then enriches `answers` from the registries and the bundled
 * catalogs. Steps run in this order and mutate `answers` in place:
 *
 *   1. apply static defaults to keys that are `undefined`
 *   2. derive `framework` / `modelServer` from `architecture` / `backend`
 *   3. force `includeTesting` on and choose default `testTypes`
 *   4. merge catalog env vars (see `_mergeEnvVarsWithPrecedence`)
 *   5. Triton: resolve `baseImage`, `inferenceAmiVersion`, `accelerator`
 *   6. transformers: chat template (HuggingFace, then Model Registry
 *      override) and framework-level metadata
 *   7. catalog fallback for `baseImage`
 *   8. `icGpuCount` from `gpuCount` or the bundled instances catalog
 *   9. `tuneSupported` and `tuneModelId` from the bundled tune catalog
 *
 * Failures in steps 6, 8 and 9 are swallowed on purpose; the values set by
 * earlier steps (or the template's own fallbacks) remain in effect.
 *
 * @param {object} answers - Answers object to fill defaults into
 * @param {object|null} registryConfigManager - Registry configuration manager (or null)
 */
export async function _ensureTemplateVariables(answers, registryConfigManager = null) {
    // Descending, numeric-aware ordering of version keys (newest first).
    const newestFirst = (left, right) => right.localeCompare(left, undefined, { numeric: true });

    // Reads and parses the tune catalog shipped with the package; may throw.
    const readTuneCatalog = () => {
        const catalogFile = path.resolve(__dirname, '..', '..', 'config', 'tune-catalog.json');
        return JSON.parse(fs.readFileSync(catalogFile, 'utf-8'));
    };

    // ---- 1. static defaults ----------------------------------------------
    const fallbackValues = {
        chatTemplate: null,
        chatTemplateSource: null,
        hfToken: null,
        hfTokenArn: null,
        ngcApiKey: null,
        ngcTokenArn: null,
        envVars: {},
        inferenceAmiVersion: null,
        accelerator: null,
        frameworkVersion: null,
        validationLevel: 'unknown',
        configSources: [],
        recommendedInstanceTypes: [],
        roleArn: null,
        deploymentConfig: '',
        architecture: null,
        backend: null,
        engine: null,
        codebuildComputeType: null,
        codebuildProjectName: null,
        modelName: null,
        modelFormat: null,
        includeSampleModel: true,
        includeTesting: true,
        testTypes: [],
        buildTimestamp: new Date().toISOString(),
        buildTarget: 'codebuild',
        deploymentTarget: 'realtime-inference',
        hyperPodCluster: null,
        hyperPodNamespace: 'default',
        hyperPodReplicas: 1,
        fsxVolumeHandle: null,
        baseImage: null,
        modelSource: 'huggingface',
        artifactUri: '',
        modelLoadStrategy: 'runtime',
        existingEndpointName: null,
        enableLora: false,
        maxLoras: 30,
        maxLoraRank: 64
    };

    for (const [name, fallback] of Object.entries(fallbackValues)) {
        // Only `undefined` is replaced; explicit null/false/0/'' are kept.
        if (answers[name] === undefined) {
            answers[name] = fallback;
        }
    }

    // ---- 2. backward compatibility aliases ---------------------------------
    if (answers.architecture && !answers.framework) {
        answers.framework = answers.architecture;
    }
    if (answers.backend && !answers.modelServer) {
        answers.modelServer = answers.backend;
    }

    // ---- 3. testing is always included -------------------------------------
    answers.includeTesting = true;
    if (!answers.testTypes || answers.testTypes.length === 0) {
        const isTransformers =
            answers.architecture === 'transformers' || answers.framework === 'transformers';
        answers.testTypes = isTransformers
            ? ['hosted-model-endpoint']
            : ['local-model-cli', 'local-model-server', 'hosted-model-endpoint'];
    }

    // ---- 4. env vars --------------------------------------------------------
    await _mergeEnvVarsWithPrecedence(answers, registryConfigManager);

    // ---- 5. Triton base image ----------------------------------------------
    if (answers.architecture === 'triton' && !answers.baseImage) {
        // The framework registry is keyed by deployment config for Triton.
        const tritonKey = answers.deploymentConfig;
        const tritonVersions = (tritonKey && registryConfigManager?.frameworkRegistry)
            ? registryConfigManager.frameworkRegistry[tritonKey]
            : null;

        if (tritonVersions) {
            const orderedKeys = Object.keys(tritonVersions).sort(newestFirst);
            if (orderedKeys.length > 0) {
                const newestEntry = tritonVersions[orderedKeys[0]];
                if (newestEntry.baseImage) {
                    answers.baseImage = newestEntry.baseImage;
                }
                if (newestEntry.inferenceAmiVersion && !answers.inferenceAmiVersion) {
                    answers.inferenceAmiVersion = newestEntry.inferenceAmiVersion;
                }
                if (newestEntry.accelerator) {
                    answers.accelerator = newestEntry.accelerator;
                }
            }
        }

        // Last resort: hardcoded default Triton image.
        if (!answers.baseImage) {
            answers.baseImage = 'nvcr.io/nvidia/tritonserver:24.08-py3';
        }
    }

    // ---- 6. transformers enrichment ----------------------------------------
    if (answers.framework === 'transformers' && answers.modelName && registryConfigManager) {
        try {
            const hubData = await registryConfigManager._fetchHuggingFaceData(answers.modelName);

            // HuggingFace template is used only when none is set yet.
            if (hubData?.chatTemplate && !answers.chatTemplate) {
                answers.chatTemplate = hubData.chatTemplate;
                answers.chatTemplateSource = 'HuggingFace_Hub_API';
            }

            // A Model Registry template always overrides.
            if (registryConfigManager.modelRegistry) {
                const registryEntry = _findModelConfig(answers.modelName, registryConfigManager);
                if (registryEntry?.chatTemplate) {
                    answers.chatTemplate = registryEntry.chatTemplate;
                    answers.chatTemplateSource = 'Model_Registry';
                }
            }

            // Framework-level metadata (non-envVar fields).
            if (answers.frameworkVersion && registryConfigManager.frameworkRegistry) {
                const versionEntry =
                    registryConfigManager.frameworkRegistry[answers.framework]?.[answers.frameworkVersion];
                if (versionEntry) {
                    if (versionEntry.inferenceAmiVersion && !answers.inferenceAmiVersion) {
                        answers.inferenceAmiVersion = versionEntry.inferenceAmiVersion;
                    }
                    if (versionEntry.accelerator) {
                        answers.accelerator = versionEntry.accelerator;
                    }
                }
            }
        } catch {
            // Silently continue - defaults are already set
        }
    }

    // ---- 7. catalog fallback for baseImage ---------------------------------
    // Covers --skip-prompts and any case where MCP/CLI/config supplied no image.
    // Precedence: MCP > CLI > config > catalog default (this block).
    if (!answers.baseImage && registryConfigManager?.frameworkRegistry) {
        const serverKey = answers.backend || answers.modelServer;
        const serverVersions = serverKey
            ? registryConfigManager.frameworkRegistry[serverKey]
            : undefined;

        if (serverVersions) {
            const pinnedVersion = answers.frameworkVersion;
            let chosenEntry = null;
            if (pinnedVersion && serverVersions[pinnedVersion]) {
                chosenEntry = serverVersions[pinnedVersion];
            } else {
                const orderedKeys = Object.keys(serverVersions).sort(newestFirst);
                if (orderedKeys.length > 0) {
                    chosenEntry = serverVersions[orderedKeys[0]];
                }
            }
            if (chosenEntry?.baseImage) {
                answers.baseImage = chosenEntry.baseImage;
            }
        }
    }

    // ---- 8. icGpuCount -------------------------------------------------------
    // The deploy template uses IC_GPU_COUNT unconditionally for
    // NumberOfAcceleratorDevicesRequired, so GPU deployments need a value.
    if (answers.icGpuCount == null && answers.instanceType) {
        if (answers.gpuCount) {
            // Instance-sizer recommendation wins when present.
            answers.icGpuCount = answers.gpuCount;
        } else {
            try {
                const instancesFile = path.resolve(
                    __dirname, '..', '..', 'servers', 'lib', 'catalogs', 'instances.json'
                );
                const instances = JSON.parse(fs.readFileSync(instancesFile, 'utf-8'));
                const gpus = instances?.catalog?.[answers.instanceType]?.gpus;
                if (gpus && gpus > 0) {
                    answers.icGpuCount = gpus;
                }
            } catch {
                // Silently continue — template fallback handles missing value
            }
        }
    }

    // ---- 9a. tuneSupported ----------------------------------------------------
    // Used by the do/config template to write TUNE_SUPPORTED=true|false.
    if (answers.tuneSupported === undefined) {
        try {
            const tuneCatalog = readTuneCatalog();
            answers.tuneSupported = isTuneSupported(answers.modelName || '', tuneCatalog);
        } catch {
            answers.tuneSupported = false;
        }
    }

    // ---- 9b. tuneModelId -------------------------------------------------------
    // Static lookup (no network): maps the HuggingFace model ID to the catalog key
    // (Hub content name) of the entry that lookupModel returns.
    if (answers.tuneModelId === undefined) {
        if (answers.tuneSupported && answers.modelName) {
            try {
                const tuneCatalog = readTuneCatalog();
                const matchedEntry = lookupModel(answers.modelName, tuneCatalog);
                if (matchedEntry) {
                    const owningPair = Object.entries(tuneCatalog.models)
                        .find(([, candidate]) => candidate === matchedEntry);
                    const hubContentName = owningPair?.[0];
                    if (hubContentName) {
                        answers.tuneModelId = hubContentName;
                    }
                }
            } catch {
                // Silently continue — tuneModelId will be set to null below
            }
        }
        if (!answers.tuneModelId) {
            answers.tuneModelId = null;
        }
    }
}
@@ -13,7 +13,8 @@
13
13
 
14
14
  /**
15
15
  * Look up a model entry in the catalog by model ID.
16
- * @param {string} modelId - The model ID to look up
16
+ * Tries: direct key match, huggingFaceId field match, then normalized/suffix matching.
17
+ * @param {string} modelId - The model ID to look up (Hub content name or HuggingFace ID)
17
18
  * @param {Object} catalog - The tune catalog object with a `models` map
18
19
  * @returns {Object|null} The catalog entry for the model, or null if not found
19
20
  */
@@ -21,10 +22,42 @@ export function lookupModel(modelId, catalog) {
21
22
  if (!catalog || !catalog.models) {
22
23
  return null;
23
24
  }
24
- if (!Object.hasOwn(catalog.models, modelId)) {
25
- return null;
25
+
26
+ // Direct key match (Hub content name)
27
+ if (Object.hasOwn(catalog.models, modelId)) {
28
+ return catalog.models[modelId] || null;
29
+ }
30
+
31
+ // Match by huggingFaceId field (e.g., "Qwen/Qwen3-0.6B")
32
+ for (const [, entry] of Object.entries(catalog.models)) {
33
+ if (entry.huggingFaceId === modelId) {
34
+ return entry;
35
+ }
36
+ }
37
+
38
+ // Normalized match: strip org prefix, lowercase, replace dots/spaces with hyphens
39
+ const normalized = modelId.split('/').pop().toLowerCase().replace(/[.\s]+/g, '-');
40
+ if (normalized && Object.hasOwn(catalog.models, normalized)) {
41
+ return catalog.models[normalized] || null;
26
42
  }
27
- return catalog.models[modelId] || null;
43
+
44
+ // Try without trailing suffixes like -instruct, -chat, -hf, -base
45
+ const base = normalized ? normalized.replace(/-(instruct|chat|hf|base)$/i, '') : '';
46
+ if (base && base !== normalized && Object.hasOwn(catalog.models, base)) {
47
+ return catalog.models[base] || null;
48
+ }
49
+
50
+ // Suffix match: catalog keys may have prefixes (e.g., "huggingface-reasoning-")
51
+ // Match if a catalog key ends with the normalized name (must be non-trivial match)
52
+ if (normalized && normalized.length >= 4) {
53
+ for (const [key, entry] of Object.entries(catalog.models)) {
54
+ if (key.endsWith(normalized) || (base && base.length >= 4 && key.endsWith(base))) {
55
+ return entry || null;
56
+ }
57
+ }
58
+ }
59
+
60
+ return null;
28
61
  }
29
62
 
30
63
  /**
@@ -243,6 +243,7 @@ ENV <%= key %>=<%= value %>
243
243
  ENV VLLM_ENABLE_LORA=true
244
244
  ENV VLLM_MAX_LORAS=<%= maxLoras %>
245
245
  ENV VLLM_MAX_LORA_RANK=<%= maxLoraRank %>
246
+ ENV VLLM_ALLOW_RUNTIME_LORA_UPDATING=true
246
247
  <% } %>
247
248
  <% if (enableLora && modelServer === 'sglang') { %>
248
249
  # LoRA adapter serving configuration
@@ -307,9 +308,17 @@ COPY code/serving.properties /opt/ml/model/serving.properties
307
308
  # LMI/DJL containers use their own entrypoint
308
309
  # The container will automatically start DJL Serving with the configuration
309
310
  <% } else { %>
311
+ <% if (enableLora && (modelServer === 'vllm' || modelServer === 'sglang')) { %>
312
+ # Install aiohttp for the adapter sidecar
313
+ RUN pip install --no-cache-dir aiohttp
314
+
315
+ <% } %>
310
316
  COPY code/cuda_compat.sh /usr/bin/cuda_compat.sh
311
317
  COPY code/cw_log_forwarder.py /usr/bin/cw_log_forwarder.py
312
318
  COPY code/serve /usr/bin/serve
319
+ <% if (enableLora && (modelServer === 'vllm' || modelServer === 'sglang')) { %>
320
+ COPY code/adapter_sidecar.py /usr/bin/adapter_sidecar.py
321
+ <% } %>
313
322
  RUN chmod 777 /usr/bin/serve /usr/bin/cuda_compat.sh
314
323
 
315
324
  <% if (comments && comments.troubleshooting) { %>