@aws/ml-container-creator 0.2.6 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,8 +17,6 @@ import {
17
17
  modelServerPrompts,
18
18
  modelLoadStrategyPrompts,
19
19
  modelProfilePrompts,
20
- hfTokenPrompts,
21
- ngcApiKeyPrompts,
22
20
  modulePrompts,
23
21
  infraRegionAndTargetPrompts,
24
22
  infraInstancePrompts,
@@ -35,9 +33,13 @@ import {
35
33
 
36
34
  import fs from 'fs';
37
35
  import path from 'path';
36
+ import { execSync } from 'node:child_process';
38
37
  import { fileURLToPath } from 'node:url';
39
38
  import RegistryLoader from './registry-loader.js';
40
39
  import { runPrompts } from '../prompt-adapter.js';
40
+ import { SECRET_CLASSIFICATIONS } from './secret-classification.js';
41
+ import { isSecretsManagerArn } from './arn-detection.js';
42
+ import BootstrapConfig from './bootstrap-config.js';
41
43
 
42
44
  const __pr_filename = fileURLToPath(import.meta.url);
43
45
  const __pr_dirname = path.dirname(__pr_filename);
@@ -170,6 +172,9 @@ export default class PromptRunner {
170
172
  existingConfig
171
173
  );
172
174
 
175
+ // Requirements: 4.2-4.5 — Check model architecture compatibility after base image selection
176
+ this._checkModelArchitectureCompatibility(baseImageAnswers, frameworkAnswers);
177
+
173
178
  // Extract CUDA version from selected base image for instance-sizer context
174
179
  const selectedBaseImageCuda = this._extractCudaFromBaseImage(baseImageAnswers);
175
180
 
@@ -353,13 +358,11 @@ export default class PromptRunner {
353
358
  existingConfig
354
359
  );
355
360
 
356
- const hfTokenAnswers = await this._runPhase(hfTokenPrompts,
357
- { ...frameworkAnswers, ...engineAnswers, ...frameworkVersionAnswers, ...frameworkProfileAnswers, ...modelFormatAnswers, ...modelServerAnswers, ...modelProfileAnswers },
358
- explicitConfig, existingConfig);
359
-
360
- const ngcApiKeyAnswers = await this._runPhase(ngcApiKeyPrompts,
361
- { ...frameworkAnswers, ...engineAnswers, ...frameworkVersionAnswers, ...frameworkProfileAnswers, ...modelFormatAnswers, ...modelServerAnswers, ...modelProfileAnswers },
362
- explicitConfig, existingConfig);
361
+ // Secret prompts: registry-driven secret selection (replaces hardcoded hfToken/ngcApiKey prompts)
362
+ const secretPreviousAnswers = { ...frameworkAnswers, ...engineAnswers, ...frameworkVersionAnswers, ...frameworkProfileAnswers, ...modelFormatAnswers, ...modelServerAnswers, ...modelProfileAnswers };
363
+ const secretAnswers = await this._runSecretPrompts(secretPreviousAnswers, explicitConfig, existingConfig);
364
+ const hfTokenAnswers = { hfToken: secretAnswers.hfToken, hfTokenArn: secretAnswers.hfTokenArn };
365
+ const ngcApiKeyAnswers = { ngcApiKey: secretAnswers.ngcApiKey, ngcTokenArn: secretAnswers.ngcTokenArn };
363
366
 
364
367
  // Module selection
365
368
  const moduleAnswers = await this._runPhase(modulePrompts, { ...frameworkAnswers, ...engineAnswers }, explicitConfig, existingConfig);
@@ -741,6 +744,69 @@ export default class PromptRunner {
741
744
  return null;
742
745
  }
743
746
 
747
+ /**
748
+ * Check model architecture compatibility against the selected base image.
749
+ * Emits an advisory warning if the model's model_type is not in the server's
750
+ * supportedModelTypes. Skips silently if supportedModelTypes is empty (sync not run).
751
+ * Requirements: 4.2, 4.3, 4.4, 4.5
752
+ * @param {Object} baseImageAnswers - Answers from base image selection phase
753
+ * @param {Object} frameworkAnswers - Answers from framework/deployment config phase
754
+ * @private
755
+ */
756
+ _checkModelArchitectureCompatibility(baseImageAnswers, frameworkAnswers) {
757
+ // Requirement 4.5: skip if no model_type was resolved
758
+ if (!this._modelType) return;
759
+
760
+ // Determine the selected image
761
+ const selectedImage = baseImageAnswers.baseImage || baseImageAnswers.customBaseImage;
762
+ if (!selectedImage || selectedImage === 'custom') return;
763
+
764
+ // Resolve the matching choice from MCP base image choices
765
+ if (!this._mcpBaseImageChoices) return;
766
+ const matchingChoice = this._mcpBaseImageChoices.find(c => c.value === selectedImage);
767
+ if (!matchingChoice) return;
768
+
769
+ // Determine the server name from framework answers
770
+ const server = frameworkAnswers.modelServer || frameworkAnswers.backend;
771
+ if (!server) return;
772
+
773
+ // Load the model-servers catalog to find the entry with supportedModelTypes
774
+ try {
775
+ const catalogPath = path.resolve(GENERATOR_ROOT, 'servers', 'lib', 'catalogs', 'model-servers.json');
776
+ const catalog = JSON.parse(fs.readFileSync(catalogPath, 'utf8'));
777
+
778
+ const serverEntries = catalog[server];
779
+ if (!Array.isArray(serverEntries)) return;
780
+
781
+ // Find the catalog entry matching the selected image
782
+ const entry = serverEntries.find(e => e.image === selectedImage);
783
+ if (!entry) return;
784
+
785
+ const supported = entry.supportedModelTypes;
786
+ // Requirement 4.5: skip silently when supportedModelTypes is empty (sync not run)
787
+ if (!supported || supported.length === 0) return;
788
+
789
+ // Requirement 4.2-4.3: cross-reference model_type (lowercased here; supportedModelTypes entries are assumed to already be lowercase)
790
+ const modelTypeLower = this._modelType.toLowerCase();
791
+ if (!supported.includes(modelTypeLower)) {
792
+ const version = entry.labels?.framework_version || entry.tag || 'unknown';
793
+ const docsUrls = {
794
+ vllm: 'https://docs.vllm.ai/en/latest/models/supported_models.html',
795
+ sglang: 'https://sgl-project.github.io/references/supported_models.html',
796
+ 'tensorrt-llm': 'https://nvidia.github.io/TensorRT-LLM/reference/support-matrix.html'
797
+ };
798
+ const docsUrl = docsUrls[server] || `https://github.com/search?q=${server}+supported+models`;
799
+
800
+ // Requirement 4.3-4.4: emit advisory warning (does not block generation)
801
+ console.log(`\n ⚠️ Model architecture "${this._modelType}" may not be supported by ${server} ${version}`);
802
+ console.log(' Consider upgrading to a newer base image, or verify compatibility at:');
803
+ console.log(` ${docsUrl}`);
804
+ }
805
+ } catch (err) {
806
+ // Graceful degradation: if catalog can't be read, skip silently
807
+ }
808
+ }
809
+
744
810
  /**
745
811
  * Get architecture-based heuristic default instance type.
746
812
  * Used when the instance-sizer cannot produce a recommendation.
@@ -917,7 +983,7 @@ export default class PromptRunner {
917
983
 
918
984
  const toolArgs = {
919
985
  modelName,
920
- limit: 8,
986
+ limit: 10,
921
987
  context: {
922
988
  architecture: frameworkAnswers.architecture || undefined,
923
989
  backend: frameworkAnswers.backend || undefined,
@@ -966,13 +1032,17 @@ export default class PromptRunner {
966
1032
  const choices = parsed.choices.instanceType;
967
1033
  const topRec = recommendations[0];
968
1034
  const vramInfo = estimatedVramGb
969
- ? ` (VRAM: ${estimatedVramGb.toFixed(1)}GB)`
970
- : '';
971
- const tpInfo = topRec?.tensorParallelism > 1
972
- ? ` [TP=${topRec.tensorParallelism}]`
1035
+ ? ` (model needs ~${estimatedVramGb.toFixed(1)}GB VRAM)`
973
1036
  : '';
974
1037
 
975
- console.log(` ✓ ${choices.length} sized instance(s): ${choices[0]}${vramInfo}${tpInfo}`);
1038
+ console.log(` ✓ ${choices.length} compatible instance(s) found${vramInfo}`);
1039
+ // Display compact recommendation table
1040
+ for (const rec of recommendations) {
1041
+ const tp = rec.tensorParallelism > 1 ? ` TP=${rec.tensorParallelism}` : '';
1042
+ const vram = rec.totalVramGb ? `${rec.totalVramGb}GB` : '?';
1043
+ const util = rec.utilizationPercent ? `${rec.utilizationPercent}%` : '?';
1044
+ console.log(` ${rec === topRec ? '→' : ' '} ${rec.instanceType.padEnd(20)} ${vram.padStart(5)} VRAM ${util.padStart(4)} util${tp}`);
1045
+ }
976
1046
  } else if (parsed.metadata?.warning) {
977
1047
  console.log(` ⚠️ ${parsed.metadata.warning}`);
978
1048
  } else {
@@ -1376,6 +1446,12 @@ export default class PromptRunner {
1376
1446
  modelFamily = vals.family;
1377
1447
  }
1378
1448
 
1449
+ // Extract model_type for architecture validation
1450
+ // Requirements: 4.1
1451
+ if (vals.model_type) {
1452
+ this._modelType = vals.model_type;
1453
+ }
1454
+
1379
1455
  // Extract model source metadata for loading adapter
1380
1456
  // Requirements: 2.1, 2.2, 2.3, 2.4
1381
1457
  if (vals.provider) {
@@ -1427,6 +1503,11 @@ export default class PromptRunner {
1427
1503
  if (hfData.chatTemplate) {
1428
1504
  chatTemplate = hfData.chatTemplate;
1429
1505
  }
1506
+ // Extract model_type for architecture validation
1507
+ // Requirements: 4.1
1508
+ if (hfData.modelConfig?.model_type) {
1509
+ this._modelType = hfData.modelConfig.model_type;
1510
+ }
1430
1511
  console.log(' ✅ Found on HuggingFace Hub');
1431
1512
  } else {
1432
1513
  console.log(' ℹ️ Not found on HuggingFace Hub (may be private or offline)');
@@ -1554,6 +1635,332 @@ export default class PromptRunner {
1554
1635
  }
1555
1636
  }
1556
1637
 
1638
+ /**
1639
+ * Run secret prompts using the Secret_Classification registry.
1640
+ * For each secret type whose stages apply to the current context:
1641
+ * - Query for managed secrets of that type
1642
+ * - If managed secrets exist: show selection list (secrets + "Enter plaintext token" + "Skip")
1643
+ * - If no managed secrets exist: fall back to existing plaintext prompt
1644
+ *
1645
+ * Requirements: 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7, 8.8, 8.9
1646
+ * @param {object} previousAnswers - Answers from previous prompt phases
1647
+ * @param {object} explicitConfig - Explicit CLI/config values
1648
+ * @param {object} existingConfig - Existing project configuration
1649
+ * @returns {Promise<object>} Object with token/ARN values keyed by config field names
1650
+ * @private
1651
+ */
1652
+ async _runSecretPrompts(previousAnswers, explicitConfig, existingConfig) {
1653
+ const results = {};
1654
+
1655
+ for (const classification of SECRET_CLASSIFICATIONS) {
1656
+ // Check if this secret type's stages apply to the current context
1657
+ if (!this._secretStagesApply(classification, previousAnswers)) continue;
1658
+
1659
+ // Determine the config keys for this classification
1660
+ const arnConfigKey = this._getArnConfigKey(classification);
1661
+ const plaintextConfigKey = this._getPlaintextConfigKey(classification);
1662
+
1663
+ // Skip if ARN already provided via CLI flag
1664
+ if (explicitConfig[arnConfigKey]) {
1665
+ results[arnConfigKey] = explicitConfig[arnConfigKey];
1666
+ continue;
1667
+ }
1668
+
1669
+ // Skip if plaintext already provided via CLI flag
1670
+ if (explicitConfig[plaintextConfigKey]) {
1671
+ results[plaintextConfigKey] = explicitConfig[plaintextConfigKey];
1672
+ continue;
1673
+ }
1674
+
1675
+ // Query for existing managed secrets of this type
1676
+ const managedSecrets = await this._listManagedSecrets(classification.identifier);
1677
+
1678
+ if (managedSecrets.length > 0) {
1679
+ // Show selection list: managed secrets + plaintext entry + skip
1680
+ const answer = await this._promptSecretSelection(classification, managedSecrets, previousAnswers);
1681
+ Object.assign(results, answer);
1682
+ } else {
1683
+ // Fall back to existing plaintext prompt
1684
+ const answer = await this._promptPlaintextFallback(classification, previousAnswers, explicitConfig, existingConfig);
1685
+ Object.assign(results, answer);
1686
+ }
1687
+ }
1688
+
1689
+ return results;
1690
+ }
1691
+
1692
+ /**
1693
+ * Determine if a secret classification's stages apply to the current generation context.
1694
+ * Build-time secrets apply when the project involves a Docker build step.
1695
+ * Runtime secrets apply when the architecture uses HuggingFace Hub models.
1696
+ * Requirements: 8.9
1697
+ * @param {object} classification - Secret classification entry
1698
+ * @param {object} answers - Current answers from previous phases
1699
+ * @returns {boolean} True if the secret type is applicable
1700
+ * @private
1701
+ */
1702
+ _secretStagesApply(classification, answers) {
1703
+ const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
1704
+ const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
1705
+
1706
+ if (classification.identifier === 'hf-token') {
1707
+ // HF token applies to transformers, diffusors, and Triton LLM backends
1708
+ const isTransformers = architecture === 'transformers';
1709
+ const isDiffusors = architecture === 'diffusors';
1710
+ const isTritonLlm = architecture === 'triton' && (backend === 'vllm' || backend === 'tensorrtllm');
1711
+
1712
+ if (!isTransformers && !isDiffusors && !isTritonLlm) return false;
1713
+
1714
+ // Skip for non-HuggingFace model sources
1715
+ const modelSource = answers.modelSource;
1716
+ if (modelSource && modelSource !== 'huggingface') return false;
1717
+
1718
+ return true;
1719
+ }
1720
+
1721
+ if (classification.identifier === 'ngc-token') {
1722
+ // NGC token only applies to transformers-tensorrt-llm (build-time only)
1723
+ if (architecture === 'triton') return false;
1724
+ if (architecture === 'diffusors') return false;
1725
+ return architecture === 'transformers' && backend === 'tensorrt-llm';
1726
+ }
1727
+
1728
+ // For future secret types, check if any stage applies
1729
+ // Build-time applies to all Docker-based deployments
1730
+ // Runtime applies to architectures that download at startup
1731
+ return classification.stages.length > 0;
1732
+ }
1733
+
1734
+ /**
1735
+ * Get the ARN config key for a classification.
1736
+ * Maps classification identifiers to config field names.
1737
+ * @param {object} classification - Secret classification entry
1738
+ * @returns {string} Config key for the ARN value
1739
+ * @private
1740
+ */
1741
+ _getArnConfigKey(classification) {
1742
+ const keyMap = {
1743
+ 'hf-token': 'hfTokenArn',
1744
+ 'ngc-token': 'ngcTokenArn'
1745
+ };
1746
+ return keyMap[classification.identifier] || `${classification.identifier.replace(/-([a-z])/g, (_, c) => c.toUpperCase())}Arn`;
1747
+ }
1748
+
1749
+ /**
1750
+ * Get the plaintext config key for a classification.
1751
+ * Maps classification identifiers to config field names.
1752
+ * @param {object} classification - Secret classification entry
1753
+ * @returns {string} Config key for the plaintext value
1754
+ * @private
1755
+ */
1756
+ _getPlaintextConfigKey(classification) {
1757
+ const keyMap = {
1758
+ 'hf-token': 'hfToken',
1759
+ 'ngc-token': 'ngcApiKey'
1760
+ };
1761
+ return keyMap[classification.identifier] || classification.identifier.replace(/-([a-z])/g, (_, c) => c.toUpperCase());
1762
+ }
1763
+
1764
+ /**
1765
+ * List managed secrets of a given type from AWS Secrets Manager.
1766
+ * Uses the active bootstrap profile to query for secrets carrying the mlcc:managed-by tag,
1767
+ * then keeps only those whose mlcc:secret-type tag equals the given identifier.
1768
+ * @param {string} secretType - The secret type identifier (e.g., 'hf-token')
1769
+ * @returns {Promise<Array<{name: string, arn: string}>>} Array of managed secrets
1770
+ * @private
1771
+ */
1772
+ async _listManagedSecrets(secretType) {
1773
+ try {
1774
+ const bootstrapConfig = new BootstrapConfig();
1775
+ const activeProfile = bootstrapConfig.getActiveProfile();
1776
+ if (!activeProfile) return [];
1777
+
1778
+ const profile = activeProfile.config.awsProfile;
1779
+ const region = activeProfile.config.awsRegion;
1780
+ if (!profile || !region) return [];
1781
+
1782
+ const command = `aws secretsmanager list-secrets --filters Key=tag-key,Values=mlcc:managed-by Key=tag-value,Values=ml-container-creator --region ${region} --profile ${profile} --output json`;
1783
+ const output = execSync(command, { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'], timeout: 10000 });
1784
+ const trimmed = output.trim();
1785
+ if (!trimmed) return [];
1786
+
1787
+ const result = JSON.parse(trimmed);
1788
+ const secrets = result.SecretList || [];
1789
+
1790
+ // Filter by secret type tag
1791
+ return secrets
1792
+ .filter(secret => {
1793
+ const typeTag = (secret.Tags || []).find(t => t.Key === 'mlcc:secret-type');
1794
+ return typeTag && typeTag.Value === secretType;
1795
+ })
1796
+ .map(secret => ({
1797
+ name: secret.Name,
1798
+ arn: secret.ARN
1799
+ }));
1800
+ } catch {
1801
+ // If AWS CLI fails (not configured, no credentials, etc.), return empty
1802
+ return [];
1803
+ }
1804
+ }
1805
+
1806
+ /**
1807
+ * Display a selection list for managed secrets of a given type.
1808
+ * Shows available secrets plus options for plaintext entry and skip.
1809
+ * Requirements: 8.1, 8.2, 8.3, 8.4, 8.5, 8.6
1810
+ * @param {object} classification - Secret classification entry
1811
+ * @param {Array<{name: string, arn: string}>} managedSecrets - Available managed secrets
1812
+ * @param {object} previousAnswers - Answers from previous phases
1813
+ * @returns {Promise<object>} Object with the selected value keyed by config field name
1814
+ * @private
1815
+ */
1816
+ async _promptSecretSelection(classification, managedSecrets, previousAnswers) {
1817
+ const arnConfigKey = this._getArnConfigKey(classification);
1818
+
1819
+ console.log(`\n🔐 ${classification.displayName}`);
1820
+ console.log(` ${classification.purpose}`);
1821
+
1822
+ // Build choices: managed secrets + enter plaintext + skip
1823
+ const choices = [
1824
+ ...managedSecrets.map(secret => ({
1825
+ name: `🔒 ${secret.name} (${secret.arn})`,
1826
+ value: secret.arn,
1827
+ short: secret.name
1828
+ })),
1829
+ { name: '✏️ Enter plaintext token', value: '__plaintext__', short: 'Plaintext' },
1830
+ { name: '⏭️ Skip (use environment variable)', value: '__skip__', short: 'Skip' }
1831
+ ];
1832
+
1833
+ const { secretSelection } = await this._runPrompts([{
1834
+ type: 'list',
1835
+ name: 'secretSelection',
1836
+ message: `Select ${classification.promptLabel}:`,
1837
+ choices
1838
+ }]);
1839
+
1840
+ if (secretSelection === '__skip__') {
1841
+ return {};
1842
+ }
1843
+
1844
+ if (secretSelection === '__plaintext__') {
1845
+ // Use existing plaintext flow
1846
+ return this._promptPlaintextEntry(classification, previousAnswers);
1847
+ }
1848
+
1849
+ // User selected a managed secret ARN
1850
+ return { [arnConfigKey]: secretSelection };
1851
+ }
1852
+
1853
+ /**
1854
+ * Prompt for plaintext token entry with ARN detection.
1855
+ * If the user enters an ARN, store it as an ARN reference.
1856
+ * Requirements: 8.4, 8.5, 8.6
1857
+ * @param {object} classification - Secret classification entry
1858
+ * @param {object} _previousAnswers - Answers from previous phases (currently unused)
1859
+ * @returns {Promise<object>} Object with the value keyed by config field name
1860
+ * @private
1861
+ */
1862
+ async _promptPlaintextEntry(classification, _previousAnswers) {
1863
+ const arnConfigKey = this._getArnConfigKey(classification);
1864
+ const plaintextConfigKey = this._getPlaintextConfigKey(classification);
1865
+
1866
+ const { tokenValue } = await this._runPrompts([{
1867
+ type: 'input',
1868
+ name: 'tokenValue',
1869
+ message: `${classification.promptLabel} (enter token, ARN, or leave empty):`,
1870
+ validate: (input) => {
1871
+ // Empty is valid
1872
+ if (!input || input.trim() === '') return true;
1873
+ // Environment variable reference is valid
1874
+ if (input.trim().startsWith('$')) return true;
1875
+ return true;
1876
+ }
1877
+ }]);
1878
+
1879
+ if (!tokenValue || tokenValue.trim() === '') {
1880
+ return {};
1881
+ }
1882
+
1883
+ const value = tokenValue.trim();
1884
+
1885
+ // ARN detection: if the value is a Secrets Manager ARN, store as ARN
1886
+ if (isSecretsManagerArn(value)) {
1887
+ return { [arnConfigKey]: value };
1888
+ }
1889
+
1890
+ // Otherwise store as plaintext
1891
+ return { [plaintextConfigKey]: value };
1892
+ }
1893
+
1894
+ /**
1895
+ * Fall back to existing plaintext prompt when no managed secrets exist.
1896
+ * Uses the same prompts as the original hfTokenPrompts/ngcApiKeyPrompts
1897
+ * but with ARN detection on the input.
1898
+ * Requirements: 8.7
1899
+ * @param {object} classification - Secret classification entry
1900
+ * @param {object} _previousAnswers - Answers from previous phases (currently unused)
1901
+ * @param {object} _explicitConfig - Explicit CLI/config values (currently unused)
1902
+ * @param {object} _existingConfig - Existing project configuration (currently unused)
1903
+ * @returns {Promise<object>} Object with the value keyed by config field name
1904
+ * @private
1905
+ */
1906
+ async _promptPlaintextFallback(classification, _previousAnswers, _explicitConfig, _existingConfig) {
1907
+ const arnConfigKey = this._getArnConfigKey(classification);
1908
+ const plaintextConfigKey = this._getPlaintextConfigKey(classification);
1909
+
1910
+ // If in auto-prompt mode, skip
1911
+ if (this.configManager?.isAutoPrompt()) {
1912
+ return {};
1913
+ }
1914
+
1915
+ // Display context-appropriate security message
1916
+ if (classification.identifier === 'hf-token') {
1917
+ console.log('\n🔐 HuggingFace Authentication');
1918
+ console.log(' Many models (e.g. Llama, Mistral) are gated and require a token.');
1919
+ console.log(' 💡 Tip: Use `ml-container-creator secrets create --type hf-token` to store');
1920
+ console.log(' your token in AWS Secrets Manager for zero-knowledge operation.');
1921
+ console.log(' For CI/CD pipelines, use "$HF_TOKEN" to reference an environment variable.\n');
1922
+ } else if (classification.identifier === 'ngc-token') {
1923
+ console.log('\n🔐 NVIDIA NGC Authentication');
1924
+ console.log(' TensorRT-LLM base images are hosted on NVIDIA NGC and require an API key.');
1925
+ console.log(' 💡 Tip: Use `ml-container-creator secrets create --type ngc-token` to store');
1926
+ console.log(' your key in AWS Secrets Manager for zero-knowledge operation.');
1927
+ console.log(' For CI/CD pipelines, use "$NGC_API_KEY" to reference an environment variable.\n');
1928
+ } else {
1929
+ console.log(`\n🔐 ${classification.displayName}`);
1930
+ console.log(` ${classification.purpose}\n`);
1931
+ }
1932
+
1933
+ const { tokenValue } = await this._runPrompts([{
1934
+ type: 'input',
1935
+ name: 'tokenValue',
1936
+ message: `${classification.promptLabel} (enter token, ARN, "$${classification.envVar}" for env var, or leave empty):`,
1937
+ validate: (input) => {
1938
+ if (!input || input.trim() === '') return true;
1939
+ if (input.trim().startsWith('$')) return true;
1940
+ // Warn about HF token format
1941
+ if (classification.identifier === 'hf-token' && !input.startsWith('hf_') && !isSecretsManagerArn(input)) {
1942
+ console.warn('\n⚠️ Warning: HuggingFace tokens typically start with "hf_"');
1943
+ console.warn(' If this is intentional, you can ignore this warning.');
1944
+ }
1945
+ return true;
1946
+ }
1947
+ }]);
1948
+
1949
+ if (!tokenValue || tokenValue.trim() === '') {
1950
+ return {};
1951
+ }
1952
+
1953
+ const value = tokenValue.trim();
1954
+
1955
+ // ARN detection: if the value is a Secrets Manager ARN, store as ARN
1956
+ if (isSecretsManagerArn(value)) {
1957
+ return { [arnConfigKey]: value };
1958
+ }
1959
+
1960
+ // Otherwise store as plaintext
1961
+ return { [plaintextConfigKey]: value };
1962
+ }
1963
+
1557
1964
  /**
1558
1965
  * CUDA-to-AMI mapping.
1559
1966
  * Maps CUDA major.minor versions to the SageMaker inference AMI that provides
@@ -1561,13 +1968,13 @@ export default class PromptRunner {
1561
1968
  * @private
1562
1969
  */
1563
1970
  static CUDA_AMI_MAP = {
1564
- '11.0': 'al2-ami-sagemaker-inference-gpu-2-1',
1971
+ '11.0': 'al2-ami-sagemaker-inference-gpu-2',
1565
1972
  '11.4': 'al2-ami-sagemaker-inference-gpu-2-1',
1566
- '11.8': 'al2-ami-sagemaker-inference-gpu-3-1',
1973
+ '11.8': 'al2-ami-sagemaker-inference-gpu-2-1',
1567
1974
  '12.1': 'al2-ami-sagemaker-inference-gpu-3-1',
1568
- '12.2': 'al2-ami-sagemaker-inference-gpu-3-2',
1569
- '12.4': 'al2-ami-sagemaker-inference-gpu-3-2',
1570
- '12.6': 'al2-ami-sagemaker-inference-gpu-3-2'
1975
+ '12.2': 'al2023-ami-sagemaker-inference-gpu-4-1',
1976
+ '12.4': 'al2023-ami-sagemaker-inference-gpu-4-1',
1977
+ '12.6': 'al2023-ami-sagemaker-inference-gpu-4-1'
1571
1978
  };
1572
1979
 
1573
1980
  /**
@@ -1053,7 +1053,7 @@ function formatImageChoices(entries, isTransformer) {
1053
1053
  ? `${entry.repository.padEnd(30)} ${entry.tag.padEnd(16)} ${entry.architecture.padEnd(7)} ${cuda.padEnd(6)} ${python.padEnd(8)} ${date}`
1054
1054
  : `${entry.repository.padEnd(30)} ${entry.tag.padEnd(16)} ${entry.architecture.padEnd(7)} ${python.padEnd(8)} ${date}`;
1055
1055
 
1056
- return { name, value: entry.image };
1056
+ return { name, value: entry.image, _meta: { labels: entry.labels, accelerator: entry.accelerator } };
1057
1057
  });
1058
1058
  }
1059
1059