npm - @aws/ml-container-creator - Versions diffs - 1.0.3 → 1.1.0 - Mend

@aws/ml-container-creator 1.0.3 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79)

package/README.md +10 -1
package/bin/cli.js +57 -0
package/config/agent.json +16 -0
package/infra/ci-harness/lib/ci-harness-stack.ts +43 -0
package/package.json +5 -2
package/pyproject.toml +3 -0
package/servers/agent-knowledge/index.js +592 -0
package/servers/agent-knowledge/package.json +15 -0
package/servers/base-image-picker/index.js +65 -18
package/servers/instance-sizer/index.js +32 -0
package/servers/lib/catalogs/fleet-drivers.json +38 -0
package/servers/lib/catalogs/model-arch-support.json +51 -0
package/servers/lib/catalogs/model-servers.json +2842 -1730
package/servers/lib/schemas/image-catalog.schema.json +12 -0
package/src/agent/__init__.py +2 -0
package/src/agent/__pycache__/__init__.cpython-312.pyc +0 -0
package/src/agent/__pycache__/config_loader.cpython-312.pyc +0 -0
package/src/agent/__pycache__/context.cpython-312.pyc +0 -0
package/src/agent/__pycache__/health_check.cpython-312.pyc +0 -0
package/src/agent/agent.py +513 -0
package/src/agent/config_loader.py +215 -0
package/src/agent/context.py +380 -0
package/src/agent/data/capability-matrix.json +106 -0
package/src/agent/health_check.py +341 -0
package/src/agent/prompts/system.md +173 -0
package/src/agent/requirements-agent.txt +3 -0
package/src/app.js +6 -4
package/src/lib/generated/cli-options.js +1 -1
package/src/lib/generated/parameter-matrix.js +1 -1
package/src/lib/generated/validation-rules.js +1 -1
package/src/lib/mcp-query-runner.js +110 -3
package/src/lib/prompt-runner.js +66 -22
package/src/lib/template-variable-resolver.js +8 -0
package/src/lib/train-config-builder.js +339 -0
package/src/lib/tune-config-state.js +89 -68
package/templates/do/.benchmark_writer.py +3 -0
package/templates/do/.eval_helper.py +409 -0
package/templates/do/.register_helper.py +185 -11
package/templates/do/.train_build_request.py +102 -113
package/templates/do/.train_helper.py +433 -0
package/templates/do/__pycache__/.register_helper.cpython-312.pyc +0 -0
package/templates/do/adapter +157 -0
package/templates/do/benchmark +60 -3
package/templates/do/config +6 -1
package/templates/do/deploy.d/managed-inference.ejs +83 -0
package/templates/do/evaluate +272 -0
package/templates/do/lib/resolve-instance.sh +155 -0
package/templates/do/register +5 -0
package/templates/do/test +1 -0
package/templates/do/train +879 -126
package/templates/do/training/config.yaml +83 -11
package/templates/do/training/dpo/accelerate_config.yaml +24 -0
package/templates/do/training/dpo/defaults.yaml +26 -0
package/templates/do/training/dpo/prompts.json +8 -0
package/templates/do/training/dpo/train.py +363 -0
package/templates/do/training/sft/accelerate_config.yaml +22 -0
package/templates/do/training/sft/defaults.yaml +18 -0
package/templates/do/training/sft/prompts.json +7 -0
package/templates/do/training/sft/train.py +310 -0
package/templates/do/tune +11 -2
package/src/lib/auto-prompt-builder.js +0 -172
package/src/lib/cli-handler.js +0 -529
package/src/lib/community-reports-validator.js +0 -91
package/src/lib/configuration-exporter.js +0 -204
package/src/lib/dataset-slug.js +0 -152
package/src/lib/docker-introspection-validator.js +0 -51
package/src/lib/known-flags-validator.js +0 -200
package/src/lib/schema-validator.js +0 -157
package/src/lib/train-config-parser.js +0 -136
package/src/lib/train-config-persistence.js +0 -143
package/src/lib/train-config-validator.js +0 -112
package/src/lib/train-feedback.js +0 -46
package/src/lib/train-idempotency.js +0 -97
package/src/lib/train-request-builder.js +0 -120
package/src/lib/tune-dataset-validator.js +0 -279
package/src/lib/tune-output-resolver.js +0 -66
package/templates/do/.train_poll_parser.py +0 -135
package/templates/do/.train_status_parser.py +0 -187
package/templates/do/training/{train.py → custom/train.py} +0 -0

package/README.md CHANGED Viewed

@@ -74,6 +74,14 @@ ml-container-creator my-model \
 ./do/test         # Test the endpoint
 ```
+### Get help from the advisor
+```bash
+ml-container-creator hey   # Conversational AI advisor (powered by Bedrock)
+```
+Ask questions about your project, get optimization recommendations, troubleshoot issues, and plan workflows. See [Agent docs](https://awslabs.github.io/ml-container-creator/agent/) for details.
 ## Documentation
 Full documentation is available at [awslabs.github.io/ml-container-creator](https://awslabs.github.io/ml-container-creator/).
@@ -83,6 +91,7 @@ Full documentation is available at [awslabs.github.io/ml-container-creator](http
 - [Deployment Guide](https://awslabs.github.io/ml-container-creator/deployments/) — All deployment targets and lifecycle scripts
 - [CI Integration](https://awslabs.github.io/ml-container-creator/ci-integration/) — Automated lifecycle testing for all deployment configurations
 - [Examples](https://awslabs.github.io/ml-container-creator/EXAMPLES/) — Framework-specific walkthroughs
+- [Advisory Agent](https://awslabs.github.io/ml-container-creator/agent/) — Conversational AI advisor (`ml-container-creator hey`)
 - [Troubleshooting](https://awslabs.github.io/ml-container-creator/TROUBLESHOOTING/) — Common issues and solutions
 ## Prerequisites
@@ -97,7 +106,7 @@ Full documentation is available at [awslabs.github.io/ml-container-creator](http
 ### Python dependencies
-The `do/` lifecycle scripts (`do/tune`, `do/stage`, `do/adapter`) require Python packages. Install them in your Python environment before first use:
+The `do/` lifecycle scripts (`do/tune`, `do/train`, `do/stage`, `do/adapter`) require Python packages. Install them in your Python environment before first use:
 ```bash
 # Recommended (fast):

package/bin/cli.js CHANGED Viewed

@@ -4,10 +4,15 @@
 import { createRequire } from 'module';
 import path from 'path';
+import { fileURLToPath } from 'url';
+import { spawn, execSync } from 'child_process';
 import { program, Option, Help } from 'commander';
 import { run } from '../src/app.js';
 import { cliOptions, helpGroups } from '../src/lib/generated/cli-options.js';
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
 const require = createRequire(import.meta.url);
 const { version } = require('../package.json');
@@ -314,4 +319,56 @@ program
         }
     });
+program
+    .command('hey')
+    .description('Chat with the ml-container-creator advisor')
+    .option('--project-dir <dir>', 'Project directory to analyze', process.cwd())
+    .option('-o, --offline', 'Static reference mode (no Bedrock calls)')
+    .action(async (options) => {
+        // 1. Check python3 is available
+        try {
+            execSync('python3 --version', { stdio: 'ignore' });
+        } catch {
+            console.error('❌ python3 not found. Install Python 3.10+ to use the advisor.');
+            console.error('   macOS: brew install python3');
+            console.error('   Ubuntu: sudo apt install python3');
+            process.exit(1);
+        }
+        // 2. If not offline, check strands-agents is installed
+        if (!options.offline) {
+            try {
+                execSync('python3 -c "import strands"', { stdio: 'ignore' });
+            } catch {
+                console.error('❌ strands-agents not installed. Run:');
+                console.error('   pip install -r src/agent/requirements-agent.txt');
+                process.exit(1);
+            }
+        }
+        // 3. Resolve agent script path
+        const agentScript = path.join(__dirname, '..', 'src', 'agent', 'agent.py');
+        // 4. Build args and spawn
+        const args = [agentScript, '--project-dir', options.projectDir];
+        if (options.offline) {
+            args.push('--offline');
+        }
+        const child = spawn('python3', args, {
+            stdio: 'inherit',
+            env: { ...process.env, PYTHONUNBUFFERED: '1' }
+        });
+        // 5. Forward exit code
+        child.on('close', (code) => {
+            process.exit(code ?? 0);
+        });
+        child.on('error', (err) => {
+            console.error(`❌ Failed to start agent: ${err.message}`);
+            process.exit(1);
+        });
+    });
 program.parse();

package/config/agent.json ADDED Viewed

@@ -0,0 +1,16 @@
+{
+  "modelId": "global.anthropic.claude-sonnet-4-5-20250929-v1:0",
+  "mcpServers": [
+    "instance-sizer",
+    "base-image-picker",
+    "model-picker",
+    "workload-picker",
+    "e2e-status",
+    "agent-knowledge"
+  ],
+  "inputCostPer1k": 0.003,
+  "outputCostPer1k": 0.015,
+  "exitCommands": ["exit", "quit", "bye", "q"],
+  "reloadCommands": ["reload"],
+  "mcpServerTimeout": 30
+}

package/infra/ci-harness/lib/ci-harness-stack.ts CHANGED Viewed

@@ -1057,6 +1057,49 @@ export class MlccCiHarnessStack extends cdk.Stack {
         glueTable.addDependency(glueDatabase);
         glueTable.cfnOptions.condition = benchmarkInfraCondition;
+        // Glue Table: mlcc_evaluations — model quality evaluation results
+        // Written by do/evaluate via .eval_helper.py eval-write subcommand.
+        // Partitioned by model + adapter for efficient comparison queries.
+        const evalGlueTable = new glue.CfnTable(this, 'EvaluationResultsTable', {
+            catalogId: this.account,
+            databaseName: 'mlcc_ci',
+            tableInput: {
+                name: 'mlcc_evaluations',
+                tableType: 'EXTERNAL_TABLE',
+                parameters: {
+                    'classification': 'json',
+                },
+                storageDescriptor: {
+                    columns: [
+                        { name: 'project_name', type: 'string', comment: 'MCC project name' },
+                        { name: 'model_name', type: 'string', comment: 'HuggingFace model ID' },
+                        { name: 'adapter_name', type: 'string', comment: 'Adapter name or IC name' },
+                        { name: 'technique', type: 'string', comment: 'Training technique (sft, dpo)' },
+                        { name: 'eval_dataset', type: 'string', comment: 'Evaluation dataset URI or name' },
+                        { name: 'samples_evaluated', type: 'int', comment: 'Number of samples evaluated' },
+                        { name: 'metrics', type: 'string', comment: 'JSON blob of all computed metrics' },
+                        { name: 'timestamp', type: 'string', comment: 'ISO 8601 UTC timestamp' },
+                        { name: 'region', type: 'string', comment: 'AWS region' },
+                    ],
+                    location: `s3://mlcc-benchmark-results-${this.account}-${this.region}/evaluations/`,
+                    inputFormat: 'org.apache.hadoop.mapred.TextInputFormat',
+                    outputFormat: 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',
+                    serdeInfo: {
+                        serializationLibrary: 'org.openx.data.jsonserde.JsonSerDe',
+                        parameters: {
+                            'serialization.format': '1',
+                        },
+                    },
+                },
+                partitionKeys: [
+                    { name: 'model', type: 'string', comment: 'Model name (partition key)' },
+                    { name: 'adapter', type: 'string', comment: 'Adapter name (partition key)' },
+                ],
+            },
+        });
+        evalGlueTable.addDependency(glueDatabase);
+        evalGlueTable.cfnOptions.condition = benchmarkInfraCondition;
         // Configurable lifecycle parameters for the benchmark results bucket
         const benchmarkIaTransitionDays = new cdk.CfnParameter(this, 'BenchmarkIaTransitionDays', {
             type: 'Number',

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@aws/ml-container-creator",
-  "version": "1.0.3",
+  "version": "1.1.0",
   "description": "Build and deploy custom ML containers on AWS SageMaker with minimal configuration.",
   "main": "src/index.js",
   "bin": {
@@ -51,6 +51,8 @@
     "servers/workload-picker/index.js",
     "servers/workload-picker/manifest.json",
     "servers/workload-picker/package.json",
+    "servers/agent-knowledge/index.js",
+    "servers/agent-knowledge/package.json",
     "servers/lib/bedrock-client.js",
     "servers/lib/custom-validators.js",
     "servers/lib/dynamic-resolver.js",
@@ -61,6 +63,7 @@
     "config/bootstrap-stack.json",
     "config/bootstrap-e2e-stack.json",
     "config/parameter-schema-v2.json",
+    "config/agent.json",
     "config/tune-catalog.json",
     "config/presets/",
     "infra/ci-harness/bin/",
@@ -88,7 +91,7 @@
   },
   "scripts": {
     "test": "mocha 'test/**/*.test.js' --ignore 'test/property/**' --recursive --timeout 30000 --parallel",
-    "test:property": "mocha 'test/property/**/*.test.js' --recursive --timeout 60000 --parallel",
+    "test:property": "NODE_OPTIONS='--max-old-space-size=8192' mocha 'test/property/**/*.test.js' --recursive --timeout 60000 --parallel --jobs 4",
     "test:all": "npm run test && npm run test:property",
     "test:fast": "mocha 'test/**/*.test.js' --recursive --timeout 15000 --parallel",
     "test:unit": "mocha 'test/unit/**/*.test.js' --recursive --timeout 15000",

package/pyproject.toml CHANGED Viewed

@@ -15,6 +15,9 @@ dependencies = [
     "pyyaml>=6.0",
 ]
+[tool.pytest.ini_options]
+addopts = "--import-mode=importlib"
 [dependency-groups]
 dev = [
     "pytest>=8.0",