npm - @aws/ml-container-creator - Versions diffs - 1.0.3 → 1.1.0 - Mend

@aws/ml-container-creator 1.0.3 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79)

package/README.md +10 -1
package/bin/cli.js +57 -0
package/config/agent.json +16 -0
package/infra/ci-harness/lib/ci-harness-stack.ts +43 -0
package/package.json +5 -2
package/pyproject.toml +3 -0
package/servers/agent-knowledge/index.js +592 -0
package/servers/agent-knowledge/package.json +15 -0
package/servers/base-image-picker/index.js +65 -18
package/servers/instance-sizer/index.js +32 -0
package/servers/lib/catalogs/fleet-drivers.json +38 -0
package/servers/lib/catalogs/model-arch-support.json +51 -0
package/servers/lib/catalogs/model-servers.json +2842 -1730
package/servers/lib/schemas/image-catalog.schema.json +12 -0
package/src/agent/__init__.py +2 -0
package/src/agent/__pycache__/__init__.cpython-312.pyc +0 -0
package/src/agent/__pycache__/config_loader.cpython-312.pyc +0 -0
package/src/agent/__pycache__/context.cpython-312.pyc +0 -0
package/src/agent/__pycache__/health_check.cpython-312.pyc +0 -0
package/src/agent/agent.py +513 -0
package/src/agent/config_loader.py +215 -0
package/src/agent/context.py +380 -0
package/src/agent/data/capability-matrix.json +106 -0
package/src/agent/health_check.py +341 -0
package/src/agent/prompts/system.md +173 -0
package/src/agent/requirements-agent.txt +3 -0
package/src/app.js +6 -4
package/src/lib/generated/cli-options.js +1 -1
package/src/lib/generated/parameter-matrix.js +1 -1
package/src/lib/generated/validation-rules.js +1 -1
package/src/lib/mcp-query-runner.js +110 -3
package/src/lib/prompt-runner.js +66 -22
package/src/lib/template-variable-resolver.js +8 -0
package/src/lib/train-config-builder.js +339 -0
package/src/lib/tune-config-state.js +89 -68
package/templates/do/.benchmark_writer.py +3 -0
package/templates/do/.eval_helper.py +409 -0
package/templates/do/.register_helper.py +185 -11
package/templates/do/.train_build_request.py +102 -113
package/templates/do/.train_helper.py +433 -0
package/templates/do/__pycache__/.register_helper.cpython-312.pyc +0 -0
package/templates/do/adapter +157 -0
package/templates/do/benchmark +60 -3
package/templates/do/config +6 -1
package/templates/do/deploy.d/managed-inference.ejs +83 -0
package/templates/do/evaluate +272 -0
package/templates/do/lib/resolve-instance.sh +155 -0
package/templates/do/register +5 -0
package/templates/do/test +1 -0
package/templates/do/train +879 -126
package/templates/do/training/config.yaml +83 -11
package/templates/do/training/dpo/accelerate_config.yaml +24 -0
package/templates/do/training/dpo/defaults.yaml +26 -0
package/templates/do/training/dpo/prompts.json +8 -0
package/templates/do/training/dpo/train.py +363 -0
package/templates/do/training/sft/accelerate_config.yaml +22 -0
package/templates/do/training/sft/defaults.yaml +18 -0
package/templates/do/training/sft/prompts.json +7 -0
package/templates/do/training/sft/train.py +310 -0
package/templates/do/tune +11 -2
package/src/lib/auto-prompt-builder.js +0 -172
package/src/lib/cli-handler.js +0 -529
package/src/lib/community-reports-validator.js +0 -91
package/src/lib/configuration-exporter.js +0 -204
package/src/lib/dataset-slug.js +0 -152
package/src/lib/docker-introspection-validator.js +0 -51
package/src/lib/known-flags-validator.js +0 -200
package/src/lib/schema-validator.js +0 -157
package/src/lib/train-config-parser.js +0 -136
package/src/lib/train-config-persistence.js +0 -143
package/src/lib/train-config-validator.js +0 -112
package/src/lib/train-feedback.js +0 -46
package/src/lib/train-idempotency.js +0 -97
package/src/lib/train-request-builder.js +0 -120
package/src/lib/tune-dataset-validator.js +0 -279
package/src/lib/tune-output-resolver.js +0 -66
package/templates/do/.train_poll_parser.py +0 -135
package/templates/do/.train_status_parser.py +0 -187
/package/templates/do/training/{train.py → custom/train.py} +0 -0

package/src/agent/config_loader.py ADDED Viewed

@@ -0,0 +1,215 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""Agent configuration loader.
+Reads config/agent.json, applies MCC_* environment variable overrides,
+validates values, and returns a resolved AgentConfig dataclass.
+"""
+from __future__ import annotations
+import json
+import os
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Callable
+@dataclass(frozen=True)
+class AgentConfig:
+    """Resolved agent configuration (immutable after creation).
+    ``frozen=True`` blocks attribute reassignment; the list-valued fields are
+    still ordinary lists. Each field is resolved by ``load_agent_config`` from
+    an MCC_* environment variable, then config/agent.json, then a default.
+    """
+    # Model identifier (JSON key "modelId", env MCC_MODEL_ID).
+    model_id: str
+    # MCP server names (JSON key "mcpServers", env MCC_MCP_SERVERS, comma-separated).
+    mcp_servers: list[str]
+    # Non-negative cost figure (JSON key "inputCostPer1k", env MCC_INPUT_COST_PER_1K);
+    # presumably price per 1k input tokens — TODO confirm unit/currency with the consumer.
+    input_cost_per_1k: float
+    # Non-negative cost figure (JSON key "outputCostPer1k", env MCC_OUTPUT_COST_PER_1K);
+    # presumably price per 1k output tokens — TODO confirm unit/currency with the consumer.
+    output_cost_per_1k: float
+    # Words treated as exit commands (JSON key "exitCommands", env MCC_EXIT_COMMANDS).
+    exit_commands: list[str]
+    # Words treated as reload commands (JSON key "reloadCommands", env MCC_RELOAD_COMMANDS).
+    reload_commands: list[str]
+    # Non-negative integer (JSON key "mcpServerTimeout", env MCC_MCP_SERVER_TIMEOUT);
+    # presumably seconds — TODO confirm against the code that applies the timeout.
+    mcp_server_timeout: int
+# Hardcoded fallback values: load_agent_config() uses each field of this
+# instance as the last step of the precedence chain, when neither the MCC_*
+# environment variable nor config/agent.json supplies a valid value.
+_DEFAULTS = AgentConfig(
+    model_id="us.anthropic.claude-sonnet-4-20250514",
+    mcp_servers=[
+        "instance-sizer",
+        "base-image-picker",
+        "model-picker",
+        "workload-picker",
+        "e2e-status",
+        "agent-knowledge",
+    ],
+    input_cost_per_1k=0.003,
+    output_cost_per_1k=0.015,
+    exit_commands=["exit", "quit", "bye", "q"],
+    reload_commands=["reload"],
+    mcp_server_timeout=30,
+)
+def _warn(msg: str) -> None:
+    """Emit a config warning to stderr."""
+    print(f"[config] warning: {msg}", file=sys.stderr)
+def _resolve_field(
+    field_name: str,
+    env_var: str | None,
+    file_value: Any | None,
+    default_value: Any,
+    parser: Callable[[str], Any],
+    validator: Callable[[Any], bool],
+) -> Any:
+    """Resolve a single config field using the precedence chain.
+    1. Try env var → parse → validate
+    2. Try file value → validate
+    3. Return default
+    """
+    # 1. Environment override
+    if env_var:
+        raw = os.environ.get(env_var)
+        if raw is not None:
+            try:
+                parsed = parser(raw)
+                if validator(parsed):
+                    return parsed
+                else:
+                    _warn(f"{env_var}={raw!r} failed validation, skipping")
+            except (ValueError, TypeError) as e:
+                _warn(f"{env_var}={raw!r} cannot be parsed: {e}")
+    # 2. Config file value
+    if file_value is not None:
+        if validator(file_value):
+            return file_value
+        else:
+            _warn(
+                f"config field '{field_name}' has invalid value {file_value!r}, using default"
+            )
+    # 3. Hardcoded default
+    return default_value
+def load_agent_config(config_path: Path | None = None) -> AgentConfig:
+    """Load, validate, and resolve agent configuration.
+    Resolution order per parameter (highest to lowest):
+      1. MCC_* environment variable (if set and valid)
+      2. Value from config/agent.json (if file exists and value is valid)
+      3. Hardcoded default
+    Args:
+        config_path: Override path to the JSON config file.
+                     Defaults to <package_root>/config/agent.json.
+    Returns:
+        Fully-resolved AgentConfig instance.
+    """
+    if config_path is None:
+        package_root = Path(__file__).resolve().parent.parent.parent
+        config_path = package_root / "config" / "agent.json"
+    # Read and parse config file
+    file_data: dict[str, Any] = {}
+    if config_path.exists():
+        try:
+            file_data = json.loads(config_path.read_text(encoding="utf-8"))
+        except (json.JSONDecodeError, ValueError) as e:
+            _warn(f"config file '{config_path}' contains invalid JSON: {e}")
+    else:
+        _warn(f"config file '{config_path}' not found, using defaults")
+    # Parsers
+    _parse_str: Callable[[str], str] = lambda x: x
+    _parse_list: Callable[[str], list[str]] = lambda raw: [
+        s.strip() for s in raw.split(",")
+    ]
+    _parse_float: Callable[[str], float] = float
+    _parse_int: Callable[[str], int] = int
+    # Validators
+    _valid_str: Callable[[Any], bool] = lambda v: isinstance(v, str)
+    _valid_list_str: Callable[[Any], bool] = lambda v: isinstance(v, list) and all(
+        isinstance(s, str) for s in v
+    )
+    _valid_float_nn: Callable[[Any], bool] = (
+        lambda v: isinstance(v, (int, float)) and v >= 0
+    )
+    _valid_int_nn: Callable[[Any], bool] = lambda v: isinstance(v, int) and v >= 0
+    # Resolve each field
+    model_id = _resolve_field(
+        "modelId",
+        "MCC_MODEL_ID",
+        file_data.get("modelId"),
+        _DEFAULTS.model_id,
+        _parse_str,
+        _valid_str,
+    )
+    mcp_servers = _resolve_field(
+        "mcpServers",
+        "MCC_MCP_SERVERS",
+        file_data.get("mcpServers"),
+        _DEFAULTS.mcp_servers,
+        _parse_list,
+        _valid_list_str,
+    )
+    input_cost_per_1k = _resolve_field(
+        "inputCostPer1k",
+        "MCC_INPUT_COST_PER_1K",
+        file_data.get("inputCostPer1k"),
+        _DEFAULTS.input_cost_per_1k,
+        _parse_float,
+        _valid_float_nn,
+    )
+    output_cost_per_1k = _resolve_field(
+        "outputCostPer1k",
+        "MCC_OUTPUT_COST_PER_1K",
+        file_data.get("outputCostPer1k"),
+        _DEFAULTS.output_cost_per_1k,
+        _parse_float,
+        _valid_float_nn,
+    )
+    exit_commands = _resolve_field(
+        "exitCommands",
+        "MCC_EXIT_COMMANDS",
+        file_data.get("exitCommands"),
+        _DEFAULTS.exit_commands,
+        _parse_list,
+        _valid_list_str,
+    )
+    reload_commands = _resolve_field(
+        "reloadCommands",
+        "MCC_RELOAD_COMMANDS",
+        file_data.get("reloadCommands"),
+        _DEFAULTS.reload_commands,
+        _parse_list,
+        _valid_list_str,
+    )
+    mcp_server_timeout = _resolve_field(
+        "mcpServerTimeout",
+        "MCC_MCP_SERVER_TIMEOUT",
+        file_data.get("mcpServerTimeout"),
+        _DEFAULTS.mcp_server_timeout,
+        _parse_int,
+        _valid_int_nn,
+    )
+    return AgentConfig(
+        model_id=model_id,
+        mcp_servers=mcp_servers,
+        input_cost_per_1k=input_cost_per_1k,
+        output_cost_per_1k=output_cost_per_1k,
+        exit_commands=exit_commands,
+        reload_commands=reload_commands,
+        mcp_server_timeout=mcp_server_timeout,
+    )

package/src/agent/context.py ADDED Viewed

@@ -0,0 +1,380 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""Project context reader for the Strands agent.
+Parses all project configuration files into a structured dict for prompt injection.
+Pure file I/O and regex/YAML parsing — no subprocess calls.
+"""
+from __future__ import annotations
+import json
+import os
+import re
+from pathlib import Path
+from typing import Any
+import yaml
+# Regex patterns for shell export parsing.
+# Each pattern is anchored at both ends and is applied to a stripped line.
+# Group 1 is always the variable name.
+#
+# Matches: export KEY="VALUE"  or  export KEY='VALUE'
+# Group 2 is everything between the quote right after "=" and the quote that
+# ends the line. The two quote characters are not required to be the same
+# kind, and "${...}" inside the quotes is captured verbatim (no expansion).
+_EXPORT_QUOTED_RE = re.compile(
+    r"""^export\s+([A-Za-z_][A-Za-z0-9_]*)=["'](.*)["']\s*$"""
+)
+# Matches: export KEY=${KEY:-DEFAULT}
+# Group 2 is the DEFAULT text (may be empty). The variable named inside the
+# braces is not checked against the exported name.
+_EXPORT_DEFAULT_RE = re.compile(
+    r"""^export\s+([A-Za-z_][A-Za-z0-9_]*)=\$\{[A-Za-z_][A-Za-z0-9_]*:-([^}]*)\}\s*$"""
+)
+# Matches: export KEY=VALUE (unquoted, no spaces in value)
+# The value may not start with a quote or "$", so a trailing inline comment
+# after whitespace prevents a match.
+_EXPORT_BARE_RE = re.compile(
+    r"""^export\s+([A-Za-z_][A-Za-z0-9_]*)=([^\s"'$][^\s]*)\s*$"""
+)
+# Matches: export KEY="" or export KEY='' (empty quoted value); has no group 2.
+_EXPORT_EMPTY_RE = re.compile(
+    r"""^export\s+([A-Za-z_][A-Za-z0-9_]*)=["']["']\s*$"""
+)
+# Dockerfile patterns (case-insensitive instruction keywords)
+# _FROM_RE group 1 is everything after FROM except an optional trailing
+# "AS <name>"; any flags such as --platform=... are part of the capture.
+_FROM_RE = re.compile(r"^FROM\s+(.+?)(?:\s+AS\s+\S+)?\s*$", re.IGNORECASE)
+# _ENTRYPOINT_RE group 1 is the remainder of the ENTRYPOINT line as written.
+_ENTRYPOINT_RE = re.compile(r"^ENTRYPOINT\s+(.+)\s*$", re.IGNORECASE)
+class ProjectContext:
+    """Reads and structures all project configuration for the agent.
+    Parses do/config, do/ic/*.conf, do/training/config.yaml, Dockerfile,
+    do/adapters/*.conf, the bootstrap profile, and user context files into
+    a unified dict suitable for LLM prompt injection.
+    """
+    def __init__(self, project_dir: str) -> None:
+        """Initialize with the project root directory.
+        Args:
+            project_dir: Absolute or relative path to the project root
+                         (the directory containing do/config).
+        """
+        self.project_dir = Path(project_dir).resolve()
+    def load(self) -> dict[str, Any]:
+        """Load all context. Returns structured dict for prompt injection.
+        Gracefully handles missing files — partial context is returned with
+        a ``_missing`` field listing files that could not be parsed.
+        Returns:
+            Dict with project configuration structured for prompt injection.
+        """
+        missing: list[str] = []
+        do_config = self._parse_do_config(missing)
+        ic_env_vars = self._parse_ic_confs(missing)
+        training_config = self._parse_training_config(missing)
+        dockerfile_info = self._parse_dockerfile(missing)
+        adapters = self._parse_adapters(missing)
+        profile = self._load_profile(missing)
+        user_context = self._load_user_context(missing)
+        context: dict[str, Any] = {
+            "project_name": do_config.get("PROJECT_NAME"),
+            "engine": do_config.get("MODEL_SERVER"),
+            "deployment_target": do_config.get("DEPLOYMENT_TARGET"),
+            "model": do_config.get("HF_MODEL_ID") or do_config.get("MODEL_NAME"),
+            "instance_type": do_config.get("INSTANCE_TYPE"),
+            "aws_region": do_config.get("AWS_REGION"),
+            "lora_enabled": do_config.get("ENABLE_LORA", "").lower() == "true",
+            "existing_endpoint": do_config.get("ENDPOINT_NAME")
+            if do_config.get("ENDPOINT_EXTERNAL") == "true"
+            else None,
+            "do_config_vars": do_config,
+            "ic_env_vars": ic_env_vars,
+            "training_config": training_config,
+            "base_image": dockerfile_info.get("base_image"),
+            "entrypoint": dockerfile_info.get("entrypoint"),
+            "adapters": adapters,
+            "profile": profile,
+            "user_context": user_context,
+        }
+        if missing:
+            context["_missing"] = missing
+        return context
+    def _parse_do_config(self, missing: list[str]) -> dict[str, str]:
+        """Parse do/config — regex for export KEY=VALUE lines.
+        Handles:
+          - export KEY="VALUE"
+          - export KEY='VALUE'
+          - export KEY=${KEY:-DEFAULT}
+          - export KEY=VALUE (bare, no spaces)
+          - Multi-line values via single-quoted heredoc-style (rare but possible)
+        Args:
+            missing: Accumulator list for files that could not be found/parsed.
+        Returns:
+            Dict of variable names to their values.
+        """
+        config_path = self.project_dir / "do" / "config"
+        if not config_path.is_file():
+            missing.append("do/config")
+            return {}
+        return self._parse_shell_exports(config_path)
+    def _parse_ic_confs(self, missing: list[str]) -> dict[str, dict[str, str]]:
+        """Parse do/ic/*.conf — IC_ENV_* variables grouped by filename.
+        Args:
+            missing: Accumulator list for files that could not be found/parsed.
+        Returns:
+            Dict mapping conf filename (without .conf) to a dict of variables.
+        """
+        ic_dir = self.project_dir / "do" / "ic"
+        if not ic_dir.is_dir():
+            missing.append("do/ic/")
+            return {}
+        result: dict[str, dict[str, str]] = {}
+        conf_files = sorted(ic_dir.glob("*.conf"))
+        if not conf_files:
+            missing.append("do/ic/*.conf")
+            return {}
+        for conf_path in conf_files:
+            name = conf_path.stem
+            result[name] = self._parse_shell_exports(conf_path)
+        return result
+    def _parse_training_config(self, missing: list[str]) -> dict[str, Any] | None:
+        """Parse do/training/config.yaml via yaml.safe_load().
+        Extracts key fields: technique, instance_type, hyperparameters,
+        dataset, image, and any other top-level keys.
+        Args:
+            missing: Accumulator list for files that could not be found/parsed.
+        Returns:
+            Parsed YAML dict, or None if file is missing/invalid.
+        """
+        yaml_path = self.project_dir / "do" / "training" / "config.yaml"
+        if not yaml_path.is_file():
+            missing.append("do/training/config.yaml")
+            return None
+        try:
+            text = yaml_path.read_text(encoding="utf-8")
+            data = yaml.safe_load(text)
+            if not isinstance(data, dict):
+                missing.append("do/training/config.yaml (invalid format)")
+                return None
+            return data
+        except (yaml.YAMLError, OSError):
+            missing.append("do/training/config.yaml (parse error)")
+            return None
+    def _parse_dockerfile(self, missing: list[str]) -> dict[str, str | None]:
+        """Extract FROM image and ENTRYPOINT from Dockerfile.
+        Args:
+            missing: Accumulator list for files that could not be found/parsed.
+        Returns:
+            Dict with 'base_image' and 'entrypoint' keys.
+        """
+        dockerfile_path = self.project_dir / "Dockerfile"
+        if not dockerfile_path.is_file():
+            missing.append("Dockerfile")
+            return {"base_image": None, "entrypoint": None}
+        try:
+            lines = dockerfile_path.read_text(encoding="utf-8").splitlines()
+        except OSError:
+            missing.append("Dockerfile (read error)")
+            return {"base_image": None, "entrypoint": None}
+        base_image: str | None = None
+        entrypoint: str | None = None
+        for line in lines:
+            stripped = line.strip()
+            # Take the last FROM (multi-stage build — final stage is what runs)
+            match = _FROM_RE.match(stripped)
+            if match:
+                base_image = match.group(1).strip()
+            match = _ENTRYPOINT_RE.match(stripped)
+            if match:
+                entrypoint = match.group(1).strip()
+        return {"base_image": base_image, "entrypoint": entrypoint}
+    def _parse_adapters(self, missing: list[str]) -> list[dict[str, Any]]:
+        """List do/adapters/*.conf with adapter names and key variables.
+        Args:
+            missing: Accumulator list for files that could not be found/parsed.
+        Returns:
+            List of dicts with 'name' and 'vars' for each adapter conf file.
+        """
+        adapters_dir = self.project_dir / "do" / "adapters"
+        if not adapters_dir.is_dir():
+            missing.append("do/adapters/")
+            return []
+        conf_files = sorted(adapters_dir.glob("*.conf"))
+        if not conf_files:
+            missing.append("do/adapters/*.conf")
+            return []
+        adapters: list[dict[str, Any]] = []
+        for conf_path in conf_files:
+            name = conf_path.stem
+            variables = self._parse_shell_exports(conf_path)
+            adapters.append({"name": name, "vars": variables})
+        return adapters
+    def _load_profile(self, missing: list[str]) -> dict[str, Any] | None:
+        """Load bootstrap profile from ~/.ml-container-creator/config.json.
+        Reads the config file, finds the active profile, and returns its
+        config object.
+        Args:
+            missing: Accumulator list for files that could not be found/parsed.
+        Returns:
+            Active profile config dict, or None if unavailable.
+        """
+        config_path = Path.home() / ".ml-container-creator" / "config.json"
+        if not config_path.is_file():
+            missing.append("~/.ml-container-creator/config.json")
+            return None
+        try:
+            text = config_path.read_text(encoding="utf-8")
+            data = json.loads(text)
+        except (json.JSONDecodeError, OSError):
+            missing.append("~/.ml-container-creator/config.json (parse error)")
+            return None
+        if not isinstance(data, dict):
+            missing.append("~/.ml-container-creator/config.json (invalid format)")
+            return None
+        active_name = data.get("activeProfile")
+        profiles = data.get("profiles")
+        if not active_name or not isinstance(profiles, dict):
+            return {"_raw": data, "_note": "no active profile set"}
+        profile_config = profiles.get(active_name)
+        if profile_config is None:
+            return {"_raw": data, "_note": f"active profile '{active_name}' not found in profiles"}
+        return {"name": active_name, "config": profile_config}
+    def _load_user_context(self, missing: list[str]) -> str | None:
+        """Read .mlcc-agent-context.md if it exists in project root.
+        This file allows teams to inject custom patterns, conventions,
+        and project-specific guidance into the agent's system prompt.
+        Args:
+            missing: Accumulator list for files that could not be found/parsed.
+        Returns:
+            File contents as string, or None if file doesn't exist.
+        """
+        context_path = self.project_dir / ".mlcc-agent-context.md"
+        if not context_path.is_file():
+            # This is optional — do not add to missing
+            return None
+        try:
+            return context_path.read_text(encoding="utf-8")
+        except OSError:
+            missing.append(".mlcc-agent-context.md (read error)")
+            return None
+    def _parse_shell_exports(self, file_path: Path) -> dict[str, str]:
+        """Parse shell export statements from a file.
+        Handles multiple patterns:
+          - export KEY="VALUE"
+          - export KEY='VALUE'
+          - export KEY=${KEY:-DEFAULT}
+          - export KEY=BARE_VALUE
+          - Multi-line values with trailing backslash continuation
+        Lines starting with # are treated as comments and skipped.
+        Lines that are not export statements are skipped.
+        Args:
+            file_path: Path to the shell file to parse.
+        Returns:
+            Dict of variable names to their string values.
+        """
+        try:
+            content = file_path.read_text(encoding="utf-8")
+        except OSError:
+            return {}
+        variables: dict[str, str] = {}
+        lines = content.splitlines()
+        i = 0
+        while i < len(lines):
+            line = lines[i]
+            stripped = line.strip()
+            # Skip comments and empty lines
+            if not stripped or stripped.startswith("#"):
+                i += 1
+                continue
+            # Handle line continuation (trailing backslash)
+            while stripped.endswith("\\") and i + 1 < len(lines):
+                i += 1
+                stripped = stripped[:-1] + lines[i].strip()
+            # Try each pattern in order of specificity
+            match = _EXPORT_EMPTY_RE.match(stripped)
+            if match:
+                variables[match.group(1)] = ""
+                i += 1
+                continue
+            match = _EXPORT_QUOTED_RE.match(stripped)
+            if match:
+                variables[match.group(1)] = match.group(2)
+                i += 1
+                continue
+            match = _EXPORT_DEFAULT_RE.match(stripped)
+            if match:
+                variables[match.group(1)] = match.group(2)
+                i += 1
+                continue
+            match = _EXPORT_BARE_RE.match(stripped)
+            if match:
+                variables[match.group(1)] = match.group(2)
+                i += 1
+                continue
+            i += 1
+        return variables